{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac",
   "metadata": {
    "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac"
   },
   "source": [
    "# Klasifikasi Teks menggunakan ANN\n",
    "## Fahrizal Setiawan\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "summary",
   "metadata": {},
   "source": [
    "Sentiment classification of short Indonesian product reviews with TF-IDF features and a feed-forward neural network (scikit-learn `MLPClassifier`).\n",
    "\n",
    "The dataset is a seven-sentence toy example, so the metrics below are illustrative only."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.neural_network import MLPClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tunable settings, gathered in one place so the cells below contain no magic numbers.\n",
    "SEED = 42                       # shared by the split and the network for reproducible runs\n",
    "TEST_SIZE = 0.3                 # fraction of rows held out for evaluation\n",
    "MAX_FEATURES = 5000             # upper bound on the TF-IDF vocabulary size\n",
    "HIDDEN_LAYER_SIZES = (256, 64)  # units in each hidden layer of the network\n",
    "MAX_ITER = 500                  # upper bound on training iterations"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dataset-header",
   "metadata": {},
   "source": [
    "## 1. Example dataset\n",
    "\n",
    "Replace `LABELED_TEXTS` with your own data (CSV, JSON, ...) as long as the resulting frame has a `text` and a `label` column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dataset",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each review is stored next to its label, so texts and labels cannot drift out of sync.\n",
    "LABELED_TEXTS = [\n",
    "    (\"Saya suka produk ini, luar biasa\", \"positive\"),\n",
    "    (\"Layanannya buruk, saya sangat kecewa\", \"negative\"),\n",
    "    (\"Penjual tidak responsif, sangat kecewa\", \"negative\"),\n",
    "    (\"Pembelian terbaik yang pernah saya lakukan\", \"positive\"),\n",
    "    (\"Saya benci produk ini, buang-buang uang\", \"negative\"),\n",
    "    (\"Kualitasnya sangat bagus, direkomendasikan\", \"positive\"),\n",
    "    (\"Pengalaman buruk, tidak akan membeli lagi\", \"negative\"),\n",
    "]\n",
    "\n",
    "df = pd.DataFrame(LABELED_TEXTS, columns=[\"text\", \"label\"])\n",
    "assert set(df[\"label\"]) == {\"positive\", \"negative\"}, \"unexpected label value\"\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "split-header",
   "metadata": {},
   "source": [
    "## 2. Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "split",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stratify on the label so the class balance of this very small dataset\n",
    "# is kept as close as possible in both the train and the test set.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    df[\"text\"],\n",
    "    df[\"label\"],\n",
    "    test_size=TEST_SIZE,\n",
    "    random_state=SEED,\n",
    "    stratify=df[\"label\"],\n",
    ")\n",
    "assert len(X_train) + len(X_test) == len(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "tfidf-header",
   "metadata": {},
   "source": [
    "## 3. TF-IDF vectorization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "tfidf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the vocabulary on the training texts only; the test texts are only transformed.\n",
    "tfidf = TfidfVectorizer(max_features=MAX_FEATURES)\n",
    "X_train_tfidf = tfidf.fit_transform(X_train)\n",
    "X_test_tfidf = tfidf.transform(X_test)\n",
    "X_train_tfidf.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "model-header",
   "metadata": {},
   "source": [
    "## 4. Feed-forward ANN (`MLPClassifier`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "model",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = MLPClassifier(\n",
    "    hidden_layer_sizes=HIDDEN_LAYER_SIZES,\n",
    "    activation=\"relu\",\n",
    "    solver=\"adam\",\n",
    "    max_iter=MAX_ITER,\n",
    "    random_state=SEED,\n",
    ")\n",
    "model.fit(X_train_tfidf, y_train);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "evaluation-header",
   "metadata": {},
   "source": [
    "## 5. Model evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "evaluation",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test_tfidf)\n",
    "\n",
    "print(\"=== Classification Report ===\")\n",
    "# zero_division=0 reports 0.0 for a class that is never predicted\n",
    "# instead of emitting UndefinedMetricWarning.\n",
    "print(classification_report(y_test, y_pred, labels=model.classes_, zero_division=0))\n",
    "\n",
    "print(\"=== Confusion Matrix ===\")\n",
    "# Rows are true labels, columns are predicted labels, both ordered as model.classes_.\n",
    "print(confusion_matrix(y_test, y_pred, labels=model.classes_))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "prediction-header",
   "metadata": {},
   "source": [
    "## 6. Predict new texts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "predict-function",
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_sentiment(text: str, vectorizer: TfidfVectorizer, classifier: MLPClassifier) -> str:\n",
    "    \"\"\"Return the label predicted for a single raw text.\n",
    "\n",
    "    Args:\n",
    "        text: The sentence to classify.\n",
    "        vectorizer: A fitted TF-IDF vectorizer used to encode ``text``.\n",
    "        classifier: A fitted classifier trained on features from ``vectorizer``.\n",
    "\n",
    "    Returns:\n",
    "        The predicted class label.\n",
    "    \"\"\"\n",
    "    features = vectorizer.transform([text])\n",
    "    return classifier.predict(features)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "predict-samples",
   "metadata": {},
   "outputs": [],
   "source": [
    "SAMPLE_TEXTS = [\n",
    "    \"barang buruk, saya kecewa\",\n",
    "    \"saya benci barang ini\",\n",
    "]\n",
    "\n",
    "for sample_text in SAMPLE_TEXTS:\n",
    "    prediction = predict_sentiment(sample_text, tfidf, model)\n",
    "    print(\"\\nPrediksi untuk:\", sample_text)\n",
    "    print(\"Hasil:\", prediction)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  },
  "colab": {
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}