diff --git a/Klasifikasi_Teks_FNN.ipynb b/Klasifikasi_Teks_FNN.ipynb new file mode 100644 index 0000000..2f68593 --- /dev/null +++ b/Klasifikasi_Teks_FNN.ipynb @@ -0,0 +1,493 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac", + "metadata": { + "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac" + }, + "source": [ + "# Klasifikasi Teks menggunakan FNN\n", + "## Lusiana Situmorang\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "53a214ae-c9cf-4d46-925d-068f1685537b", + "metadata": { + "id": "53a214ae-c9cf-4d46-925d-068f1685537b" + }, + "outputs": [], + "source": [ + "# =========================\n", + "# 1. IMPORT LIBRARY\n", + "# =========================\n", + "import re\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.metrics import accuracy_score, classification_report\n", + "\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, Dropout\n", + "from tensorflow.keras.utils import to_categorical" + ] + }, + { + "cell_type": "code", + "source": [ + "# =========================\n", + "# 2. DATA TEKS MANUAL\n", + "# =========================\n", + "texts = [\n", + " \"saya suka belajar machine learning\",\n", + " \"data science sangat menarik\",\n", + " \"saya tidak suka matematika\",\n", + " \"python mudah dipelajari\",\n", + " \"machine learning membutuhkan data\",\n", + " \"saya benci debugging error\",\n", + " \"belajar python menyenangkan\",\n", + " \"matematika penting dalam data science\"\n", + "]\n", + "\n", + "labels = [\n", + " \"positif\",\n", + " \"positif\",\n", + " \"negatif\",\n", + " \"positif\",\n", + " \"netral\",\n", + " \"negatif\",\n", + " \"positif\",\n", + " \"netral\"\n", + "]" + ], + "metadata": { + "id": "YUpHatB8LATR" + }, + "id": "YUpHatB8LATR", + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9f7d90fe-4af4-446c-9547-c9312bfa6fc7", + "metadata": { + "id": "9f7d90fe-4af4-446c-9547-c9312bfa6fc7" + }, + "outputs": [], + "source": [ + "# =========================\n", + "# 3. PREPROCESSING TEKS\n", + "# =========================\n", + "def clean_text(text):\n", + " text = text.lower()\n", + " text = re.sub(r\"[^a-z\\s]\", \"\", text)\n", + " text = re.sub(r\"\\s+\", \" \", text).strip()\n", + " return text\n", + "\n", + "texts = [clean_text(t) for t in texts]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d4b9a7c2-0f08-43fd-8da8-018d839a4917", + "metadata": { + "id": "d4b9a7c2-0f08-43fd-8da8-018d839a4917" + }, + "outputs": [], + "source": [ + "# =========================\n", + "# 4. ENCODING LABEL\n", + "# =========================\n", + "le = LabelEncoder()\n", + "y = le.fit_transform(labels)\n", + "y = to_categorical(y)" + ] + }, + { + "cell_type": "code", + "source": [ + "# =========================\n", + "# 5. FEATURE EXTRACTION (TF-IDF)\n", + "# =========================\n", + "vectorizer = TfidfVectorizer(max_features=1000)\n", + "X = vectorizer.fit_transform(texts).toarray()" + ], + "metadata": { + "id": "jgBIwoPxLJkw" + }, + "id": "jgBIwoPxLJkw", + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# =========================\n", + "# 6. SPLIT DATA\n", + "# =========================\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.25, random_state=42\n", + ")" + ], + "metadata": { + "id": "112p_r_WLMEI" + }, + "id": "112p_r_WLMEI", + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# =========================\n", + "# 7. MODEL FNN (DIMODIFIKASI)\n", + "# =========================\n", + "model = Sequential()\n", + "model.add(Dense(64, activation=\"relu\", input_shape=(X_train.shape[1],)))\n", + "model.add(Dropout(0.3))\n", + "model.add(Dense(32, activation=\"relu\"))\n", + "model.add(Dense(y.shape[1], activation=\"softmax\"))\n", + "\n", + "model.compile(\n", + " optimizer=\"adam\",\n", + " loss=\"categorical_crossentropy\",\n", + " metrics=[\"accuracy\"]\n", + ")\n", + "\n", + "model.summary()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 329 + }, + "id": "WFemRciMLORN", + "outputId": "a76a6d80-fce1-4ca7-fcb5-9d6e048b7382" + }, + "id": "WFemRciMLORN", + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/layers/core/dense.py:93: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", + " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1mModel: \"sequential\"\u001b[0m\n" + ], + "text/html": [ + "
Model: \"sequential\"\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", + "│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m1,408\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m2,080\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m99\u001b[0m │\n", + "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" + ], + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+              "┃ Layer (type)                     Output Shape                  Param # ┃\n",
+              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
+              "│ dense (Dense)                   │ (None, 64)             │         1,408 │\n",
+              "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+              "│ dropout (Dropout)               │ (None, 64)             │             0 │\n",
+              "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+              "│ dense_1 (Dense)                 │ (None, 32)             │         2,080 │\n",
+              "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+              "│ dense_2 (Dense)                 │ (None, 3)              │            99 │\n",
+              "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m3,587\u001b[0m (14.01 KB)\n" + ], + "text/html": [ + "
 Total params: 3,587 (14.01 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m3,587\u001b[0m (14.01 KB)\n" + ], + "text/html": [ + "
 Trainable params: 3,587 (14.01 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" + ], + "text/html": [ + "
 Non-trainable params: 0 (0.00 B)\n",
+              "
\n" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "# =========================\n", + "# 8. TRAINING\n", + "# =========================\n", + "history = model.fit(\n", + " X_train,\n", + " y_train,\n", + " epochs=30,\n", + " batch_size=4,\n", + " validation_split=0.2,\n", + " verbose=1\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQH90ZNrLRBt", + "outputId": "40144c0a-ecbd-478b-a35c-30a51c755836" + }, + "id": "TQH90ZNrLRBt", + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Epoch 1/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 2s/step - accuracy: 0.5000 - loss: 1.0428 - val_accuracy: 0.0000e+00 - val_loss: 1.0974\n", + "Epoch 2/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 168ms/step - accuracy: 0.2500 - loss: 1.0820 - val_accuracy: 0.0000e+00 - val_loss: 1.1007\n", + "Epoch 3/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 239ms/step - accuracy: 0.5000 - loss: 1.0417 - val_accuracy: 0.0000e+00 - val_loss: 1.1046\n", + "Epoch 4/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 86ms/step - accuracy: 0.7500 - loss: 1.0252 - val_accuracy: 0.0000e+00 - val_loss: 1.1079\n", + "Epoch 5/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 88ms/step - accuracy: 0.5000 - loss: 1.0166 - val_accuracy: 0.0000e+00 - val_loss: 1.1109\n", + "Epoch 6/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 132ms/step - accuracy: 0.5000 - loss: 0.9857 - val_accuracy: 0.0000e+00 - val_loss: 1.1139\n", + "Epoch 7/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 93ms/step - accuracy: 1.0000 - loss: 0.9839 - val_accuracy: 0.0000e+00 - val_loss: 1.1175\n", + "Epoch 8/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 92ms/step - accuracy: 1.0000 - loss: 0.9848 - val_accuracy: 0.0000e+00 - val_loss: 1.1218\n", + "Epoch 9/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 87ms/step - accuracy: 1.0000 - loss: 0.9697 - val_accuracy: 0.0000e+00 - val_loss: 1.1260\n", + "Epoch 10/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 93ms/step - accuracy: 0.7500 - loss: 0.9688 - val_accuracy: 0.0000e+00 - val_loss: 1.1295\n", + "Epoch 11/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 94ms/step - accuracy: 1.0000 - loss: 0.9126 - val_accuracy: 0.0000e+00 - val_loss: 1.1330\n", + "Epoch 12/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 87ms/step - accuracy: 0.5000 - loss: 1.0176 - val_accuracy: 0.0000e+00 - val_loss: 1.1366\n", + "Epoch 13/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 96ms/step - accuracy: 0.7500 - loss: 0.9968 - val_accuracy: 0.0000e+00 - val_loss: 1.1404\n", + "Epoch 14/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 93ms/step - accuracy: 0.7500 - loss: 0.9723 - val_accuracy: 0.0000e+00 - val_loss: 1.1439\n", + "Epoch 15/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 91ms/step - accuracy: 0.7500 - loss: 0.9863 - val_accuracy: 0.0000e+00 - val_loss: 1.1474\n", + "Epoch 16/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 94ms/step - accuracy: 1.0000 - loss: 0.9335 - val_accuracy: 0.0000e+00 - val_loss: 1.1506\n", + "Epoch 17/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 109ms/step - accuracy: 0.7500 - loss: 0.9062 - val_accuracy: 0.0000e+00 - val_loss: 1.1541\n", + "Epoch 18/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 99ms/step - accuracy: 0.7500 - loss: 0.9250 - val_accuracy: 0.0000e+00 - val_loss: 1.1575\n", + "Epoch 19/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 90ms/step - accuracy: 0.7500 - loss: 0.9448 - val_accuracy: 0.0000e+00 - val_loss: 1.1609\n", + "Epoch 20/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 90ms/step - accuracy: 0.7500 - loss: 0.8955 - val_accuracy: 0.0000e+00 - val_loss: 1.1640\n", + "Epoch 21/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 92ms/step - accuracy: 1.0000 - loss: 0.8694 - val_accuracy: 0.0000e+00 - val_loss: 1.1667\n", + "Epoch 22/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 89ms/step - accuracy: 0.5000 - loss: 0.9652 - val_accuracy: 0.0000e+00 - val_loss: 1.1694\n", + "Epoch 23/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 97ms/step - accuracy: 0.7500 - loss: 0.8697 - val_accuracy: 0.0000e+00 - val_loss: 1.1723\n", + "Epoch 24/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 89ms/step - accuracy: 0.7500 - loss: 0.8764 - val_accuracy: 0.0000e+00 - val_loss: 1.1758\n", + "Epoch 25/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 89ms/step - accuracy: 0.7500 - loss: 0.8423 - val_accuracy: 0.0000e+00 - val_loss: 1.1791\n", + "Epoch 26/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 94ms/step - accuracy: 0.7500 - loss: 0.7802 - val_accuracy: 0.0000e+00 - val_loss: 1.1823\n", + "Epoch 27/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 109ms/step - accuracy: 0.7500 - loss: 0.8458 - val_accuracy: 0.0000e+00 - val_loss: 1.1854\n", + "Epoch 28/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 95ms/step - accuracy: 0.7500 - loss: 0.8357 - val_accuracy: 0.0000e+00 - val_loss: 1.1881\n", + "Epoch 29/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 86ms/step - accuracy: 0.7500 - loss: 0.7801 - val_accuracy: 0.0000e+00 - val_loss: 1.1910\n", + "Epoch 30/30\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 90ms/step - accuracy: 0.7500 - loss: 0.8469 - val_accuracy: 0.0000e+00 - val_loss: 1.1941\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# =========================\n", + "# 9. EVALUASI\n", + "# =========================\n", + "y_pred = model.predict(X_test)\n", + "y_pred_class = np.argmax(y_pred, axis=1)\n", + "y_true = np.argmax(y_test, axis=1)\n", + "\n", + "print(\"\\nAccuracy:\", accuracy_score(y_true, y_pred_class))\n", + "print(\"\\nClassification Report:\")\n", + "print(classification_report(y_true, y_pred_class, target_names=le.classes_))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KZsJ9UBSLTYe", + "outputId": "a1c9f60c-cb11-4ac2-cfe5-b9b541c4444d" + }, + "id": "KZsJ9UBSLTYe", + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 76ms/step\n", + "\n", + "Accuracy: 0.0\n", + "\n", + "Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " negatif 0.00 0.00 0.00 1.0\n", + " netral 0.00 0.00 0.00 0.0\n", + " positif 0.00 0.00 0.00 1.0\n", + "\n", + " accuracy 0.00 2.0\n", + " macro avg 0.00 0.00 0.00 2.0\n", + "weighted avg 0.00 0.00 0.00 2.0\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", + "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", + "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", + "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", + "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", + "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# =========================\n", + "# 10. UJI KALIMAT BARU\n", + "# =========================\n", + "test_text = [\"saya suka belajar python\"]\n", + "test_text = [clean_text(test_text[0])]\n", + "X_new = vectorizer.transform(test_text).toarray()\n", + "\n", + "prediction = model.predict(X_new)\n", + "predicted_label = le.inverse_transform([np.argmax(prediction)])\n", + "\n", + "print(\"\\nKalimat uji:\", test_text[0])\n", + "print(\"Hasil klasifikasi:\", predicted_label[0])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hfiVQ8rGLWZ-", + "outputId": "091f87bc-e9ec-4853-d253-32e5d87f4d95" + }, + "id": "hfiVQ8rGLWZ-", + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 70ms/step\n", + "\n", + "Kalimat uji: saya suka belajar python\n", + "Hasil klasifikasi: netral\n" + ] + } + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file