{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Text classification: Linear SVM vs. Multinomial Naive Bayes\n",
        "\n",
        "TF-IDF features are fed to two classical classifiers, which are scored on a held-out, stratified split.\n",
        "\n",
        "**Caveat:** the six-row DataFrame below is only a placeholder. With so few rows the scores say nothing about model quality; load a real dataset before drawing conclusions."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "PiGdhtYtor9F"
      },
      "outputs": [],
      "source": [
        "import math\n",
        "\n",
        "import pandas as pd\n",
        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
        "from sklearn.metrics import accuracy_score, classification_report\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.naive_bayes import MultinomialNB\n",
        "from sklearn.svm import LinearSVC\n",
        "\n",
        "# Tunable settings, kept in one place.\n",
        "SEED = 42            # shared random seed so reruns give the same split and model\n",
        "TEST_FRACTION = 0.2  # requested share of rows held out for evaluation\n",
        "MAX_FEATURES = 5000  # cap on the TF-IDF vocabulary size"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Sample dataset (replace with your actual dataset)\n",
        "df = pd.DataFrame({\n",
        "    'text': [\n",
        "        'This is a positive review.',\n",
        "        'Negative experience, very bad.',\n",
        "        'Neutral product, neither good nor bad.',\n",
        "        'I absolutely love this!',\n",
        "        'Terrible service and slow delivery.',\n",
        "        'It was okay, nothing special.'\n",
        "    ],\n",
        "    'label': ['positive', 'negative', 'neutral', 'positive', 'negative', 'neutral']\n",
        "})\n",
        "# If you have a CSV file, uncomment the line below and comment out the sample DataFrame above:\n",
        "# df = pd.read_csv(\"dataset.csv\")\n",
        "# Make sure your CSV has 'text' and 'label' columns.\n",
        "\n",
        "# Fail early with a clear message rather than a KeyError further down.\n",
        "missing_columns = {'text', 'label'} - set(df.columns)\n",
        "assert not missing_columns, f\"dataset is missing columns: {sorted(missing_columns)}\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Split and vectorize"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def stratified_test_count(labels, requested_fraction):\n",
        "    \"\"\"Return how many rows to hold out so that a stratified split is possible.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    labels : pandas.Series\n",
        "        Class label of every row.\n",
        "    requested_fraction : float\n",
        "        Desired share of rows for the test set, strictly between 0 and 1.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    int\n",
        "        ``ceil(requested_fraction * len(labels))``, raised to the number of\n",
        "        classes when it would otherwise be smaller (a stratified split needs\n",
        "        at least one test row per class).\n",
        "\n",
        "    Raises\n",
        "    ------\n",
        "    ValueError\n",
        "        If the fraction is out of range, the data is empty, some class has\n",
        "        fewer than two rows, or too few rows would remain for training.\n",
        "    \"\"\"\n",
        "    if not 0 < requested_fraction < 1:\n",
        "        raise ValueError(\"requested_fraction must be strictly between 0 and 1\")\n",
        "\n",
        "    class_counts = labels.value_counts()\n",
        "    n_rows = len(labels)\n",
        "    n_classes = len(class_counts)\n",
        "    if n_classes == 0:\n",
        "        raise ValueError(\"cannot split an empty dataset\")\n",
        "    if class_counts.min() < 2:\n",
        "        raise ValueError(\"every class needs at least 2 rows for a stratified split\")\n",
        "\n",
        "    n_test = max(math.ceil(requested_fraction * n_rows), n_classes)\n",
        "    if n_rows - n_test < n_classes:\n",
        "        raise ValueError(\n",
        "            f\"holding out {n_test} of {n_rows} rows leaves fewer training rows \"\n",
        "            f\"than the {n_classes} classes\"\n",
        "        )\n",
        "    return n_test"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# PREPROCESSING\n",
        "X = df['text']\n",
        "y = df['label']\n",
        "class_labels = sorted(y.unique())\n",
        "\n",
        "# Stratify so class proportions carry over to both splits. Without it a small\n",
        "# test split can miss a class entirely, which leaves per-class precision and\n",
        "# recall undefined.\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    X, y,\n",
        "    test_size=stratified_test_count(y, TEST_FRACTION),\n",
        "    stratify=y,\n",
        "    random_state=SEED,\n",
        ")\n",
        "assert len(X_train) + len(X_test) == len(df)\n",
        "\n",
        "# Fit the vocabulary on the training rows only so nothing leaks from the test set.\n",
        "vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)\n",
        "X_train_tfidf = vectorizer.fit_transform(X_train)\n",
        "X_test_tfidf = vectorizer.transform(X_test)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Train and evaluate"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def fit_and_report(name, model, X_train, y_train, X_test, y_test, labels):\n",
        "    \"\"\"Fit ``model``, print its accuracy and classification report, return predictions.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    name : str\n",
        "        Label used in the printed section headers.\n",
        "    model : estimator\n",
        "        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.\n",
        "    X_train, y_train\n",
        "        Training features and labels.\n",
        "    X_test, y_test\n",
        "        Held-out features and labels.\n",
        "    labels : list\n",
        "        Every class to list in the report, including ones absent from ``y_test``.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    Predicted labels for ``X_test`` as returned by ``model.predict``.\n",
        "    \"\"\"\n",
        "    model.fit(X_train, y_train)\n",
        "    predictions = model.predict(X_test)\n",
        "\n",
        "    print(f\"=== {name} Accuracy ===\")\n",
        "    print(accuracy_score(y_test, predictions))\n",
        "    print(f\"\\n=== Classification Report {name} ===\")\n",
        "    # zero_division=0 reports 0.0 for a class that was never predicted or never\n",
        "    # present, without raising UndefinedMetricWarning for each such class.\n",
        "    print(classification_report(y_test, predictions, labels=labels, zero_division=0))\n",
        "    return predictions"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "mUdBJD5gpELB"
      },
      "outputs": [],
      "source": [
        "svm_model = LinearSVC(random_state=SEED)\n",
        "svm_pred = fit_and_report(\n",
        "    \"SVM\", svm_model, X_train_tfidf, y_train, X_test_tfidf, y_test, class_labels\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "JeIVhQVopJcv"
      },
      "outputs": [],
      "source": [
        "nb_model = MultinomialNB()\n",
        "nb_pred = fit_and_report(\n",
        "    \"Naive Bayes\", nb_model, X_train_tfidf, y_train, X_test_tfidf, y_test, class_labels\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Predict on new text"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Axyz7fN9pVf0"
      },
      "outputs": [],
      "source": [
        "sample_text = [\"barang buruk, saya kecewa\"]\n",
        "\n",
        "sample_vec = vectorizer.transform(sample_text)\n",
        "\n",
        "# A text that shares no token with the training vocabulary becomes an all-zero\n",
        "# vector, so the models can only fall back on their bias / prior terms.\n",
        "# Say so instead of presenting the result as a real prediction.\n",
        "if sample_vec.nnz == 0:\n",
        "    print(\"Warning: no word of the sample text is in the training vocabulary; \"\n",
        "          \"the predictions below are not meaningful.\")\n",
        "\n",
        "print(\"SVM Prediction:\", svm_model.predict(sample_vec)[0])\n",
        "print(\"Naive Bayes Prediction:\", nb_model.predict(sample_vec)[0])"
      ]
    }
  ]
}