repositori-NLP/klasifikasi teks FNN/Untitled0.ipynb

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "PiGdhtYtor9F"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
        "from sklearn.svm import LinearSVC\n",
        "from sklearn.naive_bayes import MultinomialNB\n",
        "from sklearn.metrics import classification_report, accuracy_score\n",
        "\n",
        "# Sample dataset (replace with your actual dataset)\n",
        "df = pd.DataFrame({\n",
        "    'text': [\n",
        "        'This is a positive review.',\n",
        "        'Negative experience, very bad.',\n",
        "        'Neutral product, neither good nor bad.',\n",
        "        'I absolutely love this!',\n",
        "        'Terrible service and slow delivery.',\n",
        "        'It was okay, nothing special.'\n",
        "    ],\n",
        "    'label': ['positive', 'negative', 'neutral', 'positive', 'negative', 'neutral']\n",
        "})\n",
        "# If you have a CSV file, uncomment the line below and comment out the sample DataFrame above:\n",
        "# df = pd.read_csv(\"dataset.csv\")\n",
        "# Make sure your CSV has 'text' and 'label' columns.\n",
        "\n",
        "# PREPROCESSING\n",
        "X = df['text']\n",
        "y = df['label']\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    X, y, test_size=0.2, random_state=42\n",
        ")\n",
        "\n",
        "vectorizer = TfidfVectorizer(max_features=5000)\n",
        "X_train_tfidf = vectorizer.fit_transform(X_train)\n",
        "X_test_tfidf = vectorizer.transform(X_test)\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "svm_model = LinearSVC()\n",
        "svm_model.fit(X_train_tfidf, y_train)\n",
        "\n",
        "svm_pred = svm_model.predict(X_test_tfidf)\n",
        "\n",
        "print(\"=== SVM Accuracy ===\")\n",
        "print(accuracy_score(y_test, svm_pred))\n",
        "print(\"\\n=== Classification Report SVM ===\")\n",
        "print(classification_report(y_test, svm_pred))\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mUdBJD5gpELB",
        "outputId": "ec430145-2b3d-4ea9-e121-8abdc90f5119"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "=== SVM Accuracy ===\n",
            "0.5\n",
            "\n",
            "=== Classification Report SVM ===\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "    negative       0.00      0.00      0.00         1\n",
            "     neutral       0.00      0.00      0.00         0\n",
            "    positive       1.00      1.00      1.00         1\n",
            "\n",
            "    accuracy                           0.50         2\n",
            "   macro avg       0.33      0.33      0.33         2\n",
            "weighted avg       0.50      0.50      0.50         2\n",
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "nb_model = MultinomialNB()\n",
        "nb_model.fit(X_train_tfidf, y_train)\n",
        "\n",
        "nb_pred = nb_model.predict(X_test_tfidf)\n",
        "\n",
        "print(\"=== Naive Bayes Accuracy ===\")\n",
        "print(accuracy_score(y_test, nb_pred))\n",
        "print(\"\\n=== Classification Report Naive Bayes ===\")\n",
        "print(classification_report(y_test, nb_pred))\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JeIVhQVopJcv",
        "outputId": "22e504c9-f279-49f7-b190-bd04b1986ec4"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "=== Naive Bayes Accuracy ===\n",
            "0.0\n",
            "\n",
            "=== Classification Report Naive Bayes ===\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "    negative       0.00      0.00      0.00       1.0\n",
            "     neutral       0.00      0.00      0.00       0.0\n",
            "    positive       0.00      0.00      0.00       1.0\n",
            "\n",
            "    accuracy                           0.00       2.0\n",
            "   macro avg       0.00      0.00      0.00       2.0\n",
            "weighted avg       0.00      0.00      0.00       2.0\n",
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "sample_text = [\"barang buruk, saya kecewa\"]\n",
        "\n",
        "sample_vec = vectorizer.transform(sample_text)\n",
        "\n",
        "print(\"SVM Prediction:\", svm_model.predict(sample_vec)[0])\n",
        "print(\"Naive Bayes Prediction:\", nb_model.predict(sample_vec)[0])\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Axyz7fN9pVf0",
        "outputId": "2c1e2e67-8c0c-48c1-df58-69ad52ca9675"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "SVM Prediction: neutral\n",
            "Naive Bayes Prediction: neutral\n"
          ]
        }
      ]
    }
  ]
}