{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text classification: TF-IDF with LinearSVC vs. Multinomial Naive Bayes\n",
    "\n",
    "Fits two classifiers on a `text` / `label` dataset and compares them on a stratified hold-out split.\n",
    "\n",
    "**Caveat:** the built-in sample frame has only six rows, so the scores it produces are not meaningful. It is there so the notebook runs end to end; replace it with a real dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PiGdhtYtor9F"
   },
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.metrics import classification_report, accuracy_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.svm import LinearSVC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 42            # shared by the train/test split and LinearSVC\n",
    "TEST_SIZE = 0.2      # fraction of rows held out for evaluation\n",
    "MAX_FEATURES = 5000  # upper bound on the TF-IDF vocabulary size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample dataset (replace with your actual dataset)\n",
    "df = pd.DataFrame({\n",
    "    'text': [\n",
    "        'This is a positive review.',\n",
    "        'Negative experience, very bad.',\n",
    "        'Neutral product, neither good nor bad.',\n",
    "        'I absolutely love this!',\n",
    "        'Terrible service and slow delivery.',\n",
    "        'It was okay, nothing special.'\n",
    "    ],\n",
    "    'label': ['positive', 'negative', 'neutral', 'positive', 'negative', 'neutral']\n",
    "})\n",
    "# If you have a CSV file, uncomment the line below and comment out the sample DataFrame above:\n",
    "# df = pd.read_csv(\"dataset.csv\")\n",
    "# Make sure your CSV has 'text' and 'label' columns.\n",
    "\n",
    "missing_cols = {'text', 'label'} - set(df.columns)\n",
    "assert not missing_cols, f\"dataset is missing required columns: {sorted(missing_cols)}\"\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split and vectorize\n",
    "\n",
    "The split is stratified on the label so the test set keeps the label proportions of the full dataset. The TF-IDF vocabulary is fitted on the training rows only and then applied to the test rows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# PREPROCESSING\n",
    "X = df['text']\n",
    "y = df['label']\n",
    "\n",
    "class_counts = y.value_counts()\n",
    "assert class_counts.min() >= 2, \"stratified splitting needs at least 2 rows for every label\"\n",
    "\n",
    "# train_test_split(stratify=...) rejects a test set with fewer rows than there are\n",
    "# classes. On tiny datasets TEST_SIZE alone falls below that, so use the class count\n",
    "# as a floor. An integer test_size is interpreted as an absolute number of rows.\n",
    "n_test = max(math.ceil(TEST_SIZE * len(df)), len(class_counts))\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=n_test, random_state=SEED, stratify=y\n",
    ")\n",
    "\n",
    "vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)\n",
    "X_train_tfidf = vectorizer.fit_transform(X_train)\n",
    "X_test_tfidf = vectorizer.transform(X_test)\n",
    "\n",
    "assert len(X_train) + len(X_test) == len(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_evaluation(model_name, y_true, y_pred):\n",
    "    \"\"\"Print the accuracy and the per-class classification report for one model.\n",
    "\n",
    "    Args:\n",
    "        model_name: Label inserted into the printed section headers.\n",
    "        y_true: True labels of the evaluated rows.\n",
    "        y_pred: Labels predicted for the same rows, in the same order.\n",
    "    \"\"\"\n",
    "    print(f\"=== {model_name} Accuracy ===\")\n",
    "    print(accuracy_score(y_true, y_pred))\n",
    "    print(f\"\\n=== Classification Report {model_name} ===\")\n",
    "    # zero_division=0 reports 0.0 for undefined precision/recall (the value sklearn\n",
    "    # falls back to anyway) without emitting an UndefinedMetricWarning per metric.\n",
    "    print(classification_report(y_true, y_pred, zero_division=0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "mUdBJD5gpELB"
   },
   "outputs": [],
   "source": [
    "svm_model = LinearSVC(random_state=SEED)\n",
    "svm_model.fit(X_train_tfidf, y_train)\n",
    "\n",
    "svm_pred = svm_model.predict(X_test_tfidf)\n",
    "\n",
    "print_evaluation(\"SVM\", y_test, svm_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multinomial Naive Bayes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "JeIVhQVopJcv"
   },
   "outputs": [],
   "source": [
    "nb_model = MultinomialNB()\n",
    "nb_model.fit(X_train_tfidf, y_train)\n",
    "\n",
    "nb_pred = nb_model.predict(X_test_tfidf)\n",
    "\n",
    "print_evaluation(\"Naive Bayes\", y_test, nb_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict on new text\n",
    "\n",
    "The sample sentence below is Indonesian, while the built-in sample frame is English. Words the vectorizer did not see during fitting are ignored, so a sentence with no known word becomes an all-zero vector and the predicted label carries no information. The cell warns when that happens."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Axyz7fN9pVf0"
   },
   "outputs": [],
   "source": [
    "sample_text = [\"barang buruk, saya kecewa\"]\n",
    "\n",
    "sample_vec = vectorizer.transform(sample_text)\n",
    "\n",
    "# getnnz(axis=1) counts the non-zero TF-IDF features per input row; zero means\n",
    "# none of the row's words are in the fitted vocabulary.\n",
    "for text, n_known in zip(sample_text, sample_vec.getnnz(axis=1)):\n",
    "    if n_known == 0:\n",
    "        print(f\"Warning: no word of {text!r} is in the training vocabulary; its prediction is not meaningful.\")\n",
    "\n",
    "print(\"SVM Prediction:\", svm_model.predict(sample_vec)[0])\n",
    "print(\"Naive Bayes Prediction:\", nb_model.predict(sample_vec)[0])"
   ]
  }
 ]
}