216 lines
10 KiB
Plaintext
216 lines
10 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"id": "PiGdhtYtor9F"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
|
"from sklearn.svm import LinearSVC\n",
|
|
"from sklearn.naive_bayes import MultinomialNB\n",
|
|
"from sklearn.metrics import classification_report, accuracy_score\n",
|
|
"\n",
|
|
"# Sample dataset (replace with your actual dataset)\n",
|
|
"df = pd.DataFrame({\n",
|
|
" 'text': [\n",
|
|
" 'This is a positive review.',\n",
|
|
" 'Negative experience, very bad.',\n",
|
|
" 'Neutral product, neither good nor bad.',\n",
|
|
" 'I absolutely love this!',\n",
|
|
" 'Terrible service and slow delivery.',\n",
|
|
" 'It was okay, nothing special.'\n",
|
|
" ],\n",
|
|
" 'label': ['positive', 'negative', 'neutral', 'positive', 'negative', 'neutral']\n",
|
|
"})\n",
|
|
"# If you have a CSV file, uncomment the line below and comment out the sample DataFrame above:\n",
|
|
"# df = pd.read_csv(\"dataset.csv\")\n",
|
|
"# Make sure your CSV has 'text' and 'label' columns.\n",
|
|
"\n",
|
|
"# PREPROCESSING\n",
|
|
"X = df['text']\n",
|
|
"y = df['label']\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
|
" X, y, test_size=0.2, random_state=42\n",
|
|
")\n",
|
|
"\n",
|
|
"vectorizer = TfidfVectorizer(max_features=5000)\n",
|
|
"X_train_tfidf = vectorizer.fit_transform(X_train)\n",
|
|
"X_test_tfidf = vectorizer.transform(X_test)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"svm_model = LinearSVC()\n",
|
|
"svm_model.fit(X_train_tfidf, y_train)\n",
|
|
"\n",
|
|
"svm_pred = svm_model.predict(X_test_tfidf)\n",
|
|
"\n",
|
|
"print(\"=== SVM Accuracy ===\")\n",
|
|
"print(accuracy_score(y_test, svm_pred))\n",
|
|
"print(\"\\n=== Classification Report SVM ===\")\n",
|
|
"print(classification_report(y_test, svm_pred))\n"
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "mUdBJD5gpELB",
|
|
"outputId": "ec430145-2b3d-4ea9-e121-8abdc90f5119"
|
|
},
|
|
"execution_count": 3,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"=== SVM Accuracy ===\n",
|
|
"0.5\n",
|
|
"\n",
|
|
"=== Classification Report SVM ===\n",
|
|
" precision recall f1-score support\n",
|
|
"\n",
|
|
" negative 0.00 0.00 0.00 1\n",
|
|
" neutral 0.00 0.00 0.00 0\n",
|
|
" positive 1.00 1.00 1.00 1\n",
|
|
"\n",
|
|
" accuracy 0.50 2\n",
|
|
" macro avg 0.33 0.33 0.33 2\n",
|
|
"weighted avg 0.50 0.50 0.50 2\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"nb_model = MultinomialNB()\n",
|
|
"nb_model.fit(X_train_tfidf, y_train)\n",
|
|
"\n",
|
|
"nb_pred = nb_model.predict(X_test_tfidf)\n",
|
|
"\n",
|
|
"print(\"=== Naive Bayes Accuracy ===\")\n",
|
|
"print(accuracy_score(y_test, nb_pred))\n",
|
|
"print(\"\\n=== Classification Report Naive Bayes ===\")\n",
|
|
"print(classification_report(y_test, nb_pred))\n",
|
|
"\n"
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "JeIVhQVopJcv",
|
|
"outputId": "22e504c9-f279-49f7-b190-bd04b1986ec4"
|
|
},
|
|
"execution_count": 4,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"=== Naive Bayes Accuracy ===\n",
|
|
"0.0\n",
|
|
"\n",
|
|
"=== Classification Report Naive Bayes ===\n",
|
|
" precision recall f1-score support\n",
|
|
"\n",
|
|
" negative 0.00 0.00 0.00 1.0\n",
|
|
" neutral 0.00 0.00 0.00 0.0\n",
|
|
" positive 0.00 0.00 0.00 1.0\n",
|
|
"\n",
|
|
" accuracy 0.00 2.0\n",
|
|
" macro avg 0.00 0.00 0.00 2.0\n",
|
|
"weighted avg 0.00 0.00 0.00 2.0\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
|
"/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"sample_text = [\"barang buruk, saya kecewa\"]\n",
|
|
"\n",
|
|
"sample_vec = vectorizer.transform(sample_text)\n",
|
|
"\n",
|
|
"print(\"SVM Prediction:\", svm_model.predict(sample_vec)[0])\n",
|
|
"print(\"Naive Bayes Prediction:\", nb_model.predict(sample_vec)[0])\n"
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "Axyz7fN9pVf0",
|
|
"outputId": "2c1e2e67-8c0c-48c1-df58-69ad52ca9675"
|
|
},
|
|
"execution_count": 6,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"SVM Prediction: neutral\n",
|
|
"Naive Bayes Prediction: neutral\n"
|
|
]
|
|
}
|
|
]
|
|
}
|
|
]
|
|
} |