Tugas praktikum Sudah saya edit sesuai instruksi dosen. Signed-off-by: 202210715288 FATAH SABILA ROSYAD <202210715288@mhs.ubharajaya.ac.id>
361 lines
11 KiB
Plaintext
361 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Klasifikasi Teks Menggunakan ANN (TF-IDF + FNN)\n",
|
|
"\n",
|
|
"**Nama:** Fatah Sabila Rosyad \n",
|
|
"**NIM:** 202210715288 \n",
|
|
"**Kelas:** F7B2 \n",
|
|
"**MK:** NLP \n",
|
|
"\n",
|
|
"**Tujuan praktikum:**\n",
|
|
"Menerapkan klasifikasi sentimen teks sederhana menggunakan TF-IDF dan Feedforward Neural Network (`MLPClassifier`), dengan:\n",
|
|
"- Mengubah contoh teks (menggunakan kalimat yang dibuat sendiri)\n",
|
|
"- Mengubah parameter TF-IDF (`max_features`, `ngram_range`)\n",
|
|
"- Mengubah arsitektur dan parameter model ANN (`hidden_layer_sizes`, `max_iter`, `learning_rate_init`)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "4c395092-326a-4abc-b308-067392277cfa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# ---------------------------------------------------------\n",
|
|
"# Klasifikasi Teks dengan TF-IDF + Feedforward Neural Network\n",
|
|
"# ---------------------------------------------------------\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
|
"from sklearn.neural_network import MLPClassifier\n",
|
|
"from sklearn.metrics import classification_report, confusion_matrix"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "4ac91b0c-e6af-4766-8933-db10ebf69140",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>text</th>\n",
|
|
" <th>label</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Saya Fatah Sabila Rosyad merasa sangat puas de...</td>\n",
|
|
" <td>positive</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Sebagai pelanggan, Fatah kecewa karena pelayan...</td>\n",
|
|
" <td>negative</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Pengalaman belanja Fatah kali ini menyenangkan...</td>\n",
|
|
" <td>positive</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Fatah benci produk ini karena mudah rusak dan ...</td>\n",
|
|
" <td>negative</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Menurut Fatah kualitas produk ini sangat bagus...</td>\n",
|
|
" <td>positive</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Fatah tidak akan membeli lagi di sini karena p...</td>\n",
|
|
" <td>negative</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" text label\n",
|
|
"0 Saya Fatah Sabila Rosyad merasa sangat puas de... positive\n",
|
|
"1 Sebagai pelanggan, Fatah kecewa karena pelayan... negative\n",
|
|
"2 Pengalaman belanja Fatah kali ini menyenangkan... positive\n",
|
|
"3 Fatah benci produk ini karena mudah rusak dan ... negative\n",
|
|
"4 Menurut Fatah kualitas produk ini sangat bagus... positive\n",
|
|
"5 Fatah tidak akan membeli lagi di sini karena p... negative"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# -----------------------------------------\n",
|
|
"# 1. Contoh Dataset (teks buatan Fatah)\n",
|
|
"# -----------------------------------------\n",
|
|
"\n",
|
|
"data = {\n",
|
|
" \"text\": [\n",
|
|
" \"Saya Fatah Sabila Rosyad merasa sangat puas dengan kualitas produk ini.\",\n",
|
|
" \"Sebagai pelanggan, Fatah kecewa karena pelayanan toko sangat lambat.\",\n",
|
|
" \"Pengalaman belanja Fatah kali ini menyenangkan, proses cepat dan barang sesuai.\",\n",
|
|
" \"Fatah benci produk ini karena mudah rusak dan tidak sesuai deskripsi.\",\n",
|
|
" \"Menurut Fatah kualitas produk ini sangat bagus dan layak direkomendasikan.\",\n",
|
|
" \"Fatah tidak akan membeli lagi di sini karena pelayanan buruk dan respon yang lambat.\"\n",
|
|
" ],\n",
|
|
" \"label\": [\"positive\", \"negative\", \"positive\", \"negative\", \"positive\", \"negative\"]\n",
|
|
"}\n",
|
|
"\n",
|
|
"df = pd.DataFrame(data)\n",
|
|
"df\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "6dab8e80-c225-4de8-aecc-8b457153c3ee",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Jumlah data latih : 3\n",
|
|
"Jumlah data uji : 3\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# -----------------------------------------\n",
|
|
"# 2. Split Train & Test (PERUBAHAN: test_size & random_state)\n",
|
|
"# -----------------------------------------\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
|
" df[\"text\"],\n",
|
|
" df[\"label\"],\n",
|
|
" test_size=0.34, # semula 0.3\n",
|
|
" random_state=7 # semula 42\n",
|
|
")\n",
|
|
"\n",
|
|
"print(\"Jumlah data latih :\", len(X_train))\n",
|
|
"print(\"Jumlah data uji :\", len(X_test))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "2cb05f0c-b497-4e9e-87bc-25d167f0c0ee",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Shape X_train_tfidf: (3, 52)\n",
|
|
"Shape X_test_tfidf : (3, 52)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# -----------------------------------------\n",
|
|
"# 3. TF-IDF Vectorization (PERUBAHAN PARAMETER)\n",
|
|
"# -----------------------------------------\n",
|
|
"\n",
|
|
"tfidf = TfidfVectorizer(\n",
|
|
" max_features=1000, # semula 5000\n",
|
|
" ngram_range=(1, 2) # tambahan: gunakan unigram + bigram\n",
|
|
")\n",
|
|
"\n",
|
|
"X_train_tfidf = tfidf.fit_transform(X_train)\n",
|
|
"X_test_tfidf = tfidf.transform(X_test)\n",
|
|
"\n",
|
|
"print(\"Shape X_train_tfidf:\", X_train_tfidf.shape)\n",
|
|
"print(\"Shape X_test_tfidf :\", X_test_tfidf.shape)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "0cb99708-b1bd-43a7-84b9-4e4925bf2914",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Model selesai dilatih.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# -----------------------------------------\n",
|
|
"# 4. Feedforward ANN (MLPClassifier) (PERUBAHAN PARAMETER)\n",
|
|
"# -----------------------------------------\n",
|
|
"\n",
|
|
"model = MLPClassifier(\n",
|
|
" hidden_layer_sizes=(128, 32), # semula (256, 64)\n",
|
|
" activation='relu',\n",
|
|
" solver='adam',\n",
|
|
" learning_rate_init=0.001, # tambahan\n",
|
|
" max_iter=300, # semula 500\n",
|
|
" random_state=7\n",
|
|
")\n",
|
|
"\n",
|
|
"model.fit(X_train_tfidf, y_train)\n",
|
|
"print(\"Model selesai dilatih.\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "d388afdf-0f08-48ea-92d1-e03390dee1d9",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"=== Classification Report ===\n",
|
|
" precision recall f1-score support\n",
|
|
"\n",
|
|
" negative 1.00 0.50 0.67 2\n",
|
|
" positive 0.50 1.00 0.67 1\n",
|
|
"\n",
|
|
" accuracy 0.67 3\n",
|
|
" macro avg 0.75 0.75 0.67 3\n",
|
|
"weighted avg 0.83 0.67 0.67 3\n",
|
|
"\n",
|
|
"=== Confusion Matrix ===\n",
|
|
"[[1 1]\n",
|
|
" [0 1]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# -----------------------------------------\n",
|
|
"# 5. Evaluasi Model\n",
|
|
"# -----------------------------------------\n",
|
|
"\n",
|
|
"y_pred = model.predict(X_test_tfidf)\n",
|
|
"\n",
|
|
"print(\"=== Classification Report ===\")\n",
|
|
"print(classification_report(y_test, y_pred))\n",
|
|
"\n",
|
|
"print(\"=== Confusion Matrix ===\")\n",
|
|
"print(confusion_matrix(y_test, y_pred))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "64141093-c8fd-4118-aaf3-6e48454c5e76",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Prediksi untuk: Menurut Fatah, pengalaman belanja kali ini sangat memuaskan.\n",
|
|
"Hasil: positive\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# -----------------------------------------\n",
|
|
"# 6. Prediksi Teks Baru (contoh 1 - positif)\n",
|
|
"# -----------------------------------------\n",
|
|
"sample_text = [\"Menurut Fatah, pengalaman belanja kali ini sangat memuaskan.\"]\n",
|
|
"sample_vec = tfidf.transform(sample_text)\n",
|
|
"prediction = model.predict(sample_vec)\n",
|
|
"\n",
|
|
"print(\"Prediksi untuk:\", sample_text[0])\n",
|
|
"print(\"Hasil:\", prediction[0])\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "d4bf8434-fe3b-4a88-a294-207fa731de7d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Prediksi untuk: Saya Fatah merasa kecewa karena layanan toko sangat buruk.\n",
|
|
"Hasil: negative\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# -----------------------------------------\n",
|
|
"# 6. Prediksi Teks Baru (contoh 2 - negatif)\n",
|
|
"# -----------------------------------------\n",
|
|
"sample_text = [\"Saya Fatah merasa kecewa karena layanan toko sangat buruk.\"]\n",
|
|
"sample_vec = tfidf.transform(sample_text)\n",
|
|
"prediction = model.predict(sample_vec)\n",
|
|
"\n",
|
|
"print(\"Prediksi untuk:\", sample_text[0])\n",
|
|
"print(\"Hasil:\", prediction[0])\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "724617be-aa1d-41bd-ad39-e6517fbcf837",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|