# ---------------------------------------------------------
# Text classification with TF-IDF + feedforward neural network
# ---------------------------------------------------------

"""Demo: text classification with TF-IDF features and an MLP.

Builds a six-sentence labelled dataset, splits it into a training and a
held-out part, fits a TF-IDF vectorizer and a feedforward network
(``MLPClassifier``) on the training part, prints a classification report
and a confusion matrix for the held-out part, and finally classifies one
new sentence.
"""

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# -----------------------------------------
# 1. Example dataset
# -----------------------------------------
# You can replace this dataset with another one (CSV, JSON, etc.)

data = {
    "text": [
        "Saya suka produk ini, luar biasa",
        "Layanannya buruk, sangat kecewa",
        "Pembelian terbaik yang pernah saya lakukan",
        "Saya benci produk ini, buang-buang uang",
        "Kualitasnya sangat bagus, direkomendasikan",
        "Pengalaman buruk, tidak akan membeli lagi",
    ],
    "label": [
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
    ],
}

df = pd.DataFrame(data)

# -----------------------------------------
# 2. Train / test split
# -----------------------------------------
# Stratify on the label: with only six rows an unstratified shuffle can
# leave one class out of the held-out part, which makes the report and
# the confusion matrix below uninformative.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.3,
    stratify=df["label"],
    random_state=42,
)

# -----------------------------------------
# 3. TF-IDF vectorization
# -----------------------------------------
# The vectorizer is fitted on the training texts only; the held-out texts
# are transformed with that already-fitted vectorizer.
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# -----------------------------------------
# 4. Feedforward ANN (MLPClassifier)
# -----------------------------------------
# Two hidden layers (256 and 64 units); random_state is fixed so repeated
# runs start from the same initialisation.
model = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    activation="relu",
    solver="adam",
    max_iter=500,
    random_state=42,
)

model.fit(X_train_tfidf, y_train)

# -----------------------------------------
# 5. Model evaluation
# -----------------------------------------
y_pred = model.predict(X_test_tfidf)

print("=== Classification Report ===")
print(classification_report(y_test, y_pred))

print("=== Confusion Matrix ===")
print(confusion_matrix(y_test, y_pred))

# -----------------------------------------
# 6. Predict new text
# -----------------------------------------
# New text goes through the same fitted vectorizer as the training data
# before it is handed to the model. Edit the sentence to try other inputs.
sample_text = ["barang buruk, saya kecewa"]
sample_vec = tfidf.transform(sample_text)
prediction = model.predict(sample_vec)

print("\nPrediksi untuk:", sample_text[0])
print("Hasil:", prediction[0])