# Klasifikasi Teks menggunakan FNN
## Lusiana Situmorang


In [3]:
# =========================
# 1. IMPORT LIBRARY
# =========================
import re
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [4]:
# =========================
# 2. DATA TEKS MANUAL
# =========================
texts = [
    "saya suka belajar machine learning",
    "data science sangat menarik",
    "saya tidak suka matematika",
    "python mudah dipelajari",
    "machine learning membutuhkan data",
    "saya benci debugging error",
    "belajar python menyenangkan",
    "matematika penting dalam data science"
]

labels = [
    "positif",
    "positif",
    "negatif",
    "positif",
    "netral",
    "negatif",
    "positif",
    "netral"
]

In [5]:
# =========================
# 3. PREPROCESSING TEKS
# =========================
def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

texts = [clean_text(t) for t in texts]

In [6]:
# =========================
# 4. ENCODING LABEL
# =========================
le = LabelEncoder()
y = le.fit_transform(labels)
y = to_categorical(y)

In [7]:
# =========================
# 5. FEATURE EXTRACTION (TF-IDF)
# =========================
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(texts).toarray()

In [8]:
# =========================
# 6. SPLIT DATA
# =========================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [9]:
# =========================
# 7. MODEL FNN (DIMODIFIKASI)
# =========================
model = Sequential()
model.add(Dense(64, activation="relu", input_shape=(X_train.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(32, activation="relu"))
model.add(Dense(y.shape[1], activation="softmax"))

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# =========================
# 8. TRAINING
# =========================
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=4,
    validation_split=0.2,
    verbose=1
)

Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5000 - loss: 1.0428 - val_accuracy: 0.0000e+00 - val_loss: 1.0974
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step - accuracy: 0.2500 - loss: 1.0820 - val_accuracy: 0.0000e+00 - val_loss: 1.1007
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 239ms/step - accuracy: 0.5000 - loss: 1.0417 - val_accuracy: 0.0000e+00 - val_loss: 1.1046
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step - accuracy: 0.7500 - loss: 1.0252 - val_accuracy: 0.0000e+00 - val_loss: 1.1079
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - accuracy: 0.5000 - loss: 1.0166 - val_accuracy: 0.0000e+00 - val_loss: 1.1109
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step - accuracy: 0.5000 - loss: 0.9857 - val_accuracy: 0.0000e+00 - val_loss: 1.1139
Epoch 7/30
[1m1/1[0

In [11]:
# =========================
# 9. EVALUASI
# =========================
y_pred = model.predict(X_test)
y_pred_class = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print("\nAccuracy:", accuracy_score(y_true, y_pred_class))
print("\nClassification Report:")
print(classification_report(y_true, y_pred_class, target_names=le.classes_))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step

Accuracy: 0.0

Classification Report:
              precision    recall  f1-score   support

     negatif       0.00      0.00      0.00       1.0
      netral       0.00      0.00      0.00       0.0
     positif       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
# =========================
# 10. UJI KALIMAT BARU
# =========================
test_text = ["saya suka belajar python"]
test_text = [clean_text(test_text[0])]
X_new = vectorizer.transform(test_text).toarray()

prediction = model.predict(X_new)
predicted_label = le.inverse_transform([np.argmax(prediction)])

print("\nKalimat uji:", test_text[0])
print("Hasil klasifikasi:", predicted_label[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step

Kalimat uji: saya suka belajar python
Hasil klasifikasi: netral
