Initial commit - NLP text classification project

2026-01-19 18:18:20 +07:00 · 2026-01-19 18:18:20 +07:00 · d53f7ec5a6
commit d53f7ec5a6
5 changed files with 54 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,20 @@
 # Klasifikasi Topik Berita Menggunakan NLP
 Project ini mengimplementasikan Natural Language Processing (NLP)
 untuk mengklasifikasikan teks berita Bahasa Indonesia ke dalam
 tiga kategori: Politik, Olahraga, dan Teknologi.
 ## Metode
 - Preprocessing teks
 - TF-IDF
 - Multinomial Naive Bayes
 ## Tools
 - Python
 - Scikit-learn
 - Streamlit
 ## Cara Menjalankan
 1. Install dependency
   pip install -r requirements.txt
 2. Jalankan aplikasi
   streamlit run app.py
--- a/app.py
+++ b/app.py
@ -0,0 +1,28 @@
 import streamlit as st
 import joblib
 import re
 # Load the trained classifier and the fitted vectorizer once per server
 # process. Streamlit re-executes this script on every widget interaction, so
 # without caching both pickle files would be read again on each rerun.
 # NOTE(review): joblib.load unpickles arbitrary objects; only ship .pkl files
 # that come from a trusted training run.
 @st.cache_resource(show_spinner=False)
 def _load_artifacts():
    """Return the (model, vectorizer) pair loaded from the bundled pickles."""
    return joblib.load("model_nb.pkl"), joblib.load("tfidf_vectorizer.pkl")
 model, vectorizer = _load_artifacts()
 # Page header and the free-text input that the classify button reads.
 st.title("📰 Klasifikasi Topik Berita (NLP)")
 st.write("Masukkan teks berita berbahasa Indonesia")
 text = st.text_area("Teks Berita", height=200)
 def preprocess_text(text):
    """Normalize raw text into lowercase ASCII words separated by one space.

    The input is lowercased, then three substitutions run in order:
    drop every ``http...`` token, replace each character that is neither
    an ASCII letter nor whitespace with a space, and collapse whitespace
    runs into a single space. Leading/trailing whitespace is stripped.
    """
    # Order matters: URLs must go before punctuation is blanked out,
    # otherwise the URL would be broken up into stray words.
    substitutions = (
        (r"http\S+", ""),
        (r"[^a-zA-Z\s]", " "),
        (r"\s+", " "),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
 # Classify the entered text when the button is pressed.
 if st.button("Klasifikasikan"):
    # Clean up front so that input which is non-blank but consists only of
    # digits, punctuation or URLs (which preprocess_text reduces to "") can
    # be rejected instead of being classified.
    clean_text = preprocess_text(text)
    if text.strip() == "":
        st.warning("Teks tidak boleh kosong!")
    elif not clean_text:
        # Nothing is left to vectorize; a label predicted from an empty
        # document would presumably not reflect the input at all.
        st.warning("Teks tidak mengandung kata yang dapat diklasifikasikan!")
    else:
        # One-element list in, first (only) prediction out.
        text_tfidf = vectorizer.transform([clean_text])
        prediction = model.predict(text_tfidf)[0]
        st.success(f"Prediksi Topik: **{prediction}**")
--- a/model_nb.pkl
+++ b/model_nb.pkl
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,6 @@
 streamlit
 scikit-learn
 pandas
 numpy
 joblib
 Sastrawi
--- a/tfidf_vectorizer.pkl
+++ b/tfidf_vectorizer.pkl