commit d53f7ec5a6088305f082864149d34c40c1034238 Author: 202210715288 FATAH SABILA ROSYAD <202210715288@mhs.ubharajaya.ac.id> Date: Mon Jan 19 18:18:20 2026 +0700 Initial commit - NLP text classification project diff --git a/README.md b/README.md new file mode 100644 index 0000000..4dfd091 --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +# Klasifikasi Topik Berita Menggunakan NLP +Project ini mengimplementasikan Natural Language Processing (NLP) +untuk mengklasifikasikan teks berita Bahasa Indonesia ke dalam +tiga kategori: Politik, Olahraga, dan Teknologi. + +## Metode +- Preprocessing teks +- TF-IDF +- Multinomial Naive Bayes + +## Tools +- Python +- Scikit-learn +- Streamlit + +## Cara Menjalankan +1. Install dependency + pip install -r requirements.txt +2. Jalankan aplikasi + streamlit run app.py diff --git a/app.py b/app.py new file mode 100644 index 0000000..9a5cb12 --- /dev/null +++ b/app.py @@ -0,0 +1,28 @@ +import streamlit as st +import joblib +import re + +# Load model & vectorizer +model = joblib.load("model_nb.pkl") +vectorizer = joblib.load("tfidf_vectorizer.pkl") + +st.title("📰 Klasifikasi Topik Berita (NLP)") +st.write("Masukkan teks berita berbahasa Indonesia") + +text = st.text_area("Teks Berita", height=200) + +def preprocess_text(text): + text = text.lower() + text = re.sub(r"http\S+", "", text) + text = re.sub(r"[^a-zA-Z\s]", " ", text) + text = re.sub(r"\s+", " ", text).strip() + return text + +if st.button("Klasifikasikan"): + if text.strip() == "": + st.warning("Teks tidak boleh kosong!") + else: + clean_text = preprocess_text(text) + text_tfidf = vectorizer.transform([clean_text]) + prediction = model.predict(text_tfidf)[0] + st.success(f"Prediksi Topik: **{prediction}**") diff --git a/model_nb.pkl b/model_nb.pkl new file mode 100644 index 0000000..a12251e Binary files /dev/null and b/model_nb.pkl differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a0fc4c1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +streamlit +scikit-learn +pandas +numpy +joblib +Sastrawi \ No newline at end of file diff --git a/tfidf_vectorizer.pkl b/tfidf_vectorizer.pkl new file mode 100644 index 0000000..74d269b Binary files /dev/null and b/tfidf_vectorizer.pkl differ