{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "PKHmOczbUdK0" }, "outputs": [], "source": [ "{\n", " \"cells\": [\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 11,\n", " \"metadata\": {\n", " \"id\": \"qBYcPYAb059g\"\n", " },\n", " \"outputs\": [],\n", " \"source\": [\n", " \"# =========================\\n\",\n", " \"# 1. IMPORT LIBRARY\\n\",\n", " \"# =========================\\n\",\n", " \"import re\\n\",\n", " \"import pandas as pd\\n\",\n", " \"\\n\",\n", " \"from sklearn.feature_extraction.text import CountVectorizer\\n\",\n", " \"from sklearn.naive_bayes import MultinomialNB\\n\",\n", " \"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 12,\n", " \"metadata\": {\n", " \"id\": \"mo-yt5Ob1N8j\"\n", " },\n", " \"outputs\": [],\n", " \"source\": [\n", " \"# =========================\\n\",\n", " \"# 2. DATA TEKS MANUAL\\n\",\n", " \"# =========================\\n\",\n", " \"documents = [\\n\",\n", " \" \\\"saya suka belajar data science\\\",\\n\",\n", " \" \\\"machine learning sangat menarik\\\",\\n\",\n", " \" \\\"saya tidak suka matematika\\\",\\n\",\n", " \" \\\"belajar python itu menyenangkan\\\",\\n\",\n", " \" \\\"data science membutuhkan matematika\\\",\\n\",\n", " \" \\\"python sangat membantu data science\\\"\\n\",\n", " \"]\\n\",\n", " \"\\n\",\n", " \"labels = [\\n\",\n", " \" \\\"positif\\\",\\n\",\n", " \" \\\"positif\\\",\\n\",\n", " \" \\\"negatif\\\",\\n\",\n", " \" \\\"positif\\\",\\n\",\n", " \" \\\"netral\\\",\\n\",\n", " \" \\\"positif\\\"\\n\",\n", " \"]\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 13,\n", " \"metadata\": {\n", " \"id\": \"FkmxRAFq1oDK\"\n", " },\n", " \"outputs\": [],\n", " \"source\": [\n", " \"# =========================\\n\",\n", " \"# 3. PREPROCESSING\\n\",\n", " \"# =========================\\n\",\n", " \"def clean_text(text):\\n\",\n", " \" text = text.lower()\\n\",\n", " \" text = re.sub(r\\\"[^a-z\\\\s]\\\", \\\"\\\", text)\\n\",\n", " \" text = re.sub(r\\\"\\\\s+\\\", \\\" \\\", text).strip()\\n\",\n", " \" return text\\n\",\n", " \"\\n\",\n", " \"documents = [clean_text(doc) for doc in documents]\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 14,\n", " \"metadata\": {\n", " \"colab\": {\n", " \"base_uri\": \"https://localhost:8080/\"\n", " },\n", " \"id\": \"ybC1Vo2C_c3q\",\n", " \"outputId\": \"1695b30d-059d-4bce-e224-7c100b1958ee\"\n", " },\n", " \"outputs\": [\n", " {\n", " \"output_type\": \"stream\",\n", " \"name\": \"stdout\",\n", " \"text\": [\n", " \"=== Bag of Words ===\\n\",\n", " \" belajar data itu learning machine matematika membantu membutuhkan \\\\\\n\",\n", " \"0 1 1 0 0 0 0 0 0 \\n\",\n", " \"1 0 0 0 1 1 0 0 0 \\n\",\n", " \"2 0 0 0 0 0 1 0 0 \\n\",\n", " \"3 1 0 1 0 0 0 0 0 \\n\",\n", " \"4 0 1 0 0 0 1 0 1 \\n\",\n", " \"5 0 1 0 0 0 0 1 0 \\n\",\n", " \"\\n\",\n", " \" menarik menyenangkan python sangat saya science suka tidak \\n\",\n", " \"0 0 0 0 0 1 1 1 0 \\n\",\n", " \"1 1 0 0 1 0 0 0 0 \\n\",\n", " \"2 0 0 0 0 1 0 1 1 \\n\",\n", " \"3 0 1 1 0 0 0 0 0 \\n\",\n", " \"4 0 0 0 0 0 1 0 0 \\n\",\n", " \"5 0 0 1 1 0 1 0 0 \\n\"\n", " ]\n", " }\n", " ],\n", " \"source\": [\n", " \"# =========================\\n\",\n", " \"# 4. FEATURE EXTRACTION (BOW)\\n\",\n", " \"# =========================\\n\",\n", " \"vectorizer = CountVectorizer()\\n\",\n", " \"X = vectorizer.fit_transform(documents)\\n\",\n", " \"\\n\",\n", " \"df_bow = pd.DataFrame(\\n\",\n", " \" X.toarray(),\\n\",\n", " \" columns=vectorizer.get_feature_names_out()\\n\",\n", " \")\\n\",\n", " \"\\n\",\n", " \"print(\\\"=== Bag of Words ===\\\")\\n\",\n", " \"print(df_bow)\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 15,\n", " \"metadata\": {\n", " \"colab\": {\n", " \"base_uri\": \"https://localhost:8080/\",\n", " \"height\": 80\n", " },\n", " \"id\": \"s6S-Ma4R1xuq\",\n", " \"outputId\": \"f65359e3-bb87-42b4-fb37-9c035f88e7ad\"\n", " },\n", " \"outputs\": [\n", " {\n", " \"output_type\": \"execute_result\",\n", " \"data\": {\n", " \"text/plain\": [\n", " \"MultinomialNB()\"\n", " ],\n", " \"text/html\": [\n", " \"
MultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MultinomialNB()