7 changed files with 1806 additions and 3 deletions
--- a/.ipynb_checkpoints/Fitur_Ekstraksi_BOW-checkpoint.ipynb
+++ b/.ipynb_checkpoints/Fitur_Ekstraksi_BOW-checkpoint.ipynb
@ -0,0 +1,335 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "qBYcPYAb059g",
+    "outputId": "9f57b704-da1b-4495-d366-24c30586dc76",
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "Masukkan jumlah dokumen yang ingin dimasukkan:  4\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Ask how many documents will be entered; the next cell prompts for exactly this many texts.\n",
+    "import pandas as pd\n",
+    "\n",
+    "n = int(input(\"Masukkan jumlah dokumen yang ingin dimasukkan: \"))\n",
+    "# A zero or negative count would silently produce an empty corpus, vocabulary and\n",
+    "# BoW matrix further down, so reject it here with a clear message.\n",
+    "if n < 1:\n",
+    "    raise ValueError(\"Jumlah dokumen harus minimal 1.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "mo-yt5Ob1N8j",
+    "outputId": "362ac3e0-d84b-4014-db96-cc3b10ecdb32"
+   },
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "Masukkan teks untuk dokumen ke-1:  saya adalah seorang pria\n",
+      "Masukkan teks untuk dokumen ke-2:  saya adalah pria yang memiliki hati\n",
+      "Masukkan teks untuk dokumen ke-3:  hati saya telah terisi satu nama\n",
+      "Masukkan teks untuk dokumen ke-4:  di dalam hati saya terukir nama pasangan saya\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Dokumen yang Dimasukkan ===\n",
+      "Doc 1: saya adalah seorang pria\n",
+      "Doc 2: saya adalah pria yang memiliki hati\n",
+      "Doc 3: hati saya telah terisi satu nama\n",
+      "Doc 4: di dalam hati saya terukir nama pasangan saya\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Prompt for each document in turn (numbered from 1) and collect the raw text.\n",
+    "documents = [\n",
+    "    input(f\"Masukkan teks untuk dokumen ke-{number}: \")\n",
+    "    for number in range(1, n + 1)\n",
+    "]\n",
+    "\n",
+    "# Echo the collected documents back so the input can be checked.\n",
+    "print(\"\\n=== Dokumen yang Dimasukkan ===\")\n",
+    "for number, doc in enumerate(documents, start=1):\n",
+    "    print(f\"Doc {number}: {doc}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "FkmxRAFq1oDK",
+    "outputId": "62c4508e-1725-4f30-fbdb-4de8072498b2"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Hasil Tokenisasi ===\n",
+      "Doc 1: ['saya', 'adalah', 'seorang', 'pria']\n",
+      "Doc 2: ['saya', 'adalah', 'pria', 'yang', 'memiliki', 'hati']\n",
+      "Doc 3: ['hati', 'saya', 'telah', 'terisi', 'satu', 'nama']\n",
+      "Doc 4: ['di', 'dalam', 'hati', 'saya', 'terukir', 'nama', 'pasangan', 'saya']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Tokenisation: lowercase each document and split it on whitespace.\n",
+    "tokenized_docs = [doc.lower().split() for doc in documents]\n",
+    "\n",
+    "print(\"\\n=== Hasil Tokenisasi ===\")\n",
+    "for number, tokens in enumerate(tokenized_docs, start=1):\n",
+    "    print(f\"Doc {number}: {tokens}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "ybC1Vo2C_c3q",
+    "outputId": "fa31c57e-5364-4ded-fcd0-54d0db46c34b"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\n",
+      "['saya', 'adalah', 'seorang', 'pria', 'saya', 'adalah', 'pria', 'yang', 'memiliki', 'hati', 'hati', 'saya', 'telah', 'terisi', 'satu', 'nama', 'di', 'dalam', 'hati', 'saya', 'terukir', 'nama', 'pasangan', 'saya']\n",
+      "Jumlah total kata dalam seluruh dokumen: 24\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build the corpus: every token of every document, flattened into one list in document order.\n",
+    "corpus_all = []\n",
+    "for doc_tokens in tokenized_docs:\n",
+    "    corpus_all.extend(doc_tokens)\n",
+    "\n",
+    "print(\"\\n=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\")\n",
+    "print(corpus_all)\n",
+    "print(f\"Jumlah total kata dalam seluruh dokumen: {len(corpus_all)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "s6S-Ma4R1xuq",
+    "outputId": "98c3685b-1798-4038-d17e-6e45ca419b51"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Vocabulary (Kata Unik) ===\n",
+      "['adalah', 'dalam', 'di', 'hati', 'memiliki', 'nama', 'pasangan', 'pria', 'satu', 'saya', 'seorang', 'telah', 'terisi', 'terukir', 'yang']\n",
+      " 1. adalah\n",
+      " 2. dalam\n",
+      " 3. di\n",
+      " 4. hati\n",
+      " 5. memiliki\n",
+      " 6. nama\n",
+      " 7. pasangan\n",
+      " 8. pria\n",
+      " 9. satu\n",
+      "10. saya\n",
+      "11. seorang\n",
+      "12. telah\n",
+      "13. terisi\n",
+      "14. terukir\n",
+      "15. yang\n",
+      "\n",
+      "Jumlah kata unik (vocabulary size): 15\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build the vocabulary: the distinct tokens of the corpus in sorted order.\n",
+    "# It is computed once; the header and the size are each printed once.\n",
+    "vocabulary = sorted(set(corpus_all))\n",
+    "\n",
+    "print(\"\\n=== Vocabulary (Kata Unik) ===\")\n",
+    "# Show the vocabulary as a plain list, then as a numbered list (one word per line).\n",
+    "print(vocabulary)\n",
+    "for idx, word in enumerate(vocabulary, start=1):\n",
+    "    print(f\"{idx:>2}. {word}\")\n",
+    "print(f\"\\nJumlah kata unik (vocabulary size): {len(vocabulary)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "id": "ShevCTva2Fg9"
+   },
+   "outputs": [],
+   "source": [
+    "# Numeric representation (BoW matrix): one row per document, one column per vocabulary\n",
+    "# word; each entry is the number of times that word occurs in that document.\n",
+    "bow_matrix = [\n",
+    "    [doc.count(word) for word in vocabulary]\n",
+    "    for doc in tokenized_docs\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "-yB6D2pY2M0E",
+    "outputId": "b6b2f4d3-da8b-4aee-e9ce-034def4d5cf7"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Matriks Bag of Words ===\n",
+      "    adalah  dalam  di  hati  memiliki  nama  pasangan  pria  satu  saya  \\\n",
+      "D1       1      0   0     0         0     0         0     1     0     1   \n",
+      "D2       1      0   0     1         1     0         0     1     0     1   \n",
+      "D3       0      0   0     1         0     1         0     0     1     1   \n",
+      "D4       0      1   1     1         0     1         1     0     0     2   \n",
+      "\n",
+      "    seorang  telah  terisi  terukir  yang  \n",
+      "D1        1      0       0        0     0  \n",
+      "D2        0      0       0        0     1  \n",
+      "D3        0      1       1        0     0  \n",
+      "D4        0      0       0        1     0  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Wrap the matrix in a DataFrame: columns are the vocabulary words, rows are labelled D1, D2, ...\n",
+    "row_labels = [f\"D{number}\" for number in range(1, len(documents) + 1)]\n",
+    "df_bow = pd.DataFrame(bow_matrix, index=row_labels, columns=vocabulary)\n",
+    "\n",
+    "print(\"\\n=== Matriks Bag of Words ===\")\n",
+    "print(df_bow)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "8ruf5vKL2rGD",
+    "outputId": "65a4674e-1c01-4833-ec55-f66f77b8b6c2"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Tabel Frekuensi Kata (Keseluruhan Dokumen) ===\n",
+      "        Kata  Frekuensi\n",
+      "0       saya          5\n",
+      "1       hati          3\n",
+      "2     adalah          2\n",
+      "3       nama          2\n",
+      "4       pria          2\n",
+      "5      dalam          1\n",
+      "6         di          1\n",
+      "7   memiliki          1\n",
+      "8   pasangan          1\n",
+      "9       satu          1\n",
+      "10   seorang          1\n",
+      "11     telah          1\n",
+      "12    terisi          1\n",
+      "13   terukir          1\n",
+      "14      yang          1\n",
+      "Jumlah kata unik: 15\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Word-frequency table: sum every vocabulary column of the BoW matrix over all documents\n",
+    "# and list the words from most to least frequent.\n",
+    "# kind=\"stable\" keeps words with equal counts in their column (vocabulary) order, so the\n",
+    "# table is the same on every run instead of depending on the sort implementation.\n",
+    "word_frequencies = df_bow.sum().sort_values(ascending=False, kind=\"stable\").reset_index()\n",
+    "word_frequencies.columns = [\"Kata\", \"Frekuensi\"]\n",
+    "\n",
+    "print(\"\\n=== Tabel Frekuensi Kata (Keseluruhan Dokumen) ===\")\n",
+    "print(word_frequencies)\n",
+    "# The table has one row per distinct word, so its length is the number of unique words,\n",
+    "# not a frequency; the label says so.\n",
+    "print(f\"Jumlah kata unik: {len(word_frequencies)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "NQjExannHuj0"
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/.ipynb_checkpoints/Klasifikasi
+++ b/.ipynb_checkpoints/Klasifikasi
@ -0,0 +1,176 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac",
+   "metadata": {},
+   "source": [
+    "# Klasifikasi Teks menggunakan ANN\n",
+    "## Arif R Dwiyanto\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "53a214ae-c9cf-4d46-925d-068f1685537b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Classification Report ===\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "    negative       0.00      0.00      0.00       1.0\n",
+      "    positive       0.00      0.00      0.00       1.0\n",
+      "\n",
+      "    accuracy                           0.00       2.0\n",
+      "   macro avg       0.00      0.00      0.00       2.0\n",
+      "weighted avg       0.00      0.00      0.00       2.0\n",
+      "\n",
+      "=== Confusion Matrix ===\n",
+      "[[0 1]\n",
+      " [1 0]]\n",
+      "\n",
+      "Prediksi untuk: barang buruk, saya kecewa\n",
+      "Hasil: negative\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Text classification: TF-IDF features fed to a feed-forward neural network (MLPClassifier).\n",
+    "\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.neural_network import MLPClassifier\n",
+    "from sklearn.metrics import classification_report, confusion_matrix\n",
+    "\n",
+    "# --- 1. Example dataset ---\n",
+    "# Replace this toy data with any other source (CSV, JSON, ...) that provides the same two columns.\n",
+    "labelled_texts = [\n",
+    "    (\"Saya suka produk ini, luar biasa\", \"positive\"),\n",
+    "    (\"Layanannya buruk, saya sangat kecewa\", \"negative\"),\n",
+    "    (\"Pembelian terbaik yang pernah saya lakukan\", \"positive\"),\n",
+    "    (\"Saya benci produk ini, buang-buang uang\", \"negative\"),\n",
+    "    (\"Kualitasnya sangat bagus, direkomendasikan\", \"positive\"),\n",
+    "    (\"Pengalaman buruk, tidak akan membeli lagi\", \"negative\"),\n",
+    "]\n",
+    "data = {\n",
+    "    \"text\": [text for text, _ in labelled_texts],\n",
+    "    \"label\": [label for _, label in labelled_texts],\n",
+    "}\n",
+    "df = pd.DataFrame(data)\n",
+    "\n",
+    "# --- 2. Train/test split (30% held out, fixed seed) ---\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    df[\"text\"],\n",
+    "    df[\"label\"],\n",
+    "    test_size=0.3,\n",
+    "    random_state=42,\n",
+    ")\n",
+    "\n",
+    "# --- 3. TF-IDF vectorisation ---\n",
+    "# The vectoriser is fitted on the training texts only and then reused for the test texts.\n",
+    "tfidf = TfidfVectorizer(max_features=5000)\n",
+    "X_train_tfidf = tfidf.fit_transform(X_train)\n",
+    "X_test_tfidf = tfidf.transform(X_test)\n",
+    "\n",
+    "# --- 4. Feed-forward ANN (MLPClassifier) ---\n",
+    "mlp_settings = {\n",
+    "    \"hidden_layer_sizes\": (256, 64),\n",
+    "    \"activation\": \"relu\",\n",
+    "    \"solver\": \"adam\",\n",
+    "    \"max_iter\": 500,\n",
+    "    \"random_state\": 42,\n",
+    "}\n",
+    "model = MLPClassifier(**mlp_settings)\n",
+    "model.fit(X_train_tfidf, y_train)\n",
+    "\n",
+    "# --- 5. Evaluate on the held-out texts ---\n",
+    "y_pred = model.predict(X_test_tfidf)\n",
+    "\n",
+    "print(\"=== Classification Report ===\")\n",
+    "print(classification_report(y_test, y_pred))\n",
+    "\n",
+    "print(\"=== Confusion Matrix ===\")\n",
+    "print(confusion_matrix(y_test, y_pred))\n",
+    "\n",
+    "# --- 6. Predict the label of a new sentence ---\n",
+    "sample_text = [\"barang buruk, saya kecewa\"]\n",
+    "prediction = model.predict(tfidf.transform(sample_text))\n",
+    "\n",
+    "print(f\"\\nPrediksi untuk: {sample_text[0]}\")\n",
+    "print(f\"Hasil: {prediction[0]}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "9f7d90fe-4af4-446c-9547-c9312bfa6fc7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Prediksi untuk: saya benci barang ini\n",
+      "Hasil: negative\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Classify one more sentence with the fitted `tfidf` vectoriser and `model`.\n",
+    "sample_text = [\"saya benci barang ini\"]\n",
+    "prediction = model.predict(tfidf.transform(sample_text))\n",
+    "print(f\"\\nPrediksi untuk: {sample_text[0]}\")\n",
+    "print(f\"Hasil: {prediction[0]}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4b9a7c2-0f08-43fd-8da8-018d839a4917",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/.ipynb_checkpoints/N-Gram-checkpoint.ipynb
+++ b/.ipynb_checkpoints/N-Gram-checkpoint.ipynb
@ -0,0 +1,380 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JVPdWpz3hhbj"
+   },
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "4Mvva3v65h1v"
+   },
+   "source": [
+    "# **UNIGRAM**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "1cub_VJnUJMl",
+    "outputId": "1889eb61-4f3f-4780-f42e-02368076cce3"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Corpus: saya suka makan nasi\n",
+      "Tokens (4): ['saya', 'suka', 'makan', 'nasi']\n",
+      "\n",
+      "Frekuensi Unigram dalam kalimat\n",
+      " ('saya'): 1\n",
+      " ('suka'): 1\n",
+      " ('makan'): 1\n",
+      " ('nasi'): 1\n",
+      "\n",
+      "Total unigram dalam 1 kalimat: 4\n",
+      "\n",
+      "Probabilitas masing-masing unigram:\n",
+      " P(saya) = 0.25 (25.00%)\n",
+      " P(suka) = 0.25 (25.00%)\n",
+      " P(makan) = 0.25 (25.00%)\n",
+      " P(nasi) = 0.25 (25.00%)\n",
+      "\n",
+      "Probabilitas Keseluruhan Kalimat (Model Unigram):\n",
+      " P(saya suka makan nasi) = P(saya)=0.25 x P(suka)=0.25 x P(makan)=0.25 x P(nasi)=0.25 = 0.0039 (0.39%)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from collections import Counter\n",
+    "from IPython.display import clear_output\n",
+    "import math\n",
+    "\n",
+    "# Unigram language model estimated from one input sentence:\n",
+    "#   P(w) = Count(w) / total tokens\n",
+    "#   P(sentence) = P(w1) * P(w2) * ...\n",
+    "\n",
+    "# 1. Read the sentence and tokenise it\n",
+    "kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "\n",
+    "# Clear the cell output (notebook environments only). Catch Exception instead of using a\n",
+    "# bare except so that KeyboardInterrupt and SystemExit are not swallowed.\n",
+    "try:\n",
+    "    clear_output()\n",
+    "except Exception:\n",
+    "    pass\n",
+    "\n",
+    "print(f\"Corpus: {kalimat}\")\n",
+    "\n",
+    "# Tokenise: lowercase, then split on whitespace\n",
+    "tokens = kalimat.lower().split()\n",
+    "# An empty sentence has no tokens; without this check the cell would report a\n",
+    "# probability of 1 for it (the empty product), which is meaningless.\n",
+    "if not tokens:\n",
+    "    raise ValueError(\"Kalimat tidak boleh kosong.\")\n",
+    "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+    "\n",
+    "# 2. Count unigram frequencies\n",
+    "unigram_counts = Counter(tokens)\n",
+    "total_tokens = sum(unigram_counts.values())\n",
+    "\n",
+    "print(\"\\nFrekuensi Unigram dalam kalimat\")\n",
+    "for word, count in unigram_counts.items():\n",
+    "    print(f\" ('{word}'): {count}\")\n",
+    "print(f\"\\nTotal unigram dalam 1 kalimat: {total_tokens}\")\n",
+    "\n",
+    "# 3. Unigram probabilities: P(wi) = Count(wi) / total tokens\n",
+    "unigram_probabilities = {\n",
+    "    word: count / total_tokens\n",
+    "    for word, count in unigram_counts.items()\n",
+    "}\n",
+    "\n",
+    "print(\"\\nProbabilitas masing-masing unigram:\")\n",
+    "for word, prob in unigram_probabilities.items():\n",
+    "    print(f\" P({word}) = {prob:.2f} ({prob*100:.2f}%)\")\n",
+    "\n",
+    "# 4. Sentence probability: P(sentence) = P(w1) * P(w2) * ...\n",
+    "p_kalimat = 1\n",
+    "prob_parts = []\n",
+    "\n",
+    "# Accumulate the product and, in parallel, the text of each factor for display.\n",
+    "for word in tokens:\n",
+    "    prob_value = unigram_probabilities[word]\n",
+    "    p_kalimat *= prob_value\n",
+    "    prob_parts.append(f\"P({word})={prob_value:.2f}\")\n",
+    "\n",
+    "# Join the factors into the displayed formula\n",
+    "prob_str = \" x \".join(prob_parts)\n",
+    "\n",
+    "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Unigram):\")\n",
+    "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.4f} ({p_kalimat*100:.2f}%)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Vstwt996-FrS"
+   },
+   "source": [
+    "# **BIGRAM**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "XRIY4qgTVbjl",
+    "outputId": "ea6e62ce-45a0-40c9-ca98-1fcc30558479"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Corpus: saya makan nasi dan saya makan roti\n",
+      "Tokens (7): ['saya', 'makan', 'nasi', 'dan', 'saya', 'makan', 'roti']\n",
+      "\n",
+      "Frekuensi Bigram dalam kalimat:\n",
+      " ('saya', 'makan'): 2\n",
+      " ('makan', 'nasi'): 1\n",
+      " ('nasi', 'dan'): 1\n",
+      " ('dan', 'saya'): 1\n",
+      " ('makan', 'roti'): 1\n",
+      "\n",
+      "Total bigram dalam 1 kalimat: 6\n",
+      "\n",
+      "Probabilitas masing-masing bigram:\n",
+      " P(makan|saya) = 1.00 (100.00%)\n",
+      " P(nasi|makan) = 0.50 (50.00%)\n",
+      " P(dan|nasi) = 1.00 (100.00%)\n",
+      " P(saya|dan) = 1.00 (100.00%)\n",
+      " P(roti|makan) = 0.50 (50.00%)\n",
+      "\n",
+      "Probabilitas Keseluruhan Kalimat (Model Bigram):\n",
+      " P(saya makan nasi dan saya makan roti) = P(saya)=0.29 x P(makan|saya)=1.00 x P(nasi|makan)=0.50 x P(dan|nasi)=1.00 x P(saya|dan)=1.00 x P(makan|saya)=1.00 x P(roti|makan)=0.50 = 0.071429 (7.14%)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from collections import Counter\n",
+    "from IPython.display import clear_output\n",
+    "import math\n",
+    "\n",
+    "# Bigram language model estimated from one input sentence:\n",
+    "#   P(w2 | w1) = Count(w1, w2) / Count(w1)\n",
+    "#   P(sentence) = P(w1) * P(w2 | w1) * P(w3 | w2) * ...\n",
+    "\n",
+    "# 1. Read the sentence and tokenise it\n",
+    "kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "\n",
+    "# Clear the cell output (notebook environments only). Catch Exception instead of using a\n",
+    "# bare except so that KeyboardInterrupt and SystemExit are not swallowed.\n",
+    "try:\n",
+    "    clear_output()\n",
+    "except Exception:\n",
+    "    pass\n",
+    "\n",
+    "print(f\"Corpus: {kalimat}\")\n",
+    "\n",
+    "# Tokenise: lowercase, then split on whitespace\n",
+    "tokens = kalimat.lower().split()\n",
+    "# Stop early on an empty sentence: tokens[0] in step 4 would otherwise fail with an IndexError.\n",
+    "if not tokens:\n",
+    "    raise ValueError(\"Kalimat tidak boleh kosong.\")\n",
+    "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+    "\n",
+    "# 2. Count unigrams and bigrams (pairs of adjacent tokens)\n",
+    "unigram_counts = Counter(tokens)\n",
+    "bigrams = list(zip(tokens, tokens[1:]))\n",
+    "bigram_counts = Counter(bigrams)\n",
+    "\n",
+    "print(\"\\nFrekuensi Bigram dalam kalimat:\")\n",
+    "for pair, count in bigram_counts.items():\n",
+    "    print(f\" {pair}: {count}\")\n",
+    "print(f\"\\nTotal bigram dalam 1 kalimat: {sum(bigram_counts.values())}\")\n",
+    "\n",
+    "# 3. Bigram probabilities: P(w2 | w1) = Count(w1,w2) / Count(w1)\n",
+    "bigram_probabilities = {\n",
+    "    (w1, w2): count / unigram_counts[w1]\n",
+    "    for (w1, w2), count in bigram_counts.items()\n",
+    "}\n",
+    "\n",
+    "print(\"\\nProbabilitas masing-masing bigram:\")\n",
+    "for (w1, w2), prob in bigram_probabilities.items():\n",
+    "    print(f\" P({w2}|{w1}) = {prob:.2f} ({prob*100:.2f}%)\")\n",
+    "\n",
+    "# 4. Sentence probability (bigram model)\n",
+    "#    P(sentence) = P(w1) * P(w2|w1) * P(w3|w2) * ...\n",
+    "total_tokens = sum(unigram_counts.values())\n",
+    "p_w1 = unigram_counts[tokens[0]] / total_tokens  # P(w1)\n",
+    "p_kalimat = p_w1  # start the product with P(w1)\n",
+    "\n",
+    "prob_str_parts = [f\"P({tokens[0]})={p_w1:.2f}\"]  # P(w1) is the first displayed factor\n",
+    "\n",
+    "# Multiply in one conditional probability per adjacent pair, in sentence order.\n",
+    "for previous, current in zip(tokens, tokens[1:]):\n",
+    "    p = bigram_probabilities.get((previous, current), 0)\n",
+    "    p_kalimat *= p\n",
+    "    prob_str_parts.append(f\"P({current}|{previous})={p:.2f}\")\n",
+    "\n",
+    "# Join the factors into the displayed formula\n",
+    "prob_str = \" x \".join(prob_str_parts)\n",
+    "\n",
+    "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Bigram):\")\n",
+    "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.6f} ({p_kalimat*100:.2f}%)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "E6n1IU8X-G9S"
+   },
+   "source": [
+    "# **TRIGRAM**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "BIRARsj2FHJg",
+    "outputId": "968d420e-9370-40e5-e7e1-148e1d351d62"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Corpus: mahasiswa mengerjakan tugas kemudian mahasiswa upload e-learning\n",
+      "Tokens (7): ['mahasiswa', 'mengerjakan', 'tugas', 'kemudian', 'mahasiswa', 'upload', 'e-learning']\n",
+      "\n",
+      "Frekuensi Trigram dalam kalimat:\n",
+      " ('mahasiswa', 'mengerjakan', 'tugas'): 1\n",
+      " ('mengerjakan', 'tugas', 'kemudian'): 1\n",
+      " ('tugas', 'kemudian', 'mahasiswa'): 1\n",
+      " ('kemudian', 'mahasiswa', 'upload'): 1\n",
+      " ('mahasiswa', 'upload', 'e-learning'): 1\n",
+      "\n",
+      "Total trigram dalam 1 kalimat: 5\n",
+      "\n",
+      "Probabilitas masing-masing trigram:\n",
+      " P(tugas|mahasiswa,mengerjakan) = 1.00 (100.00%)\n",
+      " P(kemudian|mengerjakan,tugas) = 1.00 (100.00%)\n",
+      " P(mahasiswa|tugas,kemudian) = 1.00 (100.00%)\n",
+      " P(upload|kemudian,mahasiswa) = 1.00 (100.00%)\n",
+      " P(e-learning|mahasiswa,upload) = 1.00 (100.00%)\n",
+      "\n",
+      "Probabilitas Keseluruhan Kalimat (Model Trigram):\n",
+      " P(mahasiswa mengerjakan tugas kemudian mahasiswa upload e-learning) = P(mahasiswa)=0.29 x P(mengerjakan|mahasiswa)=0.50 x P(tugas|mahasiswa,mengerjakan)=1.00 x P(kemudian|mengerjakan,tugas)=1.00 x P(mahasiswa|tugas,kemudian)=1.00 x P(upload|kemudian,mahasiswa)=1.00 x P(e-learning|mahasiswa,upload)=1.00 = 0.142857 (14.29%)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from collections import Counter\n",
+    "from IPython.display import clear_output\n",
+    "import math\n",
+    "\n",
+    "# Trigram language model estimated from one input sentence:\n",
+    "#   P(w3 | w1, w2) = Count(w1, w2, w3) / Count(w1, w2)\n",
+    "#   P(sentence) = P(w1) * P(w2 | w1) * P(w3 | w1, w2) * ...\n",
+    "\n",
+    "# 1. Read the sentence and tokenise it\n",
+    "kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "\n",
+    "# Clear the cell output (notebook environments only). Catch Exception instead of using a\n",
+    "# bare except so that KeyboardInterrupt and SystemExit are not swallowed.\n",
+    "try:\n",
+    "    clear_output()\n",
+    "except Exception:\n",
+    "    pass\n",
+    "\n",
+    "print(f\"Corpus: {kalimat}\")\n",
+    "\n",
+    "# Tokenise: lowercase, then split on whitespace\n",
+    "tokens = kalimat.lower().split()\n",
+    "# Stop early on an empty sentence: tokens[0] in step 4 would otherwise fail with an IndexError.\n",
+    "if not tokens:\n",
+    "    raise ValueError(\"Kalimat tidak boleh kosong.\")\n",
+    "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+    "\n",
+    "# 2. Count bigrams and trigrams (runs of 2 and 3 adjacent tokens)\n",
+    "bigrams = list(zip(tokens, tokens[1:]))\n",
+    "trigrams = list(zip(tokens, tokens[1:], tokens[2:]))\n",
+    "\n",
+    "bigram_counts = Counter(bigrams)\n",
+    "trigram_counts = Counter(trigrams)\n",
+    "\n",
+    "print(\"\\nFrekuensi Trigram dalam kalimat:\")\n",
+    "for tg, count in trigram_counts.items():\n",
+    "    print(f\" {tg}: {count}\")\n",
+    "print(f\"\\nTotal trigram dalam 1 kalimat: {sum(trigram_counts.values())}\")\n",
+    "\n",
+    "# 3. Trigram probabilities: P(w3 | w1, w2) = Count(w1,w2,w3) / Count(w1,w2)\n",
+    "# Every trigram starts with a pair that is also counted as a bigram, so the\n",
+    "# denominator is always at least 1 and needs no zero check.\n",
+    "trigram_probabilities = {\n",
+    "    (w1, w2, w3): count / bigram_counts[(w1, w2)]\n",
+    "    for (w1, w2, w3), count in trigram_counts.items()\n",
+    "}\n",
+    "\n",
+    "print(\"\\nProbabilitas masing-masing trigram:\")\n",
+    "for (w1, w2, w3), prob in trigram_probabilities.items():\n",
+    "    print(f\" P({w3}|{w1},{w2}) = {prob:.2f} ({prob*100:.2f}%)\")\n",
+    "\n",
+    "# Unigram counts are needed for P(w1) and P(w2|w1)\n",
+    "unigram_counts = Counter(tokens)\n",
+    "total_tokens = sum(unigram_counts.values())\n",
+    "\n",
+    "# 4. Sentence probability (trigram model)\n",
+    "#    P(sentence) = P(w1) * P(w2|w1) * P(w3|w1,w2) * ...\n",
+    "\n",
+    "# a. P(w1); tokens is non-empty here, so total_tokens >= 1\n",
+    "p_w1 = unigram_counts[tokens[0]] / total_tokens\n",
+    "\n",
+    "# b. P(w2|w1) from the bigram counts (no smoothing); 1.0 when the sentence has a single word\n",
+    "if len(tokens) > 1:\n",
+    "    p_w2_w1 = bigram_counts[(tokens[0], tokens[1])] / unigram_counts[tokens[0]]\n",
+    "else:\n",
+    "    p_w2_w1 = 1.0\n",
+    "\n",
+    "p_kalimat = p_w1 * p_w2_w1  # start the product with P(w1) * P(w2|w1)\n",
+    "\n",
+    "# Factors of the displayed formula\n",
+    "prob_str_parts = [f\"P({tokens[0]})={p_w1:.2f}\"]\n",
+    "if len(tokens) > 1:\n",
+    "    prob_str_parts.append(f\"P({tokens[1]}|{tokens[0]})={p_w2_w1:.2f}\")\n",
+    "\n",
+    "# c. Multiply in P(wi | wi-2, wi-1) for every trigram, in sentence order\n",
+    "for triplet in trigrams:\n",
+    "    p = trigram_probabilities.get(triplet, 0)\n",
+    "    p_kalimat *= p\n",
+    "    prob_str_parts.append(f\"P({triplet[2]}|{triplet[0]},{triplet[1]})={p:.2f}\")\n",
+    "\n",
+    "prob_str = \" x \".join(prob_str_parts)\n",
+    "\n",
+    "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Trigram):\")\n",
+    "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.6f} ({p_kalimat*100:.2f}%)\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/Fitur_Ekstraksi_BOW.ipynb
+++ b/Fitur_Ekstraksi_BOW.ipynb
@ -0,0 +1,351 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "qBYcPYAb059g",
+    "outputId": "9f57b704-da1b-4495-d366-24c30586dc76",
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "Masukkan jumlah dokumen yang ingin dimasukkan:  4\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Ask how many documents will be entered; the reply is converted with int().\n",
+    "import pandas as pd\n",
+    "jumlah_dokumen = input(\"Masukkan jumlah dokumen yang ingin dimasukkan: \")\n",
+    "n = int(jumlah_dokumen)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "mo-yt5Ob1N8j",
+    "outputId": "362ac3e0-d84b-4014-db96-cc3b10ecdb32"
+   },
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "Masukkan teks untuk dokumen ke-1:  saya adalah seorang pria\n",
+      "Masukkan teks untuk dokumen ke-2:  saya adalah pria yang memiliki hati\n",
+      "Masukkan teks untuk dokumen ke-3:  hati saya telah terisi satu nama\n",
+      "Masukkan teks untuk dokumen ke-4:  di dalam hati saya terukir nama pasangan saya\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Dokumen yang Dimasukkan ===\n",
+      "Doc 1: saya adalah seorang pria\n",
+      "Doc 2: saya adalah pria yang memiliki hati\n",
+      "Doc 3: hati saya telah terisi satu nama\n",
+      "Doc 4: di dalam hati saya terukir nama pasangan saya\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read the text of each document from the user, one prompt per document.\n",
+    "documents = [input(f\"Masukkan teks untuk dokumen ke-{i+1}: \") for i in range(n)]\n",
+    "\n",
+    "# Echo the collected documents back with 1-based numbering.\n",
+    "print(\"\\n=== Dokumen yang Dimasukkan ===\")\n",
+    "for i, doc in enumerate(documents):\n",
+    "    print(f\"Doc {i+1}: {doc}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "FkmxRAFq1oDK",
+    "outputId": "62c4508e-1725-4f30-fbdb-4de8072498b2"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Hasil Tokenisasi ===\n",
+      "Doc 1: ['saya', 'adalah', 'seorang', 'pria']\n",
+      "Doc 2: ['saya', 'adalah', 'pria', 'yang', 'memiliki', 'hati']\n",
+      "Doc 3: ['hati', 'saya', 'telah', 'terisi', 'satu', 'nama']\n",
+      "Doc 4: ['di', 'dalam', 'hati', 'saya', 'terukir', 'nama', 'pasangan', 'saya']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Tokenization: lowercase each document and split it on whitespace.\n",
+    "tokenized_docs = [doc.lower().split() for doc in documents]\n",
+    "\n",
+    "# Show the token list produced for every document.\n",
+    "print(\"\\n=== Hasil Tokenisasi ===\")\n",
+    "for i, tokens in enumerate(tokenized_docs):\n",
+    "    print(f\"Doc {i+1}: {tokens}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "ybC1Vo2C_c3q",
+    "outputId": "fa31c57e-5364-4ded-fcd0-54d0db46c34b"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\n",
+      "['saya', 'adalah', 'seorang', 'pria', 'saya', 'adalah', 'pria', 'yang', 'memiliki', 'hati', 'hati', 'saya', 'telah', 'terisi', 'satu', 'nama', 'di', 'dalam', 'hati', 'saya', 'terukir', 'nama', 'pasangan', 'saya']\n",
+      "Jumlah total kata dalam seluruh dokumen: 24\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Corpus construction: concatenate the tokens of all documents into one flat list.\n",
+    "corpus_all = []\n",
+    "for doc_tokens in tokenized_docs:\n",
+    "    corpus_all.extend(doc_tokens)\n",
+    "\n",
+    "print(\"\\n=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\")\n",
+    "print(corpus_all)\n",
+    "print(f\"Jumlah total kata dalam seluruh dokumen: {len(corpus_all)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "s6S-Ma4R1xuq",
+    "outputId": "98c3685b-1798-4038-d17e-6e45ca419b51"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Vocabulary (Kata Unik) ===\n",
+      "['adalah', 'dalam', 'di', 'hati', 'memiliki', 'nama', 'pasangan', 'pria', 'satu', 'saya', 'seorang', 'telah', 'terisi', 'terukir', 'yang']\n",
+      "Jumlah kata unik (vocabulary size): 15\n",
+      "\n",
+      "=== Vocabulary (Kata Unik) ===\n",
+      " 1. adalah\n",
+      " 2. dalam\n",
+      " 3. di\n",
+      " 4. hati\n",
+      " 5. memiliki\n",
+      " 6. nama\n",
+      " 7. pasangan\n",
+      " 8. pria\n",
+      " 9. satu\n",
+      "10. saya\n",
+      "11. seorang\n",
+      "12. telah\n",
+      "13. terisi\n",
+      "14. terukir\n",
+      "15. yang\n",
+      "\n",
+      "Jumlah kata unik (vocabulary size): 15\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Vocabulary construction: the unique words of the corpus in sorted order.\n",
+    "# Computed once; both listings below read the same list.\n",
+    "vocabulary = list(set(corpus_all))\n",
+    "vocabulary.sort()\n",
+    "\n",
+    "# Listing 1: the vocabulary printed as a single Python list.\n",
+    "print(\"\\n=== Vocabulary (Kata Unik) ===\")\n",
+    "print(vocabulary)\n",
+    "print(f\"Jumlah kata unik (vocabulary size): {len(vocabulary)}\")\n",
+    "\n",
+    "# Listing 2: one word per line with a right-aligned, 1-based number.\n",
+    "print(\"\\n=== Vocabulary (Kata Unik) ===\")\n",
+    "for idx, word in enumerate(vocabulary, start=1):\n",
+    "    print(f\"{idx:>2}. {word}\")\n",
+    "print(f\"\\nJumlah kata unik (vocabulary size): {len(vocabulary)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "id": "ShevCTva2Fg9"
+   },
+   "outputs": [],
+   "source": [
+    "# Numeric representation (BoW matrix): one row per document, one column per\n",
+    "# vocabulary word; each entry is how many times that word occurs in the document.\n",
+    "bow_matrix = [\n",
+    "    [doc.count(word) for word in vocabulary]\n",
+    "    for doc in tokenized_docs\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "-yB6D2pY2M0E",
+    "outputId": "b6b2f4d3-da8b-4aee-e9ce-034def4d5cf7"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Matriks Bag of Words ===\n",
+      "    adalah  dalam  di  hati  memiliki  nama  pasangan  pria  satu  saya  \\\n",
+      "D1       1      0   0     0         0     0         0     1     0     1   \n",
+      "D2       1      0   0     1         1     0         0     1     0     1   \n",
+      "D3       0      0   0     1         0     1         0     0     1     1   \n",
+      "D4       0      1   1     1         0     1         1     0     0     2   \n",
+      "\n",
+      "    seorang  telah  terisi  terukir  yang  \n",
+      "D1        1      0       0        0     0  \n",
+      "D2        0      0       0        0     1  \n",
+      "D3        0      1       1        0     0  \n",
+      "D4        0      0       0        1     0  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Wrap the BoW matrix in a DataFrame: columns are the vocabulary words and the\n",
+    "# rows are labelled D1, D2, D3, ... (one per entered document).\n",
+    "doc_labels = [f\"D{i}\" for i in range(1, len(documents)+1)]\n",
+    "df_bow = pd.DataFrame(bow_matrix, index=doc_labels, columns=vocabulary)\n",
+    "\n",
+    "print(\"\\n=== Matriks Bag of Words ===\")\n",
+    "print(df_bow)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "8ruf5vKL2rGD",
+    "outputId": "65a4674e-1c01-4833-ec55-f66f77b8b6c2"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Tabel Frekuensi Kata (Keseluruhan Dokumen) ===\n",
+      "        Kata  Frekuensi\n",
+      "0       saya          5\n",
+      "1       hati          3\n",
+      "2       nama          2\n",
+      "3       pria          2\n",
+      "4     adalah          2\n",
+      "5         di          1\n",
+      "6      dalam          1\n",
+      "7   pasangan          1\n",
+      "8   memiliki          1\n",
+      "9       satu          1\n",
+      "10   seorang          1\n",
+      "11     telah          1\n",
+      "12    terisi          1\n",
+      "13   terukir          1\n",
+      "14      yang          1\n",
+      "Frekuensi kata: 15\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Word-frequency table: total occurrences of each word across all documents.\n",
+    "total_per_word = df_bow.sum()  # column sums of the BoW matrix\n",
+    "ranked_totals = total_per_word.sort_values(ascending=False)  # most frequent first\n",
+    "word_frequencies = ranked_totals.reset_index()\n",
+    "word_frequencies.columns = [\"Kata\", \"Frekuensi\"]\n",
+    "\n",
+    "print(\"\\n=== Tabel Frekuensi Kata (Keseluruhan Dokumen) ===\")\n",
+    "print(word_frequencies)\n",
+    "# NOTE: the number printed here is the row count of the table (distinct words), not a frequency.\n",
+    "print(f\"Frekuensi kata: {len(word_frequencies)}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Repository setup\n",
+    "\n",
+    "`git remote add origin` is a shell command, not Python, so it must not be run as a code cell. Run it once from a terminal in the project directory:\n",
+    "\n",
+    "```bash\n",
+    "git remote add origin https://git.lab.ubharajaya.ac.id/202210715229-ALPRIAN-BAHARAJA-SITORUS/Praktikum_NLP\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/FNN.ipynb
+++ b/FNN.ipynb
@ -0,0 +1,169 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac",
+   "metadata": {},
+   "source": [
+    "# Klasifikasi Teks menggunakan ANN\n",
+    "## Arif R Dwiyanto\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "53a214ae-c9cf-4d46-925d-068f1685537b",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "All arrays must be of the same length",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mValueError\u001b[39m                                Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 28\u001b[39m\n\u001b[32m     11\u001b[39m \u001b[38;5;66;03m# -----------------------------------------\u001b[39;00m\n\u001b[32m     12\u001b[39m \u001b[38;5;66;03m# 1. Contoh Dataset\u001b[39;00m\n\u001b[32m     13\u001b[39m \u001b[38;5;66;03m# -----------------------------------------\u001b[39;00m\n\u001b[32m     14\u001b[39m \u001b[38;5;66;03m# Anda bisa mengganti dataset ini dengan dataset lain (CSV, JSON, dll)\u001b[39;00m\n\u001b[32m     16\u001b[39m data = {\n\u001b[32m     17\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mtext\u001b[39m\u001b[33m\"\u001b[39m: [\n\u001b[32m     18\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33mTempat ini sangat nyaman dan bersih.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m   (...)\u001b[39m\u001b[32m     25\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mlabel\u001b[39m\u001b[33m\"\u001b[39m: [\u001b[33m\"\u001b[39m\u001b[33mpositive\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mnegative\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mpositive\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mnegative\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mpositive\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mnegative\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m     26\u001b[39m }\n\u001b[32m---> \u001b[39m\u001b[32m28\u001b[39m df = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     30\u001b[39m \u001b[38;5;66;03m# -----------------------------------------\u001b[39;00m\n\u001b[32m     31\u001b[39m \u001b[38;5;66;03m# 2. 
Split Train & Test\u001b[39;00m\n\u001b[32m     32\u001b[39m \u001b[38;5;66;03m# -----------------------------------------\u001b[39;00m\n\u001b[32m     33\u001b[39m X_train, X_test, y_train, y_test = train_test_split(\n\u001b[32m     34\u001b[39m     df[\u001b[33m\"\u001b[39m\u001b[33mtext\u001b[39m\u001b[33m\"\u001b[39m], df[\u001b[33m\"\u001b[39m\u001b[33mlabel\u001b[39m\u001b[33m\"\u001b[39m], test_size=\u001b[32m0.3\u001b[39m, random_state=\u001b[32m42\u001b[39m\n\u001b[32m     35\u001b[39m )\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~\\miniforge3\\Lib\\site-packages\\pandas\\core\\frame.py:782\u001b[39m, in \u001b[36mDataFrame.__init__\u001b[39m\u001b[34m(self, data, index, columns, dtype, copy)\u001b[39m\n\u001b[32m    776\u001b[39m     mgr = \u001b[38;5;28mself\u001b[39m._init_mgr(\n\u001b[32m    777\u001b[39m         data, axes={\u001b[33m\"\u001b[39m\u001b[33mindex\u001b[39m\u001b[33m\"\u001b[39m: index, \u001b[33m\"\u001b[39m\u001b[33mcolumns\u001b[39m\u001b[33m\"\u001b[39m: columns}, dtype=dtype, copy=copy\n\u001b[32m    778\u001b[39m     )\n\u001b[32m    780\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, \u001b[38;5;28mdict\u001b[39m):\n\u001b[32m    781\u001b[39m     \u001b[38;5;66;03m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m782\u001b[39m     mgr = \u001b[43mdict_to_mgr\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtyp\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmanager\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    783\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, ma.MaskedArray):\n\u001b[32m    784\u001b[39m     \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mma\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m mrecords\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~\\miniforge3\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:503\u001b[39m, in \u001b[36mdict_to_mgr\u001b[39m\u001b[34m(data, index, columns, dtype, typ, copy)\u001b[39m\n\u001b[32m    499\u001b[39m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m    500\u001b[39m         \u001b[38;5;66;03m# dtype check to exclude e.g. range objects, scalars\u001b[39;00m\n\u001b[32m    501\u001b[39m         arrays = [x.copy() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(x, \u001b[33m\"\u001b[39m\u001b[33mdtype\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m x \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m arrays]\n\u001b[32m--> \u001b[39m\u001b[32m503\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marrays_to_mgr\u001b[49m\u001b[43m(\u001b[49m\u001b[43marrays\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtyp\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtyp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconsolidate\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~\\miniforge3\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:114\u001b[39m, in \u001b[36marrays_to_mgr\u001b[39m\u001b[34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[39m\n\u001b[32m    111\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m verify_integrity:\n\u001b[32m    112\u001b[39m     \u001b[38;5;66;03m# figure out the index, if necessary\u001b[39;00m\n\u001b[32m    113\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m index \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m114\u001b[39m         index = \u001b[43m_extract_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43marrays\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    115\u001b[39m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m    116\u001b[39m         index = ensure_index(index)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~\\miniforge3\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:677\u001b[39m, in \u001b[36m_extract_index\u001b[39m\u001b[34m(data)\u001b[39m\n\u001b[32m    675\u001b[39m lengths = \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mset\u001b[39m(raw_lengths))\n\u001b[32m    676\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(lengths) > \u001b[32m1\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m677\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mAll arrays must be of the same length\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m    679\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m have_dicts:\n\u001b[32m    680\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m    681\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    682\u001b[39m     )\n",
+      "\u001b[31mValueError\u001b[39m: All arrays must be of the same length"
+     ]
+    }
+   ],
+   "source": [
+    "# ---------------------------------------------------------\n",
+    "# Klasifikasi Teks dengan TF-IDF + Feedforward Neural Network\n",
+    "# ---------------------------------------------------------\n",
+    "\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.neural_network import MLPClassifier\n",
+    "from sklearn.metrics import classification_report, confusion_matrix\n",
+    "\n",
+    "# -----------------------------------------\n",
+    "# 1. Contoh Dataset\n",
+    "# -----------------------------------------\n",
+    "# Anda bisa mengganti dataset ini dengan dataset lain (CSV, JSON, dll)\n",
+    "\n",
+    "# Every review must be its own list element: adjacent string literals with no\n",
+    "# comma between them are concatenated by Python into a single string, which\n",
+    "# leaves 1 text against 6 labels and makes pd.DataFrame raise\n",
+    "# \"All arrays must be of the same length\".\n",
+    "data = {\n",
+    "    \"text\": [\n",
+    "        \"Tempat ini sangat nyaman dan bersih.\",\n",
+    "        \"Akses menuju ke sana cukup sulit dan membingungkan.\",\n",
+    "        \"Pelayanan staf di sini juga sangat ramah dan cepat tanggap.\",\n",
+    "        \"Lokasi kafe ini strategis dan mudah ditemukan.\",\n",
+    "        \"Suasananya kadang terlalu bising karena sering ada keramaian.\",\n",
+    "        \"Pilihan menu minumannya sangat beragam dan lezat.\",\n",
+    "    ],\n",
+    "    # One label per review, in the same order as the texts: only the hard-to-reach\n",
+    "    # and too-noisy reviews (2nd and 5th) are negative.\n",
+    "    \"label\": [\"positive\", \"negative\", \"positive\", \"positive\", \"negative\", \"positive\"]\n",
+    "}\n",
+    "\n",
+    "# Build the DataFrame that holds the example texts and their labels.\n",
+    "df = pd.DataFrame(data)\n",
+    "\n",
+    "# -----------------------------------------\n",
+    "# 2. Split Train & Test\n",
+    "# -----------------------------------------\n",
+    "# 30% of the rows are held out for evaluation; random_state pins the shuffle.\n",
+    "texts, labels = df[\"text\"], df[\"label\"]\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    texts, labels, test_size=0.3, random_state=42\n",
+    ")\n",
+    "\n",
+    "# -----------------------------------------\n",
+    "# 3. TF-IDF Vectorization\n",
+    "# -----------------------------------------\n",
+    "# The vectorizer is fitted on the training texts only; the test texts are\n",
+    "# transformed with that same fitted vectorizer.\n",
+    "tfidf = TfidfVectorizer(max_features=5000)\n",
+    "X_train_tfidf = tfidf.fit_transform(X_train)\n",
+    "X_test_tfidf = tfidf.transform(X_test)\n",
+    "\n",
+    "# -----------------------------------------\n",
+    "# 4. Feedforward ANN (MLPClassifier)\n",
+    "# -----------------------------------------\n",
+    "mlp_params = dict(\n",
+    "    hidden_layer_sizes=(256, 64),\n",
+    "    activation=\"relu\",\n",
+    "    solver=\"adam\",\n",
+    "    max_iter=500,\n",
+    "    random_state=42,\n",
+    ")\n",
+    "model = MLPClassifier(**mlp_params)\n",
+    "model.fit(X_train_tfidf, y_train)\n",
+    "\n",
+    "# -----------------------------------------\n",
+    "# 5. Model evaluation\n",
+    "# -----------------------------------------\n",
+    "y_pred = model.predict(X_test_tfidf)\n",
+    "\n",
+    "print(\"=== Classification Report ===\")\n",
+    "print(classification_report(y_test, y_pred))\n",
+    "\n",
+    "print(\"=== Confusion Matrix ===\")\n",
+    "print(confusion_matrix(y_test, y_pred))\n",
+    "\n",
+    "# -----------------------------------------\n",
+    "# 6. Predict a new text\n",
+    "# -----------------------------------------\n",
+    "# The new sentence goes through the already-fitted vectorizer before prediction.\n",
+    "sample_text = [\"Tempat nyaman, saya suka\"]\n",
+    "sample_vec = tfidf.transform(sample_text)\n",
+    "prediction = model.predict(sample_vec)\n",
+    "\n",
+    "print(\"\\nPrediksi untuk:\", sample_text[0])\n",
+    "print(\"Hasil:\", prediction[0])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f7d90fe-4af4-446c-9547-c9312bfa6fc7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Classify one more unseen sentence with the already-fitted vectorizer and model.\n",
+    "sample_text = [\"Tempat bising saya tidak suka\"]\n",
+    "sample_vec = tfidf.transform(sample_text)\n",
+    "prediction = model.predict(sample_vec)\n",
+    "\n",
+    "print(\"\\nPrediksi untuk:\", sample_text[0])\n",
+    "print(\"Hasil:\", prediction[0])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0413b4bf-beb1-483b-a081-b540fce1b21c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d714bd96-09a0-4439-8286-0cb39e2fb4df",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/N-Gram.ipynb
+++ b/N-Gram.ipynb
@ -0,0 +1,395 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JVPdWpz3hhbj"
+   },
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "4Mvva3v65h1v"
+   },
+   "source": [
+    "# **UNIGRAM**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "1cub_VJnUJMl",
+    "outputId": "1889eb61-4f3f-4780-f42e-02368076cce3"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Corpus: Saya sangat mencintai pacar saya\n",
+      "Tokens (5): ['saya', 'sangat', 'mencintai', 'pacar', 'saya']\n",
+      "\n",
+      "Frekuensi Unigram dalam kalimat\n",
+      " ('saya'): 2\n",
+      " ('sangat'): 1\n",
+      " ('mencintai'): 1\n",
+      " ('pacar'): 1\n",
+      "\n",
+      "Total unigram dalam 1 kalimat: 5\n",
+      "\n",
+      "Probabilitas masing-masing unigram:\n",
+      " P(saya) = 0.40 (40.00%)\n",
+      " P(sangat) = 0.20 (20.00%)\n",
+      " P(mencintai) = 0.20 (20.00%)\n",
+      " P(pacar) = 0.20 (20.00%)\n",
+      "\n",
+      "Probabilitas Keseluruhan Kalimat (Model Unigram):\n",
+      " P(saya sangat mencintai pacar saya) = P(saya)=0.40 x P(sangat)=0.20 x P(mencintai)=0.20 x P(pacar)=0.20 x P(saya)=0.40 = 0.0013 (0.13%)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from collections import Counter\n",
+    "from IPython.display import clear_output\n",
+    "import math\n",
+    "\n",
+    "# 1. Sentence input and tokenization\n",
+    "# Keep prompting until the sentence has at least one non-whitespace character,\n",
+    "# so the token list built below is never empty.\n",
+    "kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "while not kalimat:\n",
+    "    kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "\n",
+    "# Clear the prompt output (notebook environments only). This is best effort:\n",
+    "# ordinary errors are ignored, but KeyboardInterrupt/SystemExit still propagate.\n",
+    "try:\n",
+    "    clear_output()\n",
+    "except Exception:\n",
+    "    pass\n",
+    "\n",
+    "print(f\"Corpus: {kalimat}\")\n",
+    "\n",
+    "# Tokenize: lowercase the sentence, then split it on whitespace\n",
+    "tokens = kalimat.lower().split()\n",
+    "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+    "\n",
+    "# 2. Count unigram frequencies\n",
+    "unigram_counts = Counter(tokens)\n",
+    "total_tokens = len(tokens)  # same value as summing every unigram count\n",
+    "\n",
+    "print(\"\\nFrekuensi Unigram dalam kalimat\")\n",
+    "for pair, count in unigram_counts.items():\n",
+    "    print(f\" ('{pair}'): {count}\")\n",
+    "print(f\"\\nTotal unigram dalam 1 kalimat: {total_tokens}\")\n",
+    "\n",
+    "# 3. Unigram probability: P(wi) = Count(wi) / total number of words\n",
+    "unigram_probabilities = {\n",
+    "    word: count / total_tokens for word, count in unigram_counts.items()\n",
+    "}\n",
+    "\n",
+    "print(\"\\nProbabilitas masing-masing unigram:\")\n",
+    "for word, prob in unigram_probabilities.items():\n",
+    "    print(f\" P({word}) = {prob:.2f} ({prob*100:.2f}%)\")\n",
+    "\n",
+    "# 4. Sentence probability under the unigram model: P(sentence) = P(w1) * P(w2) * ...\n",
+    "# One factor per token occurrence, so a repeated word is multiplied in each time it appears.\n",
+    "p_kalimat = math.prod(unigram_probabilities[word] for word in tokens)\n",
+    "\n",
+    "# Text form of the product, one \"P(word)=value\" term per token, joined with \" x \"\n",
+    "prob_parts = [f\"P({word})={unigram_probabilities[word]:.2f}\" for word in tokens]\n",
+    "prob_str = \" x \".join(prob_parts)\n",
+    "\n",
+    "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Unigram):\")\n",
+    "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.4f} ({p_kalimat*100:.2f}%)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Vstwt996-FrS"
+   },
+   "source": [
+    "# **BIGRAM**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "XRIY4qgTVbjl",
+    "outputId": "ea6e62ce-45a0-40c9-ca98-1fcc30558479"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Corpus: Saya adalah pemain liga sepak bola terbaik di dunia\n",
+      "Tokens (9): ['saya', 'adalah', 'pemain', 'liga', 'sepak', 'bola', 'terbaik', 'di', 'dunia']\n",
+      "\n",
+      "Frekuensi Bigram dalam kalimat:\n",
+      " ('saya', 'adalah'): 1\n",
+      " ('adalah', 'pemain'): 1\n",
+      " ('pemain', 'liga'): 1\n",
+      " ('liga', 'sepak'): 1\n",
+      " ('sepak', 'bola'): 1\n",
+      " ('bola', 'terbaik'): 1\n",
+      " ('terbaik', 'di'): 1\n",
+      " ('di', 'dunia'): 1\n",
+      "\n",
+      "Total bigram dalam 1 kalimat: 8\n",
+      "\n",
+      "Probabilitas masing-masing bigram:\n",
+      " P(adalah|saya) = 1.00 (100.00%)\n",
+      " P(pemain|adalah) = 1.00 (100.00%)\n",
+      " P(liga|pemain) = 1.00 (100.00%)\n",
+      " P(sepak|liga) = 1.00 (100.00%)\n",
+      " P(bola|sepak) = 1.00 (100.00%)\n",
+      " P(terbaik|bola) = 1.00 (100.00%)\n",
+      " P(di|terbaik) = 1.00 (100.00%)\n",
+      " P(dunia|di) = 1.00 (100.00%)\n",
+      "\n",
+      "Probabilitas Keseluruhan Kalimat (Model Bigram):\n",
+      " P(saya adalah pemain liga sepak bola terbaik di dunia) = P(saya)=0.11 x P(adalah|saya)=1.00 x P(pemain|adalah)=1.00 x P(liga|pemain)=1.00 x P(sepak|liga)=1.00 x P(bola|sepak)=1.00 x P(terbaik|bola)=1.00 x P(di|terbaik)=1.00 x P(dunia|di)=1.00 = 0.111111 (11.11%)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from collections import Counter\n",
+    "from IPython.display import clear_output\n",
+    "import math\n",
+    "\n",
+    "# 1. Sentence input and tokenization\n",
+    "# Keep prompting until the sentence has at least one non-whitespace character:\n",
+    "# the sentence-probability step later in this cell reads tokens[0], which would\n",
+    "# raise IndexError for an empty sentence.\n",
+    "kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "while not kalimat:\n",
+    "    kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "\n",
+    "# Clear the prompt output (notebook environments only). This is best effort:\n",
+    "# ordinary errors are ignored, but KeyboardInterrupt/SystemExit still propagate.\n",
+    "try:\n",
+    "    clear_output()\n",
+    "except Exception:\n",
+    "    pass\n",
+    "\n",
+    "print(f\"Corpus: {kalimat}\")\n",
+    "\n",
+    "# Tokenize: lowercase the sentence, then split it on whitespace\n",
+    "tokens = kalimat.lower().split()\n",
+    "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+    "\n",
+    "# 2. Count unigram and bigram frequencies\n",
+    "unigram_counts = Counter(tokens)\n",
+    "bigrams = list(zip(tokens, tokens[1:]))  # adjacent word pairs, in sentence order\n",
+    "bigram_counts = Counter(bigrams)\n",
+    "\n",
+    "print(\"\\nFrekuensi Bigram dalam kalimat:\")\n",
+    "for pair, count in bigram_counts.items():\n",
+    "    print(f\" {pair}: {count}\")\n",
+    "print(f\"\\nTotal bigram dalam 1 kalimat: {sum(bigram_counts.values())}\")\n",
+    "\n",
+    "# 3. Bigram probability: P(w2 | w1) = Count(w1,w2) / Count(w1)\n",
+    "bigram_probabilities = {\n",
+    "    (w1, w2): count / unigram_counts[w1]\n",
+    "    for (w1, w2), count in bigram_counts.items()\n",
+    "}\n",
+    "\n",
+    "print(\"\\nProbabilitas masing-masing bigram:\")\n",
+    "for (w1, w2), prob in bigram_probabilities.items():\n",
+    "    print(f\" P({w2}|{w1}) = {prob:.2f} ({prob*100:.2f}%)\")\n",
+    "\n",
+    "# 4. Sentence probability under the bigram model\n",
+    "#    P(sentence) = P(w1) * P(w2|w1) * P(w3|w2) * ...\n",
+    "total_tokens = len(tokens)  # same value as summing every unigram count\n",
+    "p_w1 = unigram_counts.get(tokens[0], 0) / total_tokens  # P(w1)\n",
+    "p_kalimat = p_w1  # the running product starts at P(w1)\n",
+    "prob_str_parts = [f\"P({tokens[0]})={p_w1:.2f}\"]  # the formula text starts with P(w1)\n",
+    "\n",
+    "# Multiply in one conditional probability per adjacent pair; a pair that is\n",
+    "# missing from the probability table contributes 0.\n",
+    "for pair in bigrams:\n",
+    "    p = bigram_probabilities.get(pair, 0)\n",
+    "    p_kalimat *= p\n",
+    "    prob_str_parts.append(f\"P({pair[1]}|{pair[0]})={p:.2f}\")\n",
+    "\n",
+    "# Join the terms into the product formula that is displayed\n",
+    "prob_str = \" x \".join(prob_str_parts)\n",
+    "\n",
+    "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Bigram):\")\n",
+    "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.6f} ({p_kalimat*100:.2f}%)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "E6n1IU8X-G9S"
+   },
+   "source": [
+    "# **TRIGRAM**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "BIRARsj2FHJg",
+    "outputId": "968d420e-9370-40e5-e7e1-148e1d351d62"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Corpus: Saya adalah anak dari ibu dan bapak saya\n",
+      "Tokens (8): ['saya', 'adalah', 'anak', 'dari', 'ibu', 'dan', 'bapak', 'saya']\n",
+      "\n",
+      "Frekuensi Trigram dalam kalimat:\n",
+      " ('saya', 'adalah', 'anak'): 1\n",
+      " ('adalah', 'anak', 'dari'): 1\n",
+      " ('anak', 'dari', 'ibu'): 1\n",
+      " ('dari', 'ibu', 'dan'): 1\n",
+      " ('ibu', 'dan', 'bapak'): 1\n",
+      " ('dan', 'bapak', 'saya'): 1\n",
+      "\n",
+      "Total trigram dalam 1 kalimat: 6\n",
+      "\n",
+      "Probabilitas masing-masing trigram:\n",
+      " P(anak|saya,adalah) = 1.00 (100.00%)\n",
+      " P(dari|adalah,anak) = 1.00 (100.00%)\n",
+      " P(ibu|anak,dari) = 1.00 (100.00%)\n",
+      " P(dan|dari,ibu) = 1.00 (100.00%)\n",
+      " P(bapak|ibu,dan) = 1.00 (100.00%)\n",
+      " P(saya|dan,bapak) = 1.00 (100.00%)\n",
+      "\n",
+      "Probabilitas Keseluruhan Kalimat (Model Trigram):\n",
+      " P(saya adalah anak dari ibu dan bapak saya) = P(saya)=0.25 x P(adalah|saya)=0.50 x P(anak|saya,adalah)=1.00 x P(dari|adalah,anak)=1.00 x P(ibu|anak,dari)=1.00 x P(dan|dari,ibu)=1.00 x P(bapak|ibu,dan)=1.00 x P(saya|dan,bapak)=1.00 = 0.125000 (12.50%)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from collections import Counter\n",
+    "from IPython.display import clear_output\n",
+    "import math\n",
+    "\n",
+    "# 1. Read the sentence and tokenize it\n",
+    "kalimat = input(\"Masukkan kalimat: \").strip()\n",
+    "\n",
+    "# Clear the cell output (only meaningful inside a notebook front end)\n",
+    "try:\n",
+    "    clear_output()\n",
+    "except Exception:\n",
+    "    # Best effort: the report below does not depend on the output being cleared.\n",
+    "    # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.\n",
+    "    pass\n",
+    "\n",
+    "print(f\"Corpus: {kalimat}\")\n",
+    "\n",
+    "# Tokenization: lowercase, then split on whitespace\n",
+    "tokens = kalimat.lower().split()\n",
+    "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+    "\n",
+    "# An empty sentence has no first word, so P(w1) is undefined. Stop here with a\n",
+    "# clear message instead of failing further down with an IndexError on tokens[0].\n",
+    "if not tokens:\n",
+    "    raise ValueError(\"Kalimat tidak boleh kosong.\")\n",
+    "\n",
+    "# 2. Count bigram and trigram frequencies\n",
+    "# zip over shifted copies yields the consecutive pairs / triples as tuples.\n",
+    "bigrams = list(zip(tokens, tokens[1:]))\n",
+    "trigrams = list(zip(tokens, tokens[1:], tokens[2:]))\n",
+    "\n",
+    "bigram_counts = Counter(bigrams)\n",
+    "trigram_counts = Counter(trigrams)\n",
+    "\n",
+    "print(\"\\nFrekuensi Trigram dalam kalimat:\")\n",
+    "for tg, count in trigram_counts.items():\n",
+    "    print(f\" {tg}: {count}\")\n",
+    "print(f\"\\nTotal trigram dalam 1 kalimat: {sum(trigram_counts.values())}\")\n",
+    "\n",
+    "# 3. Trigram probabilities: P(w3 | w1, w2) = Count(w1,w2,w3) / Count(w1,w2)\n",
+    "# Every trigram starts with a bigram taken from the same token list, so\n",
+    "# Count(w1,w2) >= Count(w1,w2,w3) >= 1 and the division can never hit zero.\n",
+    "trigram_probabilities = {\n",
+    "    tg: count / bigram_counts[tg[:2]] for tg, count in trigram_counts.items()\n",
+    "}\n",
+    "\n",
+    "print(\"\\nProbabilitas masing-masing trigram:\")\n",
+    "for (w1, w2, w3), prob in trigram_probabilities.items():\n",
+    "    print(f\" P({w3}|{w1},{w2}) = {prob:.2f} ({prob*100:.2f}%)\")\n",
+    "\n",
+    "# Unigram counts are needed for P(w1) and P(w2|w1)\n",
+    "unigram_counts = Counter(tokens)\n",
+    "total_tokens = sum(unigram_counts.values())\n",
+    "\n",
+    "# 4. Probability of the whole sentence (trigram model)\n",
+    "#    P(sentence) = P(w1) * P(w2|w1) * P(w3|w1,w2) * ...\n",
+    "\n",
+    "# a. P(w1): tokens is non-empty here, so total_tokens >= 1\n",
+    "p_w1 = unigram_counts[tokens[0]] / total_tokens\n",
+    "\n",
+    "# b. P(w2|w1), estimated from the bigram counts without smoothing\n",
+    "if len(tokens) > 1:\n",
+    "    count_w1 = unigram_counts[tokens[0]]  # >= 1 because tokens[0] was counted\n",
+    "    p_w2_w1 = bigram_counts[(tokens[0], tokens[1])] / count_w1\n",
+    "else:\n",
+    "    p_w2_w1 = 1.0  # single-word sentence: neutral factor, nothing to condition on\n",
+    "\n",
+    "p_kalimat = p_w1 * p_w2_w1  # start the product with P(w1) * P(w2|w1)\n",
+    "\n",
+    "# Factors of the product, kept as text so the formula can be printed\n",
+    "prob_str_parts = [f\"P({tokens[0]})={p_w1:.2f}\"]\n",
+    "if len(tokens) > 1:\n",
+    "    prob_str_parts.append(f\"P({tokens[1]}|{tokens[0]})={p_w2_w1:.2f}\")\n",
+    "\n",
+    "# c. Multiply in P(wi | wi-2, wi-1) for every word from the third one onward.\n",
+    "# Each triple in `trigrams` is a key of trigram_probabilities by construction.\n",
+    "for triplet in trigrams:\n",
+    "    p = trigram_probabilities[triplet]\n",
+    "    p_kalimat *= p\n",
+    "    prob_str_parts.append(f\"P({triplet[2]}|{triplet[0]},{triplet[1]})={p:.2f}\")\n",
+    "\n",
+    "prob_str = \" x \".join(prob_str_parts)\n",
+    "\n",
+    "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Trigram):\")\n",
+    "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.6f} ({p_kalimat*100:.2f}%)\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/README.md
+++ b/README.md
@ -1,3 +0,0 @@
-#praktikum nlp
-
-Nama : Alprian Baharaja Sitorus