4 changed files with 943 additions and 0 deletions
--- a/(1).ipynb
+++ b/(1).ipynb
@ -0,0 +1,318 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "qBYcPYAb059g",
+        "outputId": "ac27d686-2d15-4b2f-cc13-963fadf3100f"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Masukkan jumlah dokumen yang ingin dimasukkan: 4\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Input jumlah dokumen\n",
+        "from collections import Counter\n",
+        "\n",
+        "import pandas as pd\n",
+        "# Read the number of documents to collect. int() raises ValueError for non-numeric input;\n",
+        "# a negative count is rejected explicitly because the input loop over range(n) would\n",
+        "# otherwise silently collect nothing.\n",
+        "jawaban_jumlah = input(\"Masukkan jumlah dokumen yang ingin dimasukkan: \")\n",
+        "n = int(jawaban_jumlah)\n",
+        "if n < 0:\n",
+        "    raise ValueError(f\"Jumlah dokumen tidak boleh negatif: {n}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "mo-yt5Ob1N8j",
+        "outputId": "21da9ff1-2954-4b39-b207-017d03d0294f"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Masukkan teks untuk dokumen ke-1: saya ingin memasak\n",
+            "Masukkan teks untuk dokumen ke-2: masak ayam goreng sepertinya enak\n",
+            "Masukkan teks untuk dokumen ke-3: enakan ayam goreng atau ikan goreng\n",
+            "Masukkan teks untuk dokumen ke-4: dibarengi dengan saus sepertinya akan lezat\n",
+            "\n",
+            "=== Dokumen yang Dimasukkan ===\n",
+            "Doc 1: saya ingin memasak\n",
+            "Doc 2: masak ayam goreng sepertinya enak\n",
+            "Doc 3: enakan ayam goreng atau ikan goreng\n",
+            "Doc 4: dibarengi dengan saus sepertinya akan lezat\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Prompt for the text of each document in turn (prompts are numbered from 1).\n",
+        "documents = [\n",
+        "    input(f\"Masukkan teks untuk dokumen ke-{nomor}: \")\n",
+        "    for nomor in range(1, n + 1)\n",
+        "]\n",
+        "\n",
+        "# Echo the collected documents so the input can be checked.\n",
+        "print(\"\\n=== Dokumen yang Dimasukkan ===\")\n",
+        "for nomor, isi in enumerate(documents, start=1):\n",
+        "    print(f\"Doc {nomor}: {isi}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "FkmxRAFq1oDK",
+        "outputId": "e451e801-161a-4618-f047-97893cc7a68b"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "=== Hasil Tokenisasi ===\n",
+            "Doc 1: ['saya', 'ingin', 'memasak']\n",
+            "Doc 2: ['masak', 'ayam', 'goreng', 'sepertinya', 'enak']\n",
+            "Doc 3: ['enakan', 'ayam', 'goreng', 'atau', 'ikan', 'goreng']\n",
+            "Doc 4: ['dibarengi', 'dengan', 'saus', 'sepertinya', 'akan', 'lezat']\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Tokenization: lowercase each document and split it on whitespace.\n",
+        "tokenized_docs = [teks.lower().split() for teks in documents]\n",
+        "\n",
+        "print(\"\\n=== Hasil Tokenisasi ===\")\n",
+        "for nomor, daftar_token in enumerate(tokenized_docs, start=1):\n",
+        "    print(f\"Doc {nomor}: {daftar_token}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ybC1Vo2C_c3q",
+        "outputId": "f1e97af1-3af9-4dee-b59a-2a8baa79a370"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\n",
+            "['saya', 'ingin', 'memasak', 'masak', 'ayam', 'goreng', 'sepertinya', 'enak', 'enakan', 'ayam', 'goreng', 'atau', 'ikan', 'goreng', 'dibarengi', 'dengan', 'saus', 'sepertinya', 'akan', 'lezat']\n",
+            "Jumlah total kata dalam seluruh dokumen: 20\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Corpus: a single flat list with every token of every document, in document order.\n",
+        "corpus_all = []\n",
+        "for daftar_token in tokenized_docs:\n",
+        "    corpus_all.extend(daftar_token)\n",
+        "\n",
+        "print(\"\\n=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\")\n",
+        "print(corpus_all)\n",
+        "print(f\"Jumlah total kata dalam seluruh dokumen: {len(corpus_all)}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "s6S-Ma4R1xuq",
+        "outputId": "7643748e-937e-4724-8db0-0a768ad7182f"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "=== Vocabulary (Kata Unik) ===\n",
+            "['akan', 'atau', 'ayam', 'dengan', 'dibarengi', 'enak', 'enakan', 'goreng', 'ikan', 'ingin', 'lezat', 'masak', 'memasak', 'saus', 'saya', 'sepertinya']\n",
+            "Jumlah kata unik (vocabulary size): 16\n",
+            "\n",
+            "=== Vocabulary (Kata Unik) ===\n",
+            " 1. akan\n",
+            " 2. atau\n",
+            " 3. ayam\n",
+            " 4. dengan\n",
+            " 5. dibarengi\n",
+            " 6. enak\n",
+            " 7. enakan\n",
+            " 8. goreng\n",
+            " 9. ikan\n",
+            "10. ingin\n",
+            "11. lezat\n",
+            "12. masak\n",
+            "13. memasak\n",
+            "14. saus\n",
+            "15. saya\n",
+            "16. sepertinya\n",
+            "\n",
+            "Jumlah kata unik (vocabulary size): 16\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Vocabulary: the unique words of the corpus in sorted order.\n",
+        "# It is computed once and reused by both listings below.\n",
+        "vocabulary = sorted(set(corpus_all))\n",
+        "\n",
+        "# Listing 1: the vocabulary as a plain Python list.\n",
+        "print(\"\\n=== Vocabulary (Kata Unik) ===\")\n",
+        "print(vocabulary)\n",
+        "print(f\"Jumlah kata unik (vocabulary size): {len(vocabulary)}\")\n",
+        "\n",
+        "# Listing 2: the same vocabulary, one numbered word per line.\n",
+        "print(\"\\n=== Vocabulary (Kata Unik) ===\")\n",
+        "for idx, word in enumerate(vocabulary, start=1):\n",
+        "    print(f\"{idx:>2}. {word}\")\n",
+        "print(f\"\\nJumlah kata unik (vocabulary size): {len(vocabulary)}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ShevCTva2Fg9"
+      },
+      "outputs": [],
+      "source": [
+        "# Numeric representation (BoW matrix): one row per document, one column per vocabulary word.\n",
+        "# Each document is counted once with Counter instead of being rescanned for every word;\n",
+        "# a Counter returns 0 for words that do not occur in the document.\n",
+        "bow_matrix = []\n",
+        "for doc in tokenized_docs:\n",
+        "    jumlah_per_kata = Counter(doc)\n",
+        "    bow_matrix.append([jumlah_per_kata[word] for word in vocabulary])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-yB6D2pY2M0E",
+        "outputId": "b6b2f4d3-da8b-4aee-e9ce-034def4d5cf7"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "=== Matriks Bag of Words ===\n",
+            "    ai  belajar  dan  data  di  kampus  mahasiswa  nlp  saya  science  suka\n",
+            "D1   0        1    0     0   1       1          0    1     1        0     0\n",
+            "D2   1        1    0     0   0       0          0    0     1        0     1\n",
+            "D3   0        1    1     1   0       0          1    1     0        1     0\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Wrap the BoW matrix in a DataFrame: columns are the vocabulary words,\n",
+        "# rows are labelled D1, D2, ... (one per document).\n",
+        "label_dokumen = [f\"D{nomor}\" for nomor in range(1, len(documents) + 1)]\n",
+        "df_bow = pd.DataFrame(bow_matrix, columns=vocabulary, index=label_dokumen)\n",
+        "\n",
+        "print(\"\\n=== Matriks Bag of Words ===\")\n",
+        "print(df_bow)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "8ruf5vKL2rGD",
+        "outputId": "65a4674e-1c01-4833-ec55-f66f77b8b6c2"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "=== Tabel Frekuensi Kata (Keseluruhan Dokumen) ===\n",
+            "         Kata  Frekuensi\n",
+            "0     belajar          3\n",
+            "1         nlp          2\n",
+            "2        saya          2\n",
+            "3         dan          1\n",
+            "4          ai          1\n",
+            "5        data          1\n",
+            "6          di          1\n",
+            "7   mahasiswa          1\n",
+            "8      kampus          1\n",
+            "9     science          1\n",
+            "10       suka          1\n",
+            "Frekuensi kata: 11\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Word-frequency table: total count of every word across all documents, most frequent first.\n",
+        "word_frequencies = df_bow.sum().sort_values(ascending=False).reset_index()\n",
+        "word_frequencies.columns = [\"Kata\", \"Frekuensi\"]\n",
+        "\n",
+        "print(\"\\n=== Tabel Frekuensi Kata (Keseluruhan Dokumen) ===\")\n",
+        "print(word_frequencies)\n",
+        "# len(word_frequencies) is the number of rows, i.e. the number of distinct words,\n",
+        "# so it is labelled as such rather than as a frequency.\n",
+        "print(f\"Jumlah kata unik: {len(word_frequencies)}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "NQjExannHuj0"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.2"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
--- a/Klasifikasi_Teks_FNN.ipynb
+++ b/Klasifikasi_Teks_FNN.ipynb
@ -0,0 +1,218 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac",
+      "metadata": {
+        "id": "f4a1399a-f23d-4060-a07e-bce5a5c7ddac"
+      },
+      "source": [
+        "# Klasifikasi Teks menggunakan ANN\n",
+        "## Fahrizal Setiawan\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 11,
+      "id": "53a214ae-c9cf-4d46-925d-068f1685537b",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "53a214ae-c9cf-4d46-925d-068f1685537b",
+        "outputId": "f224e8ff-e3a6-49d9-fac9-cafc0202eb4c"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "=== Classification Report ===\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "    negative       0.33      1.00      0.50         1\n",
+            "    positive       0.00      0.00      0.00         2\n",
+            "\n",
+            "    accuracy                           0.33         3\n",
+            "   macro avg       0.17      0.50      0.25         3\n",
+            "weighted avg       0.11      0.33      0.17         3\n",
+            "\n",
+            "=== Confusion Matrix ===\n",
+            "[[1 0]\n",
+            " [2 0]]\n",
+            "\n",
+            "Prediksi untuk: barang buruk, saya kecewa\n",
+            "Hasil: negative\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+            "/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
+          ]
+        }
+      ],
+      "source": [
+        "# ---------------------------------------------------------\n",
+        "# Klasifikasi Teks dengan TF-IDF + Feedforward Neural Network\n",
+        "# ---------------------------------------------------------\n",
+        "\n",
+        "import pandas as pd\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "from sklearn.neural_network import MLPClassifier\n",
+        "from sklearn.metrics import classification_report, confusion_matrix\n",
+        "\n",
+        "# -----------------------------------------\n",
+        "# 1. Example dataset\n",
+        "# -----------------------------------------\n",
+        "# Replace these samples with your own data (CSV, JSON, etc.) if desired.\n",
+        "# Each text is written next to its label so the two columns stay aligned.\n",
+        "\n",
+        "labelled_samples = [\n",
+        "    (\"Saya suka produk ini, luar biasa\", \"positive\"),\n",
+        "    (\"Layanannya buruk, saya sangat kecewa\", \"negative\"),\n",
+        "    (\"Penjual tidak responsif, sangat kecewa\", \"negative\"),\n",
+        "    (\"Pembelian terbaik yang pernah saya lakukan\", \"positive\"),\n",
+        "    (\"Saya benci produk ini, buang-buang uang\", \"negative\"),\n",
+        "    (\"Kualitasnya sangat bagus, direkomendasikan\", \"positive\"),\n",
+        "    (\"Pengalaman buruk, tidak akan membeli lagi\", \"negative\"),\n",
+        "]\n",
+        "\n",
+        "data = {\n",
+        "    \"text\": [text for text, _ in labelled_samples],\n",
+        "    \"label\": [label for _, label in labelled_samples],\n",
+        "}\n",
+        "\n",
+        "df = pd.DataFrame(data)\n",
+        "\n",
+        "# -----------------------------------------\n",
+        "# 2. Train/test split\n",
+        "# -----------------------------------------\n",
+        "# Stratify on the label so both classes are represented in the training and the test set;\n",
+        "# with only a handful of samples a purely random split can leave a class badly under-represented.\n",
+        "X_train, X_test, y_train, y_test = train_test_split(\n",
+        "    df[\"text\"],\n",
+        "    df[\"label\"],\n",
+        "    test_size=0.3,\n",
+        "    random_state=42,\n",
+        "    stratify=df[\"label\"],\n",
+        ")\n",
+        "\n",
+        "# -----------------------------------------\n",
+        "# 3. TF-IDF vectorization\n",
+        "# -----------------------------------------\n",
+        "# Fit the vectorizer on the training texts only, then reuse it to transform the test texts.\n",
+        "tfidf = TfidfVectorizer(max_features=5000)\n",
+        "X_train_tfidf = tfidf.fit_transform(X_train)\n",
+        "X_test_tfidf = tfidf.transform(X_test)\n",
+        "\n",
+        "# -----------------------------------------\n",
+        "# 4. Feedforward ANN (MLPClassifier)\n",
+        "# -----------------------------------------\n",
+        "# Two hidden layers (256 and 64 units), ReLU activation, Adam solver, fixed seed.\n",
+        "model = MLPClassifier(\n",
+        "    hidden_layer_sizes=(256, 64),\n",
+        "    activation='relu',\n",
+        "    solver='adam',\n",
+        "    max_iter=500,\n",
+        "    random_state=42\n",
+        ")\n",
+        "\n",
+        "model.fit(X_train_tfidf, y_train)\n",
+        "\n",
+        "# -----------------------------------------\n",
+        "# 5. Model evaluation\n",
+        "# -----------------------------------------\n",
+        "y_pred = model.predict(X_test_tfidf)\n",
+        "\n",
+        "print(\"=== Classification Report ===\")\n",
+        "# zero_division=0 keeps the 0.0 score for a class that is never predicted\n",
+        "# while suppressing the UndefinedMetricWarning that would otherwise be emitted.\n",
+        "print(classification_report(y_test, y_pred, zero_division=0))\n",
+        "\n",
+        "print(\"=== Confusion Matrix ===\")\n",
+        "# Pass the label order explicitly so rows/columns always follow model.classes_,\n",
+        "# even when a class is missing from both y_test and y_pred.\n",
+        "print(confusion_matrix(y_test, y_pred, labels=model.classes_))\n",
+        "\n",
+        "# -----------------------------------------\n",
+        "# 6. Predict a new text\n",
+        "# -----------------------------------------\n",
+        "# The text goes through the already-fitted vectorizer before it is passed to the model.\n",
+        "sample_text = [\"barang buruk, saya kecewa\"]\n",
+        "sample_vec = tfidf.transform(sample_text)\n",
+        "prediction = model.predict(sample_vec)\n",
+        "\n",
+        "for teks, label in zip(sample_text, prediction):\n",
+        "    print(\"\\nPrediksi untuk:\", teks)\n",
+        "    print(\"Hasil:\", label)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 12,
+      "id": "9f7d90fe-4af4-446c-9547-c9312bfa6fc7",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "9f7d90fe-4af4-446c-9547-c9312bfa6fc7",
+        "outputId": "4a889f91-ff57-459e-8987-43a230489899"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "Prediksi untuk: saya benci barang ini\n",
+            "Hasil: negative\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Classify another new text with the vectorizer and model fitted in the cell above.\n",
+        "sample_text = [\"saya benci barang ini\"]\n",
+        "sample_vec = tfidf.transform(sample_text)\n",
+        "prediction = model.predict(sample_vec)\n",
+        "for teks_baru, label_prediksi in zip(sample_text, prediction):\n",
+        "    print(\"\\nPrediksi untuk:\", teks_baru)\n",
+        "    print(\"Hasil:\", label_prediksi)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "d4b9a7c2-0f08-43fd-8da8-018d839a4917",
+      "metadata": {
+        "id": "d4b9a7c2-0f08-43fd-8da8-018d839a4917"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.12"
+    },
+    "colab": {
+      "provenance": []
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
--- a/f.ipynb
+++ b/f.ipynb
@ -0,0 +1,394 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "JVPdWpz3hhbj"
+      },
+      "source": [
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4Mvva3v65h1v"
+      },
+      "source": [
+        "# **UNIGRAM**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "1cub_VJnUJMl",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "a712acbd-01e2-4c9e-f2c0-d7d33f3bc9fb"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Corpus: Jangan pernah berhenti belajar, karena hidup tak pernah berhenti mengajarkan\n",
+            "Tokens (10): ['jangan', 'pernah', 'berhenti', 'belajar,', 'karena', 'hidup', 'tak', 'pernah', 'berhenti', 'mengajarkan']\n",
+            "\n",
+            "Frekuensi Unigram dalam kalimat\n",
+            " ('jangan'): 1\n",
+            " ('pernah'): 2\n",
+            " ('berhenti'): 2\n",
+            " ('belajar,'): 1\n",
+            " ('karena'): 1\n",
+            " ('hidup'): 1\n",
+            " ('tak'): 1\n",
+            " ('mengajarkan'): 1\n",
+            "\n",
+            "Total unigram dalam 1 kalimat: 10\n",
+            "\n",
+            "Probabilitas masing-masing unigram:\n",
+            " P(jangan) = 0.10 (10.00%)\n",
+            " P(pernah) = 0.20 (20.00%)\n",
+            " P(berhenti) = 0.20 (20.00%)\n",
+            " P(belajar,) = 0.10 (10.00%)\n",
+            " P(karena) = 0.10 (10.00%)\n",
+            " P(hidup) = 0.10 (10.00%)\n",
+            " P(tak) = 0.10 (10.00%)\n",
+            " P(mengajarkan) = 0.10 (10.00%)\n",
+            "\n",
+            "Probabilitas Keseluruhan Kalimat (Model Unigram):\n",
+            " P(jangan pernah berhenti belajar, karena hidup tak pernah berhenti mengajarkan) = P(jangan)=0.10 x P(pernah)=0.20 x P(berhenti)=0.20 x P(belajar,)=0.10 x P(karena)=0.10 x P(hidup)=0.10 x P(tak)=0.10 x P(pernah)=0.20 x P(berhenti)=0.20 x P(mengajarkan)=0.10 = 0.0000 (0.00%)\n"
+          ]
+        }
+      ],
+      "source": [
+        "from collections import Counter\n",
+        "from IPython.display import clear_output\n",
+        "import math\n",
+        "\n",
+        "# 1. Read the sentence and tokenize it\n",
+        "# The example sentence is offered as a default: it is used when the user just presses Enter,\n",
+        "# so the rest of the cell never runs on an empty sentence.\n",
+        "kalimat_contoh = \"Jangan pernah berhenti belajar, karena hidup tak pernah berhenti mengajarkan\"\n",
+        "kalimat = input(f\"Masukkan kalimat (Enter untuk contoh: {kalimat_contoh}): \").strip() or kalimat_contoh\n",
+        "\n",
+        "# Clear the prompt from the cell output (notebook environments only).\n",
+        "# Best effort: ordinary errors are ignored, but KeyboardInterrupt/SystemExit still propagate.\n",
+        "try:\n",
+        "    clear_output()\n",
+        "except Exception:\n",
+        "    pass\n",
+        "\n",
+        "print(f\"Corpus: {kalimat}\")\n",
+        "\n",
+        "# Tokenize: lowercase, then split on whitespace (punctuation stays attached to its word).\n",
+        "tokens = kalimat.lower().split()\n",
+        "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+        "\n",
+        "# 2. Count unigram frequencies\n",
+        "unigram_counts = Counter(tokens)\n",
+        "total_tokens = len(tokens)  # identical to the sum of all unigram counts\n",
+        "\n",
+        "print(\"\\nFrekuensi Unigram dalam kalimat\")\n",
+        "for kata, jumlah in unigram_counts.items():\n",
+        "    print(f\" ('{kata}'): {jumlah}\")\n",
+        "print(f\"\\nTotal unigram dalam 1 kalimat: {total_tokens}\")\n",
+        "\n",
+        "# 3. Unigram probability: P(wi) = Count(wi) / total number of tokens\n",
+        "unigram_probabilities = {\n",
+        "    kata: jumlah / total_tokens for kata, jumlah in unigram_counts.items()\n",
+        "}\n",
+        "\n",
+        "print(\"\\nProbabilitas masing-masing unigram:\")\n",
+        "for kata, peluang in unigram_probabilities.items():\n",
+        "    print(f\" P({kata}) = {peluang:.2f} ({peluang*100:.2f}%)\")\n",
+        "\n",
+        "# 4. Sentence probability under the unigram model: P(kalimat) = P(w1) * P(w2) * ...\n",
+        "p_kalimat = 1\n",
+        "prob_parts = []\n",
+        "\n",
+        "# Multiply the probability of every token and collect the factors for the printed formula.\n",
+        "for word in tokens:\n",
+        "    prob_value = unigram_probabilities[word]\n",
+        "    p_kalimat *= prob_value\n",
+        "    prob_parts.append(f\"P({word})={prob_value:.2f}\")\n",
+        "\n",
+        "# Join the factors into a readable formula.\n",
+        "prob_str = \" x \".join(prob_parts)\n",
+        "\n",
+        "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Unigram):\")\n",
+        "# The product of many small probabilities quickly drops below 0.0001, where fixed-point\n",
+        "# formatting would print 0.0000 (0.00%); general format keeps the significant digits.\n",
+        "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.4g} ({p_kalimat*100:.4g}%)\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Vstwt996-FrS"
+      },
+      "source": [
+        "# **BIGRAM**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XRIY4qgTVbjl",
+        "outputId": "4eff35ea-8a13-4b4a-fd8f-e0f3518c1add"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Corpus: ilmu adalah cahaya, dan belajar adalah menyalakan lentera dalam kegelapan\n",
+            "Tokens (10): ['ilmu', 'adalah', 'cahaya,', 'dan', 'belajar', 'adalah', 'menyalakan', 'lentera', 'dalam', 'kegelapan']\n",
+            "\n",
+            "Frekuensi Bigram dalam kalimat:\n",
+            " ('ilmu', 'adalah'): 1\n",
+            " ('adalah', 'cahaya,'): 1\n",
+            " ('cahaya,', 'dan'): 1\n",
+            " ('dan', 'belajar'): 1\n",
+            " ('belajar', 'adalah'): 1\n",
+            " ('adalah', 'menyalakan'): 1\n",
+            " ('menyalakan', 'lentera'): 1\n",
+            " ('lentera', 'dalam'): 1\n",
+            " ('dalam', 'kegelapan'): 1\n",
+            "\n",
+            "Total bigram dalam 1 kalimat: 9\n",
+            "\n",
+            "Probabilitas masing-masing bigram:\n",
+            " P(adalah|ilmu) = 1.00 (100.00%)\n",
+            " P(cahaya,|adalah) = 0.50 (50.00%)\n",
+            " P(dan|cahaya,) = 1.00 (100.00%)\n",
+            " P(belajar|dan) = 1.00 (100.00%)\n",
+            " P(adalah|belajar) = 1.00 (100.00%)\n",
+            " P(menyalakan|adalah) = 0.50 (50.00%)\n",
+            " P(lentera|menyalakan) = 1.00 (100.00%)\n",
+            " P(dalam|lentera) = 1.00 (100.00%)\n",
+            " P(kegelapan|dalam) = 1.00 (100.00%)\n",
+            "\n",
+            "Probabilitas Keseluruhan Kalimat (Model Bigram):\n",
+            " P(ilmu adalah cahaya, dan belajar adalah menyalakan lentera dalam kegelapan) = P(ilmu)=0.10 x P(adalah|ilmu)=1.00 x P(cahaya,|adalah)=0.50 x P(dan|cahaya,)=1.00 x P(belajar|dan)=1.00 x P(adalah|belajar)=1.00 x P(menyalakan|adalah)=0.50 x P(lentera|menyalakan)=1.00 x P(dalam|lentera)=1.00 x P(kegelapan|dalam)=1.00 = 0.025000 (2.50%)\n"
+          ]
+        }
+      ],
+      "source": [
+        "from collections import Counter\n",
+        "from IPython.display import clear_output\n",
+        "import math\n",
+        "\n",
+        "# 1. Read the sentence and tokenize it\n",
+        "# The example sentence is offered as a default: it is used when the user just presses Enter,\n",
+        "# so the rest of the cell never runs on an empty sentence.\n",
+        "kalimat_contoh = \"Ilmu adalah cahaya, dan belajar adalah menyalakan lentera dalam kegelapan\"\n",
+        "kalimat = input(f\"Masukkan kalimat (Enter untuk contoh: {kalimat_contoh}): \").strip() or kalimat_contoh\n",
+        "\n",
+        "# Clear the prompt from the cell output (notebook environments only).\n",
+        "# Best effort: ordinary errors are ignored, but KeyboardInterrupt/SystemExit still propagate.\n",
+        "try:\n",
+        "    clear_output()\n",
+        "except Exception:\n",
+        "    pass\n",
+        "\n",
+        "print(f\"Corpus: {kalimat}\")\n",
+        "\n",
+        "# Tokenize: lowercase, then split on whitespace (punctuation stays attached to its word).\n",
+        "tokens = kalimat.lower().split()\n",
+        "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+        "\n",
+        "# 2. Count unigram and bigram frequencies\n",
+        "unigram_counts = Counter(tokens)\n",
+        "bigrams = list(zip(tokens, tokens[1:]))  # every pair of adjacent tokens\n",
+        "bigram_counts = Counter(bigrams)\n",
+        "\n",
+        "print(\"\\nFrekuensi Bigram dalam kalimat:\")\n",
+        "for pasangan, jumlah in bigram_counts.items():\n",
+        "    print(f\" {pasangan}: {jumlah}\")\n",
+        "print(f\"\\nTotal bigram dalam 1 kalimat: {len(bigrams)}\")\n",
+        "\n",
+        "# 3. Bigram probability: P(w2 | w1) = Count(w1,w2) / Count(w1)\n",
+        "bigram_probabilities = {\n",
+        "    (sebelum, sesudah): jumlah / unigram_counts[sebelum]\n",
+        "    for (sebelum, sesudah), jumlah in bigram_counts.items()\n",
+        "}\n",
+        "\n",
+        "print(\"\\nProbabilitas masing-masing bigram:\")\n",
+        "for (sebelum, sesudah), peluang in bigram_probabilities.items():\n",
+        "    print(f\" P({sesudah}|{sebelum}) = {peluang:.2f} ({peluang*100:.2f}%)\")\n",
+        "\n",
+        "# 4. Sentence probability under the bigram model\n",
+        "#    P(kalimat) = P(w1) * P(w2|w1) * P(w3|w2) * ...\n",
+        "total_tokens = sum(unigram_counts.values())\n",
+        "\n",
+        "if tokens:\n",
+        "    p_w1 = unigram_counts[tokens[0]] / total_tokens  # P(w1)\n",
+        "    p_kalimat = p_w1  # start the product with P(w1)\n",
+        "    prob_str_parts = [f\"P({tokens[0]})={p_w1:.2f}\"]\n",
+        "else:\n",
+        "    # Empty input has no first word: report probability 0 instead of failing on tokens[0].\n",
+        "    p_w1 = 0.0\n",
+        "    p_kalimat = 0.0\n",
+        "    prob_str_parts = []\n",
+        "\n",
+        "# Multiply in P(wi | wi-1) for every pair of adjacent words.\n",
+        "for pair in zip(tokens, tokens[1:]):\n",
+        "    p = bigram_probabilities.get(pair, 0)\n",
+        "    p_kalimat *= p\n",
+        "    prob_str_parts.append(f\"P({pair[1]}|{pair[0]})={p:.2f}\")\n",
+        "\n",
+        "# Join the factors into a readable formula.\n",
+        "prob_str = \" x \".join(prob_str_parts)\n",
+        "\n",
+        "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Bigram):\")\n",
+        "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.6f} ({p_kalimat*100:.2f}%)\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "E6n1IU8X-G9S"
+      },
+      "source": [
+        "# **TRIGRAM**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "BIRARsj2FHJg",
+        "outputId": "6e09b998-b787-4c91-a710-57a809bf2223"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Corpus: belajar adalah kunci membuka pintu kesuksesan\n",
+            "Tokens (6): ['belajar', 'adalah', 'kunci', 'membuka', 'pintu', 'kesuksesan']\n",
+            "\n",
+            "Frekuensi Trigram dalam kalimat:\n",
+            " ('belajar', 'adalah', 'kunci'): 1\n",
+            " ('adalah', 'kunci', 'membuka'): 1\n",
+            " ('kunci', 'membuka', 'pintu'): 1\n",
+            " ('membuka', 'pintu', 'kesuksesan'): 1\n",
+            "\n",
+            "Total trigram dalam 1 kalimat: 4\n",
+            "\n",
+            "Probabilitas masing-masing trigram:\n",
+            " P(kunci|belajar,adalah) = 1.00 (100.00%)\n",
+            " P(membuka|adalah,kunci) = 1.00 (100.00%)\n",
+            " P(pintu|kunci,membuka) = 1.00 (100.00%)\n",
+            " P(kesuksesan|membuka,pintu) = 1.00 (100.00%)\n",
+            "\n",
+            "Probabilitas Keseluruhan Kalimat (Model Trigram):\n",
+            " P(belajar adalah kunci membuka pintu kesuksesan) = P(belajar)=0.17 x P(adalah|belajar)=1.00 x P(kunci|belajar,adalah)=1.00 x P(membuka|adalah,kunci)=1.00 x P(pintu|kunci,membuka)=1.00 x P(kesuksesan|membuka,pintu)=1.00 = 0.166667 (16.67%)\n"
+          ]
+        }
+      ],
+      "source": [
+        "from collections import Counter\n",
+        "from IPython.display import clear_output\n",
+        "import math\n",
+        "\n",
+        "# 1. Read the sentence and tokenize it\n",
+        "# The example sentence is offered as a default: it is used when the user just presses Enter,\n",
+        "# so the rest of the cell never runs on an empty sentence.\n",
+        "kalimat_contoh = \"Belajar adalah kunci membuka pintu kesuksesan\"\n",
+        "kalimat = input(f\"Masukkan kalimat (Enter untuk contoh: {kalimat_contoh}): \").strip() or kalimat_contoh\n",
+        "\n",
+        "# Clear the prompt from the cell output (notebook environments only).\n",
+        "# Best effort: ordinary errors are ignored, but KeyboardInterrupt/SystemExit still propagate.\n",
+        "try:\n",
+        "    clear_output()\n",
+        "except Exception:\n",
+        "    pass\n",
+        "\n",
+        "print(f\"Corpus: {kalimat}\")\n",
+        "\n",
+        "# Tokenize: lowercase, then split on whitespace (punctuation stays attached to its word).\n",
+        "tokens = kalimat.lower().split()\n",
+        "print(f\"Tokens ({len(tokens)}): {tokens}\")\n",
+        "\n",
+        "# 2. Count bigram and trigram frequencies\n",
+        "bigrams = list(zip(tokens, tokens[1:]))  # pairs of adjacent tokens\n",
+        "trigrams = list(zip(tokens, tokens[1:], tokens[2:]))  # triples of adjacent tokens\n",
+        "\n",
+        "bigram_counts = Counter(bigrams)\n",
+        "trigram_counts = Counter(trigrams)\n",
+        "\n",
+        "print(\"\\nFrekuensi Trigram dalam kalimat:\")\n",
+        "for tiga_kata, jumlah in trigram_counts.items():\n",
+        "    print(f\" {tiga_kata}: {jumlah}\")\n",
+        "print(f\"\\nTotal trigram dalam 1 kalimat: {len(trigrams)}\")\n",
+        "\n",
+        "# 3. Trigram probability: P(w3 | w1, w2) = Count(w1,w2,w3) / Count(w1,w2)\n",
+        "# Trigrams and bigrams are built from the same token list, so the leading bigram (w1, w2)\n",
+        "# of every counted trigram occurs at least once and the division cannot hit zero.\n",
+        "trigram_probabilities = {\n",
+        "    (w1, w2, w3): count / bigram_counts[(w1, w2)]\n",
+        "    for (w1, w2, w3), count in trigram_counts.items()\n",
+        "}\n",
+        "\n",
+        "print(\"\\nProbabilitas masing-masing trigram:\")\n",
+        "for (w1, w2, w3), prob in trigram_probabilities.items():\n",
+        "    print(f\" P({w3}|{w1},{w2}) = {prob:.2f} ({prob*100:.2f}%)\")\n",
+        "\n",
+        "# Unigram counts are needed for P(w1) and P(w2|w1).\n",
+        "unigram_counts = Counter(tokens)\n",
+        "total_tokens = sum(unigram_counts.values())\n",
+        "\n",
+        "# 4. Sentence probability under the trigram model\n",
+        "#    P(kalimat) = P(w1) * P(w2|w1) * P(w3|w1,w2) * ...\n",
+        "\n",
+        "# Factors of the product, collected for the printed formula.\n",
+        "prob_str_parts = []\n",
+        "\n",
+        "# a. P(w1)\n",
+        "if tokens:\n",
+        "    p_w1 = unigram_counts[tokens[0]] / total_tokens\n",
+        "    prob_str_parts.append(f\"P({tokens[0]})={p_w1:.2f}\")\n",
+        "else:\n",
+        "    # Empty input has no first word: report probability 0 instead of failing on tokens[0].\n",
+        "    p_w1 = 0\n",
+        "\n",
+        "# b. P(w2|w1), estimated from the bigram counts without smoothing\n",
+        "if len(tokens) > 1:\n",
+        "    p_w2_w1 = bigram_counts[(tokens[0], tokens[1])] / unigram_counts[tokens[0]]\n",
+        "    prob_str_parts.append(f\"P({tokens[1]}|{tokens[0]})={p_w2_w1:.2f}\")\n",
+        "else:\n",
+        "    p_w2_w1 = 1.0  # a single word contributes no bigram factor\n",
+        "\n",
+        "p_kalimat = p_w1 * p_w2_w1  # start the product with P(w1) * P(w2|w1)\n",
+        "\n",
+        "# c. Multiply in the trigram factors P(wi | wi-2, wi-1) for i >= 3\n",
+        "for triplet in zip(tokens, tokens[1:], tokens[2:]):\n",
+        "    p = trigram_probabilities.get(triplet, 0)\n",
+        "    p_kalimat *= p\n",
+        "    prob_str_parts.append(f\"P({triplet[2]}|{triplet[0]},{triplet[1]})={p:.2f}\")\n",
+        "\n",
+        "prob_str = \" x \".join(prob_str_parts)\n",
+        "\n",
+        "print(\"\\nProbabilitas Keseluruhan Kalimat (Model Trigram):\")\n",
+        "print(f\" P({' '.join(tokens)}) = {prob_str} = {p_kalimat:.6f} ({p_kalimat*100:.2f}%)\")\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.2"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
--- a/README.md
+++ b/README.md
@ -0,0 +1,13 @@
+# repo repo repo
+
+
+Nama saya Fahrizal Setiawan
+
+
+
+Saya ingin menjadi pahlawan seperti Deku-san.
+
+
+
+
+Kenapa? Karena pahlawan Himmel akan melakukan hal yang sama.