diff --git a/Fitur_Ekstraksi_BOW_(wildanul_jannah).ipynb b/Fitur_Ekstraksi_BOW_(wildanul_jannah).ipynb new file mode 100644 index 0000000..50eeace --- /dev/null +++ b/Fitur_Ekstraksi_BOW_(wildanul_jannah).ipynb @@ -0,0 +1,786 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "vY2aVoMNVWho", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 106 + }, + "outputId": "e457b721-8441-4e02-a4c4-9eb0665de961" + }, + "outputs": [ + { + "output_type": "error", + "ename": "SyntaxError", + "evalue": "unterminated string literal (detected at line 344) (ipython-input-4182680070.py, line 344)", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"/tmp/ipython-input-4182680070.py\"\u001b[0;36m, line \u001b[0;32m344\u001b[0m\n\u001b[0;31m \" 'wildanul jannah melakukan' ,]\\\"n\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m unterminated string literal (detected at line 344)\n" + ] + } + ], + "source": [ + "{\n", + " \"cells\": [\n", + " {\n", + " \"cell_type\": \"markdown\",\n", + " \"metadata\": {},\n", + " \"source\": [\n", + " \"# Fitur Ekstraksi Bag-of-Words (BOW)\\n\",\n", + " \"\\n\",\n", + " \"**Nama:** Wildanul Jannah \\n\",\n", + " \"**NIM:** **202210715061** \\n\",\n", + " \"**Kelas:** F7B2 \\n\",\n", + " \"**MK:** NLP \\n\",\n", + " \"\\n\",\n", + " \"**Tujuan praktikum:** \\n\",\n", + " \"Melakukan ekstraksi fitur teks menggunakan Bag-of-Words dengan variasi parameter, yaitu: \\n\",\n", + " \"- Mengubah contoh teks \\n\",\n", + " \"- Mengubah jumlah fitur (`max_features`) \\n\",\n", + " \"- Menggunakan rentang n-gram baru (`ngram_range = (1,3)`) \\n\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 6,\n", + " 
\"metadata\": {},\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdin\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"Masukkan jumlah dokumen yang ingin dimasukkan: 3\\n\"\n", + " ]\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"# Input jumlah dokumen\\n\",\n", + " \"n = int(input(\\\"Masukkan jumlah dokumen yang ingin dimasukkan: \\\"))\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 7,\n", + " \"metadata\": {\n", + " \"colab\": {\n", + " \"base_uri\": \"https://localhost:8080/\"\n", + " },\n", + " \"id\": \"mo-yt5Ob1N8j\",\n", + " \"outputId\": \"362ac3e0-d84b-4014-db96-cc3b10ecdb32\"\n", + " },\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdin\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"Masukkan teks untuk dokumen ke-1: Saya Wildanul Jannah, mahasiswa informatika yang sedang mempelajari dasar pemrosesan bahasa alami\\n\",\n", + " \"Masukkan teks untuk dokumen ke-2: Wildanul Jannah melakukan analisis teks menggunakan metode Bag of Words untuk mengubah kata menjadi data numerik\\n\"\n", + " \"Masukkan teks untuk dokumen ke-3: Dalam percobaan ini Wildanul Jannah mengamati kemunculan kata untuk memahami representasi fitur teks\\n\",\n", + " ]\n", + "\n", + " },\n", + " {\n", + " \"name\": \"stdout\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"\\n\",\n", + " \"=== Dokumen yang Dimasukkan ===\\n\",\n", + " \"Doc 1: Saya Wildanul Jannah, mahasiswa informatika yang sedang mempelajari dasar pemrosesan bahasa alami\\n\",\n", + " \"Doc 2: Wildanul Jannah melakukan analisis teks menggunakan metode Bag of Words untuk mengubah kata menjadi data numerik\\n\",\n", + " \"Doc 3: Dalam percobaan ini Wildanul Jannah mengamati kemunculan kata untuk memahami representasi fitur teks\\n\"\n", + " ]\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"# Input teks dokumen satu per satu\\n\",\n", + " \"documents = []\\n\",\n", + " \"for i in 
range(n):\\n\",\n", + " \" teks = input(f\\\"Masukkan teks untuk dokumen ke-{i+1}: \\\")\\n\",\n", + " \" documents.append(teks)\\n\",\n", + " \"\\n\",\n", + " \"print(\\\"\\\\n=== Dokumen yang Dimasukkan ===\\\")\\n\",\n", + " \"for i, doc in enumerate(documents):\\n\",\n", + " \" print(f\\\"Doc {i+1}: {doc}\\\")\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 8,\n", + " \"metadata\": {\n", + " \"colab\": {\n", + " \"base_uri\": \"https://localhost:8080/\"\n", + " },\n", + " \"id\": \"FkmxRAFq1oDK\",\n", + " \"outputId\": \"62c4508e-1725-4f30-fbdb-4de8072498b2\"\n", + " },\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdout\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"\\n\",\n", + " \"=== Hasil Tokenisasi ===\\n\",\n", + "\"Doc 1: ['saya', 'wildanul', 'jannah,', 'mahasiswa', 'informatika', 'yang', 'sedang', 'mempelajari', 'nlp', 'dan', 'machine', 'learning']\\n\",\n", + "\"Doc 2: ['wildanul', 'jannah', 'melakukan', 'eksperimen', 'bag-of-words', 'untuk', 'melihat', 'bagaimana', 'fitur', 'teks', 'direpresentasikan', 'sebagai', 'angka']\\n\",\n", + "\"Doc 3: ['pada', 'tahap', 'ini', 'wildanul', 'jannah', 'menggunakan', 'n-gram', 'untuk', 'menguji', 'kombinasi', 'kata', 'yang', 'sering', 'muncul', 'bersama']\\n\",\n", + " ]\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"# Tahap Tokenisasi\\n\",\n", + " \"tokenized_docs = []\\n\",\n", + " \"for doc in documents:\\n\",\n", + " \" tokens = doc.lower().split()\\n\",\n", + " \" tokenized_docs.append(tokens)\\n\",\n", + " \"\\n\",\n", + " \"print(\\\"\\\\n=== Hasil Tokenisasi ===\\\")\\n\",\n", + " \"for i, tokens in enumerate(tokenized_docs):\\n\",\n", + " \" print(f\\\"Doc {i+1}: {tokens}\\\")\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 9,\n", + " \"metadata\": {\n", + " \"colab\": {\n", + " \"base_uri\": \"https://localhost:8080/\"\n", + " },\n", + " \"id\": \"ybC1Vo2C_c3q\",\n", + " 
\"outputId\": \"fa31c57e-5364-4ded-fcd0-54d0db46c34b\"\n", + " },\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdout\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"\\n\",\n", + " \"=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\\n\",\n", + " \"['saya', 'wildanul', 'jannah,', 'mahasiswa', 'informatika', 'yang', 'sedang', 'mempelajari', 'nlp', 'dan', 'machine', 'learning', 'wildanul', 'jannah', 'melakukan', 'eksperimen', 'bag-of-words', 'untuk', 'melihat', 'bagaimana', 'fitur', 'teks', 'direpresentasikan', 'sebagai', 'angka','pada', 'tahap', 'ini', 'wildanul', 'jannah', 'menggunakan', 'n-gram', 'untuk', 'menguji', 'kombinasi', 'kata', 'yang', 'sering', 'muncul', 'bersama']\\n\",\n", + " \"Jumlah total kata dalam seluruh dokumen: 39\\n\"\n", + " ]\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"# Pembuatan Corpus\\n\",\n", + " \"corpus_all = [word for doc in tokenized_docs for word in doc]\\n\",\n", + " \"\\n\",\n", + " \"print(\\\"\\\\n=== Corpus Keseluruhan (Semua Kata dari Semua Dokumen) ===\\\")\\n\",\n", + " \"print(corpus_all)\\n\",\n", + " \"print(f\\\"Jumlah total kata dalam seluruh dokumen: {len(corpus_all)}\\\")\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 10,\n", + " \"metadata\": {},\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdout\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"\\n\",\n", + " \"=== Hasil BOW Manual (Frekuensi Kata) ===\\n\"\n", + " ]\n", + " },\n", + " {\n", + " \"data\": {\n", + " \"text/plain\": [\n", + " {\n", + " \" 'wildanul': 3,\\n\",\n", + " \" 'jannah': 3,\\n\",\n", + " \" 'yang': 2,\\n\",\n", + " \" 'untuk': 2,\\n\",\n", + " \" 'saya': 1,\\n\",\n", + " \" 'mahasiswa': 1,\\n\",\n", + " \" 'informatika': 1,\\n\",\n", + " \" 'sedang': 1,\\n\",\n", + " \" 'mempelajari': 1,\\n\",\n", + " \" 'nlp': 1,\\n\",\n", + " \" 'dan': 1,\\n\",\n", + " \" 'machine': 1,\\n\",\n", + " \" 'learning': 1,\\n\",\n", + " \" 
'melakukan': 1,\\n\",\n", + " \" 'eksperimen': 1,\\n\",\n", + " \" 'bag-of-words': 1,\\n\",\n", + " \" 'melihat': 1,\\n\",\n", + " \" 'bagaimana': 1,\\n\",\n", + " \" 'fitur': 1,\\n\",\n", + " \" 'teks': 1,\\n\",\n", + " \" 'direpresentasikan': 1,\\n\",\n", + " \" 'sebagai': 1,\\n\",\n", + " \" 'angka': 1,\\n\",\n", + " \" 'pada': 1,\\n\",\n", + " \" 'tahap': 1,\\n\",\n", + " \" 'ini': 1,\\n\",\n", + " \" 'menggunakan': 1,\\n\",\n", + " \" 'n-gram': 1,\\n\",\n", + " \" 'menguji': 1,\\n\",\n", + " \" 'kombinasi': 1,\\n\",\n", + " \" 'kata': 1,\\n\",\n", + " \" 'sering': 1,\\n\",\n", + " \" 'muncul': 1,\\n\",\n", + " \" 'bersama': 1\\n\",\n", + "}\n", + " ]\n", + " },\n", + " \"execution_count\": 10,\n", + " \"metadata\": {},\n", + " \"output_type\": \"execute_result\"\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"# 4) Membuat Bag-of-Words manual (frekuensi kata)\\n\",\n", + " \"bow_manual = {}\\n\",\n", + " \"for tokens in tokenized_docs:\\n\",\n", + " \" for token in tokens:\\n\",\n", + " \" bow_manual[token] = bow_manual.get(token, 0) + 1\\n\",\n", + " \"\\n\",\n", + " \"print(\\\"\\\\n=== Hasil BOW Manual (Frekuensi Kata) ===\\\")\\n\",\n", + " \"# Tampilkan dictionary secara sorted by frequency (desc)\\n\",\n", + " \"bow_sorted = dict(sorted(bow_manual.items(), key=lambda x: x[1], reverse=True))\\n\",\n", + " \"bow_sorted\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 12,\n", + " \"metadata\": {\n", + " \"colab\": {\n", + " \"base_uri\": \"https://localhost:8080/\"\n", + " },\n", + " \"id\": \"s6S-Ma4R1xuq\",\n", + " \"outputId\": \"98c3685b-1798-4038-d17e-6e45ca419b51\"\n", + " },\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdout\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"\\n\",\n", + " \"=== Vocabulary (Kata Unik) ===\\n\",\n", + " \"['angka', 'bag-of-words', 'bagaimana', 'bersama', 'dan', 'direpresentasikan','eksperimen', 'fitur', 'informatika', 'ini', 'jannah', 
'jannah,', 'kata','kombinasi', 'learning', 'machine', 'mahasiswa', 'melakukan', 'melihat','mempelajari', 'menggunakan', 'menguji', 'muncul', 'n-gram', 'nlp', 'pada', 'saya', 'sebagai', 'sedang', 'sering', 'tahap', 'teks', 'untuk', 'wildanul','yang']\\n\",\n", + " \"Jumlah kata unik (vocabulary size): 35\\n\",\n", + " \"\\n\",\n", + " \"=== Vocabulary (Kata Unik) ===\\n\",\n", + " \" 1. angka\\n\",\n", + " \" 2. bag-of-words\\n\",\n", + " \" 3. bagaimana\\n\",\n", + " \" 4. bersama\\n\",\n", + " \" 5. dan\\n\",\n", + " \" 6. direpresentasikan\\n\",\n", + " \" 7. eksperimen\\n\",\n", + " \" 8. fitur\\n\",\n", + " \" 9. informatika\\n\",\n", + " \"10. ini\\n\",\n", + " \"11. jannah\\n\",\n", + " \"12. jannah\\n\",\n", + " \"13. kata\\n\",\n", + " \"14. kombinasi\\n\",\n", + " \"15. learning\\n\",\n", + " \"16. machine\\n\",\n", + " \"17. melakukan\\n\",\n", + " \"18. melihat\\n\",\n", + " \"19. mempelajari\\n\",\n", + " \"20. menggunakan\\n\",\n", + " \"21. menguji\\n\",\n", + " \"22. muncul\\n\",\n", + " \"23. n-gram\\n\",\n", + " \"24. nlp\\n\",\n", + " \"25. pada\\n\",\n", + " \"26. saya\\n\",\n", + " \"27. sebagai\\n\",\n", + " \"28. sedang\\n\",\n", + " \"29. sering\\n\",\n", + " \"30. tahap\\n\",\n", + " \"31. texs\\n\",\n", + " \"32. untuk\\n\",\n", + " \"33. wildanul\\n\",\n", + " \"34. untuk\\n\",\n", + " \"35. 
yang\\n\",\n", + " \"\\n\",\n", + " \"Jumlah kata unik (vocabulary size): 35\\n\"\n", + " ]\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"# Pembuatan Vocabulary\\n\",\n", + " \"vocabulary = sorted(set(corpus_all))\\n\",\n", + " \"\\n\",\n", + " \"print(\\\"\\\\n=== Vocabulary (Kata Unik) ===\\\")\\n\",\n", + " \"print(vocabulary)\\n\",\n", + " \"print(f\\\"Jumlah kata unik (vocabulary size): {len(vocabulary)}\\\")\\n\",\n", + " \"\\n\",\n", + " \"\\n\",\n", + " \"vocabulary = sorted(set(corpus_all))\\n\",\n", + " \"\\n\",\n", + " \"print(\\\"\\\\n=== Vocabulary (Kata Unik) ===\\\")\\n\",\n", + " \"for idx, word in enumerate(vocabulary, start=1):\\n\",\n", + " \" print(f\\\"{idx:>2}. {word}\\\")\\n\",\n", + " \"print(f\\\"\\\\nJumlah kata unik (vocabulary size): {len(vocabulary)}\\\")\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 13,\n", + " \"metadata\": {},\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdout\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"Jumlah dokumen: 3\\n\",\n", + " \"Jumlah tokenized_docs: 3\\n\",\n", + " \"Jumlah kata di vocabulary (unique): 35\\n\",\n", + " \"Contoh 10 kata pertama vocabulary: ['angka', 'bag-of-words', 'bagaimana', 'bersama', 'dan', 'direpresentasikan', 'eksperimen', 'fitur', 'informatika', 'ini']\\n\"\n", + " ]\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"print(\\\"Jumlah dokumen:\\\", len(documents))\\n\",\n", + " \"print(\\\"Jumlah tokenized_docs:\\\", len(tokenized_docs))\\n\",\n", + " \"print(\\\"Jumlah kata di vocabulary (unique):\\\", len(vocabulary))\\n\",\n", + " \"print(\\\"Contoh 10 kata pertama vocabulary:\\\", vocabulary[:10])\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 11,\n", + " \"metadata\": {},\n", + " \"outputs\": [\n", + " {\n", + " \"name\": \"stdout\",\n", + " \"output_type\": \"stream\",\n", + " \"text\": [\n", + " \"\\n\",\n", + " \"=== Info CountVectorizer 
===\\n\",\n", + " \"n-gram range: (1, 3)\\n\",\n", + " \"max_features: 500\\n\",\n", + " \"Jumlah fitur (vocabulary size): 110\\n\",\n", + " \"\\n\",\n", + " \"Contoh 20 fitur pertama:\\n\",\n", + " \"['angka' 'bag' 'bag of' 'bag of words' 'bagaimana' 'bagaimana fitur'\\n\",\n", + " \" 'bagaimana fitur teks' 'bersama' 'dan' 'dan machine'\\n\",\n", + " \" 'dan machine learning' 'direpresentasikan' 'direpresentasikan sebagai'\\n\",\n", + " \" 'direpresentasikan sebagai angka' 'eksperimen' 'eksperimen bag'\\n\",\n", + " \" 'eksperimen bag of' 'wildanul' 'wildanul jannah'\\n\",\n", + " \" 'wildanul jannah melakukan']\\n\"\n", + " ]\n", + " }\n", + " ],\n", + " \"source\": [\n", + " \"# BOW modern: CountVectorizer dengan n-gram dan max_features \\n\",\n", + " \"from sklearn.feature_extraction.text import CountVectorizer\\n\",\n", + " \"\\n\",\n", + " \"# Ubah di sini bila mau nilai lain:\\n\",\n", + " \"NGRAM_RANGE = (1, 3) # ubah n-gram (contoh: (1,2) atau (1,4))\\n\",\n", + " \"MAX_FEATURES = 500 # ubah jumlah fitur (contoh: 200, 1000)\\n\",\n", + " \"\\n\",\n", + " \"vectorizer = CountVectorizer(ngram_range=NGRAM_RANGE, max_features=MAX_FEATURES)\\n\",\n", + " \"X = vectorizer.fit_transform(documents)\\n\",\n", + " \"\\n\",\n", + " \"print(\\\"\\\\n=== Info CountVectorizer ===\\\")\\n\",\n", + " \"print(\\\"n-gram range:\\\", NGRAM_RANGE)\\n\",\n", + " \"print(\\\"max_features:\\\", MAX_FEATURES)\\n\",\n", + " \"print(\\\"Jumlah fitur (vocabulary size):\\\", len(vectorizer.vocabulary_))\\n\",\n", + " \"print(\\\"\\\\nContoh 20 fitur pertama:\\\")\\n\",\n", + " \"print(vectorizer.get_feature_names_out()[:20])\"\n", + " ]\n", + " },\n", + " {\n", + " \"cell_type\": \"code\",\n", + " \"execution_count\": 12,\n", + " \"metadata\": {},\n", + " \"outputs\": [\n", + " {\n", + " \"data\": {\n", + " \"text/html\": [\n", + " \"
| \\n\",\n", + " \" | angka | \\n\",\n", + " \"bag | \\n\",\n", + " \"bag of | \\n\",\n", + " \"bag of words | \\n\",\n", + " \"bagaimana | \\n\",\n", + " \"bagaimana fitur | \\n\",\n", + " \"bagaimana fitur teks | \\n\",\n", + " \"bersama | \\n\",\n", + " \"dan | \\n\",\n", + " \"dan machine | \\n\",\n", + " \"... | \\n\",\n", + " \"untuk menguji | \\n\",\n", + " \"untuk menguji kombinasi | \\n\",\n", + " \"words | \\n\",\n", + " \"words untuk | \\n\",\n", + " \"words untuk melihat | \\n\",\n", + " \"yang | \\n\",\n", + " \"yang sedang | \\n\",\n", + " \"yang sedang mempelajari | \\n\",\n", + " \"yang sering | \\n\",\n", + " \"yang sering muncul | \\n\",\n", + " \"
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"... | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"
| 1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"... | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"
| 2 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"... | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"
3 rows × 110 columns
\\n\",\n", + " \"| \\n\",\n", + " \" | angka | \\n\",\n", + " \"bag-of-words | \\n\",\n", + " \"bagaimana | \\n\",\n", + " \"bersama | \\n\",\n", + " \"dan | \\n\",\n", + " \"direpresentasikan | \\n\",\n", + " \"eksperimen | \\n\",\n", + " \"wilda | \\n\",\n", + " \"fitur | \\n\",\n", + " \"informatika | \\n\",\n", + " \"... | \\n\",\n", + " \"jannah, | \\n\",\n", + " \"jannah | \\n\",\n", + " \"saya | \\n\",\n", + " \"sebagai | \\n\",\n", + " \"sedang | \\n\",\n", + " \"sering | \\n\",\n", + " \"tahap | \\n\",\n", + " \"teks | \\n\",\n", + " \"untuk | \\n\",\n", + " \"yang | \\n\",\n", + " \"
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"... | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"
| 1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"... | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"
| 2 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"... | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"0 | \\n\",\n", + " \"1 | \\n\",\n", + " \"1 | \\n\",\n", + " \"
3 rows × 35 columns
\\n\",\n", + " \"