Upload files to "/"
This commit is contained in:
parent
f9d1c958e6
commit
ba1061b563
640
Fitur_Ekstraksi_BOW.ipynb
Normal file
640
Fitur_Ekstraksi_BOW.ipynb
Normal file
@ -0,0 +1,640 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {
|
||||||
|
"id": "qBYcPYAb059g"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# =========================\n",
|
||||||
|
"# 1. IMPORT LIBRARY\n",
|
||||||
|
"# =========================\n",
|
||||||
|
"import re\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
||||||
|
"from sklearn.naive_bayes import MultinomialNB\n",
|
||||||
|
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {
|
||||||
|
"id": "mo-yt5Ob1N8j"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# =========================\n",
|
||||||
|
"# 2. MANUAL TEXT DATA\n",
|
||||||
|
"# =========================\n",
|
||||||
|
"# Each sample pairs a sentence with its sentiment label so that texts\n",
|
||||||
|
"# and labels stay aligned when the two lists below are derived.\n",
|
||||||
|
"labelled_samples = [\n",
|
||||||
|
"    (\"saya suka belajar data science\", \"positif\"),\n",
|
||||||
|
"    (\"machine learning sangat menarik\", \"positif\"),\n",
|
||||||
|
"    (\"saya tidak suka matematika\", \"negatif\"),\n",
|
||||||
|
"    (\"belajar python itu menyenangkan\", \"positif\"),\n",
|
||||||
|
"    (\"data science membutuhkan matematika\", \"netral\"),\n",
|
||||||
|
"    (\"python sangat membantu data science\", \"positif\"),\n",
|
||||||
|
"]\n",
|
||||||
|
"\n",
|
||||||
|
"# Training sentences and their labels, in matching order.\n",
|
||||||
|
"documents = [sentence for sentence, _ in labelled_samples]\n",
|
||||||
|
"labels = [sentiment for _, sentiment in labelled_samples]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"metadata": {
|
||||||
|
"id": "FkmxRAFq1oDK"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# =========================\n",
|
||||||
|
"# 3. PREPROCESSING\n",
|
||||||
|
"# =========================\n",
|
||||||
|
"def clean_text(text):\n",
|
||||||
|
"    \"\"\"Normalize raw text for bag-of-words tokenization.\n",
|
||||||
|
"\n",
|
||||||
|
"    Lowercases the input, turns every character other than a-z or\n",
|
||||||
|
"    whitespace into a space, then collapses whitespace runs into a single\n",
|
||||||
|
"    space and trims both ends.\n",
|
||||||
|
"\n",
|
||||||
|
"    Args:\n",
|
||||||
|
"        text: Raw input string.\n",
|
||||||
|
"\n",
|
||||||
|
"    Returns:\n",
|
||||||
|
"        The cleaned string; empty if no letters remain.\n",
|
||||||
|
"    \"\"\"\n",
|
||||||
|
"    text = text.lower()\n",
|
||||||
|
"    # Substitute a space (not an empty string) so words joined only by\n",
|
||||||
|
"    # punctuation or digits, e.g. \"anak-anak\" or \"bagus,tapi\", remain\n",
|
||||||
|
"    # separate tokens instead of fusing into one.\n",
|
||||||
|
"    text = re.sub(r\"[^a-z\\s]\", \" \", text)\n",
|
||||||
|
"    # Collapse the runs of whitespace left behind and trim the ends.\n",
|
||||||
|
"    text = re.sub(r\"\\s+\", \" \", text).strip()\n",
|
||||||
|
"    return text\n",
|
||||||
|
"\n",
|
||||||
|
"# Normalize every training sentence before it is vectorized.\n",
|
||||||
|
"documents = list(map(clean_text, documents))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "ybC1Vo2C_c3q",
|
||||||
|
"outputId": "1695b30d-059d-4bce-e224-7c100b1958ee"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"=== Bag of Words ===\n",
|
||||||
|
" belajar data itu learning machine matematika membantu membutuhkan \\\n",
|
||||||
|
"0 1 1 0 0 0 0 0 0 \n",
|
||||||
|
"1 0 0 0 1 1 0 0 0 \n",
|
||||||
|
"2 0 0 0 0 0 1 0 0 \n",
|
||||||
|
"3 1 0 1 0 0 0 0 0 \n",
|
||||||
|
"4 0 1 0 0 0 1 0 1 \n",
|
||||||
|
"5 0 1 0 0 0 0 1 0 \n",
|
||||||
|
"\n",
|
||||||
|
" menarik menyenangkan python sangat saya science suka tidak \n",
|
||||||
|
"0 0 0 0 0 1 1 1 0 \n",
|
||||||
|
"1 1 0 0 1 0 0 0 0 \n",
|
||||||
|
"2 0 0 0 0 1 0 1 1 \n",
|
||||||
|
"3 0 1 1 0 0 0 0 0 \n",
|
||||||
|
"4 0 0 0 0 0 1 0 0 \n",
|
||||||
|
"5 0 0 1 1 0 1 0 0 \n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# =========================\n",
|
||||||
|
"# 4. FEATURE EXTRACTION (BOW)\n",
|
||||||
|
"# =========================\n",
|
||||||
|
"# Learn the vocabulary and build the sparse document-term count matrix.\n",
|
||||||
|
"vectorizer = CountVectorizer()\n",
|
||||||
|
"X = vectorizer.fit_transform(documents)\n",
|
||||||
|
"\n",
|
||||||
|
"# Dense table with one column per vocabulary term, printed below.\n",
|
||||||
|
"df_bow = pd.DataFrame(data=X.toarray(), columns=vectorizer.get_feature_names_out())\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"=== Bag of Words ===\")\n",
|
||||||
|
"print(df_bow)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 80
|
||||||
|
},
|
||||||
|
"id": "s6S-Ma4R1xuq",
|
||||||
|
"outputId": "f65359e3-bb87-42b4-fb37-9c035f88e7ad"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "execute_result",
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"MultinomialNB()"
|
||||||
|
],
|
||||||
|
"text/html": [
|
||||||
|
"<style>#sk-container-id-1 {\n",
|
||||||
|
" /* Definition of color scheme common for light and dark mode */\n",
|
||||||
|
" --sklearn-color-text: #000;\n",
|
||||||
|
" --sklearn-color-text-muted: #666;\n",
|
||||||
|
" --sklearn-color-line: gray;\n",
|
||||||
|
" /* Definition of color scheme for unfitted estimators */\n",
|
||||||
|
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||||||
|
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||||||
|
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||||||
|
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||||||
|
" /* Definition of color scheme for fitted estimators */\n",
|
||||||
|
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||||||
|
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||||||
|
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||||||
|
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||||||
|
"\n",
|
||||||
|
" /* Specific color for light theme */\n",
|
||||||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||||||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||||||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||||||
|
" --sklearn-color-icon: #696969;\n",
|
||||||
|
"\n",
|
||||||
|
" @media (prefers-color-scheme: dark) {\n",
|
||||||
|
" /* Redefinition of color scheme for dark theme */\n",
|
||||||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||||||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||||||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||||||
|
" --sklearn-color-icon: #878787;\n",
|
||||||
|
" }\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 {\n",
|
||||||
|
" color: var(--sklearn-color-text);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 pre {\n",
|
||||||
|
" padding: 0;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 input.sk-hidden--visually {\n",
|
||||||
|
" border: 0;\n",
|
||||||
|
" clip: rect(1px 1px 1px 1px);\n",
|
||||||
|
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||||||
|
" height: 1px;\n",
|
||||||
|
" margin: -1px;\n",
|
||||||
|
" overflow: hidden;\n",
|
||||||
|
" padding: 0;\n",
|
||||||
|
" position: absolute;\n",
|
||||||
|
" width: 1px;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
|
||||||
|
" border: 1px dashed var(--sklearn-color-line);\n",
|
||||||
|
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||||||
|
" box-sizing: border-box;\n",
|
||||||
|
" padding-bottom: 0.4em;\n",
|
||||||
|
" background-color: var(--sklearn-color-background);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-container {\n",
|
||||||
|
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||||||
|
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||||||
|
" so we also need the `!important` here to be able to override the\n",
|
||||||
|
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||||||
|
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||||||
|
" display: inline-block !important;\n",
|
||||||
|
" position: relative;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
|
||||||
|
" display: none;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"div.sk-parallel-item,\n",
|
||||||
|
"div.sk-serial,\n",
|
||||||
|
"div.sk-item {\n",
|
||||||
|
" /* draw centered vertical line to link estimators */\n",
|
||||||
|
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||||||
|
" background-size: 2px 100%;\n",
|
||||||
|
" background-repeat: no-repeat;\n",
|
||||||
|
" background-position: center center;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Parallel-specific style estimator block */\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-parallel-item::after {\n",
|
||||||
|
" content: \"\";\n",
|
||||||
|
" width: 100%;\n",
|
||||||
|
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||||||
|
" flex-grow: 1;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-parallel {\n",
|
||||||
|
" display: flex;\n",
|
||||||
|
" align-items: stretch;\n",
|
||||||
|
" justify-content: center;\n",
|
||||||
|
" background-color: var(--sklearn-color-background);\n",
|
||||||
|
" position: relative;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-parallel-item {\n",
|
||||||
|
" display: flex;\n",
|
||||||
|
" flex-direction: column;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
|
||||||
|
" align-self: flex-end;\n",
|
||||||
|
" width: 50%;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
|
||||||
|
" align-self: flex-start;\n",
|
||||||
|
" width: 50%;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
|
||||||
|
" width: 0;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Serial-specific style estimator block */\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-serial {\n",
|
||||||
|
" display: flex;\n",
|
||||||
|
" flex-direction: column;\n",
|
||||||
|
" align-items: center;\n",
|
||||||
|
" background-color: var(--sklearn-color-background);\n",
|
||||||
|
" padding-right: 1em;\n",
|
||||||
|
" padding-left: 1em;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||||||
|
"clickable and can be expanded/collapsed.\n",
|
||||||
|
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||||||
|
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||||||
|
"*/\n",
|
||||||
|
"\n",
|
||||||
|
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-toggleable {\n",
|
||||||
|
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||||||
|
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||||||
|
" background-color: var(--sklearn-color-background);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Toggleable label */\n",
|
||||||
|
"#sk-container-id-1 label.sk-toggleable__label {\n",
|
||||||
|
" cursor: pointer;\n",
|
||||||
|
" display: flex;\n",
|
||||||
|
" width: 100%;\n",
|
||||||
|
" margin-bottom: 0;\n",
|
||||||
|
" padding: 0.5em;\n",
|
||||||
|
" box-sizing: border-box;\n",
|
||||||
|
" text-align: center;\n",
|
||||||
|
" align-items: start;\n",
|
||||||
|
" justify-content: space-between;\n",
|
||||||
|
" gap: 0.5em;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 label.sk-toggleable__label .caption {\n",
|
||||||
|
" font-size: 0.6rem;\n",
|
||||||
|
" font-weight: lighter;\n",
|
||||||
|
" color: var(--sklearn-color-text-muted);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
|
||||||
|
" /* Arrow on the left of the label */\n",
|
||||||
|
" content: \"▸\";\n",
|
||||||
|
" float: left;\n",
|
||||||
|
" margin-right: 0.25em;\n",
|
||||||
|
" color: var(--sklearn-color-icon);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
|
||||||
|
" color: var(--sklearn-color-text);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Toggleable content - dropdown */\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-toggleable__content {\n",
|
||||||
|
" max-height: 0;\n",
|
||||||
|
" max-width: 0;\n",
|
||||||
|
" overflow: hidden;\n",
|
||||||
|
" text-align: left;\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
|
||||||
|
" margin: 0.2em;\n",
|
||||||
|
" border-radius: 0.25em;\n",
|
||||||
|
" color: var(--sklearn-color-text);\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||||||
|
" /* Expand drop-down */\n",
|
||||||
|
" max-height: 200px;\n",
|
||||||
|
" max-width: 100%;\n",
|
||||||
|
" overflow: auto;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||||||
|
" content: \"▾\";\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||||
|
" color: var(--sklearn-color-text);\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Estimator-specific style */\n",
|
||||||
|
"\n",
|
||||||
|
"/* Colorize estimator box */\n",
|
||||||
|
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
|
||||||
|
"#sk-container-id-1 div.sk-label label {\n",
|
||||||
|
" /* The background is the default theme color */\n",
|
||||||
|
" color: var(--sklearn-color-text-on-default-background);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* On hover, darken the color of the background */\n",
|
||||||
|
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
|
||||||
|
" color: var(--sklearn-color-text);\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Label box, darken color on hover, fitted */\n",
|
||||||
|
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||||||
|
" color: var(--sklearn-color-text);\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Estimator label */\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-label label {\n",
|
||||||
|
" font-family: monospace;\n",
|
||||||
|
" font-weight: bold;\n",
|
||||||
|
" display: inline-block;\n",
|
||||||
|
" line-height: 1.2em;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-label-container {\n",
|
||||||
|
" text-align: center;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Estimator-specific */\n",
|
||||||
|
"#sk-container-id-1 div.sk-estimator {\n",
|
||||||
|
" font-family: monospace;\n",
|
||||||
|
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||||||
|
" border-radius: 0.25em;\n",
|
||||||
|
" box-sizing: border-box;\n",
|
||||||
|
" margin-bottom: 0.5em;\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-estimator.fitted {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* on hover */\n",
|
||||||
|
"#sk-container-id-1 div.sk-estimator:hover {\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||||||
|
"\n",
|
||||||
|
"/* Common style for \"i\" and \"?\" */\n",
|
||||||
|
"\n",
|
||||||
|
".sk-estimator-doc-link,\n",
|
||||||
|
"a:link.sk-estimator-doc-link,\n",
|
||||||
|
"a:visited.sk-estimator-doc-link {\n",
|
||||||
|
" float: right;\n",
|
||||||
|
" font-size: smaller;\n",
|
||||||
|
" line-height: 1em;\n",
|
||||||
|
" font-family: monospace;\n",
|
||||||
|
" background-color: var(--sklearn-color-background);\n",
|
||||||
|
" border-radius: 1em;\n",
|
||||||
|
" height: 1em;\n",
|
||||||
|
" width: 1em;\n",
|
||||||
|
" text-decoration: none !important;\n",
|
||||||
|
" margin-left: 0.5em;\n",
|
||||||
|
" text-align: center;\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||||||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
".sk-estimator-doc-link.fitted,\n",
|
||||||
|
"a:link.sk-estimator-doc-link.fitted,\n",
|
||||||
|
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||||||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* On hover */\n",
|
||||||
|
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||||||
|
".sk-estimator-doc-link:hover,\n",
|
||||||
|
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||||||
|
".sk-estimator-doc-link:hover {\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||||||
|
" color: var(--sklearn-color-background);\n",
|
||||||
|
" text-decoration: none;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||||||
|
".sk-estimator-doc-link.fitted:hover,\n",
|
||||||
|
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||||||
|
".sk-estimator-doc-link.fitted:hover {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||||||
|
" color: var(--sklearn-color-background);\n",
|
||||||
|
" text-decoration: none;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* Span, style for the box shown on hovering the info icon */\n",
|
||||||
|
".sk-estimator-doc-link span {\n",
|
||||||
|
" display: none;\n",
|
||||||
|
" z-index: 9999;\n",
|
||||||
|
" position: relative;\n",
|
||||||
|
" font-weight: normal;\n",
|
||||||
|
" right: .2ex;\n",
|
||||||
|
" padding: .5ex;\n",
|
||||||
|
" margin: .5ex;\n",
|
||||||
|
" width: min-content;\n",
|
||||||
|
" min-width: 20ex;\n",
|
||||||
|
" max-width: 50ex;\n",
|
||||||
|
" color: var(--sklearn-color-text);\n",
|
||||||
|
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||||||
|
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
".sk-estimator-doc-link.fitted span {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" background: var(--sklearn-color-fitted-level-0);\n",
|
||||||
|
" border: var(--sklearn-color-fitted-level-3);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
".sk-estimator-doc-link:hover span {\n",
|
||||||
|
" display: block;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 a.estimator_doc_link {\n",
|
||||||
|
" float: right;\n",
|
||||||
|
" font-size: 1rem;\n",
|
||||||
|
" line-height: 1em;\n",
|
||||||
|
" font-family: monospace;\n",
|
||||||
|
" background-color: var(--sklearn-color-background);\n",
|
||||||
|
" border-radius: 1rem;\n",
|
||||||
|
" height: 1rem;\n",
|
||||||
|
" width: 1rem;\n",
|
||||||
|
" text-decoration: none;\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||||||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||||||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"/* On hover */\n",
|
||||||
|
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
|
||||||
|
" /* unfitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||||||
|
" color: var(--sklearn-color-background);\n",
|
||||||
|
" text-decoration: none;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
|
||||||
|
" /* fitted */\n",
|
||||||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||||||
|
"}\n",
|
||||||
|
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultinomialNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>MultinomialNB</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.naive_bayes.MultinomialNB.html\">?<span>Documentation for MultinomialNB</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>MultinomialNB()</pre></div> </div></div></div></div>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"execution_count": 15
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# =========================\n",
|
||||||
|
"# 5. CLASSIFICATION MODEL\n",
|
||||||
|
"# =========================\n",
|
||||||
|
"# fit() returns the estimator itself, so construction and training chain.\n",
|
||||||
|
"model = MultinomialNB().fit(X, labels)\n",
|
||||||
|
"# Trailing expression keeps the fitted estimator as the cell's displayed result.\n",
|
||||||
|
"model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {
|
||||||
|
"id": "ShevCTva2Fg9",
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"outputId": "228e8f03-bcfb-4ecc-c36f-d86402530a5b"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"Kalimat uji : saya suka belajar python\n",
|
||||||
|
"Hasil klasifikasi : positif\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# =========================\n",
|
||||||
|
"# 6. PREDICT NEW DATA\n",
|
||||||
|
"# =========================\n",
|
||||||
|
"# Apply the same cleaning that was used on the training sentences.\n",
|
||||||
|
"test_sentence = [clean_text(\"saya suka belajar python\")]\n",
|
||||||
|
"\n",
|
||||||
|
"# transform() (not fit_transform) reuses the vocabulary learned in training.\n",
|
||||||
|
"X_test = vectorizer.transform(test_sentence)\n",
|
||||||
|
"prediction = model.predict(X_test)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"\\nKalimat uji :\", test_sentence[0])\n",
|
||||||
|
"print(\"Hasil klasifikasi :\", prediction[0])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {
|
||||||
|
"id": "NQjExannHuj0"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user