195 lines
9.0 KiB
Plaintext
195 lines
9.0 KiB
Plaintext
{
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0,
|
||
"metadata": {
|
||
"colab": {
|
||
"provenance": []
|
||
},
|
||
"kernelspec": {
|
||
"name": "python3",
|
||
"display_name": "Python 3"
|
||
},
|
||
"language_info": {
|
||
"name": "python"
|
||
}
|
||
},
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "8kSq7ukiTzaw"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"{\n",
|
||
" \"cells\": [\n",
|
||
" {\n",
|
||
" \"cell_type\": \"code\",\n",
|
||
" \"execution_count\": 6,\n",
|
||
" \"id\": \"7c7601d6-3c91-453e-8c29-706528237596\",\n",
|
||
" \"metadata\": {\n",
|
||
" \"scrolled\": true,\n",
|
||
" \"colab\": {\n",
|
||
" \"base_uri\": \"https://localhost:8080/\"\n",
|
||
" },\n",
|
||
" \"id\": \"7c7601d6-3c91-453e-8c29-706528237596\",\n",
|
||
" \"outputId\": \"df473be6-c537-431b-8bc9-66b1ca1d64b1\"\n",
|
||
" },\n",
|
||
" \"outputs\": [\n",
|
||
" {\n",
|
||
" \"output_type\": \"stream\",\n",
|
||
" \"name\": \"stdout\",\n",
|
||
" \"text\": [\n",
|
||
" \"Teks sumber:\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"PT Abadi resmi meluncurkan produk terbaru mereka pada 12 Agustus 2026\\n\",\n",
|
||
" \"di Jakarta. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan\\n\",\n",
|
||
" \"disaksikan oleh ratusan undangan.\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"HASIL INFORMATION EXTRACTION:\\n\",\n",
|
||
" \"EventType: Product Launch\\n\",\n",
|
||
" \"Organization: PT Abadi\\n\",\n",
|
||
" \"LaunchDateOriginal: 12 Agustus 2026\\n\",\n",
|
||
" \"LaunchDateISO: 2026-08-12\\n\",\n",
|
||
" \"Location: Jakarta\\n\",\n",
|
||
" \"SourceText: PT Abadi resmi meluncurkan produk terbaru mereka pada 12 Agustus 2026 di Jakarta. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan disaksikan oleh ratusan undangan.\\n\"\n",
|
||
" ]\n",
|
||
" }\n",
|
||
" ],\n",
|
||
" \"source\": [\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# NLP – Information Extraction (MODIFIED VERSION)\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"import re\\n\",\n",
|
||
" \"import nltk\\n\",\n",
|
||
" \"from datetime import datetime\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 1. DATA TEKS (DIMODIFIKASI)\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"text = \\\"\\\"\\\"\\n\",\n",
|
||
" \"PT Abadi resmi meluncurkan produk terbaru mereka pada 12 Agustus 2026\\n\",\n",
|
||
" \"di Jakarta. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan\\n\",\n",
|
||
" \"disaksikan oleh ratusan undangan.\\n\",\n",
|
||
" \"\\\"\\\"\\\"\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# Show the heading and the raw text before any cleaning is applied.\\n\",\n",
|
||
" \"print(\\\"Teks sumber:\\\", text, sep=\\\"\\\\n\\\")\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 2. ADDITIONAL PREPROCESSING\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"def clean_text(text):\\n\",\n",
|
||
" \"    \\\"\\\"\\\"Trim `text` and collapse each run of whitespace into one space.\\\"\\\"\\\"\\n\",\n",
|
||
" \"    return re.sub(r\\\"\\\\s+\\\", \\\" \\\", text.strip())\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"text = clean_text(text)\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 3. EKSTRAKSI TANGGAL (LEBIH DARI 1 FORMAT)\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# Supported notations: Indonesian long form (12 Agustus 2026) and ISO (2026-08-12).\\n\",\n",
|
||
" \"date_patterns = [\\n\",\n",
|
||
" \"    r\\\"\\\\d{1,2}\\\\s(?:Januari|Februari|Maret|April|Mei|Juni|Juli|Agustus|September|Oktober|November|Desember)\\\\s\\\\d{4}\\\",\\n\",\n",
|
||
" \"    r\\\"\\\\d{4}-\\\\d{2}-\\\\d{2}\\\"\\n\",\n",
|
||
" \"]\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# Results are grouped pattern by pattern, so their order follows date_patterns, not the text.\\n\",\n",
|
||
" \"dates = [found for regex in date_patterns for found in re.findall(regex, text)]\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 4. NORMALISASI TANGGAL\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# Indonesian month names mapped to their two-digit month numbers.\\n\",\n",
|
||
" \"bulan_map = {\\n\",\n",
|
||
" \"    \\\"Januari\\\": \\\"01\\\", \\\"Februari\\\": \\\"02\\\", \\\"Maret\\\": \\\"03\\\",\\n\",\n",
|
||
" \"    \\\"April\\\": \\\"04\\\", \\\"Mei\\\": \\\"05\\\", \\\"Juni\\\": \\\"06\\\",\\n\",\n",
|
||
" \"    \\\"Juli\\\": \\\"07\\\", \\\"Agustus\\\": \\\"08\\\", \\\"September\\\": \\\"09\\\",\\n\",\n",
|
||
" \"    \\\"Oktober\\\": \\\"10\\\", \\\"November\\\": \\\"11\\\", \\\"Desember\\\": \\\"12\\\"\\n\",\n",
|
||
" \"}\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"def normalize_date(date_str):\\n\",\n",
|
||
" \"    \\\"\\\"\\\"Convert an extracted date string to ISO 8601 (YYYY-MM-DD).\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"    Handles the Indonesian long form (day, month name, year) and strings that\\n\",\n",
|
||
" \"    are already ISO formatted. Returns None for anything else.\\n\",\n",
|
||
" \"    \\\"\\\"\\\"\\n\",\n",
|
||
" \"    if not isinstance(date_str, str):\\n\",\n",
|
||
" \"        return None\\n\",\n",
|
||
" \"    # Dates captured by the ISO pattern need no conversion; splitting them on\\n\",\n",
|
||
" \"    # whitespace yields a single part, which used to end up as None.\\n\",\n",
|
||
" \"    if re.fullmatch(r\\\"\\\\d{4}-\\\\d{2}-\\\\d{2}\\\", date_str):\\n\",\n",
|
||
" \"        return date_str\\n\",\n",
|
||
" \"    parts = date_str.split()\\n\",\n",
|
||
" \"    try:\\n\",\n",
|
||
" \"        return f\\\"{parts[2]}-{bulan_map[parts[1]]}-{parts[0].zfill(2)}\\\"\\n\",\n",
|
||
" \"    except (IndexError, KeyError):\\n\",\n",
|
||
" \"        # IndexError: fewer than three parts; KeyError: unknown month name.\\n\",\n",
|
||
" \"        return None\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"normalized_dates = list(map(normalize_date, dates))\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 5. EKSTRAKSI LOKASI (RULE-BASED)\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# The word boundary stops the preposition 'di' from matching the tail of a longer\\n\",\n",
|
||
" \"# word, e.g. the 'di' that ends 'Abadi' when a capitalized word follows it.\\n\",\n",
|
||
" \"location_pattern = r\\\"\\\\bdi\\\\s([A-Z][a-zA-Z]+)\\\"\\n\",\n",
|
||
" \"location_match = re.search(location_pattern, text)\\n\",\n",
|
||
" \"# Only the first match is used; None when no capitalized word follows 'di'.\\n\",\n",
|
||
" \"location = location_match.group(1) if location_match else None\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 6. EKSTRAKSI ORGANISASI (MODIFIKASI)\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# A company name is 'PT' followed by one or more capitalized words. Requiring exactly\\n\",\n",
|
||
" \"# two words missed single-word names such as 'PT Abadi' in the sample text.\\n\",\n",
|
||
" \"org_pattern = r\\\"\\\\bPT\\\\s[A-Z][a-zA-Z]+(?:\\\\s[A-Z][a-zA-Z]+)*\\\"\\n\",\n",
|
||
" \"# The group is non-capturing, so findall returns each full company name.\\n\",\n",
|
||
" \"organization = re.findall(org_pattern, text)\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 7. EVENT TEMPLATE (LEBIH LENGKAP)\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# Use the first hit from each extractor; None stands in for anything that was not found.\\n\",\n",
|
||
" \"event_template = dict(\\n\",\n",
|
||
" \"    EventType=\\\"Product Launch\\\",\\n\",\n",
|
||
" \"    Organization=next(iter(organization), None),\\n\",\n",
|
||
" \"    LaunchDateOriginal=next(iter(dates), None),\\n\",\n",
|
||
" \"    LaunchDateISO=next(iter(normalized_dates), None),\\n\",\n",
|
||
" \"    Location=location,\\n\",\n",
|
||
" \"    SourceText=text,\\n\",\n",
|
||
" \")\\n\",\n",
|
||
" \"\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# 8. OUTPUT\\n\",\n",
|
||
" \"# =========================\\n\",\n",
|
||
" \"# Heading (preceded by a blank line), then one 'field: value' line per template entry.\\n\",\n",
|
||
" \"print(\\\"\\\\nHASIL INFORMATION EXTRACTION:\\\")\\n\",\n",
|
||
" \"for field in event_template:\\n\",\n",
|
||
" \"    print(f\\\"{field}: {event_template[field]}\\\")\"\n",
|
||
" ]\n",
|
||
" },\n",
|
||
" {\n",
|
||
" \"cell_type\": \"code\",\n",
|
||
" \"execution_count\": 6,\n",
|
||
" \"id\": \"1e128113-af1e-45a1-8586-48c4acf578b4\",\n",
|
||
" \"metadata\": {\n",
|
||
" \"id\": \"1e128113-af1e-45a1-8586-48c4acf578b4\"\n",
|
||
" },\n",
|
||
" \"outputs\": [],\n",
|
||
" \"source\": []\n",
|
||
" }\n",
|
||
" ],\n",
|
||
" \"metadata\": {\n",
|
||
" \"kernelspec\": {\n",
|
||
" \"display_name\": \"Python 3 (ipykernel)\",\n",
|
||
" \"language\": \"python\",\n",
|
||
" \"name\": \"python3\"\n",
|
||
" },\n",
|
||
" \"language_info\": {\n",
|
||
" \"codemirror_mode\": {\n",
|
||
" \"name\": \"ipython\",\n",
|
||
" \"version\": 3\n",
|
||
" },\n",
|
||
" \"file_extension\": \".py\",\n",
|
||
" \"mimetype\": \"text/x-python\",\n",
|
||
" \"name\": \"python\",\n",
|
||
" \"nbconvert_exporter\": \"python\",\n",
|
||
" \"pygments_lexer\": \"ipython3\",\n",
|
||
" \"version\": \"3.12.2\"\n",
|
||
" },\n",
|
||
" \"colab\": {\n",
|
||
" \"provenance\": []\n",
|
||
" }\n",
|
||
" },\n",
|
||
" \"nbformat\": 4,\n",
|
||
" \"nbformat_minor\": 5\n",
|
||
"}"
|
||
]
|
||
}
|
||
]
|
||
} |