{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Rule-Based Information Extraction: Product Launch Event\n",
        "\n",
        "Uses regular expressions to pull the organization, the launch date (original and ISO 8601 form) and the location out of a short Indonesian news snippet, then prints them as an event template."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8kSq7ukiTzaw"
      },
      "outputs": [],
      "source": [
        "import re\n",
        "from datetime import datetime\n",
        "\n",
        "import nltk  # imported by the original notebook; the rule-based steps below do not call it"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# =========================\n",
        "# 1. SOURCE TEXT\n",
        "# =========================\n",
        "raw_text = \"\"\"\n",
        "PT Abadi resmi meluncurkan produk terbaru mereka pada 12 Agustus 2026\n",
        "di Jakarta. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan\n",
        "disaksikan oleh ratusan undangan.\n",
        "\"\"\"\n",
        "\n",
        "print(\"Teks sumber:\")\n",
        "print(raw_text)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# =========================\n",
        "# 2. PREPROCESSING\n",
        "# =========================\n",
        "def clean_text(text):\n",
        "    \"\"\"Strip both ends and collapse each whitespace run (newlines included) into one space.\"\"\"\n",
        "    return re.sub(r\"\\s+\", \" \", text.strip())\n",
        "\n",
        "\n",
        "# Keep raw_text untouched so this cell can be re-run safely; every later\n",
        "# step reads the cleaned single-line version.\n",
        "text = clean_text(raw_text)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# =========================\n",
        "# 3. DATE EXTRACTION (MORE THAN ONE FORMAT)\n",
        "# =========================\n",
        "date_patterns = [\n",
        "    # Indonesian long form, e.g. '12 Agustus 2026'\n",
        "    r\"\\b\\d{1,2}\\s(?:Januari|Februari|Maret|April|Mei|Juni|Juli|Agustus|September|Oktober|November|Desember)\\s\\d{4}\\b\",\n",
        "    # ISO 8601 form, e.g. '2026-08-12'\n",
        "    r\"\\b\\d{4}-\\d{2}-\\d{2}\\b\",\n",
        "]\n",
        "\n",
        "# Collect (position, match) pairs so that the final list follows the order in\n",
        "# which the dates appear in the text, not the order of the patterns.\n",
        "date_matches = []\n",
        "for pattern in date_patterns:\n",
        "    date_matches.extend((m.start(), m.group(0)) for m in re.finditer(pattern, text))\n",
        "\n",
        "dates = [value for _, value in sorted(date_matches)]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# =========================\n",
        "# 4. DATE NORMALIZATION\n",
        "# =========================\n",
        "bulan_map = {\n",
        "    \"Januari\": \"01\", \"Februari\": \"02\", \"Maret\": \"03\",\n",
        "    \"April\": \"04\", \"Mei\": \"05\", \"Juni\": \"06\",\n",
        "    \"Juli\": \"07\", \"Agustus\": \"08\", \"September\": \"09\",\n",
        "    \"Oktober\": \"10\", \"November\": \"11\", \"Desember\": \"12\"\n",
        "}\n",
        "\n",
        "\n",
        "def normalize_date(date_str):\n",
        "    \"\"\"Return date_str as an ISO 8601 date (YYYY-MM-DD), or None if it is not a valid date.\n",
        "\n",
        "    Accepts the two formats matched by date_patterns: an Indonesian long-form\n",
        "    date such as '12 Agustus 2026' and an already ISO-formatted date such as\n",
        "    '2026-08-12'. Non-string input, unknown month names and impossible\n",
        "    calendar dates all yield None.\n",
        "    \"\"\"\n",
        "    if not isinstance(date_str, str):\n",
        "        return None\n",
        "\n",
        "    candidate = date_str.strip()\n",
        "    if re.fullmatch(r\"\\d{4}-\\d{2}-\\d{2}\", candidate):\n",
        "        # Already ISO-shaped: pass it through to the calendar check below.\n",
        "        iso_date = candidate\n",
        "    else:\n",
        "        parts = candidate.split()\n",
        "        if len(parts) < 3 or parts[1] not in bulan_map:\n",
        "            return None\n",
        "        iso_date = f\"{parts[2]}-{bulan_map[parts[1]]}-{parts[0].zfill(2)}\"\n",
        "\n",
        "    # Reject impossible calendar dates such as '31 Februari 2026'.\n",
        "    try:\n",
        "        datetime.strptime(iso_date, \"%Y-%m-%d\")\n",
        "    except ValueError:\n",
        "        return None\n",
        "    return iso_date\n",
        "\n",
        "\n",
        "normalized_dates = [normalize_date(d) for d in dates]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# =========================\n",
        "# 5. LOCATION EXTRACTION (RULE-BASED)\n",
        "# =========================\n",
        "# The leading \\b keeps 'di' from matching the tail of a longer word such as 'Abadi'.\n",
        "location_pattern = r\"\\bdi\\s([A-Z][a-zA-Z]+)\"\n",
        "location_match = re.search(location_pattern, text)\n",
        "location = location_match.group(1) if location_match else None\n",
        "\n",
        "# =========================\n",
        "# 6. ORGANIZATION EXTRACTION\n",
        "# =========================\n",
        "# 'PT' followed by one or more capitalized words, so single-word company\n",
        "# names such as 'PT Abadi' are found as well as multi-word ones.\n",
        "org_pattern = r\"\\bPT\\s[A-Z][a-zA-Z]+(?:\\s[A-Z][a-zA-Z]+)*\"\n",
        "organization = re.findall(org_pattern, text)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# =========================\n",
        "# 7. EVENT TEMPLATE\n",
        "# =========================\n",
        "# The first match of each extractor is taken as the event's value; a field is\n",
        "# None when its extractor found nothing.\n",
        "event_template = {\n",
        "    \"EventType\": \"Product Launch\",\n",
        "    \"Organization\": organization[0] if organization else None,\n",
        "    \"LaunchDateOriginal\": dates[0] if dates else None,\n",
        "    \"LaunchDateISO\": normalized_dates[0] if normalized_dates else None,\n",
        "    \"Location\": location,\n",
        "    \"SourceText\": text,\n",
        "}\n",
        "\n",
        "# =========================\n",
        "# 8. OUTPUT\n",
        "# =========================\n",
        "print(\"\\nHASIL INFORMATION EXTRACTION:\")\n",
        "for key, value in event_template.items():\n",
        "    print(f\"{key}: {value}\")"
      ]
    }
  ]
}