{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "7c7601d6-3c91-453e-8c29-706528237596",
      "metadata": {
        "scrolled": true,
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7c7601d6-3c91-453e-8c29-706528237596",
        "outputId": "df473be6-c537-431b-8bc9-66b1ca1d64b1"
      },
      "outputs": [],
      "source": [
        "# =========================\n",
        "# NLP – Information Extraction (MODIFIED VERSION)\n",
        "# =========================\n",
        "\n",
        "import re\n",
        "import nltk\n",
        "from datetime import datetime\n",
        "\n",
        "# =========================\n",
        "# 1. TEXT DATA (MODIFIED)\n",
        "# =========================\n",
        "text = \"\"\"\n",
        "PT Tempo Grub resmi meluncurkan produk terbaru mereka pada 28 maret 2026\n",
        "di Bandung. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan\n",
        "disaksikan oleh ratusan undangan.\n",
        "\"\"\"\n",
        "\n",
        "print(\"Teks sumber:\")\n",
        "print(text)\n",
        "\n",
        "# =========================\n",
        "# 2. ADDITIONAL PREPROCESSING\n",
        "# =========================\n",
        "def clean_text(text):\n",
        "    \"\"\"Strip the text and collapse every whitespace run (including newlines) into one space.\"\"\"\n",
        "    text = text.strip()\n",
        "    text = re.sub(r\"\\s+\", \" \", text)\n",
        "    return text\n",
        "\n",
        "text = clean_text(text)\n",
        "\n",
        "# =========================\n",
        "# 3. DATE EXTRACTION (MORE THAN ONE FORMAT)\n",
        "# =========================\n",
        "# Pattern 1: \"<day> <Indonesian month name> <year>\"; the day is one or two digits.\n",
        "# Pattern 2: ISO 8601 calendar date (YYYY-MM-DD).\n",
        "date_patterns = [\n",
        "    r\"\\b\\d{1,2}\\s(?:Januari|Februari|Maret|April|Mei|Juni|Juli|Agustus|September|Oktober|November|Desember)\\s\\d{4}\\b\",\n",
        "    r\"\\b\\d{4}-\\d{2}-\\d{2}\\b\"\n",
        "]\n",
        "\n",
        "dates = []\n",
        "for pattern in date_patterns:\n",
        "    # IGNORECASE so that month names written in lowercase (e.g. \"maret\") are still found.\n",
        "    dates.extend(re.findall(pattern, text, flags=re.IGNORECASE))\n",
        "\n",
        "# =========================\n",
        "# 4. DATE NORMALIZATION\n",
        "# =========================\n",
        "bulan_map = {\n",
        "    \"Januari\": \"01\", \"Februari\": \"02\", \"Maret\": \"03\",\n",
        "    \"April\": \"04\", \"Mei\": \"05\", \"Juni\": \"06\",\n",
        "    \"Juli\": \"07\", \"Agustus\": \"08\", \"September\": \"09\",\n",
        "    \"Oktober\": \"10\", \"November\": \"11\", \"Desember\": \"12\"\n",
        "}\n",
        "\n",
        "def normalize_date(date_str):\n",
        "    \"\"\"Convert an extracted date string to YYYY-MM-DD.\n",
        "\n",
        "    Returns the string unchanged when it is already in YYYY-MM-DD form,\n",
        "    and None when it cannot be interpreted.\n",
        "    \"\"\"\n",
        "    # The second extraction pattern yields dates that are already ISO formatted.\n",
        "    if re.fullmatch(r\"\\d{4}-\\d{2}-\\d{2}\", date_str):\n",
        "        return date_str\n",
        "    try:\n",
        "        day, month, year = date_str.split()\n",
        "        # capitalize() maps \"maret\" / \"MARET\" onto the \"Maret\" key of bulan_map.\n",
        "        return f\"{year}-{bulan_map[month.capitalize()]}-{day.zfill(2)}\"\n",
        "    except (ValueError, KeyError):\n",
        "        # ValueError: not exactly three parts; KeyError: unknown month name.\n",
        "        return None\n",
        "\n",
        "normalized_dates = [normalize_date(d) for d in dates]\n",
        "\n",
        "# =========================\n",
        "# 5. LOCATION EXTRACTION (RULE-BASED)\n",
        "# =========================\n",
        "# \\b keeps the preposition \"di\" from matching the tail of a longer word.\n",
        "location_pattern = r\"\\bdi\\s([A-Z][a-zA-Z]+)\"\n",
        "location_match = re.search(location_pattern, text)\n",
        "location = location_match.group(1) if location_match else None\n",
        "\n",
        "# =========================\n",
        "# 6. ORGANIZATION EXTRACTION (MODIFIED)\n",
        "# =========================\n",
        "# \"PT\" followed by one or more capitalized words.\n",
        "org_pattern = r\"\\bPT(?:\\s[A-Z][a-zA-Z]+)+\"\n",
        "organization = re.findall(org_pattern, text)\n",
        "\n",
        "# =========================\n",
        "# 7. EVENT TEMPLATE (MORE COMPLETE)\n",
        "# =========================\n",
        "event_template = {\n",
        "    \"EventType\": \"Product Launch\",\n",
        "    \"Organization\": organization[0] if organization else None,\n",
        "    \"LaunchDateOriginal\": dates[0] if dates else None,\n",
        "    \"LaunchDateISO\": normalized_dates[0] if normalized_dates else None,\n",
        "    \"Location\": location,\n",
        "    \"SourceText\": text\n",
        "}\n",
        "\n",
        "# =========================\n",
        "# 8. OUTPUT\n",
        "# =========================\n",
        "print(\"\\nHASIL INFORMATION EXTRACTION:\")\n",
        "for key, value in event_template.items():\n",
        "    print(f\"{key}: {value}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "1e128113-af1e-45a1-8586-48c4acf578b4",
      "metadata": {
        "id": "1e128113-af1e-45a1-8586-48c4acf578b4"
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.12.2"
    },
    "colab": {
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}