Upload files to "/"

This commit is contained in:
202210715070 LUSIANA SITUMORANG 2026-01-20 23:05:57 +07:00
parent ba1061b563
commit c3e95e66d9

View File

@ -0,0 +1,167 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "7c7601d6-3c91-453e-8c29-706528237596",
"metadata": {
"scrolled": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7c7601d6-3c91-453e-8c29-706528237596",
"outputId": "df473be6-c537-431b-8bc9-66b1ca1d64b1"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Teks sumber:\n",
"\n",
"PT Maju Jaya resmi meluncurkan produk terbaru mereka pada 12 Agustus 2023\n",
"di Jakarta. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan\n",
"disaksikan oleh ratusan undangan.\n",
"\n",
"\n",
"HASIL INFORMATION EXTRACTION:\n",
"EventType: Product Launch\n",
"Organization: PT Maju Jaya\n",
"LaunchDateOriginal: 12 Agustus 2023\n",
"LaunchDateISO: 2023-08-12\n",
"Location: Jakarta\n",
"SourceText: PT Maju Jaya resmi meluncurkan produk terbaru mereka pada 12 Agustus 2023 di Jakarta. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan disaksikan oleh ratusan undangan.\n"
]
}
],
"source": [
"# =========================\n",
"# NLP Information Extraction (MODIFIED VERSION)\n",
"# =========================\n",
"\n",
"import re\n",
"import nltk\n",
"from datetime import datetime\n",
"\n",
"# =========================\n",
"# 1. DATA TEKS (DIMODIFIKASI)\n",
"# =========================\n",
"text = \"\"\"\n",
"PT Maju Jaya resmi meluncurkan produk terbaru mereka pada 12 Agustus 2023\n",
"di Jakarta. Acara peluncuran tersebut dihadiri oleh CEO perusahaan dan\n",
"disaksikan oleh ratusan undangan.\n",
"\"\"\"\n",
"\n",
"print(\"Teks sumber:\")\n",
"print(text)\n",
"\n",
"# =========================\n",
"# 2. PREPROCESSING TAMBAHAN\n",
"# =========================\n",
"def clean_text(text):\n",
" text = text.strip()\n",
" text = re.sub(r\"\\s+\", \" \", text)\n",
" return text\n",
"\n",
"text = clean_text(text)\n",
"\n",
"# =========================\n",
"# 3. EKSTRAKSI TANGGAL (LEBIH DARI 1 FORMAT)\n",
"# =========================\n",
"date_patterns = [\n",
" r\"\\d{1,2}\\s(?:Januari|Februari|Maret|April|Mei|Juni|Juli|Agustus|September|Oktober|November|Desember)\\s\\d{4}\",\n",
" r\"\\d{4}-\\d{2}-\\d{2}\"\n",
"]\n",
"\n",
"dates = []\n",
"for pattern in date_patterns:\n",
" dates.extend(re.findall(pattern, text))\n",
"\n",
"# =========================\n",
"# 4. NORMALISASI TANGGAL\n",
"# =========================\n",
"bulan_map = {\n",
" \"Januari\": \"01\", \"Februari\": \"02\", \"Maret\": \"03\",\n",
" \"April\": \"04\", \"Mei\": \"05\", \"Juni\": \"06\",\n",
" \"Juli\": \"07\", \"Agustus\": \"08\", \"September\": \"09\",\n",
" \"Oktober\": \"10\", \"November\": \"11\", \"Desember\": \"12\"\n",
"}\n",
"\n",
"def normalize_date(date_str):\n",
" try:\n",
" parts = date_str.split()\n",
" return f\"{parts[2]}-{bulan_map[parts[1]]}-{parts[0].zfill(2)}\"\n",
" except:\n",
" return None\n",
"\n",
"normalized_dates = [normalize_date(d) for d in dates]\n",
"\n",
"# =========================\n",
"# 5. EKSTRAKSI LOKASI (RULE-BASED)\n",
"# =========================\n",
"location_pattern = r\"di\\s([A-Z][a-zA-Z]+)\"\n",
"location_match = re.search(location_pattern, text)\n",
"location = location_match.group(1) if location_match else None\n",
"\n",
"# =========================\n",
"# 6. EKSTRAKSI ORGANISASI (MODIFIKASI)\n",
"# =========================\n",
"org_pattern = r\"PT\\s[A-Z][a-zA-Z]+\\s[A-Z][a-zA-Z]+\"\n",
"organization = re.findall(org_pattern, text)\n",
"\n",
"# =========================\n",
"# 7. EVENT TEMPLATE (LEBIH LENGKAP)\n",
"# =========================\n",
"event_template = {\n",
" \"EventType\": \"Product Launch\",\n",
" \"Organization\": organization[0] if organization else None,\n",
" \"LaunchDateOriginal\": dates[0] if dates else None,\n",
" \"LaunchDateISO\": normalized_dates[0] if normalized_dates else None,\n",
" \"Location\": location,\n",
" \"SourceText\": text\n",
"}\n",
"\n",
"# =========================\n",
"# 8. OUTPUT\n",
"# =========================\n",
"print(\"\\nHASIL INFORMATION EXTRACTION:\")\n",
"for key, value in event_template.items():\n",
" print(f\"{key}: {value}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1e128113-af1e-45a1-8586-48c4acf578b4",
"metadata": {
"id": "1e128113-af1e-45a1-8586-48c4acf578b4"
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
},
"colab": {
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 5
}