package com.example.notesai.util

import android.content.Context
import android.database.Cursor
import android.net.Uri
import android.provider.OpenableColumns
import android.util.Log
import com.tom_roush.pdfbox.android.PDFBoxResourceLoader
import com.tom_roush.pdfbox.pdmodel.PDDocument
import com.tom_roush.pdfbox.text.PDFTextStripper
import java.io.BufferedReader
import java.io.IOException
import java.io.InputStream
import java.io.InputStreamReader
import java.util.concurrent.CancellationException
import java.util.zip.ZipInputStream

/**
 * Extracts plain text from user-selected documents (PDF, TXT, DOCX) addressed by a [Uri],
 * and exposes small helpers for querying and formatting file metadata.
 */
object FileParser {
    private const val TAG = "FileParser"

    private const val MIME_PDF = "application/pdf"
    private const val MIME_TXT = "text/plain"
    private const val MIME_DOCX =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    /** ZIP entry that holds the main body of a DOCX package. */
    private const val DOCX_MAIN_PART = "word/document.xml"

    private const val UNKNOWN_FILE_NAME = "unknown"

    private const val BYTES_PER_KB = 1024L
    private const val BYTES_PER_MB = BYTES_PER_KB * 1024L

    private val WHITESPACE = Regex("\\s+")

    /**
     * Tokenizer for WordprocessingML. Alternatives, in order:
     * 1. a text run `<w:t ...>text</w:t>` (group 1 = raw text); the `(?:\s[^>]*)?` part keeps
     *    look-alike tags such as `<w:tbl>` or `<w:tc>` from matching,
     * 2. an in-run tab `<w:tab/>` (attribute-less on purpose: `<w:tab w:val=.../>` inside
     *    `<w:tabs>` is a tab-stop definition, not content),
     * 3. a line/page break `<w:br .../>` or `<w:cr/>`,
     * 4. a paragraph end `</w:p>`.
     */
    private val DOCX_TOKEN_PATTERN =
        Regex("""<w:t(?:\s[^>]*)?>([^<]*)</w:t>|<w:tab/>|<w:(?:br|cr)(?:\s[^>]*)?/>|</w:p>""")

    /**
     * Initializes PDFBox. Call this from `Application.onCreate` or before the first PDF is parsed.
     *
     * Initialization failures are logged and not rethrown.
     */
    fun initPDFBox(context: Context) {
        try {
            PDFBoxResourceLoader.init(context)
            Log.d(TAG, "PDFBox initialized successfully")
        } catch (e: Exception) {
            Log.e(TAG, "Failed to initialize PDFBox", e)
        }
    }

    /**
     * Parses the file behind [uri], choosing the parser from its MIME type or file extension.
     *
     * Note: this function performs blocking I/O and does not switch dispatchers itself, so it
     * runs on whatever context the caller invokes it from.
     *
     * @return [FileParseResult.Success] with the extracted text, or [FileParseResult.Error] when
     * the format is unsupported, the extracted text is blank, or reading fails.
     */
    suspend fun parseFile(context: Context, uri: Uri): FileParseResult {
        return try {
            val mimeType = context.contentResolver.getType(uri)
            val fileName = getFileName(context, uri)

            Log.d(TAG, "Parsing file: $fileName, type: $mimeType")

            val content = when {
                mimeType == MIME_PDF || fileName.endsWith(".pdf", ignoreCase = true) ->
                    parsePDF(context, uri)

                mimeType == MIME_TXT || fileName.endsWith(".txt", ignoreCase = true) ->
                    parseTXT(context, uri)

                mimeType == MIME_DOCX || fileName.endsWith(".docx", ignoreCase = true) ->
                    parseDOCX(context, uri)

                else -> return FileParseResult.Error("Format file tidak didukung: $mimeType")
            }

            if (content.isBlank()) {
                FileParseResult.Error("File kosong atau tidak dapat dibaca")
            } else {
                FileParseResult.Success(
                    content = content,
                    fileName = fileName,
                    fileType = getFileType(fileName),
                    wordCount = countWords(content),
                )
            }
        } catch (e: CancellationException) {
            // Never convert coroutine cancellation into a user-visible parse error.
            throw e
        } catch (e: Exception) {
            Log.e(TAG, "Error parsing file", e)
            FileParseResult.Error("Gagal membaca file: ${e.message}")
        }
    }

    /** Counts whitespace-separated words, ignoring leading/trailing whitespace. */
    private fun countWords(content: String): Int =
        WHITESPACE.split(content.trim()).count { it.isNotEmpty() }

    /**
     * Opens the content behind [uri] for reading.
     *
     * @throws IOException if the content resolver returns no stream.
     */
    private fun openStream(context: Context, uri: Uri): InputStream =
        context.contentResolver.openInputStream(uri) ?: throw IOException("Cannot open file")

    /**
     * Extracts the text of a PDF file.
     *
     * The document is closed via `use`, so it is released even when text extraction throws.
     */
    private fun parsePDF(context: Context, uri: Uri): String =
        openStream(context, uri).use { stream ->
            PDDocument.load(stream).use { document ->
                PDFTextStripper().getText(document).trim()
            }
        }

    /** Reads a plain-text file as UTF-8 and trims surrounding whitespace. */
    private fun parseTXT(context: Context, uri: Uri): String =
        openStream(context, uri).use { stream ->
            BufferedReader(InputStreamReader(stream, Charsets.UTF_8)).readText().trim()
        }

    /**
     * Extracts the text of a DOCX file (simplified: raw text of the main document part only).
     *
     * A DOCX file is a ZIP archive; the body lives in `word/document.xml`. Returns an empty
     * string when that entry is missing.
     *
     * @throws Exception wrapping any failure while reading the archive.
     */
    private fun parseDOCX(context: Context, uri: Uri): String =
        openStream(context, uri).use { stream ->
            try {
                ZipInputStream(stream).use { zip ->
                    // Advance entry by entry; once the main part is found the stream is
                    // positioned on it and readText() stops at the end of that entry.
                    val mainPart = generateSequence { zip.nextEntry }
                        .firstOrNull { it.name == DOCX_MAIN_PART }
                    if (mainPart == null) {
                        ""
                    } else {
                        extractDocxText(zip.bufferedReader(Charsets.UTF_8).readText())
                    }
                }
            } catch (e: Exception) {
                Log.e(TAG, "Error parsing DOCX", e)
                throw Exception("Gagal membaca file DOCX: ${e.message}", e)
            }
        }

    /**
     * Converts the XML of `word/document.xml` into plain text.
     *
     * Text runs are concatenated as-is (Word stores inter-word spaces inside the runs),
     * paragraphs and explicit breaks become `\n`, in-run tabs become `\t`, and the five
     * predefined XML entities are decoded. The result is trimmed.
     */
    internal fun extractDocxText(documentXml: String): String = buildString {
        for (token in DOCX_TOKEN_PATTERN.findAll(documentXml)) {
            val runText = token.groups[1]
            when {
                runText != null -> append(decodeXmlEntities(runText.value))
                token.value == "<w:tab/>" -> append('\t')
                else -> append('\n') // paragraph end or explicit break
            }
        }
    }.trim()

    /** Decodes the five predefined XML entities; `&amp;` goes last to avoid double decoding. */
    private fun decodeXmlEntities(text: String): String =
        if ('&' !in text) {
            text
        } else {
            text.replace("&lt;", "<")
                .replace("&gt;", ">")
                .replace("&quot;", "\"")
                .replace("&apos;", "'")
                .replace("&amp;", "&")
        }

    /**
     * Reads a single column of the first row the content resolver returns for [uri].
     *
     * @return the value produced by [read], or `null` when there is no cursor, no row,
     * no such column, or the stored value is null.
     */
    private inline fun <T> queryColumn(
        context: Context,
        uri: Uri,
        column: String,
        read: (Cursor, Int) -> T?,
    ): T? = context.contentResolver.query(uri, arrayOf(column), null, null, null)?.use { cursor ->
        val index = cursor.getColumnIndex(column)
        if (index != -1 && cursor.moveToFirst() && !cursor.isNull(index)) {
            read(cursor, index)
        } else {
            null
        }
    }

    /** Returns the display name reported for [uri], or `"unknown"` when none is available. */
    private fun getFileName(context: Context, uri: Uri): String =
        queryColumn(context, uri, OpenableColumns.DISPLAY_NAME) { cursor, index ->
            cursor.getString(index)
        } ?: UNKNOWN_FILE_NAME

    /** Maps a file name to a display label for its type, based on the extension. */
    private fun getFileType(fileName: String): String = when {
        fileName.endsWith(".pdf", ignoreCase = true) -> "PDF"
        fileName.endsWith(".txt", ignoreCase = true) -> "Text"
        fileName.endsWith(".docx", ignoreCase = true) -> "Word"
        else -> "Unknown"
    }

    /** Returns the size in bytes reported for [uri], or `0` when it is not available. */
    fun getFileSize(context: Context, uri: Uri): Long =
        queryColumn(context, uri, OpenableColumns.SIZE) { cursor, index ->
            cursor.getLong(index)
        } ?: 0L

    /**
     * Formats a byte count for display as whole `B`, `KB` or `MB` (integer division,
     * fractions are truncated).
     */
    fun formatFileSize(bytes: Long): String = when {
        bytes < BYTES_PER_KB -> "$bytes B"
        bytes < BYTES_PER_MB -> "${bytes / BYTES_PER_KB} KB"
        else -> "${bytes / BYTES_PER_MB} MB"
    }
}

/**
 * Result of parsing a file.
 */
sealed class FileParseResult {
    /**
     * Parsing succeeded.
     *
     * @property content extracted text
     * @property fileName display name of the source file
     * @property fileType display label of the file type
     * @property wordCount number of whitespace-separated words in [content]
     */
    data class Success(
        val content: String,
        val fileName: String,
        val fileType: String,
        val wordCount: Int
    ) : FileParseResult()

    /** Parsing failed; [message] is a user-facing description. */
    data class Error(val message: String) : FileParseResult()
}