package com.example.notesai.util
import android.content.Context
import android.net.Uri
import android.util.Log
import com.tom_roush.pdfbox.android.PDFBoxResourceLoader
import com.tom_roush.pdfbox.pdmodel.PDDocument
import com.tom_roush.pdfbox.text.PDFTextStripper
import java.io.BufferedReader
import java.io.InputStreamReader
/**
 * Extracts plain text from documents (PDF, TXT, DOCX) addressed by a content [Uri].
 *
 * The parsing and metadata functions perform blocking I/O on the calling thread.
 */
object FileParser {
    private const val TAG = "FileParser"

    /** Name reported when neither the provider nor the URI yields a file name. */
    private const val UNKNOWN_FILE_NAME = "unknown"

    /** ZIP entry inside a DOCX package that holds the main document body. */
    private const val DOCX_MAIN_PART = "word/document.xml"

    private const val BYTES_PER_KB = 1024L
    private const val BYTES_PER_MB = 1024L * 1024L

    /** One run of non-whitespace characters, i.e. one "word" for counting purposes. */
    private val WORD = Regex("""\S+""")

    /**
     * Tokens of interest in WordprocessingML, in document order:
     * a text run `<w:t …>text</w:t>` (group 1 = text), a paragraph end `</w:p>`,
     * or an explicit break `<w:br …/>`.
     *
     * The `(?:\s[^>]*)?` part keeps `<w:t` from matching longer element names such as
     * `<w:tbl>` or `<w:tab/>`; `(?<!/)` rejects the self-closing form `<w:t/>`.
     */
    private val DOCX_TOKEN =
        Regex("""<w:t(?:\s[^>]*)?(?<!/)>([^<]*)</w:t>|</w:p>|<w:br(?:\s[^>]*)?/>""")

    /**
     * Supported document formats, listed in detection priority order.
     *
     * @property label display name reported in [FileParseResult.Success.fileType]
     * @property mimeType MIME type identifying the format
     * @property extension file-name suffix (including the dot) identifying the format
     */
    private enum class Format(val label: String, val mimeType: String, val extension: String) {
        PDF("PDF", "application/pdf", ".pdf"),
        TEXT("Text", "text/plain", ".txt"),
        DOCX(
            "Word",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            ".docx"
        );

        /** True when either the MIME [type] or the [fileName] suffix identifies this format. */
        fun matches(type: String?, fileName: String): Boolean =
            type == mimeType || fileName.endsWith(extension, ignoreCase = true)
    }

    /**
     * Initializes PDFBox. Call this in `Application.onCreate` or before the first PDF is parsed.
     *
     * Initialization failures are logged and not rethrown.
     */
    fun initPDFBox(context: Context) {
        try {
            PDFBoxResourceLoader.init(context)
            Log.d(TAG, "PDFBox initialized successfully")
        } catch (e: Exception) {
            Log.e(TAG, "Failed to initialize PDFBox", e)
        }
    }

    /**
     * Parses the file behind [uri] according to its type.
     *
     * The format is chosen from the MIME type reported by the content resolver or, failing
     * that, from the file-name extension. The same decision drives both the parser used and
     * the `fileType` reported in the result.
     *
     * Although declared `suspend`, this function contains no suspension point and does
     * blocking I/O on the calling thread, so invoke it from a background dispatcher.
     *
     * @param context used to reach the content resolver
     * @param uri location of the document to read
     * @return [FileParseResult.Success] with the extracted text, or [FileParseResult.Error]
     *   when the format is unsupported, the file yields no text, or reading fails
     */
    suspend fun parseFile(context: Context, uri: Uri): FileParseResult {
        return try {
            val mimeType = context.contentResolver.getType(uri)
            val fileName = getFileName(context, uri)
            Log.d(TAG, "Parsing file: $fileName, type: $mimeType")

            // Fall back to the file name in the message so an unknown MIME type
            // does not surface to the user as the literal text "null".
            val format = Format.values().firstOrNull { it.matches(mimeType, fileName) }
                ?: return FileParseResult.Error(
                    "Format file tidak didukung: ${mimeType ?: fileName}"
                )

            val content = when (format) {
                Format.PDF -> parsePDF(context, uri)
                Format.TEXT -> parseTXT(context, uri)
                Format.DOCX -> parseDOCX(context, uri)
            }

            if (content.isBlank()) {
                FileParseResult.Error("File kosong atau tidak dapat dibaca")
            } else {
                FileParseResult.Success(
                    content = content,
                    fileName = fileName,
                    fileType = format.label,
                    wordCount = WORD.findAll(content).count()
                )
            }
        } catch (e: java.util.concurrent.CancellationException) {
            // Coroutine cancellation must propagate, not be reported as a parse error.
            throw e
        } catch (e: Exception) {
            Log.e(TAG, "Error parsing file", e)
            FileParseResult.Error("Gagal membaca file: ${e.message}")
        }
    }

    /**
     * Extracts the trimmed text of a PDF document.
     *
     * @throws java.io.IOException if the URI cannot be opened
     */
    private fun parsePDF(context: Context, uri: Uri): String {
        val inputStream = context.contentResolver.openInputStream(uri)
            ?: throw java.io.IOException("Cannot open file")
        return inputStream.use { stream ->
            // `use` closes the document even when text extraction throws.
            PDDocument.load(stream).use { document ->
                PDFTextStripper().getText(document).trim()
            }
        }
    }

    /**
     * Reads a plain-text file as UTF-8 and returns its trimmed content.
     *
     * @throws java.io.IOException if the URI cannot be opened
     */
    private fun parseTXT(context: Context, uri: Uri): String {
        val inputStream = context.contentResolver.openInputStream(uri)
            ?: throw java.io.IOException("Cannot open file")
        return inputStream.use { stream ->
            BufferedReader(InputStreamReader(stream, Charsets.UTF_8))
                .readText()
                .trim()
        }
    }

    /**
     * Extracts the text of a DOCX file (simplified: raw text only, no formatting).
     *
     * A DOCX is a ZIP package; only its `word/document.xml` entry is read. When that entry
     * is missing an empty string is returned.
     *
     * @throws java.io.IOException if the URI cannot be opened or the package cannot be read;
     *   the original failure is attached as the cause
     */
    private fun parseDOCX(context: Context, uri: Uri): String {
        val inputStream = context.contentResolver.openInputStream(uri)
            ?: throw java.io.IOException("Cannot open file")
        return inputStream.use { stream ->
            try {
                java.util.zip.ZipInputStream(stream).use { zip ->
                    // Advance entry by entry; stopping on the match leaves the stream
                    // positioned at that entry's data.
                    val mainPart = generateSequence { zip.nextEntry }
                        .firstOrNull { it.name == DOCX_MAIN_PART }
                    if (mainPart == null) "" else extractDocxText(zip.bufferedReader().readText())
                }
            } catch (e: Exception) {
                Log.e(TAG, "Error parsing DOCX", e)
                throw java.io.IOException("Gagal membaca file DOCX: ${e.message}", e)
            }
        }
    }

    /**
     * Converts the XML of a DOCX main document part into plain text.
     *
     * Text runs are concatenated exactly as stored (whitespace between words is part of the
     * run text itself); every paragraph end and explicit break becomes a newline. The XML is
     * scanned with a regular expression, not an XML parser.
     */
    private fun extractDocxText(xml: String): String =
        buildString {
            for (token in DOCX_TOKEN.findAll(xml)) {
                // Group 1 participates only for the text-run alternative.
                val run = token.groups[1]
                if (run != null) append(decodeXmlEntities(run.value)) else append('\n')
            }
        }.trim()

    /**
     * Decodes the five predefined XML entities. Numeric character references are left as-is.
     */
    private fun decodeXmlEntities(text: String): String =
        if ('&' !in text) {
            text
        } else {
            text.replace("&lt;", "<")
                .replace("&gt;", ">")
                .replace("&quot;", "\"")
                .replace("&apos;", "'")
                // Last, so that "&amp;lt;" decodes to "&lt;" rather than "<".
                .replace("&amp;", "&")
        }

    /**
     * Resolves a display name for [uri].
     *
     * Uses the provider's `DISPLAY_NAME` column when present and non-null; otherwise, for
     * `file` URIs, the last path segment; otherwise `"unknown"`.
     */
    private fun getFileName(context: Context, uri: Uri): String {
        val displayName = context.contentResolver.query(uri, null, null, null, null)?.use { cursor ->
            val nameIndex = cursor.getColumnIndex(android.provider.OpenableColumns.DISPLAY_NAME)
            // getString may return null; keep that as "no name" rather than crashing.
            if (nameIndex != -1 && cursor.moveToFirst()) cursor.getString(nameIndex) else null
        }
        val pathName = if (uri.scheme == "file") uri.lastPathSegment else null
        return displayName ?: pathName ?: UNKNOWN_FILE_NAME
    }

    /**
     * Returns the size in bytes reported by the provider's `SIZE` column for [uri],
     * or `0` when the query yields no row or the column is absent.
     */
    fun getFileSize(context: Context, uri: Uri): Long =
        context.contentResolver.query(uri, null, null, null, null)?.use { cursor ->
            val sizeIndex = cursor.getColumnIndex(android.provider.OpenableColumns.SIZE)
            if (sizeIndex != -1 && cursor.moveToFirst()) cursor.getLong(sizeIndex) else 0L
        } ?: 0L

    /**
     * Formats a byte count for display as whole `B`, `KB` or `MB`.
     *
     * Values are truncated by integer division, e.g. 1536 bytes is shown as `1 KB`.
     */
    fun formatFileSize(bytes: Long): String = when {
        bytes < BYTES_PER_KB -> "$bytes B"
        bytes < BYTES_PER_MB -> "${bytes / BYTES_PER_KB} KB"
        else -> "${bytes / BYTES_PER_MB} MB"
    }
}
/**
 * Outcome of parsing a file: either the extracted text with its metadata,
 * or a message describing why parsing failed.
 */
sealed class FileParseResult {

    /**
     * The file was read and produced text.
     *
     * @property content extracted text
     * @property fileName name of the parsed file
     * @property fileType display name of the file type
     * @property wordCount number of words in [content]
     */
    data class Success(
        val content: String,
        val fileName: String,
        val fileType: String,
        val wordCount: Int
    ) : FileParseResult()

    /**
     * The file could not be parsed.
     *
     * @property message description of the failure
     */
    data class Error(val message: String) : FileParseResult()
}