"""Unigram, bigram and trigram language models estimated from one sentence.

For each model the script reads a sentence, tokenizes it, prints the n-gram
frequencies and their maximum-likelihood probabilities, and finally prints
the probability of the whole sentence under that model:

* unigram:  P(s) = P(w1) * P(w2) * ...
* bigram:   P(s) = P(w1) * P(w2|w1) * P(w3|w2) * ...
* trigram:  P(s) = P(w1) * P(w2|w1) * P(w3|w1,w2) * ...

No smoothing is applied; all counts come from the input sentence itself.
"""
from collections import Counter
import math  # noqa: F401 - imported by the original script; kept for compatibility
from typing import Dict, List, Tuple

try:
    from IPython.display import clear_output
except ImportError:
    # IPython is only needed to clear notebook output; the script must still
    # run from a plain terminal where it is not installed.
    clear_output = None

# One multiplicative term of a sentence probability: (label, value),
# e.g. ("P(b|a)", 0.5).
Factor = Tuple[str, float]

_EMPTY_SENTENCE_NOTICE = "\nKalimat kosong: tidak ada token untuk dihitung."


def tokenize(sentence: str) -> List[str]:
    """Return the lowercase, whitespace-separated tokens of *sentence*."""
    return sentence.lower().split()


def ngram_counts(tokens: List[str], n: int) -> Counter:
    """Count every run of *n* consecutive tokens.

    Keys are tuples of length *n*, in order of first appearance. The result
    is empty when *tokens* has fewer than *n* items.
    """
    # zip() over the list shifted by 0..n-1 yields each sliding window and
    # stops at the shortest slice, so no index arithmetic is needed.
    return Counter(zip(*(tokens[offset:] for offset in range(n))))


def unigram_probabilities(tokens: List[str]) -> Dict[str, float]:
    """Return P(w) = Count(w) / total token count for every distinct token."""
    total = len(tokens)
    return {word: count / total for word, count in Counter(tokens).items()}


def bigram_probabilities(tokens: List[str]) -> Dict[Tuple[str, str], float]:
    """Return P(w2 | w1) = Count(w1, w2) / Count(w1) for every bigram."""
    unigram = Counter(tokens)
    return {
        (w1, w2): count / unigram[w1]
        for (w1, w2), count in ngram_counts(tokens, 2).items()
    }


def trigram_probabilities(
    tokens: List[str],
) -> Dict[Tuple[str, str, str], float]:
    """Return P(w3 | w1, w2) = Count(w1, w2, w3) / Count(w1, w2)."""
    bigram = ngram_counts(tokens, 2)
    # Every observed trigram starts with an observed bigram, so the
    # denominator is always at least 1 and needs no zero check.
    return {
        trigram: count / bigram[trigram[:2]]
        for trigram, count in ngram_counts(tokens, 3).items()
    }


def unigram_factors(tokens: List[str]) -> List[Factor]:
    """Return one ``P(w)`` factor per token, in sentence order."""
    probabilities = unigram_probabilities(tokens)
    return [(f"P({word})", probabilities[word]) for word in tokens]


def bigram_factors(tokens: List[str]) -> List[Factor]:
    """Return ``P(w1)`` followed by ``P(wi|wi-1)`` for each later token.

    An empty token list yields an empty factor list.
    """
    if not tokens:
        return []
    first = tokens[0]
    conditional = bigram_probabilities(tokens)
    factors = [(f"P({first})", unigram_probabilities(tokens)[first])]
    factors.extend(
        (f"P({w2}|{w1})", conditional[(w1, w2)])
        for w1, w2 in zip(tokens, tokens[1:])
    )
    return factors


def trigram_factors(tokens: List[str]) -> List[Factor]:
    """Return ``P(w1)``, ``P(w2|w1)``, then ``P(wi|wi-2,wi-1)`` for i >= 3.

    The first two tokens lack a full two-word history, so they back off to
    the unigram and bigram estimates. An empty token list yields an empty
    factor list.
    """
    if not tokens:
        return []
    first = tokens[0]
    factors = [(f"P({first})", unigram_probabilities(tokens)[first])]
    if len(tokens) > 1:
        second = tokens[1]
        factors.append(
            (f"P({second}|{first})",
             bigram_probabilities(tokens)[(first, second)])
        )
    conditional = trigram_probabilities(tokens)
    factors.extend(
        (f"P({w3}|{w1},{w2})", conditional[(w1, w2, w3)])
        for w1, w2, w3 in zip(tokens, tokens[1:], tokens[2:])
    )
    return factors


def sentence_probability(factors: List[Factor]) -> float:
    """Multiply the factor values together, left to right."""
    probability = 1.0
    for _, value in factors:
        probability *= value
    return probability


def _print_sentence_probability(
    model_name: str, tokens: List[str], factors: List[Factor], decimals: int
) -> None:
    """Print the expanded product formula and the resulting probability."""
    formula = " x ".join(f"{label}={value:.2f}" for label, value in factors)
    total = sentence_probability(factors)
    print(f"\nProbabilitas Keseluruhan Kalimat (Model {model_name}):")
    print(
        f" P({' '.join(tokens)}) = {formula} = {total:.{decimals}f} "
        f"({total*100:.2f}%)"
    )


def report_unigram(tokens: List[str]) -> None:
    """Print unigram frequencies, probabilities and the sentence probability."""
    if not tokens:
        # Without this guard an empty product would be reported as 100%.
        print(_EMPTY_SENTENCE_NOTICE)
        return
    counts = Counter(tokens)
    print("\nFrekuensi Unigram dalam kalimat")
    for word, count in counts.items():
        print(f" ('{word}'): {count}")
    print(f"\nTotal unigram dalam 1 kalimat: {sum(counts.values())}")

    print("\nProbabilitas masing-masing unigram:")
    for word, prob in unigram_probabilities(tokens).items():
        print(f" P({word}) = {prob:.2f} ({prob*100:.2f}%)")

    _print_sentence_probability("Unigram", tokens, unigram_factors(tokens), 4)


def report_bigram(tokens: List[str]) -> None:
    """Print bigram frequencies, probabilities and the sentence probability."""
    if not tokens:
        # There is no first word to seed the chain with.
        print(_EMPTY_SENTENCE_NOTICE)
        return
    counts = ngram_counts(tokens, 2)
    print("\nFrekuensi Bigram dalam kalimat:")
    for pair, count in counts.items():
        print(f" {pair}: {count}")
    print(f"\nTotal bigram dalam 1 kalimat: {sum(counts.values())}")

    print("\nProbabilitas masing-masing bigram:")
    for (w1, w2), prob in bigram_probabilities(tokens).items():
        print(f" P({w2}|{w1}) = {prob:.2f} ({prob*100:.2f}%)")

    _print_sentence_probability("Bigram", tokens, bigram_factors(tokens), 6)


def report_trigram(tokens: List[str]) -> None:
    """Print trigram frequencies, probabilities and the sentence probability."""
    if not tokens:
        # There is no first word to seed the chain with.
        print(_EMPTY_SENTENCE_NOTICE)
        return
    counts = ngram_counts(tokens, 3)
    print("\nFrekuensi Trigram dalam kalimat:")
    for trigram, count in counts.items():
        print(f" {trigram}: {count}")
    print(f"\nTotal trigram dalam 1 kalimat: {sum(counts.values())}")

    print("\nProbabilitas masing-masing trigram:")
    for (w1, w2, w3), prob in trigram_probabilities(tokens).items():
        print(f" P({w3}|{w1},{w2}) = {prob:.2f} ({prob*100:.2f}%)")

    _print_sentence_probability("Trigram", tokens, trigram_factors(tokens), 6)


def read_tokens() -> List[str]:
    """Prompt for a sentence, echo it, and return its tokens."""
    sentence = input("Masukkan kalimat: ").strip()

    if clear_output is not None:
        try:
            clear_output()
        except Exception:
            # Clearing the notebook cell is cosmetic; a failure here must
            # not stop the analysis.
            pass

    print(f"Corpus: {sentence}")
    tokens = tokenize(sentence)
    print(f"Tokens ({len(tokens)}): {tokens}")
    return tokens


def main() -> None:
    """Run the unigram, bigram and trigram reports, prompting before each."""
    for report in (report_unigram, report_bigram, report_trigram):
        report(read_tokens())


if __name__ == "__main__":
    main()