"""Train a linear-kernel SVM on ``cell_samples.csv`` and print evaluation metrics.

The script loads the data, drops rows whose ``BareNuc`` value is not numeric,
builds the feature matrix and a binary label, fits ``sklearn.svm.SVC`` with a
linear kernel and prints the weighted F1 score, a manually computed Jaccard
score and the classification report.
"""

import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, f1_score


# Manual Jaccard computation (kept for older scikit-learn versions).
def jaccard_manual(y_true, y_pred):
    """Return the Jaccard index of the positive class (label ``1``).

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same length as ``y_true``.

    Returns:
        ``|true == 1 AND pred == 1| / |true == 1 OR pred == 1|``, or ``0.0``
        when neither input contains the positive label.
    """
    # Coerce to arrays so plain Python lists compare element-wise; without
    # this, ``[0, 1] == 1`` is a single ``False`` and the score is wrong.
    true_pos = np.asarray(y_true) == 1
    pred_pos = np.asarray(y_pred) == 1

    intersection = np.logical_and(true_pos, pred_pos).sum()
    union = np.logical_or(true_pos, pred_pos).sum()
    return intersection / union if union != 0 else 0.0


# ================================
# 1. Load data
# ================================
cell_df = pd.read_csv("cell_samples.csv")

# ================================
# 2. Clean the BareNuc column
# ================================
# Keep only rows whose BareNuc parses as a number. The explicit copy makes the
# column assignment below operate on an independent frame instead of a slice
# of the original (avoids pandas' SettingWithCopyWarning).
numeric_bare_nuc = pd.to_numeric(cell_df['BareNuc'], errors='coerce').notnull()
cell_df = cell_df[numeric_bare_nuc].copy()
cell_df['BareNuc'] = cell_df['BareNuc'].astype('int')

# ================================
# 3. Build features and label
# ================================
feature_df = cell_df[['Clump', 'UnifSize', 'UnifShape', 'MargAdh',
                      'SingEpiSize', 'BareNuc', 'BlandChrom', 'NormNucl',
                      'Mit']].astype(float)

X = np.asarray(feature_df)
y = np.where(cell_df['Class'] == 2, 0, 1)  # 0 = Benign, 1 = Malignant

# ================================
# 4. Train/test split
# ================================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)

# ================================
# 5. SVM model with a linear kernel
# ================================
model = svm.SVC(kernel='linear')
model.fit(X_train, y_train)

# ================================
# 6. Predict
# ================================
y_pred = model.predict(X_test)

# ================================
# 7. Evaluate
# ================================
print("Avg F1-score:", f1_score(y_test, y_pred, average='weighted'))
print("Jaccard score:", jaccard_manual(y_test, y_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))