import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import svm
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split

# Columns of the CSV that are used as model inputs.
FEATURE_COLUMNS = [
    'Clump', 'UnifSize', 'UnifShape', 'MargAdh',
    'SingEpiSize', 'BareNuc', 'BlandChrom', 'NormNucl', 'Mit',
]


# Manual Jaccard computation (the original note says: for older scikit-learn).
def jaccard_manual(y_true, y_pred):
    """Return the Jaccard index of the positive class (label ``1``).

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same length as ``y_true``.

    Returns:
        The number of positions where both inputs equal 1, divided by the
        number of positions where at least one of them equals 1. Returns
        ``0.0`` when no position is labelled 1 in either input.
    """
    # Coerce to arrays so plain lists compare element-wise rather than
    # collapsing ``y_true == 1`` into a single ``False``.
    true_pos = np.asarray(y_true) == 1
    pred_pos = np.asarray(y_pred) == 1
    intersection = np.logical_and(true_pos, pred_pos).sum()
    union = np.logical_or(true_pos, pred_pos).sum()
    return intersection / union if union != 0 else 0.0


# ================================
# 1. Load data
# ================================
cell_df = pd.read_csv("cell_samples.csv")

# ================================
# 2. Clean the BareNuc column
# ================================
# Keep only rows whose BareNuc value parses as a number. The explicit copy
# makes the column assignment below write to an independent frame instead of
# a slice of the original (avoids pandas' SettingWithCopyWarning).
numeric_rows = pd.to_numeric(cell_df['BareNuc'], errors='coerce').notnull()
cell_df = cell_df[numeric_rows].copy()
cell_df['BareNuc'] = cell_df['BareNuc'].astype('int')

# ================================
# 3. Build features and labels
# ================================
feature_df = cell_df[FEATURE_COLUMNS].astype(float)
X = np.asarray(feature_df)
# Class code 2 becomes 0 (benign); every other code becomes 1 (malignant).
y = np.where(cell_df['Class'] == 2, 0, 1)

# ================================
# 4. Train/test split
# ================================
# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)

# ================================
# 5. SVM model with a linear kernel
# ================================
model = svm.SVC(kernel='linear')
model.fit(X_train, y_train)

# ================================
# 6. Predict
# ================================
y_pred = model.predict(X_test)

# ================================
# 7. Evaluate
# ================================
print("Avg F1-score:", f1_score(y_test, y_pred, average='weighted'))
print("Jaccard score:", jaccard_manual(y_test, y_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))