# autoencoder_custom.py
"""Autoencoder-based anomaly detection with RAM usage reporting.

The entry point :func:`run_autoencoder_custom` reads a CSV file and a JSON
configuration, trains a dense autoencoder on the rows labelled as normal,
scores every row by its reconstruction error and returns classification
metrics together with resident-memory figures sampled during the run.
"""
import warnings

# Kept ahead of the third-party imports so that warnings emitted while those
# packages are being imported are silenced as well.
warnings.filterwarnings("ignore")

import gc
import json
import os

import numpy as np
import pandas as pd
import psutil
from sklearn.metrics import (  # confusion_matrix is unused here; import kept for importers
    accuracy_score,
    confusion_matrix,
    precision_recall_fscore_support,
)
from sklearn.preprocessing import StandardScaler


def _force_memory_cleanup():
    """Run the garbage collector and, where possible, ask libc to trim its heap.

    The ``malloc_trim`` call is best-effort: when ``libc.so.6`` cannot be
    loaded or does not expose ``malloc_trim`` the step is skipped silently.
    """
    gc.collect()
    try:
        import ctypes

        ctypes.CDLL("libc.so.6").malloc_trim(0)
    except (ImportError, OSError, AttributeError):
        # Library or symbol not available on this platform: nothing to trim.
        pass


BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEMP_DIR = os.path.join(BASE_DIR, "temp")


def update_progress(value, progress_path="progress.json"):
    """Write ``{"progress": value}`` as JSON to *progress_path*.

    Args:
        value: Progress value to store (the pipeline passes 5..100).
        progress_path: File that is overwritten with the JSON document.
    """
    with open(progress_path, "w", encoding="utf-8") as f:
        json.dump({"progress": value}, f)


def _update_ram_peak(process, ram_peak):
    """Return the larger of *ram_peak* and the current RSS of *process*.

    Args:
        process: Object exposing ``memory_info().rss`` (a ``psutil.Process``).
        ram_peak: Highest RSS value observed so far.
    """
    current_ram = process.memory_info().rss
    return max(ram_peak, current_ram)


def _build_autoencoder(input_dim, latent_dim, learning_rate):
    """Build and compile the dense autoencoder.

    Layout: 128 -> Dropout(0.1) -> 64 -> 16 -> *latent_dim* -> 16 -> 64 -> 128
    -> *input_dim*. Hidden layers use ReLU, the output layer is linear, and
    the model is compiled with MAE loss and the Adam optimizer.
    """
    # Imported here so TensorFlow is only loaded once the pipeline runs.
    from tensorflow.keras import Model
    from tensorflow.keras.layers import Dense, Dropout, Input
    from tensorflow.keras.optimizers import Adam

    inp = Input(shape=(input_dim,))
    x = Dense(128, activation="relu")(inp)
    x = Dropout(0.1)(x)
    x = Dense(64, activation="relu")(x)
    x = Dense(16, activation="relu")(x)
    code = Dense(latent_dim, activation="relu")(x)
    x = Dense(16, activation="relu")(code)
    x = Dense(64, activation="relu")(x)
    x = Dense(128, activation="relu")(x)
    out = Dense(input_dim, activation="linear")(x)

    autoencoder = Model(inp, out)
    autoencoder.compile(loss="mae", optimizer=Adam(learning_rate=learning_rate))
    return autoencoder


def _top_attack_records(df, display_cols, y_true, errors, limit=5):
    """Return the *limit* true-attack rows with the highest reconstruction error.

    Only rows with ``y_true == 1`` are materialized, so the full frame is
    never copied. Each record holds the *display_cols* values plus the error
    under the key ``"score"``; numeric values are rounded to 4 decimals.
    An empty list is returned when there are no attack rows.
    """
    attack_mask = y_true == 1
    if not attack_mask.any():
        return []

    attacks = df.loc[attack_mask, display_cols].assign(
        reconstruction_error=errors[attack_mask]
    )
    top_attacks = attacks.sort_values("reconstruction_error", ascending=False).head(
        limit
    )
    cols = [c for c in display_cols if c in top_attacks.columns] + [
        "reconstruction_error"
    ]
    return (
        top_attacks[cols]
        .rename(columns={"reconstruction_error": "score"})
        .round(4)
        .to_dict(orient="records")
    )


def run_autoencoder_custom(csv_path=None, config_path=None, progress_path=None):
    """Train an autoencoder on normal rows and evaluate it on the whole CSV.

    The configuration JSON must provide ``labeling.label_column``,
    ``labeling.normal_value`` and ``features.selected_columns``. Optional
    values under ``algorithm.parameters`` (with defaults): ``latent_dim``
    (16), ``learning_rate`` (0.001), ``epochs`` (20), ``batch_size`` (64),
    ``validation_split`` (0.1) and ``threshold_percentile`` (60).

    Rows whose label equals ``normal_value`` are class 0, all others class 1.
    A row is predicted as an attack when its mean absolute reconstruction
    error exceeds the configured percentile of all errors.

    Args:
        csv_path: Input CSV; defaults to ``TEMP_DIR/upload.csv``.
        config_path: Configuration JSON; defaults to ``TEMP_DIR/config.json``.
        progress_path: Progress file; defaults to ``BASE_DIR/progress.json``.

    Returns:
        dict with ``normal_count``, ``attack_count``, ``accuracy``, per-class
        ``precision_*``/``recall_*``/``f1_*`` (``normal`` and ``attack``),
        ``top_anomalies`` (up to five records) and ``ram_before``,
        ``ram_peak``, ``ram_after``, ``ram_increase`` in MiB.

    Raises:
        ValueError: If no row of the CSV matches ``normal_value``, which
            would leave the autoencoder without training data.
    """
    if csv_path is None:
        csv_path = os.path.join(TEMP_DIR, "upload.csv")
    if config_path is None:
        config_path = os.path.join(TEMP_DIR, "config.json")
    if progress_path is None:
        progress_path = os.path.join(BASE_DIR, "progress.json")

    process = psutil.Process()
    _force_memory_cleanup()
    ram_before = process.memory_info().rss
    ram_peak = ram_before
    update_progress(5, progress_path)

    # TensorFlow is imported after the baseline sample so that its footprint
    # is part of the measured increase.
    import tensorflow as tf
    from tensorflow.keras.callbacks import EarlyStopping

    ram_peak = _update_ram_peak(process, ram_peak)

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        ram_peak = _update_ram_peak(process, ram_peak)

        label_col = config["labeling"]["label_column"]
        normal_value = config["labeling"]["normal_value"]
        features = config["features"]["selected_columns"]
        params = config.get("algorithm", {}).get("parameters", {})

        # dict.fromkeys de-duplicates while preserving column order.
        needed_cols = list(dict.fromkeys(features + [label_col]))
        df = pd.read_csv(csv_path, usecols=needed_cols)
        update_progress(15, progress_path)
        ram_peak = _update_ram_peak(process, ram_peak)

        # 0 = normal, 1 = attack (anything that is not exactly normal_value).
        y = (df[label_col] != normal_value).to_numpy(dtype=np.int8)
        update_progress(25, progress_path)
        ram_peak = _update_ram_peak(process, ram_peak)

        normal_count = int((y == 0).sum())
        attack_count = int(y.size - normal_count)
        if normal_count == 0:
            raise ValueError(
                f"No rows in column {label_col!r} equal the configured normal "
                f"value {normal_value!r}; the autoencoder has nothing to train on."
            )

        X_raw = df[features].to_numpy(dtype=np.float32, copy=True)
        ram_peak = _update_ram_peak(process, ram_peak)

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_raw).astype(np.float32, copy=False)
        ram_peak = _update_ram_peak(process, ram_peak)
        # The unscaled matrix is not needed past this point.
        del X_raw

        X_train_normal = X_scaled[y == 0]
        ram_peak = _update_ram_peak(process, ram_peak)

        autoencoder = _build_autoencoder(
            input_dim=X_scaled.shape[1],
            latent_dim=params.get("latent_dim", 16),
            learning_rate=params.get("learning_rate", 0.001),
        )
        update_progress(45, progress_path)
        ram_peak = _update_ram_peak(process, ram_peak)

        earlystopping = EarlyStopping(
            monitor="val_loss", patience=5, restore_best_weights=True
        )
        autoencoder.fit(
            X_train_normal,
            X_train_normal,
            epochs=params.get("epochs", 20),
            batch_size=params.get("batch_size", 64),
            validation_split=params.get("validation_split", 0.1),
            callbacks=[earlystopping],
            shuffle=True,
            verbose=1,
        )
        update_progress(75, progress_path)
        ram_peak = _update_ram_peak(process, ram_peak)

        # Every row (normal and attack) is scored, not only the training rows.
        reconstructions = autoencoder.predict(X_scaled, verbose=0)
        ram_peak = _update_ram_peak(process, ram_peak)

        reconstruction_error = np.mean(np.abs(reconstructions - X_scaled), axis=1)
        ram_peak = _update_ram_peak(process, ram_peak)

        threshold = np.percentile(
            reconstruction_error, params.get("threshold_percentile", 60)
        )
        y_pred = (reconstruction_error > threshold).astype(int)
        update_progress(85, progress_path)
        ram_peak = _update_ram_peak(process, ram_peak)

        accuracy = accuracy_score(y, y_pred)
        precision, recall, f1, _ = precision_recall_fscore_support(
            y,
            y_pred,
            average=None,
            labels=[0, 1],
        )

        results = {
            "normal_count": float(normal_count),
            "attack_count": float(attack_count),
            "accuracy": float(accuracy),
            "precision_normal": float(precision[0]),
            "recall_normal": float(recall[0]),
            "f1_normal": float(f1[0]),
            "precision_attack": float(precision[1]),
            "recall_attack": float(recall[1]),
            "f1_attack": float(f1[1]),
        }

        results["top_anomalies"] = _top_attack_records(
            df, needed_cols, y, reconstruction_error
        )
        ram_peak = _update_ram_peak(process, ram_peak)

        # Drop the large objects before the final cleanup so that ram_after
        # reflects what is still held once the run is over.
        del X_scaled, X_train_normal, y, y_pred
        del reconstructions, reconstruction_error
        del autoencoder, earlystopping, scaler, df
    finally:
        # Runs on failure too, so a crashed run does not leave Keras global
        # state behind. Cleanup is best-effort and must never mask the
        # original error.
        try:
            try:
                tf.keras.backend.clear_session(free_memory=True)
            except TypeError:
                # clear_session() of this Keras version takes no free_memory.
                tf.keras.backend.clear_session()
        except Exception:
            pass
        _force_memory_cleanup()

    ram_after = process.memory_info().rss

    mib = 1024 ** 2
    results["ram_before"] = round(ram_before / mib, 2)
    results["ram_peak"] = round(ram_peak / mib, 2)
    results["ram_after"] = round(ram_after / mib, 2)
    results["ram_increase"] = round((ram_peak - ram_before) / mib, 2)

    update_progress(100, progress_path)
    return results


if __name__ == "__main__":
    res = run_autoencoder_custom()
    print(json.dumps(res, indent=2))