diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..452c5df --- /dev/null +++ b/.dockerignore @@ -0,0 +1,14 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +*.git +*.tox +*.nox +*.coverage +*.hypothesis +dist/ +build/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..1c2fda5 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/Hackujeme.iml b/.idea/Hackujeme.iml new file mode 100644 index 0000000..a80cbb1 --- /dev/null +++ b/.idea/Hackujeme.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..f8a22e9 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..b5717f1 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..9661ac7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Backend/__pycache__/autoencoder.cpython-313.pyc b/Backend/__pycache__/autoencoder.cpython-313.pyc index f195b63..5fe60df 100644 Binary files a/Backend/__pycache__/autoencoder.cpython-313.pyc and b/Backend/__pycache__/autoencoder.cpython-313.pyc differ diff --git a/Backend/app.py b/Backend/app.py index b40fa1b..424074a 100644 --- a/Backend/app.py +++ 
b/Backend/app.py @@ -18,9 +18,6 @@ from datetime import datetime from reportlab.pdfgen import canvas from reportlab.lib.pagesizes import A4 from reportlab.lib.units import cm - -from isolation_forest_cicids import run_isolation_forest -from autoencoder import run_autoencoder from isolation_forest_custom import run_isolation_forest_custom from autoencoder_custom import run_autoencoder_custom import signal @@ -37,13 +34,20 @@ FRONTEND_DIR = os.path.join(BASE_DIR, "..", "Frontend") HTML_DIR = os.path.join(FRONTEND_DIR, "HTML Files") JS_DIR = os.path.join(FRONTEND_DIR, "JS") IMG_DIR = os.path.join(FRONTEND_DIR, "img") +CACHE_DIR = os.path.join(BASE_DIR, "cached_results") +CACHE_IFOREST = os.path.join(CACHE_DIR, "cached_iforest_result.json") +CACHE_AUTOENCODER = os.path.join(CACHE_DIR, "cached_autoencoder_result.json") os.makedirs(TEMP_DIR, exist_ok=True) os.makedirs(REPORT_DIR, exist_ok=True) +os.makedirs(CACHE_DIR, exist_ok=True) app = Flask(__name__) CORS(app) +MAX_UPLOAD_SIZE = 30 * 1024 * 1024 +app.config["MAX_CONTENT_LENGTH"] = MAX_UPLOAD_SIZE + def force_memory_cleanup(): gc.collect() try: @@ -131,6 +135,93 @@ def write_json_file(path, data): with open(path, "w", encoding="utf-8") as f: json.dump(data, f, indent=2) +def load_cached_demo_result(job_name): + if job_name == "iforest": + cache_path = CACHE_IFOREST + result_path = RESULT_IFOREST + elif job_name == "autoencoder": + cache_path = CACHE_AUTOENCODER + result_path = RESULT_AUTOENCODER + else: + raise ValueError(f"Unknown cached job: {job_name}") + + cached_data = read_json_file(cache_path) + + if cached_data is None: + raise FileNotFoundError(f"Cached result file not found: {cache_path}") + + if "runtime" not in cached_data: + cached_data["runtime"] = 0 + + write_json_file(result_path, { + "ready": True, + "data": cached_data + }) + + update_progress(100) + +def cached_worker_entry(job_name): + result_path = get_result_path(job_name) + + try: + update_progress(5) + time.sleep(0.4) + + update_progress(25) + 
time.sleep(0.4) + + update_progress(45) + time.sleep(0.4) + + update_progress(65) + time.sleep(0.4) + + update_progress(85) + time.sleep(0.4) + + load_cached_demo_result(job_name) + + except Exception as e: + write_json_file(result_path, { + "ready": True, + "data": { + "error": True, + "message": str(e), + "details": traceback.format_exc() + } + }) + update_progress(0) + +def start_cached_demo_job(job_name): + global is_running, current_job, worker_process + + refresh_worker_state() + + with state_lock: + if is_running: + return False + + is_running = True + current_job = job_name + + result_path = get_result_path(job_name) + + if os.path.exists(result_path): + try: + os.remove(result_path) + except Exception: + pass + + update_progress(0) + + proc = mp.Process(target=cached_worker_entry, args=(job_name,)) + proc.start() + + with state_lock: + worker_process = proc + + return True + def get_result_path(job_name): mapping = { @@ -143,11 +234,7 @@ def get_result_path(job_name): def get_job_runner(job_name): - if job_name == "iforest": - return lambda: run_isolation_forest(plot=False, table=False) - elif job_name == "autoencoder": - return lambda: run_autoencoder(plot=False, table=False) - elif job_name == "iforest_custom": + if job_name == "iforest_custom": return run_isolation_forest_custom elif job_name == "autoencoder_custom": return run_autoencoder_custom @@ -243,25 +330,16 @@ def start_background_job(job_name): return True - -def read_progress(): - try: - with open(PROGRESS_PATH, "r") as f: - return json.load(f).get("progress", 0) - except Exception: - return 0 - -update_progress(0) - - @app.route("/start-iforest", methods=["POST"]) def start_iforest(): - started = start_background_job("iforest") + started = start_cached_demo_job("iforest") + if not started: return jsonify({ "status": "already_running", "message": "Another algorithm is already running" }), 409 + return jsonify({"status": "started"}) @@ -288,12 +366,14 @@ def start_autoencoder_custom(): 
@app.route("/start-autoencoder", methods=["POST"]) def start_autoencoder(): - started = start_background_job("autoencoder") + started = start_cached_demo_job("autoencoder") + if not started: return jsonify({ "status": "already_running", "message": "Another algorithm is already running" }), 409 + return jsonify({"status": "started"}) @@ -602,6 +682,13 @@ def upload_dataset(): file = request.files["file"] + file.seek(0, os.SEEK_END) + file_size = file.tell() + file.seek(0) + + if file_size > MAX_UPLOAD_SIZE: + return jsonify({"error": "File is too big! Maximum size is 30MB."}), 400 + if file.filename == "": return jsonify({"error": "Empty filename"}), 400 diff --git a/Backend/autoencoder.py b/Backend/autoencoder.py deleted file mode 100644 index 0e37b52..0000000 --- a/Backend/autoencoder.py +++ /dev/null @@ -1,223 +0,0 @@ -import warnings -warnings.filterwarnings("ignore") -import psutil -import pandas as pd -import numpy as np -import json -import gc -from sklearn.preprocessing import StandardScaler -from sklearn.metrics import ( - confusion_matrix, - accuracy_score, - precision_recall_fscore_support, -) - -from tabulate import tabulate - - -import os - -def _force_memory_cleanup(): - gc.collect() - try: - import ctypes - ctypes.CDLL("libc.so.6").malloc_trim(0) - except Exception: - pass - -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json") -DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv") - - -def update_progress(value): - with open(PROGRESS_PATH, "w") as f: - json.dump({"progress": value}, f) - - -def run_autoencoder(csv_path=DATASET_PATH, plot=False, table=False): - - import tensorflow as tf - - from tensorflow.keras import Model - from tensorflow.keras.layers import Dense, Input, Dropout - from tensorflow.keras.optimizers import Adam - from tensorflow.keras.callbacks import EarlyStopping - from tensorflow.keras import backend as K - - process = psutil.Process() - ram_before = 
process.memory_info().rss - ram_peak = ram_before - update_progress(1) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - df = pd.read_csv(csv_path) - update_progress(10) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - df["fraud"] = df["Attack Type"].apply( - lambda x: 0 if "normal" in str(x).lower() else 1 - ) - true_labels = df["fraud"] - - normal_count = int((true_labels == 0).sum()) - attack_count = int((true_labels == 1).sum()) - - features = [c for c in df.columns if c not in ["Attack Type", "fraud"]] - scaler = StandardScaler() - - X_raw = df[features].to_numpy(dtype=np.float32, copy=True) - y = df["fraud"].to_numpy(dtype=np.int8, copy=True) - - X_scaled = scaler.fit_transform(X_raw).astype(np.float32, copy=False) - - X_train_normal = X_scaled[y == 0] - X_test = X_scaled - y_test = y - - CODE_DIM = 16 - INPUT_SHAPE = X_scaled.shape[1] - - inp = Input(shape=(INPUT_SHAPE,)) - x = Dense(128, activation="relu")(inp) - x = Dropout(0.1)(x) - x = Dense(64, activation="relu")(x) - x = Dense(16, activation="relu")(x) - code = Dense(CODE_DIM, activation="relu")(x) - x = Dense(16, activation="relu")(code) - x = Dense(64, activation="relu")(x) - x = Dense(128, activation="relu")(x) - out = Dense(INPUT_SHAPE, activation="linear")(x) - - autoencoder = Model(inp, out) - autoencoder.compile(loss="mae", optimizer=Adam(learning_rate=0.001)) - update_progress(40) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - earlystopping = EarlyStopping( - monitor="val_loss", patience=5, restore_best_weights=True - ) - - history = autoencoder.fit( - X_train_normal, - X_train_normal, - epochs=20, - batch_size=64, - validation_split=0.1, - callbacks=[earlystopping], - shuffle=True, - verbose=1, - ) - update_progress(60) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - reconstructions = autoencoder.predict(X_test, verbose=0) - reconstruction_error 
= np.mean(np.abs(reconstructions - X_test), axis=1) - - recons_df = pd.DataFrame( - {"error": reconstruction_error, "y_true": y_test} - ).reset_index(drop=True) - - threshold = np.percentile(recons_df["error"], 60) - recons_df["y_pred"] = (recons_df["error"] > threshold).astype(int) - update_progress(80) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - cm = confusion_matrix(recons_df["y_true"], recons_df["y_pred"]) - accuracy = accuracy_score(recons_df["y_true"], recons_df["y_pred"]) - precision, recall, f1, _ = precision_recall_fscore_support( - recons_df["y_true"], - recons_df["y_pred"], - average=None, - labels=[0, 1], - ) - update_progress(90) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - if table: - table_data = [ - ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"], - ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"], - ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"], - ] - print( - tabulate( - table_data, - headers=["Class", "Precision", "Recall", "F1-Score"], - tablefmt="fancy_grid", - ) - ) - - results = { - "normal_count": float(normal_count), - "attack_count": float(attack_count), - "accuracy": float(accuracy), - "precision_normal": float(precision[0]), - "recall_normal": float(recall[0]), - "f1_normal": float(f1[0]), - "precision_attack": float(precision[1]), - "recall_attack": float(recall[1]), - "f1_attack": float(f1[1]), - } - - candidates = recons_df[ - (recons_df["y_pred"] == 1) & (recons_df["y_true"] == 1) - ].copy() - - - if len(candidates) < 5: - extra = recons_df[recons_df["y_pred"] == 1].copy() - candidates = pd.concat([candidates, extra]).drop_duplicates() - - if len(candidates) < 5: - candidates = recons_df.copy() - - candidates = candidates.sort_values("error", ascending=False).head(5) - idx = candidates.index - - df_top = df.iloc[idx].copy() - df_top["reconstruction_error"] = candidates["error"].values - - 
important_cols = [ - "Attack Type", - "Destination Port", - "Flow Duration", - "Total Fwd Packets", - "Flow Packets/s", - "Packet Length Mean", - ] - cols_exist = [c for c in important_cols if c in df_top.columns] - - top_anomalies = df_top[cols_exist + ["reconstruction_error"]].to_dict( - orient="records" - ) - - results["top_anomalies"] = top_anomalies - - del X_raw, X_scaled, X_train_normal, X_test, y_test, y - del reconstructions, reconstruction_error, recons_df, candidates, df_top - del autoencoder, history, scaler, df - - K.clear_session() - tf.keras.backend.clear_session(free_memory=True) - _force_memory_cleanup() - - ram_after = process.memory_info().rss - results["ram_before"] = round(ram_before / (1024 ** 2), 2) - results["ram_peak"] = round(ram_peak / (1024 ** 2), 2) - results["ram_after"] = round(ram_after / (1024 ** 2), 2) - results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2) - - update_progress(100) - - return results - -if __name__ == "__main__": - res = run_autoencoder(plot=True, table=True) - print(res) diff --git a/Backend/cached_results/cached_autoencoder_result.json b/Backend/cached_results/cached_autoencoder_result.json new file mode 100644 index 0000000..478e613 --- /dev/null +++ b/Backend/cached_results/cached_autoencoder_result.json @@ -0,0 +1,63 @@ +{ + "normal_count": 2095057.0, + "attack_count": 425694.0, + "accuracy": 0.7128641424718268, + "precision_normal": 0.9533234465116556, + "recall_normal": 0.6882175520761488, + "f1_normal": 0.7993634386950771, + "precision_attack": 0.35217494793216303, + "recall_attack": 0.8341625674780476, + "f1_attack": 0.4952573023318089, + "top_anomalies": [ + { + "Attack Type": "DoS", + "Destination Port": 80, + "Flow Duration": 1, + "Total Fwd Packets": 1, + "Flow Packets/s": 2000000.0, + "Packet Length Mean": 1322.0, + "reconstruction_error": 2.6239676475524902 + }, + { + "Attack Type": "DoS", + "Destination Port": 80, + "Flow Duration": 97501845, + "Total Fwd Packets": 4, + "Flow 
Packets/s": 0.071793513, + "Packet Length Mean": 1489.375, + "reconstruction_error": 1.4105286598205566 + }, + { + "Attack Type": "DoS", + "Destination Port": 80, + "Flow Duration": 97509484, + "Total Fwd Packets": 4, + "Flow Packets/s": 0.071787889, + "Packet Length Mean": 1495.875, + "reconstruction_error": 1.401184320449829 + }, + { + "Attack Type": "DoS", + "Destination Port": 80, + "Flow Duration": 95488861, + "Total Fwd Packets": 4, + "Flow Packets/s": 0.07330698, + "Packet Length Mean": 1495.0, + "reconstruction_error": 1.3920245170593262 + }, + { + "Attack Type": "DoS", + "Destination Port": 80, + "Flow Duration": 99036701, + "Total Fwd Packets": 6, + "Flow Packets/s": 0.090875402, + "Packet Length Mean": 1198.9, + "reconstruction_error": 1.3627992868423462 + } + ], + "ram_before": 386.41, + "ram_peak": 4589.86, + "ram_after": 1194.69, + "ram_increase": 4203.45, + "runtime": 719.9 +} \ No newline at end of file diff --git a/Backend/cached_results/cached_iforest_result.json b/Backend/cached_results/cached_iforest_result.json new file mode 100644 index 0000000..d1026e6 --- /dev/null +++ b/Backend/cached_results/cached_iforest_result.json @@ -0,0 +1,64 @@ +{ + "normal_count": 2095057.0, + "attack_count": 425694.0, + "contamination": 0.42218965697127564, + "accuracy": 0.7300296617952349, + "precision_normal": 0.9855814946609444, + "recall_normal": 0.6851985411375442, + "f1_normal": 0.8083876846075615, + "precision_attack": 0.38027193625378214, + "recall_attack": 0.9506664411525649, + "f1_attack": 0.5432434355271513, + "top_anomalies": [ + { + "Attack Type": "Bots", + "Destination Port": 8080, + "Flow Duration": 104836, + "Total Fwd Packets": 35, + "Flow Packets/s": 1096.951429, + "Packet Length Mean": 1797.637931, + "score": -0.3361174695180047 + }, + { + "Attack Type": "Port Scanning", + "Destination Port": 55055, + "Flow Duration": 116005956, + "Total Fwd Packets": 2, + "Flow Packets/s": 0.034480988, + "Packet Length Mean": 3.2, + "score": -0.3274269932966515 
+ }, + { + "Attack Type": "Port Scanning", + "Destination Port": 32781, + "Flow Duration": 117169685, + "Total Fwd Packets": 2, + "Flow Packets/s": 0.034138523, + "Packet Length Mean": 3.2, + "score": -0.32230849167538755 + }, + { + "Attack Type": "Port Scanning", + "Destination Port": 873, + "Flow Duration": 119809735, + "Total Fwd Packets": 2, + "Flow Packets/s": 0.033386269, + "Packet Length Mean": 3.2, + "score": -0.32111814642871583 + }, + { + "Attack Type": "Port Scanning", + "Destination Port": 21571, + "Flow Duration": 117624607, + "Total Fwd Packets": 2, + "Flow Packets/s": 0.03400649, + "Packet Length Mean": 3.2, + "score": -0.31684442769693955 + } + ], + "ram_before": 182.07, + "ram_peak": 3098.45, + "ram_after": 186.78, + "ram_increase": 2916.39, + "runtime": 543.79 +} \ No newline at end of file diff --git a/Backend/dataset/cicids2017_cleaned.csv b/Backend/dataset/cicids2017_cleaned.csv deleted file mode 100644 index 32e5303..0000000 Binary files a/Backend/dataset/cicids2017_cleaned.csv and /dev/null differ diff --git a/Backend/isolation_forest_cicids.py b/Backend/isolation_forest_cicids.py deleted file mode 100644 index 0c45cf9..0000000 --- a/Backend/isolation_forest_cicids.py +++ /dev/null @@ -1,211 +0,0 @@ -import pandas as pd -import numpy as np -import json -import psutil -from sklearn.ensemble import IsolationForest -from sklearn.preprocessing import RobustScaler -from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support -from tabulate import tabulate -import matplotlib.pyplot as plt -import seaborn as sns - -import gc - -import os -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json") -DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv") - -def _force_memory_cleanup(): - gc.collect() - try: - import ctypes - ctypes.CDLL("libc.so.6").malloc_trim(0) - except Exception: - pass - - -def update_progress(value): - with open(PROGRESS_PATH, 
"w") as f: - json.dump({"progress": value}, f) - - -def run_isolation_forest(csv_path=DATASET_PATH, plot=False, table=False): - process = psutil.Process() - ram_before = process.memory_info().rss - ram_peak = ram_before - - update_progress(1) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - df = pd.read_csv(csv_path) - update_progress(10) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - df["fraud"] = df["Attack Type"].apply( - lambda x: 0 if "normal" in str(x).lower() else 1 - ) - true_labels = df["fraud"] - update_progress(20) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - normal_count = int((true_labels == 0).sum()) - attack_count = int((true_labels == 1).sum()) - - attack_fraction = true_labels.mean() - contamination = min(attack_fraction * 2.5, 0.49) - - X = df.select_dtypes(include=[np.number]) - X = X.loc[:, X.std() > 0.01] - X = X.to_numpy(dtype=np.float32, copy=True) - update_progress(30) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - scaler = RobustScaler() - X_scaled = scaler.fit_transform(X).astype(np.float32, copy=False) - update_progress(40) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - model = IsolationForest( - n_estimators=600, - max_samples=0.3, - contamination=contamination, - max_features=0.7, - bootstrap=False, - random_state=42, - n_jobs=1, - ) - model.fit(X_scaled) - update_progress(70) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - preds = model.predict(X_scaled) - df["pred_label"] = np.where(preds == 1, 0, 1) - df["anomaly_score"] = model.decision_function(X_scaled) - update_progress(85) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - cm = confusion_matrix(true_labels, df["pred_label"]) - accuracy = accuracy_score(true_labels, df["pred_label"]) - precision, recall, f1, _ = 
precision_recall_fscore_support( - true_labels, df["pred_label"], average=None, labels=[0, 1] - ) - update_progress(95) - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - if table: - table_data = [ - ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"], - ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"], - ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"], - ] - print( - tabulate( - table_data, - headers=["Class", "Precision", "Recall", "F1-Score"], - tablefmt="fancy_grid", - ) - ) - - if plot: - plt.figure(figsize=(10, 5)) - scatter = plt.scatter( - range(len(df)), - df["anomaly_score"], - c=df["pred_label"], - cmap="coolwarm", - s=10, - ) - plt.xlabel("Instance") - plt.ylabel("Anomaly Score") - plt.title("Anomaly Score Distribution (Isolation Forest)") - handles, labels = scatter.legend_elements() - plt.legend(handles, ["Normal", "Anomaly"], title="Predicted") - plt.show() - - plt.figure(figsize=(5, 4)) - sns.heatmap( - cm, - annot=True, - fmt="d", - cmap="Blues", - xticklabels=["Normal", "Attack"], - yticklabels=["Normal", "Attack"], - ) - plt.xlabel("Predicted Label") - plt.ylabel("True Label") - plt.title("Confusion Matrix (Isolation Forest)") - plt.show() - - results = { - "normal_count": float(normal_count), - "attack_count": float(attack_count), - "contamination": float(contamination), - "accuracy": float(accuracy), - "precision_normal": float(precision[0]), - "recall_normal": float(recall[0]), - "f1_normal": float(f1[0]), - "precision_attack": float(precision[1]), - "recall_attack": float(recall[1]), - "f1_attack": float(f1[1]), - } - - - - candidates = df[(df["pred_label"] == 1) & (df["fraud"] == 1)].copy() - - - if len(candidates) < 5: - extra = df[df["pred_label"] == 1].copy() - candidates = pd.concat([candidates, extra]).drop_duplicates() - - if len(candidates) < 5: - candidates = df.copy() - - candidates = candidates.sort_values("anomaly_score").head(5) - - important_cols 
= [ - "Attack Type", - "Destination Port", - "Flow Duration", - "Total Fwd Packets", - "Flow Packets/s", - "Packet Length Mean", - ] - - cols_exist = [c for c in important_cols if c in candidates.columns] - - top_anomalies = candidates[cols_exist + ["anomaly_score"]].rename( - columns={"anomaly_score": "score"} - ).to_dict(orient="records") - - results["top_anomalies"] = top_anomalies - - current_ram = process.memory_info().rss - ram_peak = max(ram_peak, current_ram) - - - del X, X_scaled, preds, model, scaler, candidates, df, cm, true_labels - _force_memory_cleanup() - - ram_after = process.memory_info().rss - - results["ram_before"] = round(ram_before / (1024 ** 2), 2) - results["ram_peak"] = round(ram_peak / (1024 ** 2), 2) - results["ram_after"] = round(ram_after / (1024 ** 2), 2) - results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2) - - return results - - -if __name__ == "__main__": - res = run_isolation_forest(plot=True, table=True) - print(res) diff --git a/Frontend/HTML Files/default.html b/Frontend/HTML Files/default.html index 1dc276b..33b2da2 100644 --- a/Frontend/HTML Files/default.html +++ b/Frontend/HTML Files/default.html @@ -86,7 +86,7 @@

CICIDS Demo

- Here, you can test the algoritmhs on + Here, you can see the results of both algorithms, which were tested on the CICIDS dataset.

diff --git a/Frontend/HTML Files/upload.html b/Frontend/HTML Files/upload.html index 55a9569..da922be 100644 --- a/Frontend/HTML Files/upload.html +++ b/Frontend/HTML Files/upload.html @@ -231,10 +231,10 @@ const uploadField = document.getElementById("file_input"); uploadField.onchange = function() { - const MAX_SIZE = 50 * 1024 * 1024; + const MAX_SIZE = 30 * 1024 * 1024; if(this.files[0].size > MAX_SIZE) { - showErrorPopup("File is too big! Maximum size is 50MB."); + showErrorPopup("File is too big! Maximum size is 30MB!"); this.value = ""; } }; diff --git a/Frontend/JS/frontend.js b/Frontend/JS/frontend.js index fde0217..469a062 100644 --- a/Frontend/JS/frontend.js +++ b/Frontend/JS/frontend.js @@ -139,6 +139,7 @@ function fetchFinalResults(retryCount = 0) { setTimeout(() => fetchFinalResults(retryCount + 1), 1000); return; } + hideLoading(); clearActiveJob(); showErrorPopup("Final results are not ready."); @@ -156,7 +157,7 @@ function fetchFinalResults(retryCount = 0) { displayResults(json.data); }) - .catch(err => { + .catch(() => { hideLoading(); clearActiveJob(); showErrorPopup("Error fetching final results."); @@ -187,7 +188,7 @@ function startProgressPolling() { progressInterval = null; fetchFinalResults(); } - } catch (err) { + } catch { clearInterval(progressInterval); progressInterval = null; hideLoading(); @@ -198,7 +199,7 @@ function startProgressPolling() { showErrorPopup("Connection to backend was lost.
Backend may have stopped."); } } - }, 1500); + }, 500); } async function restoreRunningJob() { @@ -209,12 +210,33 @@ async function restoreRunningJob() { const res = await fetch("/get-status"); const status = await res.json(); + applySavedAlgorithm(saved); + if (status.current_job !== saved.backendJob) { + try { + const resultRes = await fetch(getResultURL()); + const resultJson = await resultRes.json(); + + if (resultJson.ready) { + hideLoading(); + clearActiveJob(); + lastResults = resultJson.data; + + if (resultJson.data?.error) { + showErrorPopup(`Algorithm failed.
${resultJson.data.message}`); + return; + } + + displayResults(resultJson.data); + return; + } + } catch { + } + clearActiveJob(); return; } - applySavedAlgorithm(saved); showLoading(`Resuming ${chosenAlgorithmName}...
This may take a while.`); updateProgressUI(status.progress || 0); @@ -223,21 +245,21 @@ async function restoreRunningJob() { } else { fetchFinalResults(); } - } catch (err) { - hideLoading(); - clearActiveJob(); + } catch { + hideLoading(); + clearActiveJob(); - if (!backendErrorShown) { - backendErrorShown = true; - showErrorPopup("Connection to backend was lost.
Please start the backend and try again."); + if (!backendErrorShown) { + backendErrorShown = true; + showErrorPopup("Connection to backend was lost.
Please start the backend and try again."); + } } } -} startBtn.addEventListener("click", () => { if (!chosenAlgorithm) return; - showLoading(); + showLoading(`Loading prepared ${chosenAlgorithmName} results...
This may take a moment.`); backendErrorShown = false; saveActiveJob(); @@ -264,8 +286,11 @@ startBtn.addEventListener("click", () => { throw new Error("Server error"); } + await res.json(); + const saved = loadActiveJob(); startTime = saved?.startedAt ?? Date.now(); + startProgressPolling(); }) .catch(err => { @@ -282,10 +307,12 @@ function displayResults(data) { const container = document.getElementById("resultContainer"); container.classList.remove("hidden"); - const runtime = ((Date.now() - startTime) / 1000).toFixed(2); + const runtime = data.runtime !== undefined && data.runtime !== null + ? Number(data.runtime).toFixed(2) + : ((Date.now() - startTime) / 1000).toFixed(2); + + let topAnomaliesHTML = ""; - // Dynamicky vytvoríme tabuľku pre Top 5 anomálie (ak existujú) - let topAnomaliesHTML = ''; if (data.top_anomalies && data.top_anomalies.length > 0) { const headers = Object.keys(data.top_anomalies[0]); @@ -298,7 +325,7 @@ function displayResults(data) { - ${headers.map(h => ``).join('')} + ${headers.map(h => ``).join("")} @@ -306,9 +333,9 @@ function displayResults(data) { ${headers.map(key => { const val = row[key]; - return ``; - }).join('')} - `).join('')} + return ``; + }).join("")} + `).join("")}
${h}${h}
${typeof val === 'number' ? val.toFixed(4) : val}
${typeof val === "number" ? val.toFixed(4) : val}
@@ -320,7 +347,6 @@ function displayResults(data) { Results -
@@ -354,15 +380,12 @@ function displayResults(data) {
-
-

Class distribution

-

Performance

@@ -379,7 +402,6 @@ function displayResults(data) { ${topAnomaliesHTML} -
numeric `; + featureList.appendChild(label); }); @@ -203,6 +339,7 @@ btnStep3Next.addEventListener("click", () => { updateCounter(); show(step4); + hide(step5); hide(featureError); }); @@ -219,18 +356,39 @@ function updateCounter() { } btnRun.addEventListener("click", async () => { - - const labelColumnName = headers[labelIndex]; - let normalValue = normalBox.querySelector("input:checked").value; - - if (!isNaN(normalValue)) { - normalValue = Number(normalValue); - } + if (await isBackendBusy()) { + showPopup(BUSY_MESSAGE); + resetUploadFlow(); + return; + } const selectedFeatures = Array.from( featureList.querySelectorAll("input:checked") ).map(cb => headers[cb.dataset.col]); + if (selectedFeatures.length !== MAX_FEATURES) { + featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`; + show(featureError); + return; + } + + hide(featureError); + + const labelColumnName = headers[labelIndex]; + const normalInput = normalBox.querySelector("input:checked"); + + if (!normalInput) { + normalError.textContent = "Select exactly one NORMAL value."; + show(normalError); + return; + } + + let normalValue = normalInput.value; + + if (!isNaN(normalValue)) { + normalValue = Number(normalValue); + } + const config = { dataset: { file_path: "temp/upload.csv", @@ -247,25 +405,32 @@ btnRun.addEventListener("click", async () => { expected_feature_count: selectedFeatures.length }, algorithm: { - name: "isolation_forest" + name: "custom_dataset" } }; - await fetch("/save-config", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(config) - }); - const selected = featureList.querySelectorAll("input:checked").length; + let saveResponse; - if (selected !== MAX_FEATURES) { - featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`; - show(featureError); + try { + saveResponse = await fetch("/save-config", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: 
JSON.stringify(config) + }); + } catch { + showPopup("Configuration could not be saved. Backend server is not available."); return; } - hide(featureError); + if (!saveResponse.ok) { + if (saveResponse.status !== 409) { + const message = await readErrorMessage(saveResponse, "Configuration could not be saved."); + showPopup(message); + } - show(step5); -}); + hide(step5); + return; + } + show(step5); +}); \ No newline at end of file diff --git a/Frontend/JS/upload_choosing_fetch.js b/Frontend/JS/upload_choosing_fetch.js index f2b612f..214f4ee 100644 --- a/Frontend/JS/upload_choosing_fetch.js +++ b/Frontend/JS/upload_choosing_fetch.js @@ -287,9 +287,22 @@ async function restoreRunningJob() { } } -startBtn.addEventListener("click", () => { +startBtn.addEventListener("click", async () => { if (!chosenAlgorithm) return; + try { + const statusRes = await fetch("/get-status"); + const status = await statusRes.json(); + + if (status.running) { + showErrorPopup("Another algorithm is already running.
Please wait until it finishes."); + return; + } + } catch { + showErrorPopup("Connection to backend was lost.
Please start the backend and try again."); + return; + } + showLoading(); backendErrorShown = false; saveActiveJob(); diff --git a/progress.json b/progress.json deleted file mode 100644 index 2cc70c8..0000000 --- a/progress.json +++ /dev/null @@ -1 +0,0 @@ -{"progress": 100} \ No newline at end of file