diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..452c5df
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,14 @@
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+env/
+venv/
+*.git
+*.tox
+*.nox
+*.coverage
+*.hypothesis
+dist/
+build/
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..1c2fda5
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/Hackujeme.iml b/.idea/Hackujeme.iml
new file mode 100644
index 0000000..a80cbb1
--- /dev/null
+++ b/.idea/Hackujeme.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..f8a22e9
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..b5717f1
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..9661ac7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Backend/__pycache__/autoencoder.cpython-313.pyc b/Backend/__pycache__/autoencoder.cpython-313.pyc
index f195b63..5fe60df 100644
Binary files a/Backend/__pycache__/autoencoder.cpython-313.pyc and b/Backend/__pycache__/autoencoder.cpython-313.pyc differ
diff --git a/Backend/app.py b/Backend/app.py
index b40fa1b..424074a 100644
--- a/Backend/app.py
+++ b/Backend/app.py
@@ -18,9 +18,6 @@ from datetime import datetime
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import cm
-
-from isolation_forest_cicids import run_isolation_forest
-from autoencoder import run_autoencoder
from isolation_forest_custom import run_isolation_forest_custom
from autoencoder_custom import run_autoencoder_custom
import signal
@@ -37,13 +34,20 @@ FRONTEND_DIR = os.path.join(BASE_DIR, "..", "Frontend")
HTML_DIR = os.path.join(FRONTEND_DIR, "HTML Files")
JS_DIR = os.path.join(FRONTEND_DIR, "JS")
IMG_DIR = os.path.join(FRONTEND_DIR, "img")
+# Pre-computed demo results; load_cached_demo_result() copies them into the result files.
+CACHE_DIR = os.path.join(BASE_DIR, "cached_results")
+CACHE_IFOREST = os.path.join(CACHE_DIR, "cached_iforest_result.json")
+CACHE_AUTOENCODER = os.path.join(CACHE_DIR, "cached_autoencoder_result.json")
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(REPORT_DIR, exist_ok=True)
+os.makedirs(CACHE_DIR, exist_ok=True)
app = Flask(__name__)
CORS(app)
+# Upload cap in bytes (30 MiB). Setting MAX_CONTENT_LENGTH makes Flask itself
+# refuse larger request bodies (413 Request Entity Too Large).
+MAX_UPLOAD_SIZE = 30 * 1024 * 1024
+app.config["MAX_CONTENT_LENGTH"] = MAX_UPLOAD_SIZE
+
def force_memory_cleanup():
gc.collect()
try:
@@ -131,6 +135,93 @@ def write_json_file(path, data):
with open(path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
+def load_cached_demo_result(job_name):
+    """Publish the pre-computed demo result for ``job_name`` as its final result.
+
+    Reads the cached JSON for the job, makes sure it carries a ``runtime``
+    entry (0 when absent), writes it wrapped in ``{"ready": True, "data": ...}``
+    to the job's result file and sets progress to 100.
+
+    Raises:
+        ValueError: ``job_name`` is neither "iforest" nor "autoencoder".
+        FileNotFoundError: ``read_json_file`` returned ``None`` for the cache file.
+    """
+    # job name -> (cached input file, result output file)
+    job_paths = {
+        "iforest": (CACHE_IFOREST, RESULT_IFOREST),
+        "autoencoder": (CACHE_AUTOENCODER, RESULT_AUTOENCODER),
+    }
+
+    paths = job_paths.get(job_name)
+    if paths is None:
+        raise ValueError(f"Unknown cached job: {job_name}")
+    source_path, target_path = paths
+
+    payload = read_json_file(source_path)
+    if payload is None:
+        raise FileNotFoundError(f"Cached result file not found: {source_path}")
+
+    if "runtime" not in payload:
+        payload["runtime"] = 0
+
+    write_json_file(target_path, {"ready": True, "data": payload})
+    update_progress(100)
+
+
+def cached_worker_entry(job_name):
+    """Worker-process entry point that replays a cached demo result.
+
+    Steps the progress value through 5, 25, 45, 65 and 85 with a 0.4 s pause
+    after each step, then hands over to ``load_cached_demo_result``. Any
+    exception is reported through the job's result file as an error payload
+    (message plus traceback) and progress is reset to 0.
+    """
+    output_path = get_result_path(job_name)
+
+    try:
+        # Staged progress values; presumably so the progress display animates
+        # before the cached result appears.
+        for percent in (5, 25, 45, 65, 85):
+            update_progress(percent)
+            time.sleep(0.4)
+
+        load_cached_demo_result(job_name)
+    except Exception as exc:
+        failure = {
+            "error": True,
+            "message": str(exc),
+            "details": traceback.format_exc(),
+        }
+        write_json_file(output_path, {"ready": True, "data": failure})
+        update_progress(0)
+
+
+def start_cached_demo_job(job_name):
+    """Start a worker process that replays the cached result for ``job_name``.
+
+    Returns:
+        bool: ``False`` when another job is already marked as running,
+        ``True`` once the worker process has been started.
+
+    Raises:
+        Exception: whatever preparing or starting the worker raised. The
+        running state is rolled back first, so a failed start does not leave
+        the job slot claimed with no worker behind it.
+    """
+    global is_running, current_job, worker_process
+
+    refresh_worker_state()
+
+    # Claim the single job slot while holding the lock.
+    with state_lock:
+        if is_running:
+            return False
+
+        previous_job = current_job
+        is_running = True
+        current_job = job_name
+
+    try:
+        result_path = get_result_path(job_name)
+
+        # Best effort: remove a stale result file left by an earlier run.
+        try:
+            os.remove(result_path)
+        except OSError:
+            pass
+
+        update_progress(0)
+
+        proc = mp.Process(target=cached_worker_entry, args=(job_name,))
+        proc.start()
+    except Exception:
+        # Undo the claim made above before propagating the failure.
+        with state_lock:
+            is_running = False
+            current_job = previous_job
+        raise
+
+    with state_lock:
+        worker_process = proc
+
+    return True
+
def get_result_path(job_name):
mapping = {
@@ -143,11 +234,7 @@ def get_result_path(job_name):
def get_job_runner(job_name):
- if job_name == "iforest":
- return lambda: run_isolation_forest(plot=False, table=False)
- elif job_name == "autoencoder":
- return lambda: run_autoencoder(plot=False, table=False)
- elif job_name == "iforest_custom":
+ if job_name == "iforest_custom":
return run_isolation_forest_custom
elif job_name == "autoencoder_custom":
return run_autoencoder_custom
@@ -243,25 +330,16 @@ def start_background_job(job_name):
return True
-
-def read_progress():
- try:
- with open(PROGRESS_PATH, "r") as f:
- return json.load(f).get("progress", 0)
- except Exception:
- return 0
-
-update_progress(0)
-
-
@app.route("/start-iforest", methods=["POST"])
def start_iforest():
- started = start_background_job("iforest")
+    # Starts the cached-result demo job for Isolation Forest; responds 409 when
+    # another job is already running, otherwise {"status": "started"}.
+ started = start_cached_demo_job("iforest")
+
if not started:
return jsonify({
"status": "already_running",
"message": "Another algorithm is already running"
}), 409
+
return jsonify({"status": "started"})
@@ -288,12 +366,14 @@ def start_autoencoder_custom():
@app.route("/start-autoencoder", methods=["POST"])
def start_autoencoder():
- started = start_background_job("autoencoder")
+    # Starts the cached-result demo job for the autoencoder; responds 409 when
+    # another job is already running, otherwise {"status": "started"}.
+ started = start_cached_demo_job("autoencoder")
+
if not started:
return jsonify({
"status": "already_running",
"message": "Another algorithm is already running"
}), 409
+
return jsonify({"status": "started"})
@@ -602,6 +682,13 @@ def upload_dataset():
file = request.files["file"]
+ file.seek(0, os.SEEK_END)
+ file_size = file.tell()
+ file.seek(0)
+
+ if file_size > MAX_UPLOAD_SIZE:
+ return jsonify({"error": "File is too big! Maximum size is 30MB."}), 400
+
if file.filename == "":
return jsonify({"error": "Empty filename"}), 400
diff --git a/Backend/autoencoder.py b/Backend/autoencoder.py
deleted file mode 100644
index 0e37b52..0000000
--- a/Backend/autoencoder.py
+++ /dev/null
@@ -1,223 +0,0 @@
-import warnings
-warnings.filterwarnings("ignore")
-import psutil
-import pandas as pd
-import numpy as np
-import json
-import gc
-from sklearn.preprocessing import StandardScaler
-from sklearn.metrics import (
- confusion_matrix,
- accuracy_score,
- precision_recall_fscore_support,
-)
-
-from tabulate import tabulate
-
-
-import os
-
-def _force_memory_cleanup():
- gc.collect()
- try:
- import ctypes
- ctypes.CDLL("libc.so.6").malloc_trim(0)
- except Exception:
- pass
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json")
-DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv")
-
-
-def update_progress(value):
- with open(PROGRESS_PATH, "w") as f:
- json.dump({"progress": value}, f)
-
-
-def run_autoencoder(csv_path=DATASET_PATH, plot=False, table=False):
-
- import tensorflow as tf
-
- from tensorflow.keras import Model
- from tensorflow.keras.layers import Dense, Input, Dropout
- from tensorflow.keras.optimizers import Adam
- from tensorflow.keras.callbacks import EarlyStopping
- from tensorflow.keras import backend as K
-
- process = psutil.Process()
- ram_before = process.memory_info().rss
- ram_peak = ram_before
- update_progress(1)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- df = pd.read_csv(csv_path)
- update_progress(10)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- df["fraud"] = df["Attack Type"].apply(
- lambda x: 0 if "normal" in str(x).lower() else 1
- )
- true_labels = df["fraud"]
-
- normal_count = int((true_labels == 0).sum())
- attack_count = int((true_labels == 1).sum())
-
- features = [c for c in df.columns if c not in ["Attack Type", "fraud"]]
- scaler = StandardScaler()
-
- X_raw = df[features].to_numpy(dtype=np.float32, copy=True)
- y = df["fraud"].to_numpy(dtype=np.int8, copy=True)
-
- X_scaled = scaler.fit_transform(X_raw).astype(np.float32, copy=False)
-
- X_train_normal = X_scaled[y == 0]
- X_test = X_scaled
- y_test = y
-
- CODE_DIM = 16
- INPUT_SHAPE = X_scaled.shape[1]
-
- inp = Input(shape=(INPUT_SHAPE,))
- x = Dense(128, activation="relu")(inp)
- x = Dropout(0.1)(x)
- x = Dense(64, activation="relu")(x)
- x = Dense(16, activation="relu")(x)
- code = Dense(CODE_DIM, activation="relu")(x)
- x = Dense(16, activation="relu")(code)
- x = Dense(64, activation="relu")(x)
- x = Dense(128, activation="relu")(x)
- out = Dense(INPUT_SHAPE, activation="linear")(x)
-
- autoencoder = Model(inp, out)
- autoencoder.compile(loss="mae", optimizer=Adam(learning_rate=0.001))
- update_progress(40)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- earlystopping = EarlyStopping(
- monitor="val_loss", patience=5, restore_best_weights=True
- )
-
- history = autoencoder.fit(
- X_train_normal,
- X_train_normal,
- epochs=20,
- batch_size=64,
- validation_split=0.1,
- callbacks=[earlystopping],
- shuffle=True,
- verbose=1,
- )
- update_progress(60)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- reconstructions = autoencoder.predict(X_test, verbose=0)
- reconstruction_error = np.mean(np.abs(reconstructions - X_test), axis=1)
-
- recons_df = pd.DataFrame(
- {"error": reconstruction_error, "y_true": y_test}
- ).reset_index(drop=True)
-
- threshold = np.percentile(recons_df["error"], 60)
- recons_df["y_pred"] = (recons_df["error"] > threshold).astype(int)
- update_progress(80)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- cm = confusion_matrix(recons_df["y_true"], recons_df["y_pred"])
- accuracy = accuracy_score(recons_df["y_true"], recons_df["y_pred"])
- precision, recall, f1, _ = precision_recall_fscore_support(
- recons_df["y_true"],
- recons_df["y_pred"],
- average=None,
- labels=[0, 1],
- )
- update_progress(90)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- if table:
- table_data = [
- ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"],
- ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"],
- ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"],
- ]
- print(
- tabulate(
- table_data,
- headers=["Class", "Precision", "Recall", "F1-Score"],
- tablefmt="fancy_grid",
- )
- )
-
- results = {
- "normal_count": float(normal_count),
- "attack_count": float(attack_count),
- "accuracy": float(accuracy),
- "precision_normal": float(precision[0]),
- "recall_normal": float(recall[0]),
- "f1_normal": float(f1[0]),
- "precision_attack": float(precision[1]),
- "recall_attack": float(recall[1]),
- "f1_attack": float(f1[1]),
- }
-
- candidates = recons_df[
- (recons_df["y_pred"] == 1) & (recons_df["y_true"] == 1)
- ].copy()
-
-
- if len(candidates) < 5:
- extra = recons_df[recons_df["y_pred"] == 1].copy()
- candidates = pd.concat([candidates, extra]).drop_duplicates()
-
- if len(candidates) < 5:
- candidates = recons_df.copy()
-
- candidates = candidates.sort_values("error", ascending=False).head(5)
- idx = candidates.index
-
- df_top = df.iloc[idx].copy()
- df_top["reconstruction_error"] = candidates["error"].values
-
- important_cols = [
- "Attack Type",
- "Destination Port",
- "Flow Duration",
- "Total Fwd Packets",
- "Flow Packets/s",
- "Packet Length Mean",
- ]
- cols_exist = [c for c in important_cols if c in df_top.columns]
-
- top_anomalies = df_top[cols_exist + ["reconstruction_error"]].to_dict(
- orient="records"
- )
-
- results["top_anomalies"] = top_anomalies
-
- del X_raw, X_scaled, X_train_normal, X_test, y_test, y
- del reconstructions, reconstruction_error, recons_df, candidates, df_top
- del autoencoder, history, scaler, df
-
- K.clear_session()
- tf.keras.backend.clear_session(free_memory=True)
- _force_memory_cleanup()
-
- ram_after = process.memory_info().rss
- results["ram_before"] = round(ram_before / (1024 ** 2), 2)
- results["ram_peak"] = round(ram_peak / (1024 ** 2), 2)
- results["ram_after"] = round(ram_after / (1024 ** 2), 2)
- results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2)
-
- update_progress(100)
-
- return results
-
-if __name__ == "__main__":
- res = run_autoencoder(plot=True, table=True)
- print(res)
diff --git a/Backend/cached_results/cached_autoencoder_result.json b/Backend/cached_results/cached_autoencoder_result.json
new file mode 100644
index 0000000..478e613
--- /dev/null
+++ b/Backend/cached_results/cached_autoencoder_result.json
@@ -0,0 +1,63 @@
+{
+ "normal_count": 2095057.0,
+ "attack_count": 425694.0,
+ "accuracy": 0.7128641424718268,
+ "precision_normal": 0.9533234465116556,
+ "recall_normal": 0.6882175520761488,
+ "f1_normal": 0.7993634386950771,
+ "precision_attack": 0.35217494793216303,
+ "recall_attack": 0.8341625674780476,
+ "f1_attack": 0.4952573023318089,
+ "top_anomalies": [
+ {
+ "Attack Type": "DoS",
+ "Destination Port": 80,
+ "Flow Duration": 1,
+ "Total Fwd Packets": 1,
+ "Flow Packets/s": 2000000.0,
+ "Packet Length Mean": 1322.0,
+ "reconstruction_error": 2.6239676475524902
+ },
+ {
+ "Attack Type": "DoS",
+ "Destination Port": 80,
+ "Flow Duration": 97501845,
+ "Total Fwd Packets": 4,
+ "Flow Packets/s": 0.071793513,
+ "Packet Length Mean": 1489.375,
+ "reconstruction_error": 1.4105286598205566
+ },
+ {
+ "Attack Type": "DoS",
+ "Destination Port": 80,
+ "Flow Duration": 97509484,
+ "Total Fwd Packets": 4,
+ "Flow Packets/s": 0.071787889,
+ "Packet Length Mean": 1495.875,
+ "reconstruction_error": 1.401184320449829
+ },
+ {
+ "Attack Type": "DoS",
+ "Destination Port": 80,
+ "Flow Duration": 95488861,
+ "Total Fwd Packets": 4,
+ "Flow Packets/s": 0.07330698,
+ "Packet Length Mean": 1495.0,
+ "reconstruction_error": 1.3920245170593262
+ },
+ {
+ "Attack Type": "DoS",
+ "Destination Port": 80,
+ "Flow Duration": 99036701,
+ "Total Fwd Packets": 6,
+ "Flow Packets/s": 0.090875402,
+ "Packet Length Mean": 1198.9,
+ "reconstruction_error": 1.3627992868423462
+ }
+ ],
+ "ram_before": 386.41,
+ "ram_peak": 4589.86,
+ "ram_after": 1194.69,
+ "ram_increase": 4203.45,
+ "runtime": 719.9
+}
\ No newline at end of file
diff --git a/Backend/cached_results/cached_iforest_result.json b/Backend/cached_results/cached_iforest_result.json
new file mode 100644
index 0000000..d1026e6
--- /dev/null
+++ b/Backend/cached_results/cached_iforest_result.json
@@ -0,0 +1,64 @@
+{
+ "normal_count": 2095057.0,
+ "attack_count": 425694.0,
+ "contamination": 0.42218965697127564,
+ "accuracy": 0.7300296617952349,
+ "precision_normal": 0.9855814946609444,
+ "recall_normal": 0.6851985411375442,
+ "f1_normal": 0.8083876846075615,
+ "precision_attack": 0.38027193625378214,
+ "recall_attack": 0.9506664411525649,
+ "f1_attack": 0.5432434355271513,
+ "top_anomalies": [
+ {
+ "Attack Type": "Bots",
+ "Destination Port": 8080,
+ "Flow Duration": 104836,
+ "Total Fwd Packets": 35,
+ "Flow Packets/s": 1096.951429,
+ "Packet Length Mean": 1797.637931,
+ "score": -0.3361174695180047
+ },
+ {
+ "Attack Type": "Port Scanning",
+ "Destination Port": 55055,
+ "Flow Duration": 116005956,
+ "Total Fwd Packets": 2,
+ "Flow Packets/s": 0.034480988,
+ "Packet Length Mean": 3.2,
+ "score": -0.3274269932966515
+ },
+ {
+ "Attack Type": "Port Scanning",
+ "Destination Port": 32781,
+ "Flow Duration": 117169685,
+ "Total Fwd Packets": 2,
+ "Flow Packets/s": 0.034138523,
+ "Packet Length Mean": 3.2,
+ "score": -0.32230849167538755
+ },
+ {
+ "Attack Type": "Port Scanning",
+ "Destination Port": 873,
+ "Flow Duration": 119809735,
+ "Total Fwd Packets": 2,
+ "Flow Packets/s": 0.033386269,
+ "Packet Length Mean": 3.2,
+ "score": -0.32111814642871583
+ },
+ {
+ "Attack Type": "Port Scanning",
+ "Destination Port": 21571,
+ "Flow Duration": 117624607,
+ "Total Fwd Packets": 2,
+ "Flow Packets/s": 0.03400649,
+ "Packet Length Mean": 3.2,
+ "score": -0.31684442769693955
+ }
+ ],
+ "ram_before": 182.07,
+ "ram_peak": 3098.45,
+ "ram_after": 186.78,
+ "ram_increase": 2916.39,
+ "runtime": 543.79
+}
\ No newline at end of file
diff --git a/Backend/dataset/cicids2017_cleaned.csv b/Backend/dataset/cicids2017_cleaned.csv
deleted file mode 100644
index 32e5303..0000000
Binary files a/Backend/dataset/cicids2017_cleaned.csv and /dev/null differ
diff --git a/Backend/isolation_forest_cicids.py b/Backend/isolation_forest_cicids.py
deleted file mode 100644
index 0c45cf9..0000000
--- a/Backend/isolation_forest_cicids.py
+++ /dev/null
@@ -1,211 +0,0 @@
-import pandas as pd
-import numpy as np
-import json
-import psutil
-from sklearn.ensemble import IsolationForest
-from sklearn.preprocessing import RobustScaler
-from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support
-from tabulate import tabulate
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-import gc
-
-import os
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json")
-DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv")
-
-def _force_memory_cleanup():
- gc.collect()
- try:
- import ctypes
- ctypes.CDLL("libc.so.6").malloc_trim(0)
- except Exception:
- pass
-
-
-def update_progress(value):
- with open(PROGRESS_PATH, "w") as f:
- json.dump({"progress": value}, f)
-
-
-def run_isolation_forest(csv_path=DATASET_PATH, plot=False, table=False):
- process = psutil.Process()
- ram_before = process.memory_info().rss
- ram_peak = ram_before
-
- update_progress(1)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- df = pd.read_csv(csv_path)
- update_progress(10)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- df["fraud"] = df["Attack Type"].apply(
- lambda x: 0 if "normal" in str(x).lower() else 1
- )
- true_labels = df["fraud"]
- update_progress(20)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- normal_count = int((true_labels == 0).sum())
- attack_count = int((true_labels == 1).sum())
-
- attack_fraction = true_labels.mean()
- contamination = min(attack_fraction * 2.5, 0.49)
-
- X = df.select_dtypes(include=[np.number])
- X = X.loc[:, X.std() > 0.01]
- X = X.to_numpy(dtype=np.float32, copy=True)
- update_progress(30)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- scaler = RobustScaler()
- X_scaled = scaler.fit_transform(X).astype(np.float32, copy=False)
- update_progress(40)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- model = IsolationForest(
- n_estimators=600,
- max_samples=0.3,
- contamination=contamination,
- max_features=0.7,
- bootstrap=False,
- random_state=42,
- n_jobs=1,
- )
- model.fit(X_scaled)
- update_progress(70)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- preds = model.predict(X_scaled)
- df["pred_label"] = np.where(preds == 1, 0, 1)
- df["anomaly_score"] = model.decision_function(X_scaled)
- update_progress(85)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- cm = confusion_matrix(true_labels, df["pred_label"])
- accuracy = accuracy_score(true_labels, df["pred_label"])
- precision, recall, f1, _ = precision_recall_fscore_support(
- true_labels, df["pred_label"], average=None, labels=[0, 1]
- )
- update_progress(95)
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
- if table:
- table_data = [
- ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"],
- ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"],
- ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"],
- ]
- print(
- tabulate(
- table_data,
- headers=["Class", "Precision", "Recall", "F1-Score"],
- tablefmt="fancy_grid",
- )
- )
-
- if plot:
- plt.figure(figsize=(10, 5))
- scatter = plt.scatter(
- range(len(df)),
- df["anomaly_score"],
- c=df["pred_label"],
- cmap="coolwarm",
- s=10,
- )
- plt.xlabel("Instance")
- plt.ylabel("Anomaly Score")
- plt.title("Anomaly Score Distribution (Isolation Forest)")
- handles, labels = scatter.legend_elements()
- plt.legend(handles, ["Normal", "Anomaly"], title="Predicted")
- plt.show()
-
- plt.figure(figsize=(5, 4))
- sns.heatmap(
- cm,
- annot=True,
- fmt="d",
- cmap="Blues",
- xticklabels=["Normal", "Attack"],
- yticklabels=["Normal", "Attack"],
- )
- plt.xlabel("Predicted Label")
- plt.ylabel("True Label")
- plt.title("Confusion Matrix (Isolation Forest)")
- plt.show()
-
- results = {
- "normal_count": float(normal_count),
- "attack_count": float(attack_count),
- "contamination": float(contamination),
- "accuracy": float(accuracy),
- "precision_normal": float(precision[0]),
- "recall_normal": float(recall[0]),
- "f1_normal": float(f1[0]),
- "precision_attack": float(precision[1]),
- "recall_attack": float(recall[1]),
- "f1_attack": float(f1[1]),
- }
-
-
-
- candidates = df[(df["pred_label"] == 1) & (df["fraud"] == 1)].copy()
-
-
- if len(candidates) < 5:
- extra = df[df["pred_label"] == 1].copy()
- candidates = pd.concat([candidates, extra]).drop_duplicates()
-
- if len(candidates) < 5:
- candidates = df.copy()
-
- candidates = candidates.sort_values("anomaly_score").head(5)
-
- important_cols = [
- "Attack Type",
- "Destination Port",
- "Flow Duration",
- "Total Fwd Packets",
- "Flow Packets/s",
- "Packet Length Mean",
- ]
-
- cols_exist = [c for c in important_cols if c in candidates.columns]
-
- top_anomalies = candidates[cols_exist + ["anomaly_score"]].rename(
- columns={"anomaly_score": "score"}
- ).to_dict(orient="records")
-
- results["top_anomalies"] = top_anomalies
-
- current_ram = process.memory_info().rss
- ram_peak = max(ram_peak, current_ram)
-
-
- del X, X_scaled, preds, model, scaler, candidates, df, cm, true_labels
- _force_memory_cleanup()
-
- ram_after = process.memory_info().rss
-
- results["ram_before"] = round(ram_before / (1024 ** 2), 2)
- results["ram_peak"] = round(ram_peak / (1024 ** 2), 2)
- results["ram_after"] = round(ram_after / (1024 ** 2), 2)
- results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2)
-
- return results
-
-
-if __name__ == "__main__":
- res = run_isolation_forest(plot=True, table=True)
- print(res)
diff --git a/Frontend/HTML Files/default.html b/Frontend/HTML Files/default.html
index 1dc276b..33b2da2 100644
--- a/Frontend/HTML Files/default.html
+++ b/Frontend/HTML Files/default.html
@@ -86,7 +86,7 @@
CICIDS Demo
- Here, you can test the algoritmhs on
+ Here, you can see the results of both algorithms, which were tested on the
CICIDS dataset.
diff --git a/Frontend/HTML Files/upload.html b/Frontend/HTML Files/upload.html
index 55a9569..da922be 100644
--- a/Frontend/HTML Files/upload.html
+++ b/Frontend/HTML Files/upload.html
@@ -231,10 +231,10 @@
const uploadField = document.getElementById("file_input");
uploadField.onchange = function() {
- const MAX_SIZE = 50 * 1024 * 1024;
+ const MAX_SIZE = 30 * 1024 * 1024;
if(this.files[0].size > MAX_SIZE) {
- showErrorPopup("File is too big! Maximum size is 50MB.");
+ showErrorPopup("File is too big! Maximum size is 30MB!");
this.value = "";
}
};
diff --git a/Frontend/JS/frontend.js b/Frontend/JS/frontend.js
index fde0217..469a062 100644
--- a/Frontend/JS/frontend.js
+++ b/Frontend/JS/frontend.js
@@ -139,6 +139,7 @@ function fetchFinalResults(retryCount = 0) {
setTimeout(() => fetchFinalResults(retryCount + 1), 1000);
return;
}
+
hideLoading();
clearActiveJob();
showErrorPopup("Final results are not ready.");
@@ -156,7 +157,7 @@ function fetchFinalResults(retryCount = 0) {
displayResults(json.data);
})
- .catch(err => {
+ .catch(() => {
hideLoading();
clearActiveJob();
showErrorPopup("Error fetching final results.");
@@ -187,7 +188,7 @@ function startProgressPolling() {
progressInterval = null;
fetchFinalResults();
}
- } catch (err) {
+ } catch {
clearInterval(progressInterval);
progressInterval = null;
hideLoading();
@@ -198,7 +199,7 @@ function startProgressPolling() {
showErrorPopup("Connection to backend was lost.
Backend may have stopped.");
}
}
- }, 1500);
+ }, 500);
}
async function restoreRunningJob() {
@@ -209,12 +210,33 @@ async function restoreRunningJob() {
const res = await fetch("/get-status");
const status = await res.json();
+ applySavedAlgorithm(saved);
+
if (status.current_job !== saved.backendJob) {
+ try {
+ const resultRes = await fetch(getResultURL());
+ const resultJson = await resultRes.json();
+
+ if (resultJson.ready) {
+ hideLoading();
+ clearActiveJob();
+ lastResults = resultJson.data;
+
+ if (resultJson.data?.error) {
+ showErrorPopup(`Algorithm failed.
${resultJson.data.message}`);
+ return;
+ }
+
+ displayResults(resultJson.data);
+ return;
+ }
+ } catch {
+ }
+
clearActiveJob();
return;
}
- applySavedAlgorithm(saved);
showLoading(`Resuming ${chosenAlgorithmName}...
This may take a while.`);
updateProgressUI(status.progress || 0);
@@ -223,21 +245,21 @@ async function restoreRunningJob() {
} else {
fetchFinalResults();
}
- } catch (err) {
- hideLoading();
- clearActiveJob();
+ } catch {
+ hideLoading();
+ clearActiveJob();
- if (!backendErrorShown) {
- backendErrorShown = true;
- showErrorPopup("Connection to backend was lost.
Please start the backend and try again.");
+ if (!backendErrorShown) {
+ backendErrorShown = true;
+ showErrorPopup("Connection to backend was lost.
Please start the backend and try again.");
+ }
}
}
-}
startBtn.addEventListener("click", () => {
if (!chosenAlgorithm) return;
- showLoading();
+ showLoading(`Loading prepared ${chosenAlgorithmName} results...
This may take a moment.`);
backendErrorShown = false;
saveActiveJob();
@@ -264,8 +286,11 @@ startBtn.addEventListener("click", () => {
throw new Error("Server error");
}
+ await res.json();
+
const saved = loadActiveJob();
startTime = saved?.startedAt ?? Date.now();
+
startProgressPolling();
})
.catch(err => {
@@ -282,10 +307,12 @@ function displayResults(data) {
const container = document.getElementById("resultContainer");
container.classList.remove("hidden");
- const runtime = ((Date.now() - startTime) / 1000).toFixed(2);
+ const runtime = data.runtime !== undefined && data.runtime !== null
+ ? Number(data.runtime).toFixed(2)
+ : ((Date.now() - startTime) / 1000).toFixed(2);
+
+ let topAnomaliesHTML = "";
- // Dynamicky vytvoríme tabuľku pre Top 5 anomálie (ak existujú)
- let topAnomaliesHTML = '';
if (data.top_anomalies && data.top_anomalies.length > 0) {
const headers = Object.keys(data.top_anomalies[0]);
@@ -298,7 +325,7 @@ function displayResults(data) {
- ${headers.map(h => `| ${h} | `).join('')}
+ ${headers.map(h => `${h} | `).join("")}
@@ -306,9 +333,9 @@ function displayResults(data) {
${headers.map(key => {
const val = row[key];
- return `| ${typeof val === 'number' ? val.toFixed(4) : val} | `;
- }).join('')}
-
`).join('')}
+ return `${typeof val === "number" ? val.toFixed(4) : val} | `;
+ }).join("")}
+ `).join("")}
@@ -320,7 +347,6 @@ function displayResults(data) {
Results
-
@@ -354,15 +380,12 @@ function displayResults(data) {
-
-
Class distribution
-
Performance
@@ -379,7 +402,6 @@ function displayResults(data) {
${topAnomaliesHTML}
-
numeric
`;
+
featureList.appendChild(label);
});
@@ -203,6 +339,7 @@ btnStep3Next.addEventListener("click", () => {
updateCounter();
show(step4);
+ hide(step5);
hide(featureError);
});
@@ -219,18 +356,39 @@ function updateCounter() {
}
btnRun.addEventListener("click", async () => {
-
- const labelColumnName = headers[labelIndex];
- let normalValue = normalBox.querySelector("input:checked").value;
-
- if (!isNaN(normalValue)) {
- normalValue = Number(normalValue);
- }
+ if (await isBackendBusy()) {
+ showPopup(BUSY_MESSAGE);
+ resetUploadFlow();
+ return;
+ }
const selectedFeatures = Array.from(
featureList.querySelectorAll("input:checked")
).map(cb => headers[cb.dataset.col]);
+ if (selectedFeatures.length !== MAX_FEATURES) {
+ featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`;
+ show(featureError);
+ return;
+ }
+
+ hide(featureError);
+
+ const labelColumnName = headers[labelIndex];
+ const normalInput = normalBox.querySelector("input:checked");
+
+ if (!normalInput) {
+ normalError.textContent = "Select exactly one NORMAL value.";
+ show(normalError);
+ return;
+ }
+
+ let normalValue = normalInput.value;
+
+ if (!isNaN(normalValue)) {
+ normalValue = Number(normalValue);
+ }
+
const config = {
dataset: {
file_path: "temp/upload.csv",
@@ -247,25 +405,32 @@ btnRun.addEventListener("click", async () => {
expected_feature_count: selectedFeatures.length
},
algorithm: {
- name: "isolation_forest"
+ name: "custom_dataset"
}
};
- await fetch("/save-config", {
- method: "POST",
- headers: { "Content-Type": "application/json" },
- body: JSON.stringify(config)
- });
- const selected = featureList.querySelectorAll("input:checked").length;
+ let saveResponse;
- if (selected !== MAX_FEATURES) {
- featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`;
- show(featureError);
+ try {
+ saveResponse = await fetch("/save-config", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify(config)
+ });
+ } catch {
+ showPopup("Configuration could not be saved. Backend server is not available.");
return;
}
- hide(featureError);
+ if (!saveResponse.ok) {
+ if (saveResponse.status !== 409) {
+ const message = await readErrorMessage(saveResponse, "Configuration could not be saved.");
+ showPopup(message);
+ }
- show(step5);
-});
+ hide(step5);
+ return;
+ }
+ show(step5);
+});
\ No newline at end of file
diff --git a/Frontend/JS/upload_choosing_fetch.js b/Frontend/JS/upload_choosing_fetch.js
index f2b612f..214f4ee 100644
--- a/Frontend/JS/upload_choosing_fetch.js
+++ b/Frontend/JS/upload_choosing_fetch.js
@@ -287,9 +287,22 @@ async function restoreRunningJob() {
}
}
-startBtn.addEventListener("click", () => {
+startBtn.addEventListener("click", async () => {
if (!chosenAlgorithm) return;
+ try {
+ const statusRes = await fetch("/get-status");
+ const status = await statusRes.json();
+
+ if (status.running) {
+ showErrorPopup("Another algorithm is already running.
Please wait until it finishes.");
+ return;
+ }
+ } catch {
+ showErrorPopup("Connection to backend was lost.
Please start the backend and try again.");
+ return;
+ }
+
showLoading();
backendErrorShown = false;
saveActiveJob();
diff --git a/progress.json b/progress.json
deleted file mode 100644
index 2cc70c8..0000000
--- a/progress.json
+++ /dev/null
@@ -1 +0,0 @@
-{"progress": 100}
\ No newline at end of file