This commit is contained in:
Michal Utľák 2026-05-04 16:48:28 +02:00
parent 42a4dbbe8c
commit 44aeb8cf26
20 changed files with 559 additions and 522 deletions

14
.dockerignore Normal file
View File

@ -0,0 +1,14 @@
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
*.git
*.tox
*.nox
*.coverage
*.hypothesis
dist/
build/

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

8
.idea/Hackujeme.iml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.13" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Hackujeme.iml" filepath="$PROJECT_DIR$/.idea/Hackujeme.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View File

@ -18,9 +18,6 @@ from datetime import datetime
from reportlab.pdfgen import canvas from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4 from reportlab.lib.pagesizes import A4
from reportlab.lib.units import cm from reportlab.lib.units import cm
from isolation_forest_cicids import run_isolation_forest
from autoencoder import run_autoencoder
from isolation_forest_custom import run_isolation_forest_custom from isolation_forest_custom import run_isolation_forest_custom
from autoencoder_custom import run_autoencoder_custom from autoencoder_custom import run_autoencoder_custom
import signal import signal
@ -37,13 +34,20 @@ FRONTEND_DIR = os.path.join(BASE_DIR, "..", "Frontend")
HTML_DIR = os.path.join(FRONTEND_DIR, "HTML Files") HTML_DIR = os.path.join(FRONTEND_DIR, "HTML Files")
JS_DIR = os.path.join(FRONTEND_DIR, "JS") JS_DIR = os.path.join(FRONTEND_DIR, "JS")
IMG_DIR = os.path.join(FRONTEND_DIR, "img") IMG_DIR = os.path.join(FRONTEND_DIR, "img")
CACHE_DIR = os.path.join(BASE_DIR, "cached_results")
CACHE_IFOREST = os.path.join(CACHE_DIR, "cached_iforest_result.json")
CACHE_AUTOENCODER = os.path.join(CACHE_DIR, "cached_autoencoder_result.json")
os.makedirs(TEMP_DIR, exist_ok=True) os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(REPORT_DIR, exist_ok=True) os.makedirs(REPORT_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
MAX_UPLOAD_SIZE = 30 * 1024 * 1024
app.config["MAX_CONTENT_LENGTH"] = MAX_UPLOAD_SIZE
def force_memory_cleanup(): def force_memory_cleanup():
gc.collect() gc.collect()
try: try:
@ -131,6 +135,93 @@ def write_json_file(path, data):
with open(path, "w", encoding="utf-8") as f: with open(path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2) json.dump(data, f, indent=2)
def load_cached_demo_result(job_name):
    """Publish a pre-computed demo result as the finished result of a job.

    Reads the cached JSON for ``job_name``, makes sure it carries a
    ``runtime`` entry (defaulting to 0), writes it to the job's result file
    wrapped as ``{"ready": True, "data": ...}`` and sets progress to 100.

    Raises:
        ValueError: ``job_name`` is neither "iforest" nor "autoencoder".
        FileNotFoundError: ``read_json_file`` returned ``None`` for the
            cache file.
    """
    # job name -> (cached input file, result file to publish into)
    locations = {
        "iforest": (CACHE_IFOREST, RESULT_IFOREST),
        "autoencoder": (CACHE_AUTOENCODER, RESULT_AUTOENCODER),
    }
    if job_name not in locations:
        raise ValueError(f"Unknown cached job: {job_name}")
    cache_path, result_path = locations[job_name]

    cached_data = read_json_file(cache_path)
    if cached_data is None:
        raise FileNotFoundError(f"Cached result file not found: {cache_path}")

    # Older cache files may lack a runtime; give consumers a value to show.
    if "runtime" not in cached_data:
        cached_data["runtime"] = 0

    write_json_file(result_path, {"ready": True, "data": cached_data})
    update_progress(100)
def cached_worker_entry(job_name):
    """Worker entry point that replays a cached demo result with staged progress.

    Steps progress through 5, 25, 45, 65 and 85 with a 0.4 s pause after
    each step, then publishes the cached result. Any exception is written to
    the job's result file as an error payload (message plus traceback) and
    progress is reset to 0.
    """
    result_path = get_result_path(job_name)

    try:
        # Staged progress so the UI shows movement before the result appears.
        for percent in (5, 25, 45, 65, 85):
            update_progress(percent)
            time.sleep(0.4)

        load_cached_demo_result(job_name)
    except Exception as e:
        failure = {
            "error": True,
            "message": str(e),
            "details": traceback.format_exc(),
        }
        # "ready" stays True so a client waiting on the result sees the error.
        write_json_file(result_path, {"ready": True, "data": failure})
        update_progress(0)
def start_cached_demo_job(job_name):
    """Start the cached-demo worker for ``job_name`` unless a job is running.

    Claims the shared running state under ``state_lock``, removes any stale
    result file, resets progress to 0 and launches ``cached_worker_entry`` in
    a separate ``mp.Process``.

    Returns:
        ``True`` if the worker was started, ``False`` if another job already
        holds the running state.

    Raises:
        Whatever the start-up steps raise. The running state claimed by this
        call is rolled back first, so a failed start cannot leave the backend
        permanently reporting "already running".
    """
    global is_running, current_job, worker_process

    refresh_worker_state()

    with state_lock:
        if is_running:
            return False
        previous_job = current_job
        is_running = True
        current_job = job_name

    try:
        result_path = get_result_path(job_name)

        # Best-effort removal of a stale result; a leftover file is
        # overwritten by the worker anyway, so OS-level failures are ignored.
        if os.path.exists(result_path):
            try:
                os.remove(result_path)
            except OSError:
                pass

        update_progress(0)

        proc = mp.Process(target=cached_worker_entry, args=(job_name,))
        proc.start()
    except Exception:
        # Undo the claim made above before propagating the failure.
        with state_lock:
            is_running = False
            current_job = previous_job
        raise

    with state_lock:
        worker_process = proc

    return True
def get_result_path(job_name): def get_result_path(job_name):
mapping = { mapping = {
@ -143,11 +234,7 @@ def get_result_path(job_name):
def get_job_runner(job_name): def get_job_runner(job_name):
if job_name == "iforest": if job_name == "iforest_custom":
return lambda: run_isolation_forest(plot=False, table=False)
elif job_name == "autoencoder":
return lambda: run_autoencoder(plot=False, table=False)
elif job_name == "iforest_custom":
return run_isolation_forest_custom return run_isolation_forest_custom
elif job_name == "autoencoder_custom": elif job_name == "autoencoder_custom":
return run_autoencoder_custom return run_autoencoder_custom
@ -243,25 +330,16 @@ def start_background_job(job_name):
return True return True
def read_progress():
try:
with open(PROGRESS_PATH, "r") as f:
return json.load(f).get("progress", 0)
except Exception:
return 0
update_progress(0)
@app.route("/start-iforest", methods=["POST"]) @app.route("/start-iforest", methods=["POST"])
def start_iforest(): def start_iforest():
started = start_background_job("iforest") started = start_cached_demo_job("iforest")
if not started: if not started:
return jsonify({ return jsonify({
"status": "already_running", "status": "already_running",
"message": "Another algorithm is already running" "message": "Another algorithm is already running"
}), 409 }), 409
return jsonify({"status": "started"}) return jsonify({"status": "started"})
@ -288,12 +366,14 @@ def start_autoencoder_custom():
@app.route("/start-autoencoder", methods=["POST"]) @app.route("/start-autoencoder", methods=["POST"])
def start_autoencoder(): def start_autoencoder():
started = start_background_job("autoencoder") started = start_cached_demo_job("autoencoder")
if not started: if not started:
return jsonify({ return jsonify({
"status": "already_running", "status": "already_running",
"message": "Another algorithm is already running" "message": "Another algorithm is already running"
}), 409 }), 409
return jsonify({"status": "started"}) return jsonify({"status": "started"})
@ -602,6 +682,13 @@ def upload_dataset():
file = request.files["file"] file = request.files["file"]
file.seek(0, os.SEEK_END)
file_size = file.tell()
file.seek(0)
if file_size > MAX_UPLOAD_SIZE:
return jsonify({"error": "File is too big! Maximum size is 30MB."}), 400
if file.filename == "": if file.filename == "":
return jsonify({"error": "Empty filename"}), 400 return jsonify({"error": "Empty filename"}), 400

View File

@ -1,223 +0,0 @@
import warnings
warnings.filterwarnings("ignore")
import psutil
import pandas as pd
import numpy as np
import json
import gc
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
confusion_matrix,
accuracy_score,
precision_recall_fscore_support,
)
from tabulate import tabulate
import os
def _force_memory_cleanup():
gc.collect()
try:
import ctypes
ctypes.CDLL("libc.so.6").malloc_trim(0)
except Exception:
pass
# Directory that contains this module; the paths below are resolved against it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# JSON file that update_progress() writes the current progress value to.
PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json")
# Default CSV used by run_autoencoder() when no csv_path is given.
DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv")
def update_progress(value):
    """Persist ``{"progress": value}`` as JSON at ``PROGRESS_PATH``.

    The payload is written to a temporary sibling file and then swapped into
    place with ``os.replace``, so a process polling the file never sees an
    empty or half-written document. If the swap is refused by the OS, the
    payload is written directly to ``PROGRESS_PATH`` instead.
    """
    # Serialise first: an unserialisable value then fails before any file
    # is touched.
    payload = json.dumps({"progress": value})

    tmp_path = f"{PROGRESS_PATH}.{os.getpid()}.tmp"
    with open(tmp_path, "w") as f:
        f.write(payload)

    try:
        os.replace(tmp_path, PROGRESS_PATH)
    except OSError:
        # Fall back to the plain overwrite and clean up the temp file.
        try:
            with open(PROGRESS_PATH, "w") as f:
                f.write(payload)
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
def run_autoencoder(csv_path=DATASET_PATH, plot=False, table=False):
    """Train a dense autoencoder on normal traffic and flag flows by reconstruction error.

    The CSV must contain an ``Attack Type`` column. Rows whose value contains
    "normal" (case-insensitive) are class 0, everything else is class 1. All
    other columns are standardised and used as features. The model is fitted
    on class-0 rows only; every row is then reconstructed, and rows whose
    mean absolute reconstruction error exceeds the 60th percentile are
    predicted as attacks.

    Args:
        csv_path: Path of the dataset CSV.
        plot: Accepted for signature compatibility; nothing is drawn here.
        table: When true, print a per-class precision/recall/F1 table.

    Returns:
        dict with class counts, accuracy, per-class precision/recall/F1,
        ``top_anomalies`` (up to five records with the highest
        reconstruction error) and ``ram_before`` / ``ram_peak`` /
        ``ram_after`` / ``ram_increase`` in MiB.
    """
    # Imported here rather than at module level, so importing this module
    # does not import TensorFlow.
    import tensorflow as tf
    from tensorflow.keras import Model
    from tensorflow.keras.layers import Dense, Input, Dropout
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import EarlyStopping
    from tensorflow.keras import backend as K

    process = psutil.Process()
    ram_before = process.memory_info().rss
    ram_peak = ram_before

    def checkpoint(percent):
        """Report progress and fold the current RSS into the running peak."""
        nonlocal ram_peak
        update_progress(percent)
        ram_peak = max(ram_peak, process.memory_info().rss)

    checkpoint(1)

    df = pd.read_csv(csv_path)
    checkpoint(10)

    df["fraud"] = df["Attack Type"].apply(
        lambda x: 0 if "normal" in str(x).lower() else 1
    )
    normal_count = int((df["fraud"] == 0).sum())
    attack_count = int((df["fraud"] == 1).sum())

    # Both the raw label and the derived binary label stay out of the features.
    features = [c for c in df.columns if c not in ["Attack Type", "fraud"]]

    scaler = StandardScaler()
    X_raw = df[features].to_numpy(dtype=np.float32, copy=True)
    y = df["fraud"].to_numpy(dtype=np.int8, copy=True)
    X_scaled = scaler.fit_transform(X_raw).astype(np.float32, copy=False)

    # Train on normal rows only; evaluate on every row.
    X_train_normal = X_scaled[y == 0]

    CODE_DIM = 16
    INPUT_SHAPE = X_scaled.shape[1]

    inp = Input(shape=(INPUT_SHAPE,))
    x = Dense(128, activation="relu")(inp)
    x = Dropout(0.1)(x)
    x = Dense(64, activation="relu")(x)
    x = Dense(16, activation="relu")(x)
    code = Dense(CODE_DIM, activation="relu")(x)
    x = Dense(16, activation="relu")(code)
    x = Dense(64, activation="relu")(x)
    x = Dense(128, activation="relu")(x)
    out = Dense(INPUT_SHAPE, activation="linear")(x)

    autoencoder = Model(inp, out)
    autoencoder.compile(loss="mae", optimizer=Adam(learning_rate=0.001))
    checkpoint(40)

    earlystopping = EarlyStopping(
        monitor="val_loss", patience=5, restore_best_weights=True
    )
    autoencoder.fit(
        X_train_normal,
        X_train_normal,
        epochs=20,
        batch_size=64,
        validation_split=0.1,
        callbacks=[earlystopping],
        shuffle=True,
        verbose=1,
    )
    checkpoint(60)

    reconstructions = autoencoder.predict(X_scaled, verbose=0)
    reconstruction_error = np.mean(np.abs(reconstructions - X_scaled), axis=1)

    # Row i of recons_df corresponds to row i of df (fresh 0..n-1 index).
    recons_df = pd.DataFrame(
        {"error": reconstruction_error, "y_true": y}
    ).reset_index(drop=True)

    threshold = np.percentile(recons_df["error"], 60)
    recons_df["y_pred"] = (recons_df["error"] > threshold).astype(int)
    checkpoint(80)

    accuracy = accuracy_score(recons_df["y_true"], recons_df["y_pred"])
    precision, recall, f1, _ = precision_recall_fscore_support(
        recons_df["y_true"],
        recons_df["y_pred"],
        average=None,
        labels=[0, 1],
    )
    checkpoint(90)

    if table:
        table_data = [
            ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"],
            ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"],
            ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"],
        ]
        print(
            tabulate(
                table_data,
                headers=["Class", "Precision", "Recall", "F1-Score"],
                tablefmt="fancy_grid",
            )
        )

    results = {
        "normal_count": float(normal_count),
        "attack_count": float(attack_count),
        "accuracy": float(accuracy),
        "precision_normal": float(precision[0]),
        "recall_normal": float(recall[0]),
        "f1_normal": float(f1[0]),
        "precision_attack": float(precision[1]),
        "recall_attack": float(recall[1]),
        "f1_attack": float(f1[1]),
    }

    # Prefer correctly flagged attacks; widen to every flagged row, then to
    # all rows, until at least five candidates exist. True positives are a
    # subset of the flagged rows, so widening needs no de-duplication (a
    # value-based drop_duplicates would also merge distinct rows that happen
    # to share the same error).
    flagged = recons_df["y_pred"] == 1
    candidates = recons_df[flagged & (recons_df["y_true"] == 1)]
    if len(candidates) < 5:
        candidates = recons_df[flagged]
    if len(candidates) < 5:
        candidates = recons_df
    candidates = candidates.sort_values("error", ascending=False).head(5)

    df_top = df.iloc[candidates.index].copy()
    df_top["reconstruction_error"] = candidates["error"].values

    important_cols = [
        "Attack Type",
        "Destination Port",
        "Flow Duration",
        "Total Fwd Packets",
        "Flow Packets/s",
        "Packet Length Mean",
    ]
    cols_exist = [c for c in important_cols if c in df_top.columns]
    results["top_anomalies"] = df_top[cols_exist + ["reconstruction_error"]].to_dict(
        orient="records"
    )

    # Drop the large objects before measuring the post-run footprint.
    del X_raw, X_scaled, X_train_normal, y
    del reconstructions, reconstruction_error, recons_df, flagged, candidates, df_top
    del autoencoder, scaler, df

    K.clear_session()
    tf.keras.backend.clear_session(free_memory=True)
    _force_memory_cleanup()

    ram_after = process.memory_info().rss
    results["ram_before"] = round(ram_before / (1024 ** 2), 2)
    results["ram_peak"] = round(ram_peak / (1024 ** 2), 2)
    results["ram_after"] = round(ram_after / (1024 ** 2), 2)
    results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2)

    update_progress(100)
    return results
if __name__ == "__main__":
    # Manual run: train on the default dataset and print the metrics dict.
    print(run_autoencoder(plot=True, table=True))

View File

@ -0,0 +1,63 @@
{
"normal_count": 2095057.0,
"attack_count": 425694.0,
"accuracy": 0.7128641424718268,
"precision_normal": 0.9533234465116556,
"recall_normal": 0.6882175520761488,
"f1_normal": 0.7993634386950771,
"precision_attack": 0.35217494793216303,
"recall_attack": 0.8341625674780476,
"f1_attack": 0.4952573023318089,
"top_anomalies": [
{
"Attack Type": "DoS",
"Destination Port": 80,
"Flow Duration": 1,
"Total Fwd Packets": 1,
"Flow Packets/s": 2000000.0,
"Packet Length Mean": 1322.0,
"reconstruction_error": 2.6239676475524902
},
{
"Attack Type": "DoS",
"Destination Port": 80,
"Flow Duration": 97501845,
"Total Fwd Packets": 4,
"Flow Packets/s": 0.071793513,
"Packet Length Mean": 1489.375,
"reconstruction_error": 1.4105286598205566
},
{
"Attack Type": "DoS",
"Destination Port": 80,
"Flow Duration": 97509484,
"Total Fwd Packets": 4,
"Flow Packets/s": 0.071787889,
"Packet Length Mean": 1495.875,
"reconstruction_error": 1.401184320449829
},
{
"Attack Type": "DoS",
"Destination Port": 80,
"Flow Duration": 95488861,
"Total Fwd Packets": 4,
"Flow Packets/s": 0.07330698,
"Packet Length Mean": 1495.0,
"reconstruction_error": 1.3920245170593262
},
{
"Attack Type": "DoS",
"Destination Port": 80,
"Flow Duration": 99036701,
"Total Fwd Packets": 6,
"Flow Packets/s": 0.090875402,
"Packet Length Mean": 1198.9,
"reconstruction_error": 1.3627992868423462
}
],
"ram_before": 386.41,
"ram_peak": 4589.86,
"ram_after": 1194.69,
"ram_increase": 4203.45,
"runtime": 719.9
}

View File

@ -0,0 +1,64 @@
{
"normal_count": 2095057.0,
"attack_count": 425694.0,
"contamination": 0.42218965697127564,
"accuracy": 0.7300296617952349,
"precision_normal": 0.9855814946609444,
"recall_normal": 0.6851985411375442,
"f1_normal": 0.8083876846075615,
"precision_attack": 0.38027193625378214,
"recall_attack": 0.9506664411525649,
"f1_attack": 0.5432434355271513,
"top_anomalies": [
{
"Attack Type": "Bots",
"Destination Port": 8080,
"Flow Duration": 104836,
"Total Fwd Packets": 35,
"Flow Packets/s": 1096.951429,
"Packet Length Mean": 1797.637931,
"score": -0.3361174695180047
},
{
"Attack Type": "Port Scanning",
"Destination Port": 55055,
"Flow Duration": 116005956,
"Total Fwd Packets": 2,
"Flow Packets/s": 0.034480988,
"Packet Length Mean": 3.2,
"score": -0.3274269932966515
},
{
"Attack Type": "Port Scanning",
"Destination Port": 32781,
"Flow Duration": 117169685,
"Total Fwd Packets": 2,
"Flow Packets/s": 0.034138523,
"Packet Length Mean": 3.2,
"score": -0.32230849167538755
},
{
"Attack Type": "Port Scanning",
"Destination Port": 873,
"Flow Duration": 119809735,
"Total Fwd Packets": 2,
"Flow Packets/s": 0.033386269,
"Packet Length Mean": 3.2,
"score": -0.32111814642871583
},
{
"Attack Type": "Port Scanning",
"Destination Port": 21571,
"Flow Duration": 117624607,
"Total Fwd Packets": 2,
"Flow Packets/s": 0.03400649,
"Packet Length Mean": 3.2,
"score": -0.31684442769693955
}
],
"ram_before": 182.07,
"ram_peak": 3098.45,
"ram_after": 186.78,
"ram_increase": 2916.39,
"runtime": 543.79
}

Binary file not shown.
Can't render this file because it is too large.

View File

@ -1,211 +0,0 @@
import pandas as pd
import numpy as np
import json
import psutil
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import os
# Directory that contains this module; the paths below are resolved against it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# JSON file that update_progress() writes the current progress value to.
PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json")
# Default CSV used by run_isolation_forest() when no csv_path is given.
DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv")
def _force_memory_cleanup():
gc.collect()
try:
import ctypes
ctypes.CDLL("libc.so.6").malloc_trim(0)
except Exception:
pass
def update_progress(value):
    """Persist ``{"progress": value}`` as JSON at ``PROGRESS_PATH``.

    The payload is written to a temporary sibling file and then swapped into
    place with ``os.replace``, so a process polling the file never sees an
    empty or half-written document. If the swap is refused by the OS, the
    payload is written directly to ``PROGRESS_PATH`` instead.
    """
    # Serialise first: an unserialisable value then fails before any file
    # is touched.
    payload = json.dumps({"progress": value})

    tmp_path = f"{PROGRESS_PATH}.{os.getpid()}.tmp"
    with open(tmp_path, "w") as f:
        f.write(payload)

    try:
        os.replace(tmp_path, PROGRESS_PATH)
    except OSError:
        # Fall back to the plain overwrite and clean up the temp file.
        try:
            with open(PROGRESS_PATH, "w") as f:
                f.write(payload)
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
def run_isolation_forest(csv_path=DATASET_PATH, plot=False, table=False):
    """Fit an Isolation Forest on the dataset and score every flow.

    The CSV must contain an ``Attack Type`` column. Rows whose value contains
    "normal" (case-insensitive) are class 0, everything else is class 1. The
    model is fitted on the numeric columns whose standard deviation exceeds
    0.01, after robust scaling. ``contamination`` is 2.5 times the attack
    fraction, capped at 0.49.

    Args:
        csv_path: Path of the dataset CSV.
        plot: When true, show the anomaly-score scatter plot and the
            confusion-matrix heatmap.
        table: When true, print a per-class precision/recall/F1 table.

    Returns:
        dict with class counts, ``contamination``, accuracy, per-class
        precision/recall/F1, ``top_anomalies`` (up to five records with the
        lowest anomaly score) and ``ram_before`` / ``ram_peak`` /
        ``ram_after`` / ``ram_increase`` in MiB.
    """
    process = psutil.Process()
    ram_before = process.memory_info().rss
    ram_peak = ram_before

    def sample_ram():
        """Fold the current RSS into the running peak."""
        nonlocal ram_peak
        ram_peak = max(ram_peak, process.memory_info().rss)

    def checkpoint(percent):
        """Report progress, then sample memory."""
        update_progress(percent)
        sample_ram()

    checkpoint(1)

    df = pd.read_csv(csv_path)
    checkpoint(10)

    # The labels are kept in their own Series instead of a numeric "fraud"
    # column on df: select_dtypes() below would otherwise pick the ground
    # truth up as a model feature.
    true_labels = df["Attack Type"].apply(
        lambda x: 0 if "normal" in str(x).lower() else 1
    )
    checkpoint(20)

    normal_count = int((true_labels == 0).sum())
    attack_count = int((true_labels == 1).sum())

    attack_fraction = true_labels.mean()
    contamination = min(attack_fraction * 2.5, 0.49)

    numeric = df.select_dtypes(include=[np.number])
    numeric = numeric.loc[:, numeric.std() > 0.01]
    X = numeric.to_numpy(dtype=np.float32, copy=True)
    del numeric
    checkpoint(30)

    scaler = RobustScaler()
    X_scaled = scaler.fit_transform(X).astype(np.float32, copy=False)
    checkpoint(40)

    model = IsolationForest(
        n_estimators=600,
        max_samples=0.3,
        contamination=contamination,
        max_features=0.7,
        bootstrap=False,
        random_state=42,
        n_jobs=1,
    )
    model.fit(X_scaled)
    checkpoint(70)

    # predict() yields 1 for inliers and -1 for outliers; map to 0/1 labels.
    preds = model.predict(X_scaled)
    df["pred_label"] = np.where(preds == 1, 0, 1)
    df["anomaly_score"] = model.decision_function(X_scaled)
    checkpoint(85)

    accuracy = accuracy_score(true_labels, df["pred_label"])
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, df["pred_label"], average=None, labels=[0, 1]
    )
    checkpoint(95)

    if table:
        table_data = [
            ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"],
            ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"],
            ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"],
        ]
        print(
            tabulate(
                table_data,
                headers=["Class", "Precision", "Recall", "F1-Score"],
                tablefmt="fancy_grid",
            )
        )

    if plot:
        plt.figure(figsize=(10, 5))
        scatter = plt.scatter(
            range(len(df)),
            df["anomaly_score"],
            c=df["pred_label"],
            cmap="coolwarm",
            s=10,
        )
        plt.xlabel("Instance")
        plt.ylabel("Anomaly Score")
        plt.title("Anomaly Score Distribution (Isolation Forest)")
        handles, _ = scatter.legend_elements()
        plt.legend(handles, ["Normal", "Anomaly"], title="Predicted")
        plt.show()

        # The confusion matrix is only needed for this heatmap.
        cm = confusion_matrix(true_labels, df["pred_label"])
        plt.figure(figsize=(5, 4))
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=["Normal", "Attack"],
            yticklabels=["Normal", "Attack"],
        )
        plt.xlabel("Predicted Label")
        plt.ylabel("True Label")
        plt.title("Confusion Matrix (Isolation Forest)")
        plt.show()

    results = {
        "normal_count": float(normal_count),
        "attack_count": float(attack_count),
        "contamination": float(contamination),
        "accuracy": float(accuracy),
        "precision_normal": float(precision[0]),
        "recall_normal": float(recall[0]),
        "f1_normal": float(f1[0]),
        "precision_attack": float(precision[1]),
        "recall_attack": float(recall[1]),
        "f1_attack": float(f1[1]),
    }

    # Prefer correctly flagged attacks; widen to every flagged row, then to
    # all rows, until at least five candidates exist. True positives are a
    # subset of the flagged rows, so widening needs no de-duplication (a
    # value-based drop_duplicates would also merge distinct flows that
    # happen to have identical values).
    flagged = df["pred_label"] == 1
    candidates = df[flagged & (true_labels == 1)]
    if len(candidates) < 5:
        candidates = df[flagged]
    if len(candidates) < 5:
        candidates = df
    # Lower decision_function scores are more anomalous.
    candidates = candidates.sort_values("anomaly_score").head(5)

    important_cols = [
        "Attack Type",
        "Destination Port",
        "Flow Duration",
        "Total Fwd Packets",
        "Flow Packets/s",
        "Packet Length Mean",
    ]
    cols_exist = [c for c in important_cols if c in candidates.columns]
    results["top_anomalies"] = candidates[cols_exist + ["anomaly_score"]].rename(
        columns={"anomaly_score": "score"}
    ).to_dict(orient="records")

    sample_ram()

    # Drop the large objects before measuring the post-run footprint.
    del X, X_scaled, preds, model, scaler, flagged, candidates, df, true_labels
    _force_memory_cleanup()

    ram_after = process.memory_info().rss
    results["ram_before"] = round(ram_before / (1024 ** 2), 2)
    results["ram_peak"] = round(ram_peak / (1024 ** 2), 2)
    results["ram_after"] = round(ram_after / (1024 ** 2), 2)
    results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2)

    return results
if __name__ == "__main__":
    # Manual run: fit on the default dataset, show plots, print the metrics dict.
    print(run_isolation_forest(plot=True, table=True))

View File

@ -86,7 +86,7 @@
<h2 class="text-3xl font-bold text-orange-500 mb-4 flex items-center justify-center">CICIDS Demo</h2> <h2 class="text-3xl font-bold text-orange-500 mb-4 flex items-center justify-center">CICIDS Demo</h2>
<p class="text-lg leading-relaxed mb-6 text-center text-white"> <p class="text-lg leading-relaxed mb-6 text-center text-white">
Here, you can test the algoritmhs on Here, you can see the results of both algorithms which were tested on
<span class="text-orange-500 font-semibold">CICIDS</span> dataset. <span class="text-orange-500 font-semibold">CICIDS</span> dataset.
</p> </p>

View File

@ -231,10 +231,10 @@
const uploadField = document.getElementById("file_input"); const uploadField = document.getElementById("file_input");
uploadField.onchange = function() { uploadField.onchange = function() {
const MAX_SIZE = 50 * 1024 * 1024; const MAX_SIZE = 30 * 1024 * 1024;
if(this.files[0].size > MAX_SIZE) { if(this.files[0].size > MAX_SIZE) {
showErrorPopup("File is too big! Maximum size is 50MB."); showErrorPopup("File is too big! Maximum size is 30MB!");
this.value = ""; this.value = "";
} }
}; };

View File

@ -139,6 +139,7 @@ function fetchFinalResults(retryCount = 0) {
setTimeout(() => fetchFinalResults(retryCount + 1), 1000); setTimeout(() => fetchFinalResults(retryCount + 1), 1000);
return; return;
} }
hideLoading(); hideLoading();
clearActiveJob(); clearActiveJob();
showErrorPopup("Final results are not ready."); showErrorPopup("Final results are not ready.");
@ -156,7 +157,7 @@ function fetchFinalResults(retryCount = 0) {
displayResults(json.data); displayResults(json.data);
}) })
.catch(err => { .catch(() => {
hideLoading(); hideLoading();
clearActiveJob(); clearActiveJob();
showErrorPopup("Error fetching final results."); showErrorPopup("Error fetching final results.");
@ -187,7 +188,7 @@ function startProgressPolling() {
progressInterval = null; progressInterval = null;
fetchFinalResults(); fetchFinalResults();
} }
} catch (err) { } catch {
clearInterval(progressInterval); clearInterval(progressInterval);
progressInterval = null; progressInterval = null;
hideLoading(); hideLoading();
@ -198,7 +199,7 @@ function startProgressPolling() {
showErrorPopup("Connection to backend was lost.<br>Backend may have stopped."); showErrorPopup("Connection to backend was lost.<br>Backend may have stopped.");
} }
} }
}, 1500); }, 500);
} }
async function restoreRunningJob() { async function restoreRunningJob() {
@ -209,12 +210,33 @@ async function restoreRunningJob() {
const res = await fetch("/get-status"); const res = await fetch("/get-status");
const status = await res.json(); const status = await res.json();
applySavedAlgorithm(saved);
if (status.current_job !== saved.backendJob) { if (status.current_job !== saved.backendJob) {
try {
const resultRes = await fetch(getResultURL());
const resultJson = await resultRes.json();
if (resultJson.ready) {
hideLoading();
clearActiveJob();
lastResults = resultJson.data;
if (resultJson.data?.error) {
showErrorPopup(`Algorithm failed.<br>${resultJson.data.message}`);
return;
}
displayResults(resultJson.data);
return;
}
} catch {
}
clearActiveJob(); clearActiveJob();
return; return;
} }
applySavedAlgorithm(saved);
showLoading(`Resuming ${chosenAlgorithmName}...<br>This may take a while.`); showLoading(`Resuming ${chosenAlgorithmName}...<br>This may take a while.`);
updateProgressUI(status.progress || 0); updateProgressUI(status.progress || 0);
@ -223,21 +245,21 @@ async function restoreRunningJob() {
} else { } else {
fetchFinalResults(); fetchFinalResults();
} }
} catch (err) { } catch {
hideLoading(); hideLoading();
clearActiveJob(); clearActiveJob();
if (!backendErrorShown) { if (!backendErrorShown) {
backendErrorShown = true; backendErrorShown = true;
showErrorPopup("Connection to backend was lost.<br>Please start the backend and try again."); showErrorPopup("Connection to backend was lost.<br>Please start the backend and try again.");
}
} }
} }
}
startBtn.addEventListener("click", () => { startBtn.addEventListener("click", () => {
if (!chosenAlgorithm) return; if (!chosenAlgorithm) return;
showLoading(); showLoading(`Loading prepared ${chosenAlgorithmName} results...<br>This may take a moment.`);
backendErrorShown = false; backendErrorShown = false;
saveActiveJob(); saveActiveJob();
@ -264,8 +286,11 @@ startBtn.addEventListener("click", () => {
throw new Error("Server error"); throw new Error("Server error");
} }
await res.json();
const saved = loadActiveJob(); const saved = loadActiveJob();
startTime = saved?.startedAt ?? Date.now(); startTime = saved?.startedAt ?? Date.now();
startProgressPolling(); startProgressPolling();
}) })
.catch(err => { .catch(err => {
@ -282,10 +307,12 @@ function displayResults(data) {
const container = document.getElementById("resultContainer"); const container = document.getElementById("resultContainer");
container.classList.remove("hidden"); container.classList.remove("hidden");
const runtime = ((Date.now() - startTime) / 1000).toFixed(2); const runtime = data.runtime !== undefined && data.runtime !== null
? Number(data.runtime).toFixed(2)
: ((Date.now() - startTime) / 1000).toFixed(2);
let topAnomaliesHTML = "";
// Dynamicky vytvoríme tabuľku pre Top 5 anomálie (ak existujú)
let topAnomaliesHTML = '';
if (data.top_anomalies && data.top_anomalies.length > 0) { if (data.top_anomalies && data.top_anomalies.length > 0) {
const headers = Object.keys(data.top_anomalies[0]); const headers = Object.keys(data.top_anomalies[0]);
@ -298,7 +325,7 @@ function displayResults(data) {
<table class="min-w-full bg-gray-800 rounded-xl shadow-xl"> <table class="min-w-full bg-gray-800 rounded-xl shadow-xl">
<thead class="bg-gray-700"> <thead class="bg-gray-700">
<tr> <tr>
${headers.map(h => `<th class="px-4 py-3 text-orange-500">${h}</th>`).join('')} ${headers.map(h => `<th class="px-4 py-3 text-orange-500">${h}</th>`).join("")}
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
@ -306,9 +333,9 @@ function displayResults(data) {
<tr class="border-b border-gray-700 hover:bg-gray-700/50 transition"> <tr class="border-b border-gray-700 hover:bg-gray-700/50 transition">
${headers.map(key => { ${headers.map(key => {
const val = row[key]; const val = row[key];
return `<td class="px-4 py-3 text-center">${typeof val === 'number' ? val.toFixed(4) : val}</td>`; return `<td class="px-4 py-3 text-center">${typeof val === "number" ? val.toFixed(4) : val}</td>`;
}).join('')} }).join("")}
</tr>`).join('')} </tr>`).join("")}
</tbody> </tbody>
</table> </table>
</div> </div>
@ -320,7 +347,6 @@ function displayResults(data) {
Results Results
</h2> </h2>
<!-- Metriky tabuľka -->
<div class="overflow-x-auto mb-10"> <div class="overflow-x-auto mb-10">
<table class="min-w-full bg-gray-800 rounded-xl shadow-xl text-center"> <table class="min-w-full bg-gray-800 rounded-xl shadow-xl text-center">
<thead class="bg-gray-700"> <thead class="bg-gray-700">
@ -354,15 +380,12 @@ function displayResults(data) {
</table> </table>
</div> </div>
<!-- Distribúcia + Systémové info -->
<div class="grid grid-cols-1 md:grid-cols-2 gap-10 mb-10"> <div class="grid grid-cols-1 md:grid-cols-2 gap-10 mb-10">
<!-- Pie Chart -->
<div class="flex flex-col items-center"> <div class="flex flex-col items-center">
<h3 class="text-2xl text-orange-500 font-semibold mb-4">Class distribution</h3> <h3 class="text-2xl text-orange-500 font-semibold mb-4">Class distribution</h3>
<canvas id="pieChart" class="w-80 h-80"></canvas> <canvas id="pieChart" class="w-80 h-80"></canvas>
</div> </div>
<!-- RAM a počty -->
<div class="bg-gray-800/60 rounded-xl p-8 border border-gray-700"> <div class="bg-gray-800/60 rounded-xl p-8 border border-gray-700">
<h3 class="text-2xl text-orange-500 font-semibold mb-6">Performance</h3> <h3 class="text-2xl text-orange-500 font-semibold mb-6">Performance</h3>
<div class="space-y-4 text-lg"> <div class="space-y-4 text-lg">
@ -379,7 +402,6 @@ function displayResults(data) {
${topAnomaliesHTML} ${topAnomaliesHTML}
<!-- Download tlačidlá -->
<div class="w-full flex justify-center gap-10 mt-12"> <div class="w-full flex justify-center gap-10 mt-12">
<button id="downloadPDF" <button id="downloadPDF"
class="px-10 py-5 bg-orange-500 hover:bg-orange-400 text-white text-xl font-bold rounded-xl transition shadow-xl cursor-pointer transform hover:scale-105"> class="px-10 py-5 bg-orange-500 hover:bg-orange-400 text-white text-xl font-bold rounded-xl transition shadow-xl cursor-pointer transform hover:scale-105">
@ -397,6 +419,7 @@ function displayResults(data) {
document.getElementById("downloadPDF").addEventListener("click", () => { document.getElementById("downloadPDF").addEventListener("click", () => {
downloadPDF(data, runtime); downloadPDF(data, runtime);
}); });
document.getElementById("downloadJSON").addEventListener("click", () => { document.getElementById("downloadJSON").addEventListener("click", () => {
downloadJSON(data, runtime); downloadJSON(data, runtime);
}); });
@ -451,7 +474,7 @@ function loadPieChart(data) {
labels: ["Normal Traffic", "Attacks"], labels: ["Normal Traffic", "Attacks"],
datasets: [{ datasets: [{
data: [data.normal_count || 0, data.attack_count || 0], data: [data.normal_count || 0, data.attack_count || 0],
backgroundColor: ["#14b8a6", "#dc2626"], backgroundColor: ["#14b8a6", "#dc2626"]
}] }]
}, },
options: { options: {

View File

@ -1,4 +1,3 @@
function show(el) { el.classList.remove("hidden"); } function show(el) { el.classList.remove("hidden"); }
function hide(el) { el.classList.add("hidden"); } function hide(el) { el.classList.add("hidden"); }
@ -18,6 +17,7 @@ function splitCsvLine(line) {
for (let i = 0; i < line.length; i++) { for (let i = 0; i < line.length; i++) {
const ch = line[i]; const ch = line[i];
if (ch === '"') { if (ch === '"') {
if (inQuotes && line[i + 1] === '"') { if (inQuotes && line[i + 1] === '"') {
cur += '"'; cur += '"';
@ -32,6 +32,7 @@ function splitCsvLine(line) {
cur += ch; cur += ch;
} }
} }
out.push(cur); out.push(cur);
return out.map(v => v.trim()); return out.map(v => v.trim());
} }
@ -58,35 +59,143 @@ const btnStep3Next = document.getElementById("btn_step3_next");
const btnRun = document.getElementById("btn_run"); const btnRun = document.getElementById("btn_run");
const MAX_FEATURES = 5; const MAX_FEATURES = 5;
const MAX_FILE_SIZE = 30 * 1024 * 1024;
const BUSY_MESSAGE = "Another algorithm is already running. Please wait until it finishes.";
let headers = []; let headers = [];
let rows = []; let rows = [];
let labelIndex = -1; let labelIndex = -1;
let numericColumnIdx = []; let numericColumnIdx = [];
/**
 * Show a message to the user.
 *
 * Uses the global `showErrorPopup` when one is defined as a function;
 * otherwise falls back to the built-in `alert`.
 *
 * @param {string} message - Text to display.
 */
function showPopup(message) {
    // `typeof` on an undeclared identifier is safe and yields "undefined".
    const hasCustomPopup = typeof showErrorPopup === "function";

    if (!hasCustomPopup) {
        alert(message);
        return;
    }

    showErrorPopup(message);
}
/**
 * Return the upload wizard to its initial state.
 *
 * Clears the file input, discards the parsed CSV state held in the
 * module-level variables, empties the label/normal/feature widgets,
 * resets the feature counter text and hides steps 2-5 together with
 * the three inline error elements.
 */
function resetUploadFlow() {
    // Forget the chosen file and everything derived from it.
    fileInput.value = "";
    headers = [];
    rows = [];
    labelIndex = -1;
    numericColumnIdx = [];

    // Put the selection widgets back to their empty defaults.
    labelSelect.innerHTML = `<option value="">-- select label column --</option>`;
    normalBox.innerHTML = "";
    featureList.innerHTML = "";
    featureCounter.textContent = `0 / ${MAX_FEATURES} selected`;

    // Hide every later step and every inline error, in the original order.
    const toHide = [step2, step3, step4, step5, labelError, normalError, featureError];
    for (const el of toHide) {
        hide(el);
    }
}
/**
 * Ask the backend whether a job is currently running.
 *
 * Sends a GET request to `/get-status` and inspects the `running` field of
 * the JSON reply.
 *
 * @returns {Promise<boolean>} `true` when `running` is strictly `true`.
 *   Also `true` when the status cannot be determined (non-OK response,
 *   network error, unparsable body); in those cases a popup describing the
 *   failure is shown first.
 */
async function isBackendBusy() {
    // NOTE(review): callers visible in this file show BUSY_MESSAGE whenever
    // this returns true, so a failed check produces two popups in a row.
    try {
        const response = await fetch("/get-status");

        if (response.ok) {
            const { running } = await response.json();
            return running === true;
        }

        showPopup("Backend status could not be checked.");
        return true;
    } catch {
        // Any failure in the request or while reading the reply lands here.
        showPopup("Connection to backend was lost.");
        return true;
    }
}
/**
 * Extract a human-readable error message from a response.
 *
 * Parses a clone of the response as JSON (so the original body stays
 * unread) and returns the first truthy value among its `error` and
 * `message` fields.
 *
 * @param {Response} response - Response whose body may carry error details.
 * @param {string} fallback - Value returned when no message can be extracted.
 * @returns {Promise<*>} The extracted message, or `fallback` when both
 *   fields are falsy or cloning/parsing/reading throws.
 */
async function readErrorMessage(response, fallback) {
    try {
        const payload = await response.clone().json();

        // Preference order: `error` first, then `message`.
        for (const key of ["error", "message"]) {
            const candidate = payload[key];
            if (candidate) {
                return candidate;
            }
        }

        return fallback;
    } catch {
        return fallback;
    }
}
fileInput.addEventListener("change", async () => { fileInput.addEventListener("change", async () => {
const formData = new FormData();
formData.append("file", fileInput.files[0]);
await fetch("/upload-dataset", {
method: "POST",
body: formData
});
if (!fileInput.files.length) return; if (!fileInput.files.length) return;
const file = fileInput.files[0]; const file = fileInput.files[0];
const text = await file.text();
if (file.size > MAX_FILE_SIZE) {
showPopup("File is too big! Maximum size is 30MB.");
resetUploadFlow();
return;
}
if (!file.name.toLowerCase().endsWith(".csv")) {
showPopup("Only CSV files are allowed.");
resetUploadFlow();
return;
}
if (await isBackendBusy()) {
showPopup(BUSY_MESSAGE);
resetUploadFlow();
return;
}
const formData = new FormData();
formData.append("file", file);
let uploadResponse;
try {
uploadResponse = await fetch("/upload-dataset", {
method: "POST",
body: formData
});
} catch {
showPopup("Dataset upload failed. Backend server is not available.");
resetUploadFlow();
return;
}
if (!uploadResponse.ok) {
if (uploadResponse.status !== 409) {
const message = await readErrorMessage(uploadResponse, "Dataset upload failed.");
showPopup(message);
}
resetUploadFlow();
return;
}
let text;
try {
text = await file.text();
} catch {
showPopup("File could not be read.");
resetUploadFlow();
return;
}
const lines = text.split(/\r?\n/).filter(l => l.trim() !== ""); const lines = text.split(/\r?\n/).filter(l => l.trim() !== "");
if (lines.length < 2) return;
if (lines.length < 2) {
showPopup("CSV file must contain a header and at least one data row.");
resetUploadFlow();
return;
}
headers = splitCsvLine(lines[0]); headers = splitCsvLine(lines[0]);
rows = lines.slice(1).map(splitCsvLine); rows = lines.slice(1).map(splitCsvLine);
labelSelect.innerHTML = `<option value="">-- select label column --</option>`; labelSelect.innerHTML = `<option value="">-- select label column --</option>`;
headers.forEach((h, idx) => { headers.forEach((h, idx) => {
const opt = document.createElement("option"); const opt = document.createElement("option");
opt.value = idx; opt.value = idx;
@ -113,19 +222,31 @@ function detectNumericColumns(headers, rows, sampleCount) {
for (let r = 0; r < samples; r++) { for (let r = 0; r < samples; r++) {
const v = rows[r][c]; const v = rows[r][c];
if (v === undefined || v === "") continue; if (v === undefined || v === "") continue;
total++; total++;
if (!isNaN(Number(v))) valid++;
if (!isNaN(Number(v))) {
valid++;
}
} }
if (total > 0 && valid / total >= 0.9) { if (total > 0 && valid / total >= 0.9) {
result.push(c); result.push(c);
} }
} }
return result; return result;
} }
btnStep2Next.addEventListener("click", () => { btnStep2Next.addEventListener("click", async () => {
if (await isBackendBusy()) {
showPopup(BUSY_MESSAGE);
resetUploadFlow();
return;
}
if (labelSelect.value === "") { if (labelSelect.value === "") {
show(labelError); show(labelError);
return; return;
@ -135,12 +256,17 @@ btnStep2Next.addEventListener("click", () => {
labelIndex = Number(labelSelect.value); labelIndex = Number(labelSelect.value);
const uniques = new Set(); const uniques = new Set();
rows.forEach(r => { rows.forEach(r => {
const v = r[labelIndex]; const v = r[labelIndex];
if (v !== undefined && v !== "") uniques.add(v.trim());
if (v !== undefined && v !== "") {
uniques.add(v.trim());
}
}); });
normalBox.innerHTML = ""; normalBox.innerHTML = "";
uniques.forEach(val => { uniques.forEach(val => {
const label = document.createElement("label"); const label = document.createElement("label");
label.className = label.className =
@ -150,6 +276,7 @@ btnStep2Next.addEventListener("click", () => {
<input type="checkbox" value="${escapeHtml(val)}" class="accent-orange-500"> <input type="checkbox" value="${escapeHtml(val)}" class="accent-orange-500">
<span>${escapeHtml(val)}</span> <span>${escapeHtml(val)}</span>
`; `;
normalBox.appendChild(label); normalBox.appendChild(label);
}); });
@ -169,8 +296,15 @@ btnStep2Next.addEventListener("click", () => {
hide(normalError); hide(normalError);
}); });
btnStep3Next.addEventListener("click", () => { btnStep3Next.addEventListener("click", async () => {
if (await isBackendBusy()) {
showPopup(BUSY_MESSAGE);
resetUploadFlow();
return;
}
const checked = normalBox.querySelectorAll("input:checked"); const checked = normalBox.querySelectorAll("input:checked");
if (checked.length !== 1) { if (checked.length !== 1) {
normalError.textContent = "Select exactly one NORMAL value."; normalError.textContent = "Select exactly one NORMAL value.";
show(normalError); show(normalError);
@ -180,6 +314,7 @@ btnStep3Next.addEventListener("click", () => {
hide(normalError); hide(normalError);
featureList.innerHTML = ""; featureList.innerHTML = "";
numericColumnIdx numericColumnIdx
.filter(idx => idx !== labelIndex) .filter(idx => idx !== labelIndex)
.forEach(idx => { .forEach(idx => {
@ -194,6 +329,7 @@ btnStep3Next.addEventListener("click", () => {
</div> </div>
<span class="text-xs text-gray-500">numeric</span> <span class="text-xs text-gray-500">numeric</span>
`; `;
featureList.appendChild(label); featureList.appendChild(label);
}); });
@ -203,6 +339,7 @@ btnStep3Next.addEventListener("click", () => {
updateCounter(); updateCounter();
show(step4); show(step4);
hide(step5);
hide(featureError); hide(featureError);
}); });
@ -219,18 +356,39 @@ function updateCounter() {
} }
btnRun.addEventListener("click", async () => { btnRun.addEventListener("click", async () => {
if (await isBackendBusy()) {
const labelColumnName = headers[labelIndex]; showPopup(BUSY_MESSAGE);
let normalValue = normalBox.querySelector("input:checked").value; resetUploadFlow();
return;
if (!isNaN(normalValue)) { }
normalValue = Number(normalValue);
}
const selectedFeatures = Array.from( const selectedFeatures = Array.from(
featureList.querySelectorAll("input:checked") featureList.querySelectorAll("input:checked")
).map(cb => headers[cb.dataset.col]); ).map(cb => headers[cb.dataset.col]);
if (selectedFeatures.length !== MAX_FEATURES) {
featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`;
show(featureError);
return;
}
hide(featureError);
const labelColumnName = headers[labelIndex];
const normalInput = normalBox.querySelector("input:checked");
if (!normalInput) {
normalError.textContent = "Select exactly one NORMAL value.";
show(normalError);
return;
}
let normalValue = normalInput.value;
if (!isNaN(normalValue)) {
normalValue = Number(normalValue);
}
const config = { const config = {
dataset: { dataset: {
file_path: "temp/upload.csv", file_path: "temp/upload.csv",
@ -247,25 +405,32 @@ btnRun.addEventListener("click", async () => {
expected_feature_count: selectedFeatures.length expected_feature_count: selectedFeatures.length
}, },
algorithm: { algorithm: {
name: "isolation_forest" name: "custom_dataset"
} }
}; };
await fetch("/save-config", { let saveResponse;
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(config)
});
const selected = featureList.querySelectorAll("input:checked").length;
if (selected !== MAX_FEATURES) { try {
featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`; saveResponse = await fetch("/save-config", {
show(featureError); method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(config)
});
} catch {
showPopup("Configuration could not be saved. Backend server is not available.");
return; return;
} }
hide(featureError); if (!saveResponse.ok) {
if (saveResponse.status !== 409) {
const message = await readErrorMessage(saveResponse, "Configuration could not be saved.");
showPopup(message);
}
show(step5); hide(step5);
}); return;
}
show(step5);
});

View File

@ -287,9 +287,22 @@ async function restoreRunningJob() {
} }
} }
startBtn.addEventListener("click", () => { startBtn.addEventListener("click", async () => {
if (!chosenAlgorithm) return; if (!chosenAlgorithm) return;
try {
const statusRes = await fetch("/get-status");
const status = await statusRes.json();
if (status.running) {
showErrorPopup("Another algorithm is already running.<br>Please wait until it finishes.");
return;
}
} catch {
showErrorPopup("Connection to backend was lost.<br>Please start the backend and try again.");
return;
}
showLoading(); showLoading();
backendErrorShown = false; backendErrorShown = false;
saveActiveJob(); saveActiveJob();

View File

@ -1 +0,0 @@
{"progress": 100}