v2
This commit is contained in:
parent
42a4dbbe8c
commit
44aeb8cf26
14
.dockerignore
Normal file
14
.dockerignore
Normal file
@ -0,0 +1,14 @@
|
||||
__pycache__
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.Python
|
||||
env/
|
||||
venv/
|
||||
*.git
|
||||
*.tox
|
||||
*.nox
|
||||
*.coverage
|
||||
*.hypothesis
|
||||
dist/
|
||||
build/
|
||||
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
8
.idea/Hackujeme.iml
Normal file
8
.idea/Hackujeme.iml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
7
.idea/misc.xml
Normal file
7
.idea/misc.xml
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.13" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/Hackujeme.iml" filepath="$PROJECT_DIR$/.idea/Hackujeme.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
Binary file not shown.
129
Backend/app.py
129
Backend/app.py
@ -18,9 +18,6 @@ from datetime import datetime
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.units import cm
|
||||
|
||||
from isolation_forest_cicids import run_isolation_forest
|
||||
from autoencoder import run_autoencoder
|
||||
from isolation_forest_custom import run_isolation_forest_custom
|
||||
from autoencoder_custom import run_autoencoder_custom
|
||||
import signal
|
||||
@ -37,13 +34,20 @@ FRONTEND_DIR = os.path.join(BASE_DIR, "..", "Frontend")
|
||||
# Frontend asset directories, resolved relative to FRONTEND_DIR.
HTML_DIR = os.path.join(FRONTEND_DIR, "HTML Files")
JS_DIR = os.path.join(FRONTEND_DIR, "JS")
IMG_DIR = os.path.join(FRONTEND_DIR, "img")

# Pre-computed demo results that the cached demo jobs copy into place.
CACHE_DIR = os.path.join(BASE_DIR, "cached_results")
CACHE_IFOREST = os.path.join(CACHE_DIR, "cached_iforest_result.json")
CACHE_AUTOENCODER = os.path.join(CACHE_DIR, "cached_autoencoder_result.json")

# Make sure the working directories exist before any request is served.
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(REPORT_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)

app = Flask(__name__)
CORS(app)

# Upload limit in bytes (30 MiB); also handed to Flask as MAX_CONTENT_LENGTH.
MAX_UPLOAD_SIZE = 30 * 1024 * 1024
app.config["MAX_CONTENT_LENGTH"] = MAX_UPLOAD_SIZE
|
||||
|
||||
def force_memory_cleanup():
|
||||
gc.collect()
|
||||
try:
|
||||
@ -131,6 +135,93 @@ def write_json_file(path, data):
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def load_cached_demo_result(job_name):
    """Publish a pre-computed demo result as the finished result of a job.

    The cached JSON for ``job_name`` is read, given a ``"runtime"`` entry of
    0 when it has none, written to the job's result file wrapped as
    ``{"ready": True, "data": ...}``, and progress is then set to 100.

    Args:
        job_name: Either ``"iforest"`` or ``"autoencoder"``.

    Raises:
        ValueError: If ``job_name`` is not one of the two cached jobs.
        FileNotFoundError: If ``read_json_file`` returns ``None`` for the
            cache file.
    """
    # job name -> (cached source file, result file read by the frontend)
    paths_by_job = {
        "iforest": (CACHE_IFOREST, RESULT_IFOREST),
        "autoencoder": (CACHE_AUTOENCODER, RESULT_AUTOENCODER),
    }

    try:
        cache_path, result_path = paths_by_job[job_name]
    except KeyError:
        raise ValueError(f"Unknown cached job: {job_name}") from None

    cached_data = read_json_file(cache_path)
    if cached_data is None:
        raise FileNotFoundError(f"Cached result file not found: {cache_path}")

    # Older cache files may lack a runtime; default it so the key always exists.
    cached_data.setdefault("runtime", 0)

    write_json_file(result_path, {
        "ready": True,
        "data": cached_data,
    })

    update_progress(100)
|
||||
|
||||
def cached_worker_entry(job_name):
    """Worker entry point that replays a cached demo result for ``job_name``.

    Progress is stepped through 5, 25, 45, 65 and 85 with a 0.4 second pause
    after each step, and then ``load_cached_demo_result`` publishes the
    cached data. If anything in that sequence raises, an error payload
    (message plus traceback) is written to the job's result file and
    progress is reset to 0.

    Args:
        job_name: Job identifier understood by ``get_result_path`` and
            ``load_cached_demo_result``.
    """
    result_path = get_result_path(job_name)

    try:
        # Simulated progress so the frontend progress bar visibly advances.
        for percent in (5, 25, 45, 65, 85):
            update_progress(percent)
            time.sleep(0.4)

        load_cached_demo_result(job_name)

    except Exception as e:
        # Report the failure through the result file rather than raising,
        # so whoever reads the result sees "ready" together with the error.
        write_json_file(result_path, {
            "ready": True,
            "data": {
                "error": True,
                "message": str(e),
                "details": traceback.format_exc()
            }
        })
        update_progress(0)
|
||||
|
||||
def start_cached_demo_job(job_name):
    """Start the cached-demo worker for ``job_name`` unless a job is running.

    Args:
        job_name: Job identifier passed to ``get_result_path`` and to
            ``cached_worker_entry``.

    Returns:
        ``True`` when a worker process was started, ``False`` when
        ``is_running`` was already set.

    Raises:
        Exception: Anything raised while preparing or starting the worker is
            re-raised after the running state has been rolled back.
    """
    global is_running, current_job, worker_process

    refresh_worker_state()

    # Test and claim the running flag under a single lock acquisition.
    with state_lock:
        if is_running:
            return False

        previous_job = current_job
        is_running = True
        current_job = job_name

    try:
        # Remove any existing result file before the new run starts.
        try:
            os.remove(get_result_path(job_name))
        except OSError:
            pass

        update_progress(0)

        proc = mp.Process(target=cached_worker_entry, args=(job_name,))
        proc.start()
    except Exception:
        # Without this rollback a failed start would leave is_running set
        # and every later start request would be refused.
        with state_lock:
            is_running = False
            current_job = previous_job
        raise

    with state_lock:
        worker_process = proc

    return True
|
||||
|
||||
|
||||
def get_result_path(job_name):
|
||||
mapping = {
|
||||
@ -143,11 +234,7 @@ def get_result_path(job_name):
|
||||
|
||||
|
||||
def get_job_runner(job_name):
|
||||
if job_name == "iforest":
|
||||
return lambda: run_isolation_forest(plot=False, table=False)
|
||||
elif job_name == "autoencoder":
|
||||
return lambda: run_autoencoder(plot=False, table=False)
|
||||
elif job_name == "iforest_custom":
|
||||
if job_name == "iforest_custom":
|
||||
return run_isolation_forest_custom
|
||||
elif job_name == "autoencoder_custom":
|
||||
return run_autoencoder_custom
|
||||
@ -243,25 +330,16 @@ def start_background_job(job_name):
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def read_progress():
|
||||
try:
|
||||
with open(PROGRESS_PATH, "r") as f:
|
||||
return json.load(f).get("progress", 0)
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
update_progress(0)
|
||||
|
||||
|
||||
@app.route("/start-iforest", methods=["POST"])
def start_iforest():
    """Handle ``POST /start-iforest`` by starting the cached ``"iforest"`` job.

    Returns:
        A JSON ``{"status": "started"}`` response, or a 409 response with
        status ``"already_running"`` when ``start_cached_demo_job`` returns
        ``False``.
    """
    # Only the cached demo job is started here; also calling
    # start_background_job first would make this call always see a running job.
    if not start_cached_demo_job("iforest"):
        return jsonify({
            "status": "already_running",
            "message": "Another algorithm is already running"
        }), 409

    return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@ -288,12 +366,14 @@ def start_autoencoder_custom():
|
||||
|
||||
@app.route("/start-autoencoder", methods=["POST"])
def start_autoencoder():
    """Handle ``POST /start-autoencoder`` by starting the cached ``"autoencoder"`` job.

    Returns:
        A JSON ``{"status": "started"}`` response, or a 409 response with
        status ``"already_running"`` when ``start_cached_demo_job`` returns
        ``False``.
    """
    # Only the cached demo job is started here; also calling
    # start_background_job first would make this call always see a running job.
    if not start_cached_demo_job("autoencoder"):
        return jsonify({
            "status": "already_running",
            "message": "Another algorithm is already running"
        }), 409

    return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@ -602,6 +682,13 @@ def upload_dataset():
|
||||
|
||||
file = request.files["file"]
|
||||
|
||||
file.seek(0, os.SEEK_END)
|
||||
file_size = file.tell()
|
||||
file.seek(0)
|
||||
|
||||
if file_size > MAX_UPLOAD_SIZE:
|
||||
return jsonify({"error": "File is too big! Maximum size is 30MB."}), 400
|
||||
|
||||
if file.filename == "":
|
||||
return jsonify({"error": "Empty filename"}), 400
|
||||
|
||||
|
||||
@ -1,223 +0,0 @@
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
import psutil
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import json
|
||||
import gc
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import (
|
||||
confusion_matrix,
|
||||
accuracy_score,
|
||||
precision_recall_fscore_support,
|
||||
)
|
||||
|
||||
from tabulate import tabulate
|
||||
|
||||
|
||||
import os
|
||||
|
||||
def _force_memory_cleanup():
|
||||
gc.collect()
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL("libc.so.6").malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json")
|
||||
DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv")
|
||||
|
||||
|
||||
def update_progress(value):
    """Overwrite the progress file with ``{"progress": value}``.

    Args:
        value: JSON-serialisable progress value stored under ``"progress"``.
    """
    with open(PROGRESS_PATH, "w", encoding="utf-8") as f:
        json.dump({"progress": value}, f)
|
||||
|
||||
|
||||
def run_autoencoder(csv_path=DATASET_PATH, plot=False, table=False):
    """Train a dense autoencoder on the normal rows of a CSV and score all rows.

    Rows whose ``"Attack Type"`` contains "normal" (case-insensitive) are
    labelled 0, everything else 1. The model is fitted on the standardised
    label-0 rows only; every row is then reconstructed, and rows whose mean
    absolute reconstruction error exceeds the 60th percentile are predicted
    as attacks. Progress is reported through ``update_progress`` and the
    process RSS is sampled after each reported step.

    Args:
        csv_path: CSV file to load; it must contain an ``"Attack Type"``
            column, and every other column is used as a feature.
        plot: Accepted for signature compatibility; not used by this function.
        table: When true, print a precision/recall/F1 table.

    Returns:
        A dict with class counts, accuracy, per-class precision/recall/F1,
        ``"top_anomalies"`` (up to five records with the largest
        reconstruction error) and RAM figures in MiB (``ram_before``,
        ``ram_peak``, ``ram_after``, ``ram_increase``).
    """
    # TensorFlow is imported here, not at module level, so importing this
    # module does not load it.
    import tensorflow as tf

    from tensorflow.keras import Model
    from tensorflow.keras.layers import Dense, Input, Dropout
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import EarlyStopping
    from tensorflow.keras import backend as K

    process = psutil.Process()
    ram_before = process.memory_info().rss
    ram_peak = ram_before

    def _checkpoint(value):
        """Report a progress value and fold the current RSS into the peak."""
        nonlocal ram_peak
        update_progress(value)
        ram_peak = max(ram_peak, process.memory_info().rss)

    _checkpoint(1)

    df = pd.read_csv(csv_path)
    _checkpoint(10)

    df["fraud"] = df["Attack Type"].apply(
        lambda x: 0 if "normal" in str(x).lower() else 1
    )
    true_labels = df["fraud"]

    normal_count = int((true_labels == 0).sum())
    attack_count = int((true_labels == 1).sum())

    features = [c for c in df.columns if c not in ["Attack Type", "fraud"]]
    scaler = StandardScaler()

    X_raw = df[features].to_numpy(dtype=np.float32, copy=True)
    y = df["fraud"].to_numpy(dtype=np.int8, copy=True)

    X_scaled = scaler.fit_transform(X_raw).astype(np.float32, copy=False)

    # Train on label-0 rows only; evaluate on every row.
    X_train_normal = X_scaled[y == 0]
    X_test = X_scaled
    y_test = y

    CODE_DIM = 16
    INPUT_SHAPE = X_scaled.shape[1]

    inp = Input(shape=(INPUT_SHAPE,))
    x = Dense(128, activation="relu")(inp)
    x = Dropout(0.1)(x)
    x = Dense(64, activation="relu")(x)
    x = Dense(16, activation="relu")(x)
    code = Dense(CODE_DIM, activation="relu")(x)
    x = Dense(16, activation="relu")(code)
    x = Dense(64, activation="relu")(x)
    x = Dense(128, activation="relu")(x)
    out = Dense(INPUT_SHAPE, activation="linear")(x)

    autoencoder = Model(inp, out)
    autoencoder.compile(loss="mae", optimizer=Adam(learning_rate=0.001))
    _checkpoint(40)

    earlystopping = EarlyStopping(
        monitor="val_loss", patience=5, restore_best_weights=True
    )

    history = autoencoder.fit(
        X_train_normal,
        X_train_normal,
        epochs=20,
        batch_size=64,
        validation_split=0.1,
        callbacks=[earlystopping],
        shuffle=True,
        verbose=1,
    )
    _checkpoint(60)

    reconstructions = autoencoder.predict(X_test, verbose=0)
    reconstruction_error = np.mean(np.abs(reconstructions - X_test), axis=1)

    recons_df = pd.DataFrame(
        {"error": reconstruction_error, "y_true": y_test}
    ).reset_index(drop=True)

    # Rows above the 60th percentile of reconstruction error are flagged.
    threshold = np.percentile(recons_df["error"], 60)
    recons_df["y_pred"] = (recons_df["error"] > threshold).astype(int)
    _checkpoint(80)

    accuracy = accuracy_score(recons_df["y_true"], recons_df["y_pred"])
    precision, recall, f1, _ = precision_recall_fscore_support(
        recons_df["y_true"],
        recons_df["y_pred"],
        average=None,
        labels=[0, 1],
    )
    _checkpoint(90)

    if table:
        table_data = [
            ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"],
            ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"],
            ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"],
        ]
        print(
            tabulate(
                table_data,
                headers=["Class", "Precision", "Recall", "F1-Score"],
                tablefmt="fancy_grid",
            )
        )

    results = {
        "normal_count": float(normal_count),
        "attack_count": float(attack_count),
        "accuracy": float(accuracy),
        "precision_normal": float(precision[0]),
        "recall_normal": float(recall[0]),
        "f1_normal": float(f1[0]),
        "precision_attack": float(precision[1]),
        "recall_attack": float(recall[1]),
        "f1_attack": float(f1[1]),
    }

    # Prefer correctly flagged attacks; widen the pool in two steps when
    # fewer than five rows qualify.
    candidates = recons_df[
        (recons_df["y_pred"] == 1) & (recons_df["y_true"] == 1)
    ].copy()

    if len(candidates) < 5:
        extra = recons_df[recons_df["y_pred"] == 1].copy()
        candidates = pd.concat([candidates, extra]).drop_duplicates()

    if len(candidates) < 5:
        candidates = recons_df.copy()

    candidates = candidates.sort_values("error", ascending=False).head(5)
    idx = candidates.index

    # recons_df was re-indexed 0..n-1, so its labels are row positions in df.
    df_top = df.iloc[idx].copy()
    df_top["reconstruction_error"] = candidates["error"].values

    important_cols = [
        "Attack Type",
        "Destination Port",
        "Flow Duration",
        "Total Fwd Packets",
        "Flow Packets/s",
        "Packet Length Mean",
    ]
    cols_exist = [c for c in important_cols if c in df_top.columns]

    top_anomalies = df_top[cols_exist + ["reconstruction_error"]].to_dict(
        orient="records"
    )

    results["top_anomalies"] = top_anomalies

    # Drop the large objects before measuring the post-run memory.
    del X_raw, X_scaled, X_train_normal, X_test, y_test, y
    del reconstructions, reconstruction_error, recons_df, candidates, df_top
    del autoencoder, history, scaler, true_labels, df

    K.clear_session()
    tf.keras.backend.clear_session(free_memory=True)
    _force_memory_cleanup()

    ram_after = process.memory_info().rss
    results["ram_before"] = round(ram_before / (1024 ** 2), 2)
    results["ram_peak"] = round(ram_peak / (1024 ** 2), 2)
    results["ram_after"] = round(ram_after / (1024 ** 2), 2)
    results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2)

    update_progress(100)

    return results
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run on the default dataset and print the result dict.
    res = run_autoencoder(plot=True, table=True)
    print(res)
|
||||
63
Backend/cached_results/cached_autoencoder_result.json
Normal file
63
Backend/cached_results/cached_autoencoder_result.json
Normal file
@ -0,0 +1,63 @@
|
||||
{
|
||||
"normal_count": 2095057.0,
|
||||
"attack_count": 425694.0,
|
||||
"accuracy": 0.7128641424718268,
|
||||
"precision_normal": 0.9533234465116556,
|
||||
"recall_normal": 0.6882175520761488,
|
||||
"f1_normal": 0.7993634386950771,
|
||||
"precision_attack": 0.35217494793216303,
|
||||
"recall_attack": 0.8341625674780476,
|
||||
"f1_attack": 0.4952573023318089,
|
||||
"top_anomalies": [
|
||||
{
|
||||
"Attack Type": "DoS",
|
||||
"Destination Port": 80,
|
||||
"Flow Duration": 1,
|
||||
"Total Fwd Packets": 1,
|
||||
"Flow Packets/s": 2000000.0,
|
||||
"Packet Length Mean": 1322.0,
|
||||
"reconstruction_error": 2.6239676475524902
|
||||
},
|
||||
{
|
||||
"Attack Type": "DoS",
|
||||
"Destination Port": 80,
|
||||
"Flow Duration": 97501845,
|
||||
"Total Fwd Packets": 4,
|
||||
"Flow Packets/s": 0.071793513,
|
||||
"Packet Length Mean": 1489.375,
|
||||
"reconstruction_error": 1.4105286598205566
|
||||
},
|
||||
{
|
||||
"Attack Type": "DoS",
|
||||
"Destination Port": 80,
|
||||
"Flow Duration": 97509484,
|
||||
"Total Fwd Packets": 4,
|
||||
"Flow Packets/s": 0.071787889,
|
||||
"Packet Length Mean": 1495.875,
|
||||
"reconstruction_error": 1.401184320449829
|
||||
},
|
||||
{
|
||||
"Attack Type": "DoS",
|
||||
"Destination Port": 80,
|
||||
"Flow Duration": 95488861,
|
||||
"Total Fwd Packets": 4,
|
||||
"Flow Packets/s": 0.07330698,
|
||||
"Packet Length Mean": 1495.0,
|
||||
"reconstruction_error": 1.3920245170593262
|
||||
},
|
||||
{
|
||||
"Attack Type": "DoS",
|
||||
"Destination Port": 80,
|
||||
"Flow Duration": 99036701,
|
||||
"Total Fwd Packets": 6,
|
||||
"Flow Packets/s": 0.090875402,
|
||||
"Packet Length Mean": 1198.9,
|
||||
"reconstruction_error": 1.3627992868423462
|
||||
}
|
||||
],
|
||||
"ram_before": 386.41,
|
||||
"ram_peak": 4589.86,
|
||||
"ram_after": 1194.69,
|
||||
"ram_increase": 4203.45,
|
||||
"runtime": 719.9
|
||||
}
|
||||
64
Backend/cached_results/cached_iforest_result.json
Normal file
64
Backend/cached_results/cached_iforest_result.json
Normal file
@ -0,0 +1,64 @@
|
||||
{
|
||||
"normal_count": 2095057.0,
|
||||
"attack_count": 425694.0,
|
||||
"contamination": 0.42218965697127564,
|
||||
"accuracy": 0.7300296617952349,
|
||||
"precision_normal": 0.9855814946609444,
|
||||
"recall_normal": 0.6851985411375442,
|
||||
"f1_normal": 0.8083876846075615,
|
||||
"precision_attack": 0.38027193625378214,
|
||||
"recall_attack": 0.9506664411525649,
|
||||
"f1_attack": 0.5432434355271513,
|
||||
"top_anomalies": [
|
||||
{
|
||||
"Attack Type": "Bots",
|
||||
"Destination Port": 8080,
|
||||
"Flow Duration": 104836,
|
||||
"Total Fwd Packets": 35,
|
||||
"Flow Packets/s": 1096.951429,
|
||||
"Packet Length Mean": 1797.637931,
|
||||
"score": -0.3361174695180047
|
||||
},
|
||||
{
|
||||
"Attack Type": "Port Scanning",
|
||||
"Destination Port": 55055,
|
||||
"Flow Duration": 116005956,
|
||||
"Total Fwd Packets": 2,
|
||||
"Flow Packets/s": 0.034480988,
|
||||
"Packet Length Mean": 3.2,
|
||||
"score": -0.3274269932966515
|
||||
},
|
||||
{
|
||||
"Attack Type": "Port Scanning",
|
||||
"Destination Port": 32781,
|
||||
"Flow Duration": 117169685,
|
||||
"Total Fwd Packets": 2,
|
||||
"Flow Packets/s": 0.034138523,
|
||||
"Packet Length Mean": 3.2,
|
||||
"score": -0.32230849167538755
|
||||
},
|
||||
{
|
||||
"Attack Type": "Port Scanning",
|
||||
"Destination Port": 873,
|
||||
"Flow Duration": 119809735,
|
||||
"Total Fwd Packets": 2,
|
||||
"Flow Packets/s": 0.033386269,
|
||||
"Packet Length Mean": 3.2,
|
||||
"score": -0.32111814642871583
|
||||
},
|
||||
{
|
||||
"Attack Type": "Port Scanning",
|
||||
"Destination Port": 21571,
|
||||
"Flow Duration": 117624607,
|
||||
"Total Fwd Packets": 2,
|
||||
"Flow Packets/s": 0.03400649,
|
||||
"Packet Length Mean": 3.2,
|
||||
"score": -0.31684442769693955
|
||||
}
|
||||
],
|
||||
"ram_before": 182.07,
|
||||
"ram_peak": 3098.45,
|
||||
"ram_after": 186.78,
|
||||
"ram_increase": 2916.39,
|
||||
"runtime": 543.79
|
||||
}
|
||||
Binary file not shown.
|
Can't render this file because it is too large.
|
@ -1,211 +0,0 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import json
|
||||
import psutil
|
||||
from sklearn.ensemble import IsolationForest
|
||||
from sklearn.preprocessing import RobustScaler
|
||||
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support
|
||||
from tabulate import tabulate
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
|
||||
import gc
|
||||
|
||||
import os
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
PROGRESS_PATH = os.path.join(BASE_DIR, "progress.json")
|
||||
DATASET_PATH = os.path.join(BASE_DIR, "dataset", "cicids2017_cleaned.csv")
|
||||
|
||||
def _force_memory_cleanup():
|
||||
gc.collect()
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL("libc.so.6").malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def update_progress(value):
    """Overwrite the progress file with ``{"progress": value}``.

    Args:
        value: JSON-serialisable progress value stored under ``"progress"``.
    """
    with open(PROGRESS_PATH, "w", encoding="utf-8") as f:
        json.dump({"progress": value}, f)
|
||||
|
||||
|
||||
def run_isolation_forest(csv_path=DATASET_PATH, plot=False, table=False):
    """Fit an Isolation Forest on a CSV of flows and evaluate it against labels.

    Rows whose ``"Attack Type"`` contains "normal" (case-insensitive) are
    labelled 0, everything else 1. The numeric columns with a standard
    deviation above 0.01 are robust-scaled and used to fit the forest; its
    predictions are compared with the labels. Progress is reported through
    ``update_progress`` and the process RSS is sampled after each reported
    step.

    Args:
        csv_path: CSV file to load; it must contain an ``"Attack Type"`` column.
        plot: When true, show an anomaly-score scatter plot and a
            confusion-matrix heatmap.
        table: When true, print a precision/recall/F1 table.

    Returns:
        A dict with class counts, the contamination used, accuracy, per-class
        precision/recall/F1, ``"top_anomalies"`` (up to five records with the
        lowest anomaly score) and RAM figures in MiB (``ram_before``,
        ``ram_peak``, ``ram_after``, ``ram_increase``).
    """
    process = psutil.Process()
    ram_before = process.memory_info().rss
    ram_peak = ram_before

    def _checkpoint(value):
        """Report a progress value and fold the current RSS into the peak."""
        nonlocal ram_peak
        update_progress(value)
        ram_peak = max(ram_peak, process.memory_info().rss)

    _checkpoint(1)

    df = pd.read_csv(csv_path)
    _checkpoint(10)

    df["fraud"] = df["Attack Type"].apply(
        lambda x: 0 if "normal" in str(x).lower() else 1
    )
    true_labels = df["fraud"]
    _checkpoint(20)

    normal_count = int((true_labels == 0).sum())
    attack_count = int((true_labels == 1).sum())

    # Contamination is 2.5x the labelled attack share, capped at 0.49.
    attack_fraction = true_labels.mean()
    contamination = min(attack_fraction * 2.5, 0.49)

    X = df.select_dtypes(include=[np.number])
    # "fraud" is numeric, so it must be excluded explicitly; otherwise the
    # ground-truth label would be fed to the model as a feature.
    X = X.loc[:, (X.std() > 0.01) & (X.columns != "fraud")]
    X = X.to_numpy(dtype=np.float32, copy=True)
    _checkpoint(30)

    scaler = RobustScaler()
    X_scaled = scaler.fit_transform(X).astype(np.float32, copy=False)
    _checkpoint(40)

    model = IsolationForest(
        n_estimators=600,
        max_samples=0.3,
        contamination=contamination,
        max_features=0.7,
        bootstrap=False,
        random_state=42,
        n_jobs=1,
    )
    model.fit(X_scaled)
    _checkpoint(70)

    preds = model.predict(X_scaled)
    # predict() yields 1 for inliers; map that to label 0 and the rest to 1.
    df["pred_label"] = np.where(preds == 1, 0, 1)
    df["anomaly_score"] = model.decision_function(X_scaled)
    _checkpoint(85)

    accuracy = accuracy_score(true_labels, df["pred_label"])
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, df["pred_label"], average=None, labels=[0, 1]
    )
    _checkpoint(95)

    if table:
        table_data = [
            ["Normal (0)", f"{precision[0]:.4f}", f"{recall[0]:.4f}", f"{f1[0]:.4f}"],
            ["Attack (1)", f"{precision[1]:.4f}", f"{recall[1]:.4f}", f"{f1[1]:.4f}"],
            ["Overall Accuracy", "-", "-", f"{accuracy:.4f}"],
        ]
        print(
            tabulate(
                table_data,
                headers=["Class", "Precision", "Recall", "F1-Score"],
                tablefmt="fancy_grid",
            )
        )

    if plot:
        # The confusion matrix is only needed for the heatmap below.
        cm = confusion_matrix(true_labels, df["pred_label"])

        plt.figure(figsize=(10, 5))
        scatter = plt.scatter(
            range(len(df)),
            df["anomaly_score"],
            c=df["pred_label"],
            cmap="coolwarm",
            s=10,
        )
        plt.xlabel("Instance")
        plt.ylabel("Anomaly Score")
        plt.title("Anomaly Score Distribution (Isolation Forest)")
        handles, labels = scatter.legend_elements()
        plt.legend(handles, ["Normal", "Anomaly"], title="Predicted")
        plt.show()

        plt.figure(figsize=(5, 4))
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=["Normal", "Attack"],
            yticklabels=["Normal", "Attack"],
        )
        plt.xlabel("Predicted Label")
        plt.ylabel("True Label")
        plt.title("Confusion Matrix (Isolation Forest)")
        plt.show()

    results = {
        "normal_count": float(normal_count),
        "attack_count": float(attack_count),
        "contamination": float(contamination),
        "accuracy": float(accuracy),
        "precision_normal": float(precision[0]),
        "recall_normal": float(recall[0]),
        "f1_normal": float(f1[0]),
        "precision_attack": float(precision[1]),
        "recall_attack": float(recall[1]),
        "f1_attack": float(f1[1]),
    }

    # Prefer correctly flagged attacks; widen the pool in two steps when
    # fewer than five rows qualify.
    candidates = df[(df["pred_label"] == 1) & (df["fraud"] == 1)].copy()

    if len(candidates) < 5:
        extra = df[df["pred_label"] == 1].copy()
        candidates = pd.concat([candidates, extra]).drop_duplicates()

    if len(candidates) < 5:
        candidates = df.copy()

    # Ascending sort: the lowest decision_function scores come first.
    candidates = candidates.sort_values("anomaly_score").head(5)

    important_cols = [
        "Attack Type",
        "Destination Port",
        "Flow Duration",
        "Total Fwd Packets",
        "Flow Packets/s",
        "Packet Length Mean",
    ]

    cols_exist = [c for c in important_cols if c in candidates.columns]

    top_anomalies = candidates[cols_exist + ["anomaly_score"]].rename(
        columns={"anomaly_score": "score"}
    ).to_dict(orient="records")

    results["top_anomalies"] = top_anomalies

    ram_peak = max(ram_peak, process.memory_info().rss)

    # Drop the large objects before measuring the post-run memory.
    del X, X_scaled, preds, model, scaler, candidates, df, true_labels
    _force_memory_cleanup()

    ram_after = process.memory_info().rss

    results["ram_before"] = round(ram_before / (1024 ** 2), 2)
    results["ram_peak"] = round(ram_peak / (1024 ** 2), 2)
    results["ram_after"] = round(ram_after / (1024 ** 2), 2)
    results["ram_increase"] = round((ram_peak - ram_before) / (1024 ** 2), 2)

    return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run on the default dataset and print the result dict.
    res = run_isolation_forest(plot=True, table=True)
    print(res)
|
||||
@ -86,7 +86,7 @@
|
||||
<h2 class="text-3xl font-bold text-orange-500 mb-4 flex items-center justify-center">CICIDS Demo</h2>
|
||||
|
||||
<p class="text-lg leading-relaxed mb-6 text-center text-white">
|
||||
Here, you can test the algoritmhs on
|
||||
Here, you can see the results of both algorithms which were tested on
|
||||
<span class="text-orange-500 font-semibold">CICIDS</span> dataset.
|
||||
</p>
|
||||
|
||||
|
||||
@ -231,10 +231,10 @@
|
||||
const uploadField = document.getElementById("file_input");
|
||||
|
||||
uploadField.onchange = function() {
|
||||
const MAX_SIZE = 50 * 1024 * 1024;
|
||||
const MAX_SIZE = 30 * 1024 * 1024;
|
||||
|
||||
if(this.files[0].size > MAX_SIZE) {
|
||||
showErrorPopup("File is too big! Maximum size is 50MB.");
|
||||
showErrorPopup("File is too big! Maximum size is 30MB!");
|
||||
this.value = "";
|
||||
}
|
||||
};
|
||||
|
||||
@ -139,6 +139,7 @@ function fetchFinalResults(retryCount = 0) {
|
||||
setTimeout(() => fetchFinalResults(retryCount + 1), 1000);
|
||||
return;
|
||||
}
|
||||
|
||||
hideLoading();
|
||||
clearActiveJob();
|
||||
showErrorPopup("Final results are not ready.");
|
||||
@ -156,7 +157,7 @@ function fetchFinalResults(retryCount = 0) {
|
||||
|
||||
displayResults(json.data);
|
||||
})
|
||||
.catch(err => {
|
||||
.catch(() => {
|
||||
hideLoading();
|
||||
clearActiveJob();
|
||||
showErrorPopup("Error fetching final results.");
|
||||
@ -187,7 +188,7 @@ function startProgressPolling() {
|
||||
progressInterval = null;
|
||||
fetchFinalResults();
|
||||
}
|
||||
} catch (err) {
|
||||
} catch {
|
||||
clearInterval(progressInterval);
|
||||
progressInterval = null;
|
||||
hideLoading();
|
||||
@ -198,7 +199,7 @@ function startProgressPolling() {
|
||||
showErrorPopup("Connection to backend was lost.<br>Backend may have stopped.");
|
||||
}
|
||||
}
|
||||
}, 1500);
|
||||
}, 500);
|
||||
}
|
||||
|
||||
async function restoreRunningJob() {
|
||||
@ -209,12 +210,33 @@ async function restoreRunningJob() {
|
||||
const res = await fetch("/get-status");
|
||||
const status = await res.json();
|
||||
|
||||
applySavedAlgorithm(saved);
|
||||
|
||||
if (status.current_job !== saved.backendJob) {
|
||||
try {
|
||||
const resultRes = await fetch(getResultURL());
|
||||
const resultJson = await resultRes.json();
|
||||
|
||||
if (resultJson.ready) {
|
||||
hideLoading();
|
||||
clearActiveJob();
|
||||
lastResults = resultJson.data;
|
||||
|
||||
if (resultJson.data?.error) {
|
||||
showErrorPopup(`Algorithm failed.<br>${resultJson.data.message}`);
|
||||
return;
|
||||
}
|
||||
|
||||
displayResults(resultJson.data);
|
||||
return;
|
||||
}
|
||||
} catch {
|
||||
}
|
||||
|
||||
clearActiveJob();
|
||||
return;
|
||||
}
|
||||
|
||||
applySavedAlgorithm(saved);
|
||||
showLoading(`Resuming ${chosenAlgorithmName}...<br>This may take a while.`);
|
||||
updateProgressUI(status.progress || 0);
|
||||
|
||||
@ -223,7 +245,7 @@ async function restoreRunningJob() {
|
||||
} else {
|
||||
fetchFinalResults();
|
||||
}
|
||||
} catch (err) {
|
||||
} catch {
|
||||
hideLoading();
|
||||
clearActiveJob();
|
||||
|
||||
@ -231,13 +253,13 @@ async function restoreRunningJob() {
|
||||
backendErrorShown = true;
|
||||
showErrorPopup("Connection to backend was lost.<br>Please start the backend and try again.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
startBtn.addEventListener("click", () => {
|
||||
if (!chosenAlgorithm) return;
|
||||
|
||||
showLoading();
|
||||
showLoading(`Loading prepared ${chosenAlgorithmName} results...<br>This may take a moment.`);
|
||||
backendErrorShown = false;
|
||||
saveActiveJob();
|
||||
|
||||
@ -264,8 +286,11 @@ startBtn.addEventListener("click", () => {
|
||||
throw new Error("Server error");
|
||||
}
|
||||
|
||||
await res.json();
|
||||
|
||||
const saved = loadActiveJob();
|
||||
startTime = saved?.startedAt ?? Date.now();
|
||||
|
||||
startProgressPolling();
|
||||
})
|
||||
.catch(err => {
|
||||
@ -282,10 +307,12 @@ function displayResults(data) {
|
||||
const container = document.getElementById("resultContainer");
|
||||
container.classList.remove("hidden");
|
||||
|
||||
const runtime = ((Date.now() - startTime) / 1000).toFixed(2);
|
||||
const runtime = data.runtime !== undefined && data.runtime !== null
|
||||
? Number(data.runtime).toFixed(2)
|
||||
: ((Date.now() - startTime) / 1000).toFixed(2);
|
||||
|
||||
let topAnomaliesHTML = "";
|
||||
|
||||
// Dynamicky vytvoríme tabuľku pre Top 5 anomálie (ak existujú)
|
||||
let topAnomaliesHTML = '';
|
||||
if (data.top_anomalies && data.top_anomalies.length > 0) {
|
||||
const headers = Object.keys(data.top_anomalies[0]);
|
||||
|
||||
@ -298,7 +325,7 @@ function displayResults(data) {
|
||||
<table class="min-w-full bg-gray-800 rounded-xl shadow-xl">
|
||||
<thead class="bg-gray-700">
|
||||
<tr>
|
||||
${headers.map(h => `<th class="px-4 py-3 text-orange-500">${h}</th>`).join('')}
|
||||
${headers.map(h => `<th class="px-4 py-3 text-orange-500">${h}</th>`).join("")}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@ -306,9 +333,9 @@ function displayResults(data) {
|
||||
<tr class="border-b border-gray-700 hover:bg-gray-700/50 transition">
|
||||
${headers.map(key => {
|
||||
const val = row[key];
|
||||
return `<td class="px-4 py-3 text-center">${typeof val === 'number' ? val.toFixed(4) : val}</td>`;
|
||||
}).join('')}
|
||||
</tr>`).join('')}
|
||||
return `<td class="px-4 py-3 text-center">${typeof val === "number" ? val.toFixed(4) : val}</td>`;
|
||||
}).join("")}
|
||||
</tr>`).join("")}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
@ -320,7 +347,6 @@ function displayResults(data) {
|
||||
Results
|
||||
</h2>
|
||||
|
||||
<!-- Metriky tabuľka -->
|
||||
<div class="overflow-x-auto mb-10">
|
||||
<table class="min-w-full bg-gray-800 rounded-xl shadow-xl text-center">
|
||||
<thead class="bg-gray-700">
|
||||
@ -354,15 +380,12 @@ function displayResults(data) {
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- Distribúcia + Systémové info -->
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 gap-10 mb-10">
|
||||
<!-- Pie Chart -->
|
||||
<div class="flex flex-col items-center">
|
||||
<h3 class="text-2xl text-orange-500 font-semibold mb-4">Class distribution</h3>
|
||||
<canvas id="pieChart" class="w-80 h-80"></canvas>
|
||||
</div>
|
||||
|
||||
<!-- RAM a počty -->
|
||||
<div class="bg-gray-800/60 rounded-xl p-8 border border-gray-700">
|
||||
<h3 class="text-2xl text-orange-500 font-semibold mb-6">Performance</h3>
|
||||
<div class="space-y-4 text-lg">
|
||||
@ -379,7 +402,6 @@ function displayResults(data) {
|
||||
|
||||
${topAnomaliesHTML}
|
||||
|
||||
<!-- Download tlačidlá -->
|
||||
<div class="w-full flex justify-center gap-10 mt-12">
|
||||
<button id="downloadPDF"
|
||||
class="px-10 py-5 bg-orange-500 hover:bg-orange-400 text-white text-xl font-bold rounded-xl transition shadow-xl cursor-pointer transform hover:scale-105">
|
||||
@ -397,6 +419,7 @@ function displayResults(data) {
|
||||
document.getElementById("downloadPDF").addEventListener("click", () => {
|
||||
downloadPDF(data, runtime);
|
||||
});
|
||||
|
||||
document.getElementById("downloadJSON").addEventListener("click", () => {
|
||||
downloadJSON(data, runtime);
|
||||
});
|
||||
@ -451,7 +474,7 @@ function loadPieChart(data) {
|
||||
labels: ["Normal Traffic", "Attacks"],
|
||||
datasets: [{
|
||||
data: [data.normal_count || 0, data.attack_count || 0],
|
||||
backgroundColor: ["#14b8a6", "#dc2626"],
|
||||
backgroundColor: ["#14b8a6", "#dc2626"]
|
||||
}]
|
||||
},
|
||||
options: {
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
|
||||
function show(el) { el.classList.remove("hidden"); }
|
||||
function hide(el) { el.classList.add("hidden"); }
|
||||
|
||||
@ -18,6 +17,7 @@ function splitCsvLine(line) {
|
||||
|
||||
for (let i = 0; i < line.length; i++) {
|
||||
const ch = line[i];
|
||||
|
||||
if (ch === '"') {
|
||||
if (inQuotes && line[i + 1] === '"') {
|
||||
cur += '"';
|
||||
@ -32,6 +32,7 @@ function splitCsvLine(line) {
|
||||
cur += ch;
|
||||
}
|
||||
}
|
||||
|
||||
out.push(cur);
|
||||
return out.map(v => v.trim());
|
||||
}
|
||||
@ -58,35 +59,143 @@ const btnStep3Next = document.getElementById("btn_step3_next");
|
||||
const btnRun = document.getElementById("btn_run");
|
||||
|
||||
const MAX_FEATURES = 5;
|
||||
|
||||
const MAX_FILE_SIZE = 30 * 1024 * 1024;
|
||||
const BUSY_MESSAGE = "Another algorithm is already running. Please wait until it finishes.";
|
||||
|
||||
let headers = [];
|
||||
let rows = [];
|
||||
let labelIndex = -1;
|
||||
let numericColumnIdx = [];
|
||||
|
||||
function showPopup(message) {
|
||||
if (typeof showErrorPopup === "function") {
|
||||
showErrorPopup(message);
|
||||
} else {
|
||||
alert(message);
|
||||
}
|
||||
}
|
||||
|
||||
function resetUploadFlow() {
|
||||
fileInput.value = "";
|
||||
|
||||
headers = [];
|
||||
rows = [];
|
||||
labelIndex = -1;
|
||||
numericColumnIdx = [];
|
||||
|
||||
labelSelect.innerHTML = `<option value="">-- select label column --</option>`;
|
||||
normalBox.innerHTML = "";
|
||||
featureList.innerHTML = "";
|
||||
featureCounter.textContent = `0 / ${MAX_FEATURES} selected`;
|
||||
|
||||
hide(step2);
|
||||
hide(step3);
|
||||
hide(step4);
|
||||
hide(step5);
|
||||
|
||||
hide(labelError);
|
||||
hide(normalError);
|
||||
hide(featureError);
|
||||
}
|
||||
|
||||
async function isBackendBusy() {
|
||||
try {
|
||||
const res = await fetch("/get-status");
|
||||
|
||||
if (!res.ok) {
|
||||
showPopup("Backend status could not be checked.");
|
||||
return true;
|
||||
}
|
||||
|
||||
const status = await res.json();
|
||||
return status.running === true;
|
||||
} catch {
|
||||
showPopup("Connection to backend was lost.");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
async function readErrorMessage(response, fallback) {
|
||||
try {
|
||||
const data = await response.clone().json();
|
||||
return data.error || data.message || fallback;
|
||||
} catch {
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
|
||||
fileInput.addEventListener("change", async () => {
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append("file", fileInput.files[0]);
|
||||
|
||||
await fetch("/upload-dataset", {
|
||||
method: "POST",
|
||||
body: formData
|
||||
});
|
||||
|
||||
if (!fileInput.files.length) return;
|
||||
|
||||
const file = fileInput.files[0];
|
||||
const text = await file.text();
|
||||
|
||||
if (file.size > MAX_FILE_SIZE) {
|
||||
showPopup("File is too big! Maximum size is 30MB.");
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!file.name.toLowerCase().endsWith(".csv")) {
|
||||
showPopup("Only CSV files are allowed.");
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
if (await isBackendBusy()) {
|
||||
showPopup(BUSY_MESSAGE);
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append("file", file);
|
||||
|
||||
let uploadResponse;
|
||||
|
||||
try {
|
||||
uploadResponse = await fetch("/upload-dataset", {
|
||||
method: "POST",
|
||||
body: formData
|
||||
});
|
||||
} catch {
|
||||
showPopup("Dataset upload failed. Backend server is not available.");
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!uploadResponse.ok) {
|
||||
if (uploadResponse.status !== 409) {
|
||||
const message = await readErrorMessage(uploadResponse, "Dataset upload failed.");
|
||||
showPopup(message);
|
||||
}
|
||||
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
let text;
|
||||
|
||||
try {
|
||||
text = await file.text();
|
||||
} catch {
|
||||
showPopup("File could not be read.");
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
const lines = text.split(/\r?\n/).filter(l => l.trim() !== "");
|
||||
if (lines.length < 2) return;
|
||||
|
||||
if (lines.length < 2) {
|
||||
showPopup("CSV file must contain a header and at least one data row.");
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
headers = splitCsvLine(lines[0]);
|
||||
rows = lines.slice(1).map(splitCsvLine);
|
||||
|
||||
labelSelect.innerHTML = `<option value="">-- select label column --</option>`;
|
||||
|
||||
headers.forEach((h, idx) => {
|
||||
const opt = document.createElement("option");
|
||||
opt.value = idx;
|
||||
@ -113,19 +222,31 @@ function detectNumericColumns(headers, rows, sampleCount) {
|
||||
|
||||
for (let r = 0; r < samples; r++) {
|
||||
const v = rows[r][c];
|
||||
|
||||
if (v === undefined || v === "") continue;
|
||||
|
||||
total++;
|
||||
if (!isNaN(Number(v))) valid++;
|
||||
|
||||
if (!isNaN(Number(v))) {
|
||||
valid++;
|
||||
}
|
||||
}
|
||||
|
||||
if (total > 0 && valid / total >= 0.9) {
|
||||
result.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
btnStep2Next.addEventListener("click", () => {
|
||||
btnStep2Next.addEventListener("click", async () => {
|
||||
if (await isBackendBusy()) {
|
||||
showPopup(BUSY_MESSAGE);
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
if (labelSelect.value === "") {
|
||||
show(labelError);
|
||||
return;
|
||||
@ -135,12 +256,17 @@ btnStep2Next.addEventListener("click", () => {
|
||||
labelIndex = Number(labelSelect.value);
|
||||
|
||||
const uniques = new Set();
|
||||
|
||||
rows.forEach(r => {
|
||||
const v = r[labelIndex];
|
||||
if (v !== undefined && v !== "") uniques.add(v.trim());
|
||||
|
||||
if (v !== undefined && v !== "") {
|
||||
uniques.add(v.trim());
|
||||
}
|
||||
});
|
||||
|
||||
normalBox.innerHTML = "";
|
||||
|
||||
uniques.forEach(val => {
|
||||
const label = document.createElement("label");
|
||||
label.className =
|
||||
@ -150,6 +276,7 @@ btnStep2Next.addEventListener("click", () => {
|
||||
<input type="checkbox" value="${escapeHtml(val)}" class="accent-orange-500">
|
||||
<span>${escapeHtml(val)}</span>
|
||||
`;
|
||||
|
||||
normalBox.appendChild(label);
|
||||
});
|
||||
|
||||
@ -169,8 +296,15 @@ btnStep2Next.addEventListener("click", () => {
|
||||
hide(normalError);
|
||||
});
|
||||
|
||||
btnStep3Next.addEventListener("click", () => {
|
||||
btnStep3Next.addEventListener("click", async () => {
|
||||
if (await isBackendBusy()) {
|
||||
showPopup(BUSY_MESSAGE);
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
const checked = normalBox.querySelectorAll("input:checked");
|
||||
|
||||
if (checked.length !== 1) {
|
||||
normalError.textContent = "Select exactly one NORMAL value.";
|
||||
show(normalError);
|
||||
@ -180,6 +314,7 @@ btnStep3Next.addEventListener("click", () => {
|
||||
hide(normalError);
|
||||
|
||||
featureList.innerHTML = "";
|
||||
|
||||
numericColumnIdx
|
||||
.filter(idx => idx !== labelIndex)
|
||||
.forEach(idx => {
|
||||
@ -194,6 +329,7 @@ btnStep3Next.addEventListener("click", () => {
|
||||
</div>
|
||||
<span class="text-xs text-gray-500">numeric</span>
|
||||
`;
|
||||
|
||||
featureList.appendChild(label);
|
||||
});
|
||||
|
||||
@ -203,6 +339,7 @@ btnStep3Next.addEventListener("click", () => {
|
||||
|
||||
updateCounter();
|
||||
show(step4);
|
||||
hide(step5);
|
||||
hide(featureError);
|
||||
});
|
||||
|
||||
@ -219,18 +356,39 @@ function updateCounter() {
|
||||
}
|
||||
|
||||
btnRun.addEventListener("click", async () => {
|
||||
|
||||
const labelColumnName = headers[labelIndex];
|
||||
let normalValue = normalBox.querySelector("input:checked").value;
|
||||
|
||||
if (!isNaN(normalValue)) {
|
||||
normalValue = Number(normalValue);
|
||||
if (await isBackendBusy()) {
|
||||
showPopup(BUSY_MESSAGE);
|
||||
resetUploadFlow();
|
||||
return;
|
||||
}
|
||||
|
||||
const selectedFeatures = Array.from(
|
||||
featureList.querySelectorAll("input:checked")
|
||||
).map(cb => headers[cb.dataset.col]);
|
||||
|
||||
if (selectedFeatures.length !== MAX_FEATURES) {
|
||||
featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`;
|
||||
show(featureError);
|
||||
return;
|
||||
}
|
||||
|
||||
hide(featureError);
|
||||
|
||||
const labelColumnName = headers[labelIndex];
|
||||
const normalInput = normalBox.querySelector("input:checked");
|
||||
|
||||
if (!normalInput) {
|
||||
normalError.textContent = "Select exactly one NORMAL value.";
|
||||
show(normalError);
|
||||
return;
|
||||
}
|
||||
|
||||
let normalValue = normalInput.value;
|
||||
|
||||
if (!isNaN(normalValue)) {
|
||||
normalValue = Number(normalValue);
|
||||
}
|
||||
|
||||
const config = {
|
||||
dataset: {
|
||||
file_path: "temp/upload.csv",
|
||||
@ -247,25 +405,32 @@ btnRun.addEventListener("click", async () => {
|
||||
expected_feature_count: selectedFeatures.length
|
||||
},
|
||||
algorithm: {
|
||||
name: "isolation_forest"
|
||||
name: "custom_dataset"
|
||||
}
|
||||
};
|
||||
|
||||
await fetch("/save-config", {
|
||||
let saveResponse;
|
||||
|
||||
try {
|
||||
saveResponse = await fetch("/save-config", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(config)
|
||||
});
|
||||
const selected = featureList.querySelectorAll("input:checked").length;
|
||||
|
||||
if (selected !== MAX_FEATURES) {
|
||||
featureError.textContent = `You must select exactly ${MAX_FEATURES} numeric features.`;
|
||||
show(featureError);
|
||||
} catch {
|
||||
showPopup("Configuration could not be saved. Backend server is not available.");
|
||||
return;
|
||||
}
|
||||
|
||||
hide(featureError);
|
||||
if (!saveResponse.ok) {
|
||||
if (saveResponse.status !== 409) {
|
||||
const message = await readErrorMessage(saveResponse, "Configuration could not be saved.");
|
||||
showPopup(message);
|
||||
}
|
||||
|
||||
hide(step5);
|
||||
return;
|
||||
}
|
||||
|
||||
show(step5);
|
||||
});
|
||||
|
||||
|
||||
@ -287,9 +287,22 @@ async function restoreRunningJob() {
|
||||
}
|
||||
}
|
||||
|
||||
startBtn.addEventListener("click", () => {
|
||||
startBtn.addEventListener("click", async () => {
|
||||
if (!chosenAlgorithm) return;
|
||||
|
||||
try {
|
||||
const statusRes = await fetch("/get-status");
|
||||
const status = await statusRes.json();
|
||||
|
||||
if (status.running) {
|
||||
showErrorPopup("Another algorithm is already running.<br>Please wait until it finishes.");
|
||||
return;
|
||||
}
|
||||
} catch {
|
||||
showErrorPopup("Connection to backend was lost.<br>Please start the backend and try again.");
|
||||
return;
|
||||
}
|
||||
|
||||
showLoading();
|
||||
backendErrorShown = false;
|
||||
saveActiveJob();
|
||||
|
||||
@ -1 +0,0 @@
|
||||
{"progress": 100}
|
||||
Loading…
Reference in New Issue
Block a user