import re

# Patterns hoisted to module level and precompiled so repeated calls do not
# rebuild them; behavior is identical to the inline re.* calls they replace.
_DIGIT_RE = re.compile(r'\d')
_SPECIAL_RE = re.compile(r'[^a-zA-Z0-9]')
_IPV4_RE = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
_LONG_DIGITS_RE = re.compile(r'\d{4,}')
_MULTI_DASH_RE = re.compile(r'-{2,}')

# Tokens whose presence in a URL is a common phishing signal.
_SUSPICIOUS_WORDS = (
    'login', 'verify', 'update', 'confirm',
    'account', 'secure', 'ebayisapi', 'banking',
)


def extract_url_features(url):
    """Return a dict of lexical features describing *url*.

    The feature names, definitions, and insertion order must stay exactly
    as they are: a downstream scaler/model was fitted on these columns,
    and the caller builds DataFrame columns from this dict's order.

    Args:
        url: the URL string to featurize.

    Returns:
        dict mapping feature name -> int or bool value.
    """
    lowered = url.lower()
    features = {}
    features['digit_count'] = len(_DIGIT_RE.findall(url))
    # Simple substring-occurrence counters, all via str.count.
    for name, token in (
        ('dash_count', '-'),
        ('underscore_count', '_'),
        ('percent_count', '%'),
        ('equal_count', '='),
        ('question_count', '?'),
        ('at_count', '@'),
        ('count_of_exclamation', '!'),
        ('count_of_dot', '.'),
        ('count_of_double_slash', '//'),
    ):
        features[name] = url.count(token)
    features['special_char_count'] = len(_SPECIAL_RE.findall(url))
    features['is_ip_in_url'] = _IPV4_RE.search(url) is not None
    # Substring test by design: matches 'www' anywhere in the URL.
    features['has_www'] = 'www' in url
    # Counts distinct suspicious words present, not total occurrences.
    features['suspicious_word_count'] = sum(
        1 for word in _SUSPICIOUS_WORDS if word in lowered
    )
    # "scheme://host" contributes two slashes, so subtract them to
    # approximate path depth (can go negative for scheme-less URLs).
    features['path_depth'] = url.count('/') - 2
    features['has_long_digit_sequence'] = _LONG_DIGITS_RE.search(url) is not None
    features['has_multiple_dash'] = _MULTI_DASH_RE.search(url) is not None
    # Prefix test by design: True for 'https...', False for plain 'http://'.
    features['has_https'] = url.startswith('https')
    features['ends_with_common_extension'] = url.endswith(('.html', '.php'))
    return features
import pickle

import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model

from Feature import extract_url_features

# NOTE(review): accuracy_score / f1_score / precision_score / recall_score
# were imported from sklearn.metrics but never used anywhere in this
# script, so the dead dependency on sklearn has been dropped.

SCALER_PATH = "scaler.pkl"
MODEL_PATH = "best_model.h5"
# Decision threshold applied to the model's single output
# (presumably a sigmoid probability -- confirm against training code).
BEST_THRESHOLD = 0.5


def _load_artifacts():
    """Load the fitted scaler and the trained Keras model from disk.

    Returns:
        (scaler, model): scaler is whatever object was pickled into
        scaler.pkl; model is the Keras model stored in best_model.h5.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file;
    # only load scaler.pkl from a trusted source.
    with open(SCALER_PATH, "rb") as f:
        scaler = pickle.load(f)
    model = load_model(MODEL_PATH)
    return scaler, model


@tf.function(reduce_retracing=True)
def predict_with_model(model, input_data):
    """Run one forward pass; reduce_retracing limits tf.function retraces
    when input shapes/dtypes vary slightly between calls."""
    return model(input_data)


def main():
    """Prompt for a URL, featurize it, and print the phishing verdict."""
    scaler, model = _load_artifacts()

    url = input("URL입력 : ")

    # One row; column order is the feature dict's insertion order, which
    # must match the order the scaler/model were trained with.
    features = extract_url_features(url)
    input_df = pd.DataFrame([list(features.values())], columns=features.keys())

    input_scaled = scaler.transform(input_df)

    prediction = predict_with_model(model, input_scaled)

    # Print the result: prediction is assumed to be a (1, 1) output tensor.
    if prediction[0][0] > BEST_THRESHOLD:
        print('악')  # malicious
    else:
        print('정')  # benign


if __name__ == "__main__":
    main()