import re

# Patterns hoisted to module level and precompiled so repeated calls do not
# rebuild them; behavior is identical to the inline re.* calls they replace.
_DIGIT_RE = re.compile(r'\d')
_SPECIAL_RE = re.compile(r'[^a-zA-Z0-9]')
_IPV4_RE = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
_LONG_DIGITS_RE = re.compile(r'\d{4,}')
_MULTI_DASH_RE = re.compile(r'-{2,}')

# Tokens whose presence in a URL is a common phishing signal.
_SUSPICIOUS_WORDS = (
    'login', 'verify', 'update', 'confirm',
    'account', 'secure', 'ebayisapi', 'banking',
)


def extract_url_features(url):
    """Return a dict of lexical features describing *url*.

    The feature names, definitions, and insertion order must stay exactly
    as they are: a downstream scaler/model was fitted on these columns,
    and the caller builds DataFrame columns from this dict's order.

    Args:
        url: the URL string to featurize.

    Returns:
        dict mapping feature name -> int or bool value.
    """
    lowered = url.lower()
    features = {}
    features['digit_count'] = len(_DIGIT_RE.findall(url))
    # Simple substring-occurrence counters, all via str.count.
    for name, token in (
        ('dash_count', '-'),
        ('underscore_count', '_'),
        ('percent_count', '%'),
        ('equal_count', '='),
        ('question_count', '?'),
        ('at_count', '@'),
        ('count_of_exclamation', '!'),
        ('count_of_dot', '.'),
        ('count_of_double_slash', '//'),
    ):
        features[name] = url.count(token)
    features['special_char_count'] = len(_SPECIAL_RE.findall(url))
    features['is_ip_in_url'] = _IPV4_RE.search(url) is not None
    # Substring test by design: matches 'www' anywhere in the URL.
    features['has_www'] = 'www' in url
    # Counts distinct suspicious words present, not total occurrences.
    features['suspicious_word_count'] = sum(
        1 for word in _SUSPICIOUS_WORDS if word in lowered
    )
    # "scheme://host" contributes two slashes, so subtract them to
    # approximate path depth (can go negative for scheme-less URLs).
    features['path_depth'] = url.count('/') - 2
    features['has_long_digit_sequence'] = _LONG_DIGITS_RE.search(url) is not None
    features['has_multiple_dash'] = _MULTI_DASH_RE.search(url) is not None
    # Prefix test by design: True for 'https...', False for plain 'http://'.
    features['has_https'] = url.startswith('https')
    features['ends_with_common_extension'] = url.endswith(('.html', '.php'))
    return features
import pickle

import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model

from Feature import extract_url_features

# NOTE(review): accuracy_score / f1_score / precision_score / recall_score
# were imported from sklearn.metrics but never used anywhere in this
# script, so the dead dependency on sklearn has been dropped.

SCALER_PATH = "scaler.pkl"
MODEL_PATH = "best_model.h5"
# Decision threshold applied to the model's single output
# (presumably a sigmoid probability -- confirm against training code).
BEST_THRESHOLD = 0.5


def _load_artifacts():
    """Load the fitted scaler and the trained Keras model from disk.

    Returns:
        (scaler, model): scaler is whatever object was pickled into
        scaler.pkl; model is the Keras model stored in best_model.h5.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file;
    # only load scaler.pkl from a trusted source.
    with open(SCALER_PATH, "rb") as f:
        scaler = pickle.load(f)
    model = load_model(MODEL_PATH)
    return scaler, model


@tf.function(reduce_retracing=True)
def predict_with_model(model, input_data):
    """Run one forward pass; reduce_retracing limits tf.function retraces
    when input shapes/dtypes vary slightly between calls."""
    return model(input_data)


def main():
    """Prompt for a URL, featurize it, and print the phishing verdict."""
    scaler, model = _load_artifacts()

    url = input("URL입력 : ")

    # One row; column order is the feature dict's insertion order, which
    # must match the order the scaler/model were trained with.
    features = extract_url_features(url)
    input_df = pd.DataFrame([list(features.values())], columns=features.keys())

    input_scaled = scaler.transform(input_df)

    prediction = predict_with_model(model, input_scaled)

    # Print the result: prediction is assumed to be a (1, 1) output tensor.
    if prediction[0][0] > BEST_THRESHOLD:
        print('악')  # malicious
    else:
        print('정')  # benign


if __name__ == "__main__":
    main()