Upload files to "Nam"
This commit is contained in:
@@ -1,4 +1,11 @@
|
|||||||
import re
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
from scipy.stats import entropy
|
||||||
|
|
||||||
|
def calculate_url_entropy(url):
    """Return the Shannon entropy, in bits, of the characters in ``url``.

    The relative frequency of each distinct character is used as its
    probability, and the resulting distribution is passed to
    ``scipy.stats.entropy`` with ``base=2``.

    Args:
        url: The URL string to measure.

    Returns:
        The entropy as a ``float``. An empty (falsy) ``url`` yields ``0.0``.
    """
    if not url:
        # An empty URL has no characters, hence an empty distribution.
        # Return 0.0 explicitly rather than depend on how scipy treats an
        # empty input, so downstream scaling never receives a non-number.
        return 0.0

    total = len(url)  # computed once instead of once per distinct character
    probabilities = [count / total for count in Counter(url).values()]
    return float(entropy(probabilities, base=2))
|
||||||
|
|
||||||
def extract_url_features(url):
|
def extract_url_features(url):
|
||||||
suspicious_words = [
|
suspicious_words = [
|
||||||
@@ -7,23 +14,25 @@ def extract_url_features(url):
|
|||||||
]
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'digit_count' : len(re.findall(r'\d', url)),
|
'digit_count': len(re.findall(r'\d', url)),
|
||||||
'dash_count' : url.count('-'),
|
'dash_count': url.count('-'),
|
||||||
'underscore_count' : url.count('_'),
|
'underscore_count': url.count('_'),
|
||||||
'percent_count' : url.count('%'),
|
'percent_count': url.count('%'),
|
||||||
'equal_count' : url.count('='),
|
'equal_count': url.count('='),
|
||||||
'question_count' : url.count('?'),
|
'question_count': url.count('?'),
|
||||||
'at_count' : url.count('@'),
|
'at_count': url.count('@'),
|
||||||
'count_of_exclamation' : url.count('!'),
|
'count_of_exclamation': url.count('!'),
|
||||||
'count_of_dot' : url.count('.'),
|
'count_of_dot': url.count('.'),
|
||||||
'count_of_double_slash' : url.count('//'),
|
'count_of_double_slash': url.count('//'),
|
||||||
'special_char_count' : len(re.findall(r'[^a-zA-Z0-9]', url)),
|
'special_char_count': len(re.findall(r'[^a-zA-Z0-9]', url)),
|
||||||
'is_ip_in_url' : bool(re.search(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', url)),
|
'is_ip_in_url': bool(re.search(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', url)),
|
||||||
'has_www' : 'www' in url,
|
'has_www': 'www' in url,
|
||||||
'suspicious_word_count' : sum(word in url.lower() for word in suspicious_words),
|
'suspicious_word_count': sum(word in url.lower() for word in suspicious_words),
|
||||||
'path_depth' : url.count('/') - 2,
|
'path_depth': url.count('/') - 2,
|
||||||
'has_long_digit_sequence' : bool(re.search(r'\d{4,}', url)),
|
'has_long_digit_sequence': bool(re.search(r'\d{4,}', url)),
|
||||||
'has_multiple_dash' : bool(re.search(r'-{2,}', url)),
|
'has_multiple_dash': bool(re.search(r'-{2,}', url)),
|
||||||
'has_https' : url.startswith('https'),
|
'has_https': url.startswith('https'),
|
||||||
'ends_with_common_extension' : url.endswith(('.html', '.php'))
|
'ends_with_common_extension': url.endswith(('.html', '.php')),
|
||||||
|
'url_length': len(url), # ✅ 추가
|
||||||
|
'url_entropy': calculate_url_entropy(url) # ✅ 추가
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,39 +1,54 @@
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pickle
|
import pickle
|
||||||
from tensorflow.keras.models import load_model
|
from tensorflow.keras.models import load_model
|
||||||
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
|
|
||||||
from Feature import extract_url_features
|
from Feature import extract_url_features
|
||||||
|
from collections import Counter
|
||||||
|
from scipy.stats import entropy
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
|
# 🔹 URL 엔트로피 계산 함수
|
||||||
|
def calculate_url_entropy(url):
    """Return the Shannon entropy, in bits, of the characters in ``url``.

    Character counts are normalised by the URL length to form a probability
    distribution, which is passed to ``scipy.stats.entropy`` with ``base=2``.

    NOTE(review): the module that provides ``extract_url_features`` appears
    to define the same helper; consider importing it instead of keeping a
    second copy here.

    Args:
        url: The URL string to measure.

    Returns:
        The entropy as a ``float``. An empty (falsy) ``url`` yields ``0.0``.
    """
    if not url:
        # Nothing to count: return 0.0 directly so an empty distribution is
        # never handed to scipy and no non-number reaches the scaler.
        return 0.0

    total = len(url)  # hoisted out of the comprehension
    probabilities = [count / total for count in Counter(url).values()]
    return float(entropy(probabilities, base=2))
|
||||||
|
|
||||||
# 4. 스케일러 불러오기
|
# 🔹 스케일러 불러오기
|
||||||
with open("scaler.pkl", "rb") as f:
|
with open("scaler.pkl", "rb") as f:
|
||||||
scaler = pickle.load(f)
|
scaler = pickle.load(f)
|
||||||
|
|
||||||
# 5. 모델 불러오기
|
# 🔹 모델 불러오기
|
||||||
model = load_model("best_model.h5")
|
model = load_model("best_model.h5")
|
||||||
|
|
||||||
|
# 🔹 예측 함수
|
||||||
@tf.function(reduce_retracing=True)
def predict_with_model(model, input_data):
    """Call ``model`` on ``input_data`` and return whatever the call produces.

    The function is wrapped in ``tf.function`` with ``reduce_retracing=True``.

    Args:
        model: A callable model object; it is invoked directly.
        input_data: The input passed unchanged to ``model``.

    Returns:
        The result of ``model(input_data)``.
    """
    outputs = model(input_data)
    return outputs
|
||||||
|
|
||||||
|
# 🔹 입력 URL 받기
|
||||||
url = input("URL입력 : ")
|
url = input("URL입력 : ")
|
||||||
|
|
||||||
|
# 🔹 Feature.py에서 피처 추출
|
||||||
features = extract_url_features(url)
|
features = extract_url_features(url)
|
||||||
input_df = pd.DataFrame([list(features.values())], columns= features.keys())
|
|
||||||
|
|
||||||
|
# 🔹 누락된 피처 보완
|
||||||
|
features['url_length'] = len(url)
|
||||||
|
features['url_entropy'] = calculate_url_entropy(url)
|
||||||
|
|
||||||
|
# 🔹 데이터프레임 생성 및 정렬
|
||||||
|
input_df = pd.DataFrame([features])
|
||||||
|
expected_columns = list(scaler.feature_names_in_)
|
||||||
|
input_df = input_df[expected_columns]
|
||||||
|
|
||||||
|
# 🔹 스케일링
|
||||||
input_scaled = scaler.transform(input_df)
|
input_scaled = scaler.transform(input_df)
|
||||||
|
|
||||||
|
# 🔹 예측
|
||||||
prediction = predict_with_model(model, input_scaled)
|
prediction = predict_with_model(model, input_scaled)
|
||||||
|
score = float(prediction.numpy()[0][0]) # 🔥 정확히 float으로 변환
|
||||||
|
|
||||||
|
# 🔹 출력
|
||||||
# 7. 결과 출력
|
threshold = 0.5
|
||||||
best_threshold = 0.5
|
if score > threshold:
|
||||||
if prediction[0][0] > best_threshold:
|
print(f"악성 (악성일 확률: {score:.4f})")
|
||||||
print('악')
|
|
||||||
else:
|
else:
|
||||||
print('정')
|
print(f"정상 (정상일 확률: {1 - score:.4f})")
|
||||||
|
|||||||
Reference in New Issue
Block a user