Upload files to "Nam"

This commit is contained in:
2025-05-21 11:03:24 +00:00
parent 66f5c75d1e
commit 58056ce8a4
2 changed files with 56 additions and 32 deletions

View File

@@ -1,4 +1,11 @@
import re import re
from collections import Counter
from scipy.stats import entropy
def calculate_url_entropy(url):
    """Return the Shannon entropy, in bits, of the characters in ``url``.

    Args:
        url: The URL string to measure.

    Returns:
        The base-2 entropy of the character-frequency distribution of
        ``url``; ``0.0`` for an empty string.
    """
    # An empty string has no character distribution. Return 0.0 explicitly
    # rather than handing scipy an empty probability vector.
    if not url:
        return 0.0
    total = len(url)
    probabilities = [count / total for count in Counter(url).values()]
    return entropy(probabilities, base=2)
def extract_url_features(url): def extract_url_features(url):
suspicious_words = [ suspicious_words = [
@@ -25,5 +32,7 @@ def extract_url_features(url):
'has_long_digit_sequence': bool(re.search(r'\d{4,}', url)), 'has_long_digit_sequence': bool(re.search(r'\d{4,}', url)),
'has_multiple_dash': bool(re.search(r'-{2,}', url)), 'has_multiple_dash': bool(re.search(r'-{2,}', url)),
'has_https': url.startswith('https'), 'has_https': url.startswith('https'),
'ends_with_common_extension' : url.endswith(('.html', '.php')) 'ends_with_common_extension': url.endswith(('.html', '.php')),
'url_length': len(url), # ✅ 추가
'url_entropy': calculate_url_entropy(url) # ✅ 추가
} }

View File

@@ -1,39 +1,54 @@
import pandas as pd import pandas as pd
import pickle import pickle
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from Feature import extract_url_features from Feature import extract_url_features
from collections import Counter
from scipy.stats import entropy
import tensorflow as tf import tensorflow as tf
# 🔹 URL 엔트로피 계산 함수
def calculate_url_entropy(url):
    """Return the Shannon entropy, in bits, of the characters in ``url``.

    NOTE(review): this appears to duplicate the helper of the same name in
    the feature module; consider importing it instead -- confirm.

    Args:
        url: The URL string to measure.

    Returns:
        The base-2 entropy of the character-frequency distribution of
        ``url``; ``0.0`` for an empty string.
    """
    # An empty string has no character distribution. Return 0.0 explicitly
    # rather than handing scipy an empty probability vector.
    if not url:
        return 0.0
    total = len(url)
    probabilities = [count / total for count in Counter(url).values()]
    return entropy(probabilities, base=2)
# Load the pickled scaler.
# NOTE: unpickling can execute arbitrary code, so scaler.pkl must come from
# a trusted source.
with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# Load the model from best_model.h5.
model = load_model("best_model.h5")
# 🔹 예측 함수
@tf.function(reduce_retracing=True)
def predict_with_model(model, input_data):
    """Call ``model`` on ``input_data`` and return whatever it produces.

    The call is wrapped in ``tf.function`` with ``reduce_retracing=True``.

    Args:
        model: The callable model to invoke.
        input_data: The input passed to ``model`` unchanged.

    Returns:
        The result of ``model(input_data)``.
    """
    outputs = model(input_data)
    return outputs
# Read the URL to classify from standard input.
url = input("URL입력 : ")

# Extract the feature dict using extract_url_features from Feature.
features = extract_url_features(url)

# Supply length and entropy here as well, in case the extractor did not
# provide them; any existing values under these keys are overwritten.
features.update(
    url_length=len(url),
    url_entropy=calculate_url_entropy(url),
)

# Build a one-row frame and order its columns by the scaler's
# feature_names_in_.
input_df = pd.DataFrame([features])
expected_columns = list(scaler.feature_names_in_)
input_df = input_df[expected_columns]

# Scale the features.
input_scaled = scaler.transform(input_df)

# Run the model and take element [0][0] of its output as a plain float.
prediction = predict_with_model(model, input_scaled)
score = float(prediction.numpy()[0][0])

# Report the verdict: scores above the threshold are reported as malicious,
# everything else as benign (with 1 - score as the benign probability).
threshold = 0.5
if score > threshold:
    verdict = f"악성 (악성일 확률: {score:.4f})"
else:
    verdict = f"정상 (정상일 확률: {1 - score:.4f})"
print(verdict)