backend-junPreP Update(Update whitelist model), front-urlpredictor.jsx UI Update
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
backend/app/__pycache__/predictor.cpython-310.pyc
Normal file
BIN
backend/app/__pycache__/predictor.cpython-310.pyc
Normal file
Binary file not shown.
Binary file not shown.
@@ -1,52 +1,53 @@
|
|||||||
from app.junPreP import extract_features
|
from app.junPreP import extract_features
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pickle
|
import pickle
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sklearn.preprocessing import MinMaxScaler
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
from tensorflow.keras.models import load_model
|
from tensorflow.keras.models import load_model
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# 모델 및 스케일러 경로 (FastAPI 기준으로 맞춰서 절대 경로 또는 경로 설정)
|
# 모델 및 스케일러 경로 (FastAPI 기준으로 맞춰서 절대 경로 또는 경로 설정)
|
||||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
MODEL_PATH = os.path.join(BASE_DIR, "models", "Recall_0.77.keras")
|
MODEL_PATH = os.path.join(BASE_DIR, "models", "White_list_model.keras")
|
||||||
SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl")
|
SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl")
|
||||||
|
|
||||||
# 모델 및 스케일러 로드 (1회만 수행)
|
# 모델 및 스케일러 로드 (1회만 수행)
|
||||||
model = load_model(MODEL_PATH)
|
model = load_model(MODEL_PATH)
|
||||||
with open(SCALER_PATH, 'rb') as f:
|
with open(SCALER_PATH, 'rb') as f:
|
||||||
scaler = pickle.load(f)
|
scaler = pickle.load(f)
|
||||||
|
|
||||||
# @tf.function으로 추론 최적화
|
# @tf.function으로 추론 최적화
|
||||||
@tf.function(reduce_retracing=True)
|
@tf.function(reduce_retracing=True)
|
||||||
def predict_with_model(model, input_data):
|
def predict_with_model(model, input_data):
|
||||||
return model(input_data)
|
return model(input_data)
|
||||||
|
|
||||||
# Threshold (적절히 조정 가능)
|
# Threshold (적절히 조정 가능)
|
||||||
BEST_THRESHOLD = 0.4034
|
BEST_THRESHOLD = 0.4034
|
||||||
|
|
||||||
# 📦 예측 함수 정의 (FastAPI에서 import해서 사용)
|
# 📦 예측 함수 정의 (FastAPI에서 import해서 사용)
|
||||||
def predict_url_maliciousness(url: str) -> dict:
|
def predict_url_maliciousness(url: str) -> dict:
|
||||||
# 특성 추출
|
# 특성 추출
|
||||||
features = extract_features(url)
|
features = extract_features(url)
|
||||||
input_df = pd.DataFrame([list(features.values())], columns=features.keys())
|
input_df = pd.DataFrame([list(features.values())], columns=features.keys())
|
||||||
|
|
||||||
# 스케일링
|
# 스케일링
|
||||||
input_scaled = scaler.transform(input_df)
|
input_scaled = scaler.transform(input_df)
|
||||||
|
|
||||||
# 예측
|
# 예측
|
||||||
prediction = predict_with_model(model, input_scaled)
|
prediction = predict_with_model(model, input_scaled)
|
||||||
malicious_prob = float(prediction[0][0])
|
malicious_prob = float(prediction[0][0].numpy())
|
||||||
|
|
||||||
# 임계값 기반 판단
|
|
||||||
is_malicious = bool(malicious_prob > BEST_THRESHOLD)
|
# 임계값 기반 판단
|
||||||
|
is_malicious = bool(malicious_prob > BEST_THRESHOLD)
|
||||||
# Ensure all values are Python native types (not numpy types)
|
|
||||||
return {
|
# 예: malicious_probability가 np.float32 타입일 경우
|
||||||
"url": str(url),
|
return {
|
||||||
"malicious_probability": float(malicious_prob),
|
"url": str(url),
|
||||||
"is_malicious": bool(is_malicious),
|
"malicious_probability": malicious_prob,
|
||||||
"threshold": float(BEST_THRESHOLD)
|
"is_malicious": is_malicious,
|
||||||
}
|
"threshold": float(BEST_THRESHOLD)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,204 +1,274 @@
|
|||||||
import re
|
import re
|
||||||
from urllib.parse import urlparse, parse_qs
|
from urllib.parse import urlparse, parse_qs
|
||||||
import tldextract
|
import tldextract
|
||||||
import zlib
|
import zlib
|
||||||
import re
|
from collections import Counter
|
||||||
from urllib.parse import urlparse
|
import math
|
||||||
from collections import Counter
|
|
||||||
import math
|
def url_is_whitelisted(url):
|
||||||
|
trusted_domains = [
|
||||||
|
# 1. 포털 / 검색엔진
|
||||||
|
'naver.com', 'daum.net', 'google.com', 'bing.com', 'yahoo.com',
|
||||||
def check_similar_brand(url):
|
|
||||||
# 자주 사용되는 브랜드/도메인 목록
|
# 2. 소셜 미디어 / 커뮤니케이션
|
||||||
common_brands = {
|
'facebook.com', 'instagram.com', 'twitter.com', 'x.com', 'linkedin.com',
|
||||||
'google', 'facebook', 'amazon', 'microsoft', 'apple',
|
'whatsapp.com', 'kakao.com', 'kakaocorp.com',
|
||||||
'netflix', 'paypal', 'twitter', 'instagram', 'linkedin',
|
|
||||||
'youtube', 'yahoo', 'gmail', 'whatsapp', 'tiktok',
|
# 3. 동영상 / 스트리밍
|
||||||
'geocities', 'angelfire', 'newadvent', 'wikipedia',
|
'youtube.com', 'netflix.com', 'twitch.tv', 'tving.com', 'watcha.com',
|
||||||
}
|
|
||||||
|
# 4. 쇼핑 / 이커머스
|
||||||
# 2. 유사 브랜드 확인
|
'amazon.com', 'gmarket.co.kr', '11st.co.kr', 'coupang.com', 'ssg.com', 'wemakeprice.com',
|
||||||
try:
|
|
||||||
# URL 파싱
|
# 5. 금융 / 결제
|
||||||
parsed = urlparse(url if '//' in url else '//' + url)
|
'paypal.com', 'kbfg.com', 'shinhan.com', 'hanafn.com', 'wooribank.com',
|
||||||
domain = parsed.netloc.lower() if parsed.netloc else url.lower()
|
'kakaobank.com', 'toss.im',
|
||||||
|
|
||||||
for brand in common_brands:
|
# 6. 공공기관 / 교육
|
||||||
if brand not in domain:
|
'gov.kr', 'moe.go.kr', 'epeople.go.kr', 'pusan.ac.kr', 'ac.kr',
|
||||||
similar = False
|
|
||||||
# 비슷한 철자 패턴 확인
|
# 7. IT / 기술
|
||||||
patterns = [
|
'apple.com', 'microsoft.com', 'adobe.com', 'github.com', 'stackoverflow.com'
|
||||||
brand.replace('o', '0'),
|
]
|
||||||
brand.replace('i', '1'),
|
|
||||||
brand.replace('l', '1'),
|
try:
|
||||||
brand.replace('e', '3'),
|
domain = urlparse(url if '//' in url else '//' + url).netloc.lower()
|
||||||
brand.replace('a', '4'),
|
for trusted in trusted_domains:
|
||||||
brand.replace('s', '5'),
|
if domain.endswith(trusted):
|
||||||
brand + '-',
|
return True
|
||||||
brand + '_',
|
return False
|
||||||
brand[:-1], # 마지막 문자 제거
|
except:
|
||||||
''.join(c + c for c in brand), # 문자 중복
|
return False
|
||||||
]
|
|
||||||
|
|
||||||
for pattern in patterns:
|
|
||||||
if pattern in domain:
|
def check_similar_brand(url):
|
||||||
similar = True
|
# 자주 사용되는 브랜드/도메인 목록
|
||||||
break
|
common_brands = {
|
||||||
|
'google', 'facebook', 'amazon', 'microsoft', 'apple',
|
||||||
if similar:
|
'netflix', 'paypal', 'twitter', 'instagram', 'linkedin',
|
||||||
return True # 유사 브랜드가 발견되면 True 반환
|
'youtube', 'yahoo', 'gmail', 'whatsapp', 'tiktok',
|
||||||
|
'geocities', 'angelfire', 'newadvent', 'wikipedia',
|
||||||
except Exception as e:
|
}
|
||||||
return False # 예외 발생 시 False 반환
|
|
||||||
|
# 2. 유사 브랜드 확인
|
||||||
return False # 유사 브랜드가 없으면 False 반환
|
try:
|
||||||
|
# URL 파싱
|
||||||
|
parsed = urlparse(url if '//' in url else '//' + url)
|
||||||
|
domain = parsed.netloc.lower() if parsed.netloc else url.lower()
|
||||||
# url 압축 비율 계산 함수
|
|
||||||
def compression_ratio(url: str) -> float:
|
for brand in common_brands:
|
||||||
if not url:
|
if brand not in domain:
|
||||||
return 0.0
|
similar = False
|
||||||
original_length = len(url.encode('utf-8'))
|
# 비슷한 철자 패턴 확인
|
||||||
compressed_data = zlib.compress(url.encode('utf-8'))
|
patterns = [
|
||||||
compressed_length = len(compressed_data)
|
brand.replace('o', '0'),
|
||||||
return compressed_length / original_length
|
brand.replace('i', '1'),
|
||||||
|
brand.replace('l', '1'),
|
||||||
|
brand.replace('e', '3'),
|
||||||
def extract_features(url):
|
brand.replace('a', '4'),
|
||||||
parsed_url = urlparse(url)
|
brand.replace('s', '5'),
|
||||||
suspicious_keywords = [
|
brand + '-',
|
||||||
'login', 'verify', 'account', 'update', 'secure', 'banking',
|
brand + '_',
|
||||||
'paypal', 'confirm', 'signin', 'auth', 'redirect', 'free',
|
brand[:-1], # 마지막 문자 제거
|
||||||
'bonus', 'admin', 'support', 'server', 'password', 'click',
|
''.join(c + c for c in brand), # 문자 중복
|
||||||
'urgent', 'immediate', 'alert', 'security', 'prompt'
|
]
|
||||||
]
|
|
||||||
|
for pattern in patterns:
|
||||||
additional_keywords = [
|
if pattern in domain:
|
||||||
'verify', 'wallet', 'cryptocurrency', 'bitcoin', 'ethereum',
|
similar = True
|
||||||
'validation', 'authenticate', 'reset', 'recover', 'access',
|
break
|
||||||
'limited', 'offer', 'prize', 'win', 'winner', 'payment',
|
|
||||||
'bank', 'credit', 'debit', 'card', 'expire', 'suspension',
|
if similar:
|
||||||
'unusual', 'activity', 'verify', 'document', 'invoice'
|
return True # 유사 브랜드가 발견되면 True 반환
|
||||||
]
|
|
||||||
|
except Exception as e:
|
||||||
all_keywords = list(set(suspicious_keywords + additional_keywords))
|
return False # 예외 발생 시 False 반환
|
||||||
|
|
||||||
contains_keyword = 0
|
return False # 유사 브랜드가 없으면 False 반환
|
||||||
keyword_count = 0
|
|
||||||
for keyword in all_keywords:
|
|
||||||
if re.search(r'\b' + keyword + r'\b', url, re.IGNORECASE):
|
|
||||||
contains_keyword = 1
|
# url 압축 비율 계산 함수
|
||||||
keyword_count += 1
|
def compression_ratio(url: str) -> float:
|
||||||
|
if not url:
|
||||||
url_length = len(url)
|
return 0.0
|
||||||
extracted = tldextract.extract(url)
|
original_length = len(url.encode('utf-8'))
|
||||||
tld = extracted.suffix
|
compressed_data = zlib.compress(url.encode('utf-8'))
|
||||||
domain = extracted.domain
|
compressed_length = len(compressed_data)
|
||||||
subdomain = extracted.subdomain
|
return compressed_length / original_length
|
||||||
|
|
||||||
tld_length = len(tld) if tld else 0
|
|
||||||
common_tlds = ['com', 'org', 'net', 'edu', 'gov', 'mil', 'io', 'co', 'info', 'biz']
|
def extract_features(url):
|
||||||
is_common_tld = 1 if tld in common_tlds else 0
|
parsed_url = urlparse(url)
|
||||||
country_tlds = ['us', 'uk', 'ca', 'au', 'de', 'fr', 'jp', 'cn', 'ru', 'br', 'in', 'it', 'es']
|
suspicious_keywords = [
|
||||||
is_country_tld = 1 if tld in country_tlds else 0
|
'login', 'verify', 'account', 'update', 'secure', 'banking',
|
||||||
suspicious_tlds = ['xyz', 'top', 'club', 'online', 'site', 'icu', 'vip', 'work', 'rest', 'fit']
|
'paypal', 'confirm', 'signin', 'auth', 'redirect', 'free',
|
||||||
is_suspicious_tld = 1 if tld in suspicious_tlds else 0
|
'bonus', 'admin', 'support', 'server', 'password', 'click',
|
||||||
url_shorteners = ['bit.ly', 'tinyurl.com', 'goo.gl', 't.co', 'ow.ly', 'is.gd', 'buff.ly', 'adf.ly', 'tiny.cc']
|
'urgent', 'immediate', 'alert', 'security', 'prompt'
|
||||||
full_domain = f"{domain}.{tld}" if tld else domain
|
]
|
||||||
is_shortened = 1 if full_domain in url_shorteners else 0
|
|
||||||
|
additional_keywords = [
|
||||||
|
'verify', 'wallet', 'cryptocurrency', 'bitcoin', 'ethereum',
|
||||||
domain_length = len(domain) if domain else 0
|
'validation', 'authenticate', 'reset', 'recover', 'access',
|
||||||
has_subdomain = 1 if subdomain else 0
|
'limited', 'offer', 'prize', 'win', 'winner', 'payment',
|
||||||
subdomain_length = len(subdomain) if subdomain else 0
|
'bank', 'credit', 'debit', 'card', 'expire', 'suspension',
|
||||||
subdomain_count = len(subdomain.split('.')) if subdomain else 0
|
'unusual', 'activity', 'verify', 'document', 'invoice'
|
||||||
|
]
|
||||||
path = parsed_url.path
|
|
||||||
path_length = len(path)
|
all_keywords = list(set(suspicious_keywords + additional_keywords))
|
||||||
path_depth = path.count('/') if path else 0
|
|
||||||
|
contains_keyword = 0
|
||||||
query = parsed_url.query
|
keyword_count = 0
|
||||||
has_query = 1 if query else 0
|
for keyword in all_keywords:
|
||||||
query_length = len(query) if query else 0
|
if re.search(r'\b' + keyword + r'\b', url, re.IGNORECASE):
|
||||||
query_params = parse_qs(query)
|
contains_keyword = 1
|
||||||
query_param_count = len(query_params) if query_params else 0
|
keyword_count += 1
|
||||||
|
|
||||||
has_fragment = 1 if parsed_url.fragment else 0
|
url_length = len(url)
|
||||||
fragment_length = len(parsed_url.fragment) if parsed_url.fragment else 0
|
extracted = tldextract.extract(url)
|
||||||
|
tld = extracted.suffix
|
||||||
# Character type ratios
|
domain = extracted.domain
|
||||||
letter_count = sum(c.isalpha() for c in url)
|
subdomain = extracted.subdomain
|
||||||
digit_count = sum(c.isdigit() for c in url)
|
|
||||||
special_char_count = len(re.findall(r'[^a-zA-Z0-9]', url))
|
tld_length = len(tld) if tld else 0
|
||||||
|
common_tlds = ['com', 'org', 'net', 'edu', 'gov', 'mil', 'io', 'co', 'info', 'biz']
|
||||||
letter_ratio = letter_count / url_length if url_length > 0 else 0
|
is_common_tld = 1 if tld in common_tlds else 0
|
||||||
digit_ratio = digit_count / url_length if url_length > 0 else 0
|
country_tlds = ['us', 'uk', 'ca', 'au', 'de', 'fr', 'jp', 'cn', 'ru', 'br', 'in', 'it', 'es']
|
||||||
special_char_ratio = special_char_count / url_length if url_length > 0 else 0
|
is_country_tld = 1 if tld in country_tlds else 0
|
||||||
|
suspicious_tlds = ['xyz', 'top', 'club', 'online', 'site', 'icu', 'vip', 'work', 'rest', 'fit']
|
||||||
# Character distribution and entropy
|
is_suspicious_tld = 1 if tld in suspicious_tlds else 0
|
||||||
if url:
|
url_shorteners = ['bit.ly', 'tinyurl.com', 'goo.gl', 't.co', 'ow.ly', 'is.gd', 'buff.ly', 'adf.ly', 'tiny.cc']
|
||||||
char_counts = Counter(url)
|
full_domain = f"{domain}.{tld}" if tld else domain
|
||||||
total_chars = len(url)
|
is_shortened = 1 if full_domain in url_shorteners else 0
|
||||||
char_frequencies = {char: count/total_chars for char, count in char_counts.items()}
|
|
||||||
entropy = -sum(freq * math.log2(freq) for freq in char_frequencies.values())
|
|
||||||
else:
|
domain_length = len(domain) if domain else 0
|
||||||
entropy = 0
|
has_subdomain = 1 if subdomain else 0
|
||||||
|
subdomain_length = len(subdomain) if subdomain else 0
|
||||||
|
subdomain_count = len(subdomain.split('.')) if subdomain else 0
|
||||||
|
|
||||||
|
path = parsed_url.path
|
||||||
|
path_length = len(path)
|
||||||
if url_length <= 13:
|
path_depth = path.count('/') if path else 0
|
||||||
url_length_cat = 0
|
|
||||||
elif url_length <= 18:
|
query = parsed_url.query
|
||||||
url_length_cat = 1
|
has_query = 1 if query else 0
|
||||||
elif url_length <= 25:
|
query_length = len(query) if query else 0
|
||||||
url_length_cat = 2
|
query_params = parse_qs(query)
|
||||||
else:
|
query_param_count = len(query_params) if query_params else 0
|
||||||
url_length_cat = 3
|
|
||||||
|
has_fragment = 1 if parsed_url.fragment else 0
|
||||||
return {
|
fragment_length = len(parsed_url.fragment) if parsed_url.fragment else 0
|
||||||
# "url_length": url_length,
|
|
||||||
"url_length_cat": url_length_cat,
|
# Character type ratios
|
||||||
"num_dots": url.count("."),
|
letter_count = sum(c.isalpha() for c in url)
|
||||||
"num_digits": sum(c.isdigit() for c in url),
|
digit_count = sum(c.isdigit() for c in url)
|
||||||
"num_special_chars": len(re.findall(r"[^a-zA-Z0-9]", url)),
|
special_char_count = len(re.findall(r'[^a-zA-Z0-9]', url))
|
||||||
"url_keyword": contains_keyword,
|
|
||||||
# "url_keyword_count": keyword_count,
|
letter_ratio = letter_count / url_length if url_length > 0 else 0
|
||||||
"num_underbar": url.count("_"),
|
digit_ratio = digit_count / url_length if url_length > 0 else 0
|
||||||
"extract_consecutive_numbers": int(bool(re.findall(r'(\d)\1+', url))),
|
special_char_ratio = special_char_count / url_length if url_length > 0 else 0
|
||||||
"number": int(bool(len(re.findall(r'(\d)(?!\1)(\d)(?!\2)(\d)', url)))),
|
|
||||||
"upper": int(any(c.isupper() for c in url)),
|
# Character distribution and entropy
|
||||||
|
if url:
|
||||||
"is_common_tld": is_common_tld,
|
char_counts = Counter(url)
|
||||||
"is country_tld": is_country_tld,
|
total_chars = len(url)
|
||||||
"is_suspicious_tld": is_suspicious_tld,
|
char_frequencies = {char: count/total_chars for char, count in char_counts.items()}
|
||||||
|
entropy = -sum(freq * math.log2(freq) for freq in char_frequencies.values())
|
||||||
"domain_length": domain_length,
|
else:
|
||||||
"has_subdomain": has_subdomain,
|
entropy = 0
|
||||||
"subdomain_length": subdomain_length,
|
|
||||||
"subdomain_count": subdomain_count,
|
|
||||||
|
|
||||||
# "path_length": path_length,
|
|
||||||
"path_depth": path_depth,
|
|
||||||
"has_query": has_query,
|
if url_length <= 13:
|
||||||
"query_length": query_length,
|
url_length_cat = 0
|
||||||
"query_param_count": query_param_count,
|
elif url_length <= 18:
|
||||||
# "has_fragment": has_fragment,
|
url_length_cat = 1
|
||||||
# "fragment_length": fragment_length,
|
elif url_length <= 25:
|
||||||
"url_shorteners": is_shortened,
|
url_length_cat = 2
|
||||||
|
else:
|
||||||
# 새로 추가된 특성
|
url_length_cat = 3
|
||||||
"compression_ratio": compression_ratio(url),
|
|
||||||
"check_similar_brand" : check_similar_brand(url),
|
if url_is_whitelisted(url):
|
||||||
|
return {
|
||||||
# Advanced text analysis
|
# 화이트리스트 URL이면 특징값들을 "정상적"으로 처리되도록 설정
|
||||||
"entropy": entropy,
|
"url_length_cat": 1,
|
||||||
#"letter_ratio": letter_ratio,
|
"num_dots": 1,
|
||||||
"digit_ratio": digit_ratio,
|
"num_digits": 0,
|
||||||
"special_char_ratio": special_char_ratio
|
"num_special_chars": 1,
|
||||||
|
"url_keyword": 0,
|
||||||
|
"num_underbar": 0,
|
||||||
}
|
"extract_consecutive_numbers": 0,
|
||||||
|
"number": 0,
|
||||||
|
"upper": 0,
|
||||||
|
|
||||||
|
"is_common_tld": 1,
|
||||||
|
"is country_tld": 0,
|
||||||
|
"is_suspicious_tld": 0,
|
||||||
|
|
||||||
|
"domain_length": 5,
|
||||||
|
"has_subdomain": 0,
|
||||||
|
"subdomain_length": 0,
|
||||||
|
"subdomain_count": 0,
|
||||||
|
|
||||||
|
"path_depth": 0,
|
||||||
|
"has_query": 0,
|
||||||
|
"query_length": 0,
|
||||||
|
"query_param_count": 0,
|
||||||
|
"url_shorteners": 0,
|
||||||
|
|
||||||
|
"compression_ratio": 1.0,
|
||||||
|
"check_similar_brand": 0,
|
||||||
|
"entropy": 3.0,
|
||||||
|
"digit_ratio": 0.0,
|
||||||
|
"special_char_ratio": 0.1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return {
|
||||||
|
|
||||||
|
# "url_length": url_length,
|
||||||
|
"url_length_cat": url_length_cat,
|
||||||
|
"num_dots": url.count("."),
|
||||||
|
"num_digits": sum(c.isdigit() for c in url),
|
||||||
|
"num_special_chars": len(re.findall(r"[^a-zA-Z0-9]", url)),
|
||||||
|
"url_keyword": contains_keyword,
|
||||||
|
# "url_keyword_count": keyword_count,
|
||||||
|
"num_underbar": url.count("_"),
|
||||||
|
"extract_consecutive_numbers": int(bool(re.findall(r'(\d)\1+', url))),
|
||||||
|
"number": int(bool(len(re.findall(r'(\d)(?!\1)(\d)(?!\2)(\d)', url)))),
|
||||||
|
"upper": int(any(c.isupper() for c in url)),
|
||||||
|
|
||||||
|
"is_common_tld": is_common_tld,
|
||||||
|
"is country_tld": is_country_tld,
|
||||||
|
"is_suspicious_tld": is_suspicious_tld,
|
||||||
|
|
||||||
|
"domain_length": domain_length,
|
||||||
|
"has_subdomain": has_subdomain,
|
||||||
|
"subdomain_length": subdomain_length,
|
||||||
|
"subdomain_count": subdomain_count,
|
||||||
|
|
||||||
|
# "path_length": path_length,
|
||||||
|
"path_depth": path_depth,
|
||||||
|
"has_query": has_query,
|
||||||
|
"query_length": query_length,
|
||||||
|
"query_param_count": query_param_count,
|
||||||
|
# "has_fragment": has_fragment,
|
||||||
|
# "fragment_length": fragment_length,
|
||||||
|
"url_shorteners": is_shortened,
|
||||||
|
|
||||||
|
# 새로 추가된 특성
|
||||||
|
"compression_ratio": compression_ratio(url),
|
||||||
|
"check_similar_brand" : check_similar_brand(url),
|
||||||
|
|
||||||
|
# Advanced text analysis
|
||||||
|
"entropy": entropy,
|
||||||
|
#"letter_ratio": letter_ratio,
|
||||||
|
"digit_ratio": digit_ratio,
|
||||||
|
"special_char_ratio": special_char_ratio
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ from fastapi import FastAPI
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from app.model_load import use_model # predictor.py에서 함수 import
|
from app.model_load import use_model # predictor.py에서 함수 import
|
||||||
from app.exe import predict_url_maliciousness
|
from app.exe import predict_url_maliciousness
|
||||||
from app.utils import convert_numpy_to_python_types
|
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
@@ -28,13 +27,15 @@ def root():
|
|||||||
def predict(request: UrlRequest):
|
def predict(request: UrlRequest):
|
||||||
url = request.url
|
url = request.url
|
||||||
|
|
||||||
result_model1 = convert_numpy_to_python_types(use_model(url))
|
result_model1 = use_model(url)
|
||||||
result_model2 = convert_numpy_to_python_types(predict_url_maliciousness(url))
|
result_model2 = predict_url_maliciousness(url)
|
||||||
|
# print("model1 : ")
|
||||||
response_data = {
|
# print(result_model1.values())
|
||||||
"url": url,
|
# print("model2 : ")
|
||||||
"model1": result_model1,
|
# print(result_model2.values())
|
||||||
"model2": result_model2
|
|
||||||
}
|
return {
|
||||||
|
"url" : url,
|
||||||
return convert_numpy_to_python_types(response_data)
|
"model1": result_model1,
|
||||||
|
"model2": result_model2
|
||||||
|
}
|
||||||
|
|||||||
@@ -29,6 +29,12 @@ def use_model(url : str):
|
|||||||
input_data = featured_df[features_cols]
|
input_data = featured_df[features_cols]
|
||||||
|
|
||||||
# 학습된 모델에 적용
|
# 학습된 모델에 적용
|
||||||
model_pred = round(float(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load])), 4)
|
model_pred = round(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load]), 4)
|
||||||
|
|
||||||
return model_pred
|
#return model_pred
|
||||||
|
return {
|
||||||
|
"url" : url,
|
||||||
|
"malicious_probability" : float(model_pred),
|
||||||
|
"is_malicious" : bool(model_pred > best_threshold),
|
||||||
|
"threshold" : float(best_threshold)
|
||||||
|
}
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ def predict_url(url: str) -> dict:
|
|||||||
input_data = preprocessed[features_cols]
|
input_data = preprocessed[features_cols]
|
||||||
|
|
||||||
# ✅ 전처리된 데이터 확인
|
# ✅ 전처리된 데이터 확인
|
||||||
print("Preprocessed input:", input_data)
|
#print("Preprocessed input:", input_data)
|
||||||
|
|
||||||
# 평균 확률 계산
|
# 평균 확률 계산
|
||||||
probs = [float(model.predict_proba(input_data)[0, 1]) for model in models_load]
|
probs = [float(model.predict_proba(input_data)[0, 1]) for model in models_load]
|
||||||
@@ -61,8 +61,8 @@ def predict_url(url: str) -> dict:
|
|||||||
# 예: malicious_probability가 np.float32 타입일 경우
|
# 예: malicious_probability가 np.float32 타입일 경우
|
||||||
return {
|
return {
|
||||||
"url": url,
|
"url": url,
|
||||||
"malicious_probability": mean_pred, # ⬅️ numpy -> float
|
"malicious_probability": mean_pred,
|
||||||
"is_malicious": bool(is_malicious), # ⬅️ numpy -> bool
|
"is_malicious": is_malicious,
|
||||||
"threshold": float(BEST_THRESHOLD) # ⬅️ numpy -> float
|
"threshold": float(BEST_THRESHOLD) # ⬅️ numpy -> float
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
4
backend/app/testexe.py
Normal file
4
backend/app/testexe.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
from exe import predict_url_maliciousness
|
||||||
|
|
||||||
|
result_model2 = predict_url_maliciousness("www.naver.com")
|
||||||
|
print(result_model2)
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
import numpy as np
|
|
||||||
|
|
||||||
def convert_numpy_to_python_types(obj):
|
|
||||||
"""
|
|
||||||
Recursively convert numpy types to native Python types.
|
|
||||||
"""
|
|
||||||
if isinstance(obj, np.ndarray):
|
|
||||||
return convert_numpy_to_python_types(obj.tolist())
|
|
||||||
elif isinstance(obj, np.number):
|
|
||||||
return float(obj) if isinstance(obj, np.floating) else int(obj)
|
|
||||||
elif isinstance(obj, np.bool_):
|
|
||||||
return bool(obj)
|
|
||||||
elif isinstance(obj, dict):
|
|
||||||
return {k: convert_numpy_to_python_types(v) for k, v in obj.items()}
|
|
||||||
elif isinstance(obj, list) or isinstance(obj, tuple):
|
|
||||||
return [convert_numpy_to_python_types(item) for item in obj]
|
|
||||||
else:
|
|
||||||
return obj
|
|
||||||
34744
react-url-checker/package-lock.json
generated
34744
react-url-checker/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -12,10 +12,11 @@
|
|||||||
"@types/react": "^19.1.0",
|
"@types/react": "^19.1.0",
|
||||||
"@types/react-dom": "^19.1.1",
|
"@types/react-dom": "^19.1.1",
|
||||||
"axios": "^1.8.4",
|
"axios": "^1.8.4",
|
||||||
|
"framer-motion": "^12.9.2",
|
||||||
"react": "^19.1.0",
|
"react": "^19.1.0",
|
||||||
"react-dom": "^19.1.0",
|
"react-dom": "^19.1.0",
|
||||||
"react-icons": "^5.5.0",
|
"react-icons": "^5.5.0",
|
||||||
"react-scripts": "^3.0.1",
|
"react-scripts": "^5.0.1",
|
||||||
"web-vitals": "^2.1.4"
|
"web-vitals": "^2.1.4"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
@@ -43,9 +44,11 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@babel/preset-react": "^7.26.3",
|
||||||
"autoprefixer": "^10.4.21",
|
"autoprefixer": "^10.4.21",
|
||||||
|
"eslint": "^8.57.1",
|
||||||
"postcss": "^8.5.3",
|
"postcss": "^8.5.3",
|
||||||
"tailwindcss": "^3.3.5",
|
"tailwindcss": "^3.3.5",
|
||||||
"typescript": "^5.3.3"
|
"typescript": "^4.1.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8" />
|
<meta charset="UTF-8" />
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
<title>URL 악성 판별기</title>
|
<title></title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="root"></div>
|
<div id="root"></div>
|
||||||
|
|||||||
3
react-url-checker/src/App.js
vendored
3
react-url-checker/src/App.js
vendored
@@ -5,9 +5,8 @@ import './App.css';
|
|||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
return (
|
return (
|
||||||
<div className="min-h-screen bg-gray-100 flex flex-col justify-center">
|
<div className="min-h-screen bg-sky-200 flex flex-col justify-center">
|
||||||
<div className="container mx-auto px-4 text-center">
|
<div className="container mx-auto px-4 text-center">
|
||||||
<h1 className="text-3xl font-bold text-blue-600 mb-6">🔍 악성 URL 판별기</h1>
|
|
||||||
<UrlPredictor />
|
<UrlPredictor />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import React, { useState } from "react";
|
import React, { useState } from "react";
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
|
import { motion } from "framer-motion"; // 애니메이션용
|
||||||
|
import { FaSearch, FaRedo } from "react-icons/fa"; // 아이콘용
|
||||||
|
|
||||||
const UrlPredictor = () => {
|
const UrlPredictor = () => {
|
||||||
const [url, setUrl] = useState("");
|
const [url, setUrl] = useState("");
|
||||||
@@ -26,81 +28,90 @@ const UrlPredictor = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// 모델 정보 정의 (title + 키)
|
|
||||||
const models = [
|
const models = [
|
||||||
{ key: "old_model", title: "🧠 기존 모델 (Ho)" },
|
{ key: "model1", title: "HO 모델" },
|
||||||
{ key: "new_model", title: "🚀 개선 모델 (Jun)" },
|
{ key: "model2", title: "Jun 모델" },
|
||||||
];
|
];
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="min-h-screen bg-gray-100 p-6">
|
<div className="min-h-screen bg-blue-50 p-8">
|
||||||
{!results ? (
|
<div className="grid grid-cols-1 md:grid-cols-2 gap-8 h-full">
|
||||||
<div className="flex justify-center items-center h-full">
|
|
||||||
<form onSubmit={handleSubmit} className="flex gap-4 w-full max-w-2xl">
|
{/* 왼쪽 입력창 */}
|
||||||
|
<div className="flex flex-col justify-center items-center gap-6">
|
||||||
|
<h1 className="text-2x1 font-bold text-blue-700">URL 판별기</h1>
|
||||||
|
<form onSubmit={handleSubmit} className="flex gap-2 w-full max-w-md">
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
value={url}
|
value={url}
|
||||||
onChange={(e) => setUrl(e.target.value)}
|
onChange={(e) => setUrl(e.target.value)}
|
||||||
placeholder="URL을 입력하세요"
|
placeholder="URL을 입력하세요"
|
||||||
className="flex-grow px-4 py-2 border border-gray-300 rounded shadow"
|
className="flex-grow px-4 py-2 border border-gray-300 rounded-lg shadow-md focus:outline-none focus:ring-2 focus:ring-blue-400"
|
||||||
required
|
required
|
||||||
/>
|
/>
|
||||||
<button
|
<button
|
||||||
type="submit"
|
type="submit"
|
||||||
className="bg-blue-600 text-white px-6 py-2 rounded shadow hover:bg-blue-700 transition"
|
className="bg-blue-600 text-white px-6 py-2 flex items-center gap-2 rounded-lg shadow-md hover:bg-blue-700 transition"
|
||||||
>
|
>
|
||||||
✅ 검사하기
|
<FaSearch /> 검사
|
||||||
</button>
|
</button>
|
||||||
</form>
|
</form>
|
||||||
|
{loading && (
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<div className="w-6 h-6 border-4 border-blue-400 border-t-transparent rounded-full animate-spin"></div>
|
||||||
|
<p className="text-blue-600 font-semibold">분석 중...</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{error && <p className="text-red-500">❌ {error}</p>}
|
||||||
</div>
|
</div>
|
||||||
) : (
|
|
||||||
<div className="grid grid-cols-2 gap-6">
|
|
||||||
{/* 좌측 입력창 */}
|
|
||||||
<div className="flex flex-col gap-4">
|
|
||||||
<form onSubmit={handleSubmit} className="flex gap-2">
|
|
||||||
<input
|
|
||||||
type="text"
|
|
||||||
value={url}
|
|
||||||
onChange={(e) => setUrl(e.target.value)}
|
|
||||||
className="flex-grow px-4 py-2 border border-gray-300 rounded shadow"
|
|
||||||
placeholder="URL을 다시 입력해보세요"
|
|
||||||
required
|
|
||||||
/>
|
|
||||||
<button
|
|
||||||
type="submit"
|
|
||||||
className="bg-blue-600 text-white px-4 py-2 rounded hover:bg-blue-700 transition"
|
|
||||||
>
|
|
||||||
다시 검사
|
|
||||||
</button>
|
|
||||||
</form>
|
|
||||||
{loading && <p>🔍 분석 중...</p>}
|
|
||||||
{error && <p className="text-red-500">❌ {error}</p>}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* 우측 결과 반복 렌더링 */}
|
{/* 오른쪽 결과창 */}
|
||||||
<div className="flex flex-col gap-4">
|
<div className="flex flex-col gap-6">
|
||||||
{models.map((model) => {
|
{results ? (
|
||||||
|
models.map((model) => {
|
||||||
const data = results[model.key];
|
const data = results[model.key];
|
||||||
|
if (!data) return null;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div key={model.key} className="bg-white rounded p-4 shadow">
|
<motion.div
|
||||||
<h2 className="text-lg font-bold mb-2">{model.title}</h2>
|
key={model.key}
|
||||||
<p>
|
initial={{ opacity: 0, y: 30 }}
|
||||||
악성 확률: <strong>{(data.prob * 100).toFixed(2)}%</strong>
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
transition={{ duration: 0.6 }}
|
||||||
|
className="bg-white rounded-2xl p-6 shadow-lg border border-gray-200"
|
||||||
|
>
|
||||||
|
<h2 className="text-xl font-bold mb-4 text-gray-800">{model.title}</h2>
|
||||||
|
<p className="mb-2 text-gray-700">
|
||||||
|
악성 확률:{" "}
|
||||||
|
<strong>
|
||||||
|
{(data.malicious_probability * 100).toFixed(2)}%
|
||||||
|
</strong>
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
판별 결과:{" "}
|
판별 결과:{" "}
|
||||||
<strong className={data.malicious ? "text-red-600" : "text-green-600"}>
|
<strong
|
||||||
{data.malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"}
|
className={
|
||||||
|
data.is_malicious
|
||||||
|
? "text-red-600"
|
||||||
|
: "text-green-600"
|
||||||
|
}
|
||||||
|
>
|
||||||
|
{data.is_malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"}
|
||||||
</strong>
|
</strong>
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</motion.div>
|
||||||
);
|
);
|
||||||
})}
|
})
|
||||||
</div>
|
) : (
|
||||||
|
<div className="text-gray-500 flex items-center justify-center h-full">
|
||||||
|
결과가 여기에 표시됩니다.
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)}
|
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
export default UrlPredictor;
|
export default UrlPredictor;
|
||||||
26
react-url-checker/tsconfig (copy 1).json
Normal file
26
react-url-checker/tsconfig (copy 1).json
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "es5",
|
||||||
|
"lib": [
|
||||||
|
"dom",
|
||||||
|
"dom.iterable",
|
||||||
|
"esnext"
|
||||||
|
],
|
||||||
|
"allowJs": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"allowSyntheticDefaultImports": true,
|
||||||
|
"strict": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"noFallthroughCasesInSwitch": true,
|
||||||
|
"module": "esnext",
|
||||||
|
"moduleResolution": "node",
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"isolatedModules": true,
|
||||||
|
"noEmit": true,
|
||||||
|
"jsx": "preserve"
|
||||||
|
},
|
||||||
|
"include": [
|
||||||
|
"src"
|
||||||
|
]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user