Compare commits

..

14 Commits

36 changed files with 13597 additions and 24555 deletions

Submodule 1st-project deleted from 16f48f84a3

38
Nam/Feature.py Normal file
View File

@@ -0,0 +1,38 @@
import re
from collections import Counter
from scipy.stats import entropy
def calculate_url_entropy(url):
    """Return the Shannon entropy, in bits, of the characters in *url*.

    Args:
        url: The URL string to measure.

    Returns:
        The base-2 entropy of the character frequency distribution;
        ``0.0`` for an empty string.
    """
    # An empty string has no character distribution. Short-circuit so the
    # result is a well-defined 0.0 rather than whatever SciPy yields for an
    # empty probability vector.
    if not url:
        return 0.0
    total = len(url)
    probabilities = [count / total for count in Counter(url).values()]
    return entropy(probabilities, base=2)
def extract_url_features(url):
    """Build the lexical feature dictionary for a single URL.

    Args:
        url: The URL string to analyse.

    Returns:
        A dict mapping feature name to value: character counts (ints),
        pattern flags (bools), the URL length, and the character entropy
        computed by ``calculate_url_entropy``.
    """
    suspicious_words = (
        'login', 'verify', 'update', 'confirm',
        'account', 'secure', 'ebayisapi', 'banking',
    )

    features = {}

    # Plain character counts.
    features['digit_count'] = len(re.findall(r'\d', url))
    features['dash_count'] = url.count('-')
    features['underscore_count'] = url.count('_')
    features['percent_count'] = url.count('%')
    features['equal_count'] = url.count('=')
    features['question_count'] = url.count('?')
    features['at_count'] = url.count('@')
    features['count_of_exclamation'] = url.count('!')
    features['count_of_dot'] = url.count('.')
    features['count_of_double_slash'] = url.count('//')
    features['special_char_count'] = len(re.findall(r'[^a-zA-Z0-9]', url))

    # Pattern flags and keyword hits.
    features['is_ip_in_url'] = (
        re.search(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', url) is not None
    )
    features['has_www'] = 'www' in url
    lowered = url.lower()
    features['suspicious_word_count'] = sum(
        1 for word in suspicious_words if word in lowered
    )
    # Subtracts 2 from the total slash count.
    features['path_depth'] = url.count('/') - 2
    features['has_long_digit_sequence'] = re.search(r'\d{4,}', url) is not None
    features['has_multiple_dash'] = re.search(r'-{2,}', url) is not None
    features['has_https'] = url.startswith('https')
    features['ends_with_common_extension'] = url.endswith(('.html', '.php'))

    # Added features.
    features['url_length'] = len(url)
    features['url_entropy'] = calculate_url_entropy(url)
    return features

BIN
Nam/best_model 1.h5 Normal file

Binary file not shown.

54
Nam/model.running_code.py Normal file
View File

@@ -0,0 +1,54 @@
import pandas as pd
import pickle
from tensorflow.keras.models import load_model
from Feature import extract_url_features
from collections import Counter
from scipy.stats import entropy
import tensorflow as tf
# 🔹 URL entropy calculation function
def calculate_url_entropy(url):
    """Return the Shannon entropy, in bits, of the characters in *url*.

    Args:
        url: The URL string to measure.

    Returns:
        The base-2 entropy of the character frequency distribution;
        ``0.0`` for an empty string.
    """
    # An empty string has no character distribution. Short-circuit so the
    # result is a well-defined 0.0 rather than whatever SciPy yields for an
    # empty probability vector.
    if not url:
        return 0.0
    total = len(url)
    probabilities = [count / total for count in Counter(url).values()]
    return entropy(probabilities, base=2)
# 🔹 Load the scaler
# NOTE(review): pickle can execute code on load; only use a trusted scaler.pkl.
with open("scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)
# 🔹 Load the model
model = load_model("best_model.h5")
# 🔹 Prediction function
@tf.function(reduce_retracing=True)
def predict_with_model(model, input_data):
    """Call *model* on *input_data* and return whatever the call produces.

    The function is wrapped in ``tf.function`` with ``reduce_retracing=True``.
    """
    outputs = model(input_data)
    return outputs
# 🔹 Read the URL from the user
url = input("URL입력 : ")
# 🔹 Extract features with Feature.py
features = extract_url_features(url)
# 🔹 Supplement missing features
features.update(
    url_length=len(url),
    url_entropy=calculate_url_entropy(url),
)
# 🔹 Build the DataFrame and order its columns by the scaler's feature names
input_df = pd.DataFrame([features])
expected_columns = list(scaler.feature_names_in_)
input_df = input_df[expected_columns]
# 🔹 Scaling
input_scaled = scaler.transform(input_df)
# 🔹 Prediction
prediction = predict_with_model(model, input_scaled)
first_row = prediction.numpy()[0]
score = float(first_row[0])  # 🔥 convert to a plain Python float
# 🔹 Output
threshold = 0.5
message = (
    f"악성 (악성일 확률: {score:.4f})"
    if score > threshold
    else f"정상 (정상일 확률: {1 - score:.4f})"
)
print(message)

BIN
Nam/model.scaler.pkl Normal file

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -9,7 +9,7 @@ import os
# 모델 및 스케일러 경로 (FastAPI 기준으로 맞춰서 절대 경로 또는 경로 설정)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "models", "Recall_0.77.keras")
MODEL_PATH = os.path.join(BASE_DIR, "models", "White_list_model.keras")
SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl")
# 모델 및 스케일러 로드 (1회만 수행)
@@ -36,16 +36,17 @@ def predict_url_maliciousness(url: str) -> dict:
# 예측
prediction = predict_with_model(model, input_scaled)
malicious_prob = float(prediction[0][0])
malicious_prob = float(prediction[0][0].numpy())
# 임계값 기반 판단
is_malicious = bool(malicious_prob > BEST_THRESHOLD)
# Ensure all values are Python native types (not numpy types)
# 예: malicious_probability가 np.float32 타입일 경우
return {
"url": str(url),
"malicious_probability": float(malicious_prob),
"is_malicious": bool(is_malicious),
"malicious_probability": malicious_prob,
"is_malicious": is_malicious,
"threshold": float(BEST_THRESHOLD)
}

View File

@@ -2,11 +2,44 @@ import re
from urllib.parse import urlparse, parse_qs
import tldextract
import zlib
import re
from urllib.parse import urlparse
from collections import Counter
import math
# Registrable domains (and a few public suffixes) treated as trusted.
_TRUSTED_DOMAINS = (
    # 1. Portals / search engines
    'naver.com', 'daum.net', 'google.com', 'bing.com', 'yahoo.com',
    # 2. Social media / communication
    'facebook.com', 'instagram.com', 'twitter.com', 'x.com', 'linkedin.com',
    'whatsapp.com', 'kakao.com', 'kakaocorp.com',
    # 3. Video / streaming
    'youtube.com', 'netflix.com', 'twitch.tv', 'tving.com', 'watcha.com',
    # 4. Shopping / e-commerce
    'amazon.com', 'gmarket.co.kr', '11st.co.kr', 'coupang.com', 'ssg.com',
    'wemakeprice.com',
    # 5. Finance / payments
    'paypal.com', 'kbfg.com', 'shinhan.com', 'hanafn.com', 'wooribank.com',
    'kakaobank.com', 'toss.im',
    # 6. Public institutions / education
    'gov.kr', 'moe.go.kr', 'epeople.go.kr', 'pusan.ac.kr', 'ac.kr',
    # 7. IT / technology
    'apple.com', 'microsoft.com', 'adobe.com', 'github.com',
    'stackoverflow.com',
)
# Dot-prefixed forms so a suffix match can only succeed on a label boundary.
_TRUSTED_SUFFIXES = tuple('.' + domain for domain in _TRUSTED_DOMAINS)


def url_is_whitelisted(url):
    """Return True when the host of *url* is a trusted domain or a subdomain of one.

    The host must either equal a trusted domain or end with ``"." + domain``.
    A bare ``endswith(domain)`` would also accept look-alike hosts such as
    ``evilnaver.com`` (or ``dropbox.com`` through ``x.com``), so the match is
    restricted to label boundaries.

    Args:
        url: The URL to check; a scheme is optional.

    Returns:
        ``True`` if the host is whitelisted, ``False`` otherwise, including
        when *url* is not a string or cannot be parsed.
    """
    try:
        # Without "//" urlparse would treat the host as a path, so add it.
        # ``hostname`` (unlike ``netloc``) drops userinfo and port and is
        # already lower-cased.
        host = urlparse(url if '//' in url else '//' + url).hostname
    except (TypeError, ValueError, AttributeError):
        return False
    if not host:
        return False
    # Ignore the trailing dot of a fully-qualified host name.
    host = host.rstrip('.')
    return host in _TRUSTED_DOMAINS or host.endswith(_TRUSTED_SUFFIXES)
def check_similar_brand(url):
@@ -159,7 +192,44 @@ def extract_features(url):
else:
url_length_cat = 3
if url_is_whitelisted(url):
return {
# 화이트리스트 URL이면 특징값들을 "정상적"으로 처리되도록 설정
"url_length_cat": 1,
"num_dots": 1,
"num_digits": 0,
"num_special_chars": 1,
"url_keyword": 0,
"num_underbar": 0,
"extract_consecutive_numbers": 0,
"number": 0,
"upper": 0,
"is_common_tld": 1,
"is country_tld": 0,
"is_suspicious_tld": 0,
"domain_length": 5,
"has_subdomain": 0,
"subdomain_length": 0,
"subdomain_count": 0,
"path_depth": 0,
"has_query": 0,
"query_length": 0,
"query_param_count": 0,
"url_shorteners": 0,
"compression_ratio": 1.0,
"check_similar_brand": 0,
"entropy": 3.0,
"digit_ratio": 0.0,
"special_char_ratio": 0.1
}
return {
# "url_length": url_length,
"url_length_cat": url_length_cat,
"num_dots": url.count("."),

View File

@@ -2,7 +2,6 @@ from fastapi import FastAPI
from pydantic import BaseModel
from app.model_load import use_model # predictor.py에서 함수 import
from app.exe import predict_url_maliciousness
from app.utils import convert_numpy_to_python_types
from fastapi.middleware.cors import CORSMiddleware
app = FastAPI()
@@ -28,13 +27,15 @@ def root():
def predict(request: UrlRequest):
url = request.url
result_model1 = convert_numpy_to_python_types(use_model(url))
result_model2 = convert_numpy_to_python_types(predict_url_maliciousness(url))
result_model1 = use_model(url)
result_model2 = predict_url_maliciousness(url)
# print("model1 : ")
# print(result_model1.values())
# print("model2 : ")
# print(result_model2.values())
response_data = {
"url": url,
"model1": result_model1,
"model2": result_model2
}
return convert_numpy_to_python_types(response_data)
return {
"url" : url,
"model1": result_model1,
"model2": result_model2
}

View File

@@ -29,6 +29,12 @@ def use_model(url : str):
input_data = featured_df[features_cols]
# 학습된 모델에 적용
model_pred = round(float(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load])), 4)
model_pred = round(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load]), 4)
return model_pred
#return model_pred
return {
"url" : url,
"malicious_probability" : float(model_pred),
"is_malicious" : bool(model_pred > best_threshold),
"threshold" : float(best_threshold)
}

View File

@@ -44,7 +44,7 @@ def predict_url(url: str) -> dict:
input_data = preprocessed[features_cols]
# ✅ 전처리된 데이터 확인
print("Preprocessed input:", input_data)
#print("Preprocessed input:", input_data)
# 평균 확률 계산
probs = [float(model.predict_proba(input_data)[0, 1]) for model in models_load]
@@ -61,8 +61,8 @@ def predict_url(url: str) -> dict:
# 예: malicious_probability가 np.float32 타입일 경우
return {
"url": url,
"malicious_probability": mean_pred, # ⬅️ numpy -> float
"is_malicious": bool(is_malicious), # ⬅️ numpy -> bool
"malicious_probability": mean_pred,
"is_malicious": is_malicious,
"threshold": float(BEST_THRESHOLD) # ⬅️ numpy -> float
}

4
backend/app/testexe.py Normal file
View File

@@ -0,0 +1,4 @@
from exe import predict_url_maliciousness
# Manual check: call the predictor on a sample URL and print what it returns.
result_model2 = predict_url_maliciousness("www.naver.com")
print(result_model2)

View File

@@ -1,18 +0,0 @@
import numpy as np
def convert_numpy_to_python_types(obj):
    """Convert NumPy values nested inside *obj* into built-in Python types.

    Arrays are turned into lists, NumPy floating values into ``float``, other
    NumPy numbers into ``int`` and NumPy booleans into ``bool``. Dicts are
    rebuilt with converted values; lists and tuples both come back as lists
    of converted items. Any other object is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return convert_numpy_to_python_types(obj.tolist())
    if isinstance(obj, np.number):
        caster = float if isinstance(obj, np.floating) else int
        return caster(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = convert_numpy_to_python_types(value)
        return converted
    if isinstance(obj, (list, tuple)):
        return list(map(convert_numpy_to_python_types, obj))
    return obj

BIN
best_model.h5 Normal file

Binary file not shown.

View File

@@ -2392,703 +2392,56 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "object",
"type": "string"
},
{
"name": "label",
"rawType": "float64",
"type": "float"
},
{
"name": "url_length_cat",
"rawType": "float64",
"type": "float"
},
{
"name": "num_dots",
"rawType": "float64",
"type": "float"
},
{
"name": "num_digits",
"rawType": "float64",
"type": "float"
},
{
"name": "num_special_chars",
"rawType": "float64",
"type": "float"
},
{
"name": "url_keyword",
"rawType": "float64",
"type": "float"
},
{
"name": "num_underbar",
"rawType": "float64",
"type": "float"
},
{
"name": "extract_consecutive_numbers",
"rawType": "float64",
"type": "float"
},
{
"name": "number",
"rawType": "float64",
"type": "float"
},
{
"name": "upper",
"rawType": "float64",
"type": "float"
},
{
"name": "is_common_tld",
"rawType": "float64",
"type": "float"
},
{
"name": "is_country_tld",
"rawType": "float64",
"type": "float"
},
{
"name": "is_suspicious_tld",
"rawType": "float64",
"type": "float"
},
{
"name": "domain_length",
"rawType": "float64",
"type": "float"
},
{
"name": "has_subdomain",
"rawType": "float64",
"type": "float"
},
{
"name": "subdomain_length",
"rawType": "float64",
"type": "float"
},
{
"name": "subdomain_count",
"rawType": "float64",
"type": "float"
},
{
"name": "path_depth",
"rawType": "float64",
"type": "float"
},
{
"name": "has_query",
"rawType": "float64",
"type": "float"
},
{
"name": "query_length",
"rawType": "float64",
"type": "float"
},
{
"name": "query_param_count",
"rawType": "float64",
"type": "float"
},
{
"name": "url_shorteners",
"rawType": "float64",
"type": "float"
},
{
"name": "compression_ratio",
"rawType": "float64",
"type": "float"
},
{
"name": "entropy",
"rawType": "float64",
"type": "float"
},
{
"name": "digit_ratio",
"rawType": "float64",
"type": "float"
},
{
"name": "special_char_ratio",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "c79a077e-8e52-4e42-b88f-dc9698b0fa30",
"rows": [
[
"count",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0"
],
[
"mean",
"0.22371472079708868",
"1.4435534183000107",
"1.546944584861079",
"1.6343590387267808",
"2.6635716711917676",
"0.0370789025849114",
"0.045005501028154746",
"0.056463736673444787",
"0.08128040719044995",
"0.0357764112252997",
"0.6133649251700057",
"0.12739140329970197",
"0.022784949827420967",
"10.464007150192936",
"0.21130266862767075",
"2.43731000866898",
"0.2660177416735477",
"0.6056849294701858",
"0.027221368921135157",
"1.9155892390282507",
"0.04228915393958247",
"0.0018421582329004942",
"1.4552534994784176",
"3.5360434022769756",
"0.029042428345387533",
"0.1102289088601276"
],
[
"std",
"0.41673309122602675",
"1.1161203432813147",
"1.010078604927829",
"9.827940363271033",
"7.1618457272654",
"0.18895518694176003",
"0.6023702991784359",
"0.23081505741717664",
"0.273265280035072",
"0.18573223887275842",
"0.4869788780260291",
"0.33341093196934307",
"0.14921728811320575",
"5.0652546813544035",
"0.4082326232468674",
"6.90096602515224",
"0.6272395647222854",
"1.6003209664806863",
"0.1627279010519657",
"19.702068343354906",
"0.35208851309719974",
"0.04288082262284407",
"0.24856536988340924",
"0.47898938276414027",
"0.08255957016074264",
"0.046338026902092454"
],
[
"min",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.010181818181818183",
"-0.0",
"0.0",
"0.0"
],
[
"25%",
"0.0",
"0.0",
"1.0",
"0.0",
"1.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"7.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.3076923076923077",
"3.238901256602631",
"0.0",
"0.07142857142857142"
],
[
"50%",
"0.0",
"1.0",
"1.0",
"0.0",
"2.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.0",
"0.0",
"0.0",
"10.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.4444444444444444",
"3.5068905956085183",
"0.0",
"0.10344827586206896"
],
[
"75%",
"0.0",
"2.0",
"2.0",
"0.0",
"3.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.0",
"0.0",
"0.0",
"13.0",
"0.0",
"0.0",
"0.0",
"1.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.6153846153846154",
"3.7962176025900556",
"0.0",
"0.14285714285714285"
],
[
"max",
"1.0",
"3.0",
"171.0",
"2011.0",
"8198.0",
"1.0",
"136.0",
"1.0",
"1.0",
"1.0",
"1.0",
"1.0",
"1.0",
"63.0",
"1.0",
"237.0",
"38.0",
"136.0",
"1.0",
"8367.0",
"131.0",
"1.0",
"5.0",
"6.570554108088201",
"0.9545454545454546",
"1.0"
]
],
"shape": {
"columns": 26,
"rows": 8
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>label</th>\n",
" <th>url_length_cat</th>\n",
" <th>num_dots</th>\n",
" <th>num_digits</th>\n",
" <th>num_special_chars</th>\n",
" <th>url_keyword</th>\n",
" <th>num_underbar</th>\n",
" <th>extract_consecutive_numbers</th>\n",
" <th>number</th>\n",
" <th>upper</th>\n",
" <th>...</th>\n",
" <th>subdomain_count</th>\n",
" <th>path_depth</th>\n",
" <th>has_query</th>\n",
" <th>query_length</th>\n",
" <th>query_param_count</th>\n",
" <th>url_shorteners</th>\n",
" <th>compression_ratio</th>\n",
" <th>entropy</th>\n",
" <th>digit_ratio</th>\n",
" <th>special_char_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>...</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2.237147e-01</td>\n",
" <td>1.443553e+00</td>\n",
" <td>1.546945e+00</td>\n",
" <td>1.634359e+00</td>\n",
" <td>2.663572e+00</td>\n",
" <td>3.707890e-02</td>\n",
" <td>4.500550e-02</td>\n",
" <td>5.646374e-02</td>\n",
" <td>8.128041e-02</td>\n",
" <td>3.577641e-02</td>\n",
" <td>...</td>\n",
" <td>2.660177e-01</td>\n",
" <td>6.056849e-01</td>\n",
" <td>2.722137e-02</td>\n",
" <td>1.915589e+00</td>\n",
" <td>4.228915e-02</td>\n",
" <td>1.842158e-03</td>\n",
" <td>1.455253e+00</td>\n",
" <td>3.536043e+00</td>\n",
" <td>2.904243e-02</td>\n",
" <td>1.102289e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>4.167331e-01</td>\n",
" <td>1.116120e+00</td>\n",
" <td>1.010079e+00</td>\n",
" <td>9.827940e+00</td>\n",
" <td>7.161846e+00</td>\n",
" <td>1.889552e-01</td>\n",
" <td>6.023703e-01</td>\n",
" <td>2.308151e-01</td>\n",
" <td>2.732653e-01</td>\n",
" <td>1.857322e-01</td>\n",
" <td>...</td>\n",
" <td>6.272396e-01</td>\n",
" <td>1.600321e+00</td>\n",
" <td>1.627279e-01</td>\n",
" <td>1.970207e+01</td>\n",
" <td>3.520885e-01</td>\n",
" <td>4.288082e-02</td>\n",
" <td>2.485654e-01</td>\n",
" <td>4.789894e-01</td>\n",
" <td>8.255957e-02</td>\n",
" <td>4.633803e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.018182e-02</td>\n",
" <td>-0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.307692e+00</td>\n",
" <td>3.238901e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>7.142857e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.444444e+00</td>\n",
" <td>3.506891e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.034483e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>3.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.615385e+00</td>\n",
" <td>3.796218e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.428571e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000e+00</td>\n",
" <td>3.000000e+00</td>\n",
" <td>1.710000e+02</td>\n",
" <td>2.011000e+03</td>\n",
" <td>8.198000e+03</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.360000e+02</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>...</td>\n",
" <td>3.800000e+01</td>\n",
" <td>1.360000e+02</td>\n",
" <td>1.000000e+00</td>\n",
" <td>8.367000e+03</td>\n",
" <td>1.310000e+02</td>\n",
" <td>1.000000e+00</td>\n",
" <td>5.000000e+00</td>\n",
" <td>6.570554e+00</td>\n",
" <td>9.545455e-01</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 26 columns</p>\n",
"</div>"
],
"text/plain": [
" label url_length_cat num_dots num_digits \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 2.237147e-01 1.443553e+00 1.546945e+00 1.634359e+00 \n",
"std 4.167331e-01 1.116120e+00 1.010079e+00 9.827940e+00 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 \n",
"50% 0.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 \n",
"75% 0.000000e+00 2.000000e+00 2.000000e+00 0.000000e+00 \n",
"max 1.000000e+00 3.000000e+00 1.710000e+02 2.011000e+03 \n",
"\n",
" num_special_chars url_keyword num_underbar \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 2.663572e+00 3.707890e-02 4.500550e-02 \n",
"std 7.161846e+00 1.889552e-01 6.023703e-01 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 1.000000e+00 0.000000e+00 0.000000e+00 \n",
"50% 2.000000e+00 0.000000e+00 0.000000e+00 \n",
"75% 3.000000e+00 0.000000e+00 0.000000e+00 \n",
"max 8.198000e+03 1.000000e+00 1.360000e+02 \n",
"\n",
" extract_consecutive_numbers number upper ... \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 ... \n",
"mean 5.646374e-02 8.128041e-02 3.577641e-02 ... \n",
"std 2.308151e-01 2.732653e-01 1.857322e-01 ... \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"25% 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"50% 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"75% 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"max 1.000000e+00 1.000000e+00 1.000000e+00 ... \n",
"\n",
" subdomain_count path_depth has_query query_length \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 2.660177e-01 6.056849e-01 2.722137e-02 1.915589e+00 \n",
"std 6.272396e-01 1.600321e+00 1.627279e-01 1.970207e+01 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"50% 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"75% 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 \n",
"max 3.800000e+01 1.360000e+02 1.000000e+00 8.367000e+03 \n",
"\n",
" query_param_count url_shorteners compression_ratio entropy \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 4.228915e-02 1.842158e-03 1.455253e+00 3.536043e+00 \n",
"std 3.520885e-01 4.288082e-02 2.485654e-01 4.789894e-01 \n",
"min 0.000000e+00 0.000000e+00 1.018182e-02 -0.000000e+00 \n",
"25% 0.000000e+00 0.000000e+00 1.307692e+00 3.238901e+00 \n",
"50% 0.000000e+00 0.000000e+00 1.444444e+00 3.506891e+00 \n",
"75% 0.000000e+00 0.000000e+00 1.615385e+00 3.796218e+00 \n",
"max 1.310000e+02 1.000000e+00 5.000000e+00 6.570554e+00 \n",
"\n",
" digit_ratio special_char_ratio \n",
"count 6.995056e+06 6.995056e+06 \n",
"mean 2.904243e-02 1.102289e-01 \n",
"std 8.255957e-02 4.633803e-02 \n",
"min 0.000000e+00 0.000000e+00 \n",
"25% 0.000000e+00 7.142857e-02 \n",
"50% 0.000000e+00 1.034483e-01 \n",
"75% 0.000000e+00 1.428571e-01 \n",
"max 9.545455e-01 1.000000e+00 \n",
"\n",
"[8 rows x 26 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
"ename": "NameError",
"evalue": "name 'processed_train' is not defined",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mprocessed_train\u001b[49m.describe()\n",
"\u001b[31mNameError\u001b[39m: name 'processed_train' is not defined"
]
}
],
"source": [
"processed_train.describe()"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'processed_train' is not defined",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpyplot\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplt\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mseaborn\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msns\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m desc = \u001b[43mprocessed_train\u001b[49m.describe()\n\u001b[32m 6\u001b[39m plt.figure(figsize=(\u001b[32m12\u001b[39m, \u001b[32m6\u001b[39m))\n\u001b[32m 7\u001b[39m sns.barplot(data=desc.T[[\u001b[33m'\u001b[39m\u001b[33mmean\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mstd\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mmin\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mmax\u001b[39m\u001b[33m'\u001b[39m]])\n",
"\u001b[31mNameError\u001b[39m: name 'processed_train' is not defined"
]
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"desc = processed_train.describe()\n",
"\n",
"plt.figure(figsize=(12, 6))\n",
"sns.barplot(data=desc.T[['mean', 'std', 'min', 'max']])\n",
"plt.title('Feature Statistics')\n",
"plt.xticks(rotation=45)\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
@@ -3248,12 +2601,12 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras.layers import Dense, Dropout, BatchNormalization\n",
"from tensorflow.keras.layers import Dense\n",
"\n",
"def build_model(input_dim, learning_rate=0.001):\n",
" \"\"\"\n",

File diff suppressed because it is too large Load Diff

View File

@@ -12,10 +12,11 @@
"@types/react": "^19.1.0",
"@types/react-dom": "^19.1.1",
"axios": "^1.8.4",
"framer-motion": "^12.9.2",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-icons": "^5.5.0",
"react-scripts": "^3.0.1",
"react-scripts": "^5.0.1",
"web-vitals": "^2.1.4"
},
"scripts": {
@@ -43,9 +44,11 @@
]
},
"devDependencies": {
"@babel/preset-react": "^7.26.3",
"autoprefixer": "^10.4.21",
"eslint": "^8.57.1",
"postcss": "^8.5.3",
"tailwindcss": "^3.3.5",
"typescript": "^5.3.3"
"typescript": "^4.1.2"
}
}

View File

@@ -3,7 +3,7 @@
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>URL 악성 판별기</title>
<title></title>
</head>
<body>
<div id="root"></div>

View File

@@ -5,9 +5,8 @@ import './App.css';
function App() {
return (
<div className="min-h-screen bg-gray-100 flex flex-col justify-center">
<div className="min-h-screen bg-sky-200 flex flex-col justify-center">
<div className="container mx-auto px-4 text-center">
<h1 className="text-3xl font-bold text-blue-600 mb-6">🔍 악성 URL 판별기</h1>
<UrlPredictor />
</div>
</div>

View File

@@ -1,5 +1,7 @@
import React, { useState } from "react";
import axios from "axios";
import { motion } from "framer-motion"; // 애니메이션용
import { FaSearch, FaRedo } from "react-icons/fa"; // 아이콘용
const UrlPredictor = () => {
const [url, setUrl] = useState("");
@@ -26,79 +28,88 @@ const UrlPredictor = () => {
}
};
// 모델 정보 정의 (title + 키)
const models = [
{ key: "old_model", title: "🧠 기존 모델 (Ho)" },
{ key: "new_model", title: "🚀 개선 모델 (Jun)" },
{ key: "model1", title: "HO 모델" },
{ key: "model2", title: "Jun 모델" },
];
return (
<div className="min-h-screen bg-gray-100 p-6">
{!results ? (
<div className="flex justify-center items-center h-full">
<form onSubmit={handleSubmit} className="flex gap-4 w-full max-w-2xl">
<div className="min-h-screen bg-blue-50 p-8">
<div className="grid grid-cols-1 md:grid-cols-2 gap-8 h-full">
{/* 왼쪽 입력창 */}
<div className="flex flex-col justify-center items-center gap-6">
<h1 className="text-2x1 font-bold text-blue-700">URL 판별기</h1>
<form onSubmit={handleSubmit} className="flex gap-2 w-full max-w-md">
<input
type="text"
value={url}
onChange={(e) => setUrl(e.target.value)}
placeholder="URL을 입력하세요"
className="flex-grow px-4 py-2 border border-gray-300 rounded shadow"
className="flex-grow px-4 py-2 border border-gray-300 rounded-lg shadow-md focus:outline-none focus:ring-2 focus:ring-blue-400"
required
/>
<button
type="submit"
className="bg-blue-600 text-white px-6 py-2 rounded shadow hover:bg-blue-700 transition"
className="bg-blue-600 text-white px-6 py-2 flex items-center gap-2 rounded-lg shadow-md hover:bg-blue-700 transition"
>
검사하기
<FaSearch /> 검사
</button>
</form>
{loading && (
<div className="flex items-center gap-2">
<div className="w-6 h-6 border-4 border-blue-400 border-t-transparent rounded-full animate-spin"></div>
<p className="text-blue-600 font-semibold">분석 ...</p>
</div>
)}
{error && <p className="text-red-500"> {error}</p>}
</div>
) : (
<div className="grid grid-cols-2 gap-6">
{/* 좌측 입력창 */}
<div className="flex flex-col gap-4">
<form onSubmit={handleSubmit} className="flex gap-2">
<input
type="text"
value={url}
onChange={(e) => setUrl(e.target.value)}
className="flex-grow px-4 py-2 border border-gray-300 rounded shadow"
placeholder="URL을 다시 입력해보세요"
required
/>
<button
type="submit"
className="bg-blue-600 text-white px-4 py-2 rounded hover:bg-blue-700 transition"
>
다시 검사
</button>
</form>
{loading && <p>🔍 분석 ...</p>}
{error && <p className="text-red-500"> {error}</p>}
</div>
{/* 우측 결과 반복 렌더링 */}
<div className="flex flex-col gap-4">
{models.map((model) => {
{/* 오른쪽 결과창 */}
<div className="flex flex-col gap-6">
{results ? (
models.map((model) => {
const data = results[model.key];
if (!data) return null;
return (
<div key={model.key} className="bg-white rounded p-4 shadow">
<h2 className="text-lg font-bold mb-2">{model.title}</h2>
<p>
악성 확률: <strong>{(data.prob * 100).toFixed(2)}%</strong>
<motion.div
key={model.key}
initial={{ opacity: 0, y: 30 }}
animate={{ opacity: 1, y: 0 }}
transition={{ duration: 0.6 }}
className="bg-white rounded-2xl p-6 shadow-lg border border-gray-200"
>
<h2 className="text-xl font-bold mb-4 text-gray-800">{model.title}</h2>
<p className="mb-2 text-gray-700">
악성 확률:{" "}
<strong>
{(data.malicious_probability * 100).toFixed(2)}%
</strong>
</p>
<p>
판별 결과:{" "}
<strong className={data.malicious ? "text-red-600" : "text-green-600"}>
{data.malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"}
<strong
className={
data.is_malicious
? "text-red-600"
: "text-green-600"
}
>
{data.is_malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"}
</strong>
</p>
</div>
</motion.div>
);
})}
</div>
})
) : (
<div className="text-gray-500 flex items-center justify-center h-full">
결과가 여기에 표시됩니다.
</div>
)}
</div>
)}
</div>
</div>
);
};

View File

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"target": "es5",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "preserve"
},
"include": [
"src"
]
}

BIN
scaler.pkl Normal file

Binary file not shown.