backend-junPreP Update(Update whitelist model), front-urlpredictor.jsx UI Update

This commit is contained in:
2025-04-30 02:40:18 +00:00
parent 11839c40c0
commit 8de5238395
27 changed files with 11667 additions and 23864 deletions

Binary file not shown.

View File

@@ -9,7 +9,7 @@ import os
# 모델 및 스케일러 경로 (FastAPI 기준으로 맞춰서 절대 경로 또는 경로 설정) # 모델 및 스케일러 경로 (FastAPI 기준으로 맞춰서 절대 경로 또는 경로 설정)
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "models", "Recall_0.77.keras") MODEL_PATH = os.path.join(BASE_DIR, "models", "White_list_model.keras")
SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl") SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl")
# 모델 및 스케일러 로드 (1회만 수행) # 모델 및 스케일러 로드 (1회만 수행)
@@ -36,16 +36,17 @@ def predict_url_maliciousness(url: str) -> dict:
# 예측 # 예측
prediction = predict_with_model(model, input_scaled) prediction = predict_with_model(model, input_scaled)
malicious_prob = float(prediction[0][0]) malicious_prob = float(prediction[0][0].numpy())
# 임계값 기반 판단 # 임계값 기반 판단
is_malicious = bool(malicious_prob > BEST_THRESHOLD) is_malicious = bool(malicious_prob > BEST_THRESHOLD)
# Ensure all values are Python native types (not numpy types) # 예: malicious_probability가 np.float32 타입일 경우
return { return {
"url": str(url), "url": str(url),
"malicious_probability": float(malicious_prob), "malicious_probability": malicious_prob,
"is_malicious": bool(is_malicious), "is_malicious": is_malicious,
"threshold": float(BEST_THRESHOLD) "threshold": float(BEST_THRESHOLD)
} }

View File

@@ -2,11 +2,44 @@ import re
from urllib.parse import urlparse, parse_qs from urllib.parse import urlparse, parse_qs
import tldextract import tldextract
import zlib import zlib
import re
from urllib.parse import urlparse
from collections import Counter from collections import Counter
import math import math
def url_is_whitelisted(url):
    """Return True when the host of *url* is a trusted domain or a subdomain of one.

    The host is taken from ``urlparse(...).hostname`` so that userinfo
    (``user@``) and port (``:8080``) never take part in the comparison, and a
    trailing root dot (``google.com.``) is ignored.

    A host matches a trusted entry only when it is exactly that entry or ends
    with ``"." + entry``.  A plain ``str.endswith(entry)`` is not enough:
    it would accept look-alike registrations such as ``phishx.com`` (ends with
    ``x.com``) or ``evilgoogle.com`` (ends with ``google.com``).

    Note that entries such as ``ac.kr`` and ``gov.kr`` are whole suffixes, so
    every host underneath them is treated as trusted.

    Args:
        url: URL string, with or without a scheme (``www.naver.com`` works).

    Returns:
        bool: True if the host is whitelisted; False otherwise, including when
        *url* is empty, not a string, or cannot be parsed.
    """
    trusted_domains = (
        # 1. Portals / search engines
        'naver.com', 'daum.net', 'google.com', 'bing.com', 'yahoo.com',
        # 2. Social media / communication
        'facebook.com', 'instagram.com', 'twitter.com', 'x.com', 'linkedin.com',
        'whatsapp.com', 'kakao.com', 'kakaocorp.com',
        # 3. Video / streaming
        'youtube.com', 'netflix.com', 'twitch.tv', 'tving.com', 'watcha.com',
        # 4. Shopping / e-commerce
        'amazon.com', 'gmarket.co.kr', '11st.co.kr', 'coupang.com', 'ssg.com', 'wemakeprice.com',
        # 5. Finance / payments
        'paypal.com', 'kbfg.com', 'shinhan.com', 'hanafn.com', 'wooribank.com',
        'kakaobank.com', 'toss.im',
        # 6. Public institutions / education
        'gov.kr', 'moe.go.kr', 'epeople.go.kr', 'pusan.ac.kr', 'ac.kr',
        # 7. IT / technology
        'apple.com', 'microsoft.com', 'adobe.com', 'github.com', 'stackoverflow.com',
    )

    try:
        # Without a "//" urlparse would put the host into .path, so prefix one
        # for scheme-less input such as "www.naver.com".
        host = urlparse(url if '//' in url else '//' + url).hostname
    except (TypeError, ValueError, AttributeError):
        # Non-string input or a malformed netloc (e.g. broken IPv6 literal).
        return False

    if not host:
        return False

    host = host.rstrip('.')  # "google.com." names the same host as "google.com"
    return any(
        host == trusted or host.endswith('.' + trusted)
        for trusted in trusted_domains
    )
def check_similar_brand(url): def check_similar_brand(url):
@@ -159,7 +192,44 @@ def extract_features(url):
else: else:
url_length_cat = 3 url_length_cat = 3
if url_is_whitelisted(url):
return { return {
# 화이트리스트 URL이면 특징값들을 "정상적"으로 처리되도록 설정
"url_length_cat": 1,
"num_dots": 1,
"num_digits": 0,
"num_special_chars": 1,
"url_keyword": 0,
"num_underbar": 0,
"extract_consecutive_numbers": 0,
"number": 0,
"upper": 0,
"is_common_tld": 1,
"is country_tld": 0,
"is_suspicious_tld": 0,
"domain_length": 5,
"has_subdomain": 0,
"subdomain_length": 0,
"subdomain_count": 0,
"path_depth": 0,
"has_query": 0,
"query_length": 0,
"query_param_count": 0,
"url_shorteners": 0,
"compression_ratio": 1.0,
"check_similar_brand": 0,
"entropy": 3.0,
"digit_ratio": 0.0,
"special_char_ratio": 0.1
}
return {
# "url_length": url_length, # "url_length": url_length,
"url_length_cat": url_length_cat, "url_length_cat": url_length_cat,
"num_dots": url.count("."), "num_dots": url.count("."),

View File

@@ -2,7 +2,6 @@ from fastapi import FastAPI
from pydantic import BaseModel from pydantic import BaseModel
from app.model_load import use_model # predictor.py에서 함수 import from app.model_load import use_model # predictor.py에서 함수 import
from app.exe import predict_url_maliciousness from app.exe import predict_url_maliciousness
from app.utils import convert_numpy_to_python_types
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
app = FastAPI() app = FastAPI()
@@ -28,13 +27,15 @@ def root():
def predict(request: UrlRequest): def predict(request: UrlRequest):
url = request.url url = request.url
result_model1 = convert_numpy_to_python_types(use_model(url)) result_model1 = use_model(url)
result_model2 = convert_numpy_to_python_types(predict_url_maliciousness(url)) result_model2 = predict_url_maliciousness(url)
# print("model1 : ")
# print(result_model1.values())
# print("model2 : ")
# print(result_model2.values())
response_data = { return {
"url" : url, "url" : url,
"model1": result_model1, "model1": result_model1,
"model2": result_model2 "model2": result_model2
} }
return convert_numpy_to_python_types(response_data)

View File

@@ -29,6 +29,12 @@ def use_model(url : str):
input_data = featured_df[features_cols] input_data = featured_df[features_cols]
# 학습된 모델에 적용 # 학습된 모델에 적용
model_pred = round(float(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load])), 4) model_pred = round(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load]), 4)
return model_pred #return model_pred
return {
"url" : url,
"malicious_probability" : float(model_pred),
"is_malicious" : bool(model_pred > best_threshold),
"threshold" : float(best_threshold)
}

View File

@@ -44,7 +44,7 @@ def predict_url(url: str) -> dict:
input_data = preprocessed[features_cols] input_data = preprocessed[features_cols]
# ✅ 전처리된 데이터 확인 # ✅ 전처리된 데이터 확인
print("Preprocessed input:", input_data) #print("Preprocessed input:", input_data)
# 평균 확률 계산 # 평균 확률 계산
probs = [float(model.predict_proba(input_data)[0, 1]) for model in models_load] probs = [float(model.predict_proba(input_data)[0, 1]) for model in models_load]
@@ -61,8 +61,8 @@ def predict_url(url: str) -> dict:
# 예: malicious_probability가 np.float32 타입일 경우 # 예: malicious_probability가 np.float32 타입일 경우
return { return {
"url": url, "url": url,
"malicious_probability": mean_pred, # ⬅️ numpy -> float "malicious_probability": mean_pred,
"is_malicious": bool(is_malicious), # ⬅️ numpy -> bool "is_malicious": is_malicious,
"threshold": float(BEST_THRESHOLD) # ⬅️ numpy -> float "threshold": float(BEST_THRESHOLD) # ⬅️ numpy -> float
} }

4
backend/app/testexe.py Normal file
View File

@@ -0,0 +1,4 @@
# Manual smoke test: classify one sample URL and print the returned result.
from exe import predict_url_maliciousness
# The sample URL is given without a scheme.
result_model2 = predict_url_maliciousness("www.naver.com")
print(result_model2)

View File

@@ -1,18 +0,0 @@
import numpy as np
def convert_numpy_to_python_types(obj):
    """Recursively convert NumPy values inside *obj* to native Python types.

    Conversions performed:
      * ``np.ndarray``            -> (nested) ``list``
      * ``np.floating`` scalars   -> ``float``
      * other ``np.number``       -> ``int``
      * ``np.bool_``              -> ``bool``
      * ``dict``                  -> ``dict`` with values converted, and keys
                                     converted when they are NumPy scalars
      * ``list`` / ``tuple``      -> ``list`` with items converted

    Anything else is returned unchanged.

    Dictionary keys are converted as well as values: a key such as
    ``np.int64(1)`` is otherwise left in place and ``json.dumps`` rejects it.
    Only NumPy scalar keys are touched, so tuple or string keys keep their
    type and stay hashable.

    Args:
        obj: Any value, possibly a nested container holding NumPy objects.

    Returns:
        The same structure built from native Python types.
    """
    if isinstance(obj, np.ndarray):
        # tolist() already yields Python scalars; recurse for uniformity.
        return convert_numpy_to_python_types(obj.tolist())
    if isinstance(obj, np.number):
        return float(obj) if isinstance(obj, np.floating) else int(obj)
    if isinstance(obj, np.bool_):
        # np.bool_ is not an np.number subclass, so it needs its own branch.
        return bool(obj)
    if isinstance(obj, dict):
        return {
            (convert_numpy_to_python_types(key) if isinstance(key, np.generic) else key):
                convert_numpy_to_python_types(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_to_python_types(item) for item in obj]
    return obj

File diff suppressed because it is too large Load Diff

View File

@@ -12,10 +12,11 @@
"@types/react": "^19.1.0", "@types/react": "^19.1.0",
"@types/react-dom": "^19.1.1", "@types/react-dom": "^19.1.1",
"axios": "^1.8.4", "axios": "^1.8.4",
"framer-motion": "^12.9.2",
"react": "^19.1.0", "react": "^19.1.0",
"react-dom": "^19.1.0", "react-dom": "^19.1.0",
"react-icons": "^5.5.0", "react-icons": "^5.5.0",
"react-scripts": "^3.0.1", "react-scripts": "^5.0.1",
"web-vitals": "^2.1.4" "web-vitals": "^2.1.4"
}, },
"scripts": { "scripts": {
@@ -43,9 +44,11 @@
] ]
}, },
"devDependencies": { "devDependencies": {
"@babel/preset-react": "^7.26.3",
"autoprefixer": "^10.4.21", "autoprefixer": "^10.4.21",
"eslint": "^8.57.1",
"postcss": "^8.5.3", "postcss": "^8.5.3",
"tailwindcss": "^3.3.5", "tailwindcss": "^3.3.5",
"typescript": "^5.3.3" "typescript": "^4.1.2"
} }
} }

View File

@@ -3,7 +3,7 @@
<head> <head>
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>URL 악성 판별기</title> <title></title>
</head> </head>
<body> <body>
<div id="root"></div> <div id="root"></div>

View File

@@ -5,9 +5,8 @@ import './App.css';
function App() { function App() {
return ( return (
<div className="min-h-screen bg-gray-100 flex flex-col justify-center"> <div className="min-h-screen bg-sky-200 flex flex-col justify-center">
<div className="container mx-auto px-4 text-center"> <div className="container mx-auto px-4 text-center">
<h1 className="text-3xl font-bold text-blue-600 mb-6">🔍 악성 URL 판별기</h1>
<UrlPredictor /> <UrlPredictor />
</div> </div>
</div> </div>

View File

@@ -1,5 +1,7 @@
import React, { useState } from "react"; import React, { useState } from "react";
import axios from "axios"; import axios from "axios";
import { motion } from "framer-motion"; // 애니메이션용
import { FaSearch, FaRedo } from "react-icons/fa"; // 아이콘용
const UrlPredictor = () => { const UrlPredictor = () => {
const [url, setUrl] = useState(""); const [url, setUrl] = useState("");
@@ -26,80 +28,89 @@ const UrlPredictor = () => {
} }
}; };
// 모델 정보 정의 (title + 키)
const models = [ const models = [
{ key: "old_model", title: "🧠 기존 모델 (Ho)" }, { key: "model1", title: "HO 모델" },
{ key: "new_model", title: "🚀 개선 모델 (Jun)" }, { key: "model2", title: "Jun 모델" },
]; ];
return ( return (
<div className="min-h-screen bg-gray-100 p-6"> <div className="min-h-screen bg-blue-50 p-8">
{!results ? ( <div className="grid grid-cols-1 md:grid-cols-2 gap-8 h-full">
<div className="flex justify-center items-center h-full">
<form onSubmit={handleSubmit} className="flex gap-4 w-full max-w-2xl"> {/* 왼쪽 입력창 */}
<div className="flex flex-col justify-center items-center gap-6">
<h1 className="text-2x1 font-bold text-blue-700">URL 판별기</h1>
<form onSubmit={handleSubmit} className="flex gap-2 w-full max-w-md">
<input <input
type="text" type="text"
value={url} value={url}
onChange={(e) => setUrl(e.target.value)} onChange={(e) => setUrl(e.target.value)}
placeholder="URL을 입력하세요" placeholder="URL을 입력하세요"
className="flex-grow px-4 py-2 border border-gray-300 rounded shadow" className="flex-grow px-4 py-2 border border-gray-300 rounded-lg shadow-md focus:outline-none focus:ring-2 focus:ring-blue-400"
required required
/> />
<button <button
type="submit" type="submit"
className="bg-blue-600 text-white px-6 py-2 rounded shadow hover:bg-blue-700 transition" className="bg-blue-600 text-white px-6 py-2 flex items-center gap-2 rounded-lg shadow-md hover:bg-blue-700 transition"
> >
검사하기 <FaSearch /> 검사
</button> </button>
</form> </form>
{loading && (
<div className="flex items-center gap-2">
<div className="w-6 h-6 border-4 border-blue-400 border-t-transparent rounded-full animate-spin"></div>
<p className="text-blue-600 font-semibold">분석 ...</p>
</div> </div>
) : ( )}
<div className="grid grid-cols-2 gap-6">
{/* 좌측 입력창 */}
<div className="flex flex-col gap-4">
<form onSubmit={handleSubmit} className="flex gap-2">
<input
type="text"
value={url}
onChange={(e) => setUrl(e.target.value)}
className="flex-grow px-4 py-2 border border-gray-300 rounded shadow"
placeholder="URL을 다시 입력해보세요"
required
/>
<button
type="submit"
className="bg-blue-600 text-white px-4 py-2 rounded hover:bg-blue-700 transition"
>
다시 검사
</button>
</form>
{loading && <p>🔍 분석 ...</p>}
{error && <p className="text-red-500"> {error}</p>} {error && <p className="text-red-500"> {error}</p>}
</div> </div>
{/* 우측 결과 반복 렌더링 */} {/* 오른쪽 결과창 */}
<div className="flex flex-col gap-4"> <div className="flex flex-col gap-6">
{models.map((model) => { {results ? (
models.map((model) => {
const data = results[model.key]; const data = results[model.key];
if (!data) return null;
return ( return (
<div key={model.key} className="bg-white rounded p-4 shadow"> <motion.div
<h2 className="text-lg font-bold mb-2">{model.title}</h2> key={model.key}
<p> initial={{ opacity: 0, y: 30 }}
악성 확률: <strong>{(data.prob * 100).toFixed(2)}%</strong> animate={{ opacity: 1, y: 0 }}
transition={{ duration: 0.6 }}
className="bg-white rounded-2xl p-6 shadow-lg border border-gray-200"
>
<h2 className="text-xl font-bold mb-4 text-gray-800">{model.title}</h2>
<p className="mb-2 text-gray-700">
악성 확률:{" "}
<strong>
{(data.malicious_probability * 100).toFixed(2)}%
</strong>
</p> </p>
<p> <p>
판별 결과:{" "} 판별 결과:{" "}
<strong className={data.malicious ? "text-red-600" : "text-green-600"}> <strong
{data.malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"} className={
data.is_malicious
? "text-red-600"
: "text-green-600"
}
>
{data.is_malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"}
</strong> </strong>
</p> </p>
</div> </motion.div>
); );
})} })
</div> ) : (
<div className="text-gray-500 flex items-center justify-center h-full">
결과가 여기에 표시됩니다.
</div> </div>
)} )}
</div> </div>
</div>
</div>
); );
}; };

View File

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"target": "es5",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "preserve"
},
"include": [
"src"
]
}