Upload files to "/"

Delete Nam/scaler.pkl
Delete Nam/scaler 1.pkl
2025-05-22 06:56:32 +00:00 · 2025-05-21 11:53:56 +00:00 · 2025-05-21 11:53:50 +00:00 · 2025-05-21 11:53:38 +00:00 · 2025-05-21 11:53:09 +00:00 · 2025-05-21 11:52:11 +00:00
36 changed files with 13597 additions and 24555 deletions
--- a/1
+++ b/1
--- a/Nam/Feature.py
+++ b/Nam/Feature.py
@@ -0,0 +1,38 @@
 import re
 from collections import Counter
 from scipy.stats import entropy
 def calculate_url_entropy(url):
     """Return the Shannon entropy, in bits, of the characters in ``url``.

     The relative frequency of each distinct character is used as its
     probability, and the entropy is computed with ``scipy.stats.entropy``
     using base 2.

     Args:
         url: The URL string to measure.

     Returns:
         The entropy as a plain Python float; ``0.0`` for an empty string.
     """
     if not url:
         # An empty string has no character distribution. Passing an empty
         # probability list to SciPy is not guaranteed to give 0.0 across
         # SciPy versions, so return a well-defined value here.
         return 0.0
     total = len(url)
     probabilities = [count / total for count in Counter(url).values()]
     return float(entropy(probabilities, base=2))
 def extract_url_features(url):
     """Build the dictionary of lexical features for a single URL.

     Args:
         url: The raw URL string.

     Returns:
         A dict mapping feature names to values: character/symbol counts
         (ints), pattern flags (bools), the URL length and the character
         entropy computed by ``calculate_url_entropy``.
     """
     # Keywords whose presence counts toward ``suspicious_word_count``.
     risky_terms = (
         'login', 'verify', 'update', 'confirm',
         'account', 'secure', 'ebayisapi', 'banking',
     )
     # (feature name, substring) pairs counted with ``str.count``.
     counted_symbols = (
         ('dash_count', '-'),
         ('underscore_count', '_'),
         ('percent_count', '%'),
         ('equal_count', '='),
         ('question_count', '?'),
         ('at_count', '@'),
         ('count_of_exclamation', '!'),
         ('count_of_dot', '.'),
         ('count_of_double_slash', '//'),
     )

     features = {}
     features['digit_count'] = len(re.findall(r'\d', url))
     for feature_name, symbol in counted_symbols:
         features[feature_name] = url.count(symbol)
     features['special_char_count'] = len(re.findall(r'[^a-zA-Z0-9]', url))

     # Flag a dotted-quad (IPv4-looking) token anywhere in the URL.
     ip_match = re.search(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', url)
     features['is_ip_in_url'] = ip_match is not None
     features['has_www'] = 'www' in url

     # Case-insensitive substring match against the keyword list.
     lowered = url.lower()
     features['suspicious_word_count'] = len(
         [term for term in risky_terms if term in lowered]
     )

     # Slash count minus two (presumably discounting the "//" after the
     # scheme); the value is negative for URLs with fewer than two slashes.
     features['path_depth'] = url.count('/') - 2
     features['has_long_digit_sequence'] = re.search(r'\d{4,}', url) is not None
     features['has_multiple_dash'] = re.search(r'-{2,}', url) is not None
     features['has_https'] = url.startswith('https')
     features['ends_with_common_extension'] = url.endswith(('.html', '.php'))
     features['url_length'] = len(url)
     features['url_entropy'] = calculate_url_entropy(url)
     return features
--- a/Nam/best_model
+++ b/Nam/best_model
--- a/Nam/model.running_code.py
+++ b/Nam/model.running_code.py
@@ -0,0 +1,54 @@
 import pandas as pd
 import pickle
 from tensorflow.keras.models import load_model
 from Feature import extract_url_features
 from collections import Counter
 from scipy.stats import entropy
 import tensorflow as tf
 # URL entropy helper
 def calculate_url_entropy(url):
     """Return the Shannon entropy, in bits, of the characters in ``url``.

     The relative frequency of each distinct character is used as its
     probability, and the entropy is computed with ``scipy.stats.entropy``
     using base 2.

     Args:
         url: The URL string to measure.

     Returns:
         The entropy as a plain Python float; ``0.0`` for an empty string.
     """
     if not url:
         # An empty string has no character distribution. Passing an empty
         # probability list to SciPy is not guaranteed to give 0.0 across
         # SciPy versions, so return a well-defined value here.
         return 0.0
     total = len(url)
     probabilities = [count / total for count in Counter(url).values()]
     return float(entropy(probabilities, base=2))
 # Load the fitted scaler from the current working directory.
 # NOTE(review): pickle.load can execute arbitrary code contained in the
 # file, so scaler.pkl must come from a trusted source.
 with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)
 # Load the trained Keras model (also resolved relative to the working directory).
 model = load_model("best_model.h5")
 # Prediction function: calls the model inside a tf.function with
 # reduce_retracing=True.
@tf.function(reduce_retracing=True)
 def predict_with_model(model, input_data):
    """Call ``model`` on ``input_data`` and return its output unchanged."""
    return model(input_data)
 # Read the URL to classify from standard input.
 url = input("URL입력 : ")
 # Extract the lexical features using Feature.py.
 features = extract_url_features(url)
 # Fill in features that might be missing from the extractor's output.
 # NOTE(review): extract_url_features in Feature.py appears to already
 # return 'url_length' and 'url_entropy', so these two assignments look
 # redundant — confirm before removing.
 features['url_length'] = len(url)
 features['url_entropy'] = calculate_url_entropy(url)
 # Build a one-row DataFrame and reorder its columns to the order stored on
 # the scaler (feature_names_in_).
 input_df = pd.DataFrame([features])
 expected_columns = list(scaler.feature_names_in_)
 input_df = input_df[expected_columns]
 # Scale the features.
 input_scaled = scaler.transform(input_df)
 # Run the prediction.
 prediction = predict_with_model(model, input_scaled)
 score = float(prediction.numpy()[0][0])  # convert the first output value to a plain Python float
 # Print the verdict: scores above the threshold are reported as malicious
 # ("악성") with the score, otherwise as normal ("정상") with 1 - score.
 threshold = 0.5
 if score > threshold:
    print(f"악성 (악성일 확률: {score:.4f})")
 else:
    print(f"정상 (정상일 확률: {1 - score:.4f})")
--- a/Nam/model.scaler.pkl
+++ b/Nam/model.scaler.pkl
--- a/data_preprocessing_undersampling_val.ipynb
+++ b/data_preprocessing_undersampling_val.ipynb
--- a/backend/app/pycache/PreP.cpython-310.pyc
+++ b/backend/app/pycache/PreP.cpython-310.pyc
--- a/backend/app/pycache/PreP.cpython-312.pyc
+++ b/backend/app/pycache/PreP.cpython-312.pyc
--- a/backend/app/pycache/init.cpython-310.pyc
+++ b/backend/app/pycache/init.cpython-310.pyc
--- a/backend/app/pycache/init.cpython-312.pyc
+++ b/backend/app/pycache/init.cpython-312.pyc
--- a/backend/app/pycache/exe.cpython-310.pyc
+++ b/backend/app/pycache/exe.cpython-310.pyc
--- a/backend/app/pycache/exe.cpython-312.pyc
+++ b/backend/app/pycache/exe.cpython-312.pyc
--- a/backend/app/pycache/junPreP.cpython-310.pyc
+++ b/backend/app/pycache/junPreP.cpython-310.pyc
--- a/backend/app/pycache/junPreP.cpython-312.pyc
+++ b/backend/app/pycache/junPreP.cpython-312.pyc
--- a/backend/app/pycache/main.cpython-310.pyc
+++ b/backend/app/pycache/main.cpython-310.pyc
--- a/backend/app/pycache/main.cpython-312.pyc
+++ b/backend/app/pycache/main.cpython-312.pyc
--- a/backend/app/pycache/model_load.cpython-310.pyc
+++ b/backend/app/pycache/model_load.cpython-310.pyc
--- a/backend/app/pycache/model_load.cpython-312.pyc
+++ b/backend/app/pycache/model_load.cpython-312.pyc
--- a/backend/app/pycache/predictor.cpython-310.pyc
+++ b/backend/app/pycache/predictor.cpython-310.pyc
--- a/backend/app/pycache/utils.cpython-312.pyc
+++ b/backend/app/pycache/utils.cpython-312.pyc
--- a/backend/app/exe.py
+++ b/backend/app/exe.py
@@ -1,52 +1,53 @@
-from app.junPreP import extract_features
+from app.junPreP import extract_features
-import numpy as np
+import numpy as np
-import pickle
+import pickle
-import pandas as pd
+import pandas as pd
-from sklearn.preprocessing import MinMaxScaler
+from sklearn.preprocessing import MinMaxScaler
-from tensorflow.keras.models import load_model
+from tensorflow.keras.models import load_model
-import tensorflow as tf
+import tensorflow as tf
-import os
+import os
-
+
-# 모델 및 스케일러 경로 (FastAPI 기준으로 맞춰서 절대 경로 또는 경로 설정)
+# 모델 및 스케일러 경로 (FastAPI 기준으로 맞춰서 절대 경로 또는 경로 설정)
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-MODEL_PATH = os.path.join(BASE_DIR, "models", "Recall_0.77.keras")
+MODEL_PATH = os.path.join(BASE_DIR, "models", "White_list_model.keras")
-SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl")
+SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl")
-
+
-# 모델 및 스케일러 로드 (1회만 수행)
+# 모델 및 스케일러 로드 (1회만 수행)
-model = load_model(MODEL_PATH)
+model = load_model(MODEL_PATH)
-with open(SCALER_PATH, 'rb') as f:
+with open(SCALER_PATH, 'rb') as f:
-    scaler = pickle.load(f)
+    scaler = pickle.load(f)
-
+
-# @tf.function으로 추론 최적화
+# @tf.function으로 추론 최적화
-@tf.function(reduce_retracing=True)
+@tf.function(reduce_retracing=True)
-def predict_with_model(model, input_data):
+def predict_with_model(model, input_data):
-    return model(input_data)
+    return model(input_data)
-
+
-# Threshold (적절히 조정 가능)
+# Threshold (적절히 조정 가능)
-BEST_THRESHOLD = 0.4034
+BEST_THRESHOLD = 0.4034
-
+
-# 📦 예측 함수 정의 (FastAPI에서 import해서 사용)
+# 📦 예측 함수 정의 (FastAPI에서 import해서 사용)
-def predict_url_maliciousness(url: str) -> dict:
+def predict_url_maliciousness(url: str) -> dict:
-    # 특성 추출
+    # 특성 추출
-    features = extract_features(url)
+    features = extract_features(url)
-    input_df = pd.DataFrame([list(features.values())], columns=features.keys())
+    input_df = pd.DataFrame([list(features.values())], columns=features.keys())
-
+
-    # 스케일링
+    # 스케일링
-    input_scaled = scaler.transform(input_df)
+    input_scaled = scaler.transform(input_df)
-
+
-    # 예측
+    # 예측
-    prediction = predict_with_model(model, input_scaled)
+    prediction = predict_with_model(model, input_scaled)
-    malicious_prob = float(prediction[0][0])
+    malicious_prob = float(prediction[0][0].numpy())
-
+
-    # 임계값 기반 판단
+
-    is_malicious = bool(malicious_prob > BEST_THRESHOLD)
+    # 임계값 기반 판단
-
+    is_malicious = bool(malicious_prob > BEST_THRESHOLD)
-    # Ensure all values are Python native types (not numpy types)
+
-    return {
+# 예: malicious_probability가 np.float32 타입일 경우
-        "url": str(url),
+    return {
-        "malicious_probability": float(malicious_prob),
+        "url": str(url),
-        "is_malicious": bool(is_malicious),
+        "malicious_probability": malicious_prob,   
-        "threshold": float(BEST_THRESHOLD)
+        "is_malicious": is_malicious,               
-    }
+        "threshold": float(BEST_THRESHOLD)          
-
+    }
-
+
--- a/backend/app/junPreP.py
+++ b/backend/app/junPreP.py
@@ -1,204 +1,274 @@
-import re
+import re
-from urllib.parse import urlparse, parse_qs
+from urllib.parse import urlparse, parse_qs
-import tldextract
+import tldextract
-import zlib
+import zlib
-import re
+from collections import Counter
-from urllib.parse import urlparse
+import math
-from collections import Counter
+
-import math
+def url_is_whitelisted(url):
-
+    trusted_domains = [
-
+    # 1. 포털 / 검색엔진
-
+    'naver.com', 'daum.net', 'google.com', 'bing.com', 'yahoo.com',
-def check_similar_brand(url):
+
-    # 자주 사용되는 브랜드/도메인 목록
+    # 2. 소셜 미디어 / 커뮤니케이션
-    common_brands = {
+    'facebook.com', 'instagram.com', 'twitter.com', 'x.com', 'linkedin.com',
-        'google', 'facebook', 'amazon', 'microsoft', 'apple', 
+    'whatsapp.com', 'kakao.com', 'kakaocorp.com',
-        'netflix', 'paypal', 'twitter', 'instagram', 'linkedin',
+
-        'youtube', 'yahoo', 'gmail', 'whatsapp', 'tiktok',
+    # 3. 동영상 / 스트리밍
-        'geocities', 'angelfire', 'newadvent', 'wikipedia',
+    'youtube.com', 'netflix.com', 'twitch.tv', 'tving.com', 'watcha.com',
-    }
+
-    
+    # 4. 쇼핑 / 이커머스
-    # 2. 유사 브랜드 확인
+    'amazon.com', 'gmarket.co.kr', '11st.co.kr', 'coupang.com', 'ssg.com', 'wemakeprice.com',
-    try:
+
-        # URL 파싱
+    # 5. 금융 / 결제
-        parsed = urlparse(url if '//' in url else '//' + url)
+    'paypal.com', 'kbfg.com', 'shinhan.com', 'hanafn.com', 'wooribank.com', 
-        domain = parsed.netloc.lower() if parsed.netloc else url.lower()
+    'kakaobank.com', 'toss.im',
-        
+
-        for brand in common_brands:
+    # 6. 공공기관 / 교육
-            if brand not in domain:
+    'gov.kr', 'moe.go.kr', 'epeople.go.kr', 'pusan.ac.kr', 'ac.kr', 
-                similar = False
+
-                # 비슷한 철자 패턴 확인
+    # 7. IT / 기술
-                patterns = [
+    'apple.com', 'microsoft.com', 'adobe.com', 'github.com', 'stackoverflow.com'
-                    brand.replace('o', '0'),
+]
-                    brand.replace('i', '1'),
+    
-                    brand.replace('l', '1'),
+    try:
-                    brand.replace('e', '3'),
+        domain = urlparse(url if '//' in url else '//' + url).netloc.lower()
-                    brand.replace('a', '4'),
+        for trusted in trusted_domains:
-                    brand.replace('s', '5'),
+            if domain.endswith(trusted):
-                    brand + '-',
+                return True
-                    brand + '_',
+        return False
-                    brand[:-1],  # 마지막 문자 제거
+    except:
-                    ''.join(c + c for c in brand),  # 문자 중복
+        return False
-                ]
+
-                
+
-                for pattern in patterns:
+
-                    if pattern in domain:
+def check_similar_brand(url):
-                        similar = True
+    # 자주 사용되는 브랜드/도메인 목록
-                        break
+    common_brands = {
-                
+        'google', 'facebook', 'amazon', 'microsoft', 'apple', 
-                if similar:
+        'netflix', 'paypal', 'twitter', 'instagram', 'linkedin',
-                    return True  # 유사 브랜드가 발견되면 True 반환
+        'youtube', 'yahoo', 'gmail', 'whatsapp', 'tiktok',
-        
+        'geocities', 'angelfire', 'newadvent', 'wikipedia',
-    except Exception as e:
+    }
-        return False  # 예외 발생 시 False 반환
+    
-    
+    # 2. 유사 브랜드 확인
-    return False  # 유사 브랜드가 없으면 False 반환
+    try:
-
+        # URL 파싱
-
+        parsed = urlparse(url if '//' in url else '//' + url)
-
+        domain = parsed.netloc.lower() if parsed.netloc else url.lower()
-# url 압축 비율 계산 함수
+        
-def compression_ratio(url: str) -> float:
+        for brand in common_brands:
-    if not url:
+            if brand not in domain:
-        return 0.0
+                similar = False
-    original_length = len(url.encode('utf-8'))
+                # 비슷한 철자 패턴 확인
-    compressed_data = zlib.compress(url.encode('utf-8'))
+                patterns = [
-    compressed_length = len(compressed_data)
+                    brand.replace('o', '0'),
-    return compressed_length / original_length
+                    brand.replace('i', '1'),
-
+                    brand.replace('l', '1'),
-
+                    brand.replace('e', '3'),
-def extract_features(url):
+                    brand.replace('a', '4'),
-    parsed_url = urlparse(url)
+                    brand.replace('s', '5'),
-    suspicious_keywords = [
+                    brand + '-',
-        'login', 'verify', 'account', 'update', 'secure', 'banking', 
+                    brand + '_',
-        'paypal', 'confirm', 'signin', 'auth', 'redirect', 'free', 
+                    brand[:-1],  # 마지막 문자 제거
-        'bonus', 'admin', 'support', 'server', 'password', 'click', 
+                    ''.join(c + c for c in brand),  # 문자 중복
-        'urgent', 'immediate', 'alert', 'security', 'prompt'
+                ]
-    ]
+                
-    
+                for pattern in patterns:
-    additional_keywords = [
+                    if pattern in domain:
-        'verify', 'wallet', 'cryptocurrency', 'bitcoin', 'ethereum',
+                        similar = True
-        'validation', 'authenticate', 'reset', 'recover', 'access',
+                        break
-        'limited', 'offer', 'prize', 'win', 'winner', 'payment',
+                
-        'bank', 'credit', 'debit', 'card', 'expire', 'suspension',
+                if similar:
-        'unusual', 'activity', 'verify', 'document', 'invoice'
+                    return True  # 유사 브랜드가 발견되면 True 반환
-    ]
+        
-    
+    except Exception as e:
-    all_keywords = list(set(suspicious_keywords + additional_keywords))
+        return False  # 예외 발생 시 False 반환
-
+    
-    contains_keyword = 0
+    return False  # 유사 브랜드가 없으면 False 반환
-    keyword_count = 0
+
-    for keyword in all_keywords:
+
-        if re.search(r'\b' + keyword + r'\b', url, re.IGNORECASE):
+
-            contains_keyword = 1
+# url 압축 비율 계산 함수
-            keyword_count += 1
+def compression_ratio(url: str) -> float:
-    
+    if not url:
-    url_length = len(url)
+        return 0.0
-    extracted = tldextract.extract(url)
+    original_length = len(url.encode('utf-8'))
-    tld = extracted.suffix
+    compressed_data = zlib.compress(url.encode('utf-8'))
-    domain = extracted.domain
+    compressed_length = len(compressed_data)
-    subdomain = extracted.subdomain
+    return compressed_length / original_length
-
+
-    tld_length = len(tld) if tld else 0
+
-    common_tlds = ['com', 'org', 'net', 'edu', 'gov', 'mil', 'io', 'co', 'info', 'biz']
+def extract_features(url):
-    is_common_tld = 1 if tld in common_tlds else 0
+    parsed_url = urlparse(url)
-    country_tlds = ['us', 'uk', 'ca', 'au', 'de', 'fr', 'jp', 'cn', 'ru', 'br', 'in', 'it', 'es']
+    suspicious_keywords = [
-    is_country_tld = 1 if tld in country_tlds else 0
+        'login', 'verify', 'account', 'update', 'secure', 'banking', 
-    suspicious_tlds = ['xyz', 'top', 'club', 'online', 'site', 'icu', 'vip', 'work', 'rest', 'fit']
+        'paypal', 'confirm', 'signin', 'auth', 'redirect', 'free', 
-    is_suspicious_tld = 1 if tld in suspicious_tlds else 0
+        'bonus', 'admin', 'support', 'server', 'password', 'click', 
-    url_shorteners = ['bit.ly', 'tinyurl.com', 'goo.gl', 't.co', 'ow.ly', 'is.gd', 'buff.ly', 'adf.ly', 'tiny.cc']
+        'urgent', 'immediate', 'alert', 'security', 'prompt'
-    full_domain = f"{domain}.{tld}" if tld else domain
+    ]
-    is_shortened = 1 if full_domain in url_shorteners else 0
+    
-
+    additional_keywords = [
-
+        'verify', 'wallet', 'cryptocurrency', 'bitcoin', 'ethereum',
-    domain_length = len(domain) if domain else 0
+        'validation', 'authenticate', 'reset', 'recover', 'access',
-    has_subdomain = 1 if subdomain else 0
+        'limited', 'offer', 'prize', 'win', 'winner', 'payment',
-    subdomain_length = len(subdomain) if subdomain else 0
+        'bank', 'credit', 'debit', 'card', 'expire', 'suspension',
-    subdomain_count = len(subdomain.split('.')) if subdomain else 0 
+        'unusual', 'activity', 'verify', 'document', 'invoice'
-
+    ]
-    path = parsed_url.path
+    
-    path_length = len(path)
+    all_keywords = list(set(suspicious_keywords + additional_keywords))
-    path_depth = path.count('/') if path else 0
+
-
+    contains_keyword = 0
-    query = parsed_url.query
+    keyword_count = 0
-    has_query = 1 if query else 0
+    for keyword in all_keywords:
-    query_length = len(query) if query else 0
+        if re.search(r'\b' + keyword + r'\b', url, re.IGNORECASE):
-    query_params = parse_qs(query)
+            contains_keyword = 1
-    query_param_count = len(query_params) if query_params else 0
+            keyword_count += 1
-
+    
-    has_fragment = 1 if parsed_url.fragment else 0
+    url_length = len(url)
-    fragment_length = len(parsed_url.fragment) if parsed_url.fragment else 0
+    extracted = tldextract.extract(url)
-    
+    tld = extracted.suffix
-    # Character type ratios
+    domain = extracted.domain
-    letter_count = sum(c.isalpha() for c in url)
+    subdomain = extracted.subdomain
-    digit_count = sum(c.isdigit() for c in url)
+
-    special_char_count = len(re.findall(r'[^a-zA-Z0-9]', url))
+    tld_length = len(tld) if tld else 0
-    
+    common_tlds = ['com', 'org', 'net', 'edu', 'gov', 'mil', 'io', 'co', 'info', 'biz']
-    letter_ratio = letter_count / url_length if url_length > 0 else 0
+    is_common_tld = 1 if tld in common_tlds else 0
-    digit_ratio = digit_count / url_length if url_length > 0 else 0
+    country_tlds = ['us', 'uk', 'ca', 'au', 'de', 'fr', 'jp', 'cn', 'ru', 'br', 'in', 'it', 'es']
-    special_char_ratio = special_char_count / url_length if url_length > 0 else 0
+    is_country_tld = 1 if tld in country_tlds else 0
-    
+    suspicious_tlds = ['xyz', 'top', 'club', 'online', 'site', 'icu', 'vip', 'work', 'rest', 'fit']
-    # Character distribution and entropy
+    is_suspicious_tld = 1 if tld in suspicious_tlds else 0
-    if url:
+    url_shorteners = ['bit.ly', 'tinyurl.com', 'goo.gl', 't.co', 'ow.ly', 'is.gd', 'buff.ly', 'adf.ly', 'tiny.cc']
-        char_counts = Counter(url)
+    full_domain = f"{domain}.{tld}" if tld else domain
-        total_chars = len(url)
+    is_shortened = 1 if full_domain in url_shorteners else 0
-        char_frequencies = {char: count/total_chars for char, count in char_counts.items()}
+
-        entropy = -sum(freq * math.log2(freq) for freq in char_frequencies.values())
+
-    else:
+    domain_length = len(domain) if domain else 0
-        entropy = 0
+    has_subdomain = 1 if subdomain else 0
-
+    subdomain_length = len(subdomain) if subdomain else 0
-
+    subdomain_count = len(subdomain.split('.')) if subdomain else 0 
-
+
-
+    path = parsed_url.path
-
+    path_length = len(path)
-    if url_length <= 13:
+    path_depth = path.count('/') if path else 0
-        url_length_cat = 0  
+
-    elif url_length <= 18:
+    query = parsed_url.query
-        url_length_cat = 1 
+    has_query = 1 if query else 0
-    elif url_length <= 25:
+    query_length = len(query) if query else 0
-        url_length_cat = 2 
+    query_params = parse_qs(query)
-    else:
+    query_param_count = len(query_params) if query_params else 0
-        url_length_cat = 3 
+
-
+    has_fragment = 1 if parsed_url.fragment else 0
-    return {
+    fragment_length = len(parsed_url.fragment) if parsed_url.fragment else 0
-        # "url_length": url_length,
+    
-        "url_length_cat": url_length_cat,
+    # Character type ratios
-        "num_dots": url.count("."),
+    letter_count = sum(c.isalpha() for c in url)
-        "num_digits": sum(c.isdigit() for c in url),
+    digit_count = sum(c.isdigit() for c in url)
-        "num_special_chars": len(re.findall(r"[^a-zA-Z0-9]", url)),
+    special_char_count = len(re.findall(r'[^a-zA-Z0-9]', url))
-        "url_keyword": contains_keyword,
+    
-        # "url_keyword_count": keyword_count,
+    letter_ratio = letter_count / url_length if url_length > 0 else 0
-        "num_underbar": url.count("_"),
+    digit_ratio = digit_count / url_length if url_length > 0 else 0
-        "extract_consecutive_numbers": int(bool(re.findall(r'(\d)\1+', url))),
+    special_char_ratio = special_char_count / url_length if url_length > 0 else 0
-        "number": int(bool(len(re.findall(r'(\d)(?!\1)(\d)(?!\2)(\d)', url)))),
+    
-        "upper": int(any(c.isupper() for c in url)),
+    # Character distribution and entropy
-
+    if url:
-        "is_common_tld": is_common_tld,
+        char_counts = Counter(url)
-        "is country_tld": is_country_tld,
+        total_chars = len(url)
-        "is_suspicious_tld": is_suspicious_tld,
+        char_frequencies = {char: count/total_chars for char, count in char_counts.items()}
-
+        entropy = -sum(freq * math.log2(freq) for freq in char_frequencies.values())
-        "domain_length": domain_length,
+    else:
-        "has_subdomain": has_subdomain,
+        entropy = 0
-        "subdomain_length": subdomain_length,
+
-        "subdomain_count": subdomain_count,
+
-
+
-        # "path_length": path_length,
+
-        "path_depth": path_depth,
+
-        "has_query": has_query,
+    if url_length <= 13:
-        "query_length": query_length,
+        url_length_cat = 0  
-        "query_param_count": query_param_count,
+    elif url_length <= 18:
-        # "has_fragment": has_fragment,
+        url_length_cat = 1 
-        # "fragment_length": fragment_length,
+    elif url_length <= 25:
-        "url_shorteners": is_shortened,
+        url_length_cat = 2 
-
+    else:
-        # 새로 추가된 특성
+        url_length_cat = 3
-        "compression_ratio": compression_ratio(url),
+        
-        "check_similar_brand" : check_similar_brand(url),
+    if url_is_whitelisted(url):
- 
+        return {
-        # Advanced text analysis
+            # 화이트리스트 URL이면 특징값들을 "정상적"으로 처리되도록 설정
-        "entropy": entropy,
+            "url_length_cat": 1,
-        #"letter_ratio": letter_ratio,
+            "num_dots": 1,
-        "digit_ratio": digit_ratio,
+            "num_digits": 0,
-        "special_char_ratio": special_char_ratio
+            "num_special_chars": 1,
-
+            "url_keyword": 0,
-        
+            "num_underbar": 0,
-    }
+            "extract_consecutive_numbers": 0,
            "number": 0,
            "upper": 0,
            "is_common_tld": 1,
            "is country_tld": 0,
            "is_suspicious_tld": 0,
            "domain_length": 5,
            "has_subdomain": 0,
            "subdomain_length": 0,
            "subdomain_count": 0,
            "path_depth": 0,
            "has_query": 0,
            "query_length": 0,
            "query_param_count": 0,
            "url_shorteners": 0,
            "compression_ratio": 1.0,
            "check_similar_brand": 0,
            "entropy": 3.0,
            "digit_ratio": 0.0,
            "special_char_ratio": 0.1
        }
    return {
        # "url_length": url_length,
        "url_length_cat": url_length_cat,
        "num_dots": url.count("."),
        "num_digits": sum(c.isdigit() for c in url),
        "num_special_chars": len(re.findall(r"[^a-zA-Z0-9]", url)),
        "url_keyword": contains_keyword,
        # "url_keyword_count": keyword_count,
        "num_underbar": url.count("_"),
        "extract_consecutive_numbers": int(bool(re.findall(r'(\d)\1+', url))),
        "number": int(bool(len(re.findall(r'(\d)(?!\1)(\d)(?!\2)(\d)', url)))),
        "upper": int(any(c.isupper() for c in url)),
        "is_common_tld": is_common_tld,
        "is country_tld": is_country_tld,
        "is_suspicious_tld": is_suspicious_tld,
        "domain_length": domain_length,
        "has_subdomain": has_subdomain,
        "subdomain_length": subdomain_length,
        "subdomain_count": subdomain_count,
        # "path_length": path_length,
        "path_depth": path_depth,
        "has_query": has_query,
        "query_length": query_length,
        "query_param_count": query_param_count,
        # "has_fragment": has_fragment,
        # "fragment_length": fragment_length,
        "url_shorteners": is_shortened,
        # 새로 추가된 특성
        "compression_ratio": compression_ratio(url),
        "check_similar_brand" : check_similar_brand(url),
        # Advanced text analysis
        "entropy": entropy,
        #"letter_ratio": letter_ratio,
        "digit_ratio": digit_ratio,
        "special_char_ratio": special_char_ratio
    }
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -2,7 +2,6 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from app.model_load import use_model  # predictor.py에서 함수 import
 from app.exe import predict_url_maliciousness
 from app.utils import convert_numpy_to_python_types
 from fastapi.middleware.cors import CORSMiddleware
 app = FastAPI()
@@ -28,13 +27,15 @@ def root():
 def predict(request: UrlRequest):
     url = request.url
-     result_model1 = convert_numpy_to_python_types(use_model(url))
+     result_model1 = use_model(url)
-     result_model2 = convert_numpy_to_python_types(predict_url_maliciousness(url))
+     result_model2 = predict_url_maliciousness(url)
-     
+    #  print("model1 : ")
-     response_data = {
+    #  print(result_model1.values())
-         "url": url,
+    #  print("model2 : ")
-         "model1": result_model1,
+    #  print(result_model2.values())
-         "model2": result_model2
+
-     }
+     return {
-     
+         "url" : url,
-     return convert_numpy_to_python_types(response_data)
+        "model1": result_model1,
        "model2": result_model2
    }
--- a/backend/app/model_load.py
+++ b/backend/app/model_load.py
@@ -29,6 +29,12 @@ def use_model(url : str):
    input_data = featured_df[features_cols]
    # 학습된 모델에 적용
-    model_pred = round(float(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load])), 4)
+    model_pred = round(np.mean([model.predict_proba(input_data)[:, 1] for model in models_load]), 4)
-    return model_pred
+    #return model_pred
    return {
        "url" : url,
        "malicious_probability" : float(model_pred),
        "is_malicious" : bool(model_pred > best_threshold),
        "threshold" : float(best_threshold)
    }
--- a/backend/app/predictor.py
+++ b/backend/app/predictor.py
@@ -44,7 +44,7 @@ def predict_url(url: str) -> dict:
        input_data = preprocessed[features_cols]
        # ✅ 전처리된 데이터 확인
-        print("Preprocessed input:", input_data)
+        #print("Preprocessed input:", input_data)
        # 평균 확률 계산
        probs = [float(model.predict_proba(input_data)[0, 1]) for model in models_load]
@@ -61,8 +61,8 @@ def predict_url(url: str) -> dict:
        # 예: malicious_probability가 np.float32 타입일 경우
        return {
            "url": url,
-            "malicious_probability": mean_pred,  # ⬅️ numpy -> float
+            "malicious_probability": mean_pred,  
-            "is_malicious": bool(is_malicious),         # ⬅️ numpy -> bool
+            "is_malicious": is_malicious,         
            "threshold": float(BEST_THRESHOLD)          # ⬅️ numpy -> float
        }
--- a/backend/app/testexe.py
+++ b/backend/app/testexe.py
@@ -0,0 +1,4 @@
 # Manual smoke test: classify one sample URL (given without a scheme) and
 # print the returned result.
 from exe import predict_url_maliciousness
 result_model2 = predict_url_maliciousness("www.naver.com")
 print(result_model2)
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -1,18 +0,0 @@
 import numpy as np
 def convert_numpy_to_python_types(obj):
     """Recursively convert NumPy values to native Python types.

     Conversions applied:
       * ``np.ndarray``          -> nested ``list`` (via ``tolist()``)
       * ``np.bool_``            -> ``bool``
       * ``np.complexfloating``  -> ``complex``
       * ``np.floating``         -> ``float``
       * other ``np.number``     -> ``int``
       * ``dict``                -> ``dict`` with converted values; keys that
         are NumPy scalars are converted as well
       * ``list`` / ``tuple``    -> ``list`` with converted items

     Any other object is returned unchanged.

     Args:
         obj: The value (possibly nested) to convert.

     Returns:
         An equivalent structure built from native Python types.
     """
     if isinstance(obj, np.ndarray):
         return convert_numpy_to_python_types(obj.tolist())
     if isinstance(obj, np.bool_):
         return bool(obj)
     if isinstance(obj, np.complexfloating):
         # Complex scalars are np.number but not np.floating; sending them
         # through int() would fail or discard the imaginary part.
         return complex(obj)
     if isinstance(obj, np.floating):
         return float(obj)
     if isinstance(obj, np.number):
         # Only integer scalar types remain at this point.
         return int(obj)
     if isinstance(obj, dict):
         # Only NumPy scalar keys are converted: recursing into arbitrary
         # keys could turn a tuple key into an unhashable list.
         return {
             (convert_numpy_to_python_types(key)
              if isinstance(key, np.generic) else key):
                 convert_numpy_to_python_types(value)
             for key, value in obj.items()
         }
     if isinstance(obj, (list, tuple)):
         return [convert_numpy_to_python_types(item) for item in obj]
     return obj
--- a/best_model.h5
+++ b/best_model.h5
--- a/jun/code.ipynb
+++ b/jun/code.ipynb
@@ -2392,703 +2392,56 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
-     "data": {
+     "ename": "NameError",
-      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+     "evalue": "name 'processed_train' is not defined",
-       "columns": [
+     "output_type": "error",
-        {
+     "traceback": [
-         "name": "index",
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-         "rawType": "object",
+      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
-         "type": "string"
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mprocessed_train\u001b[49m.describe()\n",
-        },
+      "\u001b[31mNameError\u001b[39m: name 'processed_train' is not defined"
-        {
+     ]
         "name": "label",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "url_length_cat",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "num_dots",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "num_digits",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "num_special_chars",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "url_keyword",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "num_underbar",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "extract_consecutive_numbers",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "number",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "upper",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "is_common_tld",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "is_country_tld",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "is_suspicious_tld",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "domain_length",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "has_subdomain",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "subdomain_length",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "subdomain_count",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "path_depth",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "has_query",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "query_length",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "query_param_count",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "url_shorteners",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "compression_ratio",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "entropy",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "digit_ratio",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "special_char_ratio",
         "rawType": "float64",
         "type": "float"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "c79a077e-8e52-4e42-b88f-dc9698b0fa30",
       "rows": [
        [
         "count",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0",
         "6995056.0"
        ],
        [
         "mean",
         "0.22371472079708868",
         "1.4435534183000107",
         "1.546944584861079",
         "1.6343590387267808",
         "2.6635716711917676",
         "0.0370789025849114",
         "0.045005501028154746",
         "0.056463736673444787",
         "0.08128040719044995",
         "0.0357764112252997",
         "0.6133649251700057",
         "0.12739140329970197",
         "0.022784949827420967",
         "10.464007150192936",
         "0.21130266862767075",
         "2.43731000866898",
         "0.2660177416735477",
         "0.6056849294701858",
         "0.027221368921135157",
         "1.9155892390282507",
         "0.04228915393958247",
         "0.0018421582329004942",
         "1.4552534994784176",
         "3.5360434022769756",
         "0.029042428345387533",
         "0.1102289088601276"
        ],
        [
         "std",
         "0.41673309122602675",
         "1.1161203432813147",
         "1.010078604927829",
         "9.827940363271033",
         "7.1618457272654",
         "0.18895518694176003",
         "0.6023702991784359",
         "0.23081505741717664",
         "0.273265280035072",
         "0.18573223887275842",
         "0.4869788780260291",
         "0.33341093196934307",
         "0.14921728811320575",
         "5.0652546813544035",
         "0.4082326232468674",
         "6.90096602515224",
         "0.6272395647222854",
         "1.6003209664806863",
         "0.1627279010519657",
         "19.702068343354906",
         "0.35208851309719974",
         "0.04288082262284407",
         "0.24856536988340924",
         "0.47898938276414027",
         "0.08255957016074264",
         "0.046338026902092454"
        ],
        [
         "min",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.010181818181818183",
         "-0.0",
         "0.0",
         "0.0"
        ],
        [
         "25%",
         "0.0",
         "0.0",
         "1.0",
         "0.0",
         "1.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "7.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "1.3076923076923077",
         "3.238901256602631",
         "0.0",
         "0.07142857142857142"
        ],
        [
         "50%",
         "0.0",
         "1.0",
         "1.0",
         "0.0",
         "2.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "1.0",
         "0.0",
         "0.0",
         "10.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "1.4444444444444444",
         "3.5068905956085183",
         "0.0",
         "0.10344827586206896"
        ],
        [
         "75%",
         "0.0",
         "2.0",
         "2.0",
         "0.0",
         "3.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "1.0",
         "0.0",
         "0.0",
         "13.0",
         "0.0",
         "0.0",
         "0.0",
         "1.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "1.6153846153846154",
         "3.7962176025900556",
         "0.0",
         "0.14285714285714285"
        ],
        [
         "max",
         "1.0",
         "3.0",
         "171.0",
         "2011.0",
         "8198.0",
         "1.0",
         "136.0",
         "1.0",
         "1.0",
         "1.0",
         "1.0",
         "1.0",
         "1.0",
         "63.0",
         "1.0",
         "237.0",
         "38.0",
         "136.0",
         "1.0",
         "8367.0",
         "131.0",
         "1.0",
         "5.0",
         "6.570554108088201",
         "0.9545454545454546",
         "1.0"
        ]
       ],
       "shape": {
        "columns": 26,
        "rows": 8
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>url_length_cat</th>\n",
       "      <th>num_dots</th>\n",
       "      <th>num_digits</th>\n",
       "      <th>num_special_chars</th>\n",
       "      <th>url_keyword</th>\n",
       "      <th>num_underbar</th>\n",
       "      <th>extract_consecutive_numbers</th>\n",
       "      <th>number</th>\n",
       "      <th>upper</th>\n",
       "      <th>...</th>\n",
       "      <th>subdomain_count</th>\n",
       "      <th>path_depth</th>\n",
       "      <th>has_query</th>\n",
       "      <th>query_length</th>\n",
       "      <th>query_param_count</th>\n",
       "      <th>url_shorteners</th>\n",
       "      <th>compression_ratio</th>\n",
       "      <th>entropy</th>\n",
       "      <th>digit_ratio</th>\n",
       "      <th>special_char_ratio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>...</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "      <td>6.995056e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.237147e-01</td>\n",
       "      <td>1.443553e+00</td>\n",
       "      <td>1.546945e+00</td>\n",
       "      <td>1.634359e+00</td>\n",
       "      <td>2.663572e+00</td>\n",
       "      <td>3.707890e-02</td>\n",
       "      <td>4.500550e-02</td>\n",
       "      <td>5.646374e-02</td>\n",
       "      <td>8.128041e-02</td>\n",
       "      <td>3.577641e-02</td>\n",
       "      <td>...</td>\n",
       "      <td>2.660177e-01</td>\n",
       "      <td>6.056849e-01</td>\n",
       "      <td>2.722137e-02</td>\n",
       "      <td>1.915589e+00</td>\n",
       "      <td>4.228915e-02</td>\n",
       "      <td>1.842158e-03</td>\n",
       "      <td>1.455253e+00</td>\n",
       "      <td>3.536043e+00</td>\n",
       "      <td>2.904243e-02</td>\n",
       "      <td>1.102289e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>4.167331e-01</td>\n",
       "      <td>1.116120e+00</td>\n",
       "      <td>1.010079e+00</td>\n",
       "      <td>9.827940e+00</td>\n",
       "      <td>7.161846e+00</td>\n",
       "      <td>1.889552e-01</td>\n",
       "      <td>6.023703e-01</td>\n",
       "      <td>2.308151e-01</td>\n",
       "      <td>2.732653e-01</td>\n",
       "      <td>1.857322e-01</td>\n",
       "      <td>...</td>\n",
       "      <td>6.272396e-01</td>\n",
       "      <td>1.600321e+00</td>\n",
       "      <td>1.627279e-01</td>\n",
       "      <td>1.970207e+01</td>\n",
       "      <td>3.520885e-01</td>\n",
       "      <td>4.288082e-02</td>\n",
       "      <td>2.485654e-01</td>\n",
       "      <td>4.789894e-01</td>\n",
       "      <td>8.255957e-02</td>\n",
       "      <td>4.633803e-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.018182e-02</td>\n",
       "      <td>-0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.307692e+00</td>\n",
       "      <td>3.238901e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>7.142857e-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.444444e+00</td>\n",
       "      <td>3.506891e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.034483e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.615385e+00</td>\n",
       "      <td>3.796218e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.428571e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>1.710000e+02</td>\n",
       "      <td>2.011000e+03</td>\n",
       "      <td>8.198000e+03</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.360000e+02</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>3.800000e+01</td>\n",
       "      <td>1.360000e+02</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>8.367000e+03</td>\n",
       "      <td>1.310000e+02</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>6.570554e+00</td>\n",
       "      <td>9.545455e-01</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              label  url_length_cat      num_dots    num_digits  \\\n",
       "count  6.995056e+06    6.995056e+06  6.995056e+06  6.995056e+06   \n",
       "mean   2.237147e-01    1.443553e+00  1.546945e+00  1.634359e+00   \n",
       "std    4.167331e-01    1.116120e+00  1.010079e+00  9.827940e+00   \n",
       "min    0.000000e+00    0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%    0.000000e+00    0.000000e+00  1.000000e+00  0.000000e+00   \n",
       "50%    0.000000e+00    1.000000e+00  1.000000e+00  0.000000e+00   \n",
       "75%    0.000000e+00    2.000000e+00  2.000000e+00  0.000000e+00   \n",
       "max    1.000000e+00    3.000000e+00  1.710000e+02  2.011000e+03   \n",
       "\n",
       "       num_special_chars   url_keyword  num_underbar  \\\n",
       "count       6.995056e+06  6.995056e+06  6.995056e+06   \n",
       "mean        2.663572e+00  3.707890e-02  4.500550e-02   \n",
       "std         7.161846e+00  1.889552e-01  6.023703e-01   \n",
       "min         0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%         1.000000e+00  0.000000e+00  0.000000e+00   \n",
       "50%         2.000000e+00  0.000000e+00  0.000000e+00   \n",
       "75%         3.000000e+00  0.000000e+00  0.000000e+00   \n",
       "max         8.198000e+03  1.000000e+00  1.360000e+02   \n",
       "\n",
       "       extract_consecutive_numbers        number         upper  ...  \\\n",
       "count                 6.995056e+06  6.995056e+06  6.995056e+06  ...   \n",
       "mean                  5.646374e-02  8.128041e-02  3.577641e-02  ...   \n",
       "std                   2.308151e-01  2.732653e-01  1.857322e-01  ...   \n",
       "min                   0.000000e+00  0.000000e+00  0.000000e+00  ...   \n",
       "25%                   0.000000e+00  0.000000e+00  0.000000e+00  ...   \n",
       "50%                   0.000000e+00  0.000000e+00  0.000000e+00  ...   \n",
       "75%                   0.000000e+00  0.000000e+00  0.000000e+00  ...   \n",
       "max                   1.000000e+00  1.000000e+00  1.000000e+00  ...   \n",
       "\n",
       "       subdomain_count    path_depth     has_query  query_length  \\\n",
       "count     6.995056e+06  6.995056e+06  6.995056e+06  6.995056e+06   \n",
       "mean      2.660177e-01  6.056849e-01  2.722137e-02  1.915589e+00   \n",
       "std       6.272396e-01  1.600321e+00  1.627279e-01  1.970207e+01   \n",
       "min       0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%       0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "50%       0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "75%       0.000000e+00  1.000000e+00  0.000000e+00  0.000000e+00   \n",
       "max       3.800000e+01  1.360000e+02  1.000000e+00  8.367000e+03   \n",
       "\n",
       "       query_param_count  url_shorteners  compression_ratio       entropy  \\\n",
       "count       6.995056e+06    6.995056e+06       6.995056e+06  6.995056e+06   \n",
       "mean        4.228915e-02    1.842158e-03       1.455253e+00  3.536043e+00   \n",
       "std         3.520885e-01    4.288082e-02       2.485654e-01  4.789894e-01   \n",
       "min         0.000000e+00    0.000000e+00       1.018182e-02 -0.000000e+00   \n",
       "25%         0.000000e+00    0.000000e+00       1.307692e+00  3.238901e+00   \n",
       "50%         0.000000e+00    0.000000e+00       1.444444e+00  3.506891e+00   \n",
       "75%         0.000000e+00    0.000000e+00       1.615385e+00  3.796218e+00   \n",
       "max         1.310000e+02    1.000000e+00       5.000000e+00  6.570554e+00   \n",
       "\n",
       "        digit_ratio  special_char_ratio  \n",
       "count  6.995056e+06        6.995056e+06  \n",
       "mean   2.904243e-02        1.102289e-01  \n",
       "std    8.255957e-02        4.633803e-02  \n",
       "min    0.000000e+00        0.000000e+00  \n",
       "25%    0.000000e+00        7.142857e-02  \n",
       "50%    0.000000e+00        1.034483e-01  \n",
       "75%    0.000000e+00        1.428571e-01  \n",
       "max    9.545455e-01        1.000000e+00  \n",
       "\n",
       "[8 rows x 26 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "processed_train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'processed_train' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpyplot\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplt\u001b[39;00m\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mseaborn\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msns\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m desc = \u001b[43mprocessed_train\u001b[49m.describe()\n\u001b[32m      6\u001b[39m plt.figure(figsize=(\u001b[32m12\u001b[39m, \u001b[32m6\u001b[39m))\n\u001b[32m      7\u001b[39m sns.barplot(data=desc.T[[\u001b[33m'\u001b[39m\u001b[33mmean\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mstd\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mmin\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mmax\u001b[39m\u001b[33m'\u001b[39m]])\n",
      "\u001b[31mNameError\u001b[39m: name 'processed_train' is not defined"
     ]
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "desc = processed_train.describe()\n",
    "\n",
    "plt.figure(figsize=(12, 6))\n",
    "sns.barplot(data=desc.T[['mean', 'std', 'min', 'max']])\n",
    "plt.title('Feature Statistics')\n",
    "plt.xticks(rotation=45)\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
@@ -3248,12 +2601,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
-    "from tensorflow.keras.layers import Dense, Dropout, BatchNormalization\n",
+    "from tensorflow.keras.layers import Dense\n",
    "\n",
    "def build_model(input_dim, learning_rate=0.001):\n",
    "    \"\"\"\n",
--- a/react-url-checker/package-lock.json
+++ b/react-url-checker/package-lock.json
--- a/react-url-checker/package.json
+++ b/react-url-checker/package.json
@@ -12,10 +12,11 @@
    "@types/react": "^19.1.0",
    "@types/react-dom": "^19.1.1",
    "axios": "^1.8.4",
    "framer-motion": "^12.9.2",
    "react": "^19.1.0",
    "react-dom": "^19.1.0",
    "react-icons": "^5.5.0",
-    "react-scripts": "^3.0.1",
+    "react-scripts": "^5.0.1",
    "web-vitals": "^2.1.4"
  },
  "scripts": {
@@ -43,9 +44,11 @@
    ]
  },
  "devDependencies": {
    "@babel/preset-react": "^7.26.3",
    "autoprefixer": "^10.4.21",
    "eslint": "^8.57.1",
    "postcss": "^8.5.3",
    "tailwindcss": "^3.3.5",
-    "typescript": "^5.3.3"
+    "typescript": "^4.1.2"
  }
 }
--- a/react-url-checker/public/index.html
+++ b/react-url-checker/public/index.html
@@ -3,7 +3,7 @@
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>URL 악성 판별기</title>
+    <title></title>
  </head>
  <body>
    <div id="root"></div>
--- a/react-url-checker/src/App.js
+++ b/react-url-checker/src/App.js
@@ -5,9 +5,8 @@ import './App.css';
 function App() {
  return (
-<div className="min-h-screen bg-gray-100 flex flex-col justify-center">
+<div className="min-h-screen bg-sky-200 flex flex-col justify-center">
  <div className="container mx-auto px-4 text-center">
    <h1 className="text-3xl font-bold text-blue-600 mb-6">🔍 악성 URL 판별기</h1>
    <UrlPredictor />
  </div>
 </div>
--- a/react-url-checker/src/components/UrlPredictor.jsx
+++ b/react-url-checker/src/components/UrlPredictor.jsx
@@ -1,5 +1,7 @@
 import React, { useState } from "react";
 import axios from "axios";
 import { motion } from "framer-motion"; // 애니메이션용
 import { FaSearch, FaRedo } from "react-icons/fa"; // 아이콘용
 const UrlPredictor = () => {
  const [url, setUrl] = useState("");
@@ -26,81 +28,90 @@ const UrlPredictor = () => {
    }
  };
  // 모델 정보 정의 (title + 키)
  const models = [
-    { key: "old_model", title: "🧠 기존 모델 (Ho)" },
+    { key: "model1", title: "HO 모델" },
-    { key: "new_model", title: "🚀 개선 모델 (Jun)" },
+    { key: "model2", title: "Jun 모델" },
  ];
  return (
-    <div className="min-h-screen bg-gray-100 p-6">
+    <div className="min-h-screen bg-blue-50 p-8">
-      {!results ? (
+      <div className="grid grid-cols-1 md:grid-cols-2 gap-8 h-full">
-        <div className="flex justify-center items-center h-full">
+
-          <form onSubmit={handleSubmit} className="flex gap-4 w-full max-w-2xl">
+        {/* 왼쪽 입력창 */}
        <div className="flex flex-col justify-center items-center gap-6">
          <h1 className="text-2x1 font-bold text-blue-700">URL 판별기</h1>
          <form onSubmit={handleSubmit} className="flex gap-2 w-full max-w-md">
            <input
              type="text"
              value={url}
              onChange={(e) => setUrl(e.target.value)}
              placeholder="URL을 입력하세요"
-              className="flex-grow px-4 py-2 border border-gray-300 rounded shadow"
+              className="flex-grow px-4 py-2 border border-gray-300 rounded-lg shadow-md focus:outline-none focus:ring-2 focus:ring-blue-400"
              required
            />
            <button
              type="submit"
-              className="bg-blue-600 text-white px-6 py-2 rounded shadow hover:bg-blue-700 transition"
+              className="bg-blue-600 text-white px-6 py-2 flex items-center gap-2 rounded-lg shadow-md hover:bg-blue-700 transition"
            >
-              ✅ 검사하기
+              <FaSearch /> 검사
            </button>
          </form>
          {loading && (
            <div className="flex items-center gap-2">
              <div className="w-6 h-6 border-4 border-blue-400 border-t-transparent rounded-full animate-spin"></div>
              <p className="text-blue-600 font-semibold">분석 중...</p>
            </div>
          )}
          {error && <p className="text-red-500">❌ {error}</p>}
        </div>
      ) : (
        <div className="grid grid-cols-2 gap-6">
          {/* 좌측 입력창 */}
          <div className="flex flex-col gap-4">
            <form onSubmit={handleSubmit} className="flex gap-2">
              <input
                type="text"
                value={url}
                onChange={(e) => setUrl(e.target.value)}
                className="flex-grow px-4 py-2 border border-gray-300 rounded shadow"
                placeholder="URL을 다시 입력해보세요"
                required
              />
              <button
                type="submit"
                className="bg-blue-600 text-white px-4 py-2 rounded hover:bg-blue-700 transition"
              >
                다시 검사
              </button>
            </form>
            {loading && <p>🔍 분석 중...</p>}
            {error && <p className="text-red-500">❌ {error}</p>}
          </div>
-          {/* 우측 결과 반복 렌더링 */}
+        {/* 오른쪽 결과창 */}
-          <div className="flex flex-col gap-4">
+        <div className="flex flex-col gap-6">
-            {models.map((model) => {
+          {results ? (
            models.map((model) => {
              const data = results[model.key];
              if (!data) return null;
              return (
-                <div key={model.key} className="bg-white rounded p-4 shadow">
+                <motion.div
-                  <h2 className="text-lg font-bold mb-2">{model.title}</h2>
+                  key={model.key}
-                  <p>
+                  initial={{ opacity: 0, y: 30 }}
-                    악성 확률: <strong>{(data.prob * 100).toFixed(2)}%</strong>
+                  animate={{ opacity: 1, y: 0 }}
                  transition={{ duration: 0.6 }}
                  className="bg-white rounded-2xl p-6 shadow-lg border border-gray-200"
                >
                  <h2 className="text-xl font-bold mb-4 text-gray-800">{model.title}</h2>
                  <p className="mb-2 text-gray-700">
                    악성 확률:{" "}
                    <strong>
                      {(data.malicious_probability * 100).toFixed(2)}%
                    </strong>
                  </p>
                  <p>
                    판별 결과:{" "}
-                    <strong className={data.malicious ? "text-red-600" : "text-green-600"}>
+                    <strong
-                      {data.malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"}
+                      className={
                        data.is_malicious
                          ? "text-red-600"
                          : "text-green-600"
                      }
                    >
                      {data.is_malicious ? "⚠️ 악성 URL" : "✅ 정상 URL"}
                    </strong>
                  </p>
-                </div>
+                </motion.div>
              );
-            })}
+            })
-          </div>
+          ) : (
            <div className="text-gray-500 flex items-center justify-center h-full">
              결과가 여기에 표시됩니다.
            </div>
          )}
        </div>
-      )}
+
      </div>
    </div>
  );
 };
-export default UrlPredictor;
+export default UrlPredictor;
--- a/react-url-checker/tsconfig
+++ b/react-url-checker/tsconfig
@@ -0,0 +1,26 @@
 {
  "compilerOptions": {
    "target": "es5",
    "lib": [
      "dom",
      "dom.iterable",
      "esnext"
    ],
    "allowJs": true,
    "skipLibCheck": true,
    "esModuleInterop": true,
    "allowSyntheticDefaultImports": true,
    "strict": true,
    "forceConsistentCasingInFileNames": true,
    "noFallthroughCasesInSwitch": true,
    "module": "esnext",
    "moduleResolution": "node",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "noEmit": true,
    "jsx": "preserve"
  },
  "include": [
    "src"
  ]
 }
--- a/scaler.pkl
+++ b/scaler.pkl
Author	SHA1	Message	Date
hwangtaehyeon	8709a3cbc6	Upload files to "/"	2025-05-22 06:56:32 +00:00
hwangtaehyeon	ef6f0cb447	Delete Nam/scaler.pkl	2025-05-21 11:53:56 +00:00
hwangtaehyeon	113bf7a747	Delete Nam/scaler 1.pkl	2025-05-21 11:53:50 +00:00
hwangtaehyeon	52f20bfcea	Upload files to "Nam"	2025-05-21 11:53:38 +00:00
hwangtaehyeon	66f9061c4a	Upload files to "Nam"	2025-05-21 11:53:09 +00:00
hwangtaehyeon	8ccf9e8642	Upload files to "/"	2025-05-21 11:52:11 +00:00
hwangtaehyeon	58056ce8a4	Upload files to "Nam"	2025-05-21 11:03:24 +00:00
hwangtaehyeon	66f5c75d1e	Delete Nam/Final_code 1.py	2025-05-21 10:41:24 +00:00
hwangtaehyeon	786ae98996	Upload files to "Nam"	2025-05-21 10:41:11 +00:00
taehyeon hwang	41dbe60e9a	병남씨 다시해주세요	2025-04-30 06:34:41 +00:00
qudwns245	477fc5e159	Nam model	2025-04-30 15:28:50 +09:00
taehyeon hwang	8de5238395	backend-junPreP Update(Update whitelist model), front-urlpredictor.jsx UI Update	2025-04-30 05:03:13 +00:00
David Ko	11839c40c0	Merge branch 'main' of ssh://gitea.koseongnam.com:40022/david/1st-project	2025-04-30 13:59:59 +09:00
David Ko	dcc3fc0a92	Refactor code structure for improved readability and maintainability	2025-04-30 13:59:45 +09:00