Nam model

This commit is contained in:
qudwns245
2025-04-30 15:28:50 +09:00
parent 8de5238395
commit 477fc5e159
4 changed files with 88 additions and 690 deletions

45
Nam/Final_code 1.py Normal file
View File

@@ -0,0 +1,45 @@
import pandas as pd
import pickle
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from url_preprocessing import preprocess_url_dataframe # 너가 만든 전처리 모듈
# Inference script: preprocess a CSV of URLs, scale the resulting features
# with a previously saved scaler, run a saved Keras model on them, and print
# evaluation metrics when a 'label' column is available.
# NOTE(review): this file was recovered without its original indentation; the
# final sample print is assumed to belong to the no-label branch — confirm.

# 1. Load the raw data.
df = pd.read_csv("train.csv")  # or change to the appropriate file name
print("원본 데이터 불러옴")

# 2. Apply preprocessing.
df_processed = preprocess_url_dataframe(df)
print(" 전처리 완료")

# 3. Split features and label.
non_feature_columns = ['label', 'URL', 'URL_clean']
# errors='ignore' lets the drop succeed when some of these columns are absent.
X = df_processed.drop(columns=non_feature_columns, errors='ignore')
if 'label' in df_processed.columns:
    y = df_processed['label']
else:
    y = None

# 4. Load the scaler and scale the features.
# NOTE(review): pickle.load can execute code embedded in the file; only use a
# scaler.pkl that comes from a trusted source.
with open("scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)
X_scaled = scaler.transform(X)
print(" 스케일링 완료")

# 5. Load the model.
model = load_model("best_model.h5")
print(" 모델 불러오기 완료")

# 6. Predict.
raw_output = model.predict(X_scaled)
y_pred_proba = raw_output.ravel()  # flatten to one value per row
# If a tuned threshold was saved somewhere, it could be loaded via pickle.
best_threshold = 0.34
y_pred = (y_pred_proba > best_threshold).astype(int)

# 7. Report the results.
if y is None:
    print("예측 완료! 라벨이 없어 평가 생략")
    print("예측 결과 샘플:", y_pred[:10])
else:
    print("예측 결과 (테스트셋 평가)")
    metric_reports = (
        ("Accuracy:", accuracy_score),
        ("F1 Score:", f1_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
    )
    for caption, metric in metric_reports:
        print(caption, metric(y, y_pred))

BIN
Nam/best_model 1.h5 Normal file

Binary file not shown.

BIN
Nam/scaler 1.pkl Normal file

Binary file not shown.

View File

@@ -2392,703 +2392,56 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "ename": "NameError",
"application/vnd.microsoft.datawrangler.viewer.v0+json": { "evalue": "name 'processed_train' is not defined",
"columns": [ "output_type": "error",
{ "traceback": [
"name": "index", "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"rawType": "object", "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
"type": "string" "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mprocessed_train\u001b[49m.describe()\n",
}, "\u001b[31mNameError\u001b[39m: name 'processed_train' is not defined"
{ ]
"name": "label",
"rawType": "float64",
"type": "float"
},
{
"name": "url_length_cat",
"rawType": "float64",
"type": "float"
},
{
"name": "num_dots",
"rawType": "float64",
"type": "float"
},
{
"name": "num_digits",
"rawType": "float64",
"type": "float"
},
{
"name": "num_special_chars",
"rawType": "float64",
"type": "float"
},
{
"name": "url_keyword",
"rawType": "float64",
"type": "float"
},
{
"name": "num_underbar",
"rawType": "float64",
"type": "float"
},
{
"name": "extract_consecutive_numbers",
"rawType": "float64",
"type": "float"
},
{
"name": "number",
"rawType": "float64",
"type": "float"
},
{
"name": "upper",
"rawType": "float64",
"type": "float"
},
{
"name": "is_common_tld",
"rawType": "float64",
"type": "float"
},
{
"name": "is_country_tld",
"rawType": "float64",
"type": "float"
},
{
"name": "is_suspicious_tld",
"rawType": "float64",
"type": "float"
},
{
"name": "domain_length",
"rawType": "float64",
"type": "float"
},
{
"name": "has_subdomain",
"rawType": "float64",
"type": "float"
},
{
"name": "subdomain_length",
"rawType": "float64",
"type": "float"
},
{
"name": "subdomain_count",
"rawType": "float64",
"type": "float"
},
{
"name": "path_depth",
"rawType": "float64",
"type": "float"
},
{
"name": "has_query",
"rawType": "float64",
"type": "float"
},
{
"name": "query_length",
"rawType": "float64",
"type": "float"
},
{
"name": "query_param_count",
"rawType": "float64",
"type": "float"
},
{
"name": "url_shorteners",
"rawType": "float64",
"type": "float"
},
{
"name": "compression_ratio",
"rawType": "float64",
"type": "float"
},
{
"name": "entropy",
"rawType": "float64",
"type": "float"
},
{
"name": "digit_ratio",
"rawType": "float64",
"type": "float"
},
{
"name": "special_char_ratio",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "c79a077e-8e52-4e42-b88f-dc9698b0fa30",
"rows": [
[
"count",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0",
"6995056.0"
],
[
"mean",
"0.22371472079708868",
"1.4435534183000107",
"1.546944584861079",
"1.6343590387267808",
"2.6635716711917676",
"0.0370789025849114",
"0.045005501028154746",
"0.056463736673444787",
"0.08128040719044995",
"0.0357764112252997",
"0.6133649251700057",
"0.12739140329970197",
"0.022784949827420967",
"10.464007150192936",
"0.21130266862767075",
"2.43731000866898",
"0.2660177416735477",
"0.6056849294701858",
"0.027221368921135157",
"1.9155892390282507",
"0.04228915393958247",
"0.0018421582329004942",
"1.4552534994784176",
"3.5360434022769756",
"0.029042428345387533",
"0.1102289088601276"
],
[
"std",
"0.41673309122602675",
"1.1161203432813147",
"1.010078604927829",
"9.827940363271033",
"7.1618457272654",
"0.18895518694176003",
"0.6023702991784359",
"0.23081505741717664",
"0.273265280035072",
"0.18573223887275842",
"0.4869788780260291",
"0.33341093196934307",
"0.14921728811320575",
"5.0652546813544035",
"0.4082326232468674",
"6.90096602515224",
"0.6272395647222854",
"1.6003209664806863",
"0.1627279010519657",
"19.702068343354906",
"0.35208851309719974",
"0.04288082262284407",
"0.24856536988340924",
"0.47898938276414027",
"0.08255957016074264",
"0.046338026902092454"
],
[
"min",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.010181818181818183",
"-0.0",
"0.0",
"0.0"
],
[
"25%",
"0.0",
"0.0",
"1.0",
"0.0",
"1.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"7.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.3076923076923077",
"3.238901256602631",
"0.0",
"0.07142857142857142"
],
[
"50%",
"0.0",
"1.0",
"1.0",
"0.0",
"2.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.0",
"0.0",
"0.0",
"10.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.4444444444444444",
"3.5068905956085183",
"0.0",
"0.10344827586206896"
],
[
"75%",
"0.0",
"2.0",
"2.0",
"0.0",
"3.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.0",
"0.0",
"0.0",
"13.0",
"0.0",
"0.0",
"0.0",
"1.0",
"0.0",
"0.0",
"0.0",
"0.0",
"1.6153846153846154",
"3.7962176025900556",
"0.0",
"0.14285714285714285"
],
[
"max",
"1.0",
"3.0",
"171.0",
"2011.0",
"8198.0",
"1.0",
"136.0",
"1.0",
"1.0",
"1.0",
"1.0",
"1.0",
"1.0",
"63.0",
"1.0",
"237.0",
"38.0",
"136.0",
"1.0",
"8367.0",
"131.0",
"1.0",
"5.0",
"6.570554108088201",
"0.9545454545454546",
"1.0"
]
],
"shape": {
"columns": 26,
"rows": 8
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>label</th>\n",
" <th>url_length_cat</th>\n",
" <th>num_dots</th>\n",
" <th>num_digits</th>\n",
" <th>num_special_chars</th>\n",
" <th>url_keyword</th>\n",
" <th>num_underbar</th>\n",
" <th>extract_consecutive_numbers</th>\n",
" <th>number</th>\n",
" <th>upper</th>\n",
" <th>...</th>\n",
" <th>subdomain_count</th>\n",
" <th>path_depth</th>\n",
" <th>has_query</th>\n",
" <th>query_length</th>\n",
" <th>query_param_count</th>\n",
" <th>url_shorteners</th>\n",
" <th>compression_ratio</th>\n",
" <th>entropy</th>\n",
" <th>digit_ratio</th>\n",
" <th>special_char_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>...</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" <td>6.995056e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2.237147e-01</td>\n",
" <td>1.443553e+00</td>\n",
" <td>1.546945e+00</td>\n",
" <td>1.634359e+00</td>\n",
" <td>2.663572e+00</td>\n",
" <td>3.707890e-02</td>\n",
" <td>4.500550e-02</td>\n",
" <td>5.646374e-02</td>\n",
" <td>8.128041e-02</td>\n",
" <td>3.577641e-02</td>\n",
" <td>...</td>\n",
" <td>2.660177e-01</td>\n",
" <td>6.056849e-01</td>\n",
" <td>2.722137e-02</td>\n",
" <td>1.915589e+00</td>\n",
" <td>4.228915e-02</td>\n",
" <td>1.842158e-03</td>\n",
" <td>1.455253e+00</td>\n",
" <td>3.536043e+00</td>\n",
" <td>2.904243e-02</td>\n",
" <td>1.102289e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>4.167331e-01</td>\n",
" <td>1.116120e+00</td>\n",
" <td>1.010079e+00</td>\n",
" <td>9.827940e+00</td>\n",
" <td>7.161846e+00</td>\n",
" <td>1.889552e-01</td>\n",
" <td>6.023703e-01</td>\n",
" <td>2.308151e-01</td>\n",
" <td>2.732653e-01</td>\n",
" <td>1.857322e-01</td>\n",
" <td>...</td>\n",
" <td>6.272396e-01</td>\n",
" <td>1.600321e+00</td>\n",
" <td>1.627279e-01</td>\n",
" <td>1.970207e+01</td>\n",
" <td>3.520885e-01</td>\n",
" <td>4.288082e-02</td>\n",
" <td>2.485654e-01</td>\n",
" <td>4.789894e-01</td>\n",
" <td>8.255957e-02</td>\n",
" <td>4.633803e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.018182e-02</td>\n",
" <td>-0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.307692e+00</td>\n",
" <td>3.238901e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>7.142857e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.444444e+00</td>\n",
" <td>3.506891e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.034483e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>3.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.615385e+00</td>\n",
" <td>3.796218e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.428571e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000e+00</td>\n",
" <td>3.000000e+00</td>\n",
" <td>1.710000e+02</td>\n",
" <td>2.011000e+03</td>\n",
" <td>8.198000e+03</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.360000e+02</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>...</td>\n",
" <td>3.800000e+01</td>\n",
" <td>1.360000e+02</td>\n",
" <td>1.000000e+00</td>\n",
" <td>8.367000e+03</td>\n",
" <td>1.310000e+02</td>\n",
" <td>1.000000e+00</td>\n",
" <td>5.000000e+00</td>\n",
" <td>6.570554e+00</td>\n",
" <td>9.545455e-01</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 26 columns</p>\n",
"</div>"
],
"text/plain": [
" label url_length_cat num_dots num_digits \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 2.237147e-01 1.443553e+00 1.546945e+00 1.634359e+00 \n",
"std 4.167331e-01 1.116120e+00 1.010079e+00 9.827940e+00 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 \n",
"50% 0.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 \n",
"75% 0.000000e+00 2.000000e+00 2.000000e+00 0.000000e+00 \n",
"max 1.000000e+00 3.000000e+00 1.710000e+02 2.011000e+03 \n",
"\n",
" num_special_chars url_keyword num_underbar \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 2.663572e+00 3.707890e-02 4.500550e-02 \n",
"std 7.161846e+00 1.889552e-01 6.023703e-01 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 1.000000e+00 0.000000e+00 0.000000e+00 \n",
"50% 2.000000e+00 0.000000e+00 0.000000e+00 \n",
"75% 3.000000e+00 0.000000e+00 0.000000e+00 \n",
"max 8.198000e+03 1.000000e+00 1.360000e+02 \n",
"\n",
" extract_consecutive_numbers number upper ... \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 ... \n",
"mean 5.646374e-02 8.128041e-02 3.577641e-02 ... \n",
"std 2.308151e-01 2.732653e-01 1.857322e-01 ... \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"25% 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"50% 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"75% 0.000000e+00 0.000000e+00 0.000000e+00 ... \n",
"max 1.000000e+00 1.000000e+00 1.000000e+00 ... \n",
"\n",
" subdomain_count path_depth has_query query_length \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 2.660177e-01 6.056849e-01 2.722137e-02 1.915589e+00 \n",
"std 6.272396e-01 1.600321e+00 1.627279e-01 1.970207e+01 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"50% 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"75% 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 \n",
"max 3.800000e+01 1.360000e+02 1.000000e+00 8.367000e+03 \n",
"\n",
" query_param_count url_shorteners compression_ratio entropy \\\n",
"count 6.995056e+06 6.995056e+06 6.995056e+06 6.995056e+06 \n",
"mean 4.228915e-02 1.842158e-03 1.455253e+00 3.536043e+00 \n",
"std 3.520885e-01 4.288082e-02 2.485654e-01 4.789894e-01 \n",
"min 0.000000e+00 0.000000e+00 1.018182e-02 -0.000000e+00 \n",
"25% 0.000000e+00 0.000000e+00 1.307692e+00 3.238901e+00 \n",
"50% 0.000000e+00 0.000000e+00 1.444444e+00 3.506891e+00 \n",
"75% 0.000000e+00 0.000000e+00 1.615385e+00 3.796218e+00 \n",
"max 1.310000e+02 1.000000e+00 5.000000e+00 6.570554e+00 \n",
"\n",
" digit_ratio special_char_ratio \n",
"count 6.995056e+06 6.995056e+06 \n",
"mean 2.904243e-02 1.102289e-01 \n",
"std 8.255957e-02 4.633803e-02 \n",
"min 0.000000e+00 0.000000e+00 \n",
"25% 0.000000e+00 7.142857e-02 \n",
"50% 0.000000e+00 1.034483e-01 \n",
"75% 0.000000e+00 1.428571e-01 \n",
"max 9.545455e-01 1.000000e+00 \n",
"\n",
"[8 rows x 26 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
"processed_train.describe()" "processed_train.describe()"
] ]
}, },
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'processed_train' is not defined",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpyplot\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplt\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mseaborn\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msns\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m desc = \u001b[43mprocessed_train\u001b[49m.describe()\n\u001b[32m 6\u001b[39m plt.figure(figsize=(\u001b[32m12\u001b[39m, \u001b[32m6\u001b[39m))\n\u001b[32m 7\u001b[39m sns.barplot(data=desc.T[[\u001b[33m'\u001b[39m\u001b[33mmean\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mstd\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mmin\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mmax\u001b[39m\u001b[33m'\u001b[39m]])\n",
"\u001b[31mNameError\u001b[39m: name 'processed_train' is not defined"
]
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"desc = processed_train.describe()\n",
"\n",
"plt.figure(figsize=(12, 6))\n",
"sns.barplot(data=desc.T[['mean', 'std', 'min', 'max']])\n",
"plt.title('Feature Statistics')\n",
"plt.xticks(rotation=45)\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 11,
@@ -3248,12 +2601,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import tensorflow as tf\n", "import tensorflow as tf\n",
"from tensorflow.keras.layers import Dense, Dropout, BatchNormalization\n", "from tensorflow.keras.layers import Dense\n",
"\n", "\n",
"def build_model(input_dim, learning_rate=0.001):\n", "def build_model(input_dim, learning_rate=0.001):\n",
" \"\"\"\n", " \"\"\"\n",