{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#! pip install tensorflow\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "train = pd.read_csv(\"train.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 poznan.wuoz.gov.pl\n", "1 vill.okawa.kochi.jp\n", "2 nationalfinance.co.om\n", "3 town.ozora.hokkaido.jp\n", "4 open24.ie-news.irish/online/Login\n", " ... \n", "6995051 ddht.co.kr\n", "6995052 www.upstartepoxy.com\n", "6995053 employeesalaryschedule70.000webhostapp.com/adb...\n", "6995054 dekalbtool.com\n", "6995055 helpinganimals.com\n", "Name: URL_clean, Length: 6995056, dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Remove the square brackets [ ] from the URL column\n", "train['URL_clean'] = train['URL'].str.replace(r'[\\[\\]]', '', regex=True)\n", "train[\"URL_clean\"]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | ID | \n", "URL | \n", "label | \n", "URL_clean | \n", "
|---|---|---|---|---|
| 0 | \n", "TRAIN_0000000 | \n", "poznan[.]wuoz[.]gov[.]pl | \n", "0 | \n", "poznan.wuoz.gov.pl | \n", "
| 1 | \n", "TRAIN_0000001 | \n", "vill[.]okawa[.]kochi[.]jp | \n", "0 | \n", "vill.okawa.kochi.jp | \n", "
| 2 | \n", "TRAIN_0000002 | \n", "nationalfinance[.]co[.]om | \n", "0 | \n", "nationalfinance.co.om | \n", "
| 3 | \n", "TRAIN_0000003 | \n", "town[.]ozora[.]hokkaido[.]jp | \n", "0 | \n", "town.ozora.hokkaido.jp | \n", "
| 4 | \n", "TRAIN_0000004 | \n", "open24[.]ie-news[.]irish/online/Login | \n", "1 | \n", "open24.ie-news.irish/online/Login | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 6995051 | \n", "TRAIN_6995051 | \n", "ddht[.]co[.]kr | \n", "0 | \n", "ddht.co.kr | \n", "
| 6995052 | \n", "TRAIN_6995052 | \n", "www[.]upstartepoxy[.]com | \n", "0 | \n", "www.upstartepoxy.com | \n", "
| 6995053 | \n", "TRAIN_6995053 | \n", "employeesalaryschedule70[.]000webhostapp[.]com... | \n", "1 | \n", "employeesalaryschedule70.000webhostapp.com/adb... | \n", "
| 6995054 | \n", "TRAIN_6995054 | \n", "dekalbtool[.]com | \n", "0 | \n", "dekalbtool.com | \n", "
| 6995055 | \n", "TRAIN_6995055 | \n", "helpinganimals[.]com | \n", "0 | \n", "helpinganimals.com | \n", "
6995056 rows × 4 columns
\n", "| \n", " | ID | \n", "URL | \n", "label | \n", "URL_clean | \n", "dot_count | \n", "slash_count | \n", "alpha_count | \n", "dash_count | \n", "underscore_count | \n", "digit_count | \n", "percent_count | \n", "equal_count | \n", "question_count | \n", "at_count | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "TRAIN_0000000 | \n", "poznan[.]wuoz[.]gov[.]pl | \n", "0 | \n", "poznan.wuoz.gov.pl | \n", "3 | \n", "0 | \n", "15 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 1 | \n", "TRAIN_0000001 | \n", "vill[.]okawa[.]kochi[.]jp | \n", "0 | \n", "vill.okawa.kochi.jp | \n", "3 | \n", "0 | \n", "16 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 2 | \n", "TRAIN_0000002 | \n", "nationalfinance[.]co[.]om | \n", "0 | \n", "nationalfinance.co.om | \n", "2 | \n", "0 | \n", "19 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 3 | \n", "TRAIN_0000003 | \n", "town[.]ozora[.]hokkaido[.]jp | \n", "0 | \n", "town.ozora.hokkaido.jp | \n", "3 | \n", "0 | \n", "19 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 4 | \n", "TRAIN_0000004 | \n", "open24[.]ie-news[.]irish/online/Login | \n", "1 | \n", "open24.ie-news.irish/online/Login | \n", "2 | \n", "2 | \n", "26 | \n", "1 | \n", "0 | \n", "2 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 6995051 | \n", "TRAIN_6995051 | \n", "ddht[.]co[.]kr | \n", "0 | \n", "ddht.co.kr | \n", "2 | \n", "0 | \n", "8 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 6995052 | \n", "TRAIN_6995052 | \n", "www[.]upstartepoxy[.]com | \n", "0 | \n", "www.upstartepoxy.com | \n", "2 | \n", "0 | \n", "18 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 6995053 | \n", "TRAIN_6995053 | \n", "employeesalaryschedule70[.]000webhostapp[.]com... | \n", "1 | \n", "employeesalaryschedule70.000webhostapp.com/adb... | \n", "2 | \n", "2 | \n", "41 | \n", "0 | \n", "0 | \n", "5 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 6995054 | \n", "TRAIN_6995054 | \n", "dekalbtool[.]com | \n", "0 | \n", "dekalbtool.com | \n", "1 | \n", "0 | \n", "13 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 6995055 | \n", "TRAIN_6995055 | \n", "helpinganimals[.]com | \n", "0 | \n", "helpinganimals.com | \n", "1 | \n", "0 | \n", "17 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
6995056 rows × 14 columns
\n", "