Post

[Machine Learning] 타이타닉 생존자 머신러닝 -1-

In [4]:

1
2
df_train=pd.read_csv('Dataset/Titanic/train.csv')
df_test=pd.read_csv('Dataset/Titanic/test.csv')

In [5]:

1
df_train.head()

Out[5]:

 PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th…female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS

In [6]:

1
df_train.describe()

Out[6]:

 PassengerIdSurvivedPclassAgeSibSpParchFare
count891.000000891.000000891.000000714.000000891.000000891.000000891.000000
mean446.0000000.3838382.30864229.6991180.5230080.38159432.204208
std257.3538420.4865920.83607114.5264971.1027430.80605749.693429
min1.0000000.0000001.0000000.4200000.0000000.0000000.000000
25%223.5000000.0000002.00000020.1250000.0000000.0000007.910400
50%446.0000000.0000003.00000028.0000000.0000000.00000014.454200
75%668.5000001.0000003.00000038.0000001.0000000.00000031.000000
max891.0000001.0000003.00000080.0000008.0000006.000000512.329200

In [7]:

1
df_test.describe()

Out[7]:

 PassengerIdPclassAgeSibSpParchFare
count418.000000418.000000332.000000418.000000418.000000417.000000
mean1100.5000002.26555030.2725900.4473680.39234435.627188
std120.8104580.84183814.1812090.8967600.98142955.907576
min892.0000001.0000000.1700000.0000000.0000000.000000
25%996.2500001.00000021.0000000.0000000.0000007.895800
50%1100.5000003.00000027.0000000.0000000.00000014.454200
75%1204.7500003.00000039.0000001.0000000.00000031.500000
max1309.0000003.00000076.0000008.0000009.000000512.329200

In [15]:

1
2
3
for col in df_test.columns:
    msg = "column: {:>10}\t Percent of NaN value: {:.2f}%".format(col,100*(df_test[col].isnull().sum()/df_test[col].shape[0]))
    print(msg)
1
2
3
4
5
6
7
8
9
10
11
column: PassengerId	 Percent of NaN value: 0.00%
column:     Pclass	 Percent of NaN value: 0.00%
column:       Name	 Percent of NaN value: 0.00%
column:        Sex	 Percent of NaN value: 0.00%
column:        Age	 Percent of NaN value: 20.57%
column:      SibSp	 Percent of NaN value: 0.00%
column:      Parch	 Percent of NaN value: 0.00%
column:     Ticket	 Percent of NaN value: 0.00%
column:       Fare	 Percent of NaN value: 0.24%
column:      Cabin	 Percent of NaN value: 78.23%
column:   Embarked	 Percent of NaN value: 0.00%

In [16]:

1
2
3
 for col in df_train.columns:
    msg = "column: {:>10}\t Percent of NaN value: {:.2f}%".format(col,100*(df_train[col].isnull().sum()/df_train[col].shape[0]))
    print(msg)
1
2
3
4
5
6
7
8
9
10
11
12
column: PassengerId	 Percent of NaN value: 0.00%
column:   Survived	 Percent of NaN value: 0.00%
column:     Pclass	 Percent of NaN value: 0.00%
column:       Name	 Percent of NaN value: 0.00%
column:        Sex	 Percent of NaN value: 0.00%
column:        Age	 Percent of NaN value: 19.87%
column:      SibSp	 Percent of NaN value: 0.00%
column:      Parch	 Percent of NaN value: 0.00%
column:     Ticket	 Percent of NaN value: 0.00%
column:       Fare	 Percent of NaN value: 0.00%
column:      Cabin	 Percent of NaN value: 77.10%
column:   Embarked	 Percent of NaN value: 0.22%

In [17]:

1
msno.matrix(df=df_train.iloc[:,:], figsize=(8,8), color=(0.8,0.5,0.2))

Out[17]:

1
<AxesSubplot:>

In [18]:

1
msno.bar(df=df_train.iloc[:,:],figsize=(8,8),color=(0.8,0.5,0.2))

Out[18]:

1
<AxesSubplot:>

In [19]:

1
2
3
4
5
6
7
8
9
f,ax=plt.subplots(1,2,figsize=(18,8))

df_train['Survived'].value_counts().plot.pie(explode=[0,0.1], autopct='%1.1f%%',ax=ax[0],shadow=True)
ax[0].set_title('Pie plot - Survived')
ax[0].set_ylabel('')
sns.countplot("Survived",data=df_train,ax=ax[1])
ax[1].set_title("Count plot - Survived")

plt.show()

In [1]:

1
2
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))
1
2
C:\Users\easyd\AppData\Local\Temp\ipykernel_28056\3510566465.py:1: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display
  from IPython.core.display import display, HTML
This post is licensed under CC BY 4.0 by the author.

Comments powered by Disqus.