# Merware 2023. 5. 22. 16:09
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Load seaborn's bundled Titanic passenger dataset into a DataFrame.
titanic = sns.load_dataset(name="titanic")
# Preview the first five rows (only rendered in an interactive/notebook session).
titanic.head(n=5)

"""
survived	pclass	sex	age	sibsp	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	0	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	0	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	0	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True
"""
titanic.describe()

"""
survived	pclass	age	sibsp	parch	fare
count	891.000000	891.000000	714.000000	891.000000	891.000000	891.000000
mean	0.383838	2.308642	29.699118	0.523008	0.381594	32.204208
std	0.486592	0.836071	14.526497	1.102743	0.806057	49.693429
min	0.000000	1.000000	0.420000	0.000000	0.000000	0.000000
25%	0.000000	2.000000	20.125000	0.000000	0.000000	7.910400
50%	0.000000	3.000000	28.000000	0.000000	0.000000	14.454200
75%	1.000000	3.000000	38.000000	1.000000	0.000000	31.000000
max	1.000000	3.000000	80.000000	8.000000	6.000000	512.329200
"""

 

def draw_distplot(col_name):
    """Plot the distribution of one column of the module-level ``titanic`` frame.

    Draws a density-scaled histogram with a KDE curve on a new 6x6 inch
    figure, turns the grid on, and shows the figure.

    Args:
        col_name: Name of the ``titanic`` column to plot.
    """
    plt.figure(figsize=(6, 6))
    # sns.distplot is deprecated; histplot with a KDE overlay on a density
    # axis is the documented replacement (cut=3 keeps the KDE tails that
    # distplot used to draw beyond the data range).
    sns.histplot(titanic[col_name], kde=True, stat="density", kde_kws={"cut": 3})
    # Pass True explicitly: a bare plt.grid() toggles visibility, which would
    # hide the grid under styles that already enable it.
    plt.grid(True)
    plt.show()
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
# np.array(titanic.fare)
# np.array(titanic.fare).reshape(-1,1)
# np.array(titanic.fare).reshape(-1,1).reshape(-1,)

# Each scaler expects a 2-D (n_samples, 1) input, so the fare column is
# selected as a one-column frame and the result is flattened back to 1-D
# before being stored as a new column.

# Standardization: zero mean, unit variance.
titanic['fare_z'] = StandardScaler().fit_transform(titanic[['fare']].to_numpy()).ravel()
draw_distplot('fare_z')

# Min-max scaling: rescale into the [0, 1] range.
titanic['fare_mm'] = MinMaxScaler().fit_transform(titanic[['fare']].to_numpy()).ravel()
draw_distplot('fare_mm')

# Robust scaling: center on the median and scale by the interquartile range.
titanic['fare_rc'] = RobustScaler().fit_transform(titanic[['fare']].to_numpy()).ravel()
draw_distplot('fare_rc')