파이썬/시각화 matplot

색으로 값의 크기를 표현하는 히트맵

Merware 2023. 5. 16. 11:11

[학습목표]
히트맵을 이용해 값의 크기를 색으로 표현하고 비교할 수 있다.

import matplotlib.pyplot as plt
import pandas as pd

히트맵

 

Choosing Colormaps in Matplotlib — Matplotlib 3.3.1 documentation

Note Click here to download the full example code Choosing Colormaps in Matplotlib Matplotlib has a number of built-in colormaps accessible via matplotlib.cm.get_cmap. There are also external libraries like [palettable] and [colorcet] that have many extra

matplotlib.org

# 샘플데이터
import numpy as np
arr = np.random.standard_normal((5,5))
df = pd.DataFrame(arr)
df

"""
		0		1		2		3		4
0	-0.406283	-0.972895	-0.149998	0.344562	-0.165722
1	-2.498506	1.402902	-0.500308	-0.780698	-0.542189
2	-1.731492	-0.985745	1.117941	-1.099192	-0.429982
3	0.855227	-0.326886	-0.308158	-0.578879	0.198838
4	-0.845543	-0.320289	2.049224	1.757041	-0.215578
"""

# 히트맵 그리기
plt.pcolor(df, cmap='Blues')
plt.colorbar()

히트맵 예제

  • 타이타닉호의 연령대_객실등급별 승선자수

데이터 준비

import seaborn as sns
titanic = sns.load_dataset('titanic')
titanic

"""
	survived	pclass	sex	age	sibsp	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	0	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	0	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	0	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
886	0	2	male	27.0	0	0	13.0000	S	Second	man	True	NaN	Southampton	no	True
887	1	1	female	19.0	0	0	30.0000	S	First	woman	False	B	Southampton	yes	True
888	0	3	female	NaN	1	2	23.4500	S	Third	woman	False	NaN	Southampton	no	False
889	1	1	male	26.0	0	0	30.0000	C	First	man	True	C	Cherbourg	yes	True
890	0	3	male	32.0	0	0	7.7500	Q	Third	man	True	NaN	Queenstown	no	True
891 rows × 15 columns
"""

 

데이터 전처리

결측치 처리

# 결측치 확인
titanic.isnull().sum()
survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64
# 컬럼 삭제
titanic = titanic.drop(columns=['deck'])

# 결측치 삭제
titanic = titanic.dropna()

titanic.isnull().sum()
survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
who            0
adult_male     0
embark_town    0
alive          0
alone          0
dtype: int64

 

연령대 컬럼 생성

titanic['agerange'] = (titanic['age']/10).astype('int')*10

 

피벗테이블 : 연령대-객실등급 별 승선자 수

titanic_pivot = titanic.pivot_table(index='class', columns='agerange', values='survived', aggfunc='count')
titanic_pivot

"""
agerange	0	10	20	30	40	50	60	70	80
class									
First	3	18	34	49	37	27	12	3	1
Second	17	18	53	48	18	15	3	1	0
Third	42	66	133	69	34	6	3	2	0
"""

 

히트맵

matplotlib

pcolor는 각 셀을 정수 좌표 사이(0~1, 1~2, …)에 그리므로, 눈금 레이블을 셀의 가운데에 놓으려면 눈금 위치를 0.5부터 1 간격으로 지정해야 한다.

range(0,len(titanic_pivot.columns),1)
range(0, 9)

 

np.arange(0.5,len(titanic_pivot.columns),1)
array([0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])

 

plt.pcolor(titanic_pivot)
plt.colorbar()

plt.xticks(np.arange(0.5,len(titanic_pivot.columns),1), labels=titanic_pivot.columns)
plt.yticks(np.arange(0.5,len(titanic_pivot.index),1), labels=titanic_pivot.index)
plt.show()

 

seaborn

  • heatmap(data=2차원데이터)
    cmap=컬러맵 : 컬러맵 지정
    annot=True : 수치 표시
    fmt='d' : 정수로 표시
sns.heatmap(titanic_pivot, cmap='Blues', annot=True, fmt='d')