15 minute read

Pandas (2)

import numpy as np
import pandas as pd

5. Sorting and Ranking

obj = pd.Series(np.arange(4), index=['d', 'a', 'b', 'c'])
obj
d    0
a    1
b    2
c    3
dtype: int64
obj.sort_index()
a    1
b    2
c    3
d    0
dtype: int64
obj.sort_values()
d    0
a    1
b    2
c    3
dtype: int64
frame = pd.DataFrame(np.arange(8).reshape(2, 4),
                     index = ['three', 'one'],
                     columns = ['d', 'a', 'b', 'c'])
frame
d a b c
three 0 1 2 3
one 4 5 6 7
frame.sort_index() # axis=0 (default): sorts by the row index; axis=1: sorts by the columns
d a b c
one 4 5 6 7
three 0 1 2 3
frame.sort_index(axis='columns') # same as axis=1: sorts the columns
a b c d
three 1 2 3 0
one 5 6 7 4
frame = pd.DataFrame({'b' : [4, 7, -3, 2], 'c' : [0, 1, 0, 1]})
frame
b c
0 4 0
1 7 1
2 -3 0
3 2 1
frame.sort_values(by='b') # axis=0 (default): sort along the row axis
b c
2 -3 0
3 2 1
0 4 0
1 7 1
frame.sort_values(by='b', axis=0, ascending=False)
b c
1 7 1
0 4 0
3 2 1
2 -3 0
# sort hierarchically: first by 'c', then by 'b'
frame.sort_values(by=['c', 'b'], axis=0)
b c
2 -3 0
0 4 0
3 2 1
1 7 1
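Closely related to sorting is ranking; as a minimal sketch (with illustrative data, not taken from the frames above), Series.rank() assigns each value its rank and averages ties by default:
s = pd.Series([7, -5, 7, 4, 2, 0, 4])
s.rank()                  # the two 7s share rank 6.5, the two 4s share rank 4.5
s.rank(method='first')    # break ties by order of appearance
s.rank(ascending=False)   # rank from largest to smallest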

6. Computing Descriptive Statistics and Summaries

df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],
                   [np.nan, np.nan], [0.75, -1.3]],
                  index=['a', 'b', 'c', 'd'],
                  columns=['one', 'two'])
df
one two
a 1.40 NaN
b 7.10 -4.5
c NaN NaN
d 0.75 -1.3
df.sum() # axis=0: sum along the row axis (one total per column)
one    9.25
two   -5.80
dtype: float64
df.sum(axis=1)
a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64
df.sum(axis=1, skipna=False)
a     NaN
b    2.60
c     NaN
d   -0.55
dtype: float64
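Other reductions follow the same pattern; a quick sketch with mean(), which likewise skips NaN unless skipna=False:
df.mean(axis=1)                 # per-row mean over the non-null values
df.mean(axis=1, skipna=False)   # any row containing NaN yields NaN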
# numpy : argmax(), argmin() return the position of the maximum / minimum value
# pandas : idxmax(), idxmin() return the index label of the maximum / minimum value
df
one two
a 1.40 NaN
b 7.10 -4.5
c NaN NaN
d 0.75 -1.3
df.idxmax() # index label of the maximum, computed along the row axis
one    b
two    d
dtype: object
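For comparison, the NumPy functions mentioned above return integer positions rather than index labels (a small sketch):
arr = np.array([1.40, 7.10, 0.75])   # the non-null values of column 'one'
arr.argmax()                         # 1, a position; df['one'].idxmax() returns the label 'b'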
df.cumsum() # cumulative sums computed along the row axis
one two
a 1.40 NaN
b 8.50 -4.5
c NaN NaN
d 9.25 -5.8
df.describe()
one two
count 3.000000 2.000000
mean 3.083333 -2.900000
std 3.493685 2.262742
min 0.750000 -4.500000
25% 1.075000 -3.700000
50% 1.400000 -2.900000
75% 4.250000 -2.100000
max 7.100000 -1.300000
obj = pd.Series(['a', 'a', 'b', 'c'] * 4)
obj
0     a
1     a
2     b
3     c
4     a
5     a
6     b
7     c
8     a
9     a
10    b
11    c
12    a
13    a
14    b
15    c
dtype: object
obj.describe() # different result from describe() on numeric data
               # for categorical data, describe() reports the number of unique values (unique)
               # and the most frequent value together with its frequency (top, freq)
count     16
unique     3
top        a
freq       8
dtype: object
import seaborn as sns
df = sns.load_dataset('titanic')
df = df[['survived', 'pclass', 'sex', 'age']]
df.describe()
survived pclass age
count 891.000000 891.000000 714.000000
mean 0.383838 2.308642 29.699118
std 0.486592 0.836071 14.526497
min 0.000000 1.000000 0.420000
25% 0.000000 2.000000 20.125000
50% 0.000000 3.000000 28.000000
75% 1.000000 3.000000 38.000000
max 1.000000 3.000000 80.000000
df['sex'].describe()
count      891
unique       2
top       male
freq       577
Name: sex, dtype: object
df.describe(include='all')
survived pclass sex age
count 891.000000 891.000000 891 714.000000
unique NaN NaN 2 NaN
top NaN NaN male NaN
freq NaN NaN 577 NaN
mean 0.383838 2.308642 NaN 29.699118
std 0.486592 0.836071 NaN 14.526497
min 0.000000 1.000000 NaN 0.420000
25% 0.000000 2.000000 NaN 20.125000
50% 0.000000 3.000000 NaN 28.000000
75% 1.000000 3.000000 NaN 38.000000
max 1.000000 3.000000 NaN 80.000000

6.1 Correlation and Covariance

df = pd.DataFrame({"math" : [50, 60, 40, 30, 70, 50], "physics" : [40, 60, 50, 20, 80, 50]})
df
math physics
0 50 40
1 60 60
2 40 50
3 30 20
4 70 80
5 50 50
  • Scatter plot
df.plot(kind='scatter', x='math', y='physics')
<matplotlib.axes._subplots.AxesSubplot at 0x7f1b174222b0>

[scatter plot: math vs. physics]

  • Correlation coefficient
df.corr()
math physics
math 1.000000 0.919239
physics 0.919239 1.000000
# bike-sharing dataset (bike_train.csv)
bike = pd.read_csv('./datasets/bike_train.csv')
bike.corr()
season holiday workingday weather temp atemp humidity windspeed casual registered count
season 1.000000 0.029368 -0.008126 0.008879 0.258689 0.264744 0.190610 -0.147121 0.096758 0.164011 0.163439
holiday 0.029368 1.000000 -0.250491 -0.007074 0.000295 -0.005215 0.001929 0.008409 0.043799 -0.020956 -0.005393
workingday -0.008126 -0.250491 1.000000 0.033772 0.029966 0.024660 -0.010880 0.013373 -0.319111 0.119460 0.011594
weather 0.008879 -0.007074 0.033772 1.000000 -0.055035 -0.055376 0.406244 0.007261 -0.135918 -0.109340 -0.128655
temp 0.258689 0.000295 0.029966 -0.055035 1.000000 0.984948 -0.064949 -0.017852 0.467097 0.318571 0.394454
atemp 0.264744 -0.005215 0.024660 -0.055376 0.984948 1.000000 -0.043536 -0.057473 0.462067 0.314635 0.389784
humidity 0.190610 0.001929 -0.010880 0.406244 -0.064949 -0.043536 1.000000 -0.318607 -0.348187 -0.265458 -0.317371
windspeed -0.147121 0.008409 0.013373 0.007261 -0.017852 -0.057473 -0.318607 1.000000 0.092276 0.091052 0.101369
casual 0.096758 0.043799 -0.319111 -0.135918 0.467097 0.462067 -0.348187 0.092276 1.000000 0.497250 0.690414
registered 0.164011 -0.020956 0.119460 -0.109340 0.318571 0.314635 -0.265458 0.091052 0.497250 1.000000 0.970948
count 0.163439 -0.005393 0.011594 -0.128655 0.394454 0.389784 -0.317371 0.101369 0.690414 0.970948 1.000000
bike['count'].corr(bike['temp']) # correlation coefficient
0.39445364496724905
bike['count'].cov(bike['temp']) # covariance
556.7331709807349
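The two quantities are directly related: the Pearson correlation is the covariance divided by the product of the two standard deviations, so computing it by hand reproduces corr() (a sketch):
bike['count'].cov(bike['temp']) / (bike['count'].std() * bike['temp'].std())   # ≈ 0.3944, matching corr() above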
bike.corrwith(bike['count'])
season        0.163439
holiday      -0.005393
workingday    0.011594
weather      -0.128655
temp          0.394454
atemp         0.389784
humidity     -0.317371
windspeed     0.101369
casual        0.690414
registered    0.970948
count         1.000000
dtype: float64
bike.corrwith(bike['count']).sort_values(ascending=False)
count         1.000000
registered    0.970948
casual        0.690414
temp          0.394454
atemp         0.389784
season        0.163439
windspeed     0.101369
workingday    0.011594
holiday      -0.005393
weather      -0.128655
humidity     -0.317371
dtype: float64
bike.plot(kind='scatter', x='temp', y='count', alpha=0.3)
<matplotlib.axes._subplots.AxesSubplot at 0x7f1b14c2deb0>

[scatter plot: temp vs. count]

6.2 Summarizing Categorical Data

import seaborn as sns
titanic = sns.load_dataset('titanic')
titanic.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB
titanic.describe(include='object')
sex embarked who embark_town alive
count 891 889 891 889 891
unique 2 3 3 3 2
top male S man Southampton no
freq 577 644 537 644 549
titanic['embark_town'].unique() # the unique values
array(['Southampton', 'Cherbourg', 'Queenstown', nan], dtype=object)
titanic['embark_town'].nunique() # the number of unique values
3
titanic['embark_town'].value_counts()
Southampton    644
Cherbourg      168
Queenstown      77
Name: embark_town, dtype: int64
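value_counts can also report relative frequencies via its normalize option (a sketch):
titanic['embark_town'].value_counts(normalize=True)   # proportions instead of raw counts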
titanic['embark_town'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f1b0bc46f10>

[histogram of embark_town]

7. Data Loading and Storage

  • Reading data in text format
df = pd.read_csv('./examples/ex1.csv')
df
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
df.columns
Index(['a', 'b', 'c', 'd', 'message'], dtype='object')
df = pd.read_table('./examples/ex1.csv', sep=',')
df
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
df = pd.read_csv('./examples/ex2.csv', header=None)
df
0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
names = ['a', 'b', 'c', 'd', 'message']
df = pd.read_csv('./examples/ex2.csv', names=names)
df
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
df.set_index('message')
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12
# the column naming and index setting from above, combined into one read_csv call
names = ['a', 'b', 'c', 'd', 'message']
df = pd.read_csv('./examples/ex2.csv', names=names, index_col='message')
df
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12
# same as above, this time specifying the index column by position
names = ['a', 'b', 'c', 'd', 'message']
df = pd.read_csv('./examples/ex2.csv', names=names, index_col=4)
df
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12
# !cat examples/ex3.txt
            A         B         C
aaa -0.264438 -1.026059 -0.619500
bbb  0.927272  0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382  1.100491
# !type examples/ex3.txt
/bin/bash: line 0: type: examples/ex3.txt: not found
lst = []
with open('examples/ex3.txt') as f:
  for line in f:
    lst.append(line)
lst    
['            A         B         C\n',
 'aaa -0.264438 -1.026059 -0.619500\n',
 'bbb  0.927272  0.302904 -0.032399\n',
 'ccc -0.264273 -0.386314 -0.217601\n',
 'ddd -0.871858 -0.348382  1.100491\n']
df = pd.read_table('./examples/ex3.txt', sep='\s+') # \s+ : regular expression matching one or more whitespace characters
df
A B C
aaa -0.264438 -1.026059 -0.619500
bbb 0.927272 0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382 1.100491
  • Reading large files in pieces
pd.options.display.max_rows = 10 # limit how many rows are displayed in the output
result = pd.read_csv('./examples/ex6.csv')
result
one two three four key
0 0.467976 -0.038649 -0.295344 -1.824726 L
1 -0.358893 1.404453 0.704965 -0.200638 B
2 -0.501840 0.659254 -0.421691 -0.057688 G
3 0.204886 1.074134 1.388361 -0.982404 R
4 0.354628 -0.133116 0.283763 -0.837063 Q
... ... ... ... ... ...
9995 2.311896 -0.417070 -1.409599 -0.515821 L
9996 -0.479893 -0.650419 0.745152 -0.646038 E
9997 0.523331 0.787112 0.486066 1.093156 K
9998 -0.362559 0.598894 -1.843201 0.887292 G
9999 -0.096376 -1.012999 -0.657431 -0.573315 0

10000 rows × 5 columns
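When only a quick look at a large file is needed, the nrows option reads just the first few rows (a sketch):
pd.read_csv('./examples/ex6.csv', nrows=5)   # read only the first 5 rows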

chunker = pd.read_csv('./examples/ex6.csv', chunksize=1000)
for piece in chunker:
  print(piece.head(2))
        one       two     three      four key
0  0.467976 -0.038649 -0.295344 -1.824726   L
1 -0.358893  1.404453  0.704965 -0.200638   B
           one       two     three      four key
1000  0.467976 -0.038649 -0.295344 -1.824726   T
1001 -0.358893  1.404453  0.704965 -0.200638   J
           one       two     three      four key
2000  0.467976 -0.038649 -0.295344 -1.824726   1
2001 -0.358893  1.404453  0.704965 -0.200638   H
           one       two     three      four key
3000  0.467976 -0.038649 -0.295344 -1.824726   H
3001 -0.358893  1.404453  0.704965 -0.200638   Y
           one       two     three      four key
4000  0.467976 -0.038649 -0.295344 -1.824726   H
4001 -0.358893  1.404453  0.704965 -0.200638   Z
           one       two     three      four key
5000  0.467976 -0.038649 -0.295344 -1.824726   1
5001 -0.358893  1.404453  0.704965 -0.200638   Z
           one       two     three      four key
6000  0.467976 -0.038649 -0.295344 -1.824726   I
6001 -0.358893  1.404453  0.704965 -0.200638   X
           one       two     three      four key
7000  0.467976 -0.038649 -0.295344 -1.824726   1
7001 -0.358893  1.404453  0.704965 -0.200638   I
           one       two     three      four key
8000  0.467976 -0.038649 -0.295344 -1.824726   7
8001 -0.358893  1.404453  0.704965 -0.200638   W
           one       two     three      four key
9000  0.467976 -0.038649 -0.295344 -1.824726   B
9001 -0.358893  1.404453  0.704965 -0.200638   M
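The pieces can also be combined into a single aggregate so the whole file never has to be in memory at once; a sketch that tallies the 'key' column across chunks:
chunker = pd.read_csv('./examples/ex6.csv', chunksize=1000)
tot = pd.Series([], dtype='float64')
for piece in chunker:
  tot = tot.add(piece['key'].value_counts(), fill_value=0)
tot = tot.sort_values(ascending=False)
tot[:10]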
  • Writing to text format
data = pd.read_csv('./examples/ex5.csv')
data
something a b c d message
0 one 1 2 3.0 4 NaN
1 two 5 6 NaN 8 world
2 three 9 10 11.0 12 foo
data.to_csv('./examples/out.csv') # save with both the index and the column headers
data.to_csv('./examples/out2.csv', index=False) # save without the index
data.to_csv('./examples/out3.csv', index=False, header=False) # save without the index or the column headers
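to_csv accepts further options, for example a different delimiter and a placeholder for missing values (a sketch; out4.csv is an illustrative path):
data.to_csv('./examples/out4.csv', sep='|', na_rep='NULL')   # '|'-separated, NaN written as 'NULL'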
  • Reading JSON files
data = pd.read_json('./examples/example.json')
data
a b c
0 1 2 3
1 4 5 6
2 7 8 9
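Writing works symmetrically with to_json (a sketch; the output path is illustrative):
data.to_json('./examples/out.json')   # serialize the DataFrame back to JSON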
  • Reading Excel files
frame = pd.read_excel('./examples/ex1.xlsx')
frame
Unnamed: 0 a b c d message
0 0 1 2 3 4 hello
1 1 5 6 7 8 world
2 2 9 10 11 12 foo
frame.to_excel('./examples/ex2.xlsx', index=False)
frame2 = pd.read_excel('./examples/ex2.xlsx')
frame2
Unnamed: 0 a b c d message
0 0 1 2 3 4 hello
1 1 5 6 7 8 world
2 2 9 10 11 12 foo
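The 'Unnamed: 0' column is the row index that was stored in ex1.xlsx; passing index_col=0 when reading treats that first column as the index instead (a sketch):
frame = pd.read_excel('./examples/ex1.xlsx', index_col=0)   # no extra 'Unnamed: 0' column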

8. Data Cleaning and Preparation

8.1 Handling Missing Data

  • Detecting and dropping missing values
string_data = pd.Series(['abc', 'def', np.nan, 'ghi'])
string_data
0    abc
1    def
2    NaN
3    ghi
dtype: object
string_data.isnull()
0    False
1    False
2     True
3    False
dtype: bool
pd.isnull(string_data)
0    False
1    False
2     True
3    False
dtype: bool
type(np.nan)
float
type(None)
NoneType
string_data = pd.Series(['abc', 'def', None, 'ghi'])
string_data
0     abc
1     def
2    None
3     ghi
dtype: object
string_data.isnull()
0    False
1    False
2     True
3    False
dtype: bool
data = pd.Series([1, np.nan, 3.5, np.nan, 7])
data
0    1.0
1    NaN
2    3.5
3    NaN
4    7.0
dtype: float64
data.isnull()
0    False
1     True
2    False
3     True
4    False
dtype: bool
data[data.isnull()] # boolean indexing
1   NaN
3   NaN
dtype: float64
# to drop the null rows, find their index labels and pass them to drop
data.drop([1, 3])
0    1.0
2    3.5
4    7.0
dtype: float64
null_idx = data[data.isnull()].index
data.drop(null_idx) # axis=0 (default), so the rows at null_idx are dropped
0    1.0
2    3.5
4    7.0
dtype: float64
# dropna drops the null rows directly
data.dropna()
0    1.0
2    3.5
4    7.0
dtype: float64
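dropna on a Series is equivalent to selecting the non-null entries with boolean indexing (a sketch):
data[data.notnull()]   # same result as data.dropna()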
data = pd.DataFrame([[1.0, 6.5, 3.0],
                     [1.0, np.nan, np.nan],
                     [np.nan, np.nan, np.nan],
                     [np.nan, 6.0, 3.0]])
data
0 1 2
0 1.0 6.5 3.0
1 1.0 NaN NaN
2 NaN NaN NaN
3 NaN 6.0 3.0
data.dropna() # axis=0 is the default, so rows are dropped
              # how='any' is the default, so a row is dropped if it contains even one null
0 1 2
0 1.0 6.5 3.0
# same result as above
data.dropna(axis=0, how='any')
0 1 2
0 1.0 6.5 3.0
data[100] = np.nan
data
0 1 2 100
0 1.0 6.5 3.0 NaN
1 1.0 NaN NaN NaN
2 NaN NaN NaN NaN
3 NaN 6.0 3.0 NaN
data.dropna(axis=1) # axis=1, so columns are dropped
                    # how='any' is the default, so a column is dropped if it contains even one null
0
1
2
3
# same result as above
data.dropna(axis=1, how='any')
0
1
2
3
data.dropna(axis=1, how='all') # axis=1, so columns are dropped
                               # how='all', so a column is dropped only if all of its values are null
0 1 2
0 1.0 6.5 3.0
1 1.0 NaN NaN
2 NaN NaN NaN
3 NaN 6.0 3.0
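The how='all' rule applies row-wise as well; with the default axis=0 only rows whose values are all null are dropped (a sketch):
data.dropna(how='all')   # drops only row 2, whose values are all NaN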
df = pd.DataFrame(np.random.randn(7, 3))
df
0 1 2
0 1.576235 1.580954 0.137933
1 -0.078512 -0.616893 -0.397091
2 -1.607066 0.315902 1.822990
3 1.101237 0.075200 0.968919
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
df.iloc[:4, 1] = np.nan
df.iloc[:2, 2] = np.nan
df
0 1 2
0 1.576235 NaN NaN
1 -0.078512 NaN NaN
2 -1.607066 NaN 1.822990
3 1.101237 NaN 0.968919
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
df.dropna() # axis=0, how='any'
0 1 2
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
df.dropna(axis=1)
0
0 1.576235
1 -0.078512
2 -1.607066
3 1.101237
4 0.113288
5 0.681725
6 -0.404688
df.dropna(axis=1, thresh=5) # thresh=5 : drop columns with fewer than 5 non-null values
0 2
0 1.576235 NaN
1 -0.078512 NaN
2 -1.607066 1.822990
3 1.101237 0.968919
4 0.113288 -0.050132
5 0.681725 -1.902540
6 -0.404688 0.691823
df
0 1 2
0 1.576235 NaN NaN
1 -0.078512 NaN NaN
2 -1.607066 NaN 1.822990
3 1.101237 NaN 0.968919
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
  • Filling in missing values
df
0 1 2
0 1.576235 NaN NaN
1 -0.078512 NaN NaN
2 -1.607066 NaN 1.822990
3 1.101237 NaN 0.968919
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
df.fillna(0)
0 1 2
0 1.576235 0.000000 0.000000
1 -0.078512 0.000000 0.000000
2 -1.607066 0.000000 1.822990
3 1.101237 0.000000 0.968919
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
df.fillna({1:0, 2:0.5})
0 1 2
0 1.576235 0.000000 0.500000
1 -0.078512 0.000000 0.500000
2 -1.607066 0.000000 1.822990
3 1.101237 0.000000 0.968919
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
df.fillna({1:0, 2:0.5}, inplace=True)
df
0 1 2
0 1.576235 0.000000 0.500000
1 -0.078512 0.000000 0.500000
2 -1.607066 0.000000 1.822990
3 1.101237 0.000000 0.968919
4 0.113288 1.684596 -0.050132
5 0.681725 -0.350161 -1.902540
6 -0.404688 0.052319 0.691823
df = pd.DataFrame(np.random.randn(6, 3))
df
0 1 2
0 -0.636706 0.377941 0.021031
1 1.266285 -0.817885 1.025722
2 -0.526124 -0.261350 -0.509566
3 1.239667 1.088629 1.037033
4 0.900286 0.029285 -0.889282
5 -0.131138 -1.700356 -0.269322
df.iloc[2:, 1] = np.nan
df.iloc[4:, 2] = np.nan
df
0 1 2
0 -0.636706 0.377941 0.021031
1 1.266285 -0.817885 1.025722
2 -0.526124 NaN -0.509566
3 1.239667 NaN 1.037033
4 0.900286 NaN NaN
5 -0.131138 NaN NaN
df.fillna(axis=0, method='ffill') # axis=0: fill missing values along the row axis
                                  # carrying the last valid value forward (downward)
0 1 2
0 -0.636706 0.377941 0.021031
1 1.266285 -0.817885 1.025722
2 -0.526124 -0.817885 -0.509566
3 1.239667 -0.817885 1.037033
4 0.900286 -0.817885 1.037033
5 -0.131138 -0.817885 1.037033
df.fillna(axis=1, method='ffill') # axis=1: fill missing values along the column axis
                                  # carrying the last valid value forward (to the right)
0 1 2
0 -0.636706 0.377941 0.021031
1 1.266285 -0.817885 1.025722
2 -0.526124 -0.526124 -0.509566
3 1.239667 1.239667 1.037033
4 0.900286 0.900286 0.900286
5 -0.131138 -0.131138 -0.131138
df.fillna(axis=0, method='ffill', limit=2)
0 1 2
0 -0.636706 0.377941 0.021031
1 1.266285 -0.817885 1.025722
2 -0.526124 -0.817885 -0.509566
3 1.239667 -0.817885 1.037033
4 0.900286 NaN 1.037033
5 -0.131138 NaN 1.037033
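A common variant is to fill with a statistic computed from the data itself, such as each column's mean (a sketch):
df.fillna(df.mean())   # fill each column's NaN with that column's mean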

Reference

Python for Data Analysis by Wes McKinney
