RNN 15 (Seq2Seq Learning Using the Addition Dataset in PyTorch)
Learning the Rules of Addition with Sequence-to-Sequence Learning
import os
import random
import string
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as data
import pickle
from copy import deepcopy
from sklearn.model_selection import train_test_split
# Fix random seeds for reproducibility
seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)                      # Python RNG
np.random.seed(seed)                   # NumPy RNG
torch.manual_seed(seed)                # PyTorch RNG (CPU)
torch.cuda.manual_seed(seed)           # PyTorch RNG (single GPU)
torch.cuda.manual_seed_all(seed)       # PyTorch RNG (multi-GPU)
torch.backends.cudnn.deterministic = True   # use deterministic cuDNN operations
torch.backends.cudnn.benchmark = False      # disable cuDNN benchmarking
torch.backends.cudnn.enabled = False        # disable cuDNN
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
device(type='cpu')
1. Downloading the Data
from google.colab import files
# upload addition.txt
t = files.upload()
Saving addition.txt to addition.txt
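Before building the vocabulary it helps to look at the raw lines. The snippet below is a minimal sketch; it assumes addition.txt uses the fixed-width format of the well-known addition dataset, where each line is a padded question, an underscore, and a padded answer (e.g. "16+75  _91  ").
# Minimal sketch: peek at the first few raw lines (format assumed as described above)
with open('./addition.txt', 'r') as f:
    for _ in range(3):
        print(repr(f.readline()))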
2. Loading the Data
Vocabulary
class Vocabulary():
    def __init__(self, file_path, vocab_from_file, vocab_file='./vocab.pkl'):
        # initialize the dictionaries
        self.char2idx = {}
        self.idx2char = {}
        self.idx = 0
        questions, answers = [], []
        for line in open(file_path, 'r'):
            idx = line.find('_')
            questions.append(line[:idx])
            answers.append(line[idx:-1])
        self.questions, self.answers = questions, answers
        if vocab_from_file:
            with open(vocab_file, 'rb') as f:
                vocab = pickle.load(f)
            self.char2idx = vocab.char2idx
            self.idx2char = vocab.idx2char
            print('Vocabulary successfully loaded from vocab.pkl file!')
        else:
            self.build_vocab()
            with open(vocab_file, 'wb') as f:
                pickle.dump(self, f)

    def build_vocab(self):
        for i in range(len(self.questions)):
            question, answer = self.questions[i], self.answers[i]
            self.add_char(question)
            self.add_char(answer)
        print('Vocabulary length : ', len(self.char2idx))

    def add_char(self, txt):
        chars = list(txt)  # e.g. ['1', '6', '+', '7', '5', ' ', ' ']
        for i, char in enumerate(chars):
            if char not in self.char2idx:
                tmp_id = len(self.char2idx)
                self.char2idx[char] = tmp_id
                self.idx2char[tmp_id] = char

    def __len__(self):
        return len(self.char2idx)
# Note: the Dataset class below builds its own Vocabulary(), but for convenience
# we also keep a global vocab so it can be used independently of the Dataset class.
file_path = './addition.txt'
questions, answers = [], []
for line in open(file_path, 'r'):
    idx = line.find('_')
    questions.append(line[:idx])
    answers.append(line[idx:-1])
vocab = Vocabulary(file_path, vocab_from_file=False)
Vocabulary length :  13
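As a quick sanity check, the snippet below (a minimal sketch using the global vocab and questions built above) encodes one question into indices and decodes it back:
sample = questions[0]
encoded = [vocab.char2idx[c] for c in sample]
decoded = ''.join(vocab.idx2char[i] for i in encoded)
print(sample, encoded, decoded)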
Dataset
class AdditionDataset(Dataset):
    def __init__(self, file_path, questions, answers, vocab_from_file, vocab_file='./vocab.pkl'):
        vocab = Vocabulary(file_path, vocab_from_file, vocab_file)
        self.questions, self.answers = questions, answers
        self.x = []
        self.t = []
        for i, question in enumerate(self.questions):
            self.x.append([vocab.char2idx[c] for c in list(question)])
        for i, answer in enumerate(self.answers):
            self.t.append([vocab.char2idx[c] for c in list(answer)])

    def __getitem__(self, index):
        # return torch.LongTensor(self.x[index]), torch.LongTensor(self.t[index])  # without input reversal
        return torch.LongTensor(self.x[index][::-1]), torch.LongTensor(self.t[index])  # reverse the input sequence

    def __len__(self):
        return len(self.t)
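Note that __getitem__ returns the question with its characters reversed (self.x[index][::-1]). Reversing the source sequence is a standard seq2seq trick (Sutskever et al., 2014): it shortens the distance between the leading input digits and the leading output digits, which makes the dependency easier for the LSTM to learn. A minimal illustration:
q = questions[0]
print(q, '->', q[::-1])   # the encoder sees the characters in reversed order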
# split into train/valid/test
train_indices, test_indices = train_test_split(range(len(questions)), test_size=0.1)
train_indices, valid_indices = train_test_split(train_indices, test_size=0.1)
questions, answers = np.array(questions), np.array(answers)
questions_train, questions_valid, questions_test = questions[train_indices], questions[valid_indices], questions[test_indices]
answers_train, answers_valid, answers_test = answers[train_indices], answers[valid_indices], answers[test_indices]
questions_train.shape, answers_train.shape
((40500,), (40500,))
trainset = AdditionDataset(file_path, questions_train, answers_train, vocab_from_file=False)
validset = AdditionDataset(file_path, questions_valid, answers_valid, vocab_from_file=True)
testset = AdditionDataset(file_path, questions_test, answers_test, vocab_from_file=True)
Vocabulary length :  13
Vocabulary successfully loaded from vocab.pkl file!
Vocabulary successfully loaded from vocab.pkl file!
trainset[0][0], trainset[0][1]
(tensor([5, 5, 0, 2, 1, 0, 1]), tensor([6, 1, 0, 3, 5]))
# input (printed in the reversed order that is fed to the encoder)
for i in trainset[1][0]:
    i = int(i)
    char = vocab.idx2char[i]
    print(char, end='')
print()
# target
for i in trainset[1][1]:
    i = int(i)
    char = vocab.idx2char[i]
    print(char, end='')
634+982
_725
3. Loading the Data: DataLoader
batch_size = 128
trainloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
validloader = DataLoader(dataset=validset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=True)
batch = next(iter(trainloader))
batch[0].size(), batch[1].size()
(torch.Size([128, 7]), torch.Size([128, 5]))
batch = next(iter(validloader))
batch[0].size(), batch[1].size()
(torch.Size([128, 7]), torch.Size([128, 5]))
batch = next(iter(testloader))
batch[0].size(), batch[1].size()
(torch.Size([128, 7]), torch.Size([128, 5]))
len(trainloader), len(validloader), len(testloader)
(317, 36, 40)
5. Building the Model: Seq2Seq
class Encoder(nn.Module):
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=wordvec_size)
        self.lstm = nn.LSTM(input_size=wordvec_size, hidden_size=hidden_size, batch_first=True)

    def forward(self, inputs):          # inputs shape (N=128, T=7)
        embed = self.embed(inputs)      # embed shape (N=128, T=7, D=16)
        out, (h, c) = self.lstm(embed)  # out shape (N=128, T=7, H=128)
                                        # h, c shape (num_layers=1, N=128, H=128)
        return h
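A quick shape check for the encoder (a minimal sketch with a random batch of token ids; the sizes match the shape comments above except for a batch of 2):
enc = Encoder(vocab_size=13, wordvec_size=16, hidden_size=128)
dummy = torch.randint(0, 13, (2, 7))   # (N=2, T=7) batch of character ids
print(enc(dummy).shape)                # torch.Size([1, 2, 128]) = (num_layers, N, H)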
class PeekyDecoder(nn.Module):
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=wordvec_size)
        self.lstm = nn.LSTM(input_size=wordvec_size + hidden_size, hidden_size=hidden_size, batch_first=True)
        self.affine = nn.Linear(in_features=hidden_size + hidden_size, out_features=vocab_size)

    def forward(self, inputs, h):                # inputs shape (N=128, T=4)
                                                 # h shape (num_layers=1, N=128, H=128)
        N, T = inputs.shape
        _, N, H = h.shape
        embed = self.embed(inputs)               # embed shape (N=128, T=4, D=16)
        hs = h.reshape(N, 1, H).repeat(1, T, 1)  # hs shape (N=128, T=4, H=128): each sample's encoder context repeated at every step
        embed = torch.cat((hs, embed), dim=2)    # embed shape (N=128, T=4, H+D=128+16)
        c = self.init_cell(N)
        out, _ = self.lstm(embed, (h, c))        # out shape (N=128, T=4, H=128)
        out = torch.cat((hs, out), dim=2)        # out shape (N=128, T=4, H+H=128+128)
        out = self.affine(out)                   # out shape (N=128, T=4, V=13)
        # out = out.view(-1, self.vocab_size)    # out shape (N*T=128*4, V=13)
        return out

    def init_cell(self, batch_size):
        weight = next(self.parameters())
        return weight.new_zeros(1, batch_size, self.hidden_size)

    def generate(self, h, start_id, sample_size):
        sampled = []
        sample_id = start_id
        _, N, H = h.shape                        # h shape (num_layers=1, N=1, H=128)
        hs = h.reshape(N, 1, H)                  # hs shape (N=1, T=1, H=128)
        c = self.init_cell(batch_size=1)         # c shape (num_layers=1, N=1, H=128)
        for _ in range(sample_size):
            # sample_id = torch.tensor(sample_id).reshape(1, 1)   # sample_id shape (N=1, T=1)
            sample_id = sample_id.clone().detach().reshape(1, 1)  # avoids a UserWarning
            embed = self.embed(sample_id)        # embed shape (N=1, T=1, D=16)
            embed = torch.cat((hs, embed), dim=2)    # embed shape (N=1, T=1, H+D=128+16)
            out, (h, c) = self.lstm(embed, (h, c))   # out shape (N=1, T=1, H=128)
            out = torch.cat((hs, out), dim=2)    # out shape (N=1, T=1, H+H=128+128)
            score = self.affine(out)             # score shape (N=1, T=1, V=13)
            sample_id = torch.max(score, dim=2)[1]
            sampled.append(int(sample_id))
        return sampled
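The "peeky" idea is that the encoder's final hidden state is not only used to initialize the decoder LSTM but is also concatenated ("peeked at") at every decoder time step, both with the embedded input before the LSTM and with the LSTM output before the affine layer. A minimal shape check, assuming a hypothetical batch of 2 and the 4-step decoder input used above:
dec = PeekyDecoder(vocab_size=13, wordvec_size=16, hidden_size=128)
h0 = torch.zeros(1, 2, 128)              # stand-in for the encoder's final hidden state (batch of 2)
dec_in = torch.randint(0, 13, (2, 4))    # (N=2, T=4) decoder input ids
print(dec(dec_in, h0).shape)             # torch.Size([2, 4, 13])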
class Seq2Seq(nn.Module):
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        super().__init__()
        self.encoder = Encoder(vocab_size, wordvec_size, hidden_size)
        self.decoder = PeekyDecoder(vocab_size, wordvec_size, hidden_size)

    def forward(self, inputs, targets):     # inputs shape (N=128, T=7)
                                            # targets shape (N=128, T=5)
        decoder_in = targets[:, :-1]        # decoder_in shape (N=128, T=4)
        h = self.encoder(inputs)            # h shape (num_layers=1, N=128, H=128)
        out = self.decoder(decoder_in, h)   # out shape (N=128, T=4, V=13)
        # print('out', out.shape)
        return out

    def generate(self, inputs, start_id, sample_size):   # inputs shape (N=1, T=7)
        h = self.encoder(inputs)            # h shape (num_layers=1, N=1, H=128)
        sampled = self.decoder.generate(h, start_id, sample_size)  # start_id = 6 ('_'), sample_size = 4
        return sampled
Hyperparameter settings
vocab_size = len(vocab)
wordvec_size = 16
hidden_size = 128
batch_size = 128
learning_rate = 0.01
num_epochs=100
model = Seq2Seq(vocab_size=vocab_size,
wordvec_size=wordvec_size,
hidden_size=hidden_size)
model = model.to(device)
model
Seq2Seq(
(encoder): Encoder(
(embed): Embedding(13, 16)
(lstm): LSTM(16, 128, batch_first=True)
)
(decoder): PeekyDecoder(
(embed): Embedding(13, 16)
(lstm): LSTM(144, 128, batch_first=True)
(affine): Linear(in_features=256, out_features=13, bias=True)
)
)
out = model(batch[0].to(device), batch[1].to(device))
out.shape
torch.Size([128, 4, 13])
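The (128, 4, 13) shape reflects teacher forcing: each 5-character target row is split so the decoder reads the first 4 characters as input and is trained to predict the same row shifted one step to the left. A minimal sketch of that split, using the trainset[0] target row shown earlier as an example:
t_row = torch.LongTensor([[6, 1, 0, 3, 5]])   # one padded answer row: '_' followed by the digits
decoder_in  = t_row[:, :-1]    # what the decoder reads (starts with '_')
loss_target = t_row[:, 1:]     # what the decoder logits are compared against
print(decoder_in, loss_target)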
6. Configuring the Model (Loss Function and Optimizer)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode='min', factor=0.4,
                                                 patience=3, verbose=True)
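nn.CrossEntropyLoss expects (num_samples, num_classes) logits and a flat vector of class indices, so the training and validation loops below flatten the (N, T, V) decoder output and the (N, T) targets before computing the loss. A minimal sketch with random tensors of the same shapes:
dummy_logits  = torch.randn(128, 4, 13)           # (N, T, V) decoder output
dummy_targets = torch.randint(0, 13, (128, 4))    # (N, T) ground-truth character ids
print(loss_fn(dummy_logits.view(-1, 13), dummy_targets.view(-1)))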
7. Training the Model
def validate(model, validloader, loss_fn):
    model.eval()
    total = 0
    correct = 0
    valid_loss = []
    valid_epoch_loss = 0
    with torch.no_grad():
        for batch_data in validloader:
            inputs = batch_data[0].to(device)
            targets = batch_data[1].to(device)
            # forward pass and loss
            logits = model(inputs, targets)
            targets = targets[:, 1:].clone()   # slice from index 1 to form the decoder targets
            loss = loss_fn(logits.view(-1, vocab_size), targets.view(-1))
            valid_loss.append(loss.item())
    valid_epoch_loss = np.mean(valid_loss)
    total_loss["val"].append(valid_epoch_loss)
    return valid_epoch_loss
def eval_seq2seq(model, question, correct, idx2char, verbose=False, is_reverse=True):
    model.eval()
    correct = correct.flatten()
    # the first character ('_') is the start token
    start_id = correct[0]
    correct = correct[1:]
    guess = model.generate(question, start_id, len(correct))
    # convert to strings
    question = ''.join([idx2char[int(c)] for c in question.flatten()])
    correct = ''.join([idx2char[int(c)] for c in correct])
    guess = ''.join([idx2char[int(c)] for c in guess])
    if verbose:
        if is_reverse:
            question = question[::-1]
        print('Question : ', question)
        print('True : ', correct)
        print('Guess : ', guess)
        print()
    return 1 if guess == correct else 0
def train_loop(model, trainloader, loss_fn, epochs, optimizer):
    min_loss = 1000000
    trigger = 0
    patience = 10
    max_grad = 5.0

    for epoch in range(epochs):
        model.train()
        train_loss = []
        for batch_data in trainloader:
            inputs = batch_data[0].to(device)
            targets = batch_data[1].to(device)

            optimizer.zero_grad()
            logits = model(inputs, targets)
            targets = targets[:, 1:].clone()   # slice from index 1 to form the decoder targets
            loss = loss_fn(logits.view(-1, vocab_size), targets.view(-1))
            loss.backward()
            # gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad)
            optimizer.step()
            train_loss.append(loss.item())

        train_epoch_loss = np.mean(train_loss)
        total_loss["train"].append(train_epoch_loss)

        valid_epoch_loss = validate(model, validloader, loss_fn)

        # validation accuracy (this takes time!)
        correct_num = 0
        for i in range(len(validset)):
            question = validset[i][0].unsqueeze(0).to(device)
            correct = validset[i][1].unsqueeze(0).to(device)
            correct_num += eval_seq2seq(model, question, correct, vocab.idx2char, verbose=False, is_reverse=True)
        valid_accuracy = correct_num / len(validset)

        print("Epoch: {}/{}, Train Loss={:.4f}, Val Loss={:.4f}, Val Accuracy={:.2f}".format(
            epoch + 1, epochs,
            total_loss["train"][-1],
            total_loss["val"][-1],
            valid_accuracy
        ))

        # Early stopping
        if valid_epoch_loss > min_loss:   # validation loss failed to improve on min_loss
            trigger += 1
            print('trigger : ', trigger)
            if trigger > patience:
                print('Early Stopping !!!')
                print('Training loop is finished !!')
                return
        else:
            trigger = 0
            min_loss = valid_epoch_loss
            best_model_state = deepcopy(model.state_dict())
            torch.save(best_model_state, 'best_checkpoint.pth')
        # -------------------------------------------
        # Learning Rate Scheduler
        scheduler.step(valid_epoch_loss)
        # -------------------------------------------
total_loss = {"train": [], "val": []}
%time train_loop(model, trainloader, loss_fn, num_epochs, optimizer)
Epoch: 1/100, Train Loss=0.7695, Val Loss=0.7200, Val Accuracy=0.12
Epoch: 2/100, Train Loss=0.6565, Val Loss=0.6035, Val Accuracy=0.16
Epoch: 3/100, Train Loss=0.5810, Val Loss=0.5544, Val Accuracy=0.20
Epoch: 4/100, Train Loss=0.5376, Val Loss=0.5268, Val Accuracy=0.23
Epoch: 5/100, Train Loss=0.5099, Val Loss=0.4790, Val Accuracy=0.25
Epoch: 6/100, Train Loss=0.4787, Val Loss=0.4751, Val Accuracy=0.25
Epoch: 7/100, Train Loss=0.4594, Val Loss=0.4623, Val Accuracy=0.27
Epoch: 8/100, Train Loss=0.4406, Val Loss=0.4304, Val Accuracy=0.30
Epoch: 9/100, Train Loss=0.4302, Val Loss=0.4227, Val Accuracy=0.30
Epoch: 10/100, Train Loss=0.4143, Val Loss=0.4196, Val Accuracy=0.31
Epoch: 11/100, Train Loss=0.4080, Val Loss=0.3948, Val Accuracy=0.33
Epoch: 12/100, Train Loss=0.3929, Val Loss=0.3941, Val Accuracy=0.34
Epoch: 13/100, Train Loss=0.3850, Val Loss=0.3995, Val Accuracy=0.33
trigger : 1
Epoch: 14/100, Train Loss=0.3705, Val Loss=0.3785, Val Accuracy=0.36
Epoch: 15/100, Train Loss=0.3603, Val Loss=0.3817, Val Accuracy=0.36
trigger : 1
Epoch: 16/100, Train Loss=0.3623, Val Loss=0.3707, Val Accuracy=0.37
Epoch: 17/100, Train Loss=0.3461, Val Loss=0.3587, Val Accuracy=0.39
Epoch: 18/100, Train Loss=0.3399, Val Loss=0.3473, Val Accuracy=0.42
Epoch: 19/100, Train Loss=0.3401, Val Loss=0.3647, Val Accuracy=0.39
trigger : 1
Epoch: 20/100, Train Loss=0.3333, Val Loss=0.3305, Val Accuracy=0.43
Epoch: 21/100, Train Loss=0.3253, Val Loss=0.3329, Val Accuracy=0.43
trigger : 1
Epoch: 22/100, Train Loss=0.3197, Val Loss=0.3197, Val Accuracy=0.46
Epoch: 23/100, Train Loss=0.3137, Val Loss=0.3321, Val Accuracy=0.43
trigger : 1
Epoch: 24/100, Train Loss=0.3114, Val Loss=0.3142, Val Accuracy=0.46
Epoch: 25/100, Train Loss=0.3009, Val Loss=0.3535, Val Accuracy=0.39
trigger : 1
Epoch: 26/100, Train Loss=0.3065, Val Loss=0.3151, Val Accuracy=0.44
trigger : 2
Epoch: 27/100, Train Loss=0.3011, Val Loss=0.3081, Val Accuracy=0.47
Epoch: 28/100, Train Loss=0.2983, Val Loss=0.2846, Val Accuracy=0.52
Epoch: 29/100, Train Loss=0.2858, Val Loss=0.2822, Val Accuracy=0.52
Epoch: 30/100, Train Loss=0.2822, Val Loss=0.2891, Val Accuracy=0.50
trigger : 1
Epoch: 31/100, Train Loss=0.2873, Val Loss=0.2828, Val Accuracy=0.51
trigger : 2
Epoch: 32/100, Train Loss=0.2769, Val Loss=0.3042, Val Accuracy=0.49
trigger : 3
Epoch: 33/100, Train Loss=0.2793, Val Loss=0.3104, Val Accuracy=0.48
trigger : 4
Epoch 00034: reducing learning rate of group 0 to 4.0000e-03.
Epoch: 34/100, Train Loss=0.1925, Val Loss=0.1908, Val Accuracy=0.75
Epoch: 35/100, Train Loss=0.1649, Val Loss=0.1759, Val Accuracy=0.77
Epoch: 36/100, Train Loss=0.1539, Val Loss=0.1679, Val Accuracy=0.78
Epoch: 37/100, Train Loss=0.1477, Val Loss=0.1685, Val Accuracy=0.75
trigger : 1
Epoch: 38/100, Train Loss=0.1444, Val Loss=0.1553, Val Accuracy=0.79
Epoch: 39/100, Train Loss=0.1427, Val Loss=0.1588, Val Accuracy=0.79
trigger : 1
Epoch: 40/100, Train Loss=0.1349, Val Loss=0.1490, Val Accuracy=0.80
Epoch: 41/100, Train Loss=0.1336, Val Loss=0.1589, Val Accuracy=0.78
trigger : 1
Epoch: 42/100, Train Loss=0.1257, Val Loss=0.1420, Val Accuracy=0.81
Epoch: 43/100, Train Loss=0.1239, Val Loss=0.1447, Val Accuracy=0.78
trigger : 1
Epoch: 44/100, Train Loss=0.1213, Val Loss=0.1441, Val Accuracy=0.79
trigger : 2
Epoch: 45/100, Train Loss=0.1150, Val Loss=0.1213, Val Accuracy=0.83
Epoch: 46/100, Train Loss=0.1093, Val Loss=0.1350, Val Accuracy=0.81
trigger : 1
Epoch: 47/100, Train Loss=0.1107, Val Loss=0.1276, Val Accuracy=0.81
trigger : 2
Epoch: 48/100, Train Loss=0.1024, Val Loss=0.1246, Val Accuracy=0.83
trigger : 3
Epoch: 49/100, Train Loss=0.1040, Val Loss=0.1189, Val Accuracy=0.84
Epoch: 50/100, Train Loss=0.0959, Val Loss=0.1244, Val Accuracy=0.82
trigger : 1
Epoch: 51/100, Train Loss=0.0994, Val Loss=0.1214, Val Accuracy=0.82
trigger : 2
Epoch: 52/100, Train Loss=0.0922, Val Loss=0.1222, Val Accuracy=0.82
trigger : 3
Epoch: 53/100, Train Loss=0.0928, Val Loss=0.1166, Val Accuracy=0.82
Epoch: 54/100, Train Loss=0.0884, Val Loss=0.1111, Val Accuracy=0.84
Epoch: 55/100, Train Loss=0.0869, Val Loss=0.1145, Val Accuracy=0.83
trigger : 1
Epoch: 56/100, Train Loss=0.0863, Val Loss=0.1233, Val Accuracy=0.81
trigger : 2
Epoch: 57/100, Train Loss=0.0883, Val Loss=0.1149, Val Accuracy=0.83
trigger : 3
Epoch: 58/100, Train Loss=0.0894, Val Loss=0.1127, Val Accuracy=0.83
trigger : 4
Epoch 00059: reducing learning rate of group 0 to 1.6000e-03.
Epoch: 59/100, Train Loss=0.0454, Val Loss=0.0595, Val Accuracy=0.94
Epoch: 60/100, Train Loss=0.0361, Val Loss=0.0567, Val Accuracy=0.94
Epoch: 61/100, Train Loss=0.0336, Val Loss=0.0538, Val Accuracy=0.95
Epoch: 62/100, Train Loss=0.0315, Val Loss=0.0524, Val Accuracy=0.95
Epoch: 63/100, Train Loss=0.0306, Val Loss=0.0527, Val Accuracy=0.95
trigger : 1
Epoch: 64/100, Train Loss=0.0300, Val Loss=0.0525, Val Accuracy=0.95
trigger : 2
Epoch: 65/100, Train Loss=0.0307, Val Loss=0.0565, Val Accuracy=0.94
trigger : 3
Epoch: 66/100, Train Loss=0.0317, Val Loss=0.0542, Val Accuracy=0.94
trigger : 4
Epoch 00067: reducing learning rate of group 0 to 6.4000e-04.
Epoch: 67/100, Train Loss=0.0234, Val Loss=0.0437, Val Accuracy=0.96
Epoch: 68/100, Train Loss=0.0213, Val Loss=0.0423, Val Accuracy=0.96
Epoch: 69/100, Train Loss=0.0204, Val Loss=0.0438, Val Accuracy=0.96
trigger : 1
Epoch: 70/100, Train Loss=0.0199, Val Loss=0.0422, Val Accuracy=0.96
Epoch: 71/100, Train Loss=0.0193, Val Loss=0.0417, Val Accuracy=0.96
Epoch: 72/100, Train Loss=0.0188, Val Loss=0.0417, Val Accuracy=0.96
trigger : 1
Epoch: 73/100, Train Loss=0.0184, Val Loss=0.0411, Val Accuracy=0.96
Epoch: 74/100, Train Loss=0.0180, Val Loss=0.0393, Val Accuracy=0.96
Epoch: 75/100, Train Loss=0.0174, Val Loss=0.0396, Val Accuracy=0.96
trigger : 1
Epoch: 76/100, Train Loss=0.0168, Val Loss=0.0395, Val Accuracy=0.96
trigger : 2
Epoch: 77/100, Train Loss=0.0164, Val Loss=0.0395, Val Accuracy=0.96
trigger : 3
Epoch: 78/100, Train Loss=0.0159, Val Loss=0.0375, Val Accuracy=0.96
Epoch: 79/100, Train Loss=0.0151, Val Loss=0.0380, Val Accuracy=0.96
trigger : 1
Epoch: 80/100, Train Loss=0.0150, Val Loss=0.0382, Val Accuracy=0.96
trigger : 2
Epoch: 81/100, Train Loss=0.0142, Val Loss=0.0399, Val Accuracy=0.96
trigger : 3
Epoch: 82/100, Train Loss=0.0137, Val Loss=0.0376, Val Accuracy=0.96
trigger : 4
Epoch 00083: reducing learning rate of group 0 to 2.5600e-04.
Epoch: 83/100, Train Loss=0.0122, Val Loss=0.0342, Val Accuracy=0.97
Epoch: 84/100, Train Loss=0.0118, Val Loss=0.0337, Val Accuracy=0.97
Epoch: 85/100, Train Loss=0.0115, Val Loss=0.0335, Val Accuracy=0.97
Epoch: 86/100, Train Loss=0.0114, Val Loss=0.0339, Val Accuracy=0.97
trigger : 1
Epoch: 87/100, Train Loss=0.0111, Val Loss=0.0342, Val Accuracy=0.97
trigger : 2
Epoch: 88/100, Train Loss=0.0110, Val Loss=0.0341, Val Accuracy=0.97
trigger : 3
Epoch: 89/100, Train Loss=0.0108, Val Loss=0.0328, Val Accuracy=0.97
Epoch: 90/100, Train Loss=0.0107, Val Loss=0.0333, Val Accuracy=0.97
trigger : 1
Epoch: 91/100, Train Loss=0.0104, Val Loss=0.0336, Val Accuracy=0.97
trigger : 2
Epoch: 92/100, Train Loss=0.0102, Val Loss=0.0332, Val Accuracy=0.97
trigger : 3
Epoch: 93/100, Train Loss=0.0101, Val Loss=0.0330, Val Accuracy=0.97
trigger : 4
Epoch 00094: reducing learning rate of group 0 to 1.0240e-04.
Epoch: 94/100, Train Loss=0.0096, Val Loss=0.0316, Val Accuracy=0.97
Epoch: 95/100, Train Loss=0.0094, Val Loss=0.0344, Val Accuracy=0.97
trigger : 1
Epoch: 96/100, Train Loss=0.0094, Val Loss=0.0319, Val Accuracy=0.97
trigger : 2
Epoch: 97/100, Train Loss=0.0093, Val Loss=0.0327, Val Accuracy=0.97
trigger : 3
Epoch: 98/100, Train Loss=0.0092, Val Loss=0.0323, Val Accuracy=0.97
trigger : 4
Epoch 00099: reducing learning rate of group 0 to 4.0960e-05.
Epoch: 99/100, Train Loss=0.0090, Val Loss=0.0319, Val Accuracy=0.97
trigger : 5
Epoch: 100/100, Train Loss=0.0090, Val Loss=0.0330, Val Accuracy=0.97
trigger : 6
CPU times: user 40min 45s, sys: 3.39 s, total: 40min 49s
Wall time: 40min 58s
import matplotlib.pyplot as plt
plt.plot(total_loss['train'], label="train_loss")
plt.plot(total_loss['val'], label="valid_loss")
plt.legend()
plt.show()
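Because the best weights are written to best_checkpoint.pth whenever the validation loss improves, the saved checkpoint can be restored before evaluation. This is a minimal optional sketch; the cells below evaluate the model currently in memory.
# Optional: restore the best checkpoint before evaluating
model.load_state_dict(torch.load('best_checkpoint.pth', map_location=device))
model.eval()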
8. Evaluating the Model
# evaluate on the entire test set
correct_num = 0
for i in range(len(testset)):
    question = testset[i][0].unsqueeze(0).to(device)
    correct = testset[i][1].unsqueeze(0).to(device)
    correct_num += eval_seq2seq(model, question, correct, vocab.idx2char, verbose=False, is_reverse=True)
test_accuracy = correct_num / len(testset)
test_accuracy  # before the peeky decoder: about 96%
0.9942
9. Making Predictions
for i in range(0, 3):
    question = testset[i][0].unsqueeze(0).to(device)
    correct = testset[i][1].unsqueeze(0).to(device)
    eval_seq2seq(model, question, correct, vocab.idx2char, verbose=True, is_reverse=True)
Question : 31+648
True : 679
Guess : 679
Question : 744+531
True : 1275
Guess : 1275
Question : 7+909
True : 916
Guess : 916