# Analyzing the IMDB dataset with a SimpleRNN
from tensorflow.keras.datasets import imdb
num_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = num_words)
X_train.shape
# (25000,)
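Each review is already encoded as a variable-length list of word indices. As a quick sanity check (my sketch, not part of the original post), the indices can be decoded back into words via imdb.get_word_index(); Keras reserves indices 0-2 for padding, start-of-sequence, and unknown tokens, so the mapping is offset by 3.
# Decode the first training review back into words (illustrative sketch)
word_index = imdb.get_word_index()
index_word = {idx + 3: word for word, idx in word_index.items()}
print(' '.join(index_word.get(i, '?') for i in X_train[0][:20]))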
# Padding
from tensorflow.keras.preprocessing.sequence import pad_sequences
max_len = 500
pad_X_train = pad_sequences(X_train, maxlen = max_len)
pad_X_test = pad_sequences(X_test, maxlen = max_len)
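Reviews differ in length, so pad_sequences forces every sequence to exactly max_len entries; by default it pads and truncates at the front (padding = 'pre', truncating = 'pre'). A quick shape check:
print(pad_X_train.shape)  # (25000, 500)
print(pad_X_test.shape)   # (25000, 500)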
# Analysis
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
- return_sequences: defaults to False; whether to return the hidden state at every time step (the recurrent outputs) instead of only the last one (see the shape check below)
- dropout: fraction of the input units to drop
- recurrent_dropout: fraction of the recurrent (hidden-state) units to drop
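A minimal shape check (my sketch, not from the original post) makes the return_sequences difference concrete:
import numpy as np
from tensorflow.keras.layers import SimpleRNN
x = np.random.random((1, 5, 8)).astype('float32')  # (batch, time steps, features)
print(SimpleRNN(4, return_sequences = True)(x).shape)  # (1, 5, 4): one output per step
print(SimpleRNN(4)(x).shape)                           # (1, 4): final state only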
model = Sequential()
model.add(Embedding(input_dim = num_words, output_dim = 32))  # each word index -> 32-dim vector
# return_sequences = True so the next SimpleRNN receives the output at every time step
model.add(SimpleRNN(32, return_sequences = True, dropout = 0.15, recurrent_dropout = 0.15))
model.add(SimpleRNN(32))  # returns only the final hidden state
model.add(Dense(1, activation = 'sigmoid'))  # binary sentiment score
model.compile(optimizer = 'adam', loss='binary_crossentropy', metrics = ['acc'])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, None, 32) 320000
_________________________________________________________________
simple_rnn (SimpleRNN) (None, None, 32) 2080
_________________________________________________________________
simple_rnn_1 (SimpleRNN) (None, 32) 2080
_________________________________________________________________
dense (Dense) (None, 1) 33
=================================================================
Total params: 324,193
Trainable params: 324,193
Non-trainable params: 0
_________________________________________________________________
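The parameter counts check out: the Embedding layer holds 10,000 × 32 = 320,000 weights; each SimpleRNN layer has 32 × 32 input weights, 32 × 32 recurrent weights, and 32 biases, i.e. 32 × (32 + 32 + 1) = 2,080; the Dense layer adds 32 + 1 = 33.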
history = model.fit(pad_X_train, y_train, batch_size = 32, epochs = 15, validation_split = 0.2)
Epoch 1/15
625/625 [==============================] - 43s 67ms/step - loss: 0.6911 - acc: 0.5356 - val_loss: 0.6146 - val_acc: 0.6862
Epoch 2/15
625/625 [==============================] - 41s 66ms/step - loss: 0.5349 - acc: 0.7323 - val_loss: 0.4432 - val_acc: 0.7986
Epoch 3/15
625/625 [==============================] - 42s 66ms/step - loss: 0.4389 - acc: 0.8065 - val_loss: 0.6516 - val_acc: 0.6824
Epoch 4/15
625/625 [==============================] - 42s 68ms/step - loss: 0.5697 - acc: 0.7071 - val_loss: 0.5844 - val_acc: 0.6994
Epoch 5/15
625/625 [==============================] - 42s 67ms/step - loss: 0.5356 - acc: 0.7357 - val_loss: 0.6082 - val_acc: 0.6658
Epoch 6/15
625/625 [==============================] - 42s 67ms/step - loss: 0.5133 - acc: 0.7520 - val_loss: 0.8289 - val_acc: 0.5406
Epoch 7/15
625/625 [==============================] - 41s 66ms/step - loss: 0.5330 - acc: 0.7317 - val_loss: 0.5149 - val_acc: 0.7470
Epoch 8/15
625/625 [==============================] - 41s 65ms/step - loss: 0.5198 - acc: 0.7435 - val_loss: 0.5873 - val_acc: 0.6998
Epoch 9/15
625/625 [==============================] - 41s 66ms/step - loss: 0.4812 - acc: 0.7720 - val_loss: 0.5361 - val_acc: 0.7388
Epoch 10/15
625/625 [==============================] - 41s 65ms/step - loss: 0.4684 - acc: 0.7818 - val_loss: 0.5729 - val_acc: 0.6942
Epoch 11/15
625/625 [==============================] - 42s 67ms/step - loss: 0.4367 - acc: 0.8001 - val_loss: 0.5292 - val_acc: 0.7416
Epoch 12/15
625/625 [==============================] - 42s 68ms/step - loss: 0.4064 - acc: 0.8243 - val_loss: 0.5065 - val_acc: 0.7660
Epoch 13/15
625/625 [==============================] - 42s 66ms/step - loss: 0.3777 - acc: 0.8399 - val_loss: 0.5160 - val_acc: 0.7320
Epoch 14/15
625/625 [==============================] - 42s 67ms/step - loss: 0.4170 - acc: 0.8135 - val_loss: 0.6287 - val_acc: 0.6220
Epoch 15/15
625/625 [==============================] - 43s 68ms/step - loss: 0.4658 - acc: 0.7863 - val_loss: 0.5528 - val_acc: 0.7462
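The 625 steps per epoch follow from validation_split = 0.2: 5,000 of the 25,000 training reviews are held out for validation, leaving 20,000 / 32 = 625 batches. Also note that val_acc peaks near 0.80 at epoch 2 and then oscillates rather than steadily improving.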
import matplotlib.pyplot as plt
his_dict = history.history
loss = his_dict['loss']
val_loss = his_dict['val_loss']
epochs = range(1, len(loss)+1)
fig = plt.figure(figsize=(10,5))
# Plot training loss
ax1 = fig.add_subplot(1,2,1)
ax1.plot(epochs, loss, color='blue', label='train_loss')
ax1.plot(epochs, val_loss, color='orange', label='val_loss')
ax1.set_title('train and val loss')
ax1.set_xlabel('epochs')
ax1.set_ylabel('loss')
ax1.legend()
# Plot accuracy
acc = his_dict['acc']
val_acc = his_dict['val_acc']
ax2 = fig.add_subplot(1,2,2)
ax2.plot(epochs, acc, color='blue', label='train_acc')
ax2.plot(epochs, val_acc, color='orange', label='val_acc')
ax2.set_title('train and val acc')
ax2.set_xlabel('epochs')
ax2.set_ylabel('acc')
ax2.legend()
plt.show()
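The same curves can also be drawn more compactly, straight from history.history: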

import matplotlib.pyplot as plt
plt.figure(figsize = (12, 4))
plt.subplot(1,2,1)
plt.plot(history.history['loss'], 'b-', label='loss')
plt.plot(history.history['val_loss'], 'r--', label='val_loss')
plt.xlabel('Epoch')
plt.legend()
plt.subplot(1,2,2)
plt.plot(history.history['acc'], 'g-', label='acc')
plt.plot(history.history['val_acc'], 'k--', label='val_acc')
plt.xlabel('Epoch')
plt.legend()
plt.show()
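The test split loaded at the start is never used above; a minimal sketch (my addition) to close the loop:
test_loss, test_acc = model.evaluate(pad_X_test, y_test)
print(f'test loss: {test_loss:.4f}, test acc: {test_acc:.4f}')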

SimpleRNN conclusions
- Accuracy is no better than a model that uses only an Embedding layer.
- A SimpleRNN layer struggles with long sequences.
- The hidden state remembers the early part of the input, but that memory fades as the time steps go on.