import tensorflow as tf

img = tf.keras.utils.get_file('zebra.jpg','https://i.imgur.com/XjeiRMV.jpg')

import cv2
import matplotlib.pyplot as plt

im = cv2.imread(img)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im_ = im.copy()

rec1 = cv2.rectangle(im_, (120,25),(200,165), color=(255,0,0), thickness=2)
rec2 = cv2.rectangle(im_, (300,50),(480,320), color=(255,0,0), thickness=2)

plt.imshow(im_)

im_.shape
# (333, 500, 3)

h = w = 800
im_r = cv2.resize(im, (h,w))
im_r_ = im_r.copy()
import numpy as np
x = np.array([120, 25, 200, 165])
y = np.array([300, 50, 480,320])
x[0] = int(x[0]*(w/im.shape[1]))
x[1] = int(x[1]*(h/im.shape[0]))
x[2] = int(x[2]*(w/im.shape[1]))
x[3] = int(x[3]*(h/im.shape[0]))
y[0] = int(y[0]*(w/im.shape[1]))
y[1] = int(y[1]*(h/im.shape[0]))
y[2] = int(y[2]*(w/im.shape[1]))
y[3] = int(y[3]*(h/im.shape[0]))
rec1 = cv2.rectangle(im_r_, (x[0],x[1]),(x[2],x[3]), color=(255,0,0), thickness=2)
rec2 = cv2.rectangle(im_r_, (y[0],y[1]),(y[2],y[3]), color=(255,0,0), thickness=2)
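
The eight per-coordinate assignments above can be collapsed into a small helper. This is just a sketch (scale_box is a name introduced here, not from the original notebook):

def scale_box(box, src_shape, dst=800):
    # box = [xmin, ymin, xmax, ymax]; scale x by dst/width, y by dst/height
    sx, sy = dst / src_shape[1], dst / src_shape[0]
    return (np.array(box) * [sx, sy, sx, sy]).astype(int)

# scale_box([120, 25, 200, 165], im.shape) reproduces x as computed above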
from skimage.util import view_as_blocks, view_as_windows

plt.figure(figsize=(8,8))
plt.imshow(im_r_)

vgg = tf.keras.applications.VGG16(include_top=False)

for j,i in enumerate(vgg.layers):
    output = tf.keras.models.Model(vgg.input, i.output)
    print(output(im_r_[tf.newaxis]).shape,j)
    
(1, 800, 800, 3) 0
(1, 800, 800, 64) 1
(1, 800, 800, 64) 2
(1, 400, 400, 64) 3
(1, 400, 400, 128) 4
(1, 400, 400, 128) 5
(1, 200, 200, 128) 6
(1, 200, 200, 256) 7
(1, 200, 200, 256) 8
(1, 200, 200, 256) 9
(1, 100, 100, 256) 10
(1, 100, 100, 512) 11
(1, 100, 100, 512) 12
(1, 100, 100, 512) 13
(1, 50, 50, 512) 14
(1, 50, 50, 512) 15
(1, 50, 50, 512) 16
(1, 50, 50, 512) 17
(1, 25, 25, 512) 18
backbone = tf.keras.models.Model(vgg.input, vgg.layers[17].output)

backbone(im_r_[tf.newaxis]).shape
# TensorShape([1, 50, 50, 512])

plt.imshow(backbone(im_r_[tf.newaxis])[0,...,4])

vgg.summary()
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_3 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
vgg(im_r_[tf.newaxis])


<tf.Tensor: shape=(1, 25, 25, 512), dtype=float32, numpy=
array([[[[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.142358  , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  1.3040222 , ...,  0.        ,
           2.3414693 ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           3.648667  ,  0.        ],
         [ 0.        ,  0.        ,  2.5827253 , ...,  0.        ,
           1.2787921 ,  0.        ]],

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]],

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        , 25.991032  , ...,  0.        ,
           4.2155175 ,  0.        ],
         [ 0.        ,  0.        ,  9.656704  , ...,  0.        ,
           6.1238546 ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]],

        ...,

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           2.6076157 ,  0.24721637],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  1.4595927 ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.13756028],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]],

        [[15.054876  ,  0.        ,  0.        , ...,  0.        ,
           2.182668  ,  0.        ],
         [11.117934  ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]],

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.29810184,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           1.1234534 ,  0.        ],
         ...,
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]]]], dtype=float32)>
im_r = cv2.resize(im, (h,w))
im_r_ = im_r.copy()

x = np.arange(8,800,16)
y = np.arange(8,800,16)

cl = np.array(np.meshgrid(x,y)).T.reshape(-1,2)

for i in range(2500):
    cv2.circle(im_r_, (cl[i,0], cl[i,1]),1, (255,0,0), thickness=2)
    
plt.figure(figsize=(10,10))
plt.imshow(im_r_)

50*50*9
# 22500

ratio = [0.5, 1, 2]
scale = [8,16,32]

al = np.zeros((22500,4))
count = 0
for i in cl:
    cx, cy = i[0],i[1]
    for r in ratio:
        for s in scale:
            h = pow(pow(s,2)/r,0.5)
            w = h*r
            h *= 16
            w *= 16
            xmin = cx-0.5*w
            ymin = cy-0.5*h
            xmax = cx+0.5*w
            ymax = cy+0.5*h
            al[count] = [xmin, ymin,xmax,ymax]
            count += 1
al.shape
# (22500, 4)

point = 570
im_r_
array([[[ 87,  51,  37],
        [ 91,  52,  40],
        [ 97,  54,  45],
        ...,
        [ 75,  48,  37],
        [ 67,  45,  35],
        [ 61,  43,  33]],

       [[ 87,  51,  37],
        [ 91,  53,  40],
        [ 97,  54,  45],
        ...,
        [ 74,  47,  36],
        [ 66,  44,  34],
        [ 60,  42,  32]],

       [[ 86,  52,  38],
        [ 90,  53,  41],
        [ 95,  54,  44],
        ...,
        [ 70,  43,  32],
        [ 62,  40,  30],
        [ 56,  38,  29]],

       ...,

       [[153,  93,  65],
        [106,  63,  43],
        [ 49,  28,  16],
        ...,
        [ 85,  50,  28],
        [129,  84,  55],
        [166, 113,  78]],

       [[106,  59,  47],
        [ 95,  50,  37],
        [ 82,  41,  25],
        ...,
        [108,  75,  51],
        [115,  75,  51],
        [123,  78,  53]],

       [[ 92,  49,  42],
        [ 91,  46,  35],
        [ 91,  45,  28],
        ...,
        [114,  83,  58],
        [110,  73,  50],
        [110,  68,  46]]], dtype=uint8)
# img_ = np.copy(im_r)
# for i in range(point,point+9):
#     x_min = int(al[i][0])
#     y_min = int(al[i][1])
#     x_max = int(al[i][2])
#     y_max = int(al[i][3])
#     cv2.rectangle(img_, (x_min,y_min),(x_max,y_max), (0,255,0), thickness=4)
# for i in range(2500):
#     cv2.circle(img_, (cl[i,0], cl[i,1]),1, (0,0,255), thickness=2)    

x = np.array([120, 25, 200, 165])
y = np.array([300, 50, 480,320])

x[0] = int(x[0]*1.6)
x[1] = int(x[1]*2.4)
x[2] = int(x[2]*1.6)
x[3] = int(x[3]*2.4)
y[0] = int(y[0]*1.6)
y[1] = int(y[1]*2.4)
y[2] = int(y[2]*1.6)
y[3] = int(y[3]*2.4)

rec1 = cv2.rectangle(im_r_, (x[0],x[1]),(x[2],x[3]), color=(255,0,0), thickness=5)
rec2 = cv2.rectangle(im_r_, (y[0],y[1]),(y[2],y[3]), color=(255,0,0), thickness=5)    

plt.imshow(im_r_)

Of the 22,500 anchors, exclude those that cross the image boundary (any coordinate below 0 or above 800).

np.where((al[:,0] >=0) & (al[:,1] >=0) &  (al[:,2] <= 800 ) &  (al[:,3] <= 800 ))
# (array([ 1404,  1413,  1422, ..., 21069, 21078, 21087], dtype=int64),)

is_al = al[np.where((al[:,0] >=0) & (al[:,1] >=0) &  (al[:,2] <= 800 ) &  (al[:,3] <= 800 ))]

len(is_al) # number of anchors that lie fully inside the image
# 8940
def iou(box1,box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    if (x1 < x2 and  y1 < y2):
        w_o = x2 - x1
        h_o = y2 - y1
        area = w_o*h_o
    else:
        return 0
    
    area_b1 = (box1[2]-box1[0])*(box1[3]-box1[1])
    area_b2 = (box2[2]-box2[0])*(box2[3]-box2[1])
    union = area_b1 + area_b2 - area
    
    return area/union
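
A quick sanity check of iou, using the two ground-truth boxes as scaled to the 800x800 image (an identical pair gives 1, disjoint boxes give 0):

b1 = [480, 120, 768, 768] # object 1, scaled
b2 = [192, 60, 320, 396]  # object 2, scaled
iou(b1, b1), iou(b1, b2)
# (1.0, 0)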

object 1 = x

x = np.array([300, 50, 480,320])
x[0] = int(x[0]*1.6)
x[1] = int(x[1]*2.4)
x[2] = int(x[2]*1.6)
x[3] = int(x[3]*2.4)

object 2 = y

y = np.array([120, 25, 200, 165])
y[0] = int(y[0]*1.6)
y[1] = int(y[1]*2.4)
y[2] = int(y[2]*1.6)
y[3] = int(y[3]*2.4)


objects = [x,y]

result = np.zeros((8940,len(objects)))
for t,g in enumerate(objects):
    for i,j in enumerate(is_al):
        result[i][t] = iou(j,g)
        

result
array([[0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       ...,
       [0.06581804, 0.        ],
       [0.06484636, 0.        ],
       [0.05869298, 0.        ]])
anchor_id = np.where((al[:,0] >=0) & (al[:,1] >=0) &  (al[:,2] <= 800 ) &  (al[:,3] <= 800 ))
anchor_id[0]
# array([ 1404,  1413,  1422, ..., 21069, 21078, 21087], dtype=int64)

A pandas DataFrame (2-dimensional) is a convenient way to organize the anchor IDs together with their IoU values.

import pandas as pd

data = pd.DataFrame(data=[anchor_id[0], result[:,0], result[:,1]]).T

data.rename(columns={0:'anchor_id', 1:'o1_iou',2:'o2_iou'}, inplace=True)

data.anchor_id = data.anchor_id.astype('int')
data
	anchor_id	o1_iou	o2_iou
0	1404	0.000000	0.0
1	1413	0.000000	0.0
2	1422	0.000000	0.0
3	1431	0.000000	0.0
4	1440	0.000000	0.0
...	...	...	...
8935	21051	0.065818	0.0
8936	21060	0.065818	0.0
8937	21069	0.065818	0.0
8938	21078	0.064846	0.0
8939	21087	0.058693	0.0
data['o1_iou_objectness'] = data.apply(lambda x: 1 if x['o1_iou'] > 0.7 else -1, axis=1)

data[data['o1_iou_objectness'] == 1]
	anchor_id	o1_iou	o2_iou	o1_iou_objectness
7540	16877	0.711914	0.0	1
7547	16886	0.711914	0.0	1
7768	17327	0.711914	0.0	1
7775	17336	0.711914	0.0	1
data.o2_iou.argmax()
# 1785


data.loc[data.o2_iou.argmax()]
anchor_id            6418.00000
o1_iou                  0.00000
o2_iou                  0.65625
o1_iou_objectness      -1.00000
Name: 1785, dtype: float64
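
The top array below holds the four anchors whose IoU with object 1 exceeded 0.7. The step that builds it was omitted from the original, so this is a plausible reconstruction:

top = al[data[data['o1_iou_objectness'] == 1].anchor_id.values]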

top
array([[418.98066402,  45.96132803, 781.01933598, 770.03867197],
       [418.98066402,  61.96132803, 781.01933598, 786.03867197],
       [434.98066402,  45.96132803, 797.01933598, 770.03867197],
       [434.98066402,  61.96132803, 797.01933598, 786.03867197]])
img_ = np.copy(im_r)

for i,j in enumerate(top):
    x_min = int(top[i][0])
    y_min = int(top[i][1])
    x_max = int(top[i][2])
    y_max = int(top[i][3])
    cv2.rectangle(img_, (x_min,y_min),(x_max,y_max), (0,255,0), thickness=1)

# x = np.array([120, 25, 200, 165])
# y = np.array([300, 50, 480,320])

# x[0] = int(x[0]*1.6)
# x[1] = int(x[1]*2.4)
# x[2] = int(x[2]*1.6)
# x[3] = int(x[3]*2.4)
# y[0] = int(y[0]*1.6)
# y[1] = int(y[1]*2.4)
# y[2] = int(y[2]*1.6)
# y[3] = int(y[3]*2.4)

# # rec1 = cv2.rectangle(im_r_, (x[0],x[1]),(x[2],x[3]), color=(255,0,0), thickness=5)
# # rec2 = cv2.rectangle(im_r_, (y[0],y[1]),(y[2],y[3]), color=(255,0,0), thickness=5)    
plt.figure(figsize=(10,10))
plt.imshow(img_)

Faster R-CNN

Faster R-CNN removes Selective Search from Fast R-CNN and instead finds approximate object locations with a Region Proposal Network (RPN).

Because the whole pipeline runs inside the CNN from start to finish, it is much faster than Fast R-CNN.

Region Proposal

Ways of finding locations that are likely to contain an object:

1. Selective search

- Merges regions by comparing their similarity in color, texture, size, and so on.

Candidate regions are generated according to a similarity threshold.

(The Selective Search paper generated about 2,000 candidate regions.)

2. Edge boxes

- Represents edge groups using gradient magnitude and gradient orientation,

and uses them to compute a bounding-box score.

3. Region proposal network

Region Proposal Network

A network that extracts region proposals from the original image.
The RPN is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position.
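
A minimal sketch of that head (assumptions: a 50x50x512 backbone feature map and k = 9 anchors per position; the paper scores objectness with a 2k-way softmax, whereas this sketch uses a k-way sigmoid):

import tensorflow as tf

k = 9
feat = tf.keras.Input((50, 50, 512)) # backbone feature map
shared = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(feat)
objectness = tf.keras.layers.Conv2D(k, 1, activation='sigmoid')(shared) # (None, 50, 50, 9)
box_deltas = tf.keras.layers.Conv2D(4*k, 1)(shared) # (None, 50, 50, 36)
rpn = tf.keras.models.Model(feat, [objectness, box_deltas])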

import tensorflow as tf 
import cv2
import matplotlib.pyplot as plt
import numpy as np

img = tf.keras.utils.get_file('zebra.jpg', 'https://i.imgur.com/XjeiRMV.jpg')
im = cv2.imread(img)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(im)

im_ = im.copy()

rec1 = cv2.rectangle(im_, (120,25),(200,165), color=(255,0,0), thickness=2)
rec2 = cv2.rectangle(im_, (300,50),(480,320), color=(255,0,0), thickness=2)

plt.imshow(im_)

h = w = 800

plt.imshow(cv2.resize(im, (w,h), interpolation=cv2.INTER_NEAREST))

im_.shape
# (333, 500, 3)

800/333, 800/500
# (2.4024024024024024, 1.6)
im_r = cv2.resize(im, (h,w))
im_r_ = im_r.copy()

x = np.array([120,25,200,165])
y = np.array([300,50,480,320])
# map the ground-truth boxes onto the resized image
x[0] = int(x[0]*(w/im.shape[1])) # multiply by the scale ratio
x[1] = int(x[1]*(h/im.shape[0]))
x[2] = int(x[2]*(w/im.shape[1]))
x[3] = int(x[3]*(h/im.shape[0]))

y[0] = int(y[0]*(w/im.shape[1]))
y[1] = int(y[1]*(h/im.shape[0]))
y[2] = int(y[2]*(w/im.shape[1]))
y[3] = int(y[3]*(h/im.shape[0]))

rec1 = cv2.rectangle(im_r_, (x[0],x[1]),(x[2],x[3]), color=(255,0,0), thickness=2)
rec2 = cv2.rectangle(im_r_, (y[0],y[1]),(y[2],y[3]), color=(255,0,0), thickness=2)

plt.imshow(im_r_)

vgg = tf.keras.applications.VGG16(include_top=False)

vgg.summary()
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
# The 800x800 input ends up as 25x25 (each max-pooling layer halves the spatial size)
for i in vgg.layers:
  output = tf.keras.models.Model(vgg.input, i.output)
  print(output(im_r_[tf.newaxis]).shape)
  
(1, 800, 800, 3)
(1, 800, 800, 64)
(1, 800, 800, 64)
(1, 400, 400, 64)
(1, 400, 400, 128)
(1, 400, 400, 128)
(1, 200, 200, 128)
(1, 200, 200, 256)
(1, 200, 200, 256)
(1, 200, 200, 256)
(1, 100, 100, 256)
(1, 100, 100, 512)
(1, 100, 100, 512)
(1, 100, 100, 512)
(1, 50, 50, 512)
(1, 50, 50, 512)
(1, 50, 50, 512)
(1, 50, 50, 512)
(1, 25, 25, 512)
# The image-to-feature-map mapping ratio depends on how many layers of the backbone you keep
backbone = tf.keras.models.Model(vgg.input, vgg.layers[17].output) # the Faster R-CNN paper uses the feature map up to the 17th layer (block5_conv3)

backbone(im_r_[tf.newaxis]).shape
# TensorShape([1, 50, 50, 512])

from skimage.util import view_as_blocks, view_as_windows # view_as_blocks: non-overlapping crops, view_as_windows: overlapping crops

# 800x800 => 50x50: each 1x1 cell of the feature map corresponds to a 16x16 patch of the input image
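
For intuition, view_as_blocks (the non-overlapping variant imported above) carves the 800x800 image into exactly that 50x50 grid of 16x16 patches:

view_as_blocks(im_r, block_shape=(16, 16, 3)).shape
# (50, 50, 1, 16, 16, 3): one 16x16x3 patch per feature-map cell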

x = np.arange(8,800,16)
y = np.arange(8,800,16)
cl = np.array(np.meshgrid(x,y)).T.reshape(-1,2) # array of all anchor-center points

cl.shape
# (2500, 2)
im_r = cv2.resize(im, (h,w))
im_r_ = im_r.copy()

for i in range(2500):
  cv2.circle(im_r_, (cl[i,0],cl[i,1]), 1, (255,0,0), thickness=2)
  

plt.figure(figsize=(10,10))
plt.imshow(im_r_) # dots marking the 50x50 grid of anchor centers

ratio = [0.5,1,2]
scale = [8,16,32]
al = np.zeros((22500,4)) # room for all 50*50*9 anchor boxes
count = 0

Anchors drawn in the same color differ only in shape (aspect ratio); their areas are kept roughly equal.

(The example figure above shows anchor boxes generated for an 800x600 image.)

For an 800x800 image there are 800/16 * 800/16 = 2500 anchor centers,

and with 9 anchor boxes per center, a total of 2500 * 9 = 22,500 anchor boxes is generated.

for i in cl:
  cx, cy = i[0], i[1]
  for r in ratio:
    for s in scale:
      h = pow(pow(s,2)/r,0.5) # h = sqrt(s^2/r), so that w*h = s^2 while the aspect ratio w/h stays r
      w = h*r
      h *= 16 # scale to image pixels: each feature-map cell spans 16 pixels
      w *= 16
      xmin = cx-0.5*w
      ymin = cy-0.5*h
      xmax = cx+0.5*w
      ymax = cy+0.5*h
      al[count] = [xmin,ymin,xmax,ymax]
      count += 1
al
array([[ -37.254834  ,  -82.50966799,   53.254834  ,   98.50966799],
       [ -82.50966799, -173.01933598,   98.50966799,  189.01933598],
       [-173.01933598, -354.03867197,  189.01933598,  370.03867197],
       ...,
       [ 701.49033201,  746.745166  ,  882.50966799,  837.254834  ],
       [ 610.98066402,  701.49033201,  973.01933598,  882.50966799],
       [ 429.96132803,  610.98066402, 1154.03867197,  973.01933598]])
img_ = np.copy(im_r)

point = 11465
for i in range(point,point+9):
  x_min = int(al[i][0])
  y_min = int(al[i][1])
  x_max = int(al[i][2])
  y_max = int(al[i][3])
  cv2.rectangle(img_, (x_min,y_min),(x_max,y_max),(0,255,0),thickness=4)

for i in range(2500):
  cv2.circle(img_, (cl[i,0],cl[i,1]), 1, (255,0,0), thickness=2)

x = np.array([120,25,200,165])
y = np.array([300,50,480,320])

h = w = 800 # reset: h and w were overwritten inside the anchor-generation loop above

x[0] = int(x[0]*(w/im.shape[1]))
x[1] = int(x[1]*(h/im.shape[0]))
x[2] = int(x[2]*(w/im.shape[1]))
x[3] = int(x[3]*(h/im.shape[0]))

y[0] = int(y[0]*(w/im.shape[1]))
y[1] = int(y[1]*(h/im.shape[0]))
y[2] = int(y[2]*(w/im.shape[1]))
y[3] = int(y[3]*(h/im.shape[0]))

rec1 = cv2.rectangle(img_, (x[0],x[1]),(x[2],x[3]), color=(255,0,0), thickness=3)
rec2 = cv2.rectangle(img_, (y[0],y[1]),(y[2],y[3]), color=(255,0,0), thickness=3)

plt.imshow(img_)

For each ground-truth rectangle, compute its overlap with each of the 22,500 anchor boxes.

From the resulting IoU values, find the anchor boxes that overlap the most.

The paper treats anchors with IoU above 0.7 as containing an object and those below 0.3 as containing no object.
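
A sketch of that labeling rule under the paper's thresholds, assuming result holds each anchor's IoU with every ground-truth box (as computed earlier):

max_iou = result.max(axis=1)     # best IoU per anchor over all objects
label = np.full(len(result), -1) # -1: ignored during training
label[max_iou > 0.7] = 1         # positive: contains an object
label[max_iou < 0.3] = 0         # negative: background
label[result.argmax(axis=0)] = 1 # the paper also keeps the best anchor per object as positive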


Fast R-CNN

The Fast R-CNN pipeline: Feature Extractor -> RoI Pooling -> Classifier & Regressor. (RoI, Region of Interest: a region we care about.)

Feature extractor

As in R-CNN and SPPNet, this stage extracts the image's feature map with a CNN.

Recall that R-CNN first used Selective Search to extract roughly 2,000 RoIs per image and pushed every one of them through the CNN, which demanded enormous processing time.

Moreover, the extracted RoIs overlap heavily, so the same image region passes through the CNN many times, which is inefficient.

Fast R-CNN, by contrast, does not crop the image when applying Selective Search; it extracts only each region's coordinates and size (r, c, h, w).

Since this is a tiny amount of data compared with the image itself, it has the advantage of requiring no extra storage.

Then only a single image passes through the CNN to produce one shared feature map, and each RoI keeps only its coordinates, rescaled by the ratio by which the model shrank the image.

This is called RoI Projection.
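
A sketch of RoI Projection under the usual VGG16 assumption of a total stride of 16 (project_roi is a name introduced here):

def project_roi(roi, stride=16):
    # divide image-space coordinates by the backbone's total stride
    x1, y1, x2, y2 = roi
    return x1 // stride, y1 // stride, x2 // stride, y2 // stride

project_roi((480, 120, 768, 768))
# (30, 7, 48, 48) on the 50x50 feature map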

RoI pooling

The single feature map plus the RoI coordinates pass through a layer that converts every RoI to one fixed size.

This is the RoI pooling layer; think of it as a single-level spatial pyramid pooling layer.

Stacking multiple pyramid levels in spatial pyramid pooling turned out not to add much benefit, so Fast R-CNN simply uses one level.
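
Keras has no built-in RoI pooling layer; one common stand-in (closer to RoIAlign than to the paper's max-pool binning) is tf.image.crop_and_resize, sketched here with assumed shapes:

import tensorflow as tf

def roi_pool(feature_map, rois, output_size=(7, 7)):
    # feature_map: (1, H, W, C); rois: (N, 4) as [y1, x1, y2, x2] normalized to [0, 1]
    # every RoI comes out at the same fixed output_size
    n = tf.shape(rois)[0]
    return tf.image.crop_and_resize(feature_map, rois,
                                    box_indices=tf.zeros(n, tf.int32),
                                    crop_size=output_size)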

Classifier & Regressor

The classifier handles classification, deciding what the object is,

and the regressor handles localization, marking the object's region.

R-CNN trained these two separately, but Fast R-CNN devised a multi-task loss function that makes end-to-end training possible:

L(p, u, t^u, v) = L_cls(p, u) + λ [u ≥ 1] L_loc(t^u, v)

As usual when adding a new loss to an existing one, the terms are joined by addition, with a weight λ attached to control the new term's influence.

L_cls is a softmax classification loss (so no separate SVM is trained), and L_loc is an L1 loss for localization.

λ is fixed at 1, and [u ≥ 1] means: if the classification result is background (u = 0), kill L_loc; otherwise keep it.

That is because the things we want to draw bounding boxes around are objects, not the background.

L1 is used instead of the L2 of R-CNN and SPPNet because it is less sensitive to outliers, which makes fine-tuning easier.
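
The less-sensitive loss referred to above is the smooth L1 of the Fast R-CNN paper; a minimal implementation for reference:

import tensorflow as tf

def smooth_l1(y_true, y_pred):
    # quadratic for small errors (|d| < 1), linear for large ones,
    # so outlier boxes do not dominate the gradient the way they do with L2
    d = tf.abs(y_true - y_pred)
    return tf.reduce_mean(tf.where(d < 1.0, 0.5 * d * d, d - 0.5))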

import pandas as pd 
import tensorflow as tf 


air = pd.read_csv('dataset/annotations/airplane.csv', header=None, names=['filename','x1','y1','x2','y2','class'])
face = pd.read_csv('dataset/annotations/face.csv', header=None, names=['filename','x1','y1','x2','y2','class'])
motorcycle = pd.read_csv('dataset/annotations/motorcycle.csv', header=None, names=['filename','x1','y1','x2','y2','class'])
air.filename = air.filename.map(lambda x : 'airplane/'+x)
face.filename = face.filename.map(lambda x : 'face/'+x)
motorcycle.filename = motorcycle.filename.map(lambda x : 'motorcycle/'+x)
data = pd.concat([air, face, motorcycle], ignore_index=True)
data = pd.concat([data, pd.get_dummies(data['class'])], axis=1)
data.drop(columns='class',inplace=True)

data.columns[1:]
# Index(['x1', 'y1', 'x2', 'y2', 'airplane', 'face', 'motorcycle'], dtype='object')
dig = tf.keras.preprocessing.image.ImageDataGenerator() # ImageDataGenerator is used because it can read images directly from a dataframe
dig = dig.flow_from_dataframe(data, 'dataset/images/', class_mode='raw', y_col=data.columns[1:], target_size=(224,224))
# Found 2033 validated image filenames.
def flow(x):
  while True:
    (X,y) = next(x)
    yield X, (y[:,:4],y[:,4:]) # X unchanged; y split into the 4 box coordinates and the 3 class columns
    
dfg = tf.data.Dataset.from_generator(lambda : flow(dig), output_shapes=(((None,224,224,3)), ((None,4), (None,3))),
                                                  output_types=((tf.float32), (tf.float32, tf.float32)))

next(iter(dfg))[1][0] # y values (the 4 box coordinates)

<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[113.,   9., 286., 239.],
       [ 61.,  47., 210., 141.],
       [ 41.,  22., 225., 134.],
       [ 39.,  32., 226., 149.],
       [ 53.,  29., 349., 135.],
       [ 49.,  28., 346., 116.],
       [ 53.,  63., 335., 138.],
       [ 32.,  16., 230., 113.],
       [121.,  34., 314., 302.],
       [ 32.,  25., 228., 164.],
       [ 37.,  22., 233., 140.],
       [ 34.,  42., 230., 133.],
       [ 35.,  16., 230., 112.],
       [ 75.,  25., 259., 283.],
       [ 46.,  37., 342., 140.],
       [180.,  23., 361., 276.],
       [ 38.,  25., 227., 128.],
       [ 38.,  25., 220., 155.],
       [ 44.,  41., 221., 154.],
       [ 42.,  52., 226., 162.],
       [ 61.,  24., 346., 148.],
       [ 62.,  16., 270., 277.],
       [ 31.,  19., 233., 135.],
       [ 49.,  30., 349., 137.],
       [ 65.,  42., 345., 158.],
       [162.,  47., 414., 302.],
       [ 54.,  30., 345., 130.],
       [ 48.,  31., 350., 112.],
       [ 38.,  24., 224., 127.],
       [ 45.,  24., 237., 142.],
       [ 34.,  19., 230., 127.],
       [ 42.,  36., 231., 150.]], dtype=float32)>
next(iter(dfg))[1][1] # y values (the 3 class one-hots)

<tf.Tensor: shape=(32, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.]], dtype=float32)>
vgg = tf.keras.applications.VGG16(include_top=False, input_shape=(224,224,3))
vgg.trainable = False 

input_ = tf.keras.Input((224,224,3))
preprocess = tf.keras.layers.Lambda(lambda x: tf.keras.applications.vgg16.preprocess_input(x))(input_)
x = vgg(preprocess)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)

box = tf.keras.layers.Dense(64, activation='relu')(x)
box = tf.keras.layers.Dense(4, name='box',activation='relu')(box)

target = tf.keras.layers.Dense(64, activation='relu')(x)
target = tf.keras.layers.Dense(3, name='target',activation='softmax')(target)
model=tf.keras.models.Model(input_, [box,target])

loss = {
    'box': tf.keras.losses.MeanAbsoluteError(),
    'target': tf.keras.losses.CategoricalCrossentropy()
}

model.compile(loss=loss)
model.fit(dfg, epochs=20, steps_per_epoch=10) # 10 weight updates per epoch
Epoch 1/20
10/10 [==============================] - 199s 16s/step - loss: 90.5833 - box_loss: 80.8374 - target_loss: 9.7459
Epoch 2/20
10/10 [==============================] - 158s 16s/step - loss: 38.9525 - box_loss: 38.9085 - target_loss: 0.0441
Epoch 3/20
10/10 [==============================] - 156s 16s/step - loss: 32.4575 - box_loss: 32.4469 - target_loss: 0.0106
Epoch 4/20
10/10 [==============================] - 155s 16s/step - loss: 28.6964 - box_loss: 28.6949 - target_loss: 0.0014
Epoch 5/20
10/10 [==============================] - 153s 15s/step - loss: 28.4143 - box_loss: 28.4143 - target_loss: 6.0349e-08
Epoch 6/20
10/10 [==============================] - 158s 16s/step - loss: 27.1531 - box_loss: 27.1531 - target_loss: 2.0451e-07
Epoch 7/20
10/10 [==============================] - 62s 5s/step - loss: 22.2554 - box_loss: 22.2548 - target_loss: 6.0339e-04
Epoch 8/20
10/10 [==============================] - 3s 350ms/step - loss: 27.1756 - box_loss: 27.1756 - target_loss: 1.8626e-09
Epoch 9/20
10/10 [==============================] - 3s 344ms/step - loss: 22.8763 - box_loss: 22.8763 - target_loss: 0.0000e+00
Epoch 10/20
10/10 [==============================] - 3s 344ms/step - loss: 23.6116 - box_loss: 23.5324 - target_loss: 0.0792
Epoch 11/20
10/10 [==============================] - 3s 351ms/step - loss: 23.6213 - box_loss: 23.6213 - target_loss: 6.5937e-08
Epoch 12/20
10/10 [==============================] - 3s 349ms/step - loss: 20.9325 - box_loss: 20.9325 - target_loss: 9.6111e-08
Epoch 13/20
10/10 [==============================] - 3s 342ms/step - loss: 19.7643 - box_loss: 19.7643 - target_loss: 6.2536e-09
Epoch 14/20
10/10 [==============================] - 3s 346ms/step - loss: 21.9087 - box_loss: 21.9071 - target_loss: 0.0016
Epoch 15/20
10/10 [==============================] - 3s 348ms/step - loss: 19.8910 - box_loss: 19.8445 - target_loss: 0.0465
Epoch 16/20
10/10 [==============================] - 3s 341ms/step - loss: 18.0794 - box_loss: 18.0794 - target_loss: 2.2352e-09
Epoch 17/20
10/10 [==============================] - 3s 349ms/step - loss: 19.9182 - box_loss: 19.9182 - target_loss: 0.0000e+00
Epoch 18/20
10/10 [==============================] - 3s 337ms/step - loss: 18.0254 - box_loss: 18.0253 - target_loss: 8.1338e-05
Epoch 19/20
10/10 [==============================] - 3s 339ms/step - loss: 19.9694 - box_loss: 19.9694 - target_loss: 1.5977e-06
Epoch 20/20
10/10 [==============================] - 3s 331ms/step - loss: 17.7530 - box_loss: 17.7530 - target_loss: 3.9085e-10
model.summary()
#Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 224, 224, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
vgg16 (Functional)              (None, 7, 7, 512)    14714688    lambda[0][0]                     
__________________________________________________________________________________________________
flatten (Flatten)               (None, 25088)        0           vgg16[0][0]                      
__________________________________________________________________________________________________
dense (Dense)                   (None, 256)          6422784     flatten[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          32896       dense[0][0]                      
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 64)           8256        dense_1[0][0]                    
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 64)           8256        dense_1[0][0]                    
__________________________________________________________________________________________________
box (Dense)                     (None, 4)            260         dense_2[0][0]                    
__________________________________________________________________________________________________
target (Dense)                  (None, 3)            195         dense_3[0][0]                    
==================================================================================================
Total params: 21,187,335
Trainable params: 6,472,647
Non-trainable params: 14,714,688
__________________________________________________________________________________________________
tf.keras.utils.plot_model(model, rankdir='BT')

history = model.fit(dfg, epochs=20, steps_per_epoch=20)

Epoch 1/20
20/20 [==============================] - 7s 352ms/step - loss: 17.1495 - box_loss: 17.1495 - target_loss: 8.9183e-06
Epoch 2/20
20/20 [==============================] - 7s 349ms/step - loss: 16.8312 - box_loss: 16.8311 - target_loss: 1.7778e-05
Epoch 3/20
20/20 [==============================] - 7s 338ms/step - loss: 16.1163 - box_loss: 16.1148 - target_loss: 0.0015
Epoch 4/20
20/20 [==============================] - 7s 346ms/step - loss: 16.0480 - box_loss: 16.0480 - target_loss: 0.0000e+00
Epoch 5/20
20/20 [==============================] - 7s 349ms/step - loss: 16.7372 - box_loss: 16.7371 - target_loss: 1.0930e-04
Epoch 6/20
20/20 [==============================] - 7s 344ms/step - loss: 15.5043 - box_loss: 15.4865 - target_loss: 0.0177
Epoch 7/20
20/20 [==============================] - 7s 347ms/step - loss: 14.6377 - box_loss: 14.6377 - target_loss: 9.2198e-08
Epoch 8/20
20/20 [==============================] - 7s 343ms/step - loss: 14.4987 - box_loss: 14.4818 - target_loss: 0.0169
Epoch 9/20
20/20 [==============================] - 7s 344ms/step - loss: 14.9790 - box_loss: 14.9560 - target_loss: 0.0229
Epoch 10/20
20/20 [==============================] - 7s 340ms/step - loss: 13.8011 - box_loss: 13.8011 - target_loss: 0.0000e+00
Epoch 11/20
20/20 [==============================] - 7s 349ms/step - loss: 14.2557 - box_loss: 14.2554 - target_loss: 2.6677e-04
Epoch 12/20
20/20 [==============================] - 7s 338ms/step - loss: 12.9601 - box_loss: 12.9229 - target_loss: 0.0371
Epoch 13/20
20/20 [==============================] - 7s 343ms/step - loss: 13.7806 - box_loss: 13.7806 - target_loss: 0.0000e+00
Epoch 14/20
20/20 [==============================] - 7s 337ms/step - loss: 12.5579 - box_loss: 12.5379 - target_loss: 0.0200
Epoch 15/20
20/20 [==============================] - 7s 343ms/step - loss: 14.1479 - box_loss: 14.1479 - target_loss: 2.3356e-07
Epoch 16/20
20/20 [==============================] - 7s 342ms/step - loss: 12.5556 - box_loss: 12.5556 - target_loss: 3.8147e-10
Epoch 17/20
20/20 [==============================] - 7s 344ms/step - loss: 12.5946 - box_loss: 12.5944 - target_loss: 2.0212e-04
Epoch 18/20
20/20 [==============================] - 7s 340ms/step - loss: 11.9439 - box_loss: 11.9369 - target_loss: 0.0070
Epoch 19/20
20/20 [==============================] - 7s 337ms/step - loss: 12.3697 - box_loss: 12.3697 - target_loss: 3.1069e-06
Epoch 20/20
20/20 [==============================] - 7s 341ms/step - loss: 11.9605 - box_loss: 11.9605 - target_loss: 4.5287e-05
pd.DataFrame(history.history).plot.line()

im = tf.keras.preprocessing.image.load_img('dataset/images/airplane/image_0001.jpg')

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpt

bim = np.array(im.resize((224,224,)))[tf.newaxis]

model(bim)
# [<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[ 42.0325  ,  28.545605, 291.0303  , 110.72619 ]], dtype=float32)>,
#  <tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[1.0000000e+00, 1.5445283e-31, 0.0000000e+00]], dtype=float32)>]
fig, ax = plt.subplots(1,1)
ax.imshow(im)
pt = mpt.Rectangle((42.0325, 28.545605),291.0303-42.0325,110.72619-28.545605,fill=False)
ax.add_patch(pt)


!pip install -U tensorflow-hub

Three ways to use ready-made models:

1. tf.keras.applications

2. TensorFlow Hub

3. Model Garden

import tensorflow_hub as hub # package that provides access to publicly shared models

model = tf.keras.models.Sequential([
    hub.KerasLayer('https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4', trainable=True, 
                   input_shape=(224,224,3))
])
# If the URL contains 'classification', the module is a classification model and (generally) can be placed inside a model without building it first; the trailing number is the version

model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
keras_layer_1 (KerasLayer)   (None, 1001)              3540265   
=================================================================
Total params: 3,540,265
Trainable params: 3,506,153
Non-trainable params: 34,112
_________________________________________________________________
model = tf.keras.models.Sequential([
    hub.KerasLayer('https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4', trainable=True,
                   input_shape=(224,224,3))
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
keras_layer_3 (KerasLayer)   (None, 1280)              2257984   
=================================================================
Total params: 2,257,984
Trainable params: 2,223,872
Non-trainable params: 34,112
_________________________________________________________________
mv2 = tf.keras.applications.MobileNetV2(include_top=True)

mv2.summary()
Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_3 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         input_3[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 112, 112, 32) 0           bn_Conv1[0][0]                   
__________________________________________________________________________________________________
expanded_conv_depthwise (Depthw (None, 112, 112, 32) 288         Conv1_relu[0][0]                 
__________________________________________________________________________________________________
expanded_conv_depthwise_BN (Bat (None, 112, 112, 32) 128         expanded_conv_depthwise[0][0]    
__________________________________________________________________________________________________
expanded_conv_depthwise_relu (R (None, 112, 112, 32) 0           expanded_conv_depthwise_BN[0][0] 
__________________________________________________________________________________________________
expanded_conv_project (Conv2D)  (None, 112, 112, 16) 512         expanded_conv_depthwise_relu[0][0
__________________________________________________________________________________________________
expanded_conv_project_BN (Batch (None, 112, 112, 16) 64          expanded_conv_project[0][0]      
__________________________________________________________________________________________________
block_1_expand (Conv2D)         (None, 112, 112, 96) 1536        expanded_conv_project_BN[0][0]   
__________________________________________________________________________________________________
block_1_expand_BN (BatchNormali (None, 112, 112, 96) 384         block_1_expand[0][0]             
__________________________________________________________________________________________________
block_1_expand_relu (ReLU)      (None, 112, 112, 96) 0           block_1_expand_BN[0][0]          
__________________________________________________________________________________________________
block_1_pad (ZeroPadding2D)     (None, 113, 113, 96) 0           block_1_expand_relu[0][0]        
__________________________________________________________________________________________________
block_1_depthwise (DepthwiseCon (None, 56, 56, 96)   864         block_1_pad[0][0]                
__________________________________________________________________________________________________
block_1_depthwise_BN (BatchNorm (None, 56, 56, 96)   384         block_1_depthwise[0][0]          
__________________________________________________________________________________________________
block_1_depthwise_relu (ReLU)   (None, 56, 56, 96)   0           block_1_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_1_project (Conv2D)        (None, 56, 56, 24)   2304        block_1_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_1_project_BN (BatchNormal (None, 56, 56, 24)   96          block_1_project[0][0]            
__________________________________________________________________________________________________
block_2_expand (Conv2D)         (None, 56, 56, 144)  3456        block_1_project_BN[0][0]         
__________________________________________________________________________________________________
block_2_expand_BN (BatchNormali (None, 56, 56, 144)  576         block_2_expand[0][0]             
__________________________________________________________________________________________________
block_2_expand_relu (ReLU)      (None, 56, 56, 144)  0           block_2_expand_BN[0][0]          
__________________________________________________________________________________________________
block_2_depthwise (DepthwiseCon (None, 56, 56, 144)  1296        block_2_expand_relu[0][0]        
__________________________________________________________________________________________________
block_2_depthwise_BN (BatchNorm (None, 56, 56, 144)  576         block_2_depthwise[0][0]          
__________________________________________________________________________________________________
block_2_depthwise_relu (ReLU)   (None, 56, 56, 144)  0           block_2_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_2_project (Conv2D)        (None, 56, 56, 24)   3456        block_2_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_2_project_BN (BatchNormal (None, 56, 56, 24)   96          block_2_project[0][0]            
__________________________________________________________________________________________________
block_2_add (Add)               (None, 56, 56, 24)   0           block_1_project_BN[0][0]         
                                                                 block_2_project_BN[0][0]         
__________________________________________________________________________________________________
block_3_expand (Conv2D)         (None, 56, 56, 144)  3456        block_2_add[0][0]                
__________________________________________________________________________________________________
block_3_expand_BN (BatchNormali (None, 56, 56, 144)  576         block_3_expand[0][0]             
__________________________________________________________________________________________________
block_3_expand_relu (ReLU)      (None, 56, 56, 144)  0           block_3_expand_BN[0][0]          
__________________________________________________________________________________________________
block_3_pad (ZeroPadding2D)     (None, 57, 57, 144)  0           block_3_expand_relu[0][0]        
__________________________________________________________________________________________________
block_3_depthwise (DepthwiseCon (None, 28, 28, 144)  1296        block_3_pad[0][0]                
__________________________________________________________________________________________________
block_3_depthwise_BN (BatchNorm (None, 28, 28, 144)  576         block_3_depthwise[0][0]          
__________________________________________________________________________________________________
block_3_depthwise_relu (ReLU)   (None, 28, 28, 144)  0           block_3_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_3_project (Conv2D)        (None, 28, 28, 32)   4608        block_3_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_3_project_BN (BatchNormal (None, 28, 28, 32)   128         block_3_project[0][0]            
__________________________________________________________________________________________________
block_4_expand (Conv2D)         (None, 28, 28, 192)  6144        block_3_project_BN[0][0]         
__________________________________________________________________________________________________
block_4_expand_BN (BatchNormali (None, 28, 28, 192)  768         block_4_expand[0][0]             
__________________________________________________________________________________________________
block_4_expand_relu (ReLU)      (None, 28, 28, 192)  0           block_4_expand_BN[0][0]          
__________________________________________________________________________________________________
block_4_depthwise (DepthwiseCon (None, 28, 28, 192)  1728        block_4_expand_relu[0][0]        
__________________________________________________________________________________________________
block_4_depthwise_BN (BatchNorm (None, 28, 28, 192)  768         block_4_depthwise[0][0]          
__________________________________________________________________________________________________
block_4_depthwise_relu (ReLU)   (None, 28, 28, 192)  0           block_4_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_4_project (Conv2D)        (None, 28, 28, 32)   6144        block_4_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_4_project_BN (BatchNormal (None, 28, 28, 32)   128         block_4_project[0][0]            
__________________________________________________________________________________________________
block_4_add (Add)               (None, 28, 28, 32)   0           block_3_project_BN[0][0]         
                                                                 block_4_project_BN[0][0]         
__________________________________________________________________________________________________
block_5_expand (Conv2D)         (None, 28, 28, 192)  6144        block_4_add[0][0]                
__________________________________________________________________________________________________
block_5_expand_BN (BatchNormali (None, 28, 28, 192)  768         block_5_expand[0][0]             
__________________________________________________________________________________________________
block_5_expand_relu (ReLU)      (None, 28, 28, 192)  0           block_5_expand_BN[0][0]          
__________________________________________________________________________________________________
block_5_depthwise (DepthwiseCon (None, 28, 28, 192)  1728        block_5_expand_relu[0][0]        
__________________________________________________________________________________________________
block_5_depthwise_BN (BatchNorm (None, 28, 28, 192)  768         block_5_depthwise[0][0]          
__________________________________________________________________________________________________
block_5_depthwise_relu (ReLU)   (None, 28, 28, 192)  0           block_5_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_5_project (Conv2D)        (None, 28, 28, 32)   6144        block_5_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_5_project_BN (BatchNormal (None, 28, 28, 32)   128         block_5_project[0][0]            
__________________________________________________________________________________________________
block_5_add (Add)               (None, 28, 28, 32)   0           block_4_add[0][0]                
                                                                 block_5_project_BN[0][0]         
__________________________________________________________________________________________________
block_6_expand (Conv2D)         (None, 28, 28, 192)  6144        block_5_add[0][0]                
__________________________________________________________________________________________________
block_6_expand_BN (BatchNormali (None, 28, 28, 192)  768         block_6_expand[0][0]             
__________________________________________________________________________________________________
block_6_expand_relu (ReLU)      (None, 28, 28, 192)  0           block_6_expand_BN[0][0]          
__________________________________________________________________________________________________
block_6_pad (ZeroPadding2D)     (None, 29, 29, 192)  0           block_6_expand_relu[0][0]        
__________________________________________________________________________________________________
block_6_depthwise (DepthwiseCon (None, 14, 14, 192)  1728        block_6_pad[0][0]                
__________________________________________________________________________________________________
block_6_depthwise_BN (BatchNorm (None, 14, 14, 192)  768         block_6_depthwise[0][0]          
__________________________________________________________________________________________________
block_6_depthwise_relu (ReLU)   (None, 14, 14, 192)  0           block_6_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_6_project (Conv2D)        (None, 14, 14, 64)   12288       block_6_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_6_project_BN (BatchNormal (None, 14, 14, 64)   256         block_6_project[0][0]            
__________________________________________________________________________________________________
block_7_expand (Conv2D)         (None, 14, 14, 384)  24576       block_6_project_BN[0][0]         
__________________________________________________________________________________________________
block_7_expand_BN (BatchNormali (None, 14, 14, 384)  1536        block_7_expand[0][0]             
__________________________________________________________________________________________________
block_7_expand_relu (ReLU)      (None, 14, 14, 384)  0           block_7_expand_BN[0][0]          
__________________________________________________________________________________________________
block_7_depthwise (DepthwiseCon (None, 14, 14, 384)  3456        block_7_expand_relu[0][0]        
__________________________________________________________________________________________________
block_7_depthwise_BN (BatchNorm (None, 14, 14, 384)  1536        block_7_depthwise[0][0]          
__________________________________________________________________________________________________
block_7_depthwise_relu (ReLU)   (None, 14, 14, 384)  0           block_7_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_7_project (Conv2D)        (None, 14, 14, 64)   24576       block_7_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_7_project_BN (BatchNormal (None, 14, 14, 64)   256         block_7_project[0][0]            
__________________________________________________________________________________________________
block_7_add (Add)               (None, 14, 14, 64)   0           block_6_project_BN[0][0]         
                                                                 block_7_project_BN[0][0]         
__________________________________________________________________________________________________
block_8_expand (Conv2D)         (None, 14, 14, 384)  24576       block_7_add[0][0]                
__________________________________________________________________________________________________
block_8_expand_BN (BatchNormali (None, 14, 14, 384)  1536        block_8_expand[0][0]             
__________________________________________________________________________________________________
block_8_expand_relu (ReLU)      (None, 14, 14, 384)  0           block_8_expand_BN[0][0]          
__________________________________________________________________________________________________
block_8_depthwise (DepthwiseCon (None, 14, 14, 384)  3456        block_8_expand_relu[0][0]        
__________________________________________________________________________________________________
block_8_depthwise_BN (BatchNorm (None, 14, 14, 384)  1536        block_8_depthwise[0][0]          
__________________________________________________________________________________________________
block_8_depthwise_relu (ReLU)   (None, 14, 14, 384)  0           block_8_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_8_project (Conv2D)        (None, 14, 14, 64)   24576       block_8_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_8_project_BN (BatchNormal (None, 14, 14, 64)   256         block_8_project[0][0]            
__________________________________________________________________________________________________
block_8_add (Add)               (None, 14, 14, 64)   0           block_7_add[0][0]                
                                                                 block_8_project_BN[0][0]         
__________________________________________________________________________________________________
block_9_expand (Conv2D)         (None, 14, 14, 384)  24576       block_8_add[0][0]                
__________________________________________________________________________________________________
block_9_expand_BN (BatchNormali (None, 14, 14, 384)  1536        block_9_expand[0][0]             
__________________________________________________________________________________________________
block_9_expand_relu (ReLU)      (None, 14, 14, 384)  0           block_9_expand_BN[0][0]          
__________________________________________________________________________________________________
block_9_depthwise (DepthwiseCon (None, 14, 14, 384)  3456        block_9_expand_relu[0][0]        
__________________________________________________________________________________________________
block_9_depthwise_BN (BatchNorm (None, 14, 14, 384)  1536        block_9_depthwise[0][0]          
__________________________________________________________________________________________________
block_9_depthwise_relu (ReLU)   (None, 14, 14, 384)  0           block_9_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_9_project (Conv2D)        (None, 14, 14, 64)   24576       block_9_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_9_project_BN (BatchNormal (None, 14, 14, 64)   256         block_9_project[0][0]            
__________________________________________________________________________________________________
block_9_add (Add)               (None, 14, 14, 64)   0           block_8_add[0][0]                
                                                                 block_9_project_BN[0][0]         
__________________________________________________________________________________________________
block_10_expand (Conv2D)        (None, 14, 14, 384)  24576       block_9_add[0][0]                
__________________________________________________________________________________________________
block_10_expand_BN (BatchNormal (None, 14, 14, 384)  1536        block_10_expand[0][0]            
__________________________________________________________________________________________________
block_10_expand_relu (ReLU)     (None, 14, 14, 384)  0           block_10_expand_BN[0][0]         
__________________________________________________________________________________________________
block_10_depthwise (DepthwiseCo (None, 14, 14, 384)  3456        block_10_expand_relu[0][0]       
__________________________________________________________________________________________________
block_10_depthwise_BN (BatchNor (None, 14, 14, 384)  1536        block_10_depthwise[0][0]         
__________________________________________________________________________________________________
block_10_depthwise_relu (ReLU)  (None, 14, 14, 384)  0           block_10_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_10_project (Conv2D)       (None, 14, 14, 96)   36864       block_10_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_10_project_BN (BatchNorma (None, 14, 14, 96)   384         block_10_project[0][0]           
__________________________________________________________________________________________________
block_11_expand (Conv2D)        (None, 14, 14, 576)  55296       block_10_project_BN[0][0]        
__________________________________________________________________________________________________
block_11_expand_BN (BatchNormal (None, 14, 14, 576)  2304        block_11_expand[0][0]            
__________________________________________________________________________________________________
block_11_expand_relu (ReLU)     (None, 14, 14, 576)  0           block_11_expand_BN[0][0]         
__________________________________________________________________________________________________
block_11_depthwise (DepthwiseCo (None, 14, 14, 576)  5184        block_11_expand_relu[0][0]       
__________________________________________________________________________________________________
block_11_depthwise_BN (BatchNor (None, 14, 14, 576)  2304        block_11_depthwise[0][0]         
__________________________________________________________________________________________________
block_11_depthwise_relu (ReLU)  (None, 14, 14, 576)  0           block_11_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_11_project (Conv2D)       (None, 14, 14, 96)   55296       block_11_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_11_project_BN (BatchNorma (None, 14, 14, 96)   384         block_11_project[0][0]           
__________________________________________________________________________________________________
block_11_add (Add)              (None, 14, 14, 96)   0           block_10_project_BN[0][0]        
                                                                 block_11_project_BN[0][0]        
__________________________________________________________________________________________________
block_12_expand (Conv2D)        (None, 14, 14, 576)  55296       block_11_add[0][0]               
__________________________________________________________________________________________________
block_12_expand_BN (BatchNormal (None, 14, 14, 576)  2304        block_12_expand[0][0]            
__________________________________________________________________________________________________
block_12_expand_relu (ReLU)     (None, 14, 14, 576)  0           block_12_expand_BN[0][0]         
__________________________________________________________________________________________________
block_12_depthwise (DepthwiseCo (None, 14, 14, 576)  5184        block_12_expand_relu[0][0]       
__________________________________________________________________________________________________
block_12_depthwise_BN (BatchNor (None, 14, 14, 576)  2304        block_12_depthwise[0][0]         
__________________________________________________________________________________________________
block_12_depthwise_relu (ReLU)  (None, 14, 14, 576)  0           block_12_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_12_project (Conv2D)       (None, 14, 14, 96)   55296       block_12_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_12_project_BN (BatchNorma (None, 14, 14, 96)   384         block_12_project[0][0]           
__________________________________________________________________________________________________
block_12_add (Add)              (None, 14, 14, 96)   0           block_11_add[0][0]               
                                                                 block_12_project_BN[0][0]        
__________________________________________________________________________________________________
block_13_expand (Conv2D)        (None, 14, 14, 576)  55296       block_12_add[0][0]               
__________________________________________________________________________________________________
block_13_expand_BN (BatchNormal (None, 14, 14, 576)  2304        block_13_expand[0][0]            
__________________________________________________________________________________________________
block_13_expand_relu (ReLU)     (None, 14, 14, 576)  0           block_13_expand_BN[0][0]         
__________________________________________________________________________________________________
block_13_pad (ZeroPadding2D)    (None, 15, 15, 576)  0           block_13_expand_relu[0][0]       
__________________________________________________________________________________________________
block_13_depthwise (DepthwiseCo (None, 7, 7, 576)    5184        block_13_pad[0][0]               
__________________________________________________________________________________________________
block_13_depthwise_BN (BatchNor (None, 7, 7, 576)    2304        block_13_depthwise[0][0]         
__________________________________________________________________________________________________
block_13_depthwise_relu (ReLU)  (None, 7, 7, 576)    0           block_13_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_13_project (Conv2D)       (None, 7, 7, 160)    92160       block_13_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_13_project_BN (BatchNorma (None, 7, 7, 160)    640         block_13_project[0][0]           
__________________________________________________________________________________________________
block_14_expand (Conv2D)        (None, 7, 7, 960)    153600      block_13_project_BN[0][0]        
__________________________________________________________________________________________________
block_14_expand_BN (BatchNormal (None, 7, 7, 960)    3840        block_14_expand[0][0]            
__________________________________________________________________________________________________
block_14_expand_relu (ReLU)     (None, 7, 7, 960)    0           block_14_expand_BN[0][0]         
__________________________________________________________________________________________________
block_14_depthwise (DepthwiseCo (None, 7, 7, 960)    8640        block_14_expand_relu[0][0]       
__________________________________________________________________________________________________
block_14_depthwise_BN (BatchNor (None, 7, 7, 960)    3840        block_14_depthwise[0][0]         
__________________________________________________________________________________________________
block_14_depthwise_relu (ReLU)  (None, 7, 7, 960)    0           block_14_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_14_project (Conv2D)       (None, 7, 7, 160)    153600      block_14_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_14_project_BN (BatchNorma (None, 7, 7, 160)    640         block_14_project[0][0]           
__________________________________________________________________________________________________
block_14_add (Add)              (None, 7, 7, 160)    0           block_13_project_BN[0][0]        
                                                                 block_14_project_BN[0][0]        
__________________________________________________________________________________________________
block_15_expand (Conv2D)        (None, 7, 7, 960)    153600      block_14_add[0][0]               
__________________________________________________________________________________________________
block_15_expand_BN (BatchNormal (None, 7, 7, 960)    3840        block_15_expand[0][0]            
__________________________________________________________________________________________________
block_15_expand_relu (ReLU)     (None, 7, 7, 960)    0           block_15_expand_BN[0][0]         
__________________________________________________________________________________________________
block_15_depthwise (DepthwiseCo (None, 7, 7, 960)    8640        block_15_expand_relu[0][0]       
__________________________________________________________________________________________________
block_15_depthwise_BN (BatchNor (None, 7, 7, 960)    3840        block_15_depthwise[0][0]         
__________________________________________________________________________________________________
block_15_depthwise_relu (ReLU)  (None, 7, 7, 960)    0           block_15_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_15_project (Conv2D)       (None, 7, 7, 160)    153600      block_15_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_15_project_BN (BatchNorma (None, 7, 7, 160)    640         block_15_project[0][0]           
__________________________________________________________________________________________________
block_15_add (Add)              (None, 7, 7, 160)    0           block_14_add[0][0]               
                                                                 block_15_project_BN[0][0]        
__________________________________________________________________________________________________
block_16_expand (Conv2D)        (None, 7, 7, 960)    153600      block_15_add[0][0]               
__________________________________________________________________________________________________
block_16_expand_BN (BatchNormal (None, 7, 7, 960)    3840        block_16_expand[0][0]            
__________________________________________________________________________________________________
block_16_expand_relu (ReLU)     (None, 7, 7, 960)    0           block_16_expand_BN[0][0]         
__________________________________________________________________________________________________
block_16_depthwise (DepthwiseCo (None, 7, 7, 960)    8640        block_16_expand_relu[0][0]       
__________________________________________________________________________________________________
block_16_depthwise_BN (BatchNor (None, 7, 7, 960)    3840        block_16_depthwise[0][0]         
__________________________________________________________________________________________________
block_16_depthwise_relu (ReLU)  (None, 7, 7, 960)    0           block_16_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_16_project (Conv2D)       (None, 7, 7, 320)    307200      block_16_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_16_project_BN (BatchNorma (None, 7, 7, 320)    1280        block_16_project[0][0]           
__________________________________________________________________________________________________
Conv_1 (Conv2D)                 (None, 7, 7, 1280)   409600      block_16_project_BN[0][0]        
__________________________________________________________________________________________________
Conv_1_bn (BatchNormalization)  (None, 7, 7, 1280)   5120        Conv_1[0][0]                     
__________________________________________________________________________________________________
out_relu (ReLU)                 (None, 7, 7, 1280)   0           Conv_1_bn[0][0]                  
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1280)         0           out_relu[0][0]                   
__________________________________________________________________________________________________
predictions (Dense)             (None, 1000)         1281000     global_average_pooling2d[0][0]   
==================================================================================================
Total params: 3,538,984
Trainable params: 3,504,872
Non-trainable params: 34,112
__________________________________________________________________________________________________

Localization

The task of drawing a rectangle (bounding box) around the region judged to contain an object.
The location is learned through training.

The training data consists of each image's target (label) and a rectangle (x, y coordinates, width, height).

import matplotlib.pyplot as plt
import matplotlib.patches as mpt
import pandas as pd
import tensorflow as tf
import numpy as np
from PIL import Image 

air = pd.read_csv('dataset/annotations/airplane.csv', header=None)
face = pd.read_csv('dataset/annotations/face.csv', header=None)
motorcycle = pd.read_csv('dataset/annotations/motorcycle.csv', header=None)
air.rename(columns={1:'x1',2:'y1',3:'x2',4:'y2',0:'filename',5:'target'}, inplace=True)
face.rename(columns={1:'x1',2:'y1',3:'x2',4:'y2',0:'filename',5:'target'}, inplace=True)
motorcycle.rename(columns={1:'x1',2:'y1',3:'x2',4:'y2',0:'filename',5:'target'}, inplace=True)

air.filename = air.filename.map(lambda x: 'dataset/images/airplane/'+x)
face.filename = face.filename.map(lambda x: 'dataset/images/face/'+x)
motorcycle.filename = motorcycle.filename.map(lambda x: 'dataset/images/motorcycle/'+x)

data=pd.concat([air,face,motorcycle], axis=0, ignore_index=True)

air.head()

	filename	x1	y1	x2	y2	target
0	image_0001.jpg	49	30	349	137	airplane
1	image_0002.jpg	59	35	342	153	airplane
2	image_0003.jpg	47	36	331	135	airplane
3	image_0004.jpg	47	24	342	141	airplane
4	image_0005.jpg	48	18	339	146	airplane
im = plt.imread('dataset/images/airplane/image_0001.jpg')

fig, ax = plt.subplots(1,1)
ax.imshow(im)
pt = mpt.Rectangle((49,30),349-49,137-30, fill=False) 
ax.add_patch(pt)

data.target.value_counts() # imbalanced data

# airplane      800
# motorcycle    798
# face          435
# Name: target, dtype: int64
im = plt.imread(data.loc[0, 'filename'])

def show_images(i):
  im = plt.imread(data.loc[i, 'filename'])
  fig, ax = plt.subplots(1,1)
  ax.imshow(im)
  pt = mpt.Rectangle((data.loc[i,'x1'],data.loc[i,'y1']),
                     data.loc[i,'x2']-data.loc[i,'x1'],
                     data.loc[i,'y2']-data.loc[i,'y1'], fill=False) 
  ax.add_patch(pt)
  
show_images(832)

Four ways to do one-hot encoding

1. scikit learn - onehotencoder

2. pandas - get_dummies

3. data['name'] = (data.target=='name')*1

4. tf.keras.utils.to_categorical

# tf.keras.utils.to_categorical can only convert labels that are already label-encoded

from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder()
ohe.fit_transform(data[['target']]).toarray()

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])
pd.get_dummies(data.target)

	airplane	face	motorcycle
0	1	0	0
1	1	0	0
2	1	0	0
3	1	0	0
4	1	0	0
...	...	...	...
2028	0	0	1
2029	0	0	1
2030	0	0	1
2031	0	0	1
2032	0	0	1
data['airplane'] = (data.target=='airplane')*1
data['face'] = (data.target=='face')*1
data['motorcycle'] = (data.target=='motorcycle')*1

data.tail().filename
# 2028    dataset/images/motorcycle/image_0794.jpg
# 2029    dataset/images/motorcycle/image_0795.jpg
# 2030    dataset/images/motorcycle/image_0796.jpg
# 2031    dataset/images/motorcycle/image_0797.jpg
# 2032    dataset/images/motorcycle/image_0798.jpg
# Name: filename, dtype: object
data['label'] = data.target.map({'airplane':0, 'face':1, 'motorcycle':2})  # label-encode first
tf.keras.utils.to_categorical(data.label)
# array([[1., 0., 0.],
#        [1., 0., 0.],
#        [1., 0., 0.],
#        ...,
#        [0., 0., 1.],
#        [0., 0., 1.],
#        [0., 0., 1.]], dtype=float32)

data.drop(columns=['target','label'], inplace=True)

Label encoding

1. map

2. scikit-learn - labelencoder

data['label'] = data.target.map({'airplane':0, 'face':1, 'motorcycle':2})

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit_transform(data.target)
# array([0, 0, 0, ..., 2, 2, 2])

data.values
array([['dataset/images/airplane/image_0001.jpg', 49, 30, ..., 1, 0, 0],
       ['dataset/images/airplane/image_0002.jpg', 59, 35, ..., 1, 0, 0],
       ['dataset/images/airplane/image_0003.jpg', 47, 36, ..., 1, 0, 0],
       ...,
       ['dataset/images/motorcycle/image_0796.jpg', 47, 40, ..., 0, 0, 1],
       ['dataset/images/motorcycle/image_0797.jpg', 48, 54, ..., 0, 0, 1],
       ['dataset/images/motorcycle/image_0798.jpg', 42, 33, ..., 0, 0, 1]],
      dtype=object)

Loading images in bulk

1. tf.keras.preprocessing.image_dataset_from_directory => loads them as a tf.data.Dataset (see the sketch below)

2. tf.keras.preprocessing.image.ImageDataGenerator => loads them as numpy arrays / augmentation and saving files to disk are available as options
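Only the second option is used below; here is a minimal sketch of the first, assuming the dataset/images/<class-name>/ folder layout used in this post. Note that it infers only class labels from the folder names, so it cannot carry the bounding-box columns that localization needs, which is one reason to prefer flow_from_dataframe here.

ds = tf.keras.preprocessing.image_dataset_from_directory(
    'dataset/images',
    image_size=(64, 128),      # images are resized on load
    batch_size=32,
    label_mode='categorical')  # one-hot class labels inferred from the folder names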

 

data.head()

filename	x1	y1	x2	y2	airplane	face	motorcycle
0	dataset/images/airplane/image_0001.jpg	49	30	349	137	1	0	0
1	dataset/images/airplane/image_0002.jpg	59	35	342	153	1	0	0
2	dataset/images/airplane/image_0003.jpg	47	36	331	135	1	0	0
3	dataset/images/airplane/image_0004.jpg	47	24	342	141	1	0	0
4	dataset/images/airplane/image_0005.jpg	48	18	339	146	1	0	0
idg = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255) # loads the images while normalizing them
dg = idg.flow_from_dataframe(data, class_mode='raw', y_col=['x1','x2','y1','y2'], target_size=(64,128)) # forces a resize / lets you load the label values in whatever form you want
# a convenient form for localization

ims = next(dg)

ims[1][0], ims[1][1]
# (array([ 82, 366,  56, 171]), array([ 34, 230,  19, 127]))

ims[0][0].shape
# (256, 256, 3)
train = tf.data.Dataset.from_generator(lambda: dg, output_types=(tf.float32, tf.float32))
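output_types still works but is deprecated in newer TensorFlow releases; an equivalent sketch with output_signature (shapes hedged with None, since the batch size and image size depend on the generator settings):

train = tf.data.Dataset.from_generator(
    lambda: dg,
    output_signature=(
        tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),  # image batch
        tf.TensorSpec(shape=(None, 4), dtype=tf.float32)))             # x1, x2, y1, y2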

t = iter(train.take(1))

next(t)
(<tf.Tensor: shape=(32, 256, 256, 3), dtype=float32, numpy=
 array([[[[1.        , 1.        , 1.        ],
          [1.        , 1.        , 1.        ],
          [1.        , 1.        , 1.        ],
          ...,
         [[0.9960785 , 0.98823535, 0.9921569 ],
          [1.        , 0.9960785 , 1.        ],
          [1.        , 0.9960785 , 0.9921569 ],
          ...,
          [1.        , 1.        , 1.        ],
          [1.        , 1.        , 1.        ],
          [1.        , 1.        , 1.        ]]]], dtype=float32)>,
          <tf.Tensor: shape=(32, 4), dtype=float32, numpy=
 array([[ 45., 345.,  31., 165.],
        [ 38., 238.,  24., 143.],
        [ 48., 351.,  29., 113.],
        [ 49., 209.,  51., 127.],
        [176., 379.,  15., 275.],
        [ 35., 234.,  25., 144.],
        [ 52., 346.,  27., 112.],
        [ 51., 350.,  33., 123.],
        [ 50., 350.,  29., 112.],
        [ 56., 353.,  33., 138.],
        [ 32., 241.,  20., 283.],
        [ 48., 348.,  26.,  92.],
        [ 35., 229.,  22., 151.],
        [ 36., 226.,  23., 129.],
        [ 36., 228.,  60., 174.],
        [ 32., 230.,  21., 118.],
        [ 47., 232.,  26., 136.],
        [ 51., 345.,  29., 132.],
        [171., 379.,  26., 278.],
        [ 59., 350.,  32., 126.],
        [ 36., 227.,  35., 147.],
        [ 24., 349.,  28., 124.],
        [ 29., 228.,  24., 136.],
        [ 32., 229.,  21., 150.],
        [ 35., 227.,  21., 143.],
        [154., 342.,  38., 285.],
        [122., 320.,  16., 253.],
        [ 35., 232.,  15., 132.],
        [ 50., 347.,  27., 119.],
        [ 37., 227.,  32., 164.],
        [ 75., 358.,  42., 153.],
        [ 39., 231.,  27., 140.]], dtype=float32)>)
for i in train.take(1):
  print(i[1])
  
tf.Tensor(
[[ 46. 352.  29. 113.]
 [ 47. 217.  40. 141.]
 [ 34. 228.  18. 121.]
 [266. 462.  54. 324.]
 [207. 391.  20. 282.]
 [ 61. 346.  24. 148.]
 [ 70. 347.  86. 167.]
 [ 33. 228.  17. 127.]
 [ 80. 280.  39. 314.]
 [ 48. 345.  27. 108.]
 [ 35. 229.  20. 126.]
 [ 54. 346.  29. 120.]
 [ 35. 232.  23. 149.]
 [ 61. 350.  27. 113.]
 [ 35. 216.  23. 134.]
 [ 48. 221.  43. 138.]
 [155. 348.  18. 263.]
 [ 52. 349.  28. 116.]
 [160. 375.  28. 304.]
 [ 51. 345.  33. 138.]
 [ 49. 344.  23. 122.]
 [ 55. 345.  22. 140.]
 [ 49. 349.  33. 107.]
 [ 34. 226.  22. 133.]
 [ 62. 353.  67. 127.]
 [ 49. 225.  52. 150.]
 [ 32. 233.  18. 137.]
 [118. 297.  13. 258.]
 [ 43. 344.  31. 117.]
 [ 57. 344.  24. 105.]
 [247. 432.  19. 286.]
 [ 55. 352.  31. 140.]], shape=(32, 4), dtype=float32)
vgg = tf.keras.applications.VGG16(include_top=False)
vgg.trainable = False 

input_ = tf.keras.Input((64,128,3))
x = tf.keras.layers.Lambda(lambda x:tf.keras.applications.vgg16.preprocess_input(x))(input_)
x = vgg(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(4)(x)
model = tf.keras.models.Model(input_, x)

model.compile(loss=tf.keras.losses.MeanAbsoluteError(), metrics=['mae'])
# Huber behaves like L1 loss for large errors and like L2 loss for small errors
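A sketch of plugging in the Huber loss mentioned above instead of MAE; delta (1.0 by default) is the threshold between the L2 and L1 regimes.

model.compile(loss=tf.keras.losses.Huber(delta=1.0),  # L2 below delta, L1 above
              metrics=['mae'])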

model.summary()
Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_8 (InputLayer)         [(None, 64, 128, 3)]      0         
_________________________________________________________________
lambda_3 (Lambda)            (None, 64, 128, 3)        0         
_________________________________________________________________
vgg16 (Functional)           (None, None, None, 512)   14714688  
_________________________________________________________________
flatten_3 (Flatten)          (None, 4096)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 16388     
=================================================================
Total params: 14,731,076
Trainable params: 16,388
Non-trainable params: 14,714,688
_________________________________________________________________
model.fit(train, epochs=1, steps_per_epoch=50)
# 50/50 [==============================] - 933s 19s/step - loss: 108.1627 - mae: 108.1627 

im = Image.open('dataset/images/airplane/image_0001.jpg')
x = np.array(im.resize((128,64)))  # PIL's resize takes (width, height)

plt.imshow(x)

model(x[tf.newaxis])
# <tf.Tensor: shape=(1, 4), dtype=float32, numpy=
# array([[-17.944298 ,  15.799198 ,  19.511517 ,   1.9699388]],
#       dtype=float32)>
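A short sketch to draw the predicted box on the image; the output order matches y_col=['x1','x2','y1','y2'] above, and after a single epoch the box will still be far off, as the negative coordinate in the prediction suggests.

pred = model(x[tf.newaxis]).numpy()[0]  # [x1, x2, y1, y2]
fig, ax = plt.subplots(1,1)
ax.imshow(x)
box = mpt.Rectangle((pred[0], pred[2]), pred[1]-pred[0], pred[3]-pred[2],
                    fill=False, edgecolor='red')
ax.add_patch(box)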

Fast R-CNN

A model that handles Localization + Classification in one pass.

Localization can be solved with a CNN, but because of machine learning's ad-hoc nature,

the catastrophic forgetting problem and the multi-loss problem meant Localization + Classification could not be done at the same time.

Since localization had until then been solved as regression, using multiple losses made one of the two tasks perform very poorly.

It was also not expected that classification + localization could be solved with a single CNN.

But because a CNN holds location information and feature information at the same time, it was shown that widening the output to two heads and training with multiple losses simultaneously still learns (a sketch of such a two-head model follows below).
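A minimal sketch of that idea, reusing the frozen VGG16 backbone and the three classes from the dataset above (the head sizes and loss_weights are illustrative assumptions, not the exact configuration of the Fast R-CNN paper):

vgg = tf.keras.applications.VGG16(include_top=False)
vgg.trainable = False

input_ = tf.keras.Input((64,128,3))
x = tf.keras.layers.Lambda(lambda t: tf.keras.applications.vgg16.preprocess_input(t))(input_)
x = vgg(x)
x = tf.keras.layers.Flatten()(x)
cls = tf.keras.layers.Dense(3, activation='softmax', name='cls')(x)  # airplane / face / motorcycle
box = tf.keras.layers.Dense(4, name='box')(x)                        # bounding-box regression
two_head = tf.keras.models.Model(input_, [cls, box])

# two losses trained at once; loss_weights is one way to keep the multi-loss balance
two_head.compile(optimizer='adam',
                 loss={'cls': 'categorical_crossentropy', 'box': 'mae'},
                 loss_weights={'cls': 1.0, 'box': 1.0})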

 


Object detection

Finding out what is inside each rectangle.

Multi-Labeled Classification + Bounding Box Regression (Localization)

A field that has to do both: the Classification problem of deciding what kind of object each of several objects is,

and the Localization problem of expressing where each object is through a bounding box.

Object detection paper flow

R-CNN was the first paper to apply a CNN to the object detection problem.

Localization

The task of drawing a rectangle (bounding box) around the region judged to contain an object.

Segmentation

Predicts a label for every pixel.

1. Semantic segmentation

- Classifies every pixel into a predefined number of classes

- Objects of the same class are masked together in one pass

2. Instance segmentation

- Similar to semantic segmentation, but instances of the same class are still distinguished from one another

- Each object gets its own mask, even among identical objects

Naive approach

Slide a window over every region of every size where an object could exist, and run classification on each crop.
Because there are so many regions to search, it takes a long time and is inefficient.

Sliding window

An algorithm that shifts a fixed-size window across the image a little at a time, comparing values at each position; a naive sketch follows below.
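A naive sketch of the scan (the window size and stride here are arbitrary choices); every yielded patch would be fed to a classifier, which is exactly the cost the region-proposal methods below try to avoid.

def sliding_windows(image, win_h, win_w, stride):
  # brute-force scan: yield (y, x, patch) for every window position
  H, W = image.shape[:2]
  for y in range(0, H - win_h + 1, stride):
    for x in range(0, W - win_w + 1, stride):
      yield y, x, image[y:y+win_h, x:x+win_w]

# e.g. a 64x64 window moved 16 pixels at a time:
# for y, x, patch in sliding_windows(im, 64, 64, 16): ...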

Region proposals

An algorithm that improves on the inefficiency of the sliding-window approach by quickly finding regions of the input image that are 'likely to contain an object'.

It looks at things like the color and texture differences between an object and its surroundings, and whether it is enclosed by other objects,

and uses a variety of such strategies to locate objects.

 

Many bounding boxes are generated at random and then gradually merged to recognize the objects.

 

!pip install selective-search
!pip install selectivesearch
!pip install -U opencv-contrib-python
import selective_search
import selectivesearch

import skimage
from skimage import data
import matplotlib.pyplot as plt
import matplotlib.patches as pts

dir(selective_search)
['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'box_filter',
 'core',
 'measure',
 'selective_search',
 'structure',
 'util']
plt.imshow(data.astronaut())

plt.imshow(data.rocket())

im = data.rocket()

ss = selective_search.selective_search(im) # returns the candidate box coordinates as a list

len(ss)
# 489
boxes = selective_search.box_filter(ss, topN=30)

fig, ax = plt.subplots(1,1)
ax.imshow(im)
for b in boxes:
  box = pts.Rectangle((b[0],b[1]), b[2]-b[0], b[3]-b[1], fill=None, edgecolor='red') # xy (origin), width, height
  ax.add_patch(box)

ss2 = selective_search.selective_search(im, mode='fast')

boxes = selective_search.box_filter(ss2, topN=30)

fig, ax = plt.subplots(1,1)
ax.imshow(im)
for b in boxes:
  box = pts.Rectangle((b[0],b[1]), b[2]-b[0], b[3]-b[1], fill=None, edgecolor='red') 
  ax.add_patch(box)

 

im2 = data.astronaut()

ss = selective_search.selective_search(im2)
boxes = selective_search.box_filter(ss, topN=20)

fig, ax = plt.subplots(1,1)
ax.imshow(im2)
for b in boxes:
  box = pts.Rectangle((b[0],b[1]), b[2]-b[0], b[3]-b[1], fill=None, edgecolor='red') # xy (origin), width, height
  ax.add_patch(box)

sss = selectivesearch.selective_search(im, min_size=500) # returns (image as numpy, list of region dicts)

sss[1] # the candidate regions
[{'labels': [0.0], 'rect': (0, 0, 18, 7), 'size': 121},
 {'labels': [1.0], 'rect': (19, 0, 9, 8), 'size': 65},
 {'labels': [2.0], 'rect': (16, 0, 32, 15), 'size': 207},
 {'labels': [3.0], 'rect': (45, 0, 14, 35), 'size': 223},
 {'labels': [4.0], 'rect': (57, 0, 34, 10), 'size': 300},
 {'labels': [5.0], 'rect': (88, 0, 29, 65), 'size': 209},
 {'labels': [6.0], 'rect': (89, 0, 49, 7), 'size': 217},
 {'labels': [7.0], 'rect': (133, 0, 42, 15), 'size': 242},
 {'labels': [8.0], 'rect': (152, 0, 68, 8), 'size': 292},
 {'labels': [9.0], 'rect': (214, 0, 22, 7), 'size': 96},
 {'labels': [10.0], 'rect': (232, 0, 32, 8), 'size': 189},
 {'labels': [11.0], 'rect': (265, 0, 20, 9), 'size': 114},
 {'labels': [12.0], 'rect': (269, 0, 26, 5), 'size': 118},
 {'labels': [13.0], 'rect': (291, 0, 22, 15), 'size': 218},
 {'labels': [14.0], 'rect': (307, 0, 20, 15), 'size': 145},
 {'labels': [15.0], 'rect': (312, 0, 18, 6), 'size': 81},
 {'labels': [16.0], 'rect': (325, 0, 28, 9), 'size': 115},
 {'labels': [17.0], 'rect': (339, 0, 19, 7), 'size': 93},
 {'labels': [18.0], 'rect': (358, 0, 25, 5), 'size': 92},
 {'labels': [19.0], 'rect': (384, 0, 11, 5), 'size': 55},
 {'labels': [20.0], 'rect': (384, 0, 18, 12), 'size': 126},
 {'labels': [21.0], 'rect': (398, 0, 27, 8), 'size': 163},
 {'labels': [22.0], 'rect': (402, 0, 31, 15), 'size': 191},
 {'labels': [23.0], 'rect': (429, 0, 15, 9), 'size': 142},
 {'labels': [24.0], 'rect': (432, 0, 19, 15), 'size': 136},
 {'labels': [25.0], 'rect': (447, 0, 22, 15), 'size': 161},
 {'labels': [26.0], 'rect': (456, 0, 26, 10), 'size': 206},
 {'labels': [27.0], 'rect': (472, 0, 16, 7), 'size': 72},
 {'labels': [28.0], 'rect': (487, 0, 16, 17), 'size': 139},
 {'labels': [29.0], 'rect': (493, 0, 37, 28), 'size': 503},
 {'labels': [30.0], 'rect': (529, 0, 16, 24), 'size': 234},
 {'labels': [31.0], 'rect': (546, 0, 12, 11), 'size': 118},
 {'labels': [32.0], 'rect': (539, 0, 21, 61), 'size': 209},
 {'labels': [33.0], 'rect': (560, 0, 11, 10), 'size': 97},
 {'labels': [34.0], 'rect': (567, 0, 14, 26), 'size': 107},
 {'labels': [35.0], 'rect': (579, 0, 19, 17), 'size': 225},
 {'labels': [36.0], 'rect': (591, 0, 31, 20), 'size': 232},
 {'labels': [37.0], 'rect': (613, 0, 18, 10), 'size': 94},
 {'labels': [38.0], 'rect': (626, 0, 13, 13), 'size': 124},
 {'labels': [39.0], 'rect': (10, 2, 14, 9), 'size': 70},
 {'labels': [40.0], 'rect': (114, 2, 21, 9), 'size': 131},
 {'labels': [41.0], 'rect': (221, 2, 16, 10), 'size': 107},
 {'labels': [42.0], 'rect': (354, 2, 43, 14), 'size': 224},
 {'labels': [43.0], 'rect': (90, 3, 13, 12), 'size': 95},
 {'labels': [44.0], 'rect': (199, 3, 36, 23), 'size': 365},
 {'labels': [45.0], 'rect': (320, 3, 17, 9), 'size': 90},
 {'labels': [46.0], 'rect': (160, 4, 39, 11), 'size': 125},
 {'labels': [47.0], 'rect': (280, 4, 19, 11), 'size': 122},
 {'labels': [48.0], 'rect': (336, 4, 23, 11), 'size': 132},
 {'labels': [49.0], 'rect': (0, 5, 51, 18), 'size': 243},
 {'labels': [50.0], 'rect': (99, 5, 14, 6), 'size': 71},
 {'labels': [51.0], 'rect': (208, 5, 14, 10), 'size': 75},
 {'labels': [52.0], 'rect': (236, 5, 33, 7), 'size': 124},
 {'labels': [53.0], 'rect': (479, 5, 25, 18), 'size': 184},
 {'labels': [54.0], 'rect': (496, 5, 23, 17), 'size': 171},
 {'labels': [55.0], 'rect': (89, 6, 22, 22), 'size': 280},
 {'labels': [56.0], 'rect': (104, 6, 44, 17), 'size': 402},
 {'labels': [57.0], 'rect': (169, 6, 29, 8), 'size': 170},
 {'labels': [58.0], 'rect': (600, 6, 21, 40), 'size': 270},
 {'labels': [59.0], 'rect': (0, 7, 15, 11), 'size': 129},
 {'labels': [60.0], 'rect': (358, 7, 15, 5), 'size': 60},
 {'labels': [61.0], 'rect': (549, 7, 9, 16), 'size': 67},
 {'labels': [62.0], 'rect': (574, 7, 26, 27), 'size': 140},
 {'labels': [63.0], 'rect': (146, 8, 22, 14), 'size': 165},
 {'labels': [64.0], 'rect': (160, 8, 35, 12), 'size': 163},
 {'labels': [65.0], 'rect': (238, 8, 47, 9), 'size': 241},
 {'labels': [66.0], 'rect': (470, 8, 15, 12), 'size': 81},
 {'labels': [67.0], 'rect': (527, 8, 9, 13), 'size': 69},
 {'labels': [68.0], 'rect': (576, 8, 13, 20), 'size': 139},
 {'labels': [69.0], 'rect': (62, 9, 25, 45), 'size': 226},
 {'labels': [70.0], 'rect': (540, 9, 12, 6), 'size': 62},
 {'labels': [71.0], 'rect': (560, 9, 11, 46), 'size': 175},
 {'labels': [72.0], 'rect': (611, 9, 20, 15), 'size': 104},
 {'labels': [73.0], 'rect': (622, 9, 17, 14), 'size': 149},
 {'labels': [74.0], 'rect': (31, 10, 19, 20), 'size': 134},
 {'labels': [75.0], 'rect': (53, 10, 15, 30), 'size': 195},
 {'labels': [76.0], 'rect': (342, 10, 44, 13), 'size': 351},
 {'labels': [77.0], 'rect': (394, 10, 38, 6), 'size': 102},
 {'labels': [78.0], 'rect': (454, 11, 24, 10), 'size': 131},
 {'labels': [79.0], 'rect': (62, 12, 22, 21), 'size': 273},
 {'labels': [80.0], 'rect': (313, 12, 30, 11), 'size': 232},
 {'labels': [81.0], 'rect': (544, 12, 14, 19), 'size': 175},
 {'labels': [82.0], 'rect': (231, 13, 16, 7), 'size': 76},
 {'labels': [83.0], 'rect': (236, 13, 25, 5), 'size': 57},
 {'labels': [84.0], 'rect': (249, 13, 31, 6), 'size': 75},
 {'labels': [85.0], 'rect': (282, 13, 30, 15), 'size': 226},
 {'labels': [86.0], 'rect': (408, 13, 20, 13), 'size': 127},
 {'labels': [87.0], 'rect': (440, 13, 17, 12), 'size': 97},
 {'labels': [88.0], 'rect': (156, 14, 56, 13), 'size': 233},
 {'labels': [89.0], 'rect': (296, 14, 17, 9), 'size': 122},
 {'labels': [90.0], 'rect': (392, 14, 16, 9), 'size': 101},
 {'labels': [91.0], 'rect': (588, 14, 9, 13), 'size': 76},
 {'labels': [92.0], 'rect': (429, 15, 10, 8), 'size': 56},
 {'labels': [93.0], 'rect': (450, 15, 34, 14), 'size': 223},
 {'labels': [94.0], 'rect': (0, 16, 23, 12), 'size': 141},
 {'labels': [95.0], 'rect': (247, 16, 16, 8), 'size': 63},
 {'labels': [96.0], 'rect': (339, 16, 18, 7), 'size': 100},
 {'labels': [97.0], 'rect': (384, 16, 17, 12), 'size': 83},
 {'labels': [98.0], 'rect': (515, 16, 12, 28), 'size': 146},
 {'labels': [99.0], 'rect': (561, 16, 11, 27), 'size': 201},
 {'labels': [100.0], 'rect': (575, 16, 8, 11), 'size': 58},
 {'labels': [101.0], 'rect': (621, 16, 18, 15), 'size': 148},
 {'labels': [102.0], 'rect': (32, 17, 16, 27), 'size': 182},
 {'labels': [103.0], 'rect': (606, 17, 13, 11), 'size': 91},
 {'labels': [104.0], 'rect': (48, 18, 8, 45), 'size': 202},
 {'labels': [105.0], 'rect': (106, 18, 21, 8), 'size': 113},
 {'labels': [106.0], 'rect': (262, 18, 26, 9), 'size': 168},
 {'labels': [107.0], 'rect': (400, 18, 15, 9), 'size': 95},
 {'labels': [108.0], 'rect': (432, 18, 13, 8), 'size': 65},
 {'labels': [109.0], 'rect': (493, 18, 23, 17), 'size': 190},
 {'labels': [110.0], 'rect': (536, 18, 7, 13), 'size': 78},
 {'labels': [111.0], 'rect': (5, 19, 34, 12), 'size': 182},
 {'labels': [112.0], 'rect': (59, 19, 12, 20), 'size': 139},
 {'labels': [113.0], 'rect': (129, 19, 22, 4), 'size': 70},
 {'labels': [114.0], 'rect': (166, 19, 40, 6), 'size': 123},
 {'labels': [115.0], 'rect': (226, 19, 31, 10), 'size': 169},
 {'labels': [116.0], 'rect': (310, 19, 25, 9), 'size': 134},
 {'labels': [117.0], 'rect': (409, 19, 16, 7), 'size': 58},
 {'labels': [118.0], 'rect': (335, 20, 32, 22), 'size': 266},
 {'labels': [119.0], 'rect': (485, 20, 30, 18), 'size': 170},
 {'labels': [120.0], 'rect': (528, 20, 9, 11), 'size': 72},
 {'labels': [121.0], 'rect': (355, 21, 29, 11), 'size': 163},
 {'labels': [122.0], 'rect': (440, 21, 13, 10), 'size': 74},
 {'labels': [123.0], 'rect': (127, 22, 23, 9), 'size': 64},
 {'labels': [124.0], 'rect': (136, 22, 17, 6), 'size': 62},
 {'labels': [125.0], 'rect': (136, 22, 21, 9), 'size': 106},
 {'labels': [126.0], 'rect': (156, 22, 37, 13), 'size': 249},
 {'labels': [127.0], 'rect': (198, 22, 22, 9), 'size': 93},
 {'labels': [128.0], 'rect': (384, 22, 13, 8), 'size': 57},
 {'labels': [129.0], 'rect': (536, 22, 22, 25), 'size': 192},
 {'labels': [130.0], 'rect': (593, 22, 7, 11), 'size': 64},
 {'labels': [131.0], 'rect': (240, 23, 45, 13), 'size': 342},
 {'labels': [132.0], 'rect': (343, 23, 18, 6), 'size': 61},
 {'labels': [133.0], 'rect': (380, 23, 15, 18), 'size': 108},
 {'labels': [134.0], 'rect': (428, 23, 21, 14), 'size': 143},
 {'labels': [135.0], 'rect': (483, 23, 7, 10), 'size': 61},
 {'labels': [136.0], 'rect': (120, 24, 10, 7), 'size': 54},
 {'labels': [137.0], 'rect': (395, 24, 20, 18), 'size': 169},
 {'labels': [138.0], 'rect': (416, 24, 15, 11), 'size': 95},
 {'labels': [139.0], 'rect': (452, 24, 11, 8), 'size': 53},
 {'labels': [140.0], 'rect': (456, 24, 26, 13), 'size': 164},
 {'labels': [141.0], 'rect': (604, 24, 20, 32), 'size': 331},
 {'labels': [142.0], 'rect': (613, 24, 26, 31), 'size': 395},
 {'labels': [143.0], 'rect': (185, 25, 32, 9), 'size': 168},
 {'labels': [144.0], 'rect': (219, 25, 12, 6), 'size': 61},
 {'labels': [145.0], 'rect': (329, 25, 30, 17), 'size': 165},
 {'labels': [146.0], 'rect': (0, 26, 12, 9), 'size': 99},
 {'labels': [147.0], 'rect': (13, 26, 14, 9), 'size': 82},
 {'labels': [148.0], 'rect': (89, 26, 22, 15), 'size': 136},
 {'labels': [149.0], 'rect': (91, 26, 28, 8), 'size': 86},
 {'labels': [150.0], 'rect': (166, 26, 26, 8), 'size': 106},
 {'labels': [151.0], 'rect': (224, 26, 23, 10), 'size': 101},
 {'labels': [152.0], 'rect': (303, 26, 25, 6), 'size': 95},
 {'labels': [153.0], 'rect': (526, 26, 17, 18), 'size': 154},
 {'labels': [154.0], 'rect': (575, 26, 11, 14), 'size': 126},
 {'labels': [155.0], 'rect': (361, 27, 15, 11), 'size': 100},
 {'labels': [156.0], 'rect': (519, 27, 12, 12), 'size': 64},
 {'labels': [157.0], 'rect': (561, 27, 13, 38), 'size': 304},
 {'labels': [158.0], 'rect': (65, 28, 21, 26), 'size': 294},
 {'labels': [159.0], 'rect': (102, 28, 20, 7), 'size': 98},
 {'labels': [160.0], 'rect': (112, 28, 23, 8), 'size': 84},
 {'labels': [161.0], 'rect': (209, 28, 22, 13), 'size': 144},
 {'labels': [162.0], 'rect': (242, 28, 19, 10), 'size': 82},
 {'labels': [163.0], 'rect': (280, 28, 31, 7), 'size': 113},
 {'labels': [164.0], 'rect': (444, 28, 19, 18), 'size': 149},
 {'labels': [165.0], 'rect': (8, 29, 33, 18), 'size': 149},
 {'labels': [166.0], 'rect': (96, 29, 7, 8), 'size': 51},
 {'labels': [167.0], 'rect': (120, 29, 41, 6), 'size': 75},
 {'labels': [168.0], 'rect': (309, 29, 44, 18), 'size': 270},
 {'labels': [169.0], 'rect': (406, 29, 13, 14), 'size': 132},
 {'labels': [170.0], 'rect': (417, 29, 21, 9), 'size': 100},
 {'labels': [171.0], 'rect': (15, 30, 30, 12), 'size': 223},
 {'labels': [172.0], 'rect': (191, 30, 21, 8), 'size': 72},
 {'labels': [173.0], 'rect': (454, 30, 26, 19), 'size': 241},
 {'labels': [174.0], 'rect': (592, 30, 11, 19), 'size': 86},
 {'labels': [175.0], 'rect': (128, 31, 33, 7), 'size': 82},
 {'labels': [176.0], 'rect': (53, 32, 14, 24), 'size': 155},
 {'labels': [177.0], 'rect': (136, 32, 30, 9), 'size': 91},
 {'labels': [178.0], 'rect': (169, 32, 39, 9), 'size': 147},
 {'labels': [179.0], 'rect': (238, 32, 33, 14), 'size': 215},
 {'labels': [180.0], 'rect': (289, 32, 39, 11), 'size': 214},
 {'labels': [181.0], 'rect': (374, 32, 11, 8), 'size': 62},
 {'labels': [182.0], 'rect': (488, 32, 15, 9), 'size': 67},
 {'labels': [183.0], 'rect': (278, 33, 34, 15), 'size': 224},
 {'labels': [184.0], 'rect': (582, 33, 22, 21), 'size': 170},
 {'labels': [185.0], 'rect': (232, 34, 15, 8), 'size': 72},
 {'labels': [186.0], 'rect': (320, 34, 23, 7), 'size': 56},
 {'labels': [187.0], 'rect': (480, 34, 30, 18), 'size': 317},
 {'labels': [188.0], 'rect': (583, 34, 8, 13), 'size': 74},
 {'labels': [189.0], 'rect': (165, 35, 19, 4), 'size': 72},
 {'labels': [190.0], 'rect': (368, 35, 26, 9), 'size': 160},
 {'labels': [191.0], 'rect': (428, 35, 21, 8), 'size': 97},
 {'labels': [192.0], 'rect': (0, 36, 10, 7), 'size': 76},
 {'labels': [193.0], 'rect': (89, 36, 45, 10), 'size': 169},
 {'labels': [194.0], 'rect': (97, 36, 50, 13), 'size': 167},
 {'labels': [195.0], 'rect': (262, 36, 17, 9), 'size': 102},
 {'labels': [196.0], 'rect': (414, 36, 41, 24), 'size': 422},
 {'labels': [197.0], 'rect': (508, 36, 15, 11), 'size': 109},
 {'labels': [198.0], 'rect': (190, 37, 40, 7), 'size': 94},
 {'labels': [199.0], 'rect': (214, 37, 33, 9), 'size': 97},
 {'labels': [200.0], 'rect': (302, 37, 33, 11), 'size': 159},
 {'labels': [201.0], 'rect': (536, 37, 17, 13), 'size': 110},
 {'labels': [202.0], 'rect': (562, 37, 16, 54), 'size': 273},
 {'labels': [203.0], 'rect': (128, 38, 38, 6), 'size': 152},
 {'labels': [204.0], 'rect': (547, 38, 11, 14), 'size': 70},
 {'labels': [205.0], 'rect': (25, 39, 22, 22), 'size': 165},
 {'labels': [206.0], 'rect': (160, 39, 28, 12), 'size': 214},
 {'labels': [207.0], 'rect': (526, 39, 17, 18), 'size': 145},
 {'labels': [208.0], 'rect': (23, 40, 17, 6), 'size': 64},
 {'labels': [209.0], 'rect': (347, 40, 22, 7), 'size': 106},
 {'labels': [210.0], 'rect': (394, 40, 15, 7), 'size': 88},
 {'labels': [211.0], 'rect': (478, 40, 19, 13), 'size': 118},
 {'labels': [212.0], 'rect': (576, 40, 27, 30), 'size': 369},
 {'labels': [213.0], 'rect': (616, 40, 15, 23), 'size': 233},
 {'labels': [214.0], 'rect': (184, 41, 16, 4), 'size': 51},
 {'labels': [215.0], 'rect': (211, 41, 28, 9), 'size': 133},
 {'labels': [216.0], 'rect': (462, 41, 13, 7), 'size': 60},
 {'labels': [217.0], 'rect': (292, 42, 26, 9), 'size': 72},
 {'labels': [218.0], 'rect': (504, 42, 30, 18), 'size': 231},
 {'labels': [219.0], 'rect': (0, 43, 11, 12), 'size': 95},
 {'labels': [220.0], 'rect': (128, 43, 34, 6), 'size': 103},
 {'labels': [221.0], 'rect': (274, 43, 37, 12), 'size': 201},
 {'labels': [222.0], 'rect': (354, 43, 29, 11), 'size': 182},
 {'labels': [223.0], 'rect': (374, 43, 19, 8), 'size': 112},
 {'labels': [224.0], 'rect': (408, 43, 25, 22), 'size': 222},
 {'labels': [225.0], 'rect': (444, 43, 30, 13), 'size': 151},
 {'labels': [226.0], 'rect': (0, 44, 25, 15), 'size': 246},
 {'labels': [227.0], 'rect': (23, 44, 21, 17), 'size': 155},
 {'labels': [228.0], 'rect': (89, 44, 30, 7), 'size': 73},
 {'labels': [229.0], 'rect': (89, 44, 46, 17), 'size': 232},
 {'labels': [230.0], 'rect': (180, 44, 36, 6), 'size': 115},
 {'labels': [231.0], 'rect': (238, 44, 35, 9), 'size': 164},
 {'labels': [232.0], 'rect': (392, 44, 20, 18), 'size': 150},
 {'labels': [233.0], 'rect': (408, 44, 14, 9), 'size': 96},
 {'labels': [234.0], 'rect': (68, 45, 17, 16), 'size': 99},
 {'labels': [235.0], 'rect': (78, 45, 6, 12), 'size': 58},
 {'labels': [236.0], 'rect': (198, 45, 21, 11), 'size': 110},
 {'labels': [237.0], 'rect': (329, 45, 24, 7), 'size': 125},
 {'labels': [238.0], 'rect': (136, 46, 25, 9), 'size': 172},
 {'labels': [239.0], 'rect': (59, 47, 24, 22), 'size': 317},
 {'labels': [240.0], 'rect': (167, 47, 32, 5), 'size': 78},
 {'labels': [241.0], 'rect': (216, 47, 21, 10), 'size': 123},
 {'labels': [242.0], 'rect': (262, 47, 37, 16), 'size': 165},
 {'labels': [243.0], 'rect': (107, 48, 33, 18), 'size': 259},
 {'labels': [244.0], 'rect': (160, 48, 48, 11), 'size': 219},
 {'labels': [245.0], 'rect': (207, 48, 20, 13), 'size': 115},
 {'labels': [246.0], 'rect': (236, 48, 52, 10), 'size': 208},
 {'labels': [247.0], 'rect': (307, 48, 44, 23), 'size': 276},
 {'labels': [248.0], 'rect': (315, 48, 29, 9), 'size': 100},
 {'labels': [249.0], 'rect': (469, 48, 36, 23), 'size': 180},
 {'labels': [250.0], 'rect': (488, 48, 43, 17), 'size': 181},
 {'labels': [251.0], 'rect': (607, 48, 14, 22), 'size': 110},
 {'labels': [252.0], 'rect': (344, 49, 28, 10), 'size': 65},
 {'labels': [253.0], 'rect': (607, 49, 32, 46), 'size': 747},
 {'labels': [254.0], 'rect': (384, 50, 27, 15), 'size': 154},
 {'labels': [255.0], 'rect': (428, 50, 14, 13), 'size': 104},
 {'labels': [256.0], 'rect': (477, 50, 35, 19), 'size': 183},
 {'labels': [257.0], 'rect': (531, 50, 18, 25), 'size': 218},
 {'labels': [258.0], 'rect': (230, 51, 17, 6), 'size': 57},
 {'labels': [259.0], 'rect': (359, 51, 24, 13), 'size': 110},
 {'labels': [260.0], 'rect': (578, 51, 14, 24), 'size': 118},
 {'labels': [261.0], 'rect': (136, 52, 35, 7), 'size': 162},
 {'labels': [262.0], 'rect': (456, 52, 17, 13), 'size': 120},
 {'labels': [263.0], 'rect': (272, 53, 26, 11), 'size': 103},
 {'labels': [264.0], 'rect': (297, 53, 41, 15), 'size': 174},
 {'labels': [265.0], 'rect': (376, 53, 23, 11), 'size': 138},
 {'labels': [266.0], 'rect': (471, 53, 7, 14), 'size': 52},
 {'labels': [267.0], 'rect': (40, 54, 9, 43), 'size': 167},
 {'labels': [268.0], 'rect': (340, 54, 41, 11), 'size': 187},
 {'labels': [269.0], 'rect': (82, 55, 4, 33), 'size': 61},
 {'labels': [270.0], 'rect': (87, 55, 25, 48), 'size': 284},
 {'labels': [271.0], 'rect': (223, 55, 72, 16), 'size': 470},
 {'labels': [272.0], 'rect': (512, 55, 13, 14), 'size': 149},
 {'labels': [273.0], 'rect': (592, 55, 17, 30), 'size': 174},
 {'labels': [274.0], 'rect': (0, 56, 15, 6), 'size': 69},
 {'labels': [275.0], 'rect': (16, 56, 27, 11), 'size': 204},
 {'labels': [276.0], 'rect': (54, 56, 9, 12), 'size': 63},
 {'labels': [277.0], 'rect': (54, 56, 14, 25), 'size': 160},
 {'labels': [278.0], 'rect': (168, 56, 23, 6), 'size': 104},
 {'labels': [279.0], 'rect': (446, 56, 8, 18), 'size': 121},
 {'labels': [280.0], 'rect': (454, 56, 33, 17), 'size': 175},
 {'labels': [281.0], 'rect': (480, 56, 15, 10), 'size': 83},
 {'labels': [282.0], 'rect': (524, 56, 19, 18), 'size': 104},
 {'labels': [283.0], 'rect': (546, 56, 15, 51), 'size': 230},
 {'labels': [284.0], 'rect': (630, 56, 9, 11), 'size': 90},
 {'labels': [285.0], 'rect': (141, 57, 13, 9), 'size': 79},
 {'labels': [286.0], 'rect': (420, 57, 13, 6), 'size': 59},
 {'labels': [287.0], 'rect': (599, 57, 7, 13), 'size': 74},
 {'labels': [288.0], 'rect': (96, 58, 10, 8), 'size': 72},
 {'labels': [289.0], 'rect': (102, 58, 19, 10), 'size': 135},
 {'labels': [290.0], 'rect': (176, 58, 52, 13), 'size': 221},
 {'labels': [291.0], 'rect': (340, 58, 50, 17), 'size': 351},
 {'labels': [292.0], 'rect': (548, 58, 11, 12), 'size': 86},
 {'labels': [293.0], 'rect': (550, 58, 10, 34), 'size': 164},
 {'labels': [294.0], 'rect': (128, 59, 15, 9), 'size': 107},
 {'labels': [295.0], 'rect': (150, 59, 17, 9), 'size': 86},
 {'labels': [296.0], 'rect': (291, 59, 53, 15), 'size': 262},
 {'labels': [297.0], 'rect': (46, 60, 13, 32), 'size': 179},
 {'labels': [298.0], 'rect': (243, 60, 20, 9), 'size': 118},
 {'labels': [299.0], 'rect': (411, 60, 30, 11), 'size': 125},
 {'labels': [300.0], 'rect': (453, 60, 18, 14), 'size': 148},
 {'labels': [301.0], 'rect': (0, 61, 24, 7), 'size': 119},
 {'labels': [302.0], 'rect': (25, 61, 18, 18), 'size': 135},
 {'labels': [303.0], 'rect': (142, 61, 25, 11), 'size': 117},
 {'labels': [304.0], 'rect': (156, 61, 27, 17), 'size': 187},
 {'labels': [305.0], 'rect': (320, 61, 14, 4), 'size': 52},
 {'labels': [306.0], 'rect': (392, 61, 23, 12), 'size': 134},
 {'labels': [307.0], 'rect': (433, 61, 26, 23), 'size': 293},
 {'labels': [308.0], 'rect': (88, 62, 15, 10), 'size': 94},
 {'labels': [309.0], 'rect': (174, 63, 26, 9), 'size': 131},
 {'labels': [310.0], 'rect': (196, 63, 21, 5), 'size': 66},
 {'labels': [311.0], 'rect': (266, 63, 24, 3), 'size': 51},
 {'labels': [312.0], 'rect': (382, 63, 30, 14), 'size': 139},
 {'labels': [313.0], 'rect': (503, 63, 25, 22), 'size': 202},
 {'labels': [314.0], 'rect': (0, 64, 31, 15), 'size': 238},
 {'labels': [315.0], 'rect': (67, 64, 17, 24), 'size': 162},
 {'labels': [316.0], 'rect': (104, 64, 45, 11), 'size': 232},
 {'labels': [317.0], 'rect': (218, 64, 25, 20), 'size': 155},
 {'labels': [318.0], 'rect': (258, 64, 10, 11), 'size': 82},
 {'labels': [319.0], 'rect': (478, 64, 12, 7), 'size': 55},
 {'labels': [320.0], 'rect': (523, 64, 18, 20), 'size': 218},
 {'labels': [321.0], 'rect': (579, 64, 11, 12), 'size': 98},
 {'labels': [322.0], 'rect': (234, 65, 24, 12), 'size': 203},
 {'labels': [323.0], 'rect': (268, 65, 28, 9), 'size': 84},
 {'labels': [324.0], 'rect': (294, 65, 23, 15), 'size': 106},
 {'labels': [325.0], 'rect': (426, 65, 14, 14), 'size': 149},
 {'labels': [326.0], 'rect': (266, 66, 31, 13), 'size': 95},
 {'labels': [327.0], 'rect': (304, 66, 23, 9), 'size': 114},
 {'labels': [328.0], 'rect': (344, 66, 15, 5), 'size': 77},
 {'labels': [329.0], 'rect': (75, 67, 8, 10), 'size': 59},
 {'labels': [330.0], 'rect': (273, 67, 36, 13), 'size': 140},
 {'labels': [331.0], 'rect': (410, 67, 16, 19), 'size': 152},
 {'labels': [332.0], 'rect': (421, 67, 42, 39), 'size': 340},
 {'labels': [333.0], 'rect': (490, 67, 13, 20), 'size': 140},
 {'labels': [334.0], 'rect': (10, 68, 21, 11), 'size': 96},
 {'labels': [335.0], 'rect': (180, 68, 27, 7), 'size': 113},
 {'labels': [336.0], 'rect': (204, 68, 19, 6), 'size': 92},
 {'labels': [337.0], 'rect': (88, 69, 23, 17), 'size': 179},
 {'labels': [338.0], 'rect': (585, 69, 18, 17), 'size': 164},
 {'labels': [339.0], 'rect': (123, 70, 34, 5), 'size': 75},
 {'labels': [340.0], 'rect': (343, 71, 17, 5), 'size': 62},
 {'labels': [341.0], 'rect': (385, 71, 22, 16), 'size': 112},
 {'labels': [342.0], 'rect': (472, 71, 20, 8), 'size': 120},
 {'labels': [343.0], 'rect': (563, 71, 13, 59), 'size': 369},
 {'labels': [344.0], 'rect': (565, 71, 15, 46), 'size': 299},
 {'labels': [345.0], 'rect': (17, 72, 24, 14), 'size': 77},
 {'labels': [346.0], 'rect': (102, 72, 28, 6), 'size': 82},
 {'labels': [347.0], 'rect': (137, 72, 30, 7), 'size': 122},
 {'labels': [348.0], 'rect': (269, 72, 18, 9), 'size': 81},
 {'labels': [349.0], 'rect': (340, 72, 45, 17), 'size': 204},
 {'labels': [350.0], 'rect': (352, 72, 32, 12), 'size': 165},
 {'labels': [351.0], 'rect': (392, 72, 18, 10), 'size': 108},
 {'labels': [352.0], 'rect': (448, 72, 17, 20), 'size': 128},
 {'labels': [353.0], 'rect': (496, 72, 19, 15), 'size': 210},
 {'labels': [354.0], 'rect': (539, 72, 16, 13), 'size': 135},
 {'labels': [355.0], 'rect': (17, 73, 24, 18), 'size': 180},
 {'labels': [356.0], 'rect': (102, 73, 39, 12), 'size': 272},
 {'labels': [357.0], 'rect': (136, 73, 42, 16), 'size': 274},
 {'labels': [358.0], 'rect': (202, 73, 21, 6), 'size': 88},
 {'labels': [359.0], 'rect': (231, 73, 53, 14), 'size': 312},
 {'labels': [360.0], 'rect': (314, 73, 29, 23), 'size': 198},
 {'labels': [361.0], 'rect': (378, 73, 28, 17), 'size': 68},
 {'labels': [362.0], 'rect': (461, 73, 15, 18), 'size': 162},
 {'labels': [363.0], 'rect': (182, 74, 13, 4), 'size': 60},
 {'labels': [364.0], 'rect': (184, 74, 36, 14), 'size': 221},
 {'labels': [365.0], 'rect': (65, 76, 19, 24), 'size': 167},
 {'labels': [366.0], 'rect': (284, 76, 27, 7), 'size': 109},
 {'labels': [367.0], 'rect': (477, 76, 15, 12), 'size': 113},
 {'labels': [368.0], 'rect': (579, 76, 22, 32), 'size': 337},
 {'labels': [369.0], 'rect': (613, 76, 18, 34), 'size': 230},
 {'labels': [370.0], 'rect': (46, 77, 16, 19), 'size': 145},
 {'labels': [371.0], 'rect': (306, 77, 16, 13), 'size': 160},
 {'labels': [372.0], 'rect': (315, 77, 19, 16), 'size': 131},
 {'labels': [373.0], 'rect': (339, 77, 47, 25), 'size': 450},
 {'labels': [374.0], 'rect': (0, 78, 25, 12), 'size': 172},
 {'labels': [375.0], 'rect': (177, 78, 18, 11), 'size': 95},
 {'labels': [376.0], 'rect': (84, 79, 1, 48), 'size': 68},
 {'labels': [377.0], 'rect': (35, 80, 20, 54), 'size': 246},
 {'labels': [378.0], 'rect': (156, 80, 25, 11), 'size': 209},
 {'labels': [379.0], 'rect': (218, 80, 17, 17), 'size': 155},
 {'labels': [380.0], 'rect': (280, 80, 14, 15), 'size': 111},
 {'labels': [381.0], 'rect': (408, 80, 37, 25), 'size': 128},
 {'labels': [382.0], 'rect': (512, 80, 22, 24), 'size': 295},
 {'labels': [383.0], 'rect': (531, 80, 25, 15), 'size': 234},
 {'labels': [384.0], 'rect': (599, 80, 11, 19), 'size': 158},
 {'labels': [385.0], 'rect': (615, 80, 16, 54), 'size': 372},
 {'labels': [386.0], 'rect': (76, 81, 7, 12), 'size': 64},
 {'labels': [387.0], 'rect': (232, 81, 31, 8), 'size': 172},
 {'labels': [388.0], 'rect': (296, 81, 26, 12), 'size': 101},
 {'labels': [389.0], 'rect': (340, 81, 13, 17), 'size': 99},
 {'labels': [390.0], 'rect': (391, 81, 25, 7), 'size': 73},
 {'labels': [391.0], 'rect': (0, 82, 17, 18), 'size': 193},
 {'labels': [392.0], 'rect': (55, 82, 26, 24), 'size': 367},
 {'labels': [393.0], 'rect': (138, 82, 13, 5), 'size': 59},
 {'labels': [394.0], 'rect': (192, 82, 23, 7), 'size': 96},
 {'labels': [395.0], 'rect': (285, 82, 10, 8), 'size': 61},
 {'labels': [396.0], 'rect': (385, 82, 42, 21), 'size': 327},
 {'labels': [397.0], 'rect': (435, 82, 36, 16), 'size': 187},
 {'labels': [398.0], 'rect': (504, 82, 19, 13), 'size': 130},
 {'labels': [399.0], 'rect': (629, 82, 10, 28), 'size': 159},
 {'labels': [400.0], 'rect': (112, 83, 21, 6), 'size': 85},
 {'labels': [401.0], 'rect': (470, 83, 14, 9), 'size': 66},
 {'labels': [402.0], 'rect': (23, 84, 16, 22), 'size': 105},
 {'labels': [403.0], 'rect': (177, 84, 41, 11), 'size': 186},
 {'labels': [404.0], 'rect': (267, 84, 15, 9), 'size': 72},
 {'labels': [405.0], 'rect': (288, 84, 46, 16), 'size': 253},
 {'labels': [406.0], 'rect': (114, 85, 34, 10), 'size': 227},
 {'labels': [407.0], 'rect': (231, 85, 32, 11), 'size': 197},
 {'labels': [408.0], 'rect': (463, 85, 24, 14), 'size': 113},
 {'labels': [409.0], 'rect': (474, 85, 15, 12), 'size': 95},
 {'labels': [410.0], 'rect': (491, 85, 31, 23), 'size': 200},
 {'labels': [411.0], 'rect': (325, 86, 8, 10), 'size': 67},
 {'labels': [412.0], 'rect': (96, 87, 19, 7), 'size': 94},
 {'labels': [413.0], 'rect': (408, 87, 15, 11), 'size': 100},
 {'labels': [414.0], 'rect': (0, 88, 37, 20), 'size': 346},
 {'labels': [415.0], 'rect': (127, 88, 34, 12), 'size': 209},
 {'labels': [416.0], 'rect': (241, 88, 56, 10), 'size': 273},
 {'labels': [417.0], 'rect': (346, 88, 21, 10), 'size': 186},
 {'labels': [418.0], 'rect': (424, 88, 24, 16), 'size': 126},
 {'labels': [419.0], 'rect': (490, 88, 22, 20), 'size': 142},
 {'labels': [420.0], 'rect': (581, 88, 15, 14), 'size': 92},
 {'labels': [421.0], 'rect': (86, 89, 9, 66), 'size': 139},
 {'labels': [422.0], 'rect': (170, 89, 13, 6), 'size': 64},
 {'labels': [423.0], 'rect': (184, 89, 23, 9), 'size': 140},
 {'labels': [424.0], 'rect': (219, 89, 20, 14), 'size': 133},
 {'labels': [425.0], 'rect': (158, 90, 14, 9), 'size': 87},
 {'labels': [426.0], 'rect': (531, 90, 21, 16), 'size': 118},
 {'labels': [427.0], 'rect': (424, 91, 7, 12), 'size': 73},
 {'labels': [428.0], 'rect': (605, 91, 8, 19), 'size': 110},
 {'labels': [429.0], 'rect': (49, 92, 8, 9), 'size': 53},
 {'labels': [430.0], 'rect': (93, 92, 18, 11), 'size': 96},
 {'labels': [431.0], 'rect': (333, 92, 31, 24), 'size': 241},
 {'labels': [432.0], 'rect': (532, 92, 30, 38), 'size': 532},
 {'labels': [433.0], 'rect': (98, 93, 21, 16), 'size': 156},
 {'labels': [434.0], 'rect': (166, 93, 20, 12), 'size': 123},
 {'labels': [435.0], 'rect': (208, 93, 13, 10), 'size': 101},
 {'labels': [436.0], 'rect': (488, 93, 15, 11), 'size': 101},
 {'labels': [437.0], 'rect': (112, 94, 18, 8), 'size': 83},
 {'labels': [438.0], 'rect': (184, 95, 25, 12), 'size': 157},
 {'labels': [439.0], 'rect': (462, 95, 17, 18), 'size': 130},
 {'labels': [440.0], 'rect': (129, 96, 48, 12), 'size': 267},
 {'labels': [441.0], 'rect': (238, 96, 16, 7), 'size': 64},
 {'labels': [442.0], 'rect': (297, 96, 20, 13), 'size': 135},
 {'labels': [443.0], 'rect': (376, 96, 41, 15), 'size': 288},
 {'labels': [444.0], 'rect': (444, 96, 16, 17), 'size': 116},
 {'labels': [445.0], 'rect': (254, 97, 24, 14), 'size': 140},
 {'labels': [446.0], 'rect': (15, 98, 22, 18), 'size': 193},
 {'labels': [447.0], 'rect': (49, 98, 17, 18), 'size': 145},
 {'labels': [448.0], 'rect': (210, 98, 37, 29), 'size': 422},
 {'labels': [449.0], 'rect': (235, 98, 24, 22), 'size': 159},
 {'labels': [450.0], 'rect': (267, 98, 26, 11), 'size': 167},
 {'labels': [451.0], 'rect': (280, 98, 17, 5), 'size': 72},
 {'labels': [452.0], 'rect': (309, 98, 26, 6), 'size': 97},
 {'labels': [453.0], 'rect': (342, 98, 24, 5), 'size': 88},
 {'labels': [454.0], 'rect': (373, 98, 34, 28), 'size': 432},
 {'labels': [455.0], 'rect': (432, 98, 14, 11), 'size': 54},
 {'labels': [456.0], 'rect': (470, 98, 23, 9), 'size': 159},
 {'labels': [457.0], 'rect': (581, 98, 24, 21), 'size': 243},
 {'labels': [458.0], 'rect': (422, 99, 24, 18), 'size': 117},
 {'labels': [459.0], 'rect': (462, 99, 19, 23), 'size': 118},
 {'labels': [460.0], 'rect': (58, 100, 25, 36), 'size': 223},
 {'labels': [461.0], 'rect': (87, 100, 16, 17), 'size': 169},
 {'labels': [462.0], 'rect': (117, 100, 12, 8), 'size': 79},
 {'labels': [463.0], 'rect': (43, 101, 14, 17), 'size': 143},
 {'labels': [464.0], 'rect': (192, 101, 23, 27), 'size': 337},
 {'labels': [465.0], 'rect': (205, 101, 18, 14), 'size': 154},
 {'labels': [466.0], 'rect': (313, 101, 22, 22), 'size': 261},
 {'labels': [467.0], 'rect': (359, 101, 11, 8), 'size': 70},
 {'labels': [468.0], 'rect': (410, 101, 28, 22), 'size': 262},
 {'labels': [469.0], 'rect': (505, 101, 16, 10), 'size': 61},
 {'labels': [470.0], 'rect': (73, 102, 10, 12), 'size': 88},
 {'labels': [471.0], 'rect': (161, 102, 38, 22), 'size': 352},
 {'labels': [472.0], 'rect': (360, 102, 23, 25), 'size': 332},
 {'labels': [473.0], 'rect': (507, 102, 32, 22), 'size': 368},
 {'labels': [474.0], 'rect': (0, 104, 27, 17), 'size': 210},
 {'labels': [475.0], 'rect': (126, 104, 17, 15), 'size': 123},
 {'labels': [476.0], 'rect': (134, 104, 11, 5), 'size': 56},
 {'labels': [477.0], 'rect': (235, 104, 17, 12), 'size': 103},
 {'labels': [478.0], 'rect': (255, 104, 10, 13), 'size': 77},
 {'labels': [479.0], 'rect': (287, 104, 27, 23), 'size': 165},
 {'labels': [480.0], 'rect': (293, 104, 29, 24), 'size': 252},
 {'labels': [481.0], 'rect': (330, 104, 29, 16), 'size': 192},
 {'labels': [482.0], 'rect': (393, 104, 16, 8), 'size': 66},
 {'labels': [483.0], 'rect': (456, 104, 14, 16), 'size': 131},
 {'labels': [484.0], 'rect': (477, 104, 12, 16), 'size': 68},
 {'labels': [485.0], 'rect': (481, 104, 22, 16), 'size': 187},
 {'labels': [486.0], 'rect': (496, 104, 29, 24), 'size': 221},
 {'labels': [487.0], 'rect': (96, 105, 23, 12), 'size': 107},
 {'labels': [488.0], 'rect': (110, 105, 24, 18), 'size': 271},
 {'labels': [489.0], 'rect': (150, 105, 11, 6), 'size': 62},
 {'labels': [490.0], 'rect': (618, 105, 21, 26), 'size': 273},
 {'labels': [491.0], 'rect': (282, 106, 20, 17), 'size': 201},
 {'labels': [492.0], 'rect': (589, 106, 22, 14), 'size': 189},
 {'labels': [493.0], 'rect': (440, 107, 19, 17), 'size': 128},
 {'labels': [494.0], 'rect': (566, 107, 13, 27), 'size': 214},
 {'labels': [495.0], 'rect': (142, 108, 9, 13), 'size': 91},
 {'labels': [496.0], 'rect': (264, 108, 19, 13), 'size': 115},
 {'labels': [497.0], 'rect': (390, 108, 17, 11), 'size': 123},
 {'labels': [498.0], 'rect': (158, 109, 26, 15), 'size': 240},
 {'labels': [499.0], 'rect': (312, 109, 7, 10), 'size': 53},
 {'labels': [500.0], 'rect': (322, 109, 22, 27), 'size': 283},
 {'labels': [501.0], 'rect': (567, 109, 14, 47), 'size': 277},
 {'labels': [502.0], 'rect': (581, 109, 2, 31), 'size': 57},
 {'labels': [503.0], 'rect': (7, 110, 28, 29), 'size': 281},
 {'labels': [504.0], 'rect': (69, 110, 14, 15), 'size': 77},
 {'labels': [505.0], 'rect': (400, 110, 46, 23), 'size': 423},
 {'labels': [506.0], 'rect': (433, 110, 13, 13), 'size': 103},
 {'labels': [507.0], 'rect': (606, 110, 9, 13), 'size': 85},
 {'labels': [508.0], 'rect': (0, 112, 23, 17), 'size': 254},
 {'labels': [509.0], 'rect': (92, 112, 19, 15), 'size': 159},
 {'labels': [510.0], 'rect': (130, 112, 36, 18), 'size': 290},
 {'labels': [511.0], 'rect': (223, 112, 32, 15), 'size': 339},
 {'labels': [512.0], 'rect': (526, 112, 27, 26), 'size': 244},
 {'labels': [513.0], 'rect': (585, 112, 31, 28), 'size': 463},
 {'labels': [514.0], 'rect': (87, 113, 8, 12), 'size': 67},
 {'labels': [515.0], 'rect': (176, 113, 21, 20), 'size': 206},
 {'labels': [516.0], 'rect': (495, 113, 14, 13), 'size': 103},
 {'labels': [517.0], 'rect': (260, 114, 17, 12), 'size': 159},
 {'labels': [518.0], 'rect': (41, 115, 18, 16), 'size': 161},
 {'labels': [519.0], 'rect': (82, 115, 2, 48), 'size': 85},
 {'labels': [520.0], 'rect': (344, 115, 24, 10), 'size': 141},
 {'labels': [521.0], 'rect': (64, 116, 18, 15), 'size': 88},
 {'labels': [522.0], 'rect': (584, 116, 19, 34), 'size': 144},
 {'labels': [523.0], 'rect': (32, 117, 9, 47), 'size': 168},
 {'labels': [524.0], 'rect': (106, 117, 29, 15), 'size': 212},
 {'labels': [525.0], 'rect': (474, 117, 17, 13), 'size': 134},
 {'labels': [526.0], 'rect': (564, 117, 14, 20), 'size': 82},
 {'labels': [527.0], 'rect': (620, 117, 19, 40), 'size': 366},
 {'labels': [528.0], 'rect': (73, 118, 9, 11), 'size': 70},
 {'labels': [529.0], 'rect': (453, 118, 26, 11), 'size': 160},
 {'labels': [530.0], 'rect': (542, 118, 20, 20), 'size': 142},
 {'labels': [531.0], 'rect': (50, 119, 30, 25), 'size': 440},
 {'labels': [532.0], 'rect': (278, 120, 12, 7), 'size': 74},
 {'labels': [533.0], 'rect': (288, 120, 19, 7), 'size': 91},
 {'labels': [534.0], 'rect': (334, 120, 27, 8), 'size': 116},
 {'labels': [535.0], 'rect': (486, 120, 26, 15), 'size': 174},
 {'labels': [536.0], 'rect': (564, 120, 4, 46), 'size': 89},
 {'labels': [537.0], 'rect': (247, 121, 19, 7), 'size': 87},
 {'labels': [538.0], 'rect': (87, 122, 21, 16), 'size': 199},
 {'labels': [539.0], 'rect': (271, 122, 29, 19), 'size': 216},
 {'labels': [540.0], 'rect': (447, 122, 30, 69), 'size': 515},
 {'labels': [541.0], 'rect': (475, 122, 12, 16), 'size': 89},
 {'labels': [542.0], 'rect': (501, 122, 33, 16), 'size': 195},
 {'labels': [543.0], 'rect': (525, 122, 27, 28), 'size': 240},
 {'labels': [544.0], 'rect': (120, 123, 23, 16), 'size': 204},
 {'labels': [545.0], 'rect': (136, 123, 23, 19), 'size': 196},
 {'labels': [546.0], 'rect': (447, 123, 2, 67), 'size': 70},
 {'labels': [547.0], 'rect': (580, 123, 7, 64), 'size': 165},
 {'labels': [548.0], 'rect': (155, 124, 12, 9), 'size': 73},
 {'labels': [549.0], 'rect': (166, 124, 14, 11), 'size': 111},
 {'labels': [550.0], 'rect': (197, 124, 7, 68), 'size': 220},
 {'labels': [551.0], 'rect': (444, 124, 4, 65), 'size': 135},
 {'labels': [552.0], 'rect': (455, 124, 14, 15), 'size': 124},
 {'labels': [553.0], 'rect': (216, 125, 18, 12), 'size': 154},
 {'labels': [554.0], 'rect': (356, 125, 16, 11), 'size': 121},
 {'labels': [555.0], 'rect': (429, 125, 13, 12), 'size': 95},
 {'labels': [556.0], 'rect': (415, 125, 28, 29), 'size': 376},
 {'labels': [557.0], 'rect': (412, 125, 32, 57), 'size': 369},
 {'labels': [558.0], 'rect': (419, 125, 27, 80), 'size': 498},
 {'labels': [559.0], 'rect': (205, 126, 15, 12), 'size': 126},
 {'labels': [560.0], 'rect': (421, 126, 10, 10), 'size': 58},
 {'labels': [561.0], 'rect': (585, 126, 13, 14), 'size': 93},
 {'labels': [562.0], 'rect': (0, 127, 15, 13), 'size': 121},
 {'labels': [563.0], 'rect': (10, 127, 14, 10), 'size': 116},
 {'labels': [564.0], 'rect': (104, 127, 10, 7), 'size': 55},
 {'labels': [565.0], 'rect': (176, 127, 21, 62), 'size': 281},
 {'labels': [566.0], 'rect': (204, 127, 18, 40), 'size': 201},
 {'labels': [567.0], 'rect': (235, 127, 20, 13), 'size': 138},
 {'labels': [568.0], 'rect': (248, 127, 22, 23), 'size': 211},
 {'labels': [569.0], 'rect': (261, 127, 16, 11), 'size': 128},
 {'labels': [570.0], 'rect': (296, 127, 23, 31), 'size': 271},
 {'labels': [571.0], 'rect': (312, 127, 20, 34), 'size': 372},
 {'labels': [572.0], 'rect': (369, 127, 13, 7), 'size': 68},
 {'labels': [573.0], 'rect': (368, 127, 26, 14), 'size': 196},
 {'labels': [574.0], 'rect': (385, 127, 19, 26), 'size': 193},
 {'labels': [575.0], 'rect': (398, 127, 18, 21), 'size': 252},
 {'labels': [576.0], 'rect': (462, 127, 19, 11), 'size': 100},
 {'labels': [577.0], 'rect': (21, 128, 11, 44), 'size': 201},
 {'labels': [578.0], 'rect': (85, 128, 26, 55), 'size': 377},
 {'labels': [579.0], 'rect': (203, 128, 28, 74), 'size': 412},
 {'labels': [580.0], 'rect': (330, 128, 13, 38), 'size': 191},
 {'labels': [581.0], 'rect': (341, 128, 20, 23), 'size': 327},
 {'labels': [582.0], 'rect': (416, 128, 9, 10), 'size': 64},
 {'labels': [583.0], 'rect': (36, 129, 23, 41), 'size': 287},
 {'labels': [584.0], 'rect': (46, 129, 10, 13), 'size': 76},
 {'labels': [585.0], 'rect': (42, 129, 19, 23), 'size': 159},
 {'labels': [586.0], 'rect': (172, 129, 21, 13), 'size': 91},
 {'labels': [587.0], 'rect': (197, 129, 4, 61), 'size': 120},
 {'labels': [588.0], 'rect': (231, 129, 14, 12), 'size': 94},
 {'labels': [589.0], 'rect': (601, 129, 16, 15), 'size': 52},
 {'labels': [590.0], 'rect': (152, 130, 19, 15), 'size': 127},
 {'labels': [591.0], 'rect': (275, 130, 13, 12), 'size': 92},
 {'labels': [592.0], 'rect': (2, 131, 24, 12), 'size': 129},
 {'labels': [593.0], 'rect': (488, 131, 25, 14), 'size': 176},
 {'labels': [594.0], 'rect': (536, 131, 27, 35), 'size': 358},
 {'labels': [595.0], 'rect': (622, 131, 13, 54), 'size': 364},
 {'labels': [596.0], 'rect': (512, 132, 14, 11), 'size': 122},
 {'labels': [597.0], 'rect': (96, 133, 21, 5), 'size': 78},
 {'labels': [598.0], 'rect': (111, 133, 13, 8), 'size': 64},
 {'labels': [599.0], 'rect': (359, 133, 26, 22), 'size': 206},
 {'labels': [600.0], 'rect': (504, 133, 10, 9), 'size': 66},
 {'labels': [601.0], 'rect': (546, 133, 16, 22), 'size': 208},
 {'labels': [602.0], 'rect': (605, 133, 14, 12), 'size': 120},
 {'labels': [603.0], 'rect': (118, 134, 40, 27), 'size': 414},
 {'labels': [604.0], 'rect': (155, 134, 20, 19), 'size': 184},
 {'labels': [605.0], 'rect': (260, 134, 13, 11), 'size': 64},
 {'labels': [606.0], 'rect': (408, 134, 12, 8), 'size': 66},
 {'labels': [607.0], 'rect': (205, 135, 18, 14), 'size': 172},
 {'labels': [608.0], 'rect': (223, 135, 18, 14), 'size': 138},
 {'labels': [609.0], 'rect': (475, 135, 29, 25), 'size': 290},
 {'labels': [610.0], 'rect': (242, 136, 13, 11), 'size': 80},
 {'labels': [611.0], 'rect': (267, 136, 21, 25), 'size': 283},
 {'labels': [612.0], 'rect': (467, 136, 18, 9), 'size': 101},
 {'labels': [613.0], 'rect': (64, 137, 18, 34), 'size': 161},
 {'labels': [614.0], 'rect': (88, 137, 21, 15), 'size': 116},
 {'labels': [615.0], 'rect': (105, 138, 6, 8), 'size': 56},
 {'labels': [616.0], 'rect': (384, 138, 9, 12), 'size': 81},
 {'labels': [617.0], 'rect': (526, 138, 17, 11), 'size': 75},
 {'labels': [618.0], 'rect': (111, 139, 13, 11), 'size': 81},
 {'labels': [619.0], 'rect': (286, 139, 23, 18), 'size': 300},
 {'labels': [620.0], 'rect': (65, 140, 16, 11), 'size': 108},
 {'labels': [621.0], 'rect': (216, 140, 13, 7), 'size': 63},
 {'labels': [622.0], 'rect': (334, 140, 17, 35), 'size': 177},
 {'labels': [623.0], 'rect': (352, 140, 27, 29), 'size': 393},
 {'labels': [624.0], 'rect': (372, 140, 14, 12), 'size': 95},
 {'labels': [625.0], 'rect': (586, 140, 13, 10), 'size': 102},
 {'labels': [626.0], 'rect': (0, 141, 13, 14), 'size': 101},
 {'labels': [627.0], 'rect': (88, 141, 14, 7), 'size': 58},
 {'labels': [628.0], 'rect': (572, 141, 11, 21), 'size': 123},
 {'labels': [629.0], 'rect': (0, 142, 24, 15), 'size': 119},
 {'labels': [630.0], 'rect': (398, 142, 13, 11), 'size': 83},
 {'labels': [631.0], 'rect': (493, 142, 28, 42), 'size': 397},
 {'labels': [632.0], 'rect': (509, 142, 17, 8), 'size': 94},
 {'labels': [633.0], 'rect': (565, 142, 12, 65), 'size': 245},
 {'labels': [634.0], 'rect': (109, 143, 24, 19), 'size': 246},
 {'labels': [635.0], 'rect': (525, 143, 18, 13), 'size': 161},
 {'labels': [636.0], 'rect': (165, 144, 19, 18), 'size': 229},
 {'labels': [637.0], 'rect': (178, 144, 18, 46), 'size': 332},
 {'labels': [638.0], 'rect': (205, 144, 33, 12), 'size': 237},
 {'labels': [639.0], 'rect': (233, 144, 26, 15), 'size': 204},
 {'labels': [640.0], 'rect': (315, 144, 18, 32), 'size': 201},
 {'labels': [641.0], 'rect': (603, 144, 19, 17), 'size': 158},
 {'labels': [642.0], 'rect': (0, 145, 27, 30), 'size': 260},
 {'labels': [643.0], 'rect': (55, 145, 26, 29), 'size': 206},
 {'labels': [644.0], 'rect': (423, 145, 20, 16), 'size': 173},
 {'labels': [645.0], 'rect': (505, 145, 17, 10), 'size': 75},
 {'labels': [646.0], 'rect': (593, 145, 18, 14), 'size': 138},
 {'labels': [647.0], 'rect': (316, 146, 13, 7), 'size': 101},
 {'labels': [648.0], 'rect': (378, 146, 23, 27), 'size': 268},
 {'labels': [649.0], 'rect': (475, 146, 12, 8), 'size': 54},
 {'labels': [650.0], 'rect': (149, 147, 20, 10), 'size': 107},
 {'labels': [651.0], 'rect': (264, 147, 13, 14), 'size': 108},
 {'labels': [652.0], 'rect': (408, 147, 17, 12), 'size': 142},
 {'labels': [653.0], 'rect': (10, 148, 19, 29), 'size': 242},
 {'labels': [654.0], 'rect': (86, 148, 24, 19), 'size': 245},
 {'labels': [655.0], 'rect': (256, 148, 10, 19), 'size': 150},
 {'labels': [656.0], 'rect': (516, 148, 23, 26), 'size': 226},
 {'labels': [657.0], 'rect': (464, 149, 28, 25), 'size': 347},
 {'labels': [658.0], 'rect': (586, 149, 11, 12), 'size': 64},
 {'labels': [659.0], 'rect': (280, 150, 30, 36), 'size': 241},
 {'labels': [660.0], 'rect': (524, 150, 8, 13), 'size': 51},
 {'labels': [661.0], 'rect': (219, 151, 19, 14), 'size': 178},
 {'labels': [662.0], 'rect': (452, 151, 19, 38), 'size': 203},
 {'labels': [663.0], 'rect': (41, 152, 17, 12), 'size': 62},
 {'labels': [664.0], 'rect': (130, 152, 18, 11), 'size': 131},
 {'labels': [665.0], 'rect': (240, 152, 15, 14), 'size': 100},
 {'labels': [666.0], 'rect': (335, 152, 14, 7), 'size': 87},
 {'labels': [667.0], 'rect': (98, 153, 16, 12), 'size': 130},
 {'labels': [668.0], 'rect': (144, 153, 14, 9), 'size': 93},
 {'labels': [669.0], 'rect': (150, 153, 33, 24), 'size': 373},
 {'labels': [670.0], 'rect': (311, 153, 17, 22), 'size': 169},
 {'labels': [671.0], 'rect': (396, 153, 18, 10), 'size': 128},
 {'labels': [672.0], 'rect': (568, 153, 14, 21), 'size': 176},
 {'labels': [673.0], 'rect': (628, 153, 11, 14), 'size': 133},
 {'labels': [674.0], 'rect': (237, 154, 24, 20), 'size': 209},
 {'labels': [675.0], 'rect': (333, 154, 22, 37), 'size': 334},
 {'labels': [676.0], 'rect': (610, 154, 13, 13), 'size': 82},
 {'labels': [677.0], 'rect': (29, 155, 6, 44), 'size': 128},
 {'labels': [678.0], 'rect': (509, 155, 21, 28), 'size': 218},
 {'labels': [679.0], 'rect': (588, 155, 34, 25), 'size': 461},
 {'labels': [680.0], 'rect': (0, 156, 16, 19), 'size': 130},
 {'labels': [681.0], 'rect': (41, 156, 12, 7), 'size': 56},
 {'labels': [682.0], 'rect': (62, 156, 16, 16), 'size': 57},
 {'labels': [683.0], 'rect': (71, 156, 9, 12), 'size': 75},
 {'labels': [684.0], 'rect': (172, 156, 20, 15), 'size': 154},
 {'labels': [685.0], 'rect': (414, 156, 26, 12), 'size': 159},
 {'labels': [686.0], 'rect': (530, 156, 15, 20), 'size': 151},
 {'labels': [687.0], 'rect': (44, 157, 24, 25), 'size': 306},
 {'labels': [688.0], 'rect': (278, 157, 19, 21), 'size': 141},
 {'labels': [689.0], 'rect': (391, 157, 10, 9), 'size': 53},
 {'labels': [690.0], 'rect': (124, 158, 22, 9), 'size': 88},
 {'labels': [691.0], 'rect': (344, 158, 17, 7), 'size': 80},
 {'labels': [692.0], 'rect': (269, 159, 10, 21), 'size': 136},
 {'labels': [693.0], 'rect': (286, 159, 21, 21), 'size': 302},
 {'labels': [694.0], 'rect': (104, 160, 21, 23), 'size': 260},
 {'labels': [695.0], 'rect': (261, 160, 15, 27), 'size': 192},
 {'labels': [696.0], 'rect': (396, 160, 22, 17), 'size': 140},
 {'labels': [697.0], 'rect': (222, 161, 19, 10), 'size': 72},
 {'labels': [698.0], 'rect': (404, 161, 21, 11), 'size': 125},
 {'labels': [699.0], 'rect': (492, 161, 9, 16), 'size': 109},
 {'labels': [700.0], 'rect': (549, 161, 14, 14), 'size': 120},
 {'labels': [701.0], 'rect': (587, 161, 20, 20), 'size': 62},
 {'labels': [702.0], 'rect': (143, 162, 22, 21), 'size': 224},
 {'labels': [703.0], 'rect': (367, 162, 27, 18), 'size': 171},
 {'labels': [704.0], 'rect': (453, 162, 11, 14), 'size': 89},
 {'labels': [705.0], 'rect': (569, 162, 15, 25), 'size': 192},
 {'labels': [706.0], 'rect': (59, 163, 20, 17), 'size': 213},
 {'labels': [707.0], 'rect': (205, 163, 21, 9), 'size': 97},
 {'labels': [708.0], 'rect': (457, 163, 20, 12), 'size': 110},
 {'labels': [709.0], 'rect': (538, 163, 10, 17), 'size': 54},
 {'labels': [710.0], 'rect': (578, 163, 19, 66), 'size': 238},
 {'labels': [711.0], 'rect': (80, 164, 3, 79), 'size': 142},
 {'labels': [712.0], 'rect': (84, 164, 16, 39), 'size': 305},
 {'labels': [713.0], 'rect': (110, 164, 27, 21), 'size': 197},
 {'labels': [714.0], 'rect': (350, 164, 28, 22), 'size': 272},
 {'labels': [715.0], 'rect': (475, 164, 18, 27), 'size': 190},
 {'labels': [716.0], 'rect': (588, 164, 16, 19), 'size': 150},
 {'labels': [717.0], 'rect': (628, 164, 11, 38), 'size': 211},
 {'labels': [718.0], 'rect': (37, 165, 22, 26), 'size': 199},
 {'labels': [719.0], 'rect': (485, 166, 13, 15), 'size': 106},
 {'labels': [720.0], 'rect': (546, 166, 8, 11), 'size': 63},
 {'labels': [721.0], 'rect': (33, 167, 11, 18), 'size': 80},
 {'labels': [722.0], 'rect': (130, 167, 13, 8), 'size': 66},
 {'labels': [723.0], 'rect': (222, 167, 23, 11), 'size': 123},
 {'labels': [724.0], 'rect': (310, 167, 22, 15), 'size': 136},
 {'labels': [725.0], 'rect': (559, 167, 5, 32), 'size': 90},
 {'labels': [726.0], 'rect': (13, 168, 13, 43), 'size': 254},
 {'labels': [727.0], 'rect': (41, 168, 13, 14), 'size': 93},
 {'labels': [728.0], 'rect': (279, 168, 22, 15), 'size': 104},
 {'labels': [729.0], 'rect': (469, 168, 10, 9), 'size': 68},
 {'labels': [730.0], 'rect': (502, 168, 14, 14), 'size': 90},
 {'labels': [731.0], 'rect': (550, 168, 15, 47), 'size': 208},
 {'labels': [732.0], 'rect': (606, 168, 21, 25), 'size': 288},
 {'labels': [733.0], 'rect': (7, 170, 11, 6), 'size': 54},
 {'labels': [734.0], 'rect': (252, 170, 21, 9), 'size': 112},
 {'labels': [735.0], 'rect': (341, 170, 33, 27), 'size': 301},
 {'labels': [736.0], 'rect': (166, 171, 26, 9), 'size': 142},
 {'labels': [737.0], 'rect': (248, 171, 16, 23), 'size': 252},
 {'labels': [738.0], 'rect': (401, 171, 13, 15), 'size': 86},
 {'labels': [739.0], 'rect': (352, 172, 12, 10), 'size': 83},
 {'labels': [740.0], 'rect': (387, 172, 22, 14), 'size': 160},
 {'labels': [741.0], 'rect': (21, 173, 9, 59), 'size': 185},
 {'labels': [742.0], 'rect': (169, 173, 26, 17), 'size': 184},
 {'labels': [743.0], 'rect': (380, 173, 11, 10), 'size': 79},
 {'labels': [744.0], 'rect': (523, 173, 14, 17), 'size': 126},
 {'labels': [745.0], 'rect': (127, 174, 16, 4), 'size': 52},
 {'labels': [746.0], 'rect': (233, 174, 17, 17), 'size': 180},
 {'labels': [747.0], 'rect': (411, 174, 13, 12), 'size': 128},
 {'labels': [748.0], 'rect': (453, 174, 16, 17), 'size': 178},
 {'labels': [749.0], 'rect': (462, 174, 22, 28), 'size': 295},
 {'labels': [750.0], 'rect': (532, 174, 21, 24), 'size': 238},
 {'labels': [751.0], 'rect': (546, 174, 17, 14), 'size': 134},
 {'labels': [752.0], 'rect': (0, 175, 12, 6), 'size': 74},
 {'labels': [753.0], 'rect': (58, 175, 23, 19), 'size': 209},
 {'labels': [754.0], 'rect': (96, 175, 15, 17), 'size': 154},
 {'labels': [755.0], 'rect': (290, 175, 23, 92), 'size': 352},
 {'labels': [756.0], 'rect': (494, 175, 19, 11), 'size': 126},
 {'labels': [757.0], 'rect': (158, 176, 38, 37), 'size': 411},
 {'labels': [758.0], 'rect': (224, 176, 14, 9), 'size': 90},
 {'labels': [759.0], 'rect': (264, 176, 33, 30), 'size': 526},
 {'labels': [760.0], 'rect': (630, 176, 9, 68), 'size': 440},
 {'labels': [761.0], 'rect': (67, 177, 13, 13), 'size': 113},
 {'labels': [762.0], 'rect': (121, 177, 25, 5), 'size': 79},
 {'labels': [763.0], 'rect': (208, 177, 23, 22), 'size': 317},
 {'labels': [764.0], 'rect': (317, 177, 17, 50), 'size': 279},
 {'labels': [765.0], 'rect': (429, 177, 13, 10), 'size': 108},
 {'labels': [766.0], 'rect': (135, 179, 19, 10), 'size': 97},
 {'labels': [767.0], 'rect': (113, 180, 13, 12), 'size': 85},
 {'labels': [768.0], 'rect': (152, 180, 15, 12), 'size': 111},
 {'labels': [769.0], 'rect': (310, 180, 11, 34), 'size': 134},
 {'labels': [770.0], 'rect': (327, 180, 7, 52), 'size': 170},
 {'labels': [771.0], 'rect': (373, 180, 18, 10), 'size': 114},
 {'labels': [772.0], 'rect': (534, 180, 17, 22), 'size': 108},
 {'labels': [773.0], 'rect': (578, 180, 8, 12), 'size': 64},
 {'labels': [774.0], 'rect': (589, 180, 18, 12), 'size': 154},
 {'labels': [775.0], 'rect': (0, 181, 12, 7), 'size': 55},
 {'labels': [776.0], 'rect': (120, 181, 15, 13), 'size': 130},
 {'labels': [777.0], 'rect': (331, 181, 4, 21), 'size': 72},
 {'labels': [778.0], 'rect': (340, 181, 13, 10), 'size': 113},
 {'labels': [779.0], 'rect': (516, 181, 11, 9), 'size': 63},
 {'labels': [780.0], 'rect': (0, 182, 23, 20), 'size': 274},
 {'labels': [781.0], 'rect': (131, 182, 12, 10), 'size': 93},
 {'labels': [782.0], 'rect': (311, 182, 5, 32), 'size': 71},
 {'labels': [783.0], 'rect': (314, 182, 8, 60), 'size': 291},
 {'labels': [784.0], 'rect': (390, 182, 13, 9), 'size': 65},
 {'labels': [785.0], 'rect': (506, 182, 21, 25), 'size': 175},
 {'labels': [786.0], 'rect': (548, 183, 12, 15), 'size': 102},
 {'labels': [787.0], 'rect': (30, 184, 15, 16), 'size': 128},
 {'labels': [788.0], 'rect': (228, 184, 14, 23), 'size': 221},
 {'labels': [789.0], 'rect': (334, 184, 16, 19), 'size': 147},
 {'labels': [790.0], 'rect': (394, 184, 29, 19), 'size': 355},
 {'labels': [791.0], 'rect': (455, 184, 16, 17), 'size': 149},
 {'labels': [792.0], 'rect': (485, 184, 23, 18), 'size': 217},
 {'labels': [793.0], 'rect': (498, 184, 15, 11), 'size': 88},
 {'labels': [794.0], 'rect': (597, 184, 21, 15), 'size': 178},
 {'labels': [795.0], 'rect': (104, 186, 16, 8), 'size': 81},
 {'labels': [796.0], 'rect': (135, 186, 24, 15), 'size': 223},
 {'labels': [797.0], 'rect': (165, 186, 11, 13), 'size': 111},
 {'labels': [798.0], 'rect': (52, 187, 28, 28), 'size': 168},
 {'labels': [799.0], 'rect': (382, 187, 16, 14), 'size': 136},
 {'labels': [800.0], 'rect': (477, 187, 10, 27), 'size': 144},
 {'labels': [801.0], 'rect': (525, 187, 21, 22), 'size': 177},
 {'labels': [802.0], 'rect': (569, 187, 17, 10), 'size': 69},
 {'labels': [803.0], 'rect': (344, 188, 23, 14), 'size': 185},
 {'labels': [804.0], 'rect': (372, 188, 14, 12), 'size': 103},
 {'labels': [805.0], 'rect': (581, 188, 6, 34), 'size': 105},
 {'labels': [806.0], 'rect': (620, 188, 9, 13), 'size': 84},
 {'labels': [807.0], 'rect': (38, 189, 18, 16), 'size': 87},
 {'labels': [808.0], 'rect': (236, 189, 21, 13), 'size': 171},
 {'labels': [809.0], 'rect': (442, 189, 16, 37), 'size': 286},
 {'labels': [810.0], 'rect': (155, 190, 15, 9), 'size': 70},
 {'labels': [811.0], 'rect': (520, 190, 13, 9), 'size': 71},
 {'labels': [812.0], 'rect': (570, 190, 16, 23), 'size': 216},
 {'labels': [813.0], 'rect': (191, 191, 12, 24), 'size': 100},
 {'labels': [814.0], 'rect': (295, 191, 13, 24), 'size': 221},
 {'labels': [815.0], 'rect': (96, 192, 34, 21), 'size': 325},
 {'labels': [816.0], 'rect': (367, 192, 27, 19), 'size': 256},
 {'labels': [817.0], 'rect': (442, 192, 5, 22), 'size': 112},
 {'labels': [818.0], 'rect': (0, 193, 22, 19), 'size': 335},
 {'labels': [819.0], 'rect': (188, 193, 7, 23), 'size': 131},
 {'labels': [820.0], 'rect': (203, 193, 1, 69), 'size': 70},
 {'labels': [821.0], 'rect': (272, 193, 15, 8), 'size': 64},
 {'labels': [822.0], 'rect': (441, 193, 12, 55), 'size': 160},
 {'labels': [823.0], 'rect': (447, 193, 3, 20), 'size': 62},
 {'labels': [824.0], 'rect': (32, 194, 18, 9), 'size': 96},
 {'labels': [825.0], 'rect': (68, 194, 12, 13), 'size': 104},
 {'labels': [826.0], 'rect': (123, 194, 18, 12), 'size': 111},
 {'labels': [827.0], 'rect': (210, 194, 13, 14), 'size': 101},
 {'labels': [828.0], 'rect': (393, 194, 14, 15), 'size': 115},
 {'labels': [829.0], 'rect': (417, 194, 24, 116), 'size': 359},
 {'labels': [830.0], 'rect': (458, 194, 26, 30), 'size': 444},
 {'labels': [831.0], 'rect': (507, 194, 14, 12), 'size': 70},
 {'labels': [832.0], 'rect': (591, 194, 29, 27), 'size': 452},
 {'labels': [833.0], 'rect': (619, 194, 12, 12), 'size': 99},
 {'labels': [834.0], 'rect': (39, 195, 39, 28), 'size': 671},
 {'labels': [835.0], 'rect': (194, 195, 6, 18), 'size': 107},
 {'labels': [836.0], 'rect': (207, 195, 33, 30), 'size': 467},
 {'labels': [837.0], 'rect': (256, 195, 27, 15), 'size': 137},
 {'labels': [838.0], 'rect': (568, 195, 4, 50), 'size': 74},
 {'labels': [839.0], 'rect': (85, 197, 30, 13), 'size': 237},
 {'labels': [840.0], 'rect': (256, 197, 8, 16), 'size': 87},
 {'labels': [841.0], 'rect': (356, 197, 12, 11), 'size': 78},
 {'labels': [842.0], 'rect': (146, 198, 25, 12), 'size': 145},
 {'labels': [843.0], 'rect': (205, 198, 30, 43), 'size': 284},
 {'labels': [844.0], 'rect': (335, 198, 0, 73), 'size': 74},
 {'labels': [845.0], 'rect': (344, 198, 16, 11), 'size': 83},
 {'labels': [846.0], 'rect': (488, 198, 10, 17), 'size': 87},
 {'labels': [847.0], 'rect': (499, 198, 25, 24), 'size': 235},
 {'labels': [848.0], 'rect': (233, 199, 26, 22), 'size': 288},
 {'labels': [849.0], 'rect': (331, 199, 3, 28), 'size': 103},
 {'labels': [850.0], 'rect': (336, 199, 17, 52), 'size': 233},
 {'labels': [851.0], 'rect': (525, 199, 12, 12), 'size': 96},
 {'labels': [852.0], 'rect': (544, 199, 10, 15), 'size': 82},
 {'labels': [853.0], 'rect': (29, 200, 27, 30), 'size': 292},
 {'labels': [854.0], 'rect': (134, 200, 44, 19), 'size': 192},
 {'labels': [855.0], 'rect': (166, 200, 20, 20), 'size': 234},
 {'labels': [856.0], 'rect': (263, 200, 8, 14), 'size': 74},
 {'labels': [857.0], 'rect': (579, 200, 33, 38), 'size': 387},
 {'labels': [858.0], 'rect': (283, 201, 20, 24), 'size': 226},
 {'labels': [859.0], 'rect': (478, 201, 13, 18), 'size': 77},
 {'labels': [860.0], 'rect': (570, 201, 15, 19), 'size': 145},
 {'labels': [861.0], 'rect': (389, 202, 18, 21), 'size': 165},
 {'labels': [862.0], 'rect': (496, 202, 13, 17), 'size': 144},
 {'labels': [863.0], 'rect': (508, 202, 12, 10), 'size': 78},
 {'labels': [864.0], 'rect': (8, 203, 17, 29), 'size': 194},
 {'labels': [865.0], 'rect': (123, 203, 28, 16), 'size': 250},
 {'labels': [866.0], 'rect': (310, 203, 3, 24), 'size': 90},
 {'labels': [867.0], 'rect': (353, 203, 24, 20), 'size': 211},
 {'labels': [868.0], 'rect': (404, 203, 16, 11), 'size': 127},
 {'labels': [869.0], 'rect': (83, 204, 34, 40), 'size': 202},
 {'labels': [870.0], 'rect': (84, 204, 35, 12), 'size': 185},
 {'labels': [871.0], 'rect': (521, 205, 28, 19), 'size': 269},
 {'labels': [872.0], 'rect': (551, 205, 14, 22), 'size': 161},
 {'labels': [873.0], 'rect': (618, 205, 13, 13), 'size': 90},
 {'labels': [874.0], 'rect': (384, 206, 9, 12), 'size': 73},
 {'labels': [875.0], 'rect': (33, 207, 7, 12), 'size': 64},
 {'labels': [876.0], 'rect': (270, 207, 21, 17), 'size': 221},
 {'labels': [877.0], 'rect': (371, 207, 12, 12), 'size': 74},
 {'labels': [878.0], 'rect': (400, 207, 24, 22), 'size': 243},
 {'labels': [879.0], 'rect': (486, 207, 18, 25), 'size': 238},
 {'labels': [880.0], 'rect': (551, 207, 16, 48), 'size': 273},
 {'labels': [881.0], 'rect': (350, 208, 11, 16), 'size': 68},
 {'labels': [882.0], 'rect': (380, 208, 10, 13), 'size': 60},
 {'labels': [883.0], 'rect': (517, 208, 13, 26), 'size': 156},
 {'labels': [884.0], 'rect': (560, 208, 6, 34), 'size': 125},
 {'labels': [885.0], 'rect': (0, 209, 16, 19), 'size': 180},
 {'labels': [886.0], 'rect': (245, 209, 20, 17), 'size': 223},
 {'labels': [887.0], 'rect': (426, 209, 13, 22), 'size': 126},
 {'labels': [888.0], 'rect': (187, 210, 12, 12), 'size': 101},
 {'labels': [889.0], 'rect': (322, 210, 11, 36), 'size': 126},
 {'labels': [890.0], 'rect': (26, 211, 13, 18), 'size': 84},
 {'labels': [891.0], 'rect': (84, 211, 22, 20), 'size': 182},
 {'labels': [892.0], 'rect': (260, 211, 17, 23), 'size': 208},
 {'labels': [893.0], 'rect': (625, 211, 7, 13), 'size': 76},
 {'labels': [894.0], 'rect': (92, 212, 18, 7), 'size': 59},
 {'labels': [895.0], 'rect': (107, 212, 20, 11), 'size': 143},
 {'labels': [896.0], 'rect': (294, 212, 14, 15), 'size': 119},
 {'labels': [897.0], 'rect': (414, 212, 22, 19), 'size': 192},
 {'labels': [898.0], 'rect': (459, 212, 12, 24), 'size': 110},
 {'labels': [899.0], 'rect': (149, 213, 37, 37), 'size': 476},
 {'labels': [900.0], 'rect': (443, 213, 8, 10), 'size': 73},
 {'labels': [901.0], 'rect': (615, 213, 21, 21), 'size': 165},
 {'labels': [902.0], 'rect': (77, 214, 4, 112), 'size': 245},
 {'labels': [903.0], 'rect': (126, 214, 29, 9), 'size': 110},
 {'labels': [904.0], 'rect': (187, 214, 15, 12), 'size': 98},
 {'labels': [905.0], 'rect': (548, 214, 13, 23), 'size': 138},
 {'labels': [906.0], 'rect': (57, 215, 20, 10), 'size': 75},
 {'labels': [907.0], 'rect': (337, 215, 21, 17), 'size': 197},
 {'labels': [908.0], 'rect': (375, 215, 24, 27), 'size': 263},
 {'labels': [909.0], 'rect': (526, 215, 23, 18), 'size': 144},
 {'labels': [910.0], 'rect': (569, 215, 11, 30), 'size': 149},
 {'labels': [911.0], 'rect': (63, 216, 16, 14), 'size': 136},
 {'labels': [912.0], 'rect': (196, 216, 7, 56), 'size': 136},
 {'labels': [913.0], 'rect': (368, 216, 20, 26), 'size': 202},
 {'labels': [914.0], 'rect': (355, 217, 12, 10), 'size': 82},
 {'labels': [915.0], 'rect': (499, 218, 19, 23), 'size': 146},
 {'labels': [916.0], 'rect': (506, 218, 18, 11), 'size': 102},
 {'labels': [917.0], 'rect': (591, 218, 3, 44), 'size': 75},
 {'labels': [918.0], 'rect': (124, 219, 40, 9), 'size': 161},
 {'labels': [919.0], 'rect': (212, 219, 21, 20), 'size': 128},
 {'labels': [920.0], 'rect': (235, 219, 26, 15), 'size': 164},
 {'labels': [921.0], 'rect': (321, 219, 6, 66), 'size': 219},
 {'labels': [922.0], 'rect': (459, 219, 35, 33), 'size': 373},
 {'labels': [923.0], 'rect': (104, 220, 12, 9), 'size': 53},
 {'labels': [924.0], 'rect': (337, 220, 23, 35), 'size': 432},
 {'labels': [925.0], 'rect': (484, 220, 29, 29), 'size': 211},
 {'labels': [926.0], 'rect': (24, 221, 17, 27), 'size': 206},
 {'labels': [927.0], 'rect': (87, 221, 16, 7), 'size': 74},
 {'labels': [928.0], 'rect': (116, 221, 36, 7), 'size': 127},
 {'labels': [929.0], 'rect': (186, 221, 13, 26), 'size': 202},
 {'labels': [930.0], 'rect': (277, 221, 14, 9), 'size': 60},
 {'labels': [931.0], 'rect': (394, 221, 22, 20), 'size': 281},
 {'labels': [932.0], 'rect': (205, 222, 7, 13), 'size': 82},
 {'labels': [933.0], 'rect': (354, 222, 22, 18), 'size': 163},
 {'labels': [934.0], 'rect': (593, 222, 22, 7), 'size': 98},
 {'labels': [935.0], 'rect': (0, 223, 15, 37), 'size': 388},
 {'labels': [936.0], 'rect': (100, 223, 15, 8), 'size': 76},
 {'labels': [937.0], 'rect': (49, 224, 30, 27), 'size': 255},
 {'labels': [938.0], 'rect': (84, 224, 46, 12), 'size': 280},
 {'labels': [939.0], 'rect': (125, 224, 25, 12), 'size': 122},
 {'labels': [940.0], 'rect': (156, 224, 16, 6), 'size': 63},
 {'labels': [941.0], 'rect': (211, 224, 10, 17), 'size': 77},
 {'labels': [942.0], 'rect': (292, 224, 16, 15), 'size': 158},
 {'labels': [943.0], 'rect': (424, 224, 8, 15), 'size': 75},
 {'labels': [944.0], 'rect': (529, 224, 7, 15), 'size': 64},
 {'labels': [945.0], 'rect': (532, 224, 10, 17), 'size': 91},
 {'labels': [946.0], 'rect': (544, 224, 9, 17), 'size': 94},
 {'labels': [947.0], 'rect': (4, 225, 15, 26), 'size': 166},
 {'labels': [948.0], 'rect': (60, 225, 19, 22), 'size': 225},
 {'labels': [949.0], 'rect': (229, 225, 22, 10), 'size': 96},
 {'labels': [950.0], 'rect': (275, 225, 12, 11), 'size': 90},
 {'labels': [951.0], 'rect': (450, 225, 11, 42), 'size': 268},
 {'labels': [952.0], 'rect': (593, 225, 33, 34), 'size': 492},
 {'labels': [953.0], 'rect': (600, 225, 25, 15), 'size': 184},
 {'labels': [954.0], 'rect': (615, 225, 24, 45), 'size': 359},
 {'labels': [955.0], 'rect': (13, 226, 12, 48), 'size': 257},
 {'labels': [956.0], 'rect': (274, 226, 24, 20), 'size': 243},
 {'labels': [957.0], 'rect': (250, 227, 18, 26), 'size': 234},
 {'labels': [958.0], 'rect': (263, 227, 11, 25), 'size': 162},
 {'labels': [959.0], 'rect': (310, 227, 3, 47), 'size': 184},
 {'labels': [960.0], 'rect': (329, 227, 5, 118), 'size': 193},
 {'labels': [961.0], 'rect': (414, 227, 9, 13), 'size': 102},
 {'labels': [962.0], 'rect': (486, 227, 19, 26), 'size': 191},
 {'labels': [963.0], 'rect': (152, 228, 20, 9), 'size': 93},
 {'labels': [964.0], 'rect': (443, 228, 9, 13), 'size': 96},
 {'labels': [965.0], 'rect': (516, 228, 16, 16), 'size': 146},
 {'labels': [966.0], 'rect': (571, 228, 7, 25), 'size': 107},
 {'labels': [967.0], 'rect': (628, 228, 9, 15), 'size': 101},
 {'labels': [968.0], 'rect': (29, 229, 23, 36), 'size': 164},
 {'labels': [969.0], 'rect': (142, 229, 27, 10), 'size': 121},
 {'labels': [970.0], 'rect': (462, 229, 20, 12), 'size': 138},
 {'labels': [971.0], 'rect': (419, 230, 15, 19), 'size': 98},
 {'labels': [972.0], 'rect': (435, 230, 4, 20), 'size': 78},
 {'labels': [973.0], 'rect': (121, 231, 41, 9), 'size': 131},
 {'labels': [974.0], 'rect': (237, 231, 15, 7), 'size': 81},
 {'labels': [975.0], 'rect': (498, 231, 21, 18), 'size': 136},
 {'labels': [976.0], 'rect': (293, 232, 15, 35), 'size': 213},
 {'labels': [977.0], 'rect': (426, 232, 9, 11), 'size': 71},
 {'labels': [978.0], 'rect': (168, 233, 16, 39), 'size': 242},
 {'labels': [979.0], 'rect': (353, 233, 9, 10), 'size': 68},
 {'labels': [980.0], 'rect': (31, 234, 14, 8), 'size': 67},
 {'labels': [981.0], 'rect': (205, 234, 8, 19), 'size': 107},
 {'labels': [982.0], 'rect': (210, 234, 20, 18), 'size': 189},
 {'labels': [983.0], 'rect': (540, 234, 16, 17), 'size': 115},
 {'labels': [984.0], 'rect': (570, 234, 3, 54), 'size': 123},
 {'labels': [985.0], 'rect': (573, 234, 16, 21), 'size': 204},
 {'labels': [986.0], 'rect': (32, 235, 27, 29), 'size': 354},
 {'labels': [987.0], 'rect': (113, 235, 34, 12), 'size': 246},
 {'labels': [988.0], 'rect': (224, 235, 31, 13), 'size': 275},
 {'labels': [989.0], 'rect': (366, 235, 22, 16), 'size': 115},
 {'labels': [990.0], 'rect': (359, 236, 18, 16), 'size': 144},
 {'labels': [991.0], 'rect': (384, 236, 15, 11), 'size': 120},
 {'labels': [992.0], 'rect': (138, 237, 32, 11), 'size': 226},
 {'labels': [993.0], 'rect': (185, 237, 17, 65), 'size': 304},
 {'labels': [994.0], 'rect': (472, 237, 21, 15), 'size': 147},
 {'labels': [995.0], 'rect': (520, 237, 23, 18), 'size': 208},
 {'labels': [996.0], 'rect': (84, 238, 39, 9), 'size': 167},
 {'labels': [997.0], 'rect': (441, 238, 12, 22), 'size': 185},
 {'labels': [998.0], 'rect': (583, 238, 13, 48), 'size': 170},
 {'labels': [999.0], 'rect': (587, 239, 4, 24), 'size': 80},
 ...]
import matplotlib.patches as pts

fig, ax = plt.subplots(1,1)
ax.imshow(im2)
for b in sss[1]:
  # rect is (x, y, w, h), so width and height are passed directly: xy (origin), width, height
  box = pts.Rectangle((b['rect'][0],b['rect'][1]), b['rect'][2], b['rect'][3], fill=None, edgecolor='red')
  ax.add_patch(box)

import cv2 # fast, and it integrates well with other libraries

ssss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation() # available because installing opencv-contrib-python extends the base cv2 module (a form of monkey patching)

ssss.setBaseImage(im)

ssss.switchToSelectiveSearchFast() # fast mode
# ssss.switchToSelectiveSearchQuality() # slower, but comparatively more accurate

rect = ssss.process()

rect
# array([[594, 388,  46,  39],
#        [433, 191,  48, 236],
#        [590, 194,  32,  28],
#        ...,
#        [  0,   0, 196, 427],
#        [169,   0, 471, 427],
#        [182,   0, 458, 427]], dtype=int32)

cv2.setUseOptimized(True) # enable OpenCV's optimized code paths (best set before calling process())
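
The proposals in rect come back as (x, y, w, h) rows, so they can be drawn directly with cv2.rectangle. A minimal sketch (im_vis and the top-100 cutoff are illustrative, not from the original):

im_vis = im.copy()
for (x, y, w, h) in rect[:100]: # draw only the first 100 proposals to keep the plot readable
    cv2.rectangle(im_vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 1)
plt.imshow(im_vis)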

R-CNN

1. Feed an image in as the input

2. Extract 2,000 bounding boxes with the selective search algorithm

3. Warp the 2,000 bounding boxes to a common size so they can be flattened (warping)

- warping: literally, to bend

- in the sense of distorting/correcting, it is the processing used to normalize a skewed or squashed image

4. Feed the warped images into a CNN model

5. Finally, classify them with an SVM
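
In practice, step 3 is just a fixed-size resize of every proposal crop. A minimal sketch, assuming im and the (x, y, w, h) proposals in rect from above (the 224x224 target is illustrative):

warped = []
for (x, y, w, h) in rect[:2000]: # at most 2,000 proposals
    crop = im[y:y + h, x:x + w]
    if crop.size == 0: # skip degenerate boxes
        continue
    warped.append(cv2.resize(crop, (224, 224))) # warp every crop to the same size
warped = np.array(warped) # (N, 224, 224, 3), ready for the CNN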

 

Domain-specific fine-tuning

The original AlexNet was a model that classified 1,000 image classes

But the goal in R-CNN is to classify 20 classes

So fine-tuning is needed, done in a way that avoids catastrophic forgetting

Since AlexNet's last layer classifies 1,000 classes, that layer has to be replaced

First, freeze the feature-extraction part, i.e. set trainable=False

and keep the pretrained weights as they are (this works because the new images are similar to the original training images)

Then lower the learning rate and train on the PASCAL VOC dataset

After that comes the fine-tuning step:

unfreeze the frozen layers and retrain with a sharply reduced learning rate
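
The same recipe in Keras terms, as a minimal sketch (AlexNet itself is not in tf.keras.applications, so VGG16 stands in, and the layer choices are illustrative):

backbone = tf.keras.applications.VGG16(include_top=False, pooling='avg')
backbone.trainable = False # 1) freeze the feature extractor, keeping the pretrained weights

inputs = tf.keras.Input((224, 224, 3))
outputs = tf.keras.layers.Dense(20, activation='softmax')(backbone(inputs)) # new 20-class head
model = tf.keras.models.Model(inputs, outputs)
model.compile(tf.keras.optimizers.Adam(1e-3), 'categorical_crossentropy')
# ... train the new head (e.g. on PASCAL VOC) ...

backbone.trainable = True # 2) unfreeze
model.compile(tf.keras.optimizers.Adam(1e-5), 'categorical_crossentropy') # much lower learning rate
# ... retrain to fine-tune ...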

Problems with R-CNN

1. Because it uses the AlexNet architecture, the input image size is fixed, so cropping or warping causes image loss and distortion that degrade performance

2. The CNN runs sequentially over all 2,000 bounding boxes, so inference takes a long time

3. The region proposal step and the SVM are poorly suited to running on a GPU

4. Three separate models have to be trained, which makes the overall pipeline hard to train end to end

Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition

SPPNet
It was inspired by global average pooling

In R-CNN, the 2,000 bounding boxes go through a warping step before entering the FC layers,

and because each crop is cut to a fixed size or has its aspect ratio changed, the result differs from the original image

To solve this, the idea is to pass the input through the convolution layers regardless of its size

and apply a pooling step that brings the feature maps to a common size just before the FC layers

The feature map extracted by the convolution layers is taken as input

and divided into a set of predefined regions

The example above uses three region grids, 4x4, 2x2 and 1x1, and each grid is called a pyramid level

A single cell of a pyramid is called a bin

For example, if a 64 x 64 x 256 feature map comes in, each bin of the 4x4 pyramid covers a 16x16 area

Max pooling is performed in each bin and the results are concatenated

With k the number of channels in the input feature map and m the number of bins, the final result is a k*m-dimensional vector

In the example above, k=256 and m=(16+4+1)=21, giving a 256*21=5376-dimensional vector. Because the output is determined only by the preset number of bins and the CNN's channel count, it always has the same size regardless of the input image size
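
A minimal sketch of that pooling step in TensorFlow (a hypothetical spp helper; the integer bin splits below gloss over the paper's exact rounding rules):

def spp(feature_map, levels=(4, 2, 1)):
    # feature_map: (batch, H, W, k) -> (batch, k*m), with m = sum(n*n for n in levels)
    _, H, W, _ = feature_map.shape
    pooled = []
    for n in levels:
        for i in range(n):
            for j in range(n):
                bin_ = feature_map[:, i*H//n:(i+1)*H//n, j*W//n:(j+1)*W//n, :]
                pooled.append(tf.reduce_max(bin_, axis=[1, 2])) # max pool per bin -> (batch, k)
    return tf.concat(pooled, axis=-1)

spp(tf.random.normal((1, 64, 64, 256))).shape
# TensorShape([1, 5376])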

 

 


Image(Object) Classification

Image classification flow

 

Cost reduction vs. performance improvement

┌─ Hand-crafted algorithms (rule-based)

└─ ML

      ┌─ Traditional ML

      └─ Deep Learning

            └─ performs well only when object size and position stay fixed (an assumption is required)

                  ┌─ FN

                  └─ CNN

                        └─ the larger the capacity, the better the performance

                              (number of kernels, filters, layers, perceptrons)

                              └─ which requires a very large number of images

                                    └─ Augmentation (inflating the data)

                                          └─ when the amount of data is still limited

                                                └─ Transfer learning

                                                      ┌─ Feature Extraction

                                                      └─ Fine Tuning

How image data is stored

- Directory -> the most commonly used approach

- HDF -> mostly for research

- LMDB

How to load the data

ImageDataGenerator + tf.data

ImageDataGenerator on its own can handle loading, preprocessing and augmentation, but it is slow, so mixing ImageDataGenerator with tf.data is far more efficient

import tensorflow as tf 

idg = tf.keras.preprocessing.image.ImageDataGenerator() # its options handle saving, preprocessing and augmentation 
didg = idg.flow_from_directory('flower_photos/')
# Found 3670 images belonging to 5 classes.

next(didg)[0].dtype, next(didg)[0].shape
# (dtype('float32'), (32, 256, 256, 3))

next(didg)[1].dtype, next(didg)[1].shape
# (dtype('float32'), (32, 5))

# the argument must be a callable, hence the lambda 
train = tf.data.Dataset.from_generator(lambda: didg, output_types=(tf.float32, tf.float32), output_shapes=((None,256,256,3),(None,5))) 

train
# <FlatMapDataset shapes: ((None, 256, 256, 3), (None, 5)), types: (tf.float32, tf.float32)>
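
Since train is now an ordinary tf.data.Dataset, the usual pipeline optimizations can be chained on (a minimal sketch; the AUTOTUNE setting is an assumption, not from the original):

train = train.prefetch(tf.data.experimental.AUTOTUNE) # overlap data loading with training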
for i in train.take(1):
  print(i)
  (<tf.Tensor: shape=(32, 256, 256, 3), dtype=float32, numpy=
array([[[[124., 118.,  94.],
         [125., 123., 102.],
         [112., 106.,  92.],
         ...,
         [ 10.,  17.,   9.],
         [  8.,  18.,   9.],
         [  7.,  19.,   9.]],

        [[111., 106.,  84.],
         [122., 121., 100.],
         [115., 112.,  95.],
         ...,
         [ 10.,  17.,   9.],
         [  8.,  18.,   9.],
         [  8.,  18.,   9.]],

        [[108., 103.,  84.],
         [124., 123., 103.],
         [118., 116.,  95.],
         ...,
         [ 10.,  17.,   9.],
         [ 10.,  17.,   9.],
         [ 11.,  16.,   9.]],

        ...,

        [[163., 127., 129.],
         [157., 118., 119.],
         [ 26.,  37.,  20.],
         ...,
         [  6.,  11.,   5.],
         [  6.,  11.,   5.],
         [  5.,  10.,   4.]],

        [[ 94., 117.,  99.],
         [ 48.,  50.,  39.],
         [ 24.,  37.,  20.],
         ...,
         [  6.,  11.,   5.],
         [  6.,  11.,   5.],
         [  6.,  11.,   4.]],

        [[ 36.,  48.,  34.],
         [ 21.,  27.,  15.],
         [ 25.,  36.,  20.],
         ...,
         [  6.,  11.,   5.],
         [  9.,  12.,   5.],
         [  9.,  12.,   5.]]],


       [[[ 68.,  61.,  53.],
         [ 67.,  58.,  51.],
         [ 73.,  63.,  54.],
         ...,
         [ 92., 101.,  56.],
         [ 72.,  95.,  41.],
         [ 67.,  94.,  41.]],

        [[ 64.,  55.,  48.],
         [ 64.,  55.,  46.],
         [ 75.,  62.,  54.],
         ...,
         [ 95., 100.,  59.],
         [ 72.,  88.,  41.],
         [ 68.,  87.,  41.]],

        [[ 63.,  53.,  44.],
         [ 66.,  56.,  47.],
         [ 79.,  64.,  57.],
         ...,
         [103., 103.,  67.],
         [ 76.,  82.,  44.],
         [ 74.,  82.,  45.]],

        ...,

        [[133., 126.,  80.],
         [132., 128.,  81.],
         [138., 133.,  93.],
         ...,
         [ 99.,  99.,  91.],
         [ 92.,  89.,  80.],
         [ 84.,  80.,  71.]],

        [[132., 126.,  78.],
         [131., 127.,  79.],
         [134., 130.,  85.],
         ...,
         [104., 105., 100.],
         [ 92.,  92.,  84.],
         [ 85.,  82.,  73.]],

        [[131., 125.,  75.],
         [129., 126.,  75.],
         [131., 127.,  80.],
         ...,
         [114., 114., 112.],
         [102., 102.,  94.],
         [ 94.,  94.,  84.]]],


       [[[177., 196., 104.],
         [135., 159., 123.],
         [135., 159., 123.],
         ...,
         [ 19.,  43., 151.],
         [ 19.,  43., 151.],
         [ 19.,  43., 151.]],

        [[181., 202.,  99.],
         [139., 158., 112.],
         [139., 158., 112.],
         ...,
         [ 20.,  47., 150.],
         [ 20.,  47., 150.],
         [ 20.,  47., 150.]],

        [[165., 183.,  65.],
         [190., 210.,  97.],
         [190., 210.,  97.],
         ...,
         [ 20.,  48., 148.],
         [ 20.,  48., 148.],
         [ 20.,  48., 148.]],

        ...,

        [[201., 194.,  44.],
         [205., 200.,  48.],
         [205., 200.,  48.],
         ...,
         [101.,  97., 112.],
         [101.,  97., 112.],
         [120., 111., 138.]],

        [[196., 191.,  27.],
         [186., 181.,  15.],
         [186., 181.,  15.],
         ...,
         [ 80.,  80.,  82.],
         [ 80.,  80.,  82.],
         [ 53.,  52.,  50.]],

        [[201., 192.,  39.],
         [205., 196.,  39.],
         [205., 196.,  39.],
         ...,
         [125., 127., 114.],
         [125., 127., 114.],
         [ 87.,  89.,  68.]]],


       ...,


       [[[152., 178.,  68.],
         [147., 172.,  44.],
         [148., 174.,  51.],
         ...,
         [ 47.,  86.,   5.],
         [ 55.,  80.,  22.],
         [ 14.,  28.,   3.]],

        [[159., 178.,  52.],
         [155., 184.,  56.],
         [147., 182.,  62.],
         ...,
         [ 49.,  84.,   4.],
         [ 13.,  29.,   0.],
         [ 37.,  43.,  31.]],

        [[166., 203.,  37.],
         [158., 192.,  43.],
         [120., 153.,  20.],
         ...,
         [ 53.,  79.,  32.],
         [ 37.,  53.,   8.],
         [ 20.,  29.,   0.]],

        ...,

        [[106., 140., 105.],
         [ 83.,  96.,  68.],
         [ 33.,  21.,   9.],
         ...,
         [ 59.,  69.,  35.],
         [ 45.,  77.,  12.],
         [ 41.,  67.,  80.]],

        [[171., 208., 157.],
         [ 74., 107.,  50.],
         [  5.,   3.,  16.],
         ...,
         [ 63.,  71.,  34.],
         [ 30.,  58.,   7.],
         [ 36.,  64.,  68.]],

        [[191., 209., 171.],
         [104., 142.,  65.],
         [  2.,   4.,  25.],
         ...,
         [ 57.,  76.,  31.],
         [ 39.,  57.,  19.],
         [ 47.,  61.,  48.]]],


       [[[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        ...,

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]]],


       [[[ 92., 131., 164.],
         [ 91., 130., 163.],
         [ 91., 130., 163.],
         ...,
         [ 74., 110., 146.],
         [ 74., 110., 146.],
         [ 74., 110., 146.]],

        [[ 92., 131., 164.],
         [ 91., 130., 163.],
         [ 91., 130., 163.],
         ...,
         [ 74., 110., 146.],
         [ 74., 110., 146.],
         [ 74., 110., 146.]],

        [[ 94., 130., 164.],
         [ 94., 130., 164.],
         [ 94., 130., 164.],
         ...,
         [ 74., 110., 146.],
         [ 74., 110., 146.],
         [ 73., 109., 145.]],

        ...,

        [[ 94., 139., 181.],
         [ 94., 137., 180.],
         [ 94., 137., 180.],
         ...,
         [ 79., 107.,  92.],
         [ 79., 107.,  92.],
         [ 77., 107.,  83.]],

        [[ 99., 138., 179.],
         [ 99., 138., 179.],
         [ 99., 138., 179.],
         ...,
         [ 81., 111., 109.],
         [ 81., 111., 109.],
         [ 79., 110., 104.]],

        [[105., 140., 178.],
         [108., 142., 179.],
         [108., 142., 179.],
         ...,
         [ 84., 114., 124.],
         [ 84., 114., 124.],
         [ 80., 110., 118.]]]], dtype=float32)>, <tf.Tensor: shape=(32, 5), dtype=float32, numpy=
array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.]], dtype=float32)>)

Preprocessing approaches

1. Inside the model

- preprocessing runs together with training, so training takes longer

2. Outside the model

- preprocessing outside the model is cumbersome and error-prone

Using tf.keras.layers.experimental.preprocessing (preprocessing layers), preprocessing can flexibly be done either inside or outside the model

=> a layer is a callable, so it can also be used with map
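
A minimal sketch of both options with the same layer (the Rescaling choice is illustrative):

rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

# outside the model: layers are callables, so they work with map
train_scaled = train.map(lambda x, y: (rescale(x), y))

# inside the model: the very same layer becomes part of the graph
model = tf.keras.Sequential([
    tf.keras.Input((256, 256, 3)),
    rescale,
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
])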

Lightweight models

m1 = tf.keras.applications.MobileNet()

m1.summary()
Model: "mobilenet_1.00_224"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (ReLU)        (None, 112, 112, 32)      0         
_________________________________________________________________
conv_pw_1 (Conv2D)           (None, 112, 112, 64)      2048      
_________________________________________________________________
conv_pw_1_bn (BatchNormaliza (None, 112, 112, 64)      256       
_________________________________________________________________
conv_pw_1_relu (ReLU)        (None, 112, 112, 64)      0         
_________________________________________________________________
conv_pad_2 (ZeroPadding2D)   (None, 113, 113, 64)      0         
_________________________________________________________________
conv_dw_2 (DepthwiseConv2D)  (None, 56, 56, 64)        576       
_________________________________________________________________
conv_dw_2_bn (BatchNormaliza (None, 56, 56, 64)        256       
_________________________________________________________________
conv_dw_2_relu (ReLU)        (None, 56, 56, 64)        0         
_________________________________________________________________
conv_pw_2 (Conv2D)           (None, 56, 56, 128)       8192      
_________________________________________________________________
conv_pw_2_bn (BatchNormaliza (None, 56, 56, 128)       512       
_________________________________________________________________
conv_pw_2_relu (ReLU)        (None, 56, 56, 128)       0         
_________________________________________________________________
conv_dw_3 (DepthwiseConv2D)  (None, 56, 56, 128)       1152      
_________________________________________________________________
conv_dw_3_bn (BatchNormaliza (None, 56, 56, 128)       512       
_________________________________________________________________
conv_dw_3_relu (ReLU)        (None, 56, 56, 128)       0         
_________________________________________________________________
conv_pw_3 (Conv2D)           (None, 56, 56, 128)       16384     
_________________________________________________________________
conv_pw_3_bn (BatchNormaliza (None, 56, 56, 128)       512       
_________________________________________________________________
conv_pw_3_relu (ReLU)        (None, 56, 56, 128)       0         
_________________________________________________________________
conv_pad_4 (ZeroPadding2D)   (None, 57, 57, 128)       0         
_________________________________________________________________
conv_dw_4 (DepthwiseConv2D)  (None, 28, 28, 128)       1152      
_________________________________________________________________
conv_dw_4_bn (BatchNormaliza (None, 28, 28, 128)       512       
_________________________________________________________________
conv_dw_4_relu (ReLU)        (None, 28, 28, 128)       0         
_________________________________________________________________
conv_pw_4 (Conv2D)           (None, 28, 28, 256)       32768     
_________________________________________________________________
conv_pw_4_bn (BatchNormaliza (None, 28, 28, 256)       1024      
_________________________________________________________________
conv_pw_4_relu (ReLU)        (None, 28, 28, 256)       0         
_________________________________________________________________
conv_dw_5 (DepthwiseConv2D)  (None, 28, 28, 256)       2304      
_________________________________________________________________
conv_dw_5_bn (BatchNormaliza (None, 28, 28, 256)       1024      
_________________________________________________________________
conv_dw_5_relu (ReLU)        (None, 28, 28, 256)       0         
_________________________________________________________________
conv_pw_5 (Conv2D)           (None, 28, 28, 256)       65536     
_________________________________________________________________
conv_pw_5_bn (BatchNormaliza (None, 28, 28, 256)       1024      
_________________________________________________________________
conv_pw_5_relu (ReLU)        (None, 28, 28, 256)       0         
_________________________________________________________________
conv_pad_6 (ZeroPadding2D)   (None, 29, 29, 256)       0         
_________________________________________________________________
conv_dw_6 (DepthwiseConv2D)  (None, 14, 14, 256)       2304      
_________________________________________________________________
conv_dw_6_bn (BatchNormaliza (None, 14, 14, 256)       1024      
_________________________________________________________________
conv_dw_6_relu (ReLU)        (None, 14, 14, 256)       0         
_________________________________________________________________
conv_pw_6 (Conv2D)           (None, 14, 14, 512)       131072    
_________________________________________________________________
conv_pw_6_bn (BatchNormaliza (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_pw_6_relu (ReLU)        (None, 14, 14, 512)       0         
_________________________________________________________________
conv_dw_7 (DepthwiseConv2D)  (None, 14, 14, 512)       4608      
_________________________________________________________________
conv_dw_7_bn (BatchNormaliza (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_dw_7_relu (ReLU)        (None, 14, 14, 512)       0         
_________________________________________________________________
conv_pw_7 (Conv2D)           (None, 14, 14, 512)       262144    
_________________________________________________________________
conv_pw_7_bn (BatchNormaliza (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_pw_7_relu (ReLU)        (None, 14, 14, 512)       0         
_________________________________________________________________
conv_dw_8 (DepthwiseConv2D)  (None, 14, 14, 512)       4608      
_________________________________________________________________
conv_dw_8_bn (BatchNormaliza (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_dw_8_relu (ReLU)        (None, 14, 14, 512)       0         
_________________________________________________________________
conv_pw_8 (Conv2D)           (None, 14, 14, 512)       262144    
_________________________________________________________________
conv_pw_8_bn (BatchNormaliza (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_pw_8_relu (ReLU)        (None, 14, 14, 512)       0         
_________________________________________________________________
conv_dw_9 (DepthwiseConv2D)  (None, 14, 14, 512)       4608      
_________________________________________________________________
conv_dw_9_bn (BatchNormaliza (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_dw_9_relu (ReLU)        (None, 14, 14, 512)       0         
_________________________________________________________________
conv_pw_9 (Conv2D)           (None, 14, 14, 512)       262144    
_________________________________________________________________
conv_pw_9_bn (BatchNormaliza (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_pw_9_relu (ReLU)        (None, 14, 14, 512)       0         
_________________________________________________________________
conv_dw_10 (DepthwiseConv2D) (None, 14, 14, 512)       4608      
_________________________________________________________________
conv_dw_10_bn (BatchNormaliz (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_dw_10_relu (ReLU)       (None, 14, 14, 512)       0         
_________________________________________________________________
conv_pw_10 (Conv2D)          (None, 14, 14, 512)       262144    
_________________________________________________________________
conv_pw_10_bn (BatchNormaliz (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_pw_10_relu (ReLU)       (None, 14, 14, 512)       0         
_________________________________________________________________
conv_dw_11 (DepthwiseConv2D) (None, 14, 14, 512)       4608      
_________________________________________________________________
conv_dw_11_bn (BatchNormaliz (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_dw_11_relu (ReLU)       (None, 14, 14, 512)       0         
_________________________________________________________________
conv_pw_11 (Conv2D)          (None, 14, 14, 512)       262144    
_________________________________________________________________
conv_pw_11_bn (BatchNormaliz (None, 14, 14, 512)       2048      
_________________________________________________________________
conv_pw_11_relu (ReLU)       (None, 14, 14, 512)       0         
_________________________________________________________________
conv_pad_12 (ZeroPadding2D)  (None, 15, 15, 512)       0         
_________________________________________________________________
conv_dw_12 (DepthwiseConv2D) (None, 7, 7, 512)         4608      
_________________________________________________________________
conv_dw_12_bn (BatchNormaliz (None, 7, 7, 512)         2048      
_________________________________________________________________
conv_dw_12_relu (ReLU)       (None, 7, 7, 512)         0         
_________________________________________________________________
conv_pw_12 (Conv2D)          (None, 7, 7, 1024)        524288    
_________________________________________________________________
conv_pw_12_bn (BatchNormaliz (None, 7, 7, 1024)        4096      
_________________________________________________________________
conv_pw_12_relu (ReLU)       (None, 7, 7, 1024)        0         
_________________________________________________________________
conv_dw_13 (DepthwiseConv2D) (None, 7, 7, 1024)        9216      
_________________________________________________________________
conv_dw_13_bn (BatchNormaliz (None, 7, 7, 1024)        4096      
_________________________________________________________________
conv_dw_13_relu (ReLU)       (None, 7, 7, 1024)        0         
_________________________________________________________________
conv_pw_13 (Conv2D)          (None, 7, 7, 1024)        1048576   
_________________________________________________________________
conv_pw_13_bn (BatchNormaliz (None, 7, 7, 1024)        4096      
_________________________________________________________________
conv_pw_13_relu (ReLU)       (None, 7, 7, 1024)        0         
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024)              0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 1, 1, 1024)        0         
_________________________________________________________________
dropout (Dropout)            (None, 1, 1, 1024)        0         
_________________________________________________________________
conv_preds (Conv2D)          (None, 1, 1, 1000)        1025000   
_________________________________________________________________
reshape_2 (Reshape)          (None, 1000)              0         
_________________________________________________________________
predictions (Activation)     (None, 1000)              0         
=================================================================
Total params: 4,253,864
Trainable params: 4,231,976
Non-trainable params: 21,888
_________________________________________________________________

 

tf.keras.utils.plot_model(m1,show_layer_names=True)

Convolution variants

1. Convolution

2. Grouped convolution

- Splits the convolution into groups to reduce the amount of computation

3. 1x1 convolution

- A convolution that works element-wise across channels; used to reduce the channel dimension and to add non-linearity

4. Depth-wise convolution

- Performs the convolution separately for each channel

- Each filter only sees its own depth (channel)

5. Depth-wise separable convolution - depth-wise convolution + 1x1 convolution

Depth-wise convolution

Depth-wise convolution operates on each channel independently.

It was devised because standard convolution always mixes all channels together and cannot extract the spatial features of an individual channel.

 

Because each channel is processed on its own, fewer parameters are involved in a single operation.

Depth-wise convolution performs worse than ordinary convolution, but since the goal is a lightweight model, some accuracy has to be traded away.

Whenever possible, the primary goal should be to slim the model down while preserving accuracy.

A drawback is that the output depth is tied to the input depth (the depths must match).

Depth-wise separable convolution

Depthwise convolution + 1x1 convolution
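As a quick check on the parameter savings, here is a minimal sketch (a 3x3 kernel, 32 input channels and 64 output channels are assumed; the names standard/separable are illustrative) comparing a standard convolution with its depthwise separable counterpart:

import tensorflow as tf

inputs = tf.keras.Input((64, 64, 32))

# standard convolution: 3*3*32*64 + 64 = 18,496 parameters
standard = tf.keras.models.Model(
    inputs, tf.keras.layers.Conv2D(64, 3, padding='same')(inputs))

# depthwise (3*3*32 + 32 = 320) followed by pointwise 1x1 (1*1*32*64 + 64 = 2,112)
x = tf.keras.layers.DepthwiseConv2D(3, padding='same')(inputs)
x = tf.keras.layers.Conv2D(64, 1)(x)  # the 1x1 convolution mixes the channels back together
separable = tf.keras.models.Model(inputs, x)

print(standard.count_params())   # 18496
print(separable.count_params())  # 2432 - roughly 7.6x fewer parameters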

 

m2 = tf.keras.applications.MobileNetV2() # adds residual (ResNet-style) connections

tf.keras.utils.plot_model(m2, show_layer_names=True)

x = tf.keras.layers.BatchNormalization()
x.build((None,2))
x(tf.constant([[1. ,2.]]))

# <tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.99950033, 1.9990007 ]], dtype=float32)>
x.weights # gamma and beta are found through training
# [<tf.Variable 'gamma:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>,
#  <tf.Variable 'beta:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>,
#  <tf.Variable 'moving_mean:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>,
#  <tf.Variable 'moving_variance:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>]
x.trainable_weights
# [<tf.Variable 'gamma:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>,
#  <tf.Variable 'beta:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]
x.non_trainable_weights # updated by simple running statistics, not by backpropagation
# [<tf.Variable 'moving_mean:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>,
#  <tf.Variable 'moving_variance:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>]
input_ = tf.keras.Input((28,28,1))
x = tf.keras.layers.MaxPool2D(2,2)(input_)
model = tf.keras.models.Model(input_,x)

model.summary() # pooling only performs a fixed computation, so it has no parameters and nothing to learn
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 1)         0         
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
data_augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
    ]
)

preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

IMG_SHAPE = IMG_SIZE + (3,) # IMG_SIZE = (160, 160) is defined with the dataset code further below
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

prediction_layer = tf.keras.layers.Dense(1)

inputs = tf.keras.Input(shape=(160,160,3)) # accepts batches of 160x160 RGB images
x = data_augmentation(inputs) # apply augmentation
x = preprocess_input(x)  # preprocessing
x = base_model(x, training=False) # training=False keeps BatchNorm in inference mode: it uses the stored moving mean/variance instead of per-batch statistics
x = global_average_layer(x) 
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)   # only prediction_layer is updated by backpropagation; the frozen layers just run forward computations
model = tf.keras.Model(inputs, outputs)
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
              

model.summary()
Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_7 (InputLayer)         [(None, 160, 160, 3)]     0         
_________________________________________________________________
sequential_2 (Sequential)    (None, 160, 160, 3)       0         
_________________________________________________________________
tf.math.truediv_2 (TFOpLambd (None, 160, 160, 3)       0         
_________________________________________________________________
tf.math.subtract_2 (TFOpLamb (None, 160, 160, 3)       0         
_________________________________________________________________
mobilenetv2_1.00_160 (Functi (None, 5, 5, 1280)        2257984   
_________________________________________________________________
global_average_pooling2d_2 ( (None, 1280)              0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 1281      
=================================================================
Total params: 2,259,265
Trainable params: 2,225,153
Non-trainable params: 34,112
_________________________________________________________________
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

from tensorflow.keras.preprocessing import image_dataset_from_directory

BATCH_SIZE = 32
IMG_SIZE = (160, 160)
initial_epochs = 10

train_dataset = image_dataset_from_directory('cats_and_dogs_filtered/train',
                                             shuffle=True,
                                             batch_size=BATCH_SIZE,
                                             image_size=IMG_SIZE)

validation_dataset = image_dataset_from_directory('cats_and_dogs_filtered/validation',
                                                  shuffle=True,
                                                  batch_size=BATCH_SIZE,
                                                  image_size=IMG_SIZE)
                                                  
# Found 2000 files belonging to 2 classes.
# Found 1000 files belonging to 2 classes.
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)
print(feature_batch.shape)
# (32, 5, 5, 1280)
history = model.fit(train_dataset,
                    epochs=initial_epochs,
                    validation_data=validation_dataset)

Epoch 1/10
63/63 [==============================] - 252s 4s/step - loss: 0.3724 - accuracy: 0.8405 - val_loss: 0.1214 - val_accuracy: 0.9730
Epoch 2/10
63/63 [==============================] - 162s 3s/step - loss: 0.1438 - accuracy: 0.9430 - val_loss: 0.0879 - val_accuracy: 0.9520
Epoch 3/10
63/63 [==============================] - 161s 3s/step - loss: 0.1267 - accuracy: 0.9525 - val_loss: 0.0871 - val_accuracy: 0.9580
Epoch 4/10
63/63 [==============================] - 161s 3s/step - loss: 0.0929 - accuracy: 0.9590 - val_loss: 0.0485 - val_accuracy: 0.9770
Epoch 5/10
63/63 [==============================] - 162s 3s/step - loss: 0.0856 - accuracy: 0.9660 - val_loss: 0.3747 - val_accuracy: 0.9060
Epoch 6/10
63/63 [==============================] - 160s 3s/step - loss: 0.0874 - accuracy: 0.9655 - val_loss: 0.0987 - val_accuracy: 0.9780
Epoch 7/10
63/63 [==============================] - 160s 3s/step - loss: 0.0817 - accuracy: 0.9755 - val_loss: 0.0573 - val_accuracy: 0.9760
Epoch 8/10
63/63 [==============================] - 160s 3s/step - loss: 0.0556 - accuracy: 0.9830 - val_loss: 0.0556 - val_accuracy: 0.9810
Epoch 9/10
63/63 [==============================] - 160s 3s/step - loss: 0.0603 - accuracy: 0.9765 - val_loss: 0.0524 - val_accuracy: 0.9810
Epoch 10/10
63/63 [==============================] - 160s 3s/step - loss: 0.0663 - accuracy: 0.9815 - val_loss: 0.0380 - val_accuracy: 0.9840
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

When TensorFlow trains with a validation dataset included,

dropout is turned off and batch normalization runs in inference mode during evaluation, so the validation loss often comes out lower than the training loss

(such a case is not necessarily an underfitting problem)

Fine tuning: advanced techniques

Fine tuning

Starting from an already-trained model, the architecture is adapted to a new task and the weights are updated further from their pretrained values.

1. Start from a model whose performance is already reasonably good

2. Freeze part of it (trainable=False) and retrain the remainder with a low learning rate (a recompile sketch follows the code below)

3. The new training data must not differ too much from the original training data

4. Do not re-initialize the layers that will be retrained; keep the pretrained weights, otherwise the existing capability is lost

Un-freezing the top layers

# This is fine tuning, so start from the already-trained model

base_model.trainable = True # first make everything trainable

fine_tune_at = 100

for layer in base_model.layers[:fine_tune_at]: # freeze everything below layer 100
  layer.trainable = False                      # only the top layers, closest to the new dog/cat classifier, are retrained
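A minimal continuation sketch (mirroring the TensorFlow transfer-learning tutorial; fine_tune_epochs is an assumed name): recompile with a 10x lower learning rate and resume training from where the first run stopped.

model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10), # much lower learning rate
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

fine_tune_epochs = 10
history_fine = model.fit(train_dataset,
                         epochs=initial_epochs + fine_tune_epochs,
                         initial_epoch=history.epoch[-1], # continue counting epochs from the first run
                         validation_data=validation_dataset)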

A simple recipe for fine tuning

1. Retrain starting from the last layers whenever possible (the last layers detect the most specialized features)

2. Lower the learning rate

3. Reduce the number of epochs

 

Catastrophic forgetting

- When a model is trained on a different kind of data, its performance on the previously learned data drops sharply

- Even when the old and new training datasets are related, a large amount of the information about the old dataset is lost

 

Semantic shift

- When new kinds of data are trained into an existing model, the weights change and the meaning they encoded drifts

Incremental learning

Training on data with entirely different characteristics, so that the existing model grows the ability to classify that new kind of data as well

 

 


Transfer learning

A way to solve complex problems even with only a small amount of data:
train a small dataset on top of an already-trained model to get the best possible performance.

The early layers can only detect simple features (such as edges and lines), but combinations of layers gradually detect more and more complex features.

So the later the layer, the more its detected features are specialized toward the data it was trained on.

The more similar two datasets are, the more similar the features extracted by the early layers will be.

For example, to transfer a model trained on cats to a dog classifier, the early layers are left untouched and only the later layers are trained on the new 'dog' data to produce the new model.

When transferring to similar data, the convolutional layers that extract features are reused as-is,

and only the fully connected part is changed.

Two flavors of transfer learning

1. Feature extraction

- Reuse only the convolutional layers of a model trained on similar images

- Replace the fully connected layers with a new classifier for the new data

- Example: a dog-vs-cat classification model

2. Fine tuning

- Use a model trained on general-purpose images as-is, adjusting only the fully connected part slightly

- Example: reusing a model trained on ImageNet

Fine tuning

import tensorflow as tf 
import numpy as np

vgg = tf.keras.applications.VGG16(include_top=True)

tf.keras.utils.plot_model(vgg, rankdir='BT')

vgg.summary()
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_3 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
predictions (Dense)          (None, 1000)              4097000   
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________
vgg.input, vgg.output, vgg.layers
# (<KerasTensor: shape=(None, 224, 224, 3) dtype=float32 (created by layer 'input_3')>,
#  <KerasTensor: shape=(None, 1000) dtype=float32 (created by layer 'predictions')>,
#  [<keras.engine.input_layer.InputLayer at 0x7f580e71d090>,
#   <keras.layers.convolutional.Conv2D at 0x7f580f887390>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e71f110>,
#   <keras.layers.pooling.MaxPooling2D at 0x7f580e78f710>,
#   <keras.layers.convolutional.Conv2D at 0x7f580f830d10>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e788110>,
#   <keras.layers.pooling.MaxPooling2D at 0x7f580e7938d0>,
#   <keras.layers.convolutional.Conv2D at 0x7f580f821ed0>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e771b50>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e77a110>,
#   <keras.layers.pooling.MaxPooling2D at 0x7f580e76be10>,
#   <keras.layers.convolutional.Conv2D at 0x7f580f82ec10>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e75a310>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e76bbd0>,
#   <keras.layers.pooling.MaxPooling2D at 0x7f580e79bcd0>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e7cbf50>,
#   <keras.layers.convolutional.Conv2D at 0x7f580e79ca90>,
#   <keras.layers.convolutional.Conv2D at 0x7f580f8a3250>,
#   <keras.layers.pooling.MaxPooling2D at 0x7f5880044250>,
#   <keras.layers.core.Flatten at 0x7f5880040b90>,
#   <keras.layers.core.Dense at 0x7f5880040890>,
#   <keras.layers.core.Dense at 0x7f580e7d8c10>,
#   <keras.layers.core.Dense at 0x7f588002ee50>])
#vgg.layers.pop() # remove the last layer so a new one can be attached
# <keras.layers.core.Dense at 0x7f580f8308d0>
vgg.layers.append(tf.keras.layers.Dense(5)) # append a new last layer to the list (this only mutates the list; the model itself is rebuilt below)

vgg.layers
# [<keras.engine.input_layer.InputLayer at 0x7f5880019110>,
#  <keras.layers.convolutional.Conv2D at 0x7f588003ed90>,
#  <keras.layers.convolutional.Conv2D at 0x7f580f898810>,
#  <keras.layers.pooling.MaxPooling2D at 0x7f580f8ce050>,
#  <keras.layers.convolutional.Conv2D at 0x7f580e7df8d0>,
#  <keras.layers.convolutional.Conv2D at 0x7f580f8c8ad0>,
#  <keras.layers.pooling.MaxPooling2D at 0x7f588003ef10>,
#  <keras.layers.convolutional.Conv2D at 0x7f580e7e7450>,
#  <keras.layers.convolutional.Conv2D at 0x7f588005f410>,
#  <keras.layers.convolutional.Conv2D at 0x7f58d9d08790>,
#  <keras.layers.pooling.MaxPooling2D at 0x7f580e80b0d0>,
#  <keras.layers.convolutional.Conv2D at 0x7f580e7e7750>,
#  <keras.layers.convolutional.Conv2D at 0x7f580f843ed0>,
#  <keras.layers.convolutional.Conv2D at 0x7f580f841350>,
#  <keras.layers.pooling.MaxPooling2D at 0x7f580f83f050>,
#  <keras.layers.convolutional.Conv2D at 0x7f580f83dfd0>,
#  <keras.layers.convolutional.Conv2D at 0x7f580f8381d0>,
#  <keras.layers.convolutional.Conv2D at 0x7f580f83db10>,
#  <keras.layers.pooling.MaxPooling2D at 0x7f580f8349d0>,
#  <keras.layers.core.Flatten at 0x7f580f82e950>,
#  <keras.layers.core.Dense at 0x7f580f82e850>,
#  <keras.layers.core.Dense at 0x7f580f82e190>,
#  <keras.layers.core.Dense at 0x7f580f8308d0>]
mylayer = vgg.layers

vgg.layers[1].trainable
# True

vgg.trainable = False # do not update the pretrained weights
mylayer = []
for i in vgg.layers[:-1]:
  i.trainable = False 
  mylayer.append(i)
mylayer.append(tf.keras.layers.Dense(5))

model = tf.keras.models.Sequential(mylayer)

model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 20485     
=================================================================
Total params: 134,281,029
Trainable params: 20,485
Non-trainable params: 134,260,544
_________________________________________________________________

Accuracy had reached the desired level, but actually using these models was not realistic:

higher accuracy meant more memory and more computation, which made the models impractical to deploy.

So from 2017 onward, models started to focus on being lightweight.

 

google = tf.keras.applications.InceptionV3()
tf.keras.utils.plot_model(google) 
# GoogLeNet/Inception is structurally complex, which makes it a poor base for transfer learning /
# not easy to fine-tune

 

Sequential approach

vgg = tf.keras.applications.VGG16(include_top=True)
# from a fine-tuning point of view / # the input must be 224x224

vgg.trainable = False # freeze every layer

mylayer = vgg.layers

mylayer.pop()
# <keras.layers.core.Dense at 0x7f580e1e7f90>
model = tf.keras.models.Sequential(mylayer + [tf.keras.layers.Dense(5)])
model.summary() # with Flatten in the network, the input size must match exactly
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 20485     
=================================================================
Total params: 134,281,029
Trainable params: 20,485
Non-trainable params: 134,260,544
_________________________________________________________________

Model (functional) approach

input_ = vgg.input
x = vgg.layers[1](input_) # re-wire the existing layers one by one with the functional API
x = vgg.layers[2](x)
x = vgg.layers[3](x)
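Chaining every layer by hand is tedious; a minimal sketch of the same idea done with a loop (model_fn is an assumed name), attaching a new 5-way head in place of the old classifier:

x = vgg.input
for layer in vgg.layers[1:-1]:  # skip the InputLayer and the old 1000-way classifier
    layer.trainable = False     # freeze the pretrained weights
    x = layer(x)
outputs = tf.keras.layers.Dense(5)(x)
model_fn = tf.keras.models.Model(vgg.input, outputs)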

import numpy as np

im = tf.keras.preprocessing.image.load_img('people.jpg')
im2 = np.resize(np.array(im), (224,224,3)) # force the array to the expected input size (np.resize repeats/crops raw values rather than interpolating; tf.image.resize would be the proper image resize)

im2.shape
# (224, 224, 3)

model(im2[np.newaxis])
# <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
# array([[ 0.60317594, -0.7978455 ,  2.3856583 , -0.96538734, -0.4498136 ]],
#       dtype=float32)>

Feature extraction

In feature extraction the FC part is gone, so the convolutional output has to be converted into a single 1-D vector before it can be handed to a new FC head

# Treat the choice as a hyperparameter / pick whichever fits the situation (a shape demo follows the list)

1. Flatten

2. Global average pooling
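A quick shape check (illustrative feature-map sizes) of why global average pooling tolerates any input size while Flatten ties the FC head to one size:

feat = tf.random.normal((1, 7, 7, 512))                # e.g. VGG16 output for a 224x224 input
print(tf.keras.layers.GlobalAvgPool2D()(feat).shape)   # (1, 512) - independent of the 7x7
print(tf.keras.layers.Flatten()(feat).shape)           # (1, 25088) = 7*7*512 - size-dependent

feat2 = tf.random.normal((1, 10, 10, 512))             # a larger input image
print(tf.keras.layers.GlobalAvgPool2D()(feat2).shape)  # still (1, 512)
print(tf.keras.layers.Flatten()(feat2).shape)          # (1, 51200) - a Dense head built for 25088 would break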

 

vgg2 = tf.keras.applications.VGG16(include_top=False) 
# include_top=False loads the network without its classification head, which is ideal for feature extraction

vgg2.summary() # no need to match a fixed input size
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_8 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
vgg2.trainable = False 
vgg2.layers[0] = tf.keras.layers.InputLayer((224,224,3)) # note: assigning into vgg2.layers does not actually rewire the model; the list is only reused below
layers = vgg2.layers+[tf.keras.layers.GlobalAvgPool2D()] # unlike Flatten, this gives a model that can produce output for any input size
layers = layers + [tf.keras.layers.Dense(5)]
model2 = tf.keras.models.Sequential(layers) 

model2(np.array(im)[np.newaxis])
# <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
# array([[-1.9419088,  6.326975 , -2.2549076, -3.0776834,  1.4872327]],
#       dtype=float32)>
model2(im2[np.newaxis])
# <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
# array([[ 2.4478257 ,  1.1684433 , -8.583718  , -0.92841846,  6.4567146 ]],
#       dtype=float32)>
model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
global_average_pooling2d (Gl (None, 512)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 2565      
=================================================================
Total params: 14,717,253
Trainable params: 2,565
Non-trainable params: 14,714,688
_________________________________________________________________

 

vgg2 = tf.keras.applications.VGG16(include_top=False, input_shape=(224,224,3)) 
vgg2.trainable = False 
layers = vgg2.layers+[tf.keras.layers.GlobalAvgPool2D()] 
layers = layers + [tf.keras.layers.Dense(5)]
model2 = tf.keras.models.Sequential(layers)
model2.summary()
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
global_average_pooling2d_1 ( (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 5)                 2565      
=================================================================
Total params: 14,717,253
Trainable params: 2,565
Non-trainable params: 14,714,688
_________________________________________________________________

Transfer learning + preprocessing layer

im = tf.keras.preprocessing.image.load_img('people.jpg')
im2 = np.resize(np.array(im), (224,224,3)) 

# VGG preprocessing subtracts the per-channel (ImageNet) mean, i.e. zero-centering
tf.keras.applications.vgg16.preprocess_input(im2) 

array([[[ 73.061    ,  72.221    ,  77.32     ],
        [ 75.061    ,  74.221    ,  79.32     ],
        [ 76.061    ,  75.221    ,  80.32     ],
        ...,
        [  4.060997 ,  26.221    ,  45.32     ],
        [  7.060997 ,  29.221    ,  48.32     ],
        [ 12.060997 ,  34.221    ,  53.32     ]],

       [[ 15.060997 ,  29.221    ,  43.32     ],
        [ 10.060997 ,  28.221    ,  44.32     ],
        [  7.060997 ,  26.221    ,  48.32     ],
        ...,
        [ 67.061    ,  84.221    , 102.32     ],
        [106.061    , 123.221    , 131.32     ],
        [ 56.060997 ,  75.221    ,  91.32     ]],

       [[ 41.060997 ,  61.221    ,  87.32     ],
        [ 43.060997 ,  63.221    ,  89.32     ],
        [ 45.060997 ,  65.221    ,  91.32     ],
        ...,
        [ 29.060997 ,  28.221    ,  33.32     ],
        [ 29.060997 ,  34.221    ,  44.32     ],
        [ 38.060997 ,  49.221    ,  64.32     ]],

       ...,

       [[  5.060997 ,  22.221    ,  40.32     ],
        [  6.060997 ,  23.221    ,  41.32     ],
        [  6.060997 ,  23.221    ,  41.32     ],
        ...,
        [-43.939003 , -49.779    , -53.68     ],
        [-37.939003 , -46.779    , -52.68     ],
        [-47.939003 , -58.779    , -64.68     ]],

       [[-46.939003 , -61.779    , -68.68     ],
        [-38.939003 , -53.779    , -60.68     ],
        [-39.939003 , -54.779    , -61.68     ],
        ...,
        [ 31.060997 ,  46.221    ,  63.32     ],
        [ 30.060997 ,  45.221    ,  62.32     ],
        [ 32.060997 ,  45.221    ,  62.32     ]],

       [[ 31.060997 ,  45.221    ,  64.32     ],
        [ 31.060997 ,  45.221    ,  64.32     ],
        [ 29.060997 ,  44.221    ,  61.32     ],
        ...,
        [-20.939003 , -25.779    ,  -3.6800003],
        [-11.939003 , -18.779    ,   3.3199997],
        [ -3.939003 , -10.778999 ,  11.32     ]]], dtype=float32)
pretrain_model = tf.keras.applications.VGG16(include_top=False, input_shape=(224,224,3))
pretrain_model.trainable = False
model = tf.keras.models.Sequential([
    tf.keras.layers.Lambda(lambda data: tf.keras.applications.vgg16.preprocess_input(
        tf.cast(data, tf.float32)), input_shape=(224,224,3) # a Lambda layer so every batch is preprocessed inside the model
    ), 
    pretrain_model,
    tf.keras.layers.GlobalAvgPool2D(),
    tf.keras.layers.Dense(5)
]) # this prevents the mistake of forgetting to preprocess the inputs
model.summary()
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lambda_2 (Lambda)            (None, 224, 224, 3)       0         
_________________________________________________________________
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_________________________________________________________________
global_average_pooling2d_2 ( (None, 512)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 5)                 2565      
=================================================================
Total params: 14,717,253
Trainable params: 2,565
Non-trainable params: 14,714,688
_________________________________________________________________

Zero centered

Shifting the data so that its mean becomes 0 (for VGG, by subtracting the per-channel ImageNet mean, as above).

 

x = tf.keras.layers.Dense(5)

x.weights, x.trainable_weights, x.non_trainable_weights
# ([], [], []) - a layer's weights are only created on its first call (lazy build)

x(tf.constant([[1., 2.], [3., 4.]]))
# <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
# array([[-0.12723184,  1.5356824 , -0.45605123, -1.7108853 , -2.0955188 ],
#        [ 0.3099841 ,  2.8317063 , -0.42237318, -3.7084074 , -4.6590133 ]],
#       dtype=float32)>
x.weights, x.trainable_weights, x.non_trainable_weights

# ([<tf.Variable 'dense_14/kernel:0' shape=(2, 5) dtype=float32, numpy=
#   array([[ 0.56444776, -0.23965877,  0.4897293 , -0.28663665, -0.46797565],
#          [-0.3458398 ,  0.88767064, -0.47289026, -0.71212435, -0.81377155]],
#         dtype=float32)>,
#   <tf.Variable 'dense_14/bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>],
#  [<tf.Variable 'dense_14/kernel:0' shape=(2, 5) dtype=float32, numpy=
#   array([[ 0.56444776, -0.23965877,  0.4897293 , -0.28663665, -0.46797565],
#          [-0.3458398 ,  0.88767064, -0.47289026, -0.71212435, -0.81377155]],
#         dtype=float32)>,
#   <tf.Variable 'dense_14/bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>],
#  [])
y = tf.keras.layers.BatchNormalization()

y.built
# False

y.build((None,4))

y.built
# True
y.weights 
# [<tf.Variable 'gamma:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>,
#  <tf.Variable 'beta:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>,
#  <tf.Variable 'moving_mean:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>,
#  <tf.Variable 'moving_variance:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>]
# the moving mean and variance are not found through training, so they are not trainable weights
y.trainable_weights  
# [<tf.Variable 'gamma:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>,
#  <tf.Variable 'beta:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

y.trainable = False
y.trainable_weights
# []
y.non_trainable_weights
# [<tf.Variable 'gamma:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>,
#  <tf.Variable 'beta:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>,
#  <tf.Variable 'moving_mean:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>,
#  <tf.Variable 'moving_variance:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>]
model(im2[tf.newaxis], training=True)
# <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
# array([[-1.6044631,  4.722974 ,  2.0047941, -2.9608102,  1.3918904]],
#       dtype=float32)>

Setting layer.trainable = False on a batch normalization layer stops it from learning and makes it use its moving averages as-is.

Therefore, to keep that learned state when un-freezing a model that contains batch normalization layers,

you must pass training=False when calling (predicting with) the base model, so the BN layers stay in inference mode.
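A minimal standalone sketch of the two modes (illustrative values; note that since TF 2.0, setting trainable=False on a BN layer also forces inference-mode behavior even when training=True):

bn = tf.keras.layers.BatchNormalization()
bn.build((None, 1))                  # starts with moving_mean=0, moving_variance=1

data = tf.constant([[10.], [20.]])

# inference mode: normalize with the stored moving statistics (0 and 1) -> output is close to the raw input
print(bn(data, training=False))      # ~[[10.], [20.]]

# training mode: normalize with the batch mean/variance (15 and 25) and update the moving averages
print(bn(data, training=True))       # ~[[-1.], [1.]]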

 

 

 


Numerical stability

Numerical stability is about how the input data affects the behavior of a model or algorithm.

Because computers carry out floating-point arithmetic only approximately, very large or very small input values can make a computation go wrong.

In such cases a parametric trick is applied to keep the computation well-defined and secure numerical stability.
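A classic example of such a trick (a standalone sketch): softmax overflows for large logits unless the maximum is subtracted first, which changes nothing mathematically but keeps exp() in a safe range.

import numpy as np

def softmax_naive(x):
    e = np.exp(x)               # overflows for large x
    return e / e.sum()

def softmax_stable(x):
    e = np.exp(x - x.max())     # identical result: exp(x-c)/sum(exp(x-c)) == exp(x)/sum(exp(x))
    return e / e.sum()

x = np.array([1000., 1001., 1002.])
print(softmax_naive(x))   # [nan nan nan] with an overflow warning
print(softmax_stable(x))  # [0.09003057 0.24472847 0.66524096]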

 

Things that break numerical stability

1. Activation functions

- With activation functions such as sigmoid or tanh, the derivative becomes 0 for very large or very small inputs

(figure: the sigmoid function, whose derivative vanishes outside a narrow input range)
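A quick numerical check of this vanishing gradient (a sketch using tf.GradientTape):

import tensorflow as tf

for v in [0.0, 5.0, 20.0]:
    x = tf.Variable(v)
    with tf.GradientTape() as tape:
        y = tf.sigmoid(x)
    print(v, tape.gradient(y, x).numpy())
# 0.0  0.25         - the largest gradient sigmoid can produce
# 5.0  0.0066       - already tiny
# 20.0 ~2e-09       - numerically zero: the weight update vanishes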

Normalization vs Standardization vs Regularization

What the three have in common is that they are ways to make machine/deep learning training more efficient or to avoid overfitting

Normalization

A re-scaling method that makes every data point contribute at a comparable scale (importance)

1. Adjusts the range of the data

2. Adjusts the distribution of the data without distorting the relative differences

3. Maps values into the 0~1 range

4. Examples

- MinMaxScaler

- Standard score

- Student's t-statistic

- Studentized residual

- Standardized moment

- Coefficient of variation

!pip install -U mglearn
import mglearn
mglearn.plot_scaling.plot_scaling()

Standardization

A re-scaling method that expresses how far each data point lies from the mean of the distribution

1. Transforms values to have mean 0 and variance 1

2. Examples

- StandardScaler

- z-score normalization

Without normalization, an unbalanced data distribution can keep training from converging properly or make it take much longer (a small sketch of both scalers follows)
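A small NumPy sketch of the two re-scalings (the values are made up):

import numpy as np

X = np.array([[1., 200.], [2., 400.], [3., 600.]])  # two features on very different scales

# normalization (min-max): rescale each feature into [0, 1]
X_norm = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

# standardization (z-score): mean 0, variance 1 per feature
X_std = (X - X.mean(axis=0)) / X.std(axis=0)

print(X_norm)  # [[0. 0.], [0.5 0.5], [1. 1.]]
print(X_std)   # [[-1.2247 -1.2247], [0. 0.], [1.2247 1.2247]]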

Regularization

A way to prevent overfitting and improve generalization by putting a constraint (penalty) on the model,
usually applied by adjusting hyperparameters

A way to reduce model complexity while preserving the model's explanatory power (a Keras sketch follows the list)

1. Early stopping

2. Noisy input

3. Drop-out

4. Pruning & feature selection

5. Ensemble

6. L1, L2 (Lasso, Ridge)
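A sketch of combining three of these in Keras (an L2 weight penalty, drop-out, and early stopping); X_train/X_val and the layer sizes are placeholders:

model_reg = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.l2(0.01)), # L2 (Ridge) penalty on the weights
    tf.keras.layers.Dropout(0.5),                                             # drop-out
    tf.keras.layers.Dense(1)
])
model_reg.compile(optimizer='adam', loss='mse')

early_stop = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
# model_reg.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=[early_stop])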

Initializer

Breaking the symmetry

Breaking the symmetry is a condition that must be respected when initializing a machine learning model such as a neural network.

If all the weights of the model are initialized to the same value, it becomes hard or impossible for the weights to diverge from one another during training; this is the "symmetry".

This symmetry has to be broken before proper learning can happen.

If the weights are initialized to 0, the output is 0 no matter what comes in, so initializing to 0 must be avoided.

If all weights are initialized to one identical value, every unit produces the same output and receives the same update, so identical initialization must be avoided as well.

Even random initialization can occasionally be a problem:

by bad luck the weights could still come out as 0 or as identical values,

so the values should be drawn randomly but from a constrained range.

Drawing the weights randomly from a normal distribution with mean 0 and standard deviation 1 lets the weight updates proceed normally (in practice, size-scaled variants such as Glorot or He initialization are preferred; a sketch follows).
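A sketch of controlling the initializer explicitly in Keras (for Dense layers, glorot_uniform is already the default; the variable names are illustrative):

layer_zero = tf.keras.layers.Dense(4, kernel_initializer='zeros')             # symmetric: cannot learn
layer_norm = tf.keras.layers.Dense(4,
                 kernel_initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.))
layer_glorot = tf.keras.layers.Dense(4, kernel_initializer='glorot_uniform')  # range scaled to the layer size

x = tf.random.normal((2, 3))
print(layer_zero(x))   # all zeros - every unit behaves identically
print(layer_norm(x))   # symmetry broken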

 

Batch Normalization

Even with normalized inputs, performance can degrade as the layers get deeper.

One problem that arises in that situation is internal covariate shift.

Batch normalization is a technique created to prevent internal covariate shift from occurring.

(A 2018 follow-up paper actually showed that BN does not fix internal covariate shift, but that it works well regardless.)

Internal covariate shift: during training, parameter changes in the earlier layers shift a layer's input distribution into one that has little to do with the original distribution

Whitening

Transforming the data to have mean 0 and standard deviation 1

How BN works

 

1. Exploiting the fact that the mean of a sample approximates the mean of the whole dataset, whitening is performed per mini-batch during training

2. A tiny epsilon is added to the denominator of the whitening step so that it can never become 0

(this guarantees numerical stability)

3. A scale-and-shift step follows (learned parameters γ, β are added)

- BN sits just before the activation layer, and pushing whitened values straight through the activation risks reducing its non-linearity

- γ and β are therefore found through training, preserving the non-linear behavior

BN Test-time

기본적으로 ML에서 테스트할 때 학습했을 방법과 똑같이 사용한다

예를 들어 minmax를 했다면 테스트 할 때도 minmax를 한 후에 예측한다

BN 테스트할 때에도 모든 배치의 평균과 표준편차들을 이동평균을 하여 사용한다

이동평균: 이동평균은 전체 데이터 집합의 여러 하위 집합에 대한 일련의 평균을 만들어 데이터 요소를 분석하는 계산이다
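A compact NumPy sketch of both phases (momentum 0.9 is an assumed value; γ and β are left at their initial 1 and 0 for brevity):

import numpy as np

gamma, beta, eps, momentum = 1.0, 0.0, 1e-5, 0.9
moving_mean, moving_var = 0.0, 1.0

def bn_train(x):
    global moving_mean, moving_var
    mu, var = x.mean(), x.var()                                  # per-batch statistics (step 1)
    moving_mean = momentum * moving_mean + (1 - momentum) * mu   # running stats for test time
    moving_var  = momentum * moving_var  + (1 - momentum) * var
    return gamma * (x - mu) / np.sqrt(var + eps) + beta          # whiten + eps + scale/shift (steps 2-3)

def bn_test(x):
    # inference: reuse the moving averages collected during training
    return gamma * (x - moving_mean) / np.sqrt(moving_var + eps) + beta

batch = np.array([10., 12., 14.])
print(bn_train(batch))  # ~[-1.22, 0., 1.22]
print(bn_test(batch))   # uses moving_mean/var, not the batch's own statistics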

BN example

import tensorflow as tf 
resnet = tf.keras.applications.ResNet50()
resnet.summary()
Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
__________________________________________________________________________________________________
conv1_relu (Activation)         (None, 112, 112, 64) 0           conv1_bn[0][0]                   
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 114, 114, 64) 0           conv1_relu[0][0]                 
__________________________________________________________________________________________________
pool1_pool (MaxPooling2D)       (None, 56, 56, 64)   0           pool1_pad[0][0]                  
__________________________________________________________________________________________________
conv2_block1_1_conv (Conv2D)    (None, 56, 56, 64)   4160        pool1_pool[0][0]                 
__________________________________________________________________________________________________
conv2_block1_1_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_1_relu (Activation (None, 56, 56, 64)   0           conv2_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv2_block1_2_conv (Conv2D)    (None, 56, 56, 64)   36928       conv2_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv2_block1_2_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_2_relu (Activation (None, 56, 56, 64)   0           conv2_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv2_block1_0_conv (Conv2D)    (None, 56, 56, 256)  16640       pool1_pool[0][0]                 
__________________________________________________________________________________________________
conv2_block1_3_conv (Conv2D)    (None, 56, 56, 256)  16640       conv2_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv2_block1_0_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_3_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_add (Add)          (None, 56, 56, 256)  0           conv2_block1_0_bn[0][0]          
                                                                 conv2_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv2_block1_out (Activation)   (None, 56, 56, 256)  0           conv2_block1_add[0][0]           
__________________________________________________________________________________________________
conv2_block2_1_conv (Conv2D)    (None, 56, 56, 64)   16448       conv2_block1_out[0][0]           
__________________________________________________________________________________________________
conv2_block2_1_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv2_block2_1_relu (Activation (None, 56, 56, 64)   0           conv2_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv2_block2_2_conv (Conv2D)    (None, 56, 56, 64)   36928       conv2_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv2_block2_2_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv2_block2_2_relu (Activation (None, 56, 56, 64)   0           conv2_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv2_block2_3_conv (Conv2D)    (None, 56, 56, 256)  16640       conv2_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv2_block2_3_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv2_block2_add (Add)          (None, 56, 56, 256)  0           conv2_block1_out[0][0]           
                                                                 conv2_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv2_block2_out (Activation)   (None, 56, 56, 256)  0           conv2_block2_add[0][0]           
__________________________________________________________________________________________________
conv2_block3_1_conv (Conv2D)    (None, 56, 56, 64)   16448       conv2_block2_out[0][0]           
__________________________________________________________________________________________________
conv2_block3_1_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv2_block3_1_relu (Activation (None, 56, 56, 64)   0           conv2_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv2_block3_2_conv (Conv2D)    (None, 56, 56, 64)   36928       conv2_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv2_block3_2_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv2_block3_2_relu (Activation (None, 56, 56, 64)   0           conv2_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv2_block3_3_conv (Conv2D)    (None, 56, 56, 256)  16640       conv2_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv2_block3_3_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv2_block3_add (Add)          (None, 56, 56, 256)  0           conv2_block2_out[0][0]           
                                                                 conv2_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv2_block3_out (Activation)   (None, 56, 56, 256)  0           conv2_block3_add[0][0]           
__________________________________________________________________________________________________
conv3_block1_1_conv (Conv2D)    (None, 28, 28, 128)  32896       conv2_block3_out[0][0]           
__________________________________________________________________________________________________
conv3_block1_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_1_relu (Activation (None, 28, 28, 128)  0           conv3_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block1_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_2_relu (Activation (None, 28, 28, 128)  0           conv3_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_0_conv (Conv2D)    (None, 28, 28, 512)  131584      conv2_block3_out[0][0]           
__________________________________________________________________________________________________
conv3_block1_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block1_0_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_add (Add)          (None, 28, 28, 512)  0           conv3_block1_0_bn[0][0]          
                                                                 conv3_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_out (Activation)   (None, 28, 28, 512)  0           conv3_block1_add[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block1_out[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block2_1_relu (Activation (None, 28, 28, 128)  0           conv3_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block2_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block2_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block2_2_relu (Activation (None, 28, 28, 128)  0           conv3_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block2_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block2_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block2_add (Add)          (None, 28, 28, 512)  0           conv3_block1_out[0][0]           
                                                                 conv3_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block2_out (Activation)   (None, 28, 28, 512)  0           conv3_block2_add[0][0]           
__________________________________________________________________________________________________
conv3_block3_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block2_out[0][0]           
__________________________________________________________________________________________________
conv3_block3_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block3_1_relu (Activation (None, 28, 28, 128)  0           conv3_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block3_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block3_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block3_2_relu (Activation (None, 28, 28, 128)  0           conv3_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block3_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block3_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block3_add (Add)          (None, 28, 28, 512)  0           conv3_block2_out[0][0]           
                                                                 conv3_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block3_out (Activation)   (None, 28, 28, 512)  0           conv3_block3_add[0][0]           
__________________________________________________________________________________________________
conv3_block4_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block3_out[0][0]           
__________________________________________________________________________________________________
conv3_block4_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block4_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block4_1_relu (Activation (None, 28, 28, 128)  0           conv3_block4_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block4_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block4_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block4_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block4_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block4_2_relu (Activation (None, 28, 28, 128)  0           conv3_block4_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block4_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block4_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block4_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block4_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block4_add (Add)          (None, 28, 28, 512)  0           conv3_block3_out[0][0]           
                                                                 conv3_block4_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block4_out (Activation)   (None, 28, 28, 512)  0           conv3_block4_add[0][0]           
__________________________________________________________________________________________________
conv4_block1_1_conv (Conv2D)    (None, 14, 14, 256)  131328      conv3_block4_out[0][0]           
__________________________________________________________________________________________________
conv4_block1_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_1_relu (Activation (None, 14, 14, 256)  0           conv4_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block1_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block1_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_2_relu (Activation (None, 14, 14, 256)  0           conv4_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block1_0_conv (Conv2D)    (None, 14, 14, 1024) 525312      conv3_block4_out[0][0]           
__________________________________________________________________________________________________
conv4_block1_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block1_0_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_add (Add)          (None, 14, 14, 1024) 0           conv4_block1_0_bn[0][0]          
                                                                 conv4_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block1_out (Activation)   (None, 14, 14, 1024) 0           conv4_block1_add[0][0]           
__________________________________________________________________________________________________
conv4_block2_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block1_out[0][0]           
__________________________________________________________________________________________________
conv4_block2_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block2_1_relu (Activation (None, 14, 14, 256)  0           conv4_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block2_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block2_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block2_2_relu (Activation (None, 14, 14, 256)  0           conv4_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block2_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block2_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block2_add (Add)          (None, 14, 14, 1024) 0           conv4_block1_out[0][0]           
                                                                 conv4_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block2_out (Activation)   (None, 14, 14, 1024) 0           conv4_block2_add[0][0]           
__________________________________________________________________________________________________
conv4_block3_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block2_out[0][0]           
__________________________________________________________________________________________________
conv4_block3_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block3_1_relu (Activation (None, 14, 14, 256)  0           conv4_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block3_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block3_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block3_2_relu (Activation (None, 14, 14, 256)  0           conv4_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block3_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block3_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block3_add (Add)          (None, 14, 14, 1024) 0           conv4_block2_out[0][0]           
                                                                 conv4_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block3_out (Activation)   (None, 14, 14, 1024) 0           conv4_block3_add[0][0]           
__________________________________________________________________________________________________
conv4_block4_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block3_out[0][0]           
__________________________________________________________________________________________________
conv4_block4_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block4_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block4_1_relu (Activation (None, 14, 14, 256)  0           conv4_block4_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block4_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block4_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block4_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block4_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block4_2_relu (Activation (None, 14, 14, 256)  0           conv4_block4_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block4_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block4_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block4_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block4_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block4_add (Add)          (None, 14, 14, 1024) 0           conv4_block3_out[0][0]           
                                                                 conv4_block4_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block4_out (Activation)   (None, 14, 14, 1024) 0           conv4_block4_add[0][0]           
__________________________________________________________________________________________________
conv4_block5_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block4_out[0][0]           
__________________________________________________________________________________________________
conv4_block5_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block5_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block5_1_relu (Activation (None, 14, 14, 256)  0           conv4_block5_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block5_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block5_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block5_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block5_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block5_2_relu (Activation (None, 14, 14, 256)  0           conv4_block5_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block5_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block5_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block5_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block5_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block5_add (Add)          (None, 14, 14, 1024) 0           conv4_block4_out[0][0]           
                                                                 conv4_block5_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block5_out (Activation)   (None, 14, 14, 1024) 0           conv4_block5_add[0][0]           
__________________________________________________________________________________________________
conv4_block6_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block5_out[0][0]           
__________________________________________________________________________________________________
conv4_block6_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block6_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block6_1_relu (Activation (None, 14, 14, 256)  0           conv4_block6_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block6_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block6_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block6_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block6_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block6_2_relu (Activation (None, 14, 14, 256)  0           conv4_block6_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block6_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block6_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block6_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block6_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block6_add (Add)          (None, 14, 14, 1024) 0           conv4_block5_out[0][0]           
                                                                 conv4_block6_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block6_out (Activation)   (None, 14, 14, 1024) 0           conv4_block6_add[0][0]           
__________________________________________________________________________________________________
conv5_block1_1_conv (Conv2D)    (None, 7, 7, 512)    524800      conv4_block6_out[0][0]           
__________________________________________________________________________________________________
conv5_block1_1_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_1_relu (Activation (None, 7, 7, 512)    0           conv5_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv5_block1_2_conv (Conv2D)    (None, 7, 7, 512)    2359808     conv5_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv5_block1_2_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_2_relu (Activation (None, 7, 7, 512)    0           conv5_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv5_block1_0_conv (Conv2D)    (None, 7, 7, 2048)   2099200     conv4_block6_out[0][0]           
__________________________________________________________________________________________________
conv5_block1_3_conv (Conv2D)    (None, 7, 7, 2048)   1050624     conv5_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv5_block1_0_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_3_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_add (Add)          (None, 7, 7, 2048)   0           conv5_block1_0_bn[0][0]          
                                                                 conv5_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv5_block1_out (Activation)   (None, 7, 7, 2048)   0           conv5_block1_add[0][0]           
__________________________________________________________________________________________________
conv5_block2_1_conv (Conv2D)    (None, 7, 7, 512)    1049088     conv5_block1_out[0][0]           
__________________________________________________________________________________________________
conv5_block2_1_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv5_block2_1_relu (Activation (None, 7, 7, 512)    0           conv5_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv5_block2_2_conv (Conv2D)    (None, 7, 7, 512)    2359808     conv5_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv5_block2_2_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv5_block2_2_relu (Activation (None, 7, 7, 512)    0           conv5_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv5_block2_3_conv (Conv2D)    (None, 7, 7, 2048)   1050624     conv5_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv5_block2_3_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv5_block2_add (Add)          (None, 7, 7, 2048)   0           conv5_block1_out[0][0]           
                                                                 conv5_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv5_block2_out (Activation)   (None, 7, 7, 2048)   0           conv5_block2_add[0][0]           
__________________________________________________________________________________________________
conv5_block3_1_conv (Conv2D)    (None, 7, 7, 512)    1049088     conv5_block2_out[0][0]           
__________________________________________________________________________________________________
conv5_block3_1_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_1_relu (Activation (None, 7, 7, 512)    0           conv5_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_2_conv (Conv2D)    (None, 7, 7, 512)    2359808     conv5_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv5_block3_2_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_2_relu (Activation (None, 7, 7, 512)    0           conv5_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_3_conv (Conv2D)    (None, 7, 7, 2048)   1050624     conv5_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv5_block3_3_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_add (Add)          (None, 7, 7, 2048)   0           conv5_block2_out[0][0]           
                                                                 conv5_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_out (Activation)   (None, 7, 7, 2048)   0           conv5_block3_add[0][0]           
__________________________________________________________________________________________________
avg_pool (GlobalAveragePooling2 (None, 2048)         0           conv5_block3_out[0][0]           
__________________________________________________________________________________________________
predictions (Dense)             (None, 1000)         2049000     avg_pool[0][0]                   
==================================================================================================
Total params: 25,636,712
Trainable params: 25,583,592
Non-trainable params: 53,120
__________________________________________________________________________________________________
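
A quick sanity check on the summary above (a sketch, not from the original notebook): the 53,120 non-trainable parameters should be exactly the moving statistics of the BatchNormalization layers.

resnet = tf.keras.applications.ResNet50()

# total size of all non-trainable weights
print(sum(int(tf.size(w)) for w in resnet.non_trainable_weights))
# 53120

# and every one of them is a BN moving statistic
print({w.name.split('/')[-1].split(':')[0] for w in resnet.non_trainable_weights})
# {'moving_mean', 'moving_variance'}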
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(1,3, input_shape=(32,32,3),),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(2,3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10)
])

# During training, batch normalization learns gamma (scale) and beta (shift) and tracks moving_mean and moving_variance;
# epsilon is a fixed constant, not a parameter. Hence 4 parameters per channel, of which the two moving statistics are non-trainable.
model.summary() 

# Model: "sequential_3"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #   
# =================================================================
# conv2d_6 (Conv2D)            (None, 30, 30, 1)         28        
# _________________________________________________________________
# batch_normalization_6 (Batch (None, 30, 30, 1)         4         
# _________________________________________________________________
# re_lu_6 (ReLU)               (None, 30, 30, 1)         0         
# _________________________________________________________________
# conv2d_7 (Conv2D)            (None, 28, 28, 2)         20        
# _________________________________________________________________
# batch_normalization_7 (Batch (None, 28, 28, 2)         8         
# _________________________________________________________________
# re_lu_7 (ReLU)               (None, 28, 28, 2)         0         
# _________________________________________________________________
# flatten_3 (Flatten)          (None, 1568)              0         
# _________________________________________________________________
# dense_3 (Dense)              (None, 10)                15690     
# =================================================================
# Total params: 15,750
# Trainable params: 15,744
# Non-trainable params: 6
# _________________________________________________________________
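
To see where those 6 non-trainable parameters live, we can inspect the first BN layer directly (a sketch; the exact variable names depend on how many layers have been created in the session):

bn = model.layers[1]  # the first BatchNormalization layer
for w in bn.weights:
    print(w.name, w.shape, w.trainable)
# .../gamma:0 (1,) True             <- trainable scale
# .../beta:0 (1,) True              <- trainable shift
# .../moving_mean:0 (1,) False      <- tracked, not trained
# .../moving_variance:0 (1,) False  <- tracked, not trained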

When training accuracy keeps rising while validation accuracy jumps around in spikes, it is a sign that there is not enough data.

In other words, the model is overfitting, so the amount of data has to be increased => apply data augmentation.

Data augmentation

img_height = 256 
img_width = 256
num_classes = 10

# after random jittering or random cropping
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal', input_shape=(img_height, img_width,3)),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1)
])

model = tf.keras.Sequential([
    data_augmentation,
    tf.keras.layers.Rescaling(1./255),
    tf.keras.layers.Conv2D(16,3,padding='same',activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32,3,padding='same',activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64,3,padding='same',activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.2), # used to prevent overfitting once enough data has been secured
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes)
])
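
A minimal compile/fit sketch for the model above (train_ds and val_ds are placeholder tf.data datasets of (image, label) pairs sized (img_height, img_width, 3)); note that the Random* layers are only active during fit() and pass inputs through unchanged at inference:

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
# history = model.fit(train_ds, validation_data=val_ds, epochs=20)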

 

 


Six ways to build models in TensorFlow

1. tf.Module (meta class)

2. tf.keras.models.Model (supports multiple inputs and outputs)

3. tf.keras.models.Sequential

4. Subclassing (tf.keras.models.Model)

5. tf.estimator

6. tf.nn

Data

1. numpy

2. tensor

- tf.Tensor

- tf.data.Dataset => a dataset type for ML; it manages datasets most efficiently and can exploit hardware-level optimizations

- tf.Variable

Data Pipelines

Describes the flow of data:
data load -> model training

Building the data pipeline with tf.data makes efficient use of the hardware.

prefetch uses the CPU to load the next data while the GPU is busy training, cutting the data-loading time.

Parallel mapping and caching are supported as well; a sketch follows below.
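
A minimal sketch of such a pipeline, with dummy data standing in for a real dataset:

AUTOTUNE = tf.data.AUTOTUNE
x = tf.random.uniform((100, 32, 32, 3))                    # dummy images
y = tf.random.uniform((100,), maxval=10, dtype=tf.int32)   # dummy labels

ds = (tf.data.Dataset.from_tensor_slices((x, y))
        .map(lambda img, lbl: (img / 255., lbl),
             num_parallel_calls=AUTOTUNE)  # parallel CPU preprocessing
        .cache()                           # reuse the preprocessed data after epoch 1
        .shuffle(100)
        .batch(32)
        .prefetch(AUTOTUNE))               # overlap data loading with training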

 

 

import tensorflow as tf 
import pandas as pd

# pd.DataFrame.from_dict()  # a DataFrame can also be built from a dict via this classmethod

x = tf.constant([[1,2],[3,4],[5,6]]) 
y = tf.constant([[1],[3],[5]]) 

x # 3 samples, each with 2 features
# <tf.Tensor: shape=(3, 2), dtype=int32, numpy=
# array([[1, 2],
#        [3, 4],
#        [5, 6]], dtype=int32)>
xx = tf.data.Dataset.from_tensor_slices(x) # loaded lazily

for i in xx.take(2):
  print(i)
# tf.Tensor([1 2], shape=(2,), dtype=int32)
# tf.Tensor([3 4], shape=(2,), dtype=int32)
xx = tf.data.Dataset.from_tensor_slices((x,y)) # x and y can be managed as pairs

for i, j in xx:
  print(i,j)
# tf.Tensor([1 2], shape=(2,), dtype=int32) tf.Tensor([1], shape=(1,), dtype=int32)
# tf.Tensor([3 4], shape=(2,), dtype=int32) tf.Tensor([3], shape=(1,), dtype=int32)
# tf.Tensor([5 6], shape=(2,), dtype=int32) tf.Tensor([5], shape=(1,), dtype=int32)
xx.cache().prefetch(32).shuffle(32)
# <ShuffleDataset shapes: ((2,), (1,)), types: (tf.int32, tf.int32)>

xx = tf.data.Dataset.from_tensors(x) # the whole data as one element
for i in xx.take(1):
  print(i)
# tf.Tensor(
# [[1 2]
#  [3 4]
#  [5 6]], shape=(3, 2), dtype=int32)

Data Augmentation

Two approaches to augmentation:

1. Convert the original to an array first, then augment the array

2. Augment the original itself (processing the image files directly)

1. Basic image manipulation

- a way of preventing overfitting

- does not augment the original itself (not a method that improves performance by itself)

2. Deep learning approaches

- a way of augmenting the original itself

- generating plausible synthetic data can raise generalization performance

Ways to manage data

1. Directory

2. DB (LMDB)

- rarely used as a management method because the loading cost is high

- storing images in a DB via LMDB exists as an approach, but it is seldom used these days

3. HDF

Ways to load data

1. tf.keras.preprocessing.image_dataset_from_directory

2. tf.keras.preprocessing.image.ImageDataGenerator().flow_from_directory

3. pathlib.Path.glob

4. tf.data.Dataset.list_files

5. tf.data.Dataset.from_generator

 

 

data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') # loads every image in the directory
# Found 3670 files belonging to 5 classes.

data # since it is a tf.data.Dataset, preprocessing is possible via map
# <BatchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int32)>

next(iter(data))
# (<tf.Tensor: shape=(32, 256, 256, 3), dtype=float32, numpy=
#  array([[[[1.64000000e+02, 1.60000000e+02, 1.48000000e+02],
#           [1.64000000e+02, 1.60000000e+02, 1.48000000e+02],
#           [1.65000000e+02, 1.61000000e+02, 1.49000000e+02],
#           ...,
#           [2.52875000e+02, 2.54250000e+02, 2.53875000e+02],
#           [2.49750000e+02, 2.50500000e+02, 2.48875000e+02],
#           [2.54250000e+02, 2.54250000e+02, 2.52250000e+02]]]],
#        dtype=float32)>, <tf.Tensor: shape=(32,), dtype=int32, numpy=
#  array([3, 1, 2, 0, 2, 2, 2, 3, 1, 1, 4, 3, 1, 1, 0, 4, 2, 1, 4, 0, 3, 1,
#         1, 2, 2, 3, 0, 0, 3, 2, 2, 0], dtype=int32)>)
idg = tf.keras.preprocessing.image.ImageDataGenerator() # performs augmentation
data2 = idg.flow_from_directory('flower_photos/')
# Found 3670 images belonging to 5 classes.

# idg.flow_from_dataframe()  # loading from a DataFrame is also supported
next(data2)
# (array([[[[ 63.,  73.,  39.],
#           [ 63.,  73.,  39.],
#           [ 63.,  73.,  39.],
#           ...,
#           [166., 140., 107.],
#           [160., 136., 102.],
#           [157., 133.,  99.]]]], dtype=float32), 
#   array([[1., 0., 0., 0., 0.],
#          [0., 0., 0., 0., 1.],
#          [1., 0., 0., 0., 0.]], dtype=float32))
tf.data.Dataset.from_generator
# <function tensorflow.python.data.ops.dataset_ops.DatasetV2.from_generator>
import pathlib
flower_path = pathlib.Path('flower_photos')
for i in flower_path.glob('*/*.jpg'):
  print(i)
# flower_photos/dandelion/8915661673_9a1cdc3755_m.jpg
# flower_photos/dandelion/8740218495_23858355d8_n.jpg
# flower_photos/dandelion/2608937632_cfd93bc7cd.jpg  
# ...
# flower_photos/tulips/8690791226_b1f015259f_n.jpg
# flower_photos/tulips/4612075317_91eefff68c_n.jpg
ls = tf.data.Dataset.list_files('flower_photos/*/*.jpg')  # loads the file names
import matplotlib.pyplot as plt
for i in ls.take(100):
  x = tf.keras.preprocessing.image.load_img(i.numpy())
  plt.imshow(x)
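
list_files only yields file paths; a sketch of decoding them into image tensors so the whole load stays inside tf.data:

def load_image(path):
    img = tf.io.read_file(path)
    img = tf.io.decode_jpeg(img, channels=3)
    return tf.image.resize(img, (256, 256))

ds = ls.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
for img in ds.take(1):
    print(img.shape)
# (256, 256, 3)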

 

 

!pip install Augmentor
import Augmentor
pipe = Augmentor.Pipeline('augmentor/')

pipe.rotate(0.5, 15, 15)  # rotate with probability 0.5, up to 15 degrees left/right

pipe.sample(5) # randomly samples n images and processes them

pipe.process()

pipe.rotate(1, 15, 15)  # probability 1 => applied to every image

pipe.flip_left_right(0.5) # flips left-right with probability 0.5

pipe.process() # processes every image
g = pipe.keras_generator(2)

tf.data.Dataset.from_generator # Augmentor is built on PIL internally

next(g)
# (array([[[[0.        , 0.        , 0.        ],
#           [0.67058825, 0.5803922 , 0.3529412 ],
#           [0.5254902 , 0.54901963, 0.34509805],
#           ...,
#           [0.34117648, 0.11764706, 0.08235294],
#           [0.34901962, 0.12156863, 0.09411765],
#           [0.35686275, 0.1254902 , 0.10980392]]]], dtype=float32), 
#           array([[0], [0]]))
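
Following the from_generator hint above, one way (a sketch) to wrap the Augmentor generator into tf.data; the shapes assume the batch size of 2 passed to keras_generator, and the label dtype is an assumption:

ds = tf.data.Dataset.from_generator(
    lambda: g,  # keras_generator(2) yields (images, labels) batches endlessly
    output_signature=(
        tf.TensorSpec(shape=(2, None, None, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(2, 1), dtype=tf.int64)))
images, labels = next(iter(ds))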
!pip install -U albumentations
!pip install -U tensorflow-datasets
import tensorflow_datasets as tfds

flowers, info = tfds.load('tf_flowers', split='train', as_supervised=True, with_info=True)

flowers # as_supervised=False
# <PrefetchDataset shapes: {image: (None, None, 3), label: ()}, types: {image: tf.uint8, label: tf.int64}>

flowers # as_supervised=True
# <PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>

info
# tfds.core.DatasetInfo(
#     name='tf_flowers',
#     full_name='tf_flowers/3.0.1',
#     description="""
#     A large set of images of flowers
#     """,
#     homepage='https://www.tensorflow.org/tutorials/load_data/images',
#     data_path='/root/tensorflow_datasets/tf_flowers/3.0.1',
#     download_size=218.21 MiB,
#     dataset_size=221.83 MiB,
#     features=FeaturesDict({
#         'image': Image(shape=(None, None, 3), dtype=tf.uint8),
#         'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=5),
#     }),
#     supervised_keys=('image', 'label'),
#     disable_shuffling=False,
#     splits={
#         'train': <SplitInfo num_examples=3670, num_shards=2>,
#     },
#     citation="""@ONLINE {tfflowers,
#     author = "The TensorFlow Team",
#     title = "Flowers",
#     month = "jan",
#     year = "2019",
#     url = "http://download.tensorflow.org/example_images/flower_photos.tgz" }""",
# )
tfds.visualization.show_examples(flowers, info)

flowers_pd = tfds.as_dataframe(flowers,info) # as a pandas DataFrame, to make EDA easier
flowers_pd
data, metadata = tfds.load(
    'tf_flowers',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    with_info=True,
    as_supervised=True
)

data
# [<PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>,
#  <PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>,
#  <PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>]
metadata
# (same tfds.core.DatasetInfo as shown above)
metadata.features['image']
# Image(shape=(None, None, 3), dtype=tf.uint8)

metadata.features['label']
# ClassLabel(shape=(), dtype=tf.int64, num_classes=5)

metadata.features['label'].int2str(0)
# 'dandelion'


tf.keras.layers.experimental.preprocessing.RandomCrop
tf.keras.layers.RandomCrop
# can be used inside a model (preprocessing layers) => can also be combined with map
import numpy as np
im = tf.keras.preprocessing.image.load_img('people.jpg')

x = np.array(im)

xx = tf.keras.layers.RandomRotation(0.4)(x)

plt.imshow(xx)

xx = tf.keras.layers.RandomFlip()(x)

plt.imshow(xx)

aug = tf.keras.models.Sequential([
  tf.keras.layers.RandomRotation(0.5),
  tf.keras.layers.RandomFlip()
]) # an advantage of Sequential over Model is that it can be nested inside another model (keeps the preprocessing layers clearly visible)

plt.imshow(aug(x))

# Example of a model that contains the Sequential preprocessing layers
model = tf.keras.models.Sequential([
  aug, 
  tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
]) # since the preprocessing itself is part of the model, it can run on the GPU

tf.image.random_crop # processes one image at a time
from albumentations import Compose, RandomBrightnessContrast, HorizontalFlip

aug = Compose([RandomBrightnessContrast(), HorizontalFlip()])

aug
# Compose([
#   RandomBrightnessContrast(always_apply=False, p=0.5, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), brightness_by_max=True),
#   HorizontalFlip(always_apply=False, p=0.5),
# ], p=1.0, bbox_params=None, keypoint_params=None, additional_targets={})
plt.imshow(aug(image=x)['image'])
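
Albumentations operates on NumPy arrays, so inside a tf.data pipeline it has to be called through tf.numpy_function; a sketch, assuming a dataset of (uint8 image, label) pairs:

def albu_fn(image):
    return aug(image=image)['image']

def tf_albu(image, label):
    image = tf.numpy_function(func=albu_fn, inp=[image], Tout=tf.uint8)
    image.set_shape([None, None, 3])  # numpy_function drops static shape info
    return image, label

# ds = ds.map(tf_albu, num_parallel_calls=tf.data.AUTOTUNE)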

 

 


Computer Vision Footprint

1998 - LeNet-5

2012 - AlexNet

- Used ReLU to address the vanishing-gradient problem

- Used LRN (Local Response Normalization), which later architectures dropped

- Overlapping pooling (limited impact)

- Dropout (to prevent overfitting)

2013 - ZFNet

- Provided insight into AlexNet through visualization

- Proposed a methodology for hyperparameter tuning

2013 - Network in Network

- Introduced the 1x1 convolution (1. adjusts the channel dimension, 2. adds non-linearity, 3. can act like a fully connected layer)

- Introduced Global Average Pooling as a replacement for Flatten

- Introduced stacking: repeating and composing the same structural unit

2014 - GoogLeNet v1 (1st place)

- Made use of 1x1 convolutions

- Stacked Inception modules

- Made use of global average pooling

- VGG (2nd place)

2015 - ResNet

- The first algorithm to surpass human-level performance: trained in the direction that minimizes the residual

- Made use of Batch Normalization

2016 - GoogLeNet v4

Deep Residual Learning for Image Recognition

ResNet was the first model to surpass human-level performance.
Residual => actual value - predicted value

In the paper's figures, dotted arrows mark the points where the dimension increases.

Skip connection

In deep architectures, a short skip connection takes the output of one layer, skips over a few layers, and adds it to the input of a later layer.

import tensorflow as tf 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
y = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
z = tf.keras.layers.Concatenate()([x,y]) # joins the two branches structurally (along the channel axis)
model = tf.keras.models.Model(input_, z)

model.summary()
# Model: "model"
# __________________________________________________________________________________________________
# Layer (type)                    Output Shape         Param #     Connected to                     
# ==================================================================================================
# input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
# __________________________________________________________________________________________________
# conv2d (Conv2D)                 (None, 32, 32, 2)    56          input_1[0][0]                    
# __________________________________________________________________________________________________
# conv2d_1 (Conv2D)               (None, 32, 32, 2)    56          input_1[0][0]                    
# __________________________________________________________________________________________________
# concatenate (Concatenate)       (None, 32, 32, 4)    0           conv2d[0][0]                     
#                                                                  conv2d_1[0][0]                   
# ==================================================================================================
# Total params: 112
# Trainable params: 112
# Non-trainable params: 0
# __________________________________________________________________________________________________
# The way ResNet implements it (Add)
input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
y = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
z = tf.keras.layers.Add()([x,y]) # adds the values element-wise
model = tf.keras.models.Model(input_, z)

model.summary()
# Model: "model_1"
# __________________________________________________________________________________________________
# Layer (type)                    Output Shape         Param #     Connected to                     
# ==================================================================================================
# input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
# __________________________________________________________________________________________________
# conv2d_2 (Conv2D)               (None, 32, 32, 2)    56          input_2[0][0]                    
# __________________________________________________________________________________________________
# conv2d_3 (Conv2D)               (None, 32, 32, 2)    56          input_2[0][0]                    
# __________________________________________________________________________________________________
# add (Add)                       (None, 32, 32, 2)    0           conv2d_2[0][0]                   
#                                                                  conv2d_3[0][0]                   
# ==================================================================================================
# Total params: 112
# Trainable params: 112
# Non-trainable params: 0
# __________________________________________________________________________________________________
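
Both snippets above add two parallel branches; an actual ResNet-style block instead carries the input itself past the convolutions. A minimal identity-skip sketch:

input_ = tf.keras.Input((32,32,64))
x = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu')(input_)
x = tf.keras.layers.Conv2D(64, 3, padding='same')(x)
x = tf.keras.layers.Add()([input_, x])  # skip connection: input + F(input)
x = tf.keras.layers.ReLU()(x)           # activation after the add, as in ResNet
block = tf.keras.models.Model(input_, x)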

 

 

Hands-on example

 

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

np.unique(y_train, return_counts=True)
# (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
#  array([5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]))
plt.hist(y_train, width=0.3) # check whether the classes are balanced

# (array([5000., 5000., 5000., 5000., 5000., 5000., 5000., 5000., 5000., 5000.]),
#  array([0. , 0.9, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9. ]),
#  <a list of 10 Patch objects>)

# object scale and aspect ratio vary across images => traditional ML methods cannot handle this => a CNN is needed
rows = 3
cols = 3
axes=[]
fig=plt.figure(figsize=(10,7))

for a in range(rows*cols):    
    axes.append(fig.add_subplot(rows, cols, a+1))
    plt.imshow(X_train[a])
fig.tight_layout()    
plt.show()

np.unique(y_test, return_counts=True)
# (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
#  array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]))

plt.hist(y_test, width=0.3)
# (array([1000., 1000., 1000., 1000., 1000., 1000., 1000., 1000., 1000., 1000.]),
#  array([0. , 0.9, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9. ]),
#  <a list of 10 Patch objects>)

plt.imshow(X_test[3]) # the images are small, so it is hard to use very deep stacks of layers

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
X_train = X_train / 255
X_test = X_test / 255 

# how the layers are composed can vary from author to author

input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(32,3)(input_)
# x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) # 2x2 window, stride 2
x = tf.keras.layers.Conv2D(64,3)(x) 
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) 
x = tf.keras.layers.Conv2D(64,3)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Flatten()(x) # the images are small enough that flattening is fine
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Dense(10)(x)

model = tf.keras.models.Model(input_, x)

model.summary()
# Model: "model"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #   
# =================================================================
# input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
# _________________________________________________________________
# conv2d (Conv2D)              (None, 30, 30, 32)        896       
# _________________________________________________________________
# re_lu (ReLU)                 (None, 30, 30, 32)        0         
# _________________________________________________________________
# max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
# _________________________________________________________________
# conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
# _________________________________________________________________
# re_lu_1 (ReLU)               (None, 13, 13, 64)        0         
# _________________________________________________________________
# max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
# _________________________________________________________________
# conv2d_2 (Conv2D)            (None, 4, 4, 64)          36928     
# _________________________________________________________________
# re_lu_2 (ReLU)               (None, 4, 4, 64)          0         
# _________________________________________________________________
# flatten (Flatten)            (None, 1024)              0         
# _________________________________________________________________
# dense (Dense)                (None, 512)               524800    
# _________________________________________________________________
# re_lu_3 (ReLU)               (None, 512)               0         
# _________________________________________________________________
# dense_1 (Dense)              (None, 10)                5130      
# =================================================================
# Total params: 586,250
# Trainable params: 586,250
# Non-trainable params: 0
# _________________________________________________________________
# from_logits=True computes the loss from the raw logits rather than user-interpreted probabilities;
# it is numerically more stable, and in practice the results barely differ
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
                                                                 optimizer='adam',
                                                                 metrics=['accuracy'])
                                                                 
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test)) 
# with normalization, training starts from a lower loss and converges faster

Epoch 1/20
1563/1563 [==============================] - 14s 8ms/step - loss: 1.4378 - accuracy: 0.4767 - val_loss: 1.1393 - val_accuracy: 0.5892
Epoch 2/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.0637 - accuracy: 0.6254 - val_loss: 1.0237 - val_accuracy: 0.6401
Epoch 3/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.8987 - accuracy: 0.6826 - val_loss: 0.9170 - val_accuracy: 0.6825
Epoch 4/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.7807 - accuracy: 0.7277 - val_loss: 0.8540 - val_accuracy: 0.7036
Epoch 5/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.6769 - accuracy: 0.7609 - val_loss: 0.8615 - val_accuracy: 0.7131
Epoch 6/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.5830 - accuracy: 0.7932 - val_loss: 0.8833 - val_accuracy: 0.7151
Epoch 7/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4926 - accuracy: 0.8251 - val_loss: 0.9074 - val_accuracy: 0.7179
Epoch 8/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4044 - accuracy: 0.8571 - val_loss: 0.9816 - val_accuracy: 0.7117
Epoch 9/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.3321 - accuracy: 0.8829 - val_loss: 1.0310 - val_accuracy: 0.7151
Epoch 10/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.2701 - accuracy: 0.9034 - val_loss: 1.2083 - val_accuracy: 0.7054
Epoch 11/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.2228 - accuracy: 0.9207 - val_loss: 1.3023 - val_accuracy: 0.7059
Epoch 12/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1909 - accuracy: 0.9319 - val_loss: 1.4026 - val_accuracy: 0.7109
Epoch 13/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1661 - accuracy: 0.9411 - val_loss: 1.6160 - val_accuracy: 0.6998
Epoch 14/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1501 - accuracy: 0.9471 - val_loss: 1.5875 - val_accuracy: 0.7062
Epoch 15/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1311 - accuracy: 0.9548 - val_loss: 1.8276 - val_accuracy: 0.6951
Epoch 16/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1288 - accuracy: 0.9552 - val_loss: 1.8113 - val_accuracy: 0.7016
Epoch 17/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1169 - accuracy: 0.9591 - val_loss: 2.0500 - val_accuracy: 0.6922
Epoch 18/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1180 - accuracy: 0.9604 - val_loss: 2.0089 - val_accuracy: 0.6998
Epoch 19/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1095 - accuracy: 0.9634 - val_loss: 1.9952 - val_accuracy: 0.6968
Epoch 20/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1034 - accuracy: 0.9657 - val_loss: 2.0972 - val_accuracy: 0.6938

 

history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test)) 
# without normalization, training starts from a higher loss

Epoch 1/20
1563/1563 [==============================] - 19s 8ms/step - loss: 2.0055 - accuracy: 0.3065 - val_loss: 1.5826 - val_accuracy: 0.4172
Epoch 2/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.4448 - accuracy: 0.4833 - val_loss: 1.3306 - val_accuracy: 0.5283
Epoch 3/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.2506 - accuracy: 0.5596 - val_loss: 1.2615 - val_accuracy: 0.5648
Epoch 4/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.1343 - accuracy: 0.6053 - val_loss: 1.2301 - val_accuracy: 0.5752
Epoch 5/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.0247 - accuracy: 0.6427 - val_loss: 1.1291 - val_accuracy: 0.6128
Epoch 6/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.9380 - accuracy: 0.6753 - val_loss: 1.2271 - val_accuracy: 0.5985
Epoch 7/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.8566 - accuracy: 0.7020 - val_loss: 1.1887 - val_accuracy: 0.6037
Epoch 8/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.7684 - accuracy: 0.7344 - val_loss: 1.2450 - val_accuracy: 0.6161
Epoch 9/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.6898 - accuracy: 0.7621 - val_loss: 1.3770 - val_accuracy: 0.6004
Epoch 10/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.6153 - accuracy: 0.7890 - val_loss: 1.4217 - val_accuracy: 0.6172
Epoch 11/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.5556 - accuracy: 0.8114 - val_loss: 1.4523 - val_accuracy: 0.6202
Epoch 12/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.5212 - accuracy: 0.8254 - val_loss: 1.6618 - val_accuracy: 0.5982
Epoch 13/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4697 - accuracy: 0.8419 - val_loss: 1.7345 - val_accuracy: 0.6087
Epoch 14/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4385 - accuracy: 0.8550 - val_loss: 1.9349 - val_accuracy: 0.6040
Epoch 15/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4044 - accuracy: 0.8672 - val_loss: 2.1098 - val_accuracy: 0.5947
Epoch 16/20
1563/1563 [==============================] - 12s 7ms/step - loss: 0.3703 - accuracy: 0.8803 - val_loss: 2.3537 - val_accuracy: 0.6007
Epoch 17/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.3737 - accuracy: 0.8796 - val_loss: 2.4973 - val_accuracy: 0.6003
Epoch 18/20
1563/1563 [==============================] - 12s 7ms/step - loss: 0.3659 - accuracy: 0.8848 - val_loss: 2.5574 - val_accuracy: 0.5830
Epoch 19/20
1563/1563 [==============================] - 12s 7ms/step - loss: 0.3423 - accuracy: 0.8938 - val_loss: 2.4638 - val_accuracy: 0.6007
Epoch 20/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.3459 - accuracy: 0.8939 - val_loss: 2.6130 - val_accuracy: 0.5853

 

 

pd.DataFrame(history.history).plot.line(figsize=(10,8)) # overfitting begins where the validation loss starts to rise (normalized run)

pd.DataFrame(history.history).plot.line(figsize=(10,8)) # overfitting begins where the validation loss starts to rise (unnormalized run)
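
Since overfitting sets in where the validation loss turns upward, a common remedy (a sketch) is to stop training there automatically and keep the best weights:

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,
                                              restore_best_weights=True)
# history = model.fit(X_train, y_train, epochs=20,
#                     validation_data=(X_test, y_test),
#                     callbacks=[early_stop])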

 

 

Flower Datasets

tf.keras.utils.get_file("flower_photos","https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz")

data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') 
# Found 3670 files belonging to 5 classes.

data # tf.data.Dataset
# <BatchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int32)>

idg = tf.keras.preprocessing.image.ImageDataGenerator()
idg.flow_from_directory('flower_photos/') # with raw data organized into one folder per class, it converts everything in one go (into a single array)
# Found 3670 images belonging to 5 classes.
# <keras.preprocessing.image.DirectoryIterator at 0x7fee7a4b1390>

NumPy and tensors

NumPy arrays and tensors are interchangeable.

Working in tensors is less interoperable with outside libraries, but it gets the most out of the GPU and TensorFlow's internals.

tf.data.Dataset is the tensor type purpose-built for managing ML datasets.

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

Two ways to build a tensor dataset:

1. from_tensors

2. from_tensor_slices

tf.data.Dataset.from_tensors(X_train) # builds the dataset from the data as a whole
tf.data.Dataset.from_tensors((X_train,y_train)) # manages the whole data as one bundled element
# <TensorDataset shapes: ((50000, 32, 32, 3), (50000, 1)), types: (tf.uint8, tf.uint8)>

tf.data.Dataset.from_tensor_slices(X_train) # builds the dataset one sample at a time
tf.data.Dataset.from_tensor_slices((X_train, y_train)) # manages each (x, y) pair together
# <TensorSliceDataset shapes: ((32, 32, 3), (1,)), types: (tf.uint8, tf.uint8)>


# y_train = tf.keras.utils.to_categorical(y_train)
train = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)

train_t = tf.data.Dataset.from_tensor_slices((X_train, y_train))

train_t.batch(32)
# <BatchDataset shapes: ((None, 32, 32, 3), (None, 1)), types: (tf.float64, tf.uint8)>

train_t.shuffle(400)
# <ShuffleDataset shapes: ((32, 32, 3), (1,)), types: (tf.float64, tf.uint8)>

# cache : use when preprocessing takes too long and you want to cut it down
# prefetch : stages data in memory while training to reduce load time; the argument is how many elements to stage / AUTOTUNE picks it automatically
train_t.shuffle(400).cache()
# <CacheDataset shapes: ((32, 32, 3), (1,)), types: (tf.float64, tf.uint8)>

train_t.shuffle(400).cache().prefetch(tf.data.AUTOTUNE) 
# <PrefetchDataset shapes: ((32, 32, 3), (1,)), types: (tf.float64, tf.uint8)>

'_GeneratorState' in dir(train) # data is loaded lazily => nothing goes into memory until it is requested
# True
for i in train.take(2): # fetch two batches
  print(i)
  
# (<tf.Tensor: shape=(32, 32, 32, 3), dtype=float64, numpy=
# array([[[[0.23137255, 0.24313725, 0.24705882],
#          [0.16862745, 0.18039216, 0.17647059],
#          [0.19607843, 0.18823529, 0.16862745],
#          ...,
#          [0.19215686, 0.28235294, 0.17647059],
#          [0.12156863, 0.2       , 0.11764706],
#          [0.08235294, 0.15294118, 0.08235294]]]])>, <tf.Tensor: shape=(32, 1), dtype=uint8, numpy=
# array([[1], [3], [4], [0], [3], [7], [3], [3], [5], [2], [2], [7], [1], [1], [1], [2],
#        [2], [0], [9], [5], [7], [9], [2], [2], [5], [2], [4], [3], [1], [1], [8], [2]],
#       dtype=uint8)>)

(Figures: execution timelines of a naive pipeline vs. a prefetching pipeline)

Without tf.data.Dataset, the CPU handles every read and write as a separate step.

With cache and prefetch, however, the CPU and GPU work is packed and overlapped internally,

so memory is used more efficiently than with plain NumPy.

input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(32,3)(input_)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) 
x = tf.keras.layers.Conv2D(64,3)(x) 
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) 
x = tf.keras.layers.Conv2D(64,3)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Flatten()(x) 
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Dense(10)(x)

model = tf.keras.models.Model(input_, x)

model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
                                                                 optimizer='adam',
                                                                 metrics=['accuracy'])
                                                                 
history = model.fit(train, epochs=10)
# training on a tf.data.Dataset keeps everything as tensors and uses the optimized pipeline internally, so it is noticeably more efficient and lighter on memory
Epoch 1/10
1563/1563 [==============================] - 16s 10ms/step - loss: 1.4560 - accuracy: 0.4731
Epoch 2/10
1563/1563 [==============================] - 14s 9ms/step - loss: 1.0674 - accuracy: 0.6208
Epoch 3/10
1563/1563 [==============================] - 12s 8ms/step - loss: 0.8919 - accuracy: 0.6871
Epoch 4/10
1563/1563 [==============================] - 18s 12ms/step - loss: 0.7641 - accuracy: 0.7327
Epoch 5/10
1563/1563 [==============================] - 18s 12ms/step - loss: 0.6630 - accuracy: 0.7666
Epoch 6/10
1563/1563 [==============================] - 15s 10ms/step - loss: 0.5731 - accuracy: 0.7999
Epoch 7/10
1563/1563 [==============================] - 15s 9ms/step - loss: 0.4959 - accuracy: 0.8257
Epoch 8/10
1563/1563 [==============================] - 22s 14ms/step - loss: 0.4299 - accuracy: 0.8476
Epoch 9/10
1563/1563 [==============================] - 25s 16ms/step - loss: 0.3662 - accuracy: 0.8701
Epoch 10/10
1563/1563 [==============================] - 24s 15ms/step - loss: 0.3118 - accuracy: 0.8894
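As a follow-up, the test split can be wrapped the same way and evaluated; a sketch, assuming X_test was scaled the same way as X_train:

test = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32)
model.evaluate(test)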
data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') 
# Found 3670 files belonging to 5 classes.
plt.imshow(next(iter(data.take(1)))[0][0])

plt.imshow(next(iter(data.take(1)))[0][0]/255) 
# image_dataset_from_directory yields float32 pixels in [0, 255]; dividing by 255 maps them into [0, 1], so imshow renders the image properly (and scaled inputs generally suit models better)
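To bake that scaling into the pipeline instead of doing it by hand, one option is a Rescaling preprocessing layer (same experimental namespace as the augmentation layers used below):

norm = tf.keras.layers.experimental.preprocessing.Rescaling(1/255)
data_n = data.map(lambda x, y: (norm(x), y))   # pixels now in [0, 1]
plt.imshow(next(iter(data_n.take(1)))[0][0])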

tf.keras.utils.image_dataset_from_directory is tf.keras.preprocessing.image_dataset_from_directory 
# True
# the two names point to the same function

Data augmentation

Data augmentation creates more varied training data out of the data you already have; the idea was popularized by AlexNet.

AlexNet randomly cropped 224x224 patches out of each 256x256 image, also taking their horizontal flips, to grow the training set.

(That is 32 x 32 = 1,024 crop positions x 2 flips = a 2,048x increase.)

Augmentation applied in a tf.data pipeline runs on the CPU; when the augmentation op is a layer inside the model, it runs with the model (e.g. on the GPU) batch by batch, as sketched below.
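A sketch of the two placements, using RandomFlip as the example op (inside map the layer must be called with training=True, since random preprocessing layers act as identity functions at inference time):

aug = tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal')

# (a) in the tf.data pipeline -> runs on the CPU while the GPU trains
ds = data.map(lambda x, y: (aug(x, training=True), y))

# (b) as a layer inside the model -> runs with the model, e.g. on the GPU
inputs = tf.keras.Input((256, 256, 3))
x = aug(inputs)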

data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') 
# Found 3670 files belonging to 5 classes.
crop = tf.keras.layers.experimental.preprocessing.RandomCrop(224, 224) # takes (height, width); a third positional argument would be interpreted as a seed

data.map(lambda x, y: (crop(x), y), num_parallel_calls=tf.data.AUTOTUNE) # num_parallel_calls: how many map calls to run in parallel on the CPU
# <ParallelMapDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int32)>

3670*2048 # the flower dataset "inflated" by AlexNet's augmentation factor
# 7516160

Lambda layer

t = tf.constant([[1,2,],[3,4,]])
t
# <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
# array([[1, 2],
#       [3, 4]], dtype=int32)>
tf.keras.layers.Lambda(lambda x:x+1)(t) # wraps an arbitrary function as a layer
# <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
# array([[2, 3],
#        [4, 5]], dtype=int32)>
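Because it is an ordinary layer, a Lambda can sit anywhere in a model; a minimal sketch that bakes input scaling into the graph:

inp = tf.keras.Input((32, 32, 3))
x = tf.keras.layers.Lambda(lambda t: t / 255)(inp)  # scaling happens in-graph
x = tf.keras.layers.Conv2D(8, 3)(x)
m = tf.keras.models.Model(inp, x)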
crop_l = tf.keras.layers.Lambda(lambda x: tf.image.random_crop(x, (224, 224, 3)))
data.map(lambda x, y: (crop_l(x), y), num_parallel_calls=tf.data.AUTOTUNE) # raises the error below
# ValueError: in user code:
#     ...
#     ValueError: Dimensions must be equal, but are 4 and 3 for
#     '{{node lambda_2/random_crop/GreaterEqual}} = GreaterEqual[T=DT_INT32]
#     (lambda_2/random_crop/Shape, lambda_2/random_crop/size)' with input shapes: [4], [3].

# Inside map the images arrive as a 4-D batch (image_dataset_from_directory batches
# by default), while random_crop was handed a 3-D size, so the ranks disagree.
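One way to keep the Lambda version is to give random_crop a size whose rank matches the batched input; a hedged sketch (note that a single crop offset is then shared by the whole batch):

crop_l = tf.keras.layers.Lambda(
    lambda x: tf.image.random_crop(x, (tf.shape(x)[0], 224, 224, 3)))
data.map(lambda x, y: (crop_l(x), y), num_parallel_calls=tf.data.AUTOTUNE)

Alternatively, just use the RandomCrop preprocessing layer as above, which accepts batched input directly.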
