Object Detection on Images Using OpenCV with Darknet YOLO
Performing object detection with YOLO and Tiny-YOLO
Download/view the image to be used as input (a display snippet follows the download commands below)
!mkdir /content/data
!wget -O ./data/beatles01.jpg https://raw.githubusercontent.com/chulminkw/DLCV/master/data/image/beatles01.jpg
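The heading above mentions viewing the image as well; a minimal sketch to display the downloaded file (matplotlib assumed available, as in Colab):
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

# OpenCV reads images as BGR; convert to RGB before displaying with matplotlib.
img = cv2.imread('/content/data/beatles01.jpg')
print('image shape:', img.shape)
plt.figure(figsize=(12, 12))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))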
Download the COCO-trained inference model (weights) and config file from the Darknet YOLO site, then use them to create an inference model in OpenCV
!mkdir ./pretrained
!echo "##### downloading pretrained yolo/tiny-yolo weight file and config file"
!wget -O /content/pretrained/yolov3.weights https://pjreddie.com/media/files/yolov3.weights
!wget -O /content/pretrained/yolov3.cfg https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg?raw=true
!wget -O /content/pretrained/yolov3-tiny.weights https://pjreddie.com/media/files/yolov3-tiny.weights
!wget -O /content/pretrained/yolov3-tiny.cfg https://github.com/pjreddie/darknet/blob/master/cfg/yolov3-tiny.cfg?raw=true
!ls /content/pretrained
Load the YOLO inference network with readNetFromDarknet(config file, weight file)
import os
import cv2
weights_path = '/content/pretrained/yolov3.weights'
config_path = '/content/pretrained/yolov3.cfg'
cv_net_yolo = cv2.dnn.readNetFromDarknet(config_path, weights_path)
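Optionally, the compute backend and target device can be pinned before inference; a minimal sketch, assuming the default CPU path (both calls are standard cv2.dnn API):
# Optional: run inference on the CPU with OpenCV's own backend
# (these are the defaults; shown explicitly for clarity).
cv_net_yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
cv_net_yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)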
Mapping COCO class IDs to class names
labels_to_names_seq = {0:'person', 1:'bicycle', 2:'car', 3:'motorbike', 4:'aeroplane', 5:'bus', 6:'train', 7:'truck', 8:'boat', 9:'traffic light', 10:'fire hydrant',
                       11:'stop sign', 12:'parking meter', 13:'bench', 14:'bird', 15:'cat', 16:'dog', 17:'horse', 18:'sheep', 19:'cow', 20:'elephant',
                       21:'bear', 22:'zebra', 23:'giraffe', 24:'backpack', 25:'umbrella', 26:'handbag', 27:'tie', 28:'suitcase', 29:'frisbee', 30:'skis',
                       31:'snowboard', 32:'sports ball', 33:'kite', 34:'baseball bat', 35:'baseball glove', 36:'skateboard', 37:'surfboard', 38:'tennis racket', 39:'bottle', 40:'wine glass',
                       41:'cup', 42:'fork', 43:'knife', 44:'spoon', 45:'bowl', 46:'banana', 47:'apple', 48:'sandwich', 49:'orange', 50:'broccoli',
                       51:'carrot', 52:'hot dog', 53:'pizza', 54:'donut', 55:'cake', 56:'chair', 57:'sofa', 58:'pottedplant', 59:'bed', 60:'diningtable',
                       61:'toilet', 62:'tvmonitor', 63:'laptop', 64:'mouse', 65:'remote', 66:'keyboard', 67:'cell phone', 68:'microwave', 69:'oven', 70:'toaster',
                       71:'sink', 72:'refrigerator', 73:'book', 74:'clock', 75:'vase', 76:'scissors', 77:'teddy bear', 78:'hair drier', 79:'toothbrush'}
Extract the result data from the three scale output layers
layer_names = cv_net_yolo.getLayerNames()
print('### yolo v3 layer name:', layer_names)
# getUnconnectedOutLayers() returns nested index arrays in this OpenCV version,
# and layer indices are 1-based, hence the i[0] - 1 indexing.
print('final output layer id:', cv_net_yolo.getUnconnectedOutLayers())
print('final output layer name:', [layer_names[i[0] - 1] for i in cv_net_yolo.getUnconnectedOutLayers()])
final output layer id: [[200]
 [227]
 [254]]
final output layer name: ['yolo_82', 'yolo_94', 'yolo_106']
layer_names = cv_net_yolo.getLayerNames()
outlayer_names = [layer_names[i[0] - 1] for i in cv_net_yolo.getUnconnectedOutLayers()]
print('output_layer name:', outlayer_names)
img = cv2.imread('./data/beatles01.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# blobFromImage rescales pixel values, resizes to 416x416, and swaps BGR->RGB
# (swapRB=True), so the BGR image from imread can be passed in directly.
cv_net_yolo.setInput(cv2.dnn.blobFromImage(img, scalefactor=1/255.0, size=(416, 416), swapRB=True, crop=False))
cv_outs = cv_net_yolo.forward(outlayer_names)

print('cv_outs type:', type(cv_outs), 'number of elements in cv_outs:', len(cv_outs))
print(cv_outs[0].shape, cv_outs[1].shape, cv_outs[2].shape)
print(cv_outs)
output_layer name: ['yolo_82', 'yolo_94', 'yolo_106']
cv_outs type: <class 'list'> number of elements in cv_outs: 3
(507, 85) (2028, 85) (8112, 85)
[array([[0.03803749, 0.0470234 , 0.3876816 , ..., 0., 0., 0.],
        [0.04705836, 0.03385845, 0.2689603 , ..., 0., 0., 0.],
        [0.04941482, 0.03791986, 0.7151826 , ..., 0., 0., 0.],
        ...,
        [0.9585798 , 0.9460585 , 0.35046625, ..., 0., 0., 0.],
        [0.96015006, 0.9630715 , 0.29724196, ..., 0., 0., 0.],
        [0.9663636 , 0.9657401 , 0.79356086, ..., 0., 0., 0.]], dtype=float32),
 array([[0.01637367, 0.02457962, 0.04684627, ..., 0., 0., 0.],
        [0.01678773, 0.01458679, 0.46203217, ..., 0., 0., 0.],
        [0.02219823, 0.01376948, 0.0662718 , ..., 0., 0., 0.],
        ...,
        [0.97421783, 0.97686917, 0.04557502, ..., 0., 0., 0.],
        [0.98114103, 0.9762939 , 0.33147967, ..., 0., 0., 0.],
        [0.97884774, 0.98335934, 0.07896643, ..., 0., 0., 0.]], dtype=float32),
 array([[0.00859342, 0.00442324, 0.01781066, ..., 0., 0., 0.],
        [0.010101  , 0.01088366, 0.01980249, ..., 0., 0., 0.],
        [0.01071996, 0.00756924, 0.20484295, ..., 0., 0., 0.],
        ...,
        [0.9901033 , 0.9906244 , 0.01741469, ..., 0., 0., 0.],
        [0.9907341 , 0.9876037 , 0.01802968, ..., 0., 0., 0.],
        [0.98756605, 0.99131656, 0.17707303, ..., 0., 0., 0.]], dtype=float32)]
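Each row of these arrays is one candidate detection. The row counts follow from YOLOv3's three detection scales on a 416x416 input (13x13, 26x26, and 52x52 grids, three anchors per cell), and each row holds 4 box coordinates, 1 objectness score, and 80 COCO class scores, hence 85 columns. A quick check:
# 85 columns = 4 box coords (cx, cy, w, h) + 1 objectness + 80 class scores
for grid in (13, 26, 52):
    print('{0}x{0} grid x 3 anchors = {1} candidate boxes'.format(grid, grid * grid * 3))
# -> 507, 2028, 8112, matching the three output shapes above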
Collect the object detection results from all three scale output layers.
The center-based (center_x, center_y, width, height) coordinates are converted to top-left-based (left, top, width, height) boxes, from which the bottom-right corner can be derived for drawing.
import numpy as np

rows = img.shape[0]
cols = img.shape[1]

conf_threshold = 0.5
nms_threshold = 0.4

green_color = (0, 255, 0)
red_color = (0, 0, 255)

class_ids = []
confidences = []
boxes = []

for ix, output in enumerate(cv_outs):
    print('output shape:', output.shape)
    # each detection row: [center_x, center_y, width, height, objectness, 80 class scores]
    for jx, detection in enumerate(output):
        class_scores = detection[5:]
        class_id = np.argmax(class_scores)
        confidence = class_scores[class_id]
        if confidence > conf_threshold:
            print('ix:', ix, 'jx:', jx, 'class_id', class_id, 'confidence:', confidence)
            # coordinates are normalized to [0, 1]; scale back to the image size
            center_x = int(detection[0] * cols)
            center_y = int(detection[1] * rows)
            width = int(detection[2] * cols)
            height = int(detection[3] * rows)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([left, top, width, height])
output shape: (507, 85)
ix: 0 jx: 319 class_id 0 confidence: 0.9317017
ix: 0 jx: 328 class_id 0 confidence: 0.96232384
ix: 0 jx: 334 class_id 0 confidence: 0.9984486
ix: 0 jx: 343 class_id 0 confidence: 0.9978433
output shape: (2028, 85)
ix: 1 jx: 831 class_id 2 confidence: 0.8169964
ix: 1 jx: 955 class_id 2 confidence: 0.8472691
ix: 1 jx: 1262 class_id 0 confidence: 0.9877816
ix: 1 jx: 1280 class_id 0 confidence: 0.99840033
ix: 1 jx: 1295 class_id 0 confidence: 0.6916561
ix: 1 jx: 1313 class_id 0 confidence: 0.9205806
output shape: (8112, 85)
ix: 2 jx: 2883 class_id 2 confidence: 0.9077368
ix: 2 jx: 2886 class_id 2 confidence: 0.63324535
ix: 2 jx: 3048 class_id 2 confidence: 0.9412014
ix: 2 jx: 3051 class_id 2 confidence: 0.615405
ix: 2 jx: 3184 class_id 2 confidence: 0.95041
ix: 2 jx: 3214 class_id 2 confidence: 0.9064125
ix: 2 jx: 3373 class_id 2 confidence: 0.68998003
ix: 2 jx: 3394 class_id 0 confidence: 0.76407045
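For reference, here is the coordinate conversion above applied to a single hypothetical detection row (the normalized values and image size are illustrative, not taken from the actual output):
# hypothetical normalized (cx, cy, w, h) detection on a 1200x800 image
cx, cy, w, h = 0.5, 0.5, 0.2, 0.3
cols, rows = 1200, 800
center_x, center_y = int(cx * cols), int(cy * rows)                # (600, 400)
width, height = int(w * cols), int(h * rows)                       # (240, 240)
left, top = int(center_x - width / 2), int(center_y - height / 2)  # (480, 280)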
Use NMS to remove overlapping bounding boxes among the objects detected at each output layer.
conf_threshold = 0.5
nms_threshold = 0.4
idxs = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
idxs
array([[ 2],
       [ 7],
       [ 3],
       [ 6],
       [14],
       [12],
       [10],
       [15],
       [ 5],
       [ 4],
       [17],
       [16],
       [11],
       [13]], dtype=int32)
idxs.flatten()
array([ 2,  7,  3,  6, 14, 12, 10, 15,  5,  4, 17, 16, 11, 13],
      dtype=int32)
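cv2.dnn.NMSBoxes greedily keeps the highest-confidence box and suppresses any remaining box whose IoU with a kept box exceeds nms_threshold. A minimal pure-Python sketch of that logic (illustrative only, not OpenCV's exact implementation):
def nms(boxes, confidences, conf_threshold, nms_threshold):
    # boxes as [left, top, width, height]; returns indices of kept boxes
    def iou(a, b):
        ix = max(0, min(a[0] + a[2], b[0] + b[2]) - max(a[0], b[0]))
        iy = max(0, min(a[1] + a[3], b[1] + b[3]) - max(a[1], b[1]))
        inter = ix * iy
        union = a[2] * a[3] + b[2] * b[3] - inter
        return inter / union if union > 0 else 0.0

    # visit boxes in descending confidence order, skipping low-confidence ones
    order = sorted((i for i, c in enumerate(confidences) if c >= conf_threshold),
                   key=lambda i: confidences[i], reverse=True)
    keep = []
    for i in order:
        if all(iou(boxes[i], boxes[j]) <= nms_threshold for j in keep):
            keep.append(i)
    return keep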
Use the NMS-filtered idxs to extract the corresponding object information from boxes, class_ids, and confidences, and visualize it.
import matplotlib.pyplot as plt

draw_img = img.copy()

if len(idxs) > 0:
    for i in idxs.flatten():
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        caption = "{}: {:.4f}".format(labels_to_names_seq[class_ids[i]], confidences[i])
        cv2.rectangle(draw_img, (int(left), int(top)), (int(left + width), int(top + height)), color=green_color, thickness=2)
        cv2.putText(draw_img, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, red_color, 1)
        print(caption)

img_rgb = cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(12, 12))
plt.imshow(img_rgb)
person: 0.9984
person: 0.9984
person: 0.9978
person: 0.9878
car: 0.9504
car: 0.9412
car: 0.9077
car: 0.9064
car: 0.8473
car: 0.8170
person: 0.7641
car: 0.6900
car: 0.6332
car: 0.6154
<matplotlib.image.AxesImage at 0x7f4dd35bdc90>
Create a get_detected_img() function that runs YOLO detection on a single image.
def get_detected_img(cv_net, img_array, conf_threshold, nms_threshold, is_print=True):
    rows = img_array.shape[0]
    cols = img_array.shape[1]
    draw_img = img_array.copy()

    layer_names = cv_net.getLayerNames()
    outlayer_names = [layer_names[i[0] - 1] for i in cv_net.getUnconnectedOutLayers()]

    cv_net.setInput(cv2.dnn.blobFromImage(img_array, scalefactor=1/255.0, size=(416, 416), swapRB=True, crop=False))
    start = time.time()
    # single forward pass over the three output layers
    cv_outs = cv_net.forward(outlayer_names)

    green_color = (0, 255, 0)
    red_color = (0, 0, 255)

    class_ids = []
    confidences = []
    boxes = []

    for ix, output in enumerate(cv_outs):
        for jx, detection in enumerate(output):
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > conf_threshold:
                center_x = int(detection[0] * cols)
                center_y = int(detection[1] * rows)
                width = int(detection[2] * cols)
                height = int(detection[3] * rows)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    idxs = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    if len(idxs) > 0:
        for i in idxs.flatten():
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            caption = "{}: {:.4f}".format(labels_to_names_seq[class_ids[i]], confidences[i])
            cv2.rectangle(draw_img, (int(left), int(top)), (int(left + width), int(top + height)), color=green_color, thickness=2)
            cv2.putText(draw_img, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, red_color, 1)

    if is_print:
        print('Detection time:', round(time.time() - start, 2), 'seconds')

    return draw_img
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import time
import os
img = cv2.imread('/content/data/beatles01.jpg' )
weights_path = '/content/pretrained/yolov3.weights'
config_path = '/content/pretrained/yolov3.cfg'
cv_net_yolo = cv2.dnn.readNetFromDarknet(config_path, weights_path)
conf_threshold = 0.5
nms_threshold = 0.4
draw_img = get_detected_img(cv_net_yolo, img, conf_threshold=conf_threshold, nms_threshold=nms_threshold, is_print=True)
img_rgb = cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(12, 12))
plt.imshow(img_rgb)
Detection time: 4.08 seconds
<matplotlib.image.AxesImage at 0x7f4dc2adbd50>
Performing object detection with Tiny-YOLO. Note that conf_threshold is lowered to 0.2 here, since Tiny-YOLO produces weaker confidence scores than the full model.
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import time
import os
img = cv2.imread('/content/data/beatles01.jpg' )
weights_path = '/content/pretrained/yolov3-tiny.weights'
config_path = '/content/pretrained/yolov3-tiny.cfg'
cv_net_yolo_tiny = cv2.dnn.readNetFromDarknet(config_path, weights_path)
conf_threshold = 0.2
nms_threshold = 0.4
draw_img = get_detected_img(cv_net_yolo_tiny, img, conf_threshold=conf_threshold, nms_threshold=nms_threshold, is_print=True)
img_rgb = cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(12, 12))
plt.imshow(img_rgb)
Detection time: 0.48 seconds
<matplotlib.image.AxesImage at 0x7f4dc2275ad0>