728x90
반응형

EfficientDet lite0 Pretrained 모델 Inference 수행

  • EfficientDet Lite는 automl 패키지로 구현됨.
  • 입력 이미지로 numpy array, tensor 모두 가능, type은 uint8 필요.
  • inference 결과로 box정보, score정보, class 정보를 각각 Tensor로 반환.
import time

import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

# Load the EfficientDet-Lite0 detection module from TF Hub; the loaded object is
# called directly on a batched image tensor in the cells below.
detector_automl_lite0 = hub.load("https://tfhub.dev/tensorflow/efficientdet/lite0/detection/1")

 

# OpenCV decodes images as BGR; reorder the channels to RGB before inference.
bgr_image = cv2.imread('/content/data/baseball01.jpg')
img_array = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
# A batched numpy array could be fed instead of a tensor:
# img_array_01 = img_array[np.newaxis, ...]
# Convert to a uint8 tensor and prepend the batch axis.
img_tensor = tf.convert_to_tensor(img_array, dtype=tf.uint8)[tf.newaxis, ...]

# Time a single forward pass through the detector.
start_time = time.time()
# numpy-array variant of the same call:
# boxes, scores, classes, num_detections = detector_automl_lite0(img_array_01)
detections = detector_automl_lite0(img_tensor)
boxes, scores, classes, num_detections = detections

elapsed = time.time() - start_time
print('elapsed time:', elapsed)

# elapsed time: 2.2259414196014404

 

# Notebook-style expression: show the shape of each output tensor together with
# the reported number of detections.
boxes.shape, scores.shape, classes.shape, num_detections

# (TensorShape([1, 100, 4]),
#  TensorShape([1, 100]),
#  TensorShape([1, 100]),
#  <tf.Tensor: shape=(1,), dtype=int32, numpy=array([100], dtype=int32)>)

 

# Box coordinates are returned in original-image pixel units, not normalized to the 0~1 range.
print('원본 이미지 shape:', img_array.shape)
boxes[0, 0:10], scores[0, :10], classes[0, :10]

원본 이미지 shape: (476, 735, 3)
(<tf.Tensor: shape=(10, 4), dtype=float32, numpy=
 array([[202.11865 ,  31.700203, 445.80273 , 188.72594 ],
        [259.24213 , 174.63013 , 455.48645 , 373.06177 ],
        [127.93706 , 324.82584 , 407.09464 , 493.36212 ],
        [300.97815 , 331.1551  , 344.10986 , 374.19156 ],
        [236.80632 , 542.42914 , 261.38422 , 609.6119  ],
        [222.16089 , 659.98816 , 236.1999  , 678.62085 ],
        [227.95496 , 496.07425 , 263.47733 , 610.33655 ],
        [223.7558  , 647.2287  , 238.00728 , 661.4642  ],
        [218.31972 , 654.4792  , 226.83658 , 662.96124 ],
        [220.97037 , 494.3057  , 246.90009 , 588.9908  ]], dtype=float32)>,
 <tf.Tensor: shape=(10,), dtype=float32, numpy=
 array([0.90091765, 0.8834344 , 0.8455462 , 0.4854285 , 0.38026473,
        0.2700993 , 0.26304004, 0.25670242, 0.17502265, 0.1738919 ],
       dtype=float32)>,
 <tf.Tensor: shape=(10,), dtype=float32, numpy=array([ 1.,  1.,  1., 40., 39., 37., 39., 37., 37., 39.], dtype=float32)>)

 

# Class names in id order; the first entry corresponds to class id 1.
_LABEL_NAMES_IN_ID_ORDER = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'street sign', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep',
    'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    'hat', 'backpack', 'umbrella', 'shoe', 'eye glasses',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'mirror', 'dining table', 'window', 'desk', 'toilet',
    'door', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
    'sink', 'refrigerator', 'blender', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    'hair brush',
)

# Mapping from integer class id (1..91) to its human-readable class name.
labels_to_names = dict(enumerate(_LABEL_NAMES_IN_ID_ORDER, start=1))

 

def get_detected_img_automl(model, img_array, score_threshold, object_show_count=100, is_print=True):
    """Run an AutoML EfficientDet detector on one image and draw its detections.

    Args:
        model: Callable detector that accepts a uint8 tensor with a leading
            batch axis and returns ``(boxes, scores, classes, num_detections)``
            tensors.
        img_array: Image as a numpy array (H, W, 3 in the examples in this
            file). The array itself is not modified.
        score_threshold: Iteration stops at the first detection whose score is
            below this value.
        object_show_count: Upper bound on the number of detections drawn.
        is_print: When True, print the elapsed detection time. Each drawn
            detection's caption is printed regardless of this flag.

    Returns:
        A copy of ``img_array`` with bounding boxes and captions drawn on it.
    """
    # cv2.rectangle() draws in place, so keep the caller's array untouched.
    draw_img = img_array.copy()

    # Box outline and caption colours. The tuples are written into draw_img
    # as-is, so the visible colour depends on the channel order of img_array:
    # (0, 0, 255) is red for BGR input but blue for RGB input.
    green_color = (0, 255, 0)
    red_color = (0, 0, 255)

    # Convert the numpy image to a uint8 tensor with a leading batch axis.
    img_tensor = tf.convert_to_tensor(img_array, dtype=tf.uint8)[tf.newaxis, ...]

    start_time = time.time()
    # The model returns boxes, scores, classes and num_detections as separate tensors.
    boxes, scores, classes, num_detections = model(img_tensor)
    # Move everything to numpy for drawing.
    boxes = boxes.numpy()
    scores = scores.numpy()
    classes = classes.numpy()
    num_detections = num_detections.numpy()

    # Never visit more detections than the caller asked to show.
    show_count = min(int(num_detections[0]), object_show_count)
    for i in range(show_count):
        # Scores are expected in descending order, so the first one below the
        # threshold ends the loop.
        score = scores[0, i]
        if score < score_threshold:
            break

        # Boxes are ordered (ymin, xmin, ymax, xmax) and are already expressed
        # in original-image pixel coordinates, so no rescaling is applied.
        top, left, bottom, right = boxes[0, i]

        # Class ids come back as floats; cast to int for the lookup and fall
        # back to the raw id when the label map has no entry for it.
        class_id = int(classes[0, i])
        class_name = labels_to_names.get(class_id, str(class_id))
        caption = "{}: {:.4f}".format(class_name, score)
        print(caption)

        # OpenCV drawing functions require integer pixel positions.
        cv2.rectangle(draw_img, (int(left), int(top)), (int(right), int(bottom)), color=green_color, thickness=2)
        cv2.putText(draw_img, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, red_color, 1)

    if is_print:
        print('Detection 수행시간:', round(time.time() - start_time, 2), "초")

    return draw_img

 

# Detect objects in the baseball sample with lite0 and display the annotated image.
baseball_bgr = cv2.imread('/content/data/baseball01.jpg')
img_array = cv2.cvtColor(baseball_bgr, cv2.COLOR_BGR2RGB)
draw_img = get_detected_img_automl(
    detector_automl_lite0,
    img_array,
    score_threshold=0.3,
    object_show_count=100,
    is_print=True,
)
plt.figure(figsize=(12, 12))
plt.imshow(draw_img)

person: 0.9009
person: 0.8834
person: 0.8455
baseball glove: 0.4854
baseball bat: 0.3803
Detection 수행시간: 0.04 초

 

# Detect objects in the beatles sample with lite0 and display the annotated image.
beatles_bgr = cv2.imread('/content/data/beatles01.jpg')
img_array = cv2.cvtColor(beatles_bgr, cv2.COLOR_BGR2RGB)
# Batched uint8 tensor of the same image (the helper below builds its own copy).
img_tensor = tf.convert_to_tensor(img_array, dtype=tf.uint8)[tf.newaxis, ...]
draw_img = get_detected_img_automl(
    detector_automl_lite0,
    img_array,
    score_threshold=0.3,
    object_show_count=100,
    is_print=True,
)
plt.figure(figsize=(12, 12))
plt.imshow(draw_img)


person: 0.8218
person: 0.8134
person: 0.7396
person: 0.6831
car: 0.6212
car: 0.4215
car: 0.3183
Detection 수행시간: 1.72 초

 

EfficientDet lite2 모델로 inference 수행.

# Load the EfficientDet-Lite2 detection module from TF Hub for the examples that follow.
detector_automl_lite2 = hub.load("https://tfhub.dev/tensorflow/efficientdet/lite2/detection/1")

 

# Detect objects in the beatles sample with lite2 (threshold 0.5) and display the result.
beatles_bgr = cv2.imread('/content/data/beatles01.jpg')
img_array = cv2.cvtColor(beatles_bgr, cv2.COLOR_BGR2RGB)
# Batched uint8 tensor of the same image (the helper below builds its own copy).
img_tensor = tf.convert_to_tensor(img_array, dtype=tf.uint8)[tf.newaxis, ...]
draw_img = get_detected_img_automl(
    detector_automl_lite2,
    img_array,
    score_threshold=0.5,
    object_show_count=100,
    is_print=True,
)
plt.figure(figsize=(12, 12))
plt.imshow(draw_img)

person: 0.9152
person: 0.9089
person: 0.8914
person: 0.8808
car: 0.6071
car: 0.5114
Detection 수행시간: 2.73 초

 

def do_detected_video_automl(model, input_path, output_path, score_threshold, is_print):
    """Run detection on every frame of a video and write the annotated frames out.

    Args:
        model: Detector forwarded to ``get_detected_img_automl``.
        input_path: Path of the video to read.
        output_path: Path of the annotated video to write (XVID fourcc).
        score_threshold: Score threshold forwarded to ``get_detected_img_automl``.
        is_print: Print flag forwarded to ``get_detected_img_automl``.

    Raises:
        IOError: If ``input_path`` cannot be opened as a video.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError('Could not open video: {}'.format(input_path))

    vid_writer = None
    try:
        codec = cv2.VideoWriter_fourcc(*'XVID')

        # The output keeps the input's frame size and frame rate.
        vid_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        vid_fps = cap.get(cv2.CAP_PROP_FPS)

        vid_writer = cv2.VideoWriter(output_path, codec, vid_fps, vid_size)

        frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print('총 Frame 갯수:', frame_cnt)

        while True:
            hasFrame, img_frame = cap.read()
            if not hasFrame:
                print('더 이상 처리할 frame이 없습니다.')
                break
            # NOTE(review): frames from cv2.VideoCapture are BGR and are passed
            # to the detector unconverted, whereas the still-image examples in
            # this file convert BGR to RGB first - confirm whether a conversion
            # is wanted here as well.
            img_frame = get_detected_img_automl(model, img_frame, score_threshold=score_threshold, object_show_count=100, is_print=is_print)

            vid_writer.write(img_frame)
    finally:
        # Release both handles even when inference or writing raises.
        if vid_writer is not None:
            vid_writer.release()
        cap.release()

 

# Annotate the sample video with the lite2 detector (score threshold 0.5, printing enabled).
# NOTE(review): the input file name is spelled 'Jonh' while the output uses 'John' - confirm the input path.
do_detected_video_automl(detector_automl_lite2, '/content/data/Jonh_Wick_small.mp4', './data/John_Wick_small_lite_02.mp4', 0.5, True)

총 Frame 갯수: 58
car: 0.7213
car: 0.7048
person: 0.6839
car: 0.6667
car: 0.6558
car: 0.5763
Detection 수행시간: 2.18 초
car: 0.7208
car: 0.7070
person: 0.6858
car: 0.6600
car: 0.6558
car: 0.5790
Detection 수행시간: 0.07 초
car: 0.6072
person: 0.5958
car: 0.5877
car: 0.5727
Detection 수행시간: 0.06 초
car: 0.6773
person: 0.6331
car: 0.5557
Detection 수행시간: 0.06 초
car: 0.7090
person: 0.6330
car: 0.6061
car: 0.5067
Detection 수행시간: 0.06 초
car: 0.6947
car: 0.6896
person: 0.5417
Detection 수행시간: 0.06 초
car: 0.6893
car: 0.6833
person: 0.5335
Detection 수행시간: 0.06 초
car: 0.7336
car: 0.7109
car: 0.6813
motorcycle: 0.5120
Detection 수행시간: 0.06 초
car: 0.7302
car: 0.7150
motorcycle: 0.6327
car: 0.6062
Detection 수행시간: 0.06 초
car: 0.7211
car: 0.7191
motorcycle: 0.6324
car: 0.6052
Detection 수행시간: 0.06 초
car: 0.7077
car: 0.6678
car: 0.6344
motorcycle: 0.6321
Detection 수행시간: 0.06 초
car: 0.7145
car: 0.7095
car: 0.6083
motorcycle: 0.6038
Detection 수행시간: 0.06 초
car: 0.7381
car: 0.6946
motorcycle: 0.5520
car: 0.5499
Detection 수행시간: 0.06 초
car: 0.7156
car: 0.6753
car: 0.6429
motorcycle: 0.5482
Detection 수행시간: 0.06 초
car: 0.7285
car: 0.6851
car: 0.6462
motorcycle: 0.5571
Detection 수행시간: 0.06 초
car: 0.7230
car: 0.6673
person: 0.6127
car: 0.5793
motorcycle: 0.5393
Detection 수행시간: 0.06 초
car: 0.7441
car: 0.6925
motorcycle: 0.5914
person: 0.5100
Detection 수행시간: 0.07 초
car: 0.6714
car: 0.6340
person: 0.5797
Detection 수행시간: 0.06 초
car: 0.7087
motorcycle: 0.6122
car: 0.6025
car: 0.5611
car: 0.5445
Detection 수행시간: 0.06 초
car: 0.7057
motorcycle: 0.6146
car: 0.5860
car: 0.5615
car: 0.5392
Detection 수행시간: 0.06 초
car: 0.7421
motorcycle: 0.6152
car: 0.5331
car: 0.5200
car: 0.5190
Detection 수행시간: 0.06 초
car: 0.7229
motorcycle: 0.6333
car: 0.5775
car: 0.5757
Detection 수행시간: 0.06 초
car: 0.6589
car: 0.5912
motorcycle: 0.5899
Detection 수행시간: 0.06 초
person: 0.7521
person: 0.6622
motorcycle: 0.5707
Detection 수행시간: 0.06 초
person: 0.7558
person: 0.6703
motorcycle: 0.5853
Detection 수행시간: 0.06 초
person: 0.7575
person: 0.7115
Detection 수행시간: 0.06 초
person: 0.7646
person: 0.7391
Detection 수행시간: 0.06 초
person: 0.7744
person: 0.7480
Detection 수행시간: 0.06 초
person: 0.7778
car: 0.7730
horse: 0.7068
car: 0.5107
Detection 수행시간: 0.06 초
car: 0.7756
person: 0.7676
horse: 0.7113
car: 0.5043
Detection 수행시간: 0.06 초
car: 0.7500
person: 0.6981
person: 0.6900
car: 0.5125
Detection 수행시간: 0.06 초
car: 0.6990
person: 0.6905
person: 0.6854
Detection 수행시간: 0.06 초
person: 0.7262
car: 0.7237
person: 0.6961
motorcycle: 0.5003
Detection 수행시간: 0.06 초
car: 0.7820
person: 0.7677
person: 0.6664
Detection 수행시간: 0.06 초
car: 0.7785
person: 0.7709
person: 0.6833
Detection 수행시간: 0.06 초
car: 0.8247
person: 0.7472
person: 0.7321
car: 0.6222
motorcycle: 0.5294
Detection 수행시간: 0.06 초
car: 0.8342
person: 0.7343
person: 0.7152
car: 0.5990
motorcycle: 0.5350
Detection 수행시간: 0.06 초
car: 0.8224
person: 0.7061
person: 0.7014
motorcycle: 0.5112
Detection 수행시간: 0.06 초
car: 0.8180
person: 0.7044
person: 0.6811
car: 0.5980
Detection 수행시간: 0.06 초
car: 0.8190
person: 0.7017
person: 0.6996
car: 0.6456
motorcycle: 0.5053
Detection 수행시간: 0.06 초
car: 0.8029
person: 0.7463
person: 0.6732
car: 0.6710
Detection 수행시간: 0.06 초
car: 0.7811
person: 0.7525
person: 0.7167
car: 0.7015
Detection 수행시간: 0.06 초
car: 0.7778
person: 0.7342
car: 0.6729
person: 0.6687
motorcycle: 0.5495
Detection 수행시간: 0.06 초
car: 0.8041
person: 0.7691
horse: 0.6278
car: 0.5558
Detection 수행시간: 0.06 초
car: 0.8021
person: 0.7739
horse: 0.6354
car: 0.5613
Detection 수행시간: 0.06 초
car: 0.8237
person: 0.7113
person: 0.6251
horse: 0.5672
car: 0.5561
motorcycle: 0.5362
Detection 수행시간: 0.06 초
car: 0.8573
car: 0.6713
person: 0.5939
person: 0.5814
car: 0.5653
Detection 수행시간: 0.06 초
car: 0.8395
car: 0.7330
person: 0.5949
person: 0.5404
horse: 0.5206
Detection 수행시간: 0.06 초
car: 0.8135
horse: 0.6740
car: 0.6538
person: 0.6302
person: 0.5339
Detection 수행시간: 0.06 초
car: 0.8160
horse: 0.6635
car: 0.6524
person: 0.6147
person: 0.5334
Detection 수행시간: 0.06 초
car: 0.8571
person: 0.6551
car: 0.6094
horse: 0.5733
person: 0.5346
Detection 수행시간: 0.06 초
car: 0.8386
car: 0.6963
horse: 0.6246
person: 0.5747
person: 0.5439
Detection 수행시간: 0.07 초
car: 0.8146
car: 0.6012
person: 0.5879
horse: 0.5211
Detection 수행시간: 0.06 초
car: 0.7695
car: 0.7263
person: 0.5264
horse: 0.5262
motorcycle: 0.5096
Detection 수행시간: 0.06 초
car: 0.7618
car: 0.7197
person: 0.5401
horse: 0.5215
motorcycle: 0.5116
Detection 수행시간: 0.06 초
car: 0.7638
car: 0.7306
person: 0.6056
car: 0.5946
person: 0.5777
Detection 수행시간: 0.07 초
car: 0.7433
car: 0.6687
car: 0.6672
person: 0.5707
person: 0.5527
Detection 수행시간: 0.06 초
car: 0.7775
car: 0.7218
car: 0.6586
person: 0.5992
Detection 수행시간: 0.06 초
더 이상 처리할 frame이 없습니다.

 

 

반응형

+ Recent posts