
Set up the config and download the COCO-pretrained model

  • The config file uses Faster R-CNN with a ResNet-50 backbone.
  • Training on the Oxford Pet data takes a while, so the model produced by training is saved to Google Drive.
config_file = './mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
checkpoint_file = './mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

!cd mmdetection; mkdir checkpoints
!wget -O ./mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
from mmcv import Config

cfg = Config.fromfile(config_file)
print(cfg.pretty_text)
model = dict(
    type='FasterRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_train2017.json',
        img_prefix='data/coco/train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ]),
    val=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
evaluation = dict(interval=1, metric='bbox')
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
# Mount Google Drive so it can be accessed from Colab. 
import os, sys 
from google.colab import drive 

drive.mount('/content/gdrive')
# Link the Google Drive directory via a soft link. 
!ln -s /content/gdrive/My\ Drive/ /mydrive
!ls /mydrive
# Create a directory under Google Drive. This errors out if the directory already exists. 
!mkdir "/mydrive/pet_work_dir"
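If the notebook is re-run, the mkdir above fails because the directory already exists; a small alternative (a sketch using Python's os.makedirs) avoids that:

import os
os.makedirs('/mydrive/pet_work_dir', exist_ok=True)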
!nvidia-smi
# Sun Oct 17 09:04:09 2021       
# +-----------------------------------------------------------------------------+
# | NVIDIA-SMI 470.74       Driver Version: 460.32.03    CUDA Version: 11.2     |
# |-------------------------------+----------------------+----------------------+
# | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
# | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
# |                               |                      |               MIG M. |
# |===============================+======================+======================|
# |   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
# | N/A   73C    P8    35W / 149W |      3MiB / 11441MiB |      0%      Default |
# |                               |                      |                  N/A |
# +-------------------------------+----------------------+----------------------+
#                                                                                
# +-----------------------------------------------------------------------------+
# | Processes:                                                                  |
# |  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
# |        ID   ID                                                   Usage      |
# |=============================================================================|
# |  No running processes found                                                 |
# +-----------------------------------------------------------------------------+
from mmdet.apis import set_random_seed

# Update the dataset-related config parameters. 
cfg.dataset_type = 'PetDataset'
cfg.data_root = '/content/data/'

# Update type, data_root, ann_file and img_prefix for the train and val datasets (test is left unchanged here). 
cfg.data.train.type = 'PetDataset'
cfg.data.train.data_root = '/content/data/'
cfg.data.train.ann_file = 'train.txt'
cfg.data.train.img_prefix = 'images'

cfg.data.val.type = 'PetDataset'
cfg.data.val.data_root = '/content/data/'
cfg.data.val.ann_file = 'val.txt'
cfg.data.val.img_prefix = 'images'

# Update the number of classes. 
cfg.model.roi_head.bbox_head.num_classes = 37
# Pretrained COCO model to fine-tune from (path is relative to the mmdetection directory)
cfg.load_from = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

# Use a Google Drive directory for saving the trained weights and logs. 
cfg.work_dir = '/mydrive/pet_work_dir'

# Adjust the learning rate (the default 0.02 assumes 8 GPUs, so scale it down for a single GPU). 
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 5

cfg.runner.max_epochs = 5

# Evaluation metric (CustomDataset supports mAP). 
cfg.evaluation.metric = 'mAP'
# Epoch interval at which evaluation runs. 
cfg.evaluation.interval = 5
# Epoch interval at which model checkpoints are saved. 
cfg.checkpoint_config.interval = 5

# Batch size for training (this is the per-GPU batch size)
cfg.data.samples_per_gpu = 4 
# With ~3,000 images, 4 per batch (~750 iterations per epoch) runs faster than 2 per batch (~1,500 iterations),
# but raising the batch size too far eats GPU memory and crashes the runtime.
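As a rough sanity check (a sketch; the exact count depends on how many images survive XML filtering), the iterations per epoch can be estimated from the dataset size and the per-GPU batch size:

import math
# ~3,300 annotated training images / 4 images per batch -> roughly 830 iterations per epoch
print(math.ceil(3304 / cfg.data.samples_per_gpu))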

# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
# Set explicitly: loading the config twice can drop lr_config's policy, so restore it here. 
cfg.lr_config.policy='step'
# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='FasterRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=37,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
dataset_type = 'PetDataset'
data_root = '/content/data/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        type='PetDataset',
        ann_file='train.txt',
        img_prefix='images',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ],
        data_root='/content/data/'),
    val=dict(
        type='PetDataset',
        ann_file='val.txt',
        img_prefix='images',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ],
        data_root='/content/data/'),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
evaluation = dict(interval=5, metric='mAP')
optimizer = dict(type='SGD', lr=0.0025, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup=None,
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=5)
checkpoint_config = dict(interval=5)
log_config = dict(interval=5, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
resume_from = None
workflow = [('train', 1)]
work_dir = '/mydrive/pet_work_dir'
seed = 0
gpu_ids = range(0, 1)

 

Build the Train dataset and train on the Oxford Pet dataset

  • build_dataset() builds the Train dataset according to the train config.
  • build_detector() builds the model, applying the train and test configs.
  • train_detector() trains the model.
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build the training dataset. 
datasets = [build_dataset(cfg.data.train)]

datasets
# [
#  PetDataset Train dataset with number of images 3304, and instance counts: 
#  +-----------------------+-------+-------------------------+-------+-------------------------------+-------+---------------------+-------+---------------------------------+-------+
#  | category              | count | category                | count | category                      | count | category            | count | category                        | count |
#  +-----------------------+-------+-------------------------+-------+-------------------------------+-------+---------------------+-------+---------------------------------+-------+
#  | 0 [Abyssinian]        | 89    | 1 [american_bulldog]    | 90    | 2 [american_pit_bull_terrier] | 90    | 3 [basset_hound]    | 90    | 4 [beagle]                      | 90    |
#  | 5 [Bengal]            | 89    | 6 [Birman]              | 90    | 7 [Bombay]                    | 86    | 8 [boxer]           | 90    | 9 [British_Shorthair]           | 90    |
#  | 10 [chihuahua]        | 90    | 11 [Egyptian_Mau]       | 81    | 12 [english_cocker_spaniel]   | 86    | 13 [english_setter] | 90    | 14 [german_shorthaired]         | 90    |
#  | 15 [great_pyrenees]   | 90    | 16 [havanese]           | 90    | 17 [japanese_chin]            | 90    | 18 [keeshond]       | 90    | 19 [leonberger]                 | 90    |
#  | 20 [Maine_Coon]       | 90    | 21 [miniature_pinscher] | 90    | 22 [newfoundland]             | 87    | 23 [Persian]        | 90    | 24 [pomeranian]                 | 90    |
#  | 25 [pug]              | 90    | 26 [Ragdoll]            | 89    | 27 [Russian_Blue]             | 90    | 28 [saint_bernard]  | 89    | 29 [samoyed]                    | 90    |
#  | 30 [scottish_terrier] | 90    | 31 [shiba_inu]          | 90    | 32 [Siamese]                  | 89    | 33 [Sphynx]         | 90    | 34 [staffordshire_bull_terrier] | 90    |
#  |                       |       |                         |       |                               |       |                     |       |                                 |       |
#  | 35 [wheaten_terrier]  | 90    | 36 [yorkshire_terrier]  | 90    |                               |       |                     |       |                                 |       |
#  +-----------------------+-------+-------------------------+-------+-------------------------------+-------+---------------------+-------+---------------------------------+-------+]
%cd mmdetection

import os.path as osp
import mmcv

model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
model.CLASSES = datasets[0].CLASSES

mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# The number of epochs comes from the config's runner parameter (12 by default, set to 5 above). 
train_detector(model, datasets, cfg, distributed=False, validate=True)

/content/mmdetection
/usr/local/lib/python3.7/dist-packages/mmdet-2.17.0-py3.7.egg/mmdet/core/anchor/builder.py:17: UserWarning: ``build_anchor_generator`` would be deprecated soon, please use ``build_prior_generator`` 
  '``build_anchor_generator`` would be deprecated soon, please use '
2021-10-17 09:04:41,047 - mmdet - INFO - load checkpoint from checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
2021-10-17 09:04:41,048 - mmdet - INFO - Use load_from_local loader
2021-10-17 09:04:41,215 - mmdet - WARNING - The model and loaded state dict do not match exactly

size mismatch for roi_head.bbox_head.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([38, 1024]).
size mismatch for roi_head.bbox_head.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([38]).
size mismatch for roi_head.bbox_head.fc_reg.weight: copying a param with shape torch.Size([320, 1024]) from checkpoint, the shape in current model is torch.Size([148, 1024]).
size mismatch for roi_head.bbox_head.fc_reg.bias: copying a param with shape torch.Size([320]) from checkpoint, the shape in current model is torch.Size([148]).
2021-10-17 09:04:41,226 - mmdet - INFO - Start running, host: root@bcf0fa707f92, work_dir: /mydrive/pet_work_dir
2021-10-17 09:04:41,228 - mmdet - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) StepLrUpdaterHook                  
(NORMAL      ) CheckpointHook                     
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) StepLrUpdaterHook                  
(NORMAL      ) NumClassCheckHook                  
(LOW         ) IterTimerHook                      
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_iter:
(VERY_HIGH   ) StepLrUpdaterHook                  
(LOW         ) IterTimerHook                      
(LOW         ) EvalHook                           
 -------------------- 
after_train_iter:
(ABOVE_NORMAL) OptimizerHook                      
(NORMAL      ) CheckpointHook                     
(LOW         ) IterTimerHook                      
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
after_train_epoch:
(NORMAL      ) CheckpointHook                     
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_val_epoch:
(NORMAL      ) NumClassCheckHook                  
(LOW         ) IterTimerHook                      
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_val_iter:
(LOW         ) IterTimerHook                      
 -------------------- 
after_val_iter:
(LOW         ) IterTimerHook                      
 -------------------- 
after_val_epoch:
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
2021-10-17 09:04:41,231 - mmdet - INFO - workflow: [('train', 1)], max: 5 epochs
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
/usr/local/lib/python3.7/dist-packages/mmdet-2.17.0-py3.7.egg/mmdet/core/anchor/anchor_generator.py:324: UserWarning: ``grid_anchors`` would be deprecated soon. Please use ``grid_priors`` 
  warnings.warn('``grid_anchors`` would be deprecated soon. '
/usr/local/lib/python3.7/dist-packages/mmdet-2.17.0-py3.7.egg/mmdet/core/anchor/anchor_generator.py:361: UserWarning: ``single_level_grid_anchors`` would be deprecated soon. Please use ``single_level_grid_priors`` 
  '``single_level_grid_anchors`` would be deprecated soon. '
2021-10-17 09:05:01,399 - mmdet - INFO - Epoch [1][5/827]	lr: 2.500e-03, eta: 4:29:38, time: 3.917, data_time: 0.507, memory: 9645, loss_rpn_cls: 0.0123, loss_rpn_bbox: 0.0100, loss_cls: 2.2307, acc: 58.4863, loss_bbox: 0.1100, loss: 2.3629
# Reading the log: [1][5/827] = epoch 1, iteration 5 of 827 (827 iterations per epoch at batch size 4; with batch size 1 it would be about 3,300).
# eta = estimated time to completion, time = seconds per iteration, memory = GPU memory used (MiB),
# loss_rpn_cls / loss_rpn_bbox = RPN classification / box losses, loss_cls / acc = final classification loss and accuracy,
# loss_bbox = final box regression loss, loss = total loss.

# Run the cells below after Runtime -> Restart runtime. 
from mmdet.apis import init_detector, inference_detector
import mmcv

Download the Oxford Pet dataset

Download the images and annotations as separate compressed archives.

!wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
!wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
# Create the /content/data directory and extract the downloaded archives into it.
!mkdir /content/data
!tar -xvf images.tar.gz -C /content/data
!tar -xvf annotations.tar.gz -C /content/data
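A quick optional check (a sketch) that the archives extracted where expected:

import glob
print(len(glob.glob('/content/data/images/*.jpg')))            # all pet images
print(len(glob.glob('/content/data/annotations/xmls/*.xml')))  # images that have bbox XML annotations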

Inspect the image directory and the annotation files

!ls -lia ./data/images/Abyss*.jpg
!ls -lia ./data/images
!cat ./data/annotations/xmls/Abyssinian_1.xml
import glob
import xml.etree.ElementTree as ET

# Parse an annotation XML file and extract the bbox info
def get_bboxes_from_xml_test(xml_file):
  tree = ET.parse(xml_file)
  root = tree.getroot()
  bbox_names = []
  bboxes = []
  # Find every object element in the file. 
  for obj in root.findall('object'):

    bbox_name = obj.find('name').text
    xmlbox = obj.find('bndbox')
    x1 = int(xmlbox.find('xmin').text)
    y1 = int(xmlbox.find('ymin').text)
    x2 = int(xmlbox.find('xmax').text)
    y2 = int(xmlbox.find('ymax').text)

    bbox_names.append(bbox_name)
    bboxes.append([x1, y1, x2, y2])

  return bbox_names, bboxes

get_bboxes_from_xml_test('./data/annotations/xmls/Abyssinian_1.xml')
# (['cat'], [[333, 72, 425, 158]])
!ls -lia ./data/annotations/xmls/Abys*.xml

Inspect the train/val image and annotation meta files

  • Meta files hold the file names of the images and annotations that will be split into train and validation sets.
  • Separate meta files are created for train and for validation.
!cd ./data/annotations; cat trainval.txt
import pandas as pd

pet_df = pd.read_csv('./data/annotations/trainval.txt', sep=' ', header=None, names=['img_name', 'class_id', 'etc1', 'etc2'])
pet_df.head()

img_name	class_id	etc1	etc2
0	Abyssinian_100	1	1	1
1	Abyssinian_101	1	1	1
2	Abyssinian_102	1	1	1
3	Abyssinian_103	1	1	1
4	Abyssinian_104	1	1	1
pet_df['class_id'].value_counts()
37    100
22    100
34    100
32    100
30    100
28    100
26    100
24    100
20    100
35    100
18    100
16    100
14    100
10    100
6     100
4     100
36    100
1     100
3     100
19    100
31    100
29    100
27    100
25    100
5     100
21    100
17    100
15    100
11    100
9     100
7     100
2     100
33     99
23     96
13     96
8      96
12     93
Name: class_id, dtype: int64
pet_df['class_name'] = pet_df['img_name'].apply(lambda x:x[:x.rfind('_')])
pet_df.head()
	img_name	class_id	etc1	etc2	class_name
0	Abyssinian_100	1	1	1	Abyssinian
1	Abyssinian_101	1	1	1	Abyssinian
2	Abyssinian_102	1	1	1	Abyssinian
3	Abyssinian_103	1	1	1	Abyssinian
4	Abyssinian_104	1	1	1	Abyssinian
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(pet_df, test_size=0.1, stratify=pet_df['class_id'], random_state=2021)
print(train_df['class_id'].value_counts(), val_df['class_id'].value_counts())
37    90
22    90
34    90
32    90
30    90
28    90
26    90
24    90
20    90
35    90
18    90
16    90
14    90
10    90
6     90
4     90
36    90
1     90
3     90
19    90
31    90
29    90
27    90
25    90
5     90
21    90
17    90
15    90
11    90
9     90
7     90
2     90
33    89
23    87
13    86
8     86
12    84
Name: class_id, dtype: int64 37    10
36    10
17    10
16    10
15    10
14    10
13    10
11    10
10    10
9     10
8     10
7     10
6     10
5     10
4     10
3     10
2     10
18    10
19    10
20    10
21    10
35    10
34    10
33    10
32    10
31    10
30    10
29    10
28    10
27    10
26    10
25    10
24    10
22    10
1     10
12     9
23     9
Name: class_id, dtype: int64
train_df = train_df.sort_values(by='img_name')
val_df = val_df.sort_values(by='img_name')

# The meta file passed as ann_file is best kept at the top-level directory of the source data. 
train_df['img_name'].to_csv('./data/train.txt', sep=' ', header=False, index=False)
val_df['img_name'].to_csv('./data/val.txt', sep=' ', header=False, index=False)

pet_classes_list = pet_df['class_name'].unique().tolist()
print(pet_classes_list)
# ['Abyssinian', 'american_bulldog', 'american_pit_bull_terrier', 'basset_hound', 'beagle', 'Bengal', 'Birman', 'Bombay', 'boxer', 'British_Shorthair', 'chihuahua', 'Egyptian_Mau', 'english_cocker_spaniel', 'english_setter', 'german_shorthaired', 'great_pyrenees', 'havanese', 'japanese_chin', 'keeshond', 'leonberger', 'Maine_Coon', 'miniature_pinscher', 'newfoundland', 'Persian', 'pomeranian', 'pug', 'Ragdoll', 'Russian_Blue', 'saint_bernard', 'samoyed', 'scottish_terrier', 'shiba_inu', 'Siamese', 'Sphynx', 'staffordshire_bull_terrier', 'wheaten_terrier', 'yorkshire_terrier']
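The class id of each breed is simply its index in this list; a one-line sketch of the mapping used later in the dataset class:

cat2label = {name: i for i, name in enumerate(pet_classes_list)}
print(cat2label['Abyssinian'], cat2label['yorkshire_terrier'])  # 0 36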
!echo 'train list #####'; cat ./data/train.txt
train list #####
Abyssinian_1
Abyssinian_10
...
yorkshire_terrier_188
yorkshire_terrier_189
!echo ' valid list ###'; cat ./data/val.txt
 valid list ###
Abyssinian_100
Abyssinian_11
Abyssinian_122
...
yorkshire_terrier_185
yorkshire_terrier_190

Converting to MMDetection's neutral (middle) annotation format

  • CLASSES is set from the unique values of pet_df's 'class_name' column. Each class id is the index of that name in the CLASSES tuple/list.
  • Read the meta file passed as ann_file, build a dict of per-image info plus its annotation info, and append it to the data_infos list (a minimal sketch of this format follows right after this list).
  • Parsing an individual XML into annotation info is handled by the get_bboxes_from_xml() function.
  • For debugging, it is also worth building a mock CustomDataset and testing it first.
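For orientation, a minimal sketch of a single entry in the middle-format list that load_annotations() must return (field names follow CustomDataset's middle format; the values are the Abyssinian_1 example from this dataset):

import numpy as np

data_info_example = {
    'filename': '/content/data/images/Abyssinian_1.jpg',
    'width': 600,
    'height': 400,
    'ann': {
        'bboxes': np.array([[333., 72., 425., 158.]], dtype=np.float32),   # (N, 4) boxes as x1, y1, x2, y2
        'labels': np.array([0]),                                           # (N,) class ids = indices into CLASSES
        'bboxes_ignore': np.zeros((0, 4), dtype=np.float32),               # boxes excluded from training
        'labels_ignore': np.array([], dtype=np.int64)
    }
}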


import xml.etree.ElementTree as ET

# Extract bbox info from a single annotation file. If there are multiple objects, return their names and bbox coordinates as lists.
def get_bboxes_from_xml(anno_dir, xml_file):
  anno_xml_file = osp.join(anno_dir, xml_file)
  tree = ET.parse(anno_xml_file)
  root = tree.getroot()
  bbox_names = []
  bboxes = []

  # Find every object element in the file. 
  for obj in root.findall('object'):
    # obj.find('name').text would only return 'cat' or 'dog',
    #bbox_name = obj.find('name').text
    # so the object's class name (the breed) is extracted from the file name instead. 
    bbox_name = xml_file[:xml_file.rfind('_')]

    xmlbox = obj.find('bndbox')
    x1 = int(xmlbox.find('xmin').text)
    y1 = int(xmlbox.find('ymin').text)
    x2 = int(xmlbox.find('xmax').text)
    y2 = int(xmlbox.find('ymax').text)

    bboxes.append([x1, y1, x2, y2])
    bbox_names.append(bbox_name)

  return bbox_names, bboxes
PET_CLASSES = pet_df['class_name'].unique().tolist()
PET_CLASSES

['Abyssinian',
 'american_bulldog',
 'american_pit_bull_terrier',
 'basset_hound',
 'beagle',
 'Bengal',
 'Birman',
 'Bombay',
 'boxer',
 'British_Shorthair',
 'chihuahua',
 'Egyptian_Mau',
 'english_cocker_spaniel',
 'english_setter',
 'german_shorthaired',
 'great_pyrenees',
 'havanese',
 'japanese_chin',
 'keeshond',
 'leonberger',
 'Maine_Coon',
 'miniature_pinscher',
 'newfoundland',
 'Persian',
 'pomeranian',
 'pug',
 'Ragdoll',
 'Russian_Blue',
 'saint_bernard',
 'samoyed',
 'scottish_terrier',
 'shiba_inu',
 'Siamese',
 'Sphynx',
 'staffordshire_bull_terrier',
 'wheaten_terrier',
 'yorkshire_terrier']
import copy
import os.path as osp

import mmcv
import numpy as np
import cv2

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

import xml.etree.ElementTree as ET

PET_CLASSES = pet_df['class_name'].unique().tolist()

@DATASETS.register_module(force=True)
class PetDataset(CustomDataset):
  CLASSES = PET_CLASSES

  # A text file listing every annotation file name is passed in as __init__'s ann_file argument, 
  # and this self.ann_file is then passed on to load_annotations().
  def load_annotations(self, ann_file):
    cat2label = {k:i for i, k in enumerate(self.CLASSES)}
    image_list = mmcv.list_from_file(self.ann_file)
    # list object that will hold the format-neutral (middle format) data
    data_infos = []

    for image_id in image_list:
      # self.img_prefix will be the images directory. 
      filename = '{0:}/{1:}.jpg'.format(self.img_prefix, image_id)
      # Get the original image width/height by loading the image directly. 
      image = cv2.imread(filename)
      height, width = image.shape[:2]
      # Dict holding this image's meta info (filename includes the img_prefix path)
      data_info = {'filename': filename,
                  'width': width, 'height': height}
      # Derive the prefix of the subdirectory holding the annotation XML files. 
      label_prefix = self.img_prefix.replace('images', 'annotations')
      
      # Build the path to this image's annotation XML. Note that the XML files live under the xmls subdirectory
      anno_xml_file = osp.join(label_prefix, 'xmls/'+str(image_id)+'.xml')
      # Some names listed in the meta file have no matching XML file; skip those. 
      if not osp.exists(anno_xml_file):
          continue
      
      # Use get_bboxes_from_xml() to collect every bbox in this XML file into list objects. 
      anno_dir = osp.join(label_prefix, 'xmls')
      bbox_names, bboxes = get_bboxes_from_xml(anno_dir, str(image_id)+'.xml')
      #print('#########:', bbox_names)
                  
      gt_bboxes = []
      gt_labels = []
      gt_bboxes_ignore = []
      gt_labels_ignore = []
        
      # Map each bbox object's class name to a class id. The class id is the index of the name in the CLASSES tuple/list
      for bbox_name, bbox in zip(bbox_names, bboxes):
        # If bbox_name is one of the class names, add it to gt_bboxes/gt_labels; otherwise add it to gt_bboxes_ignore/gt_labels_ignore
        # bbox_name must match one of CLASSES, otherwise the box gets filtered out, so take care. 
        if bbox_name in cat2label:
            gt_bboxes.append(bbox)
            # gt_labels stores the class id
            gt_labels.append(cat2label[bbox_name])
        else:
            gt_bboxes_ignore.append(bbox)
            gt_labels_ignore.append(-1)
      
      # Build a dict holding this image's annotations; bbox coordinates and labels are stored as np.arrays. 
      data_anno = {
        'bboxes': np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
        'labels': np.array(gt_labels, dtype=np.long),
        'bboxes_ignore': np.array(gt_bboxes_ignore, dtype=np.float32).reshape(-1, 4),
        'labels_ignore': np.array(gt_labels_ignore, dtype=np.long)
      }
      
      # Store data_anno under the 'ann' key of data_info, which holds this image's meta info. 
      data_info.update(ann=data_anno)
      # Append data_info to data_infos, which collects the info for every annotated image
      data_infos.append(data_info)
      #print(data_info)

    return data_infos
# For debugging: instantiate the mock dataset class (PetDataset_imsi, defined in a later section) and check the first 10 data_infos entries. 
train_ds = PetDataset_imsi(data_root='/content/data', ann_file='train.txt', img_prefix='images')
print(train_ds.data_infos[:10])
[{'filename': '/content/data/images/Abyssinian_1.jpg', 'width': 600, 'height': 400, 'ann': {'bboxes': array([[333.,  72., 425., 158.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_10.jpg', 'width': 375, 'height': 500, 'ann': {'bboxes': array([[ 72., 105., 288., 291.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_101.jpg', 'width': 450, 'height': 313, 'ann': {'bboxes': array([[ 54.,  36., 319., 235.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_102.jpg', 'width': 500, 'height': 465, 'ann': {'bboxes': array([[ 23.,  27., 325., 320.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_103.jpg', 'width': 500, 'height': 351, 'ann': {'bboxes': array([[241.,  68., 362., 196.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_105.jpg', 'width': 500, 'height': 375, 'ann': {'bboxes': array([[237., 101., 373., 227.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_106.jpg', 'width': 1536, 'height': 1024, 'ann': {'bboxes': array([[ 861.,  156., 1302.,  563.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_107.jpg', 'width': 500, 'height': 448, 'ann': {'bboxes': array([[ 94.,  76., 275., 271.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_108.jpg', 'width': 500, 'height': 404, 'ann': {'bboxes': array([[ 50.,  14., 336., 304.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}, {'filename': '/content/data/images/Abyssinian_109.jpg', 'width': 282, 'height': 450, 'ann': {'bboxes': array([[ 81.,   7., 246., 146.]], dtype=float32), 'labels': array([0]), 'bboxes_ignore': array([], shape=(0, 4), dtype=float32), 'labels_ignore': array([], dtype=int64)}}]

 

 

 

 

!pip install mmcv-full
!git clone https://github.com/open-mmlab/mmdetection.git
!cd mmdetection; python setup.py install
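Once the install finishes (a runtime restart may be needed before the newly installed packages import cleanly), a quick sanity check (a sketch):

import mmdet
import mmcv
print(mmdet.__version__, mmcv.__version__)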

Download the Oxford Pet dataset

Download the images and annotations as separate compressed archives.

!wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
!wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz

# Create the /content/data directory and extract the downloaded archives into it.
!mkdir /content/data
!tar -xvf images.tar.gz -C /content/data
!tar -xvf annotations.tar.gz -C /content/data

- One image has exactly one annotation.

- A Kaggle kernel runs a P100 GPU (about 13 minutes per epoch); a T4 takes somewhat longer.

- The task is to pick out the breed.

- Only the .jpg files should be taken from the image directory, and each has to be matched with its annotation in the xmls folder (a sketch of this follows below).

- The annotation files contain no path information.

- Each image holds only one object, and the name field in the XML only distinguishes cat vs. dog.
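A small sketch of that matching step, assuming the /content/data layout created above: keep only the .jpg files whose name has a corresponding XML under annotations/xmls.

import glob
import os.path as osp

all_imgs = glob.glob('/content/data/images/*.jpg')
xml_dir = '/content/data/annotations/xmls'
annotated = [p for p in all_imgs
             if osp.exists(osp.join(xml_dir, osp.splitext(osp.basename(p))[0] + '.xml'))]
print(len(all_imgs), len(annotated))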

Inspect the image directory and the annotation files

!ls -lia ./data/images/Abyss*.jpg
# 4460071 -rwxr-xr-x 1 1001 1001 126923 Jun 18  2012 ./data/images/Abyssinian_100.jpg
# ...
# 4458962 -rwxr-xr-x 1 1001 1001  21004 Jun 18  2012 ./data/images/Abyssinian_9.jpg
!ls -lia ./data/images

# Streaming output truncated to the last 5000 lines.
# 4459898 -rwxr-xr-x 1 1001 1001   27987 Jun 18  2012 Egyptian_Mau_91.jpg
# ...
# 4476065 -rwxr-xr-x 1 1001 1001  126596 Jun 18  2012 yorkshire_terrier_9.jpg
!cat ./data/annotations/xmls/Abyssinian_1.xml
# <annotation><folder>OXIIIT</folder><filename>Abyssinian_1.jpg</filename><source><database>OXFORD-IIIT Pet Dataset</database><annotation>OXIIIT</annotation><image>flickr</image></source><size><width>600</width><height>400</height><depth>3</depth></size><segmented>0</segmented><object><name>cat</name><pose>Frontal</pose><truncated>0</truncated><occluded>0</occluded><bndbox><xmin>333</xmin><ymin>72</ymin><xmax>425</xmax><ymax>158</ymax></bndbox><difficult>0</difficult></object></annotation>
import glob
import xml.etree.ElementTree as ET

# Parse an annotation XML file and extract the bbox info
def get_bboxes_from_xml_test(xml_file):
  tree = ET.parse(xml_file)
  root = tree.getroot()
  bbox_names = []
  bboxes = []
  # Find every object element in the file. 
  for obj in root.findall('object'):

    bbox_name = obj.find('name').text
    xmlbox = obj.find('bndbox')
    x1 = int(xmlbox.find('xmin').text)
    y1 = int(xmlbox.find('ymin').text)
    x2 = int(xmlbox.find('xmax').text)
    y2 = int(xmlbox.find('ymax').text)

    bbox_names.append(bbox_name)
    bboxes.append([x1, y1, x2, y2])

  return bbox_names, bboxes

get_bboxes_from_xml_test('./data/annotations/xmls/Abyssinian_1.xml')
!ls -lia ./data/annotations/xmls/Abys*.xml

Inspect the train/val image and annotation meta files

  • Meta files hold the file names of the images and annotations that will be split into train and validation sets.
  • Separate meta files are created for train and for validation.
!cd ./data/annotations; cat trainval.txt
import pandas as pd

pet_df = pd.read_csv('./data/annotations/trainval.txt', sep=' ', header=None, names=['img_name', 'class_id', 'etc1', 'etc2'])
pet_df.head()
	img_name	class_id	etc1	etc2
0	Abyssinian_100	1	1	1
1	Abyssinian_101	1	1	1
2	Abyssinian_102	1	1	1
3	Abyssinian_103	1	1	1
4	Abyssinian_104	1	1	1
pet_df['class_id'].value_counts()
37    100
22    100
34    100
32    100
30    100
28    100
26    100
24    100
20    100
35    100
18    100
16    100
14    100
10    100
6     100
4     100
36    100
1     100
3     100
19    100
31    100
29    100
27    100
25    100
5     100
21    100
17    100
15    100
11    100
9     100
7     100
2     100
33     99
23     96
13     96
8      96
12     93
Name: class_id, dtype: int64
pet_df['class_name'] = pet_df['img_name'].apply(lambda x:x[:x.rfind('_')])
pet_df.head()

	img_name	class_id	etc1	etc2	class_name
0	Abyssinian_100	1	1	1	Abyssinian
1	Abyssinian_101	1	1	1	Abyssinian
2	Abyssinian_102	1	1	1	Abyssinian
3	Abyssinian_103	1	1	1	Abyssinian
4	Abyssinian_104	1	1	1	Abyssinian
print(train_df['class_id'].value_counts(), val_df['class_id'].value_counts())
37    90
22    90
34    90
32    90
30    90
28    90
26    90
24    90
20    90
35    90
18    90
16    90
14    90
10    90
6     90
4     90
36    90
1     90
3     90
19    90
31    90
29    90
27    90
25    90
5     90
21    90
17    90
15    90
11    90
9     90
7     90
2     90
33    89
23    87
13    86
8     86
12    84
Name: class_id, dtype: int64 37    10
36    10
17    10
16    10
15    10
14    10
13    10
11    10
10    10
9     10
8     10
7     10
6     10
5     10
4     10
3     10
2     10
18    10
19    10
20    10
21    10
35    10
34    10
33    10
32    10
31    10
30    10
29    10
28    10
27    10
26    10
25    10
24    10
22    10
1     10
12     9
23     9
Name: class_id, dtype: int64
train_df = train_df.sort_values(by='img_name')
val_df = val_df.sort_values(by='img_name')

# The meta file passed as ann_file is best kept at the top-level directory of the source data. 
train_df['img_name'].to_csv('./data/train.txt', sep=' ', header=False, index=False)
val_df['img_name'].to_csv('./data/val.txt', sep=' ', header=False, index=False)

pet_classes_list = pet_df['class_name'].unique().tolist()
print(pet_classes_list)
['Abyssinian', 'american_bulldog', 'american_pit_bull_terrier', 'basset_hound', 'beagle', 'Bengal', 'Birman', 'Bombay', 'boxer', 'British_Shorthair', 'chihuahua', 'Egyptian_Mau', 'english_cocker_spaniel', 'english_setter', 'german_shorthaired', 'great_pyrenees', 'havanese', 'japanese_chin', 'keeshond', 'leonberger', 'Maine_Coon', 'miniature_pinscher', 'newfoundland', 'Persian', 'pomeranian', 'pug', 'Ragdoll', 'Russian_Blue', 'saint_bernard', 'samoyed', 'scottish_terrier', 'shiba_inu', 'Siamese', 'Sphynx', 'staffordshire_bull_terrier', 'wheaten_terrier', 'yorkshire_terrier']

 

!echo 'train list #####'; cat ./data/train.txt
train list #####
Abyssinian_1
Abyssinian_10
Abyssinian_101
Abyssinian_102
...
yorkshire_terrier_187
yorkshire_terrier_188
yorkshire_terrier_189

!echo ' valid list ###'; cat ./data/val.txt
 valid list ###
Abyssinian_100
Abyssinian_11
...
yorkshire_terrier_185
yorkshire_terrier_190

Converting to MMDetection's neutral (middle) annotation format

  • CLASSES is set from the unique values of pet_df's 'class_name' column. Each class id is the index of that name in the CLASSES tuple/list.
  • Read the meta file passed as ann_file, build a dict of per-image info plus its annotation info, and append it to the data_infos list.
  • Parsing an individual XML into annotation info is handled by the get_bboxes_from_xml() function.
  • For debugging, it is also worth building a mock CustomDataset and testing it first.

import xml.etree.ElementTree as ET

# Extract bbox info from a single annotation file. If there are multiple objects, return their names and bbox coordinates as lists.
def get_bboxes_from_xml(anno_dir, xml_file):
  anno_xml_file = osp.join(anno_dir, xml_file)
  tree = ET.parse(anno_xml_file)
  root = tree.getroot()
  bbox_names = []
  bboxes = []

  # Find every object element in the file. 
  for obj in root.findall('object'):
    # obj.find('name').text would only return 'cat' or 'dog',
    #bbox_name = obj.find('name').text
    # so the object's class name (the breed) is extracted from the file name instead. 
    bbox_name = xml_file[:xml_file.rfind('_')]

    xmlbox = obj.find('bndbox')
    x1 = int(xmlbox.find('xmin').text)
    y1 = int(xmlbox.find('ymin').text)
    x2 = int(xmlbox.find('xmax').text)
    y2 = int(xmlbox.find('ymax').text)

    bboxes.append([x1, y1, x2, y2])
    bbox_names.append(bbox_name)

  return bbox_names, bboxes
import copy
import os.path as osp

import mmcv
import numpy as np
import cv2

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

import xml.etree.ElementTree as ET

PET_CLASSES = pet_df['class_name'].unique().tolist()

@DATASETS.register_module(force=True)
class PetDataset(CustomDataset):
  CLASSES = PET_CLASSES

  # A text file listing every annotation file name is passed in as __init__'s ann_file argument, 
  # and this self.ann_file is then passed on to load_annotations().
  def load_annotations(self, ann_file):
    cat2label = {k:i for i, k in enumerate(self.CLASSES)}
    image_list = mmcv.list_from_file(self.ann_file)
    # list object that will hold the format-neutral (middle format) data
    data_infos = []

    for image_id in image_list:
      # self.img_prefix will be the images directory. 
      filename = '{0:}/{1:}.jpg'.format(self.img_prefix, image_id)
      # Get the original image width/height by loading the image directly. 
      image = cv2.imread(filename)
      height, width = image.shape[:2]
      # Dict holding this image's meta info (filename includes the img_prefix path)
      data_info = {'filename': filename,
                  'width': width, 'height': height}
      # Derive the prefix of the subdirectory holding the annotation XML files. 
      label_prefix = self.img_prefix.replace('images', 'annotations')
      
      # Build the path to this image's annotation XML. Note that the XML files live under the xmls subdirectory
      anno_xml_file = osp.join(label_prefix, 'xmls/'+str(image_id)+'.xml')
      # Some names listed in the meta file have no matching XML file; skip those. 
      if not osp.exists(anno_xml_file):
          continue
      
      # Use get_bboxes_from_xml() to collect every bbox in this XML file into list objects. 
      anno_dir = osp.join(label_prefix, 'xmls')
      bbox_names, bboxes = get_bboxes_from_xml(anno_dir, str(image_id)+'.xml')
      #print('#########:', bbox_names)
                  
      gt_bboxes = []
      gt_labels = []
      gt_bboxes_ignore = []
      gt_labels_ignore = []
        
      # Map each bbox object's class name to a class id. The class id is the index of the name in the CLASSES tuple/list
      for bbox_name, bbox in zip(bbox_names, bboxes):
        # If bbox_name is one of the class names, add it to gt_bboxes/gt_labels; otherwise add it to gt_bboxes_ignore/gt_labels_ignore
        # bbox_name must match one of CLASSES, otherwise the box gets filtered out, so take care. 
        if bbox_name in cat2label:
            gt_bboxes.append(bbox)
            # gt_labels stores the class id
            gt_labels.append(cat2label[bbox_name])
        else:
            gt_bboxes_ignore.append(bbox)
            gt_labels_ignore.append(-1)
      
      # Build a dict holding this image's annotations; bbox coordinates and labels are stored as np.arrays. 
      data_anno = {
        'bboxes': np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
        'labels': np.array(gt_labels, dtype=np.long),
        'bboxes_ignore': np.array(gt_bboxes_ignore, dtype=np.float32).reshape(-1, 4),
        'labels_ignore': np.array(gt_labels_ignore, dtype=np.long)
      }
      
      # Store data_anno under the 'ann' key of data_info, which holds this image's meta info. 
      data_info.update(ann=data_anno)
      # Append data_info to data_infos, which collects the info for every annotated image
      data_infos.append(data_info)
      #print(data_info)

    return data_infos
''' For debugging, a class that mimics CustomDataset can be created to run various tests '''

import os.path as osp

PET_CLASSES = pet_df['class_name'].unique().tolist()

# A class that mimics CustomDataset, for debugging purposes. 
class PetDataset_imsi():
  CLASSES = PET_CLASSES

  # Constructor. 
  def __init__(self, data_root, ann_file, img_prefix):
      self.data_root = data_root
      self.ann_file = osp.join(data_root, ann_file)
      self.img_prefix = osp.join(data_root, img_prefix)
      
      self.data_infos = self.load_annotations(self.ann_file)

  # A text file listing every annotation file name is passed in as __init__'s ann_file argument, 
  # and this self.ann_file is then passed on to load_annotations().
  def load_annotations(self, ann_file):
    cat2label = {k:i for i, k in enumerate(self.CLASSES)}
    image_list = mmcv.list_from_file(self.ann_file)
    # list object that will hold the format-neutral (middle format) data
    data_infos = []

    for image_id in image_list:
      # self.img_prefix will be the images directory. 
      filename = '{0:}/{1:}.jpg'.format(self.img_prefix, image_id)
      # Get the original image width/height by loading the image directly. 
      image = cv2.imread(filename)
      height, width = image.shape[:2]
      # Dict holding this image's meta info (filename includes the img_prefix path)
      data_info = {'filename': filename,
                  'width': width, 'height': height}
      # Derive the prefix of the subdirectory holding the annotation XML files. 
      label_prefix = self.img_prefix.replace('images', 'annotations')
      
      # Build the path to this image's annotation XML. Note that the XML files live under the xmls subdirectory
      anno_xml_file = osp.join(label_prefix, 'xmls/'+str(image_id)+'.xml')
      # Some names listed in the meta file have no matching XML file; skip those. 
      if not osp.exists(anno_xml_file):
          continue
      
      # Use get_bboxes_from_xml() to collect every bbox in this XML file into list objects. 
      anno_dir = osp.join(label_prefix, 'xmls')
      bbox_names, bboxes = get_bboxes_from_xml(anno_dir, str(image_id)+'.xml')
      #print('#########:', bbox_names)
                  
      gt_bboxes = []
      gt_labels = []
      gt_bboxes_ignore = []
      gt_labels_ignore = []
        
      # Map each bbox object's class name to a class id. The class id is the index of the name in the CLASSES tuple/list
      for bbox_name, bbox in zip(bbox_names, bboxes):
        # If bbox_name is one of the class names, add it to gt_bboxes/gt_labels; otherwise add it to gt_bboxes_ignore/gt_labels_ignore
        # bbox_name must match one of CLASSES, otherwise the box gets filtered out, so take care. 
        if bbox_name in cat2label:
            gt_bboxes.append(bbox)
            # gt_labels stores the class id
            gt_labels.append(cat2label[bbox_name])
        else:
            gt_bboxes_ignore.append(bbox)
            gt_labels_ignore.append(-1)
      
      # Build a dict holding this image's annotations; bbox coordinates and labels are stored as np.arrays. 
      data_anno = {
        'bboxes': np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
        'labels': np.array(gt_labels, dtype=np.long),
        'bboxes_ignore': np.array(gt_bboxes_ignore, dtype=np.float32).reshape(-1, 4),
        'labels_ignore': np.array(gt_labels_ignore, dtype=np.long)
      }
      
      # Store data_anno under the 'ann' key of data_info, which holds this image's meta info. 
      data_info.update(ann=data_anno)
      # Append data_info to data_infos, which collects the info for every annotated image
      data_infos.append(data_info)
      #print(data_info)

    return data_infos

 

from mmcv import Config

cfg = Config.fromfile(config_file)
print(cfg.pretty_text)

model = dict(
    type='FasterRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_train2017.json',
        img_prefix='data/coco/train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ]),
    val=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
evaluation = dict(interval=1, metric='bbox')
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]

 

Composition of the data pipeline

As the pipeline runs step by step, each operation either adds new keys (shown in green in the original figure) or updates existing keys (orange).

The loading/transform steps describe how the annotation contents should be changed.

The formatting steps determine what is finally handed to the model.

 

img_norm_cfg = dict(
    # convert to RGB, then apply mean/std normalization
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [  # training pipeline
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]


Understanding the config is key to understanding MMDetection.

 

faster_rcnn_r50_fpn_1x_coco.py

# contents of faster_rcnn_r50_fpn_1x_coco.py
_base_ = [
    '../_base_/models/faster_rcnn_r50_fpn.py',  # model
    '../_base_/datasets/coco_detection.py',     # dataset
    '../_base_/schedules/schedule_1x.py',       # schedule
    '../_base_/default_runtime.py',             # runtime
]


# the four base configs above are merged into one config
from mmcv import Config

cfg = Config.fromfile(config_file)
print(cfg.pretty_text)

# model
model = dict(
    type='FasterRCNN', # type
    pretrained='torchvision://resnet50',
    backbone=dict( # backbone, resnet
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(  # neck, fpn
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict( # roihead
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict( # bbox head : num_classes
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
# dataset            
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_train2017.json',
        img_prefix='data/coco/train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ]),
    val=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
evaluation = dict(interval=1, metric='bbox')
# optimizer sgd
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
# runner : epoch
runner = dict(type='EpochBasedRunner', max_epochs=12)
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]

Config top-level categories and their main settings

- dataset: dataset type (CustomDataset, CocoDataset, etc.), train/val/test dataset types, data_root, and the main parameters of each train/val/test dataset (type, ann_file, img_prefix, pipeline, etc.)
- model: detailed settings for each major part of the object detection model - backbone, neck, dense head, RoI extractor, RoI head (e.g. num_classes)
- schedule: optimizer type (SGD, Adam, RMSprop, etc.), initial learning rate, the dynamic learning-rate policy applied during training (step, cyclic, cosine annealing, etc.), and the number of training epochs (learning rate scheduler)
- runtime: mostly hook (callback) related settings, e.g. the interval in epochs for writing checkpoint files and log files during training
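
The categories above correspond directly to attributes of the loaded cfg object. As a minimal, hedged sketch (the class count, paths, and learning rate below are placeholders, not values from this post), the fields that are most often changed when fine-tuning can be overridden right after Config.fromfile():

from mmcv import Config

cfg = Config.fromfile(config_file)

# model: adapt the box head to the number of classes of your own dataset (37 is a placeholder)
cfg.model.roi_head.bbox_head.num_classes = 37

# dataset: point the train split at your own annotation file and image directory (placeholder paths)
cfg.data.train.ann_file = 'data/my_dataset/train.json'
cfg.data.train.img_prefix = 'data/my_dataset/images/'

# schedule: a smaller learning rate is usual when fine-tuning from a pretrained checkpoint
cfg.optimizer.lr = 0.02 / 8

# runtime: start from the downloaded COCO checkpoint and choose a work directory for logs and checkpoints
cfg.load_from = checkpoint_file
cfg.work_dir = './work_dirs/tutorial'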


Numerical stability

Numerical stability is about how the input data affects the behavior of a model or algorithm.

Because computers perform floating-point arithmetic only approximately, very large or very small input values can cause a computation to go wrong.

In such cases, a parametric trick is used to keep the computation well defined and secure numerical stability.
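
A minimal NumPy sketch of one such trick: a naive softmax overflows for large inputs, while subtracting the maximum beforehand gives the same result with well-behaved numbers.

import numpy as np

def naive_softmax(x):
    e = np.exp(x)               # exp(1000) overflows to inf -> nan results
    return e / e.sum()

def stable_softmax(x):
    z = x - np.max(x)           # shift so that the largest exponent is exp(0) = 1
    e = np.exp(z)
    return e / e.sum()

x = np.array([1000.0, 1001.0, 1002.0])
# naive_softmax(x) -> array([nan, nan, nan])
print(stable_softmax(x))        # [0.09003057 0.24472847 0.66524096]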

 

Factors that break numerical stability

1. Activation functions

- With activation functions such as sigmoid or tanh, the derivative becomes (almost) 0 for very large or very small input values.

(Figure: the sigmoid, whose derivative goes to 0 in the saturated regions)
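
A quick numerical check (NumPy, for illustration): the sigmoid's derivative is σ(x)(1 − σ(x)); it peaks at 0.25 for x = 0 and effectively vanishes for large |x|, which is what stalls learning.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(x):
    s = sigmoid(x)
    return s * (1.0 - s)

for x in [0.0, 5.0, 20.0]:
    print(x, sigmoid_grad(x))
# 0.0  0.25
# 5.0  ~6.6e-03
# 20.0 ~2.1e-09   -> gradients this small barely update the earlier layers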

Normalization vs Standardization vs Regularization

What the three have in common is that they are all techniques for making machine learning / deep learning training more efficient or for avoiding overfitting.

Normalization

A re-scaling method that makes every data point contribute at a comparable scale (importance).

1. Adjusts the range of the data.

2. Adjusts the distribution of the data without distorting the differences between ranges.

3. Rescales values into the 0-1 range.

4. Examples

- MinMaxScaler

- Standard Score

- Student's t-statistic

- Studentized residual

- Standardized moment

- Coefficient of variation

!pip install -U mglearn
import mglearn
mglearn.plot_scaling.plot_scaling()

Standardization

A re-scaling method that expresses how far each value lies from the mean of the distribution.

1. Transforms values so that they have mean 0 and variance 1.

2. Examples

- StandardScaler

- z-score normalization

Without normalization, training may not converge properly or may take much longer when the data distribution is unbalanced.
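
A short scikit-learn sketch contrasting the two re-scalings on toy data (the numbers are only illustrative):

import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

X = np.array([[1.0], [2.0], [3.0], [10.0]])   # a single feature with one large value

print(MinMaxScaler().fit_transform(X).ravel())
# [0.    0.111 0.222 1.   ]   -> normalization: everything squeezed into [0, 1]

print(StandardScaler().fit_transform(X).ravel())
# [-0.849 -0.566 -0.283  1.697]  -> standardization: mean 0, variance 1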

Regularization

A method that prevents overfitting and improves generalization by imposing constraints (penalties) on the model.
Regularization is usually applied by adjusting hyperparameters.

A way to reduce model complexity while keeping the model's explanatory power. Common techniques are listed below (a small Keras sketch follows the list).

1. Early stopping

2. Noisy input

3. Drop-out

4. Pruning & feature selection

5. Ensemble

6. L1, L2 (Ridge, Lasso)
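
A small illustrative Keras sketch (layer sizes and coefficients are placeholders) combining three of the techniques above: an L2 (Ridge) weight penalty, drop-out, and early stopping.

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(20,),
                          kernel_regularizer=tf.keras.regularizers.l2(1e-4)),  # 6. L2 penalty
    tf.keras.layers.Dropout(0.3),                                              # 3. drop-out
    tf.keras.layers.Dense(1)
])

early_stop = tf.keras.callbacks.EarlyStopping(patience=3,
                                              restore_best_weights=True)       # 1. early stopping
# model.fit(x_train, y_train, validation_split=0.2, callbacks=[early_stop])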

Initializer

Breaking the symmetry

Breaking the symmetry is a condition that must be respected when initializing a machine learning model such as a neural network.

If all the weights of the model are initialized to the same value, it becomes difficult or impossible for the weights to diverge from each other during training; this is called "symmetry".

This symmetry therefore has to be broken before proper learning can happen.

If the weights are initialized to 0, the output becomes 0 regardless of the input, so zero initialization must be avoided.

If all weights are initialized to the same value, every unit produces the same output and receives the same update, so identical initialization must be avoided as well.

Purely random initialization can also be problematic,

because by chance the weights could still end up all 0 or all identical.

So the weights should be initialized randomly, but with the range of values constrained.

For example, drawing the weights randomly from a normal distribution with mean 0 and standard deviation 1 lets the weight updates proceed normally.
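
A minimal Keras sketch of the point above (shapes are placeholders): zero initialization keeps every unit identical, while a bounded random initializer breaks the symmetry.

import tensorflow as tf

# symmetric (bad): every unit computes the same output and receives the same gradient
bad = tf.keras.layers.Dense(4, kernel_initializer='zeros')

# symmetry broken (good): small random values from a bounded distribution
good = tf.keras.layers.Dense(4, kernel_initializer='glorot_uniform')

# or an explicit random normal initializer with a controlled spread
custom = tf.keras.layers.Dense(
    4, kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.05))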

 

Batch Normalization

Even after normalizing the inputs, performance can still degrade as the layers get deeper.

In that situation, problems such as internal covariate shift can occur.

Batch normalization is a technique that was designed to prevent internal covariate shift.

(In fact, a later follow-up paper showed that it does not really solve internal covariate shift, but that it still works well in practice.)

Internal covariate shift: during training, parameter changes in the earlier layers push a layer's input distribution into one that has little to do with the original distribution.

Whitening

Transforming the data so that it has mean 0 and standard deviation 1.

How BN works

1. Using the fact that the mean of a sample approximates the overall mean, whitening is performed per mini-batch during training.

2. A very small value, epsilon, is added to the denominator so that it can never become 0

(this guarantees numerical stability).

3. A scale-and-shift operation is applied (the learned parameters γ and β are added).

- BN sits before the activation layer; if the freshly normalized values went straight through the activation, the non-linearity could be weakened.

γ and β are therefore learned during training so that the non-linear behaviour is preserved.

BN at test time

In ML, the test-time procedure generally mirrors exactly what was done during training.

For example, if min-max scaling was applied for training, predictions are made after applying min-max scaling at test time as well.

For BN, the test-time statistics are moving averages of the per-batch means and standard deviations collected during training.

Moving average: a calculation that analyzes data points by creating a series of averages over successive subsets of the full dataset.
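
A compact NumPy sketch of the whole procedure (per-batch whitening with ε, the learned γ/β scale-and-shift, and the moving averages used at test time); it is an illustration of the idea, not the exact framework implementation.

import numpy as np

def batch_norm(x, gamma, beta, state, training, eps=1e-5, momentum=0.9):
    if training:
        mu, var = x.mean(axis=0), x.var(axis=0)         # per-batch statistics (whitening)
        # accumulate moving averages for use at test time
        state['mean'] = momentum * state['mean'] + (1 - momentum) * mu
        state['var']  = momentum * state['var']  + (1 - momentum) * var
    else:
        mu, var = state['mean'], state['var']           # test time: use the moving averages
    x_hat = (x - mu) / np.sqrt(var + eps)               # eps keeps the denominator away from 0
    return gamma * x_hat + beta                         # learned scale (gamma) and shift (beta)

state = {'mean': np.zeros(3), 'var': np.ones(3)}
x = np.random.randn(8, 3) * 5 + 2                       # a batch of 8 samples, 3 features
y = batch_norm(x, gamma=np.ones(3), beta=np.zeros(3), state=state, training=True)
print(y.mean(axis=0), y.std(axis=0))                    # roughly 0 and 1 per feature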

BN example

import tensorflow as tf 
resnet = tf.keras.applications.ResNet50()
resnet.summary()
Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
__________________________________________________________________________________________________
conv1_relu (Activation)         (None, 112, 112, 64) 0           conv1_bn[0][0]                   
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 114, 114, 64) 0           conv1_relu[0][0]                 
__________________________________________________________________________________________________
pool1_pool (MaxPooling2D)       (None, 56, 56, 64)   0           pool1_pad[0][0]                  
__________________________________________________________________________________________________
conv2_block1_1_conv (Conv2D)    (None, 56, 56, 64)   4160        pool1_pool[0][0]                 
__________________________________________________________________________________________________
conv2_block1_1_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_1_relu (Activation (None, 56, 56, 64)   0           conv2_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv2_block1_2_conv (Conv2D)    (None, 56, 56, 64)   36928       conv2_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv2_block1_2_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_2_relu (Activation (None, 56, 56, 64)   0           conv2_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv2_block1_0_conv (Conv2D)    (None, 56, 56, 256)  16640       pool1_pool[0][0]                 
__________________________________________________________________________________________________
conv2_block1_3_conv (Conv2D)    (None, 56, 56, 256)  16640       conv2_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv2_block1_0_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_3_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv2_block1_add (Add)          (None, 56, 56, 256)  0           conv2_block1_0_bn[0][0]          
                                                                 conv2_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv2_block1_out (Activation)   (None, 56, 56, 256)  0           conv2_block1_add[0][0]           
__________________________________________________________________________________________________
conv2_block2_1_conv (Conv2D)    (None, 56, 56, 64)   16448       conv2_block1_out[0][0]           
__________________________________________________________________________________________________
conv2_block2_1_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv2_block2_1_relu (Activation (None, 56, 56, 64)   0           conv2_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv2_block2_2_conv (Conv2D)    (None, 56, 56, 64)   36928       conv2_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv2_block2_2_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv2_block2_2_relu (Activation (None, 56, 56, 64)   0           conv2_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv2_block2_3_conv (Conv2D)    (None, 56, 56, 256)  16640       conv2_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv2_block2_3_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv2_block2_add (Add)          (None, 56, 56, 256)  0           conv2_block1_out[0][0]           
                                                                 conv2_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv2_block2_out (Activation)   (None, 56, 56, 256)  0           conv2_block2_add[0][0]           
__________________________________________________________________________________________________
conv2_block3_1_conv (Conv2D)    (None, 56, 56, 64)   16448       conv2_block2_out[0][0]           
__________________________________________________________________________________________________
conv2_block3_1_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv2_block3_1_relu (Activation (None, 56, 56, 64)   0           conv2_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv2_block3_2_conv (Conv2D)    (None, 56, 56, 64)   36928       conv2_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv2_block3_2_bn (BatchNormali (None, 56, 56, 64)   256         conv2_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv2_block3_2_relu (Activation (None, 56, 56, 64)   0           conv2_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv2_block3_3_conv (Conv2D)    (None, 56, 56, 256)  16640       conv2_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv2_block3_3_bn (BatchNormali (None, 56, 56, 256)  1024        conv2_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv2_block3_add (Add)          (None, 56, 56, 256)  0           conv2_block2_out[0][0]           
                                                                 conv2_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv2_block3_out (Activation)   (None, 56, 56, 256)  0           conv2_block3_add[0][0]           
__________________________________________________________________________________________________
conv3_block1_1_conv (Conv2D)    (None, 28, 28, 128)  32896       conv2_block3_out[0][0]           
__________________________________________________________________________________________________
conv3_block1_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_1_relu (Activation (None, 28, 28, 128)  0           conv3_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block1_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_2_relu (Activation (None, 28, 28, 128)  0           conv3_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_0_conv (Conv2D)    (None, 28, 28, 512)  131584      conv2_block3_out[0][0]           
__________________________________________________________________________________________________
conv3_block1_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block1_0_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_add (Add)          (None, 28, 28, 512)  0           conv3_block1_0_bn[0][0]          
                                                                 conv3_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_out (Activation)   (None, 28, 28, 512)  0           conv3_block1_add[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block1_out[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block2_1_relu (Activation (None, 28, 28, 128)  0           conv3_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block2_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block2_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block2_2_relu (Activation (None, 28, 28, 128)  0           conv3_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block2_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block2_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block2_add (Add)          (None, 28, 28, 512)  0           conv3_block1_out[0][0]           
                                                                 conv3_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block2_out (Activation)   (None, 28, 28, 512)  0           conv3_block2_add[0][0]           
__________________________________________________________________________________________________
conv3_block3_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block2_out[0][0]           
__________________________________________________________________________________________________
conv3_block3_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block3_1_relu (Activation (None, 28, 28, 128)  0           conv3_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block3_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block3_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block3_2_relu (Activation (None, 28, 28, 128)  0           conv3_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block3_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block3_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block3_add (Add)          (None, 28, 28, 512)  0           conv3_block2_out[0][0]           
                                                                 conv3_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block3_out (Activation)   (None, 28, 28, 512)  0           conv3_block3_add[0][0]           
__________________________________________________________________________________________________
conv3_block4_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block3_out[0][0]           
__________________________________________________________________________________________________
conv3_block4_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block4_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_block4_1_relu (Activation (None, 28, 28, 128)  0           conv3_block4_1_bn[0][0]          
__________________________________________________________________________________________________
conv3_block4_2_conv (Conv2D)    (None, 28, 28, 128)  147584      conv3_block4_1_relu[0][0]        
__________________________________________________________________________________________________
conv3_block4_2_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block4_2_conv[0][0]        
__________________________________________________________________________________________________
conv3_block4_2_relu (Activation (None, 28, 28, 128)  0           conv3_block4_2_bn[0][0]          
__________________________________________________________________________________________________
conv3_block4_3_conv (Conv2D)    (None, 28, 28, 512)  66048       conv3_block4_2_relu[0][0]        
__________________________________________________________________________________________________
conv3_block4_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block4_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block4_add (Add)          (None, 28, 28, 512)  0           conv3_block3_out[0][0]           
                                                                 conv3_block4_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block4_out (Activation)   (None, 28, 28, 512)  0           conv3_block4_add[0][0]           
__________________________________________________________________________________________________
conv4_block1_1_conv (Conv2D)    (None, 14, 14, 256)  131328      conv3_block4_out[0][0]           
__________________________________________________________________________________________________
conv4_block1_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_1_relu (Activation (None, 14, 14, 256)  0           conv4_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block1_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block1_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_2_relu (Activation (None, 14, 14, 256)  0           conv4_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block1_0_conv (Conv2D)    (None, 14, 14, 1024) 525312      conv3_block4_out[0][0]           
__________________________________________________________________________________________________
conv4_block1_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block1_0_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_add (Add)          (None, 14, 14, 1024) 0           conv4_block1_0_bn[0][0]          
                                                                 conv4_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block1_out (Activation)   (None, 14, 14, 1024) 0           conv4_block1_add[0][0]           
__________________________________________________________________________________________________
conv4_block2_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block1_out[0][0]           
__________________________________________________________________________________________________
conv4_block2_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block2_1_relu (Activation (None, 14, 14, 256)  0           conv4_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block2_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block2_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block2_2_relu (Activation (None, 14, 14, 256)  0           conv4_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block2_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block2_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block2_add (Add)          (None, 14, 14, 1024) 0           conv4_block1_out[0][0]           
                                                                 conv4_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block2_out (Activation)   (None, 14, 14, 1024) 0           conv4_block2_add[0][0]           
__________________________________________________________________________________________________
conv4_block3_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block2_out[0][0]           
__________________________________________________________________________________________________
conv4_block3_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block3_1_relu (Activation (None, 14, 14, 256)  0           conv4_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block3_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block3_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block3_2_relu (Activation (None, 14, 14, 256)  0           conv4_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block3_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block3_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block3_add (Add)          (None, 14, 14, 1024) 0           conv4_block2_out[0][0]           
                                                                 conv4_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block3_out (Activation)   (None, 14, 14, 1024) 0           conv4_block3_add[0][0]           
__________________________________________________________________________________________________
conv4_block4_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block3_out[0][0]           
__________________________________________________________________________________________________
conv4_block4_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block4_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block4_1_relu (Activation (None, 14, 14, 256)  0           conv4_block4_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block4_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block4_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block4_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block4_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block4_2_relu (Activation (None, 14, 14, 256)  0           conv4_block4_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block4_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block4_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block4_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block4_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block4_add (Add)          (None, 14, 14, 1024) 0           conv4_block3_out[0][0]           
                                                                 conv4_block4_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block4_out (Activation)   (None, 14, 14, 1024) 0           conv4_block4_add[0][0]           
__________________________________________________________________________________________________
conv4_block5_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block4_out[0][0]           
__________________________________________________________________________________________________
conv4_block5_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block5_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block5_1_relu (Activation (None, 14, 14, 256)  0           conv4_block5_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block5_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block5_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block5_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block5_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block5_2_relu (Activation (None, 14, 14, 256)  0           conv4_block5_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block5_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block5_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block5_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block5_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block5_add (Add)          (None, 14, 14, 1024) 0           conv4_block4_out[0][0]           
                                                                 conv4_block5_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block5_out (Activation)   (None, 14, 14, 1024) 0           conv4_block5_add[0][0]           
__________________________________________________________________________________________________
conv4_block6_1_conv (Conv2D)    (None, 14, 14, 256)  262400      conv4_block5_out[0][0]           
__________________________________________________________________________________________________
conv4_block6_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block6_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block6_1_relu (Activation (None, 14, 14, 256)  0           conv4_block6_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block6_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block6_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block6_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block6_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block6_2_relu (Activation (None, 14, 14, 256)  0           conv4_block6_2_bn[0][0]          
__________________________________________________________________________________________________
conv4_block6_3_conv (Conv2D)    (None, 14, 14, 1024) 263168      conv4_block6_2_relu[0][0]        
__________________________________________________________________________________________________
conv4_block6_3_bn (BatchNormali (None, 14, 14, 1024) 4096        conv4_block6_3_conv[0][0]        
__________________________________________________________________________________________________
conv4_block6_add (Add)          (None, 14, 14, 1024) 0           conv4_block5_out[0][0]           
                                                                 conv4_block6_3_bn[0][0]          
__________________________________________________________________________________________________
conv4_block6_out (Activation)   (None, 14, 14, 1024) 0           conv4_block6_add[0][0]           
__________________________________________________________________________________________________
conv5_block1_1_conv (Conv2D)    (None, 7, 7, 512)    524800      conv4_block6_out[0][0]           
__________________________________________________________________________________________________
conv5_block1_1_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_1_relu (Activation (None, 7, 7, 512)    0           conv5_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv5_block1_2_conv (Conv2D)    (None, 7, 7, 512)    2359808     conv5_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv5_block1_2_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_2_relu (Activation (None, 7, 7, 512)    0           conv5_block1_2_bn[0][0]          
__________________________________________________________________________________________________
conv5_block1_0_conv (Conv2D)    (None, 7, 7, 2048)   2099200     conv4_block6_out[0][0]           
__________________________________________________________________________________________________
conv5_block1_3_conv (Conv2D)    (None, 7, 7, 2048)   1050624     conv5_block1_2_relu[0][0]        
__________________________________________________________________________________________________
conv5_block1_0_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block1_0_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_3_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv5_block1_add (Add)          (None, 7, 7, 2048)   0           conv5_block1_0_bn[0][0]          
                                                                 conv5_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv5_block1_out (Activation)   (None, 7, 7, 2048)   0           conv5_block1_add[0][0]           
__________________________________________________________________________________________________
conv5_block2_1_conv (Conv2D)    (None, 7, 7, 512)    1049088     conv5_block1_out[0][0]           
__________________________________________________________________________________________________
conv5_block2_1_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv5_block2_1_relu (Activation (None, 7, 7, 512)    0           conv5_block2_1_bn[0][0]          
__________________________________________________________________________________________________
conv5_block2_2_conv (Conv2D)    (None, 7, 7, 512)    2359808     conv5_block2_1_relu[0][0]        
__________________________________________________________________________________________________
conv5_block2_2_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block2_2_conv[0][0]        
__________________________________________________________________________________________________
conv5_block2_2_relu (Activation (None, 7, 7, 512)    0           conv5_block2_2_bn[0][0]          
__________________________________________________________________________________________________
conv5_block2_3_conv (Conv2D)    (None, 7, 7, 2048)   1050624     conv5_block2_2_relu[0][0]        
__________________________________________________________________________________________________
conv5_block2_3_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block2_3_conv[0][0]        
__________________________________________________________________________________________________
conv5_block2_add (Add)          (None, 7, 7, 2048)   0           conv5_block1_out[0][0]           
                                                                 conv5_block2_3_bn[0][0]          
__________________________________________________________________________________________________
conv5_block2_out (Activation)   (None, 7, 7, 2048)   0           conv5_block2_add[0][0]           
__________________________________________________________________________________________________
conv5_block3_1_conv (Conv2D)    (None, 7, 7, 512)    1049088     conv5_block2_out[0][0]           
__________________________________________________________________________________________________
conv5_block3_1_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_1_relu (Activation (None, 7, 7, 512)    0           conv5_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_2_conv (Conv2D)    (None, 7, 7, 512)    2359808     conv5_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv5_block3_2_bn (BatchNormali (None, 7, 7, 512)    2048        conv5_block3_2_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_2_relu (Activation (None, 7, 7, 512)    0           conv5_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_3_conv (Conv2D)    (None, 7, 7, 2048)   1050624     conv5_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv5_block3_3_bn (BatchNormali (None, 7, 7, 2048)   8192        conv5_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_add (Add)          (None, 7, 7, 2048)   0           conv5_block2_out[0][0]           
                                                                 conv5_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_out (Activation)   (None, 7, 7, 2048)   0           conv5_block3_add[0][0]           
__________________________________________________________________________________________________
avg_pool (GlobalAveragePooling2 (None, 2048)         0           conv5_block3_out[0][0]           
__________________________________________________________________________________________________
predictions (Dense)             (None, 1000)         2049000     avg_pool[0][0]                   
==================================================================================================
Total params: 25,636,712
Trainable params: 25,583,592
Non-trainable params: 53,120
__________________________________________________________________________________________________
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(1,3, input_shape=(32,32,3),),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(2,3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10)
])

# During training, batch normalization keeps gamma (scale), beta (shift), moving_mean and moving_var
# (epsilon is a fixed constant), which is why each BN layer shows 4 parameters per channel
# (the moving mean/variance pair is the non-trainable part).
model.summary()

# Model: "sequential_3"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #   
# =================================================================
# conv2d_6 (Conv2D)            (None, 30, 30, 1)         28        
# _________________________________________________________________
# batch_normalization_6 (Batch (None, 30, 30, 1)         4         
# _________________________________________________________________
# re_lu_6 (ReLU)               (None, 30, 30, 1)         0         
# _________________________________________________________________
# conv2d_7 (Conv2D)            (None, 28, 28, 2)         20        
# _________________________________________________________________
# batch_normalization_7 (Batch (None, 28, 28, 2)         8         
# _________________________________________________________________
# re_lu_7 (ReLU)               (None, 28, 28, 2)         0         
# _________________________________________________________________
# flatten_3 (Flatten)          (None, 1568)              0         
# _________________________________________________________________
# dense_3 (Dense)              (None, 10)                15690     
# =================================================================
# Total params: 15,750
# Trainable params: 15,744
# Non-trainable params: 6
# _________________________________________________________________

When training accuracy keeps rising while validation accuracy spikes up and down, it means there is not enough data.

In other words, the model is overfitting, so the amount of data must be increased => apply data augmentation.

Data augmentation

img_height = 256 
img_width = 256
num_classes = 10

# random-jitter style augmentation (flip / rotation / zoom)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal', input_shape=(img_height, img_width,3)),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1)
])

model = tf.keras.Sequential([
    data_augmentation,
    tf.keras.layers.Rescaling(1./255),
    tf.keras.layers.Conv2D(16,3,padding='same',activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32,3,padding='same',activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64,3,padding='same',activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.2), # used to prevent overfitting once enough data has been secured
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes)
])

 

 


Six ways to build a model in TensorFlow (a minimal sketch of the three Keras styles follows the list)

1. tf.Module (base/meta class)

2. tf.keras.models.Model (functional API; supports multiple inputs and outputs)

3. tf.keras.models.Sequential

4. Subclassing tf.keras.models.Model

5. tf.estimator

6. tf.nn
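
As a rough illustration of the three Keras-style options (a minimal sketch assuming TensorFlow 2.x; the layer sizes are arbitrary):

import tensorflow as tf

# 3. Sequential: a simple linear stack of layers
seq_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(8,)),
    tf.keras.layers.Dense(1)
])

# 2. Functional tf.keras.models.Model: explicit graph, allows multiple inputs/outputs
inp = tf.keras.Input((8,))
x = tf.keras.layers.Dense(32, activation='relu')(inp)
out = tf.keras.layers.Dense(1)(x)
func_model = tf.keras.models.Model(inp, out)

# 4. Subclassing tf.keras.models.Model: full control over the forward pass
class MyModel(tf.keras.models.Model):
    def __init__(self):
        super().__init__()
        self.hidden = tf.keras.layers.Dense(32, activation='relu')
        self.out = tf.keras.layers.Dense(1)

    def call(self, inputs):
        return self.out(self.hidden(inputs))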

Data

1. numpy

2. tensor

- tf.Tensor

- tf.data.Dataset => a dataset abstraction for ML; it manages datasets efficiently and is optimized to make the best use of the hardware

- tf.Variable

Data Pipelines

A data pipeline describes the flow of data:
data load -> model training

Building the pipeline with tf.data lets you use the hardware efficiently.

With prefetch, while the GPU is busy training on the current batch, the CPU loads and prepares the next one, reducing data-loading time.

Parallel map calls and caching are also supported; a minimal pipeline sketch follows.
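
A minimal pipeline sketch (the arrays, batch size and buffer sizes below are placeholders, not values from this post):

import tensorflow as tf
import numpy as np

# placeholder data, just to make the sketch runnable
x = np.random.rand(1000, 32).astype('float32')
y = np.random.randint(0, 10, size=(1000,))

ds = (tf.data.Dataset.from_tensor_slices((x, y))
        .cache()                       # keep the elements in memory after the first pass
        .shuffle(1000)                 # shuffle with a buffer of 1000 elements
        .batch(32)                     # mini-batches of 32
        .prefetch(tf.data.AUTOTUNE))   # overlap data loading with training

# model.fit(ds, epochs=5)  # such a dataset can be passed straight to fit()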

 

 

import tensorflow as tf 
import pandas as pd

pd.DataFrame.from_dict() # the from_dict classmethod builds a DataFrame from a dict (mentioned here only as a reference)

x = tf.constant([[1,2],[3,4],[5,6]]) 
y = tf.constant([[1],[3],[5]]) 

x # 3 samples, each with 2 elements
# <tf.Tensor: shape=(3, 2), dtype=int32, numpy=
# array([[1, 2],
#        [3, 4],
#        [5, 6]], dtype=int32)>
xx = tf.data.Dataset.from_tensor_slices(x) # loaded lazily (evaluated only when consumed)

for i in xx.take(2):
  print(i)
# tf.Tensor([1 2], shape=(2,), dtype=int32)
# tf.Tensor([3 4], shape=(2,), dtype=int32)
xx = tf.data.Dataset.from_tensor_slices((x,y)) # x and y can be managed together as pairs

for i, j in xx:
  print(i,j)
# tf.Tensor([1 2], shape=(2,), dtype=int32) tf.Tensor([1], shape=(1,), dtype=int32)
# tf.Tensor([3 4], shape=(2,), dtype=int32) tf.Tensor([3], shape=(1,), dtype=int32)
# tf.Tensor([5 6], shape=(2,), dtype=int32) tf.Tensor([5], shape=(1,), dtype=int32)
xx.cache().prefetch(32).shuffle(32)
# <ShuffleDataset shapes: ((2,), (1,)), types: (tf.int32, tf.int32)>

xx = tf.data.Dataset.from_tensors(x) # the whole tensor as a single element
for i in xx.take(1):
  print(i)
# tf.Tensor(
# [[1 2]
#  [3 4]
#  [5 6]], shape=(3, 2), dtype=int32)

Data Augmentation

Two kinds of augmentation approaches

1. Convert the original to an array and augment the array

2. Augment the original itself (process the image files)

1. Basic image manipulation

- a way to prevent overfitting

- does not augment the original data itself (so it is not a method that improves performance on its own)

 

2. Deep learning approaches

- methods that augment the original data itself

- generating plausible fake data can improve generalization performance

 

Ways to manage data

1. Directory

2. DB(LMDB)

- loading from a DB takes significant resources, so this approach is rarely used

- storing images in a DB with LMDB is possible, but it is not commonly done these days

3. HDF

Ways to load data

1. tf.keras.preprocessing.image_dataset_from_directory

2. tf.keras.preprocessing.image.ImageDataGenerator().flow_from_directory

3. pathlib.Path.glob

4. tf.data.Dataset.list_files

5. tf.data.Dataset.from_generator

 

 

data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') # loads every image under the directory
# Found 3670 files belonging to 5 classes.

data # since this is a tf.data.Dataset, preprocessing can be done with map
# <BatchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int32)>

next(iter(data))
# (<tf.Tensor: shape=(32, 256, 256, 3), dtype=float32, numpy=
#  array([[[[1.64000000e+02, 1.60000000e+02, 1.48000000e+02],
#           [1.64000000e+02, 1.60000000e+02, 1.48000000e+02],
#           [1.65000000e+02, 1.61000000e+02, 1.49000000e+02],
#           ...,
#           [2.52875000e+02, 2.54250000e+02, 2.53875000e+02],
#           [2.49750000e+02, 2.50500000e+02, 2.48875000e+02],
#           [2.54250000e+02, 2.54250000e+02, 2.52250000e+02]]]],
#        dtype=float32)>, <tf.Tensor: shape=(32,), dtype=int32, numpy=
#  array([3, 1, 2, 0, 2, 2, 2, 3, 1, 1, 4, 3, 1, 1, 0, 4, 2, 1, 4, 0, 3, 1,
#         1, 2, 2, 3, 0, 0, 3, 2, 2, 0], dtype=int32)>)
idg = tf.keras.preprocessing.image.ImageDataGenerator() # can also perform augmentation
data2 = idg.flow_from_directory('flower_photos/')
# Found 3670 images belonging to 5 classes.

idg.flow_from_dataframe() 
next(data2)
# (array([[[[ 63.,  73.,  39.],
#           [ 63.,  73.,  39.],
#           [ 63.,  73.,  39.],
#           ...,
#           [166., 140., 107.],
#           [160., 136., 102.],
#           [157., 133.,  99.]]]], dtype=float32), 
#   array([[1., 0., 0., 0., 0.],
#          [0., 0., 0., 0., 1.],
#          [1., 0., 0., 0., 0.]], dtype=float32))
tf.data.Dataset.from_generator
# <function tensorflow.python.data.ops.dataset_ops.DatasetV2.from_generator>
import pathlib
flower_path = pathlib.Path('flower_photos')
for i in flower_path.glob('*/*.jpg'):
  print(i)
# flower_photos/dandelion/8915661673_9a1cdc3755_m.jpg
# flower_photos/dandelion/8740218495_23858355d8_n.jpg
# flower_photos/dandelion/2608937632_cfd93bc7cd.jpg  
# ...
# flower_photos/tulips/8690791226_b1f015259f_n.jpg
# flower_photos/tulips/4612075317_91eefff68c_n.jpg
ls = tf.data.Dataset.list_files('flower_photos/*/*.jpg')  # collects the file paths
import matplotlib.pyplot as plt
for i in ls.take(100):
  x = tf.keras.preprocessing.image.load_img(i.numpy())
  plt.imshow(x)

 

 

!pip install Augmentor
import Augmentor
pipe = Augmentor.Pipeline('augmentor/')

pipe.rotate(0.5,15,15)

pipe.sample(5) # randomly sample and process n images

pipe.process()

pipe.rotate(1,15,15) 

pipe.flip_left_right(0.5) # flip left-right with probability 0.5

pipe.process() # process every image in the pipeline
g = pipe.keras_generator(2)

tf.data.Dataset.from_generator # Augmentor works on PIL images internally; its generator can be wrapped with from_generator

next(g)
# (array([[[[0.        , 0.        , 0.        ],
#           [0.67058825, 0.5803922 , 0.3529412 ],
#           [0.5254902 , 0.54901963, 0.34509805],
#           ...,
#           [0.34117648, 0.11764706, 0.08235294],
#           [0.34901962, 0.12156863, 0.09411765],
#           [0.35686275, 0.1254902 , 0.10980392]]]], dtype=float32), 
#           array([[0], [0]]))
!pip install -U albumentations
!pip install -U tensorflow-datasets
import tensorflow_datasets as tfds

flowers, info = tfds.load('tf_flowers', split='train', as_supervised=True, with_info=True)

flowers # shape if loaded with as_supervised=False
# <PrefetchDataset shapes: {image: (None, None, 3), label: ()}, types: {image: tf.uint8, label: tf.int64}>

flowers # shape with as_supervised=True (as loaded above)
# <PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>

info
# tfds.core.DatasetInfo(
    name='tf_flowers',
    full_name='tf_flowers/3.0.1',
    description="""
    A large set of images of flowers
    """,
    homepage='https://www.tensorflow.org/tutorials/load_data/images',
    data_path='/root/tensorflow_datasets/tf_flowers/3.0.1',
    download_size=218.21 MiB,
    dataset_size=221.83 MiB,
    features=FeaturesDict({
        'image': Image(shape=(None, None, 3), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=5),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'train': <SplitInfo num_examples=3670, num_shards=2>,
    },
    citation="""@ONLINE {tfflowers,
    author = "The TensorFlow Team",
    title = "Flowers",
    month = "jan",
    year = "2019",
    url = "http://download.tensorflow.org/example_images/flower_photos.tgz" }""",
)
tfds.visualization.show_examples(flowers, info)

flowers_pd = tfds.as_dataframe(flowers,info) # convert to a pandas DataFrame to make EDA easier
flowers_pd
data, metadata = tfds.load(
    'tf_flowers',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    with_info=True,
    as_supervised=True
)

data
# [<PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>,
#  <PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>,
#  <PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>]
metadata
# tfds.core.DatasetInfo(
    name='tf_flowers',
    full_name='tf_flowers/3.0.1',
    description="""
    A large set of images of flowers
    """,
    homepage='https://www.tensorflow.org/tutorials/load_data/images',
    data_path='/root/tensorflow_datasets/tf_flowers/3.0.1',
    download_size=218.21 MiB,
    dataset_size=221.83 MiB,
    features=FeaturesDict({
        'image': Image(shape=(None, None, 3), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=5),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'train': <SplitInfo num_examples=3670, num_shards=2>,
    },
    citation="""@ONLINE {tfflowers,
    author = "The TensorFlow Team",
    title = "Flowers",
    month = "jan",
    year = "2019",
    url = "http://download.tensorflow.org/example_images/flower_photos.tgz" }""",
)
metadata.features['image']
# Image(shape=(None, None, 3), dtype=tf.uint8)

metadata.features['label']
# ClassLabel(shape=(), dtype=tf.int64, num_classes=5)

metadata.features['label'].int2str(0)
# 'dandelion'


tf.keras.layers.experimental.preprocessing.RandomCrop
tf.keras.layers.RandomCrop
# preprocessing layers can be used inside the model and can also be applied with map
import numpy as np
im = tf.keras.preprocessing.image.load_img('people.jpg')

x = np.array(im)

xx = tf.keras.layers.RandomRotation(0.4)(x)

plt.imshow(xx)

xx = tf.keras.layers.RandomFlip()(x)

plt.imshow(xx)

aug = tf.keras.models.Sequential([
  tf.keras.layers.RandomRotation(0.5),
  tf.keras.layers.RandomFlip()
]) # an advantage of Sequential over Model is that it can be nested inside another model (the preprocessing layers stay clearly visible)

plt.imshow(aug(x))

# example of a model that contains the preprocessing Sequential block as a layer
model = tf.keras.models.Sequential([
  aug,
  tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
]) # because the preprocessing itself is part of the model, it can run on the GPU

tf.image.random_crop # processes one image at a time
from albumentations import Compose, RandomBrightnessContrast, HorizontalFlip

aug = Compose([RandomBrightnessContrast(), HorizontalFlip()])

aug
# Compose([
#   RandomBrightnessContrast(always_apply=False, p=0.5, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), brightness_by_max=True),
#   HorizontalFlip(always_apply=False, p=0.5),
# ], p=1.0, bbox_params=None, keypoint_params=None, additional_targets={})
plt.imshow(aug(image=x)['image'])

 

 


Computer Vision Footprint

1998 - LeNet 5

2012 - AlexNet

- Used ReLU, which mitigates the gradient vanishing problem

- Used LRN (Local Response Normalization), a technique that has since fallen out of use

- Overlapping pooling (its impact was not large)

- Dropout (to prevent overfitting)

2013 - ZFNet

- Provided insight into AlexNet through visualization

- Proposed a methodology for hyperparameter tuning

- Network in Network

- Introduced the 1x1 convolution (1. adjusts the channel dimension 2. adds non-linearity 3. can act like a fully connected layer) - see the sketch after this timeline

- Introduced Global Average Pooling as a replacement for Flatten

- Introduced stacking: repeating the same block shape over and over

2014 - GoogLeNet v1 (1st place)

- Makes heavy use of 1x1 convolutions

- Stacks Inception modules

- Uses global average pooling

- VGG (2nd place)

2015 - ResNet

- The first algorithm to surpass human-level performance - trained so as to minimize the residual

- Uses Batch Normalization

2016 - GoogLeNet v4
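
To make the 1x1 convolution and Global Average Pooling ideas concrete, here is a minimal sketch (assuming TensorFlow 2.x; the shapes and filter counts are arbitrary): a 1x1 convolution that shrinks the channel dimension without touching the spatial dimensions, followed by GAP used in place of Flatten.

import tensorflow as tf

input_ = tf.keras.Input((32, 32, 64))
# 1x1 convolution: 64 channels -> 16 channels, spatial size unchanged, adds a non-linearity
x = tf.keras.layers.Conv2D(16, 1, activation='relu')(input_)
# Global Average Pooling: (32, 32, 16) -> (16,), used instead of Flatten
x = tf.keras.layers.GlobalAveragePooling2D()(x)
out = tf.keras.layers.Dense(10)(x)

model = tf.keras.models.Model(input_, out)
model.summary()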

Deep Residual Learning for Image Recognition

ResNet was the first model to surpass human-level performance.
Residual => actual value - predicted value

In the paper's figure, dotted arrows mark the shortcuts where the dimension increases.

Skip connection

In deep architectures, a short skip connection takes the output of one layer, skips a few layers, and adds it to the input of a later layer. The two snippets below contrast Concatenate and Add; a sketch of an actual residual block follows them.

import tensorflow as tf 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
y = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
z = tf.keras.layers.Concatenate()([x,y]) # joins the two branches along the channel axis
model = tf.keras.models.Model(input_, z)

model.summary()
# Model: "model"
# __________________________________________________________________________________________________
# Layer (type)                    Output Shape         Param #     Connected to                     
# ==================================================================================================
# input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
# __________________________________________________________________________________________________
# conv2d (Conv2D)                 (None, 32, 32, 2)    56          input_1[0][0]                    
# __________________________________________________________________________________________________
# conv2d_1 (Conv2D)               (None, 32, 32, 2)    56          input_1[0][0]                    
# __________________________________________________________________________________________________
# concatenate (Concatenate)       (None, 32, 32, 4)    0           conv2d[0][0]                     
#                                                                  conv2d_1[0][0]                   
# ==================================================================================================
# Total params: 112
# Trainable params: 112
# Non-trainable params: 0
# __________________________________________________________________________________________________
# the way ResNet implements it (Add)
input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
y = tf.keras.layers.Conv2D(2,3, padding='same')(input_)
z = tf.keras.layers.Add()([x,y]) # adds the values element-wise
model = tf.keras.models.Model(input_, z)

model.summary()
# Model: "model_1"
# __________________________________________________________________________________________________
# Layer (type)                    Output Shape         Param #     Connected to                     
# ==================================================================================================
# input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
# __________________________________________________________________________________________________
# conv2d_2 (Conv2D)               (None, 32, 32, 2)    56          input_2[0][0]                    
# __________________________________________________________________________________________________
# conv2d_3 (Conv2D)               (None, 32, 32, 2)    56          input_2[0][0]                    
# __________________________________________________________________________________________________
# add (Add)                       (None, 32, 32, 2)    0           conv2d_2[0][0]                   
#                                                                  conv2d_3[0][0]                   
# ==================================================================================================
# Total params: 112
# Trainable params: 112
# Non-trainable params: 0
# __________________________________________________________________________________________________
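
The two models above merge two parallel branches. In an actual residual block the shortcut is the block's input itself, so the output is F(x) + x. A minimal sketch of such a block (an illustration, not the exact ResNet bottleneck; the filter counts are arbitrary and chosen so the shapes match for Add):

import tensorflow as tf

input_ = tf.keras.Input((32, 32, 64))

# main path F(x): two 3x3 convolutions that keep the spatial size and channel count
f = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu')(input_)
f = tf.keras.layers.Conv2D(64, 3, padding='same')(f)

# skip connection: add the block's input to the main path, then apply the activation
z = tf.keras.layers.Add()([f, input_])
z = tf.keras.layers.ReLU()(z)

res_block = tf.keras.models.Model(input_, z)
res_block.summary()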

 

 

Hands-on example

 

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

np.unique(y_train, return_counts=True)
# (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
#  array([5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]))
plt.hist(y_train, width=0.3) # check whether the classes are balanced

# (array([5000., 5000., 5000., 5000., 5000., 5000., 5000., 5000., 5000., 5000.]),
#  array([0. , 0.9, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9. ]),
#  <a list of 10 Patch objects>)

# object scale and position vary across images => hard to handle with traditional ML => solve it with a CNN
rows = 3
cols = 3
axes=[]
fig=plt.figure(figsize=(10,7))

for a in range(rows*cols):    
    axes.append(fig.add_subplot(rows, cols, a+1))
    plt.imshow(X_train[a])
fig.tight_layout()    
plt.show()

np.unique(y_test, return_counts=True)
# (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
#  array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]))

plt.hist(y_test, width=0.3)
# (array([1000., 1000., 1000., 1000., 1000., 1000., 1000., 1000., 1000., 1000.]),
#  array([0. , 0.9, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9. ]),
#  <a list of 10 Patch objects>)

plt.imshow(X_test[3]) # the images are small, so it is hard to use a very deep stack of layers

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
X_train = X_train / 255
X_test = X_test / 255 

# how the layers are arranged is up to whoever builds the model

input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(32,3)(input_)
# x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) # 2x2 stride 2 
x = tf.keras.layers.Conv2D(64,3)(x) 
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) 
x = tf.keras.layers.Conv2D(64,3)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Flatten()(x) # the feature map is small here, so Flatten is fine
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Dense(10)(x)

model = tf.keras.models.Model(input_, x)

model.summary()
# Model: "model"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #   
# =================================================================
# input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
# _________________________________________________________________
# conv2d (Conv2D)              (None, 30, 30, 32)        896       
# _________________________________________________________________
# re_lu (ReLU)                 (None, 30, 30, 32)        0         
# _________________________________________________________________
# max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
# _________________________________________________________________
# conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
# _________________________________________________________________
# re_lu_1 (ReLU)               (None, 13, 13, 64)        0         
# _________________________________________________________________
# max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
# _________________________________________________________________
# conv2d_2 (Conv2D)            (None, 4, 4, 64)          36928     
# _________________________________________________________________
# re_lu_2 (ReLU)               (None, 4, 4, 64)          0         
# _________________________________________________________________
# flatten (Flatten)            (None, 1024)              0         
# _________________________________________________________________
# dense (Dense)                (None, 512)               524800    
# _________________________________________________________________
# re_lu_3 (ReLU)               (None, 512)               0         
# _________________________________________________________________
# dense_1 (Dense)              (None, 10)                5130      
# =================================================================
# Total params: 586,250
# Trainable params: 586,250
# Non-trainable params: 0
# _________________________________________________________________
# from_logits=True computes the loss from the raw logits instead of softmax probabilities
# it is numerically more stable; in practice the results are nearly identical
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
                                                                 optimizer='adam',
                                                                 metrics=['accuracy'])
                                                                 
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test)) 
# with the inputs scaled to [0, 1], training starts from a lower loss and converges faster

Epoch 1/20
1563/1563 [==============================] - 14s 8ms/step - loss: 1.4378 - accuracy: 0.4767 - val_loss: 1.1393 - val_accuracy: 0.5892
Epoch 2/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.0637 - accuracy: 0.6254 - val_loss: 1.0237 - val_accuracy: 0.6401
Epoch 3/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.8987 - accuracy: 0.6826 - val_loss: 0.9170 - val_accuracy: 0.6825
Epoch 4/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.7807 - accuracy: 0.7277 - val_loss: 0.8540 - val_accuracy: 0.7036
Epoch 5/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.6769 - accuracy: 0.7609 - val_loss: 0.8615 - val_accuracy: 0.7131
Epoch 6/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.5830 - accuracy: 0.7932 - val_loss: 0.8833 - val_accuracy: 0.7151
Epoch 7/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4926 - accuracy: 0.8251 - val_loss: 0.9074 - val_accuracy: 0.7179
Epoch 8/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4044 - accuracy: 0.8571 - val_loss: 0.9816 - val_accuracy: 0.7117
Epoch 9/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.3321 - accuracy: 0.8829 - val_loss: 1.0310 - val_accuracy: 0.7151
Epoch 10/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.2701 - accuracy: 0.9034 - val_loss: 1.2083 - val_accuracy: 0.7054
Epoch 11/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.2228 - accuracy: 0.9207 - val_loss: 1.3023 - val_accuracy: 0.7059
Epoch 12/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1909 - accuracy: 0.9319 - val_loss: 1.4026 - val_accuracy: 0.7109
Epoch 13/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1661 - accuracy: 0.9411 - val_loss: 1.6160 - val_accuracy: 0.6998
Epoch 14/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1501 - accuracy: 0.9471 - val_loss: 1.5875 - val_accuracy: 0.7062
Epoch 15/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1311 - accuracy: 0.9548 - val_loss: 1.8276 - val_accuracy: 0.6951
Epoch 16/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1288 - accuracy: 0.9552 - val_loss: 1.8113 - val_accuracy: 0.7016
Epoch 17/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1169 - accuracy: 0.9591 - val_loss: 2.0500 - val_accuracy: 0.6922
Epoch 18/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1180 - accuracy: 0.9604 - val_loss: 2.0089 - val_accuracy: 0.6998
Epoch 19/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1095 - accuracy: 0.9634 - val_loss: 1.9952 - val_accuracy: 0.6968
Epoch 20/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.1034 - accuracy: 0.9657 - val_loss: 2.0972 - val_accuracy: 0.6938

 

history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test)) 
# without input scaling, training starts from a higher loss

Epoch 1/20
1563/1563 [==============================] - 19s 8ms/step - loss: 2.0055 - accuracy: 0.3065 - val_loss: 1.5826 - val_accuracy: 0.4172
Epoch 2/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.4448 - accuracy: 0.4833 - val_loss: 1.3306 - val_accuracy: 0.5283
Epoch 3/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.2506 - accuracy: 0.5596 - val_loss: 1.2615 - val_accuracy: 0.5648
Epoch 4/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.1343 - accuracy: 0.6053 - val_loss: 1.2301 - val_accuracy: 0.5752
Epoch 5/20
1563/1563 [==============================] - 12s 8ms/step - loss: 1.0247 - accuracy: 0.6427 - val_loss: 1.1291 - val_accuracy: 0.6128
Epoch 6/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.9380 - accuracy: 0.6753 - val_loss: 1.2271 - val_accuracy: 0.5985
Epoch 7/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.8566 - accuracy: 0.7020 - val_loss: 1.1887 - val_accuracy: 0.6037
Epoch 8/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.7684 - accuracy: 0.7344 - val_loss: 1.2450 - val_accuracy: 0.6161
Epoch 9/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.6898 - accuracy: 0.7621 - val_loss: 1.3770 - val_accuracy: 0.6004
Epoch 10/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.6153 - accuracy: 0.7890 - val_loss: 1.4217 - val_accuracy: 0.6172
Epoch 11/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.5556 - accuracy: 0.8114 - val_loss: 1.4523 - val_accuracy: 0.6202
Epoch 12/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.5212 - accuracy: 0.8254 - val_loss: 1.6618 - val_accuracy: 0.5982
Epoch 13/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4697 - accuracy: 0.8419 - val_loss: 1.7345 - val_accuracy: 0.6087
Epoch 14/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4385 - accuracy: 0.8550 - val_loss: 1.9349 - val_accuracy: 0.6040
Epoch 15/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.4044 - accuracy: 0.8672 - val_loss: 2.1098 - val_accuracy: 0.5947
Epoch 16/20
1563/1563 [==============================] - 12s 7ms/step - loss: 0.3703 - accuracy: 0.8803 - val_loss: 2.3537 - val_accuracy: 0.6007
Epoch 17/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.3737 - accuracy: 0.8796 - val_loss: 2.4973 - val_accuracy: 0.6003
Epoch 18/20
1563/1563 [==============================] - 12s 7ms/step - loss: 0.3659 - accuracy: 0.8848 - val_loss: 2.5574 - val_accuracy: 0.5830
Epoch 19/20
1563/1563 [==============================] - 12s 7ms/step - loss: 0.3423 - accuracy: 0.8938 - val_loss: 2.4638 - val_accuracy: 0.6007
Epoch 20/20
1563/1563 [==============================] - 12s 8ms/step - loss: 0.3459 - accuracy: 0.8939 - val_loss: 2.6130 - val_accuracy: 0.5853

 

 

pd.DataFrame(history.history).plot.line(figsize=(10,8)) # overfitting starts where the validation loss begins to rise (with input scaling)

pd.DataFrame(history.history).plot.line(figsize=(10,8)) # overfitting starts where the validation loss begins to rise (without input scaling)

 

 

Flower Datasets

tf.keras.utils.get_file("flower_photos","https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz")

data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') 
# Found 3670 files belonging to 5 classes.

data # tf.data.Dataset
# <BatchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int32)>

idg = tf.keras.preprocessing.image.ImageDataGenerator()
idg.flow_from_directory('flower_photos/') # if the raw images are grouped into one folder per class, this turns them into batches in one go (as arrays)
# Found 3670 images belonging to 5 classes.
# <keras.preprocessing.image.DirectoryIterator at 0x7fee7a4b1390>

NumPy and tensors

NumPy arrays and TensorFlow tensors are interoperable.

Working purely with tensors is less interoperable with other libraries, but it lets you get the most out of the GPU and TensorFlow's internals.

tf.data.Dataset is the tensor container designed for ML workloads; a small interop sketch follows.
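
A small sketch of the NumPy <-> tensor interop mentioned above (the values are just illustrative):

import numpy as np
import tensorflow as tf

a = np.array([[1., 2.], [3., 4.]])

t = tf.convert_to_tensor(a)  # NumPy array -> tf.Tensor
b = t.numpy()                # tf.Tensor -> NumPy array

print(tf.reduce_sum(a))      # TensorFlow ops also accept NumPy arrays directly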

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

Ways to build tensor data

1. from_tensors

2. from_tensor_slices

tf.data.Dataset.from_tensors(X_train) # builds a dataset from the data as one whole element
tf.data.Dataset.from_tensors((X_train,y_train)) # manages the whole (X, y) pair as a single element
# <TensorDataset shapes: ((50000, 32, 32, 3), (50000, 1)), types: (tf.uint8, tf.uint8)>

tf.data.Dataset.from_tensor_slices(X_train) # builds a dataset with one element per sample
tf.data.Dataset.from_tensor_slices((X_train, y_train)) # manages (x, y) pairs sample by sample
# <TensorSliceDataset shapes: ((32, 32, 3), (1,)), types: (tf.uint8, tf.uint8)>


# y_train = tf.keras.utils.to_categorical(y_train)
train = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)

train_t = tf.data.Dataset.from_tensor_slices((X_train, y_train))

train_t.batch(32)
# <BatchDataset shapes: ((None, 32, 32, 3), (None, 1)), types: (tf.float64, tf.uint8)>

train_t.shuffle(400)
# <ShuffleDataset shapes: ((32, 32, 3), (1,)), types: (tf.float64, tf.uint8)>

# cache : use when preprocessing takes too long and you want to avoid repeating it every epoch
# prefetch : preload upcoming batches into memory while training runs, cutting data-loading time; the argument is how many elements to preload, and tf.data.AUTOTUNE chooses it automatically
train_t.shuffle(400).cache()
# <CacheDataset shapes: ((32, 32, 3), (1,)), types: (tf.float64, tf.uint8)>

train_t.shuffle(400).cache().prefetch(tf.data.AUTOTUNE) 
# <PrefetchDataset shapes: ((32, 32, 3), (1,)), types: (tf.float64, tf.uint8)>

'_GeneratorState' in dir(train) # data is loaded lazily => nothing is materialized in memory until it is consumed
# True
for i in train.take(2): # take 2 batches
  print(i)
  
# (<tf.Tensor: shape=(32, 32, 32, 3), dtype=float64, numpy=
# array([[[[0.23137255, 0.24313725, 0.24705882],
#          [0.16862745, 0.18039216, 0.17647059],
#          [0.19607843, 0.18823529, 0.16862745],
#          ...,
#          [0.19215686, 0.28235294, 0.17647059],
#          [0.12156863, 0.2       , 0.11764706],
#          [0.08235294, 0.15294118, 0.08235294]]]])>,
#  <tf.Tensor: shape=(32, 1), dtype=uint8, numpy=
#  array([[1], [3], [4], [0], [3], [7], [3], [3], [5], [2], [2], [7], [1], [1], [1], [2],
#         [2], [0], [9], [5], [7], [9], [2], [2], [5], [2], [4], [3], [1], [1], [8], [2]],
#        dtype=uint8)>)

Naive loading vs. prefetch

Without tf.data.Dataset, the CPU work (read, preprocess) and the training step run one after another, each waiting for the other.

With cache and prefetch, that CPU and GPU work is overlapped and packed together internally.

As a result, memory and hardware are used more efficiently than with plain NumPy arrays.

input_ = tf.keras.Input((32,32,3))
x = tf.keras.layers.Conv2D(32,3)(input_)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) 
x = tf.keras.layers.Conv2D(64,3)(x) 
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPool2D(2)(x) 
x = tf.keras.layers.Conv2D(64,3)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Flatten()(x) 
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Dense(10)(x)

model = tf.keras.models.Model(input_, x)

model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
                                                                 optimizer='adam',
                                                                 metrics=['accuracy'])
                                                                 
history = model.fit(train, epochs=10)
# training from a tf.data.Dataset keeps everything as tensors and uses TensorFlow's optimized internals, so it is much more efficient / better memory usage
Epoch 1/10
1563/1563 [==============================] - 16s 10ms/step - loss: 1.4560 - accuracy: 0.4731
Epoch 2/10
1563/1563 [==============================] - 14s 9ms/step - loss: 1.0674 - accuracy: 0.6208
Epoch 3/10
1563/1563 [==============================] - 12s 8ms/step - loss: 0.8919 - accuracy: 0.6871
Epoch 4/10
1563/1563 [==============================] - 18s 12ms/step - loss: 0.7641 - accuracy: 0.7327
Epoch 5/10
1563/1563 [==============================] - 18s 12ms/step - loss: 0.6630 - accuracy: 0.7666
Epoch 6/10
1563/1563 [==============================] - 15s 10ms/step - loss: 0.5731 - accuracy: 0.7999
Epoch 7/10
1563/1563 [==============================] - 15s 9ms/step - loss: 0.4959 - accuracy: 0.8257
Epoch 8/10
1563/1563 [==============================] - 22s 14ms/step - loss: 0.4299 - accuracy: 0.8476
Epoch 9/10
1563/1563 [==============================] - 25s 16ms/step - loss: 0.3662 - accuracy: 0.8701
Epoch 10/10
1563/1563 [==============================] - 24s 15ms/step - loss: 0.3118 - accuracy: 0.8894
data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') 
# Found 3670 files belonging to 5 classes.
plt.imshow(next(iter(data.take(1)))[0][0])

plt.imshow(next(iter(data.take(1)))[0][0]/255) 
# dividing by 255 puts the float values in [0, 1], so imshow renders the colors properly

tf.keras.utils.image_dataset_from_directory is tf.keras.preprocessing.image_dataset_from_directory
# True
# they are the same function

Data augmentation

Data augmentation makes the data you already have more varied; the idea was popularized by AlexNet.

AlexNet randomly cropped 224x224 patches out of 256x256 images (plus horizontal flips) to multiply the amount of training data

(a factor of 2048).

Augmentation is usually computed on the CPU, but when it runs batch by batch as part of the model it is computed on the GPU.

data = tf.keras.preprocessing.image_dataset_from_directory('flower_photos/') 
rescale = tf.keras.layers.experimental.preprocessing.RandomCrop(224, 224) # RandomCrop takes (height, width)
# Found 3670 files belonging to 5 classes.

data.map(lambda x,y: (rescale(x), y), num_parallel_calls=tf.data.AUTOTUNE) # num_parallel_calls: how many map calls run in parallel on the CPU
# <ParallelMapDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int32)>

3670*2048
# 7516160

Lambda layer

t = tf.constant([[1,2,],[3,4,]])
t
# <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
# array([[1, 2],
#       [3, 4]], dtype=int32)>
tf.keras.layers.Lambda(lambda x:x+1)(t) # wraps an arbitrary function as a layer
# <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
# array([[2, 3],
#        [4, 5]], dtype=int32)>
rescale = tf.keras.layers.Lambda(lambda x:tf.image.random_crop(x, (224,224,3)))
data.map(lambda x,y: (rescale(x), y), num_parallel_calls=tf.data.AUTOTUNE)
# this fails: the dataset is already batched, so x has rank 4 while the crop size (224,224,3) has rank 3
# ValueError: in user code:

    <ipython-input-81-dc4661f485b7>:1 None  *
        lambda x,y:(rescale(x)(x),y))
    /usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py:1037 __call__  **
        outputs = call_fn(inputs, *args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/keras/layers/core.py:903 call
        result = self.function(inputs, **kwargs)
    <ipython-input-74-6ba6a1285d04>:1 <lambda>
        rescale = tf.keras.layers.Lambda(lambda x:tf.image.random_crop(x, (224,224,3)))
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/random_ops.py:402 random_crop
        math_ops.reduce_all(shape >= size),
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/math_ops.py:1817 wrapper
        return fn(x, y, *args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_math_ops.py:4050 greater_equal
        "GreaterEqual", x=x, y=y, name=name)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py:601 _create_op_internal
        compute_device)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:3569 _create_op_internal
        op_def=op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:2042 __init__
        control_input_ops, op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:1883 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimensions must be equal, but are 4 and 3 for '{{node lambda_2/random_crop/GreaterEqual}} = GreaterEqual[T=DT_INT32](lambda_2/random_crop/Shape, lambda_2/random_crop/size)' with input shapes: [4], [3].
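
The ValueError above happens because `data` is already batched: each element has shape (batch, 256, 256, 3), rank 4, while the crop size (224, 224, 3) has rank 3. A minimal fix, as a sketch (assuming the `data` dataset from image_dataset_from_directory above, and TensorFlow >= 2.6 for tf.keras.layers.RandomCrop):

# Option 1: crop image by image before batching
cropped = (data.unbatch()
               .map(lambda x, y: (tf.image.random_crop(x, (224, 224, 3)), y),
                    num_parallel_calls=tf.data.AUTOTUNE)
               .batch(32))

# Option 2: keep the batches and use the RandomCrop preprocessing layer,
# which handles the batch dimension by itself
crop = tf.keras.layers.RandomCrop(224, 224)
cropped2 = data.map(lambda x, y: (crop(x), y), num_parallel_calls=tf.data.AUTOTUNE)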

 

 

 

 

 

 

 

 


For someone living alone, this is an excellent source of protein.

Price, taste, nutrition, composition -
all solid.

The brand has since gone out of business....
but wasn't Ssaum-ui Gosu (싸움의 고수) a revolution for people living on their own?


Gubaeknyang Gopchang (구백냥곱창), located at Exit 1 of Yeoksam Station

For someone living alone, oversized dishes...
gopchang, gamja-tang, hot pot and other 3-4 person menus...
are out of reach no matter how much you want them.

But Gubaeknyang Gopchang serves gopchang as a single-serving lunch box.
Delivery fee, price, taste, portion, concept, even an opening event!
It was a perfect place...

Meals aimed at people living alone are everywhere, but a drinking snack like this is hard to come by.

Thanks to Gubaeknyang I could enjoy a cold glass of beer,
and it blew my worries away.
