728x90
반응형
import numpy as np
import pandas as pd
import tensorflow as tf
import glob as glob
# Collect the path of every JPEG that sits exactly one folder below ./clothes
# and keep the paths in a numpy array.
#
# How the pattern './clothes/*/*.jpg' reads:
#   .        the current folder
#   clothes  the "clothes" folder inside the current folder
#   *        any direct sub-folder of it
#   *.jpg    every .jpg file inside that sub-folder
#
# NOTE: recursive=True only affects patterns that contain '**'; this pattern
# has none, so the search depth is fixed by the two '*' components.
all_data = np.asarray(
    glob.glob(pathname='./clothes/*/*.jpg', recursive=True)
)
print(all_data[0:5])
['./clothes\\black_dress\\1.jpg' './clothes\\black_dress\\10.jpg'
'./clothes\\black_dress\\100.jpg' './clothes\\black_dress\\101.jpg'
'./clothes\\black_dress\\102.jpg']
def check_cc(color, clothes):
    """Encode a colour name and a garment name as one 11-slot multi-hot vector.

    Slots 0-5 stand for the colours black, blue, brown, green, red, white and
    slots 6-10 for the garments dress, shirt, pants, shorts, shoes.

    Args:
        color: colour name, e.g. 'black'.
        clothes: garment name, e.g. 'dress'.

    Returns:
        A ``(labels, color_index)`` tuple. ``labels`` is a float numpy array
        of shape (11,) with a 1 in the colour slot and a 1 in the garment
        slot; a name that is not in the tables leaves its slot group all
        zero. ``color_index`` is the colour's slot (0-5), or -1 when the
        colour is not one of the six known names.
    """
    color_slots = ('black', 'blue', 'brown', 'green', 'red', 'white')
    clothes_slots = ('dress', 'shirt', 'pants', 'shorts', 'shoes')

    labels = np.zeros(11,)

    # color check
    # -1 marks an unknown colour, so the function always has a value to
    # return instead of failing on an unassigned local.
    color_index = -1
    if color in color_slots:
        color_index = color_slots.index(color)
        labels[color_index] = 1

    # clothes check
    # The garment slots start right after the colour slots. This lookup must
    # use `clothes`; comparing `color` here would never match a garment.
    if clothes in clothes_slots:
        labels[len(color_slots) + clothes_slots.index(clothes)] = 1

    return labels, color_index
# Allocate one row per image path: an 11-slot label vector and a single
# colour index. np.empty does not initialise its contents, so every row is
# expected to be overwritten before it is read.
print(all_data.shape)
all_labels = np.empty(shape=(len(all_data), 11))
all_color_labels = np.empty(shape=(len(all_data), 1))
print(all_labels.shape)
print(all_color_labels.shape)
(16170,)
(16170, 11)
(16170, 1)
# Derive each image's labels from its parent folder name, "<color>_<clothes>".
for i, data in enumerate(all_data):
    # The paths may use "\\" (Windows) or "/" as separator. Normalise them so
    # the parent folder is always the second-to-last path component.
    folder = data.replace('\\', '/').split('/')[-2]
    color_and_clothes = folder.split('_')
    color = color_and_clothes[0]
    print(color_and_clothes)
    # A folder name without "_" has no garment part; pass '' so that no
    # garment slot is set instead of failing on a missing list element.
    clothes = color_and_clothes[1] if len(color_and_clothes) > 1 else ''
    labels, color_index = check_cc(color, clothes)
    all_labels[i] = labels
    all_color_labels[i] = color_index
print(all_labels[:10])
['black', 'dress']
['black', 'dress']
['black', 'dress']
['black', 'dress']
['black', 'dress']
# Split the samples into training, validation and test sets.
from sklearn.model_selection import train_test_split

# Step 1: hold out 30% of all samples as the test set.
train_x, test_x, train_y, test_y = train_test_split(
    all_data, all_labels, test_size=0.3, shuffle=True, random_state=99
)
# Step 2: hold out 30% of what remains as the validation set.
train_x, val_x, train_y, val_y = train_test_split(
    train_x, train_y, test_size=0.3, shuffle=True, random_state=99
)
print(train_x.shape, val_x.shape, test_x.shape, sep='\n')
(7923,)
(3396,)
(4851,)
# Column names for the 11 label slots, in slot order (colours, then garments).
_LABEL_COLUMNS = (
    'black', 'blue', 'brown', 'green', 'red', 'white',
    'dress', 'shirt', 'pants', 'shorts', 'shoes',
)


def _labelled_frame(paths, label_matrix):
    """Return a DataFrame with an 'image' column plus one column per label slot."""
    columns = {'image': paths}
    for slot, name in enumerate(_LABEL_COLUMNS):
        columns[name] = label_matrix[:, slot]
    return pd.DataFrame(columns)


# One table per split: the image path followed by its 11 label columns.
train_df = _labelled_frame(train_x, train_y)
val_df = _labelled_frame(val_x, val_y)
test_df = _labelled_frame(test_x, test_y)
train_df.head()
image black blue brown green red white dress shirt pants shorts shoes
0 ./clothes\silver_skirt\000095.jpg 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 ./clothes\blue_shirt\82.jpg 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 ./clothes\pink_hoodie\000267.jpg 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 ./clothes\red_shoes\159.jpg 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 ./clothes\red_shoes\601.jpg 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
# Write each split to its own CSV file without the row index.
for _frame, _csv_path in (
    (train_df, './clothes.train.csv'),
    (val_df, './clothes.val.csv'),
    (test_df, './clothes.test.csv'),
):
    _frame.to_csv(_csv_path, index=False)

# Read the files back and show the column summary of the training table.
df1, df2, df3 = (
    pd.read_csv(_csv_path)
    for _csv_path in (
        "./clothes.train.csv",
        "./clothes.val.csv",
        "./clothes.test.csv",
    )
)
df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7923 entries, 0 to 7922
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 image 7923 non-null object
1 black 7923 non-null float64
2 blue 7923 non-null float64
3 brown 7923 non-null float64
4 green 7923 non-null float64
5 red 7923 non-null float64
6 white 7923 non-null float64
7 dress 7923 non-null float64
8 shirt 7923 non-null float64
9 pants 7923 non-null float64
10 shorts 7923 non-null float64
11 shoes 7923 non-null float64
dtypes: float64(11), object(1)
memory usage: 742.9+ KB
반응형
'Data_Science > Data_Analysis_Py' 카테고리의 다른 글
56. 영화리뷰 분석 (0) | 2021.12.07 |
---|---|
54. glob-clothes || conv 다중 분류 (0) | 2021.12.07 |
52. ImageDataGenerator || 이미지 조회 (0) | 2021.12.07 |
51. cifar10 || imageDataGenerator (0) | 2021.12.07 |
50. ImageDataGenerator (0) | 2021.12.07 |