728x90
반응형
LMDB 안에 image와 label이 함께 저장되어 있다는 가정하에 데이터를 읽어, 이미지는 .jpg 파일로 저장하고 label은 txt 파일에 기록한다.
txt 파일의 각 줄은 `images/image-000000001.jpg\t<label>` 형식으로 생성된다(구분자는 탭 문자 \t 사용).
import lmdb
from tqdm import tqdm
import six
from PIL import Image
import os
from glob import glob
import concurrent.futures
from concurrent.futures import ProcessPoolExecutor
import functools
import time
def make_txt_lst_dict(data_dir_lst, save_root=''):
    """Build the label-file path for every dataset directory.

    Args:
        data_dir_lst: Dataset directories written with '/' separators
            (e.g. ``'train/foo'``). A directory whose first component is
            exactly ``'train'`` gets ``train_label.txt``; every other
            directory gets ``valid_label.txt``.
        save_root: Output root prepended verbatim to each directory, so it
            must carry its own trailing ``'/'``. The original post left this
            path as a fill-in placeholder (which was not valid Python); it
            is now a parameter and defaults to paths relative to the
            current working directory.

    Returns:
        A ``(txt_lst, txt_dict)`` tuple: ``txt_lst`` holds the label-file
        paths in input order, and ``txt_dict`` maps each of those paths to
        its own fresh empty list.
    """
    txt_lst = []
    for data_dir in data_dir_lst:
        if data_dir.split('/')[0] == 'train':
            label_name = '/train_label.txt'
        else:
            label_name = '/valid_label.txt'
        txt_lst.append(save_root + data_dir + label_name)
    # One independent list per label file (never a shared list object).
    txt_dict = {txt: [] for txt in txt_lst}
    return txt_lst, txt_dict
# Find every LMDB data file below the data folder, in sorted order.
root = './data_folder/**/*/data.mdb'
lmdb_lst = sorted(glob(root, recursive=True))
# Drop the leading path components and the trailing 'data.mdb' so that only
# the dataset directory remains, re-joined with '/'.
# NOTE(review): the slice start 8 presumably matches the depth of the path the
# author actually globbed; with the relative root above it looks like it would
# yield empty strings -- confirm before running.
data_dir_lst = ['/'.join(mdb_path.split('/')[8:-1]) for mdb_path in lmdb_lst]
# Label-file paths (and a per-file empty list) for the discovered datasets.
txt_lst, g_txt_dict = make_txt_lst_dict(data_dir_lst)
def read_lmdb_wr_image(data_dir, save_root='', lmdb_root='', rgb=True):
    """Dump one LMDB dataset to JPEG files plus a tab-separated label file.

    The LMDB environment at ``lmdb_root + data_dir`` is read for the key
    ``num-samples`` and then for ``image-%09d`` / ``label-%09d`` with indices
    1..num-samples. Each image is saved as
    ``save_root + data_dir + '/images/' + 'image-%09d' + '.jpg'`` and one line
    ``images/<image name>.jpg<TAB><label>`` is written per sample to
    ``train_label.txt`` (when the first component of ``data_dir`` is exactly
    ``'train'``) or ``valid_label.txt`` (otherwise) under
    ``save_root + data_dir``.

    Args:
        data_dir: Dataset directory with '/' separators, e.g. ``'train/foo'``.
        save_root: Output root, prepended verbatim (include the trailing
            ``'/'``). The original post left this as a fill-in placeholder.
        lmdb_root: Root of the LMDB data, prepended verbatim (include the
            trailing ``'/'``). Also a fill-in placeholder in the original.
        rgb: When true (the default) images are converted to ``"RGB"``;
            otherwise to ``"L"``.
    """
    img_save_dir = save_root + data_dir + '/images/'
    lmdb_file = lmdb_root + data_dir
    if data_dir.split('/')[0] == 'train':
        txtfile = save_root + data_dir + '/train_label.txt'
    else:
        txtfile = save_root + data_dir + '/valid_label.txt'
    mode = "RGB" if rgb else "L"
    delimiter = '\t'

    # Make the function usable on its own: ensure the output folder exists.
    os.makedirs(img_save_dir, exist_ok=True)

    lmdb_env = lmdb.open(
        lmdb_file,
        max_readers=32,
        readonly=True,
        lock=False,
        readahead=False,
        meminit=False,
    )
    try:
        with lmdb_env.begin(write=False) as txn:
            nSamples = int(txn.get(b"num-samples"))
            print('::Processing %s Total Samples:%d'%(data_dir,nSamples))
            # Labels are decoded as UTF-8 below, so write them back as UTF-8
            # instead of relying on the platform default encoding.
            with open(txtfile, 'w', encoding='utf-8') as txt:
                for index in range(1, nSamples + 1):  # lmdb starts with 1
                    label_key = "label-%09d".encode() % index
                    label = txn.get(label_key).decode("utf-8")

                    img_key = "image-%09d".encode() % index
                    img_name = img_key.decode('utf-8')
                    buf = six.BytesIO(txn.get(img_key))
                    img = Image.open(buf).convert(mode)
                    img.save(img_save_dir + img_name + '.jpg')

                    # The label file stores the path relative to the dataset
                    # directory, i.e. the last two components of the image
                    # path: 'images/<image name>.jpg'.
                    img_path_forward = 'images/' + img_name + '.jpg'
                    txt.write(img_path_forward + delimiter + label + '\n')
    finally:
        # Release the environment even when a sample fails to decode.
        lmdb_env.close()
    print('::Finished Save Images %s'%data_dir)
if __name__ == '__main__':
    # Output root under which '<data_dir>/images/' folders are created. The
    # original post left this as a fill-in placeholder; set it to the same
    # output root read_lmdb_wr_image writes to (include the trailing '/').
    save_root = ''

    # Create every image folder up front, before the workers start writing.
    for data_dir in data_dir_lst:
        img_save_dir = save_root + data_dir + '/images/'
        if not os.path.exists(img_save_dir):
            print('Make Directory : %s'%img_save_dir)
        # exist_ok avoids a crash if the folder appears between the check
        # above and this call.
        os.makedirs(img_save_dir, exist_ok=True)

    # Convert one dataset per worker process.
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Executor.map only re-raises a worker's exception when its result is
        # retrieved, so drain the iterator; otherwise failures pass silently.
        for _ in executor.map(read_lmdb_wr_image, data_dir_lst):
            pass
728x90
반응형
'사소한 Tip . 오류 해결법' 카테고리의 다른 글
[Git] git command (0) | 2021.12.29 |
---|---|
[MMOCR] 사용방법 (0) | 2021.12.14 |
[LMDB] lmdb file loading, reading (0) | 2021.12.08 |
[Tensorflow] Tensorflow v1 to Tensorflow v2 자동화 코드(.ipynb) (0) | 2021.12.07 |
[Tensorflow] ValueError: setting an array element with a sequence. (0) | 2021.12.07 |