快速 load 多个 json 文件
utils
本文字数:206 字 | 阅读时长 ≈ 1 min

快速 load 多个 json 文件

utils
本文字数:206 字 | 阅读时长 ≈ 1 min

修改前

# Sequential baseline: read each object's mask file one at a time and
# collect the matching caption next to it.
for index in meta['object_list']:
    with open(prefix + index['mask path'], "r") as f:
        # Only element 0 of the decoded JSON value is kept as the mask.
        mask = np.array(json.load(f)[0])
    masks.append(mask)
    text_descriptions.append(index['caption'])

修改后

import json
import numpy as np
from multiprocessing import Pool

def load_mask(mask_path):
    """Load one mask from a JSON file and return it as a NumPy array.

    The decoded JSON value is indexed with ``[0]`` and only that first
    element is converted; anything else in the file is discarded.

    Args:
        mask_path: Path of the JSON file to read.

    Returns:
        The result of ``np.array`` applied to element 0 of the decoded
        JSON value.
    """
    # JSON text is UTF-8 by specification; pin the encoding so decoding
    # does not depend on the platform's locale default.
    with open(mask_path, "r", encoding="utf-8") as f:
        payload = json.load(f)
    return np.array(payload[0])

def load_data(index):
    """Load the mask and caption for one entry of the object list.

    Args:
        index: Mapping that provides a ``'mask path'`` entry (appended to
            the module-level ``prefix`` to form the file path) and a
            ``'caption'`` entry.

    Returns:
        A ``(mask, caption)`` tuple, where ``mask`` is whatever
        ``load_mask`` returns for the resolved path.
    """
    # The tuple is evaluated left to right: the mask is loaded before the
    # caption is looked up, as in a step-by-step version.
    return load_mask(prefix + index['mask path']), index['caption']

masks, text_descriptions = [], []

if __name__ == "__main__":
    # Fan the per-object loads out over a pool of worker processes.
    with Pool() as pool:
        results = pool.map(load_data, meta['object_list'])

    # pool.map hands back a list of (mask, caption) pairs, so it can be
    # walked once per output list.
    masks.extend(mask for mask, _ in results)
    text_descriptions.extend(caption for _, caption in results)

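上面的多进程写法可能出问题:load_data 及其参数、返回值都必须能被 pickle,而且在以 spawn 方式启动子进程的平台(如 Windows、macOS)上,子进程重新导入模块时必须能在模块级别找到 prefix,否则会报错。线程共享同一进程内的全局变量,没有这些限制,所以也可以改用线程池这样写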

import json
import numpy as np
from concurrent.futures import ThreadPoolExecutor

def load_mask(mask_path):
    """Read a JSON mask file and return its first element as a NumPy array.

    Args:
        mask_path: Path of the JSON file to read.

    Returns:
        ``np.array`` built from element 0 of the decoded JSON value; the
        remaining elements of the file, if any, are ignored.
    """
    # Decode explicitly as UTF-8 (the encoding JSON is specified in)
    # instead of relying on the locale-dependent default of open().
    with open(mask_path, "r", encoding="utf-8") as f:
        decoded = json.load(f)
    return np.array(decoded[0])

def load_data(index):
    """Return the ``(mask, caption)`` pair for one object-list entry.

    Args:
        index: Mapping with a ``'mask path'`` entry, which is appended to
            the module-level ``prefix`` to locate the mask file, and a
            ``'caption'`` entry.

    Returns:
        Tuple of the value returned by ``load_mask`` for the resolved
        path and the entry's caption.
    """
    # Left-to-right tuple evaluation loads the mask first and reads the
    # caption second.
    return load_mask(prefix + index['mask path']), index['caption']

masks, text_descriptions = [], []

# Run the per-object loads on a thread pool.
with ThreadPoolExecutor() as executor:
    results = executor.map(load_data, meta['object_list'])

# executor.map returns a one-shot iterator, so the pairs are consumed in a
# single pass, after the executor block has exited.
for loaded in results:
    mask, text_description = loaded
    masks.append(mask)
    text_descriptions.append(text_description)