制作待训练的数据
- 参考
- 源码notebook,https://github.com/databooks/databook/tree/master/gis/iobjectspy10
- iObjects 10 使用:本示例使用 SuperMap iObjects for Python 10.0
import os
from iobjectspy import open_datasource
from iobjectspy.ml.vision import DataPreparation
import time
Using TensorFlow backend.
设置输入数据路径
设置TIF影像,矢量标签路径
# Base directory that every relative example-data path below starts from.
curr_dir = ''


def _training_path(*parts):
    """Join *parts* onto <curr_dir>/../../example_data/training."""
    return os.path.join(
        curr_dir, os.pardir, os.pardir, 'example_data', 'training', *parts
    )


# VOC-format experiment: source image plus the 'label' entry of label.udbx.
input_VOC_data = _training_path('plane.tif')
ds_voc = open_datasource(_training_path('label.udbx'))
input_VOC_label = ds_voc['label']

# SCENE-format experiment: source image plus the 'DLTB' entry.
input_SCENE_data = _training_path(
    'scene_cls_train_data', 'raw', 'sceneclassification.tif'
)
ds_scene = open_datasource(
    _training_path('scene_cls_train_data', 'raw', 'sceneclassification.udbx')
)
input_SCENE_label = ds_scene['DLTB']

# Land-cover (multi-class) training-data experiment: image plus 'Vector'.
input_multi_data = _training_path('multi_cls_train_data', 'raw', 'image.tif')
ds_multi = open_datasource(
    _training_path('multi_cls_train_data', 'raw', 'multi_cls_train_data.udbx')
)
input_multi_label = ds_multi['Vector']

# Binary-classification training-data experiment: image plus 'Vector'.
input_binary_data = _training_path('binary_cls_train_data', 'raw', 'image.tif')
ds_binary = open_datasource(
    _training_path('binary_cls_train_data', 'raw', 'binary_cls_train_data.udbx')
)
input_binary_label = ds_binary['Vector']
java -cp /home/data/hou/workspaces/iobjectspy/venv/lib/python3.6/site-packages/iobjectspy-10.0.0-py3.6.egg/iobjectspy/_jsuperpy/jars/iobjects-py4j.jar com.supermap.jsuperpy.ApplicationExample 127.0.0.1 59097
[iObjectsPy]: Connection gateway-service successful, Python callback port bind 32797
设置输出数据路径
# Every generated training set is written below <curr_dir>/../../out.
output_path = os.path.join(curr_dir, os.pardir, os.pardir, 'out')
生成VOC格式训练数据(用于Faster R-CNN)
将整幅影像数据和与其匹配的矢量标注数据切分为指定大小的瓦片,用于深度学习训练。生成的训练数据一般包括图片、标注、以及相关元信息,其中切分后的图片和标注文件名一一对应。
# VOC run: no class field is passed; results go to <output_path>/VOC.
output_name = 'VOC'
label_class_field = None

# Time the whole generation call so the duration can be reported below.
start_time = time.time()
DataPreparation.create_training_data(
    # Inputs: source image and its matching vector labels.
    input_data=input_VOC_data,
    input_label=input_VOC_label,
    label_class_field=label_class_field,
    # Destination.
    output_path=output_path,
    output_name=output_name,
    # Output format and tiling.
    # NOTE(review): the offsets are half the tile size, presumably giving
    # overlapping tiles - confirm against the iobjectspy documentation.
    training_data_format='VOC',
    tile_format='jpg',
    tile_size_x=512,
    tile_size_y=512,
    tile_offset_x=256,
    tile_offset_y=256,
    tile_start_index=0,
    save_nolabel_tiles=False,
)
print(
    '完成,共耗时{}s,训练数据保存在 {}'.format(
        time.time() - start_time,
        os.path.join(output_path, output_name),
    )
)
train data saved to `../../out/VOC`
完成,共耗时5.622236013412476s,训练数据保存在 ../../out/VOC
生成SCENE格式训练数据(用于场景分类)
# SCENE run: classes are read from the 'DLMC' field; results go to
# <output_path>/SCENE.
output_name = 'SCENE'
label_class_field = 'DLMC'

# Time the whole generation call so the duration can be reported below.
start_time = time.time()
DataPreparation.create_training_data(
    # Inputs: source image and its matching vector labels.
    input_data=input_SCENE_data,
    input_label=input_SCENE_label,
    label_class_field=label_class_field,
    # Destination.
    output_path=output_path,
    output_name=output_name,
    # Output format and tiling.
    # NOTE(review): the offsets are half the tile size, presumably giving
    # overlapping tiles - confirm against the iobjectspy documentation.
    training_data_format="SCENE_C",
    tile_format='origin',
    tile_size_x=128,
    tile_size_y=128,
    tile_offset_x=64,
    tile_offset_y=64,
    tile_start_index=0,
    save_nolabel_tiles=False,
)
print(
    '完成,共耗时{}s,训练数据保存在 {}'.format(
        time.time() - start_time,
        os.path.join(output_path, output_name),
    )
)
完成,共耗时53.19485330581665s,训练数据保存在 ../../out/SCENE
制作地物分类训练数据
# Multi-class run: classes are read from the 'class_type' field; results go
# to <output_path>/multi_classfication (directory name kept as spelled).
output_name = 'multi_classfication'
label_class_field = 'class_type'

# Time the whole generation call so the duration can be reported below.
start_time = time.time()
DataPreparation.create_training_data(
    # Inputs: source image and its matching vector labels.
    input_data=input_multi_data,
    input_label=input_multi_label,
    label_class_field=label_class_field,
    # Destination.
    output_path=output_path,
    output_name=output_name,
    # Output format and tiling (1024-pixel tiles, offset 128 on both axes).
    training_data_format="MULTI_C",
    tile_format='origin',
    tile_size_x=1024,
    tile_size_y=1024,
    tile_offset_x=128,
    tile_offset_y=128,
    tile_start_index=0,
    save_nolabel_tiles=False,
)
print(
    '完成,共耗时{}s,训练数据保存在 {}'.format(
        time.time() - start_time,
        os.path.join(output_path, output_name),
    )
)
完成,共耗时29.65898036956787s,训练数据保存在 ../../out/multi_classfication
制作二元分类训练数据
# Binary run: no class field is passed; results go to
# <output_path>/binary_classfication (directory name kept as spelled).
output_name = 'binary_classfication'
label_class_field = None

# Time the whole generation call so the duration can be reported below.
start_time = time.time()
DataPreparation.create_training_data(
    # Inputs: source image and its matching vector labels.
    input_data=input_binary_data,
    input_label=input_binary_label,
    label_class_field=label_class_field,
    # Destination.
    output_path=output_path,
    output_name=output_name,
    # Output format and tiling.
    # NOTE(review): the offsets are half the tile size, presumably giving
    # overlapping tiles - confirm against the iobjectspy documentation.
    training_data_format="BINARY_C",
    tile_format='tif',
    tile_size_x=1024,
    tile_size_y=1024,
    tile_offset_x=512,
    tile_offset_y=512,
    tile_start_index=0,
    save_nolabel_tiles=False,
)
print(
    '完成,共耗时{}s,训练数据保存在 {}'.format(
        time.time() - start_time,
        os.path.join(output_path, output_name),
    )
)
完成,共耗时25.959524393081665s,训练数据保存在 ../../out/binary_classfication