# 开发者实战 | 基于 OpenVINO™ 在英特尔开发套件上实现眼部追踪

09/19 17:00

AIxBoard™ 爱克斯板开发者套件介绍

01

02

class EyeImageModel(nn.Module):
    """CNN branch that maps a stitched eye image [N, 3, 128, 32-wide] batch
    ([N, 3, 32, 128]) down to 2 features per sample."""

    def __init__(self):
        super(EyeImageModel, self).__init__()
        # input -> [N, 3, 32, 128]
        self.model = Sequential(
            BatchNorm2d(3),
            Conv2d(3, 2, kernel_size=(5, 5), padding=2),
            LeakyReLU(),
            MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            Conv2d(2, 20, kernel_size=(5, 5), padding=2),
            ELU(),
            Conv2d(20, 10, kernel_size=(5, 5), padding=2),
            Tanh(),
            # [N, 10, 16, 64] -> [N, 10240]
            Flatten(1, 3),
            Dropout(0.01),
            Linear(10240, 1024),
            Softplus(),
            Linear(1024, 2),
        )

    def forward(self, x):
        return self.model(x)


class PositionOffset(nn.Module):
    """CNN head refining a [N, 1, 3, 3] feature grid into a 2-D output."""

    def __init__(self):
        super(PositionOffset, self).__init__()
        self.model = Sequential(
            Conv2d(1, 32, kernel_size=(2, 2), padding=0),
            Softplus(),
            Conv2d(32, 64, kernel_size=(2, 2), padding=1),
            Conv2d(64, 64, kernel_size=(2, 2), padding=0),
            ELU(),
            Conv2d(64, 128, kernel_size=(2, 2), padding=0),
            Tanh(),
            # [N, 128, 1, 1] -> [N, 128]
            Flatten(1, 3),
            Dropout(0.01),
            Linear(128, 32),
            Sigmoid(),
            Linear(32, 2),
        )

    def forward(self, x):
        return self.model(x)


class EyeTrackModel(nn.Module):
    """Full eye-tracking model.

    ``forward(x)`` expects a pair: ``x[0]`` is the eye image batch
    [N, 3, 32, 128] and ``x[1]`` an auxiliary vector [N, 7].  The 2 image
    features and the 7 auxiliary values are concatenated into 9 numbers,
    reshaped to a [N, 1, 3, 3] grid and refined by PositionOffset.
    """

    def __init__(self):
        super(EyeTrackModel, self).__init__()
        self.eye_img_model = EyeImageModel()
        self.position_offset = PositionOffset()

    def forward(self, x):
        img_features = self.eye_img_model(x[0])
        merged = torch.cat((img_features, x[1]), dim=1)
        grid = torch.reshape(merged, (-1, 1, 3, 3))
        return self.position_offset(grid)

03

610265158/Peppa_Pig_Face_Landmark: A simple face detect and alignment method, which is easy and stable. (github.com)

def save_img_and_coords(img, coords, annot, saved_img_index):
    """Persist one captured sample: the image as PNG and its label vector
    (coords followed by annot) as a text file, both named by index."""
    stem = '%d' % saved_img_index
    cv2.imwrite('./dataset/img/' + stem + '.png', img)
    cv2.imwrite  # noqa-free: labels go next to the image, same stem
    np.savetxt('./dataset/annot/' + stem + '.txt', np.array([*coords, *annot]))
    print("[INFO] | SAVED:", saved_img_index)

def trim_eye_img(image, face_kp):
    """Crop both eyes from a face image and stitch them side by side.

    :param image: face image in [H, W, C] layout
    :param face_kp: facial landmarks; points 60-67 bound one eye and
                    68-75 the other (presumably left/right — confirm
                    against the landmark model used)
    :return: a 32x128 [H, W, C] image, the two eye crops concatenated
             along the width axis
    """
    crops = []
    for kp in (face_kp[60:68], face_kp[68:76]):
        left, right, top, bottom = return_boundary(kp)
        eye = image[int(top):int(bottom), int(left):int(right)]
        crops.append(cv2.resize(eye, (64, 32), interpolation=cv2.INTER_AREA))
    return np.concatenate(crops, axis=1)

# Open camera device 1 and configure the capture resolution.
# NOTE(review): CAP_PROP_FRAME_WIDTH is set from HEIGHT and
# CAP_PROP_FRAME_HEIGHT from WEIGHT — the constants look swapped
# (and "WEIGHT" reads like a typo for "WIDTH"); confirm against the
# definitions of HEIGHT/WEIGHT before relying on the frame size.
vide_capture = cv2.VideoCapture(1)
vide_capture.set(cv2.CAP_PROP_FRAME_WIDTH, HEIGHT)
vide_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, WEIGHT)

04

def EpochDataLoader(path, batch_size=64):
    """Load the whole dataset from disk as pre-batched float tensors.

    :param path: dataset root directory (must contain img/ and annot/)
    :param batch_size: samples per batch; trailing remainder files are dropped
    :return: (epoch_img, epoch_annots, epoch_coords) with shapes
             [M, batch_size, C, H, W], [M, batch_size, 7], [M, batch_size, 2]
    """
    # Normalisation divisors: presumably screen resolution in pixels plus
    # two unit flags and a 1.4 scale — TODO confirm against data collection.
    annot_mora = np.array([1920, 1080, 1920, 1080, 1, 1, 1.4])
    coord_mora = np.array([1920, 1080])
    epoch_img, epoch_annots, epoch_coords = [], [], []
    file_names = os.listdir(path + "img/")
    usable = (len(file_names) // batch_size) * batch_size
    for start in range(0, usable, batch_size):
        batch_img, batch_annots, batch_coords = [], [], []
        for name in file_names[start:start + batch_size]:
            img = cv2.imread(str(path) + "img/" + str(name))  # [H, W, C]
            img = img.transpose((2, 0, 1))
            img = img / 255  # -> [C, H, W], scaled to [0, 1]
            data = np.loadtxt(str(path) + "annot/" + str(name).split(".")[0] + ".txt")
            batch_img.append(img)
            batch_annots.append(data[2:] / annot_mora)
            batch_coords.append(data[:2] / coord_mora)
        epoch_img.append(batch_img)
        epoch_annots.append(batch_annots)
        epoch_coords.append(batch_coords)

    def _to_tensor(seq):
        # stack the nested python lists into one float32 tensor
        return torch.from_numpy(np.array(seq)).float()

    return _to_tensor(epoch_img), _to_tensor(epoch_annots), _to_tensor(epoch_coords)

05

def eye_track_train():
    """Run the full training loop for EyeTrackModel.

    Hyper-parameters (TRAIN_DATASET_PATH, TRAIN_BATCH_SIZE, LEARN_STEP,
    TRAIN_EPOCH, LOG_SAVE_PATH, device) are read from module globals.
    Loss is logged to TensorBoard per batch; the model is checkpointed
    every 100 epochs (including epoch 0) and once more at the end.
    """
    img, annot, coord = EpochDataLoader(TRAIN_DATASET_PATH, batch_size=TRAIN_BATCH_SIZE)
    batch_num = img.size()[0]
    model = EyeTrackModel().to(device).train()
    loss = torch.nn.MSELoss()
    optim = torch.optim.SGD(model.parameters(), lr=LEARN_STEP)
    writer = SummaryWriter(LOG_SAVE_PATH)

    trained_batch_num = 0
    for epoch in range(TRAIN_EPOCH):
        for idx in range(batch_num):
            batch_img = img[idx].to(device)
            batch_annot = annot[idx].to(device)
            batch_coords = coord[idx].to(device)
            # forward pass and loss
            outputs = model((batch_img, batch_annot))
            result_loss = loss(outputs, batch_coords)
            # backward pass and parameter update
            optim.zero_grad()
            result_loss.backward()
            optim.step()
            # bookkeeping: TensorBoard scalar + console progress line
            trained_batch_num += 1
            writer.add_scalar("loss", result_loss.item(), trained_batch_num)
            print("[INFO]: trained epoch num | trained batch num | loss "
                  , epoch + 1, trained_batch_num, result_loss.item())
        if epoch % 100 == 0:
            # periodic whole-model checkpoint
            torch.save(model, "../model/ET-" + str(epoch) + ".pt")

    torch.save(model, "../model/ET-last.pt")
    writer.close()
    print("[SUCCEED!] model saved!")

06

def export_onnx(model_path, if_fp16=False):
    """Convert a saved PyTorch model to ONNX and then to OpenVINO IR.

    :param model_path: path of the saved .pt model to convert
    :param if_fp16: if True, compress the IR weights to FP16
    :return: path of the generated .xml (IR) model
    """
    model = torch.load(model_path, map_location=torch.device('cpu')).eval()
    print(model)
    # BUG FIX: the original used model_path.split(".")[0], which yields an
    # empty string for relative paths such as "../model/ET-last.pt" (the
    # leading "." splits first).  os.path.splitext strips only the extension.
    model_path = os.path.splitext(model_path)[0]
    # dummy inputs matching EyeTrackModel's expected (image, position) pair
    dummy_input_img = torch.randn(1, 3, 32, 128, device='cpu')
    dummy_input_position = torch.randn(1, 7, device='cpu')
    torch.onnx.export(model, [dummy_input_img, dummy_input_position],
                      model_path + ".onnx", export_params=True)
    # if_fp16=False keeps FP32 weights in the IR
    model = mo.convert_model(model_path + ".onnx", compress_to_fp16=if_fp16)
    serialize(model, model_path + ".xml")
    print(EyeTrackModel(), "\n[FINISHED] CONVERT DONE!")
    return model_path + ".xml"

07

Neural Network Compression Framework (NNCF) provides a new post-training quantization API available in Python that is aimed at reusing the code for model training or validation that is usually available with the model in the source framework, for example, PyTorch* or TensorFlow*. The API is cross-framework and currently supports models represented in the following frameworks: PyTorch, TensorFlow 2.x, ONNX, and OpenVINO™.

Post-training Quantization with NNCF (new) — OpenVINO™ documentation[1]

• Basic quantization

• Quantization with accuracy control

def basic_quantization(input_model_path):
    """Quantize an OpenVINO IR model to INT8 with NNCF basic PTQ.

    :param input_model_path: path of the FP32/FP16 .xml model
    """
    # prepare the calibration data
    data = data_source(path=DATASET_ROOT_PATH)
    nncf_calibration_dataset = nncf.Dataset(data, transform_fn)
    # set the parameters of how to quantize
    subset_size = 1000
    preset = nncf.QuantizationPreset.MIXED
    # load model
    ov_model = Core().read_model(input_model_path)
    # perform quantization
    quantized_model = nncf.quantize(ov_model, nncf_calibration_dataset,
                                    preset=preset, subset_size=subset_size)
    # save model (BUG FIX: splitext instead of split(".")[0], which breaks
    # on relative paths containing "." such as "../model/ET-last.xml")
    output_model_path = os.path.splitext(input_model_path)[0] + "_BASIC_INT8.xml"
    serialize(quantized_model, output_model_path)


def accuracy_quantization(input_model_path, max_drop):
    """Quantize an IR model to INT8 with NNCF accuracy-aware PTQ.

    :param input_model_path: path of the FP32/FP16 .xml model
    :param max_drop: maximum accuracy drop tolerated by the controller
    """
    # calibration data (no labels) and validation data (with labels)
    calibration_source = data_source(path=DATASET_ROOT_PATH, with_annot=False)
    validation_source = data_source(path=DATASET_ROOT_PATH, with_annot=True)
    calibration_dataset = nncf.Dataset(calibration_source, transform_fn)
    validation_dataset = nncf.Dataset(validation_source, transform_fn_with_annot)
    # load model
    xml_model = Core().read_model(input_model_path)
    # perform quantization under an accuracy constraint
    quantized_model = nncf.quantize_with_accuracy_control(xml_model,
                                                          calibration_dataset=calibration_dataset,
                                                          validation_dataset=validation_dataset,
                                                          validation_fn=validate,
                                                          max_drop=max_drop)
    # save model
    # BUG FIX: the original referenced the undefined name `xml_model_path`
    # (NameError at runtime); the intended value is `input_model_path`.
    output_model_path = os.path.splitext(input_model_path)[0] + "_ACC_INT8.xml"
    serialize(quantized_model, output_model_path)


def export_onnx(model_path, if_fp16=False):
    """Convert a saved PyTorch model to ONNX and then to OpenVINO IR.

    :param model_path: path of the saved .pt model to convert
    :param if_fp16: if True, compress the IR weights to FP16
    :return: path of the generated .xml (IR) model
    """
    model = torch.load(model_path, map_location=torch.device('cpu')).eval()
    print(model)
    # BUG FIX: split(".")[0] yields "" for relative paths like
    # "../model/ET-last.pt"; splitext strips only the extension.
    model_path = os.path.splitext(model_path)[0]
    # dummy inputs matching EyeTrackModel's expected (image, position) pair
    dummy_input_img = torch.randn(1, 3, 32, 128, device='cpu')
    dummy_input_position = torch.randn(1, 7, device='cpu')
    torch.onnx.export(model, [dummy_input_img, dummy_input_position],
                      model_path + ".onnx", export_params=True)
    # if_fp16=False keeps FP32 weights in the IR
    model = mo.convert_model(model_path + ".onnx", compress_to_fp16=if_fp16)
    serialize(model, model_path + ".xml")
    print(EyeTrackModel(), "\n[FINISHED] CONVERT DONE!")
    return model_path + ".xml"

08

benchmark_app -m ET-last_ACC_INT8.xml -d CPU -api async
[ INFO ] Execution Devices:['CPU']
[ INFO ] Count:            226480 iterations
[ INFO ] Duration:         60006.66 ms
[ INFO ] Latency:
[ INFO ]    Median:        3.98 ms
[ INFO ]    Average:       4.18 ms
[ INFO ]    Min:           2.74 ms
[ INFO ]    Max:           38.98 ms
[ INFO ] Throughput:   3774.25 FPS

benchmark_app -m ET-last_INT8.xml -d CPU -api async
[ INFO ] Execution Devices:['CPU']
[ INFO ] Count:            513088 iterations
[ INFO ] Duration:         60002.85 ms
[ INFO ] Latency:
[ INFO ]    Median:        1.46 ms
[ INFO ]    Average:       1.76 ms
[ INFO ]    Min:           0.82 ms
[ INFO ]    Max:           61.07 ms
[ INFO ] Throughput:   8551.06 FPS

09

OpenVINO™ 提供了一个方便快捷的开发方式，通过几个核心的API便可实现模型转换和模型量化。

AIxBoard™ 基于 x86 架构提供了一个高通用、高性能的部署平台，体积小巧，非常适合项目的最终部署。

[1] https://docs.openvino.ai/2022.3/nncf_ptq_introduction.html

OpenVINO™

--END--



你也许想了解(点击蓝字查看)⬇️

➡️ 基于 ChatGLM2 和 OpenVINO™ 打造中文聊天助手

➡️ 基于 Llama2 和 OpenVINO™ 打造聊天机器人

➡️ OpenVINO™ DevCon 2023重磅回归！英特尔以创新产品激发开发者无限潜能

➡️ 5周年更新 | OpenVINO™  2023.0，让AI部署和加速更容易

➡️ OpenVINO™5周年重头戏！2023.0版本持续升级AI部署和加速性能

➡️ OpenVINO™2023.0实战 | 在 LabVIEW 中部署 YOLOv8 目标检测模型

➡️ 开发者实战系列资源包来啦！

➡️

以AI作画，祝她节日快乐；简单三步，OpenVINO™ 助你轻松体验AIGC

➡️

还不知道如何用OpenVINO™作画？点击了解教程。

➡️

➡️

使用OpenVINO 在“端—边—云”快速实现高性能人工智能推理





扫描下方二维码立即体验
OpenVINO™ 工具套件 2023.0



0 评论
0 收藏
0