# retinaface onnx

04/16 15:14

RetinaFace inference with ONNX Runtime:

``````python
import os
import time
from math import ceil

import onnxruntime
import numpy as np
import cv2
import argparse
import argparse
import numpy as np
from data import cfg_mnet, cfg_peleenet
from utils.nms.py_cpu_nms import py_cpu_nms
from math import ceil
from itertools import product as product

def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    The value is computed from ``exp(-|x|)``, which always lies in (0, 1],
    so large-magnitude inputs cannot overflow the exponential.

    NOTE: a later ``def sigmoid`` in this file rebinds the name at import
    time.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        NumPy scalar for scalar input, otherwise an ndarray shaped like ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) never exceeds 1, so neither branch below can overflow.
    z = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a NumPy scalar and
    # leaves real arrays untouched.
    return out[()]
def softmax(x, axis=1):
    """Compute a numerically stable softmax of ``x`` along ``axis``.

    Args:
        x: array-like of scores.
        axis: axis along which the outputs are normalised to sum to 1
            (default 1).

    Returns:
        ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)

    # Subtract the maximum along the reduction axis before exponentiating,
    # otherwise exp(x) can overflow to inf. keepdims=True keeps the maximum
    # broadcastable against x for any axis, not only axis=1 on 2-D input.
    shifted = x - x.max(axis=axis, keepdims=True)

    x_exp = np.exp(shifted)
    x_sum = np.sum(x_exp, axis=axis, keepdims=True)
    return x_exp / x_sum

def decode_landm(pre, priors, variances):
    """Turn encoded landmark offsets back into landmark coordinates.

    Each of the five (x, y) offset pairs in ``pre`` is scaled by
    ``variances[0]`` and the prior's size, then added to the prior's centre.

    Args:
        pre: landmark predictions, shape [num_priors, 10].
        priors: prior boxes in centre-offset form (cx, cy, w, h),
            shape [num_priors, 4].
        variances: (list[float]) variances of the prior boxes; only
            ``variances[0]`` is used here.

    Returns:
        Decoded landmarks, shape [num_priors, 10].
    """
    centers = priors[:, :2]
    sizes = priors[:, 2:]
    points = []
    # Columns (0,1), (2,3), ... (8,9) each hold one landmark's offsets.
    for start in range(0, 10, 2):
        offsets = pre[:, start:start + 2]
        points.append(centers + offsets * variances[0] * sizes)
    return np.concatenate(points, 1)

def decode(loc, priors, variances):
    """Turn encoded box regressions back into corner-form boxes.

    Args:
        loc: location predictions, shape [num_priors, 4].
        priors: prior boxes in centre-offset form (cx, cy, w, h),
            shape [num_priors, 4].
        variances: (list[float]) variances of the prior boxes;
            ``variances[0]`` scales the centre offsets and ``variances[1]``
            scales the log-size offsets.

    Returns:
        Decoded boxes as (x1, y1, x2, y2), shape [num_priors, 4].
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    centers = prior_centers + loc[:, :2] * variances[0] * prior_sizes
    sizes = prior_sizes * np.exp(loc[:, 2:] * variances[1])

    # Convert (centre, size) to (top-left, bottom-right).
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return np.concatenate((top_left, bottom_right), 1)

class PriorBox(object):
    """Generate the prior (anchor) boxes for a given input image size.

    Args:
        cfg: mapping providing ``'min_sizes'`` (one list of anchor sizes per
            feature map) and ``'steps'`` (one stride per feature map).
        image_size: (height, width) of the network input. It is indexed
            during construction, so the ``None`` default cannot be used.
        phase: accepted but not used by this class.
    """

    def __init__(self, cfg, image_size=None, phase='train'):
        super(PriorBox, self).__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.image_size = image_size
        # One [rows, cols] entry per stride, rounded up so partial cells count.
        self.feature_maps = []
        for step in self.steps:
            rows = ceil(self.image_size[0] / step)
            cols = ceil(self.image_size[1] / step)
            self.feature_maps.append([rows, cols])

    def forward(self):
        """Return all priors as an array of shape [num_priors, 4].

        Each row is (cx, cy, w, h), with every value divided by the matching
        image dimension. Priors are ordered by feature map, then row, then
        column, then anchor size.
        """
        img_h = self.image_size[0]
        img_w = self.image_size[1]
        values = []
        for level, (fm_rows, fm_cols) in enumerate(self.feature_maps):
            stride = self.steps[level]
            level_sizes = self.min_sizes[level]
            for row in range(fm_rows):
                for col in range(fm_cols):
                    # Centre of the cell, as a fraction of the image size.
                    cx = (col + 0.5) * stride / img_w
                    cy = (row + 0.5) * stride / img_h
                    for size in level_sizes:
                        values.extend((cx, cy, size / img_w, size / img_h))

        return np.array(values).reshape(-1, 4)
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    This is the later of the two ``sigmoid`` definitions in this file, so it
    is the one bound to the name once the module has loaded. The value is
    computed from ``exp(-|x|)``, which always lies in (0, 1], so
    large-magnitude inputs cannot overflow the exponential.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        NumPy scalar for scalar input, otherwise an ndarray shaped like ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) never exceeds 1, so neither branch below can overflow.
    z = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a NumPy scalar and
    # leaves real arrays untouched.
    return out[()]
def _parse_args():
    """Declare every command-line option that main() reads and parse them."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--onnx_path', help="onnx model file path", default="mobileretina.onnx")
    # NOTE(review): the options below were read by the script but never
    # declared; their defaults are placeholders to confirm.
    parser.add_argument('--img_path', default=None,
                        help="optional video file to read instead of the default camera")
    parser.add_argument('--input_shape', type=int, nargs=2, default=None,
                        metavar=('WIDTH', 'HEIGHT'),
                        help="network input size; read from the model when omitted")
    parser.add_argument('--confidence_threshold', type=float, default=0.5,
                        help="discard detections scoring at or below this value")
    parser.add_argument('--top_k', type=int, default=5000,
                        help="number of best-scoring detections kept before NMS")
    parser.add_argument('--nms_threshold', type=float, default=0.4,
                        help="overlap threshold passed to NMS")
    parser.add_argument('--no_show', dest='show_image', action='store_false',
                        help="do not draw or display the detections")
    return parser.parse_args()


def _resolve_input_shape(session, requested):
    """Return the network input size as a (width, height) tuple."""
    if requested is not None:
        return tuple(requested)
    dims = session.get_inputs()[0].shape
    # main() feeds a (1, C, H, W) tensor, so H and W are the last two dims.
    # Dynamic dimensions are not reported as ints and cannot be used here.
    if len(dims) == 4 and all(isinstance(d, int) for d in dims[2:]):
        return (dims[3], dims[2])
    raise ValueError(
        "model input size is dynamic (%r); pass --input_shape WIDTH HEIGHT" % (dims,))


def _clip_box(box, img_h, img_w):
    """Clip box[0:4] = (x1, y1, x2, y2) to the image in place.

    Returns False when the box should not be drawn.
    """
    # y1 + y2 < 0 or x1 + x2 < 0: the box centre is above/left of the image.
    if box[3] < -box[1] or box[2] < -box[0]:
        return False
    box[0] = max(0, box[0])
    box[1] = max(0, box[1])

    # y1 + y2 > 2H or x1 + x2 > 2W: the centre is below/right of the image.
    if box[3] + box[1] > 2 * img_h or box[0] + box[2] > 2 * img_w:
        return False
    box[3] = min(img_h, box[3])
    # The right edge must be clamped with min(); max() pushed every box's
    # right edge out to at least the image width.
    box[2] = min(img_w, box[2])

    # Skip boxes whose width and height differ by more than a factor of 4.
    box_w = box[2] - box[0]
    box_h = box[3] - box[1]
    if box_w > 4 * box_h or box_w * 4 < box_h:
        return False
    return True


def _draw_detections(img, dets, resize_level):
    """Draw each kept box and its five landmark points onto img in place."""
    img_h, img_w = img.shape[0], img.shape[1]
    landmark_colors = ((0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0))
    for det in dets:
        if not _clip_box(det, img_h, img_w):
            continue
        det = list(map(int, det))
        cv2.rectangle(img, (det[0] * resize_level, det[1] * resize_level),
                      (det[2] * resize_level, det[3] * resize_level), (0, 0, 255), 1)
        # Columns 5..14 hold the five (x, y) landmark pairs.
        for k, color in enumerate(landmark_colors):
            cv2.circle(img, (det[5 + 2 * k], det[6 + 2 * k]), 1, color, 4)


def main():
    """Run the ONNX face detector on a video stream and display the results.

    Frames come from ``--img_path`` when given, otherwise from camera 0.
    Each frame is resized to the network input size, run through the model,
    decoded against the prior boxes, filtered by score, reduced with NMS and,
    unless ``--no_show`` is passed, drawn and shown in a window. Press ``q``
    or Esc in the window to stop.

    Raises:
        ValueError: the model input size is dynamic and --input_shape is absent.
        RuntimeError: the video source cannot be opened.
    """
    args = _parse_args()

    # NOTE(review): the default model is "mobileretina.onnx" but the priors
    # are built from cfg_peleenet - confirm the two match.
    cfg = cfg_peleenet
    onnx_path = args.onnx_path
    session = onnxruntime.InferenceSession(onnx_path)
    input_shape = _resolve_input_shape(session, args.input_shape)  # (width, height)
    img_path = args.img_path

    print("image path:", img_path)
    print("onnx model path:", onnx_path)

    width, height = input_shape
    # Decoded boxes and landmarks are fractions of the input size; these
    # factors turn (x, y) pairs back into pixels.
    scale = np.array([width, height] * 2)
    scale1 = np.array([width, height] * 5)
    resize_level = 1

    priors = PriorBox(cfg, image_size=(height, width)).forward()

    # Tensor names do not change between frames, so look them up once.
    inname = session.get_inputs()[0].name
    outname = [output.name for output in session.get_outputs()]

    vc = cv2.VideoCapture(img_path if img_path else 0)
    if not vc.isOpened():
        raise RuntimeError("cannot open video source: %r" % (img_path if img_path else 0,))

    try:
        while True:
            ok, img_raw = vc.read()
            if not ok or img_raw is None:
                # End of stream or capture failure; retrying would spin forever.
                break
            start = time.time()

            img_raw = cv2.resize(img_raw, input_shape)
            # NOTE(review): the mean is subtracted in RGB order and the
            # channels are then flipped back to BGR - confirm this matches
            # how the model was trained.
            img = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            img = np.float32(img)
            img -= (104, 117, 123)
            image = img[:, :, ::-1].transpose((2, 0, 1))
            TestData = image[np.newaxis, :, :, :]

            start2 = time.time()
            loc, conf, landmarks = session.run(outname, {inname: TestData})
            print('net time', time.time() - start2)
            start1 = time.time()

            boxes = decode(np.squeeze(loc, axis=0), priors, cfg['variance'])
            boxes = boxes * scale / resize_level
            scores = np.squeeze(conf, axis=0)[:, 1]

            landmarks = decode_landm(np.squeeze(landmarks, axis=0), priors, cfg['variance'])
            landmarks = landmarks * scale1 / resize_level

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landmarks = landmarks[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landmarks = landmarks[order]
            scores = scores[order]

            # do NMS (skipped when nothing passed the score threshold)
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            if len(dets):
                keep = py_cpu_nms(dets, args.nms_threshold)
                dets = dets[keep, :]
                landmarks = landmarks[keep, :]
            print('time', time.time() - start, start1 - start)

            # Row layout: x1, y1, x2, y2, score, then five (x, y) landmarks.
            dets = np.concatenate((dets, landmarks), axis=1)

            if args.show_image:
                _draw_detections(img_raw, dets, resize_level)

                # Shrink very tall frames before display.
                if img_raw.shape[0] > 1080:
                    fy = 1070 / img_raw.shape[0]
                    img_raw = cv2.resize(img_raw, (0, 0), fx=fy, fy=fy,
                                         interpolation=cv2.INTER_NEAREST)
                cv2.imshow("sdf", img_raw)
                if (cv2.waitKey(1) & 0xFF) in (27, ord('q')):
                    break
    finally:
        vc.release()
        if args.show_image:
            cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
``````

0
0 收藏

0 评论
0 收藏
0