AIGC 之将草稿内容转为真实内容 | 征稿活动 V6

05/16 12:01

AI 在识别之后可以生成一个 txt 文件，其中保存着各个矩形的数据，如下所示。

1 0.682994 0.452703 0.127743 0.0981199
2 0.683777 0.452115 0.123041 0.0992949
2 0.562696 0.65188 0.12069 0.103995
2 0.5721 0.343713 0.125392 0.0963572
0 0.290752 0.341657 0.111285 0.0887192
1 0.302116 0.543772 0.0979624 0.0851939
1 0.320533 0.73913 0.111285 0.0951821

def get_rect_list_from_detect_result(pic_path, detect_result_path):
    """Convert normalized detection labels into pixel-space rectangles.

    Each non-blank line of the label file must hold five whitespace-separated
    fields: ``class x_center y_center width height``, where the four numbers
    are fractions of the picture size.

    Args:
        pic_path: Path of the picture the detection was run on; only its
            pixel size is read.
        detect_result_path: Path of the UTF-8 label text file.

    Returns:
        A list of dicts with keys ``type`` (one of "B", "S", "L"), ``width``,
        ``height``, ``x`` and ``y``. ``x``/``y`` are the rectangle's top-left
        corner in pixels, measured from the picture's top-left corner.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields or
            a numeric field cannot be parsed.
        KeyError: If the class id is not "0", "1" or "2".
    """
    with open(detect_result_path, "r", encoding="utf-8") as f:
        result = f.readlines()

    # Use a context manager so the image file handle is released as soon as
    # the size has been read.
    with Image.open(pic_path) as image:
        pic_width, pic_height = image.size

    class_dict = {"0": "B", "1": "S", "2": "L"}
    rect_list = []

    for line in result:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue

        _class, x_center_norm, y_center_norm, width_norm, height_norm = fields
        rect_width = round(float(width_norm) * pic_width, 2)
        rect_height = round(float(height_norm) * pic_height, 2)
        # Labels give the box centre; shift by half the size to get the
        # top-left corner.
        rect_x = round(float(x_center_norm) * pic_width, 2) - rect_width / 2
        rect_y = round(float(y_center_norm) * pic_height, 2) - rect_height / 2
        rect_list.append({
            "type": class_dict[_class],
            "width": rect_width,
            "height": rect_height,
            "x": rect_x,
            "y": rect_y,
        })

    return rect_list

def decide_canvas_size(rect_list):
    """Choose the canvas orientation and scale ratios from the rect spread.

    The spread is the difference between the largest and smallest ``x``
    (respectively ``y``) value among the rectangles. A spread that is at
    least as wide as it is tall selects a 960x640 landscape canvas;
    otherwise a 640x960 portrait canvas is used.

    Args:
        rect_list: Non-empty list of dicts carrying numeric ``x`` and ``y``.

    Returns:
        A tuple ``(canvas_width, canvas_height, width_prop, height_prop)``.
        Each ratio is the smaller of spread/canvas and canvas/spread along
        that axis, so it never exceeds 1; a zero spread yields a ratio of 0.

    Raises:
        ValueError: If ``rect_list`` is empty.
    """
    xs = [rect["x"] for rect in rect_list]
    ys = [rect["y"] for rect in rect_list]

    distance_x = max(xs) - min(xs)
    distance_y = max(ys) - min(ys)

    # Landscape when the horizontal spread dominates (ties included).
    if distance_x >= distance_y:
        canvas_width, canvas_height = 960, 640
    else:
        canvas_width, canvas_height = 640, 960

    if canvas_width > distance_x:
        width_prop = distance_x / canvas_width
    else:
        width_prop = canvas_width / distance_x

    if canvas_height > distance_y:
        height_prop = distance_y / canvas_height
    else:
        height_prop = canvas_height / distance_y

    return canvas_width, canvas_height, width_prop, height_prop

AI 在识别时，是以图片左上角为原点的，往右为 x 轴正方向，往下为 y 轴正方向。

def adjust_rect_data(rect_list, canvas_width, canvas_height, width_prop, height_prop):
    """Rescale, re-origin, centre and align the rectangles for the canvas.

    The rectangles are modified in place in four steps:

    1. Positions are made relative to the smallest ``x``/``y``, scaled by
       ``width_prop``/``height_prop`` and converted to a coordinate system
       whose origin is the canvas centre with y pointing up. Sizes are
       scaled by the same ratios.
    2. Everything is shifted right and down by half of the blank space left
       between the largest ``x`` / smallest ``y`` and the right / bottom
       canvas edge. The gap is measured from the ``x``/``y`` values only;
       rectangle sizes are not taken into account.
    3. Any two rectangles whose ``x``, ``y``, ``width`` or ``height`` differ
       by at most 15 get that attribute replaced by their average.
    4. All four attributes are rounded to integers.

    Args:
        rect_list: List of dicts with numeric ``x``, ``y``, ``width`` and
            ``height``. Other keys are left untouched.
        canvas_width: Canvas width in pixels.
        canvas_height: Canvas height in pixels.
        width_prop: Horizontal scale factor.
        height_prop: Vertical scale factor.

    Returns:
        The same ``rect_list`` object, after in-place adjustment. An empty
        list is returned unchanged.
    """
    if not rect_list:
        # min()/max() below would raise on an empty list.
        return rect_list

    min_x = min(rect["x"] for rect in rect_list)
    min_y = min(rect["y"] for rect in rect_list)

    for rect in rect_list:
        rect["x"] = (rect["x"] - min_x) * width_prop - canvas_width / 2
        # Flip the y axis: source y grows downward, target y grows upward.
        rect["y"] = canvas_height / 2 - (rect["y"] - min_y) * height_prop
        rect["width"] *= width_prop
        rect["height"] *= height_prop

    # Split the blank space on the right and bottom evenly.
    max_x = max(rect["x"] for rect in rect_list)
    min_y = min(rect["y"] for rect in rect_list)
    right_distance = (canvas_width / 2 - max_x) / 2
    bottom_distance = abs(-canvas_height / 2 - min_y) / 2

    for rect in rect_list:
        rect["x"] += right_distance
        rect["y"] -= bottom_distance

    # Snap nearly-equal attributes together. Each attribute is averaged into
    # itself; the y average must be stored in "y", not "x".
    diff = 15
    for rect1 in rect_list:
        for rect2 in rect_list:
            if rect1 is rect2:
                continue
            for key in ("x", "y", "width", "height"):
                if abs(rect1[key] - rect2[key]) <= diff:
                    average = (rect1[key] + rect2[key]) / 2
                    rect1[key] = average
                    rect2[key] = average

    # Round to whole pixels.
    for rect in rect_list:
        for key in ("x", "y", "width", "height"):
            rect[key] = round(rect[key])

    return rect_list

import os
import subprocess
import uuid
from pathlib import Path

from flask import Flask, request
from PIL import Image

app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = str(Path(__file__).parent / "upload")
# NOTE(review): placeholder secret; load a real one from the environment
# before deploying.
app.config["SECRET_KEY"] = "SECRET_KEY"

# Upload extensions accepted by the endpoint (compared case-insensitively).
ALLOWED_EXTENSIONS = (".png", ".jpg", ".jpeg")


@app.route("/d2r", methods=["POST"])
def draft_to_reality():
    """Turn an uploaded draft picture into layout data.

    Expects a multipart POST with the picture in the ``file`` field.

    Returns:
        A dict with a ``code`` string and a ``message``:
        ``"1"`` when the file is missing or has an unsupported extension,
        ``"2"`` when detection produced no result, and ``"0"`` with the
        layout data as ``message`` on success.
    """
    # Fetch the picture sent by the front end; a missing field or filename
    # is reported as a format error instead of raising.
    file = request.files.get("file")
    filename = (file.filename or "") if file is not None else ""
    if not filename.lower().endswith(ALLOWED_EXTENSIONS):
        return {
            "code": "1",
            "message": "图片格式错误"
        }

    # Save the picture under a unique name; make sure the folder exists.
    os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
    pic_path = Path(app.config["UPLOAD_FOLDER"]) / f"{uuid.uuid4()}.jpg"
    file.save(pic_path)

    # Object detection.
    is_ok, detect_result_path = detect(pic_path)
    if not is_ok:
        return {
            "code": "2",
            "message": "图片识别失败"
        }

    # Build the response data.
    rect_list = get_rect_list_from_detect_result(pic_path, detect_result_path)
    canvas_width, canvas_height, width_prop, height_prop = decide_canvas_size(rect_list)
    rect_list = adjust_rect_data(rect_list, canvas_width, canvas_height, width_prop, height_prop)
    final_data = make_final_data(rect_list, canvas_width, canvas_height)

    return {
        "code": "0",
        "message": final_data
    }


def detect(pic_path):
    """Run the YOLOv5 detection script on a picture.

    Args:
        pic_path: Path of the picture to analyse.

    Returns:
        ``(True, label_path)`` when the label text file exists after the
        run, otherwise ``(False, None)``.
    """
    # Pass an argument list instead of a shell string so that paths
    # containing spaces or shell metacharacters are handled correctly.
    command = [
        "python", "./yolov5/detect.py",
        "--weights", "./yolov5/best.pt",
        "--source", str(pic_path),
        "--save-txt",
        "--exist-ok",
    ]
    try:
        subprocess.run(command, check=False)
    except OSError:
        # The interpreter could not be started; report it as a failed run.
        return False, None

    # A successful detection writes a txt file named after the picture.
    detect_result_path = f"./yolov5/runs/detect/exp/labels/{Path(pic_path).name.split('.')[0]}.txt"
    if Path(detect_result_path).exists():
        return True, detect_result_path
    return False, None


def get_rect_list_from_detect_result(pic_path, detect_result_path):
    """Convert normalized detection labels into pixel-space rectangles.

    Each non-blank line of the label file must hold five whitespace-separated
    fields: ``class x_center y_center width height``, where the four numbers
    are fractions of the picture size.

    Args:
        pic_path: Path of the picture the detection was run on; only its
            pixel size is read.
        detect_result_path: Path of the UTF-8 label text file.

    Returns:
        A list of dicts with keys ``type`` (one of "B", "S", "L"), ``width``,
        ``height``, ``x`` and ``y``. ``x``/``y`` are the rectangle's top-left
        corner in pixels, measured from the picture's top-left corner.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields or
            a numeric field cannot be parsed.
        KeyError: If the class id is not "0", "1" or "2".
    """
    with open(detect_result_path, "r", encoding="utf-8") as f:
        result = f.readlines()

    # Use a context manager so the image file handle is released as soon as
    # the size has been read.
    with Image.open(pic_path) as image:
        pic_width, pic_height = image.size

    class_dict = {"0": "B", "1": "S", "2": "L"}
    rect_list = []

    for line in result:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue

        _class, x_center_norm, y_center_norm, width_norm, height_norm = fields
        rect_width = round(float(width_norm) * pic_width, 2)
        rect_height = round(float(height_norm) * pic_height, 2)
        # Labels give the box centre; shift by half the size to get the
        # top-left corner.
        rect_x = round(float(x_center_norm) * pic_width, 2) - rect_width / 2
        rect_y = round(float(y_center_norm) * pic_height, 2) - rect_height / 2
        rect_list.append({
            "type": class_dict[_class],
            "width": rect_width,
            "height": rect_height,
            "x": rect_x,
            "y": rect_y,
        })

    return rect_list


def decide_canvas_size(rect_list):
    """Choose the canvas orientation and scale ratios from the rect spread.

    The spread is the difference between the largest and smallest ``x``
    (respectively ``y``) value among the rectangles. A spread that is at
    least as wide as it is tall selects a 960x640 landscape canvas;
    otherwise a 640x960 portrait canvas is used.

    Args:
        rect_list: Non-empty list of dicts carrying numeric ``x`` and ``y``.

    Returns:
        A tuple ``(canvas_width, canvas_height, width_prop, height_prop)``.
        Each ratio is the smaller of spread/canvas and canvas/spread along
        that axis, so it never exceeds 1; a zero spread yields a ratio of 0.

    Raises:
        ValueError: If ``rect_list`` is empty.
    """
    min_x = min(rect_list, key=lambda rect: rect["x"])["x"]
    max_x = max(rect_list, key=lambda rect: rect["x"])["x"]
    min_y = min(rect_list, key=lambda rect: rect["y"])["y"]
    max_y = max(rect_list, key=lambda rect: rect["y"])["y"]

    # Landscape or portrait depends on which spread is larger.
    distance_x = max_x - min_x
    distance_y = max_y - min_y

    if distance_x >= distance_y:
        canvas_width = 960
        canvas_height = 640
    else:
        canvas_width = 640
        canvas_height = 960

    width_prop = distance_x / canvas_width if canvas_width > distance_x else canvas_width / distance_x
    height_prop = distance_y / canvas_height if canvas_height > distance_y else canvas_height / distance_y

    return canvas_width, canvas_height, width_prop, height_prop


def adjust_rect_data(rect_list, canvas_width, canvas_height, width_prop, height_prop):
    """Rescale, re-origin, centre and align the rectangles for the canvas.

    The rectangles are modified in place in four steps:

    1. Positions are made relative to the smallest ``x``/``y``, scaled by
       ``width_prop``/``height_prop`` and converted to a coordinate system
       whose origin is the canvas centre with y pointing up. Sizes are
       scaled by the same ratios.
    2. Everything is shifted right and down by half of the blank space left
       between the largest ``x`` / smallest ``y`` and the right / bottom
       canvas edge. The gap is measured from the ``x``/``y`` values only;
       rectangle sizes are not taken into account.
    3. Any two rectangles whose ``x``, ``y``, ``width`` or ``height`` differ
       by at most 15 get that attribute replaced by their average.
    4. All four attributes are rounded to integers.

    Args:
        rect_list: List of dicts with numeric ``x``, ``y``, ``width`` and
            ``height``. Other keys are left untouched.
        canvas_width: Canvas width in pixels.
        canvas_height: Canvas height in pixels.
        width_prop: Horizontal scale factor.
        height_prop: Vertical scale factor.

    Returns:
        The same ``rect_list`` object, after in-place adjustment. An empty
        list is returned unchanged.
    """
    if not rect_list:
        # min()/max() below would raise on an empty list.
        return rect_list

    min_x = min(rect["x"] for rect in rect_list)
    min_y = min(rect["y"] for rect in rect_list)

    for rect in rect_list:
        rect["x"] = (rect["x"] - min_x) * width_prop - canvas_width / 2
        # Flip the y axis: source y grows downward, target y grows upward.
        rect["y"] = canvas_height / 2 - (rect["y"] - min_y) * height_prop
        rect["width"] *= width_prop
        rect["height"] *= height_prop

    # Split the blank space on the right and bottom evenly.
    max_x = max(rect["x"] for rect in rect_list)
    min_y = min(rect["y"] for rect in rect_list)
    right_distance = (canvas_width / 2 - max_x) / 2
    bottom_distance = abs(-canvas_height / 2 - min_y) / 2

    for rect in rect_list:
        rect["x"] += right_distance
        rect["y"] -= bottom_distance

    # Snap nearly-equal attributes together. Each attribute is averaged into
    # itself; the y average must be stored in "y", not "x".
    diff = 15
    for rect1 in rect_list:
        for rect2 in rect_list:
            if rect1 is rect2:
                continue
            for key in ("x", "y", "width", "height"):
                if abs(rect1[key] - rect2[key]) <= diff:
                    average = (rect1[key] + rect2[key]) / 2
                    rect1[key] = average
                    rect2[key] = average

    # Round to whole pixels.
    for rect in rect_list:
        for key in ("x", "y", "width", "height"):
            rect[key] = round(rect[key])

    return rect_list


def make_final_data(rect_list, canvas_width, canvas_height):
    """Bundle the canvas size and the rectangles into the response payload.

    Args:
        rect_list: The adjusted rectangles.
        canvas_width: Canvas width in pixels.
        canvas_height: Canvas height in pixels.

    Returns:
        A dict with ``canvas`` (``width``/``height``) and ``rects``.
    """
    return {
        "canvas": {
            "width": canvas_width,
            "height": canvas_height
        },
        "rects": rect_list
    }


if __name__ == "__main__":
    app.run()

{   canvas: { height: 960, width: 640 },   rects: [     { height: 93, type: 'S', width: 122, x: 215, y: 128 },     { height: 208, type: 'B', width: 241, x: 193, y: -165 },     { height: 119, type: 'S', width: 148, x: -171, y: -56 },     { height: 119, type: 'L', width: 148, x: -215, y: 165 }   ] } 

1. 用于 AI 的训练数据量可以再大一些，这样可以提高识别精度，也可以生成更多种类的组件。

2. 个别草稿的识别结果不太令人满意：有些组件在草稿上是分开的，生成后却重合了；方框的大小比例也需要更精确。因此，对提取的数据做调整的算法还有待改进。

3. 目前 AI 的作用仅限于识别，并不能很好地体现 AIGC 的魅力。可以加入自然语言处理 (NLP) 技术来获取用户意向，再用爬虫爬取相应的图片资源，从而生成更好看、更完善的内容。

https://pan.baidu.com/s/1Z-q2mc2jsX5h_fWD_QjaOA

https://pan.baidu.com/s/141gpeSjGunKMf9SlqY0H7Q

0 评论
0 收藏
0