文档章节

Python批量替换

o
 osc_isezqdgg
发布于 2019/09/18 14:40
字数 1182
阅读 0
收藏 0

精选30+云产品,助力企业轻松上云!>>>

import re
import jieba.analyse
import codecs
import pandas as pd
def word_replace(xianbingshi, hospital1, output_path=None):
    """Replace hospital names in a text file with the ``[hospital]`` tag.

    The word list is de-duplicated and sorted longest-first, so a longer
    name is replaced before any shorter name it contains. Every processed
    line is stripped, echoed to stdout, and written to the output file
    followed by a newline.

    Args:
        xianbingshi: Path of the UTF-8 source text, read line by line.
        hospital1: Path of a UTF-8 word list, one hospital name per line.
        output_path: Where to write the tagged text. Defaults to the
            original hard-coded ``xianbingshi_write_sub.txt`` location.
    """
    if output_path is None:
        output_path = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\xianbingshi_write_sub.txt'

    seen = set()
    hospital = []
    with codecs.open(hospital1, 'r', 'utf8') as f:
        for line in f:
            word = line.strip()
            # Skip blank entries: str.replace('', tag) would insert the tag
            # between every character of the text.
            if word and word not in seen:
                seen.add(word)
                hospital.append(word)
    # Longest names first so they win over shorter names they contain.
    hospital.sort(key=len, reverse=True)

    data = []
    with codecs.open(xianbingshi, 'r', 'utf8') as f:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for line in f:
            for ho in hospital:
                if ho in line:
                    line = line.replace(ho, '[hospital]')
            line = line.strip()
            data.append(line)
            print(line)

    with codecs.open(output_path, 'w', 'utf8') as f:
        for line in data:
            f.write(line + '\n')

def word_replace3(xianbingshi2, operation1):
    """Replace operation terms in ``xianbingshi2`` with ``[operation]``, in place.

    The word list is de-duplicated and sorted longest-first. Each line of
    the text is tagged, stripped, echoed to stdout, and the file is then
    overwritten with the tagged lines (one per line).

    Args:
        xianbingshi2: Path of the UTF-8 text file to read and overwrite.
        operation1: Path of a UTF-8 word list, one term per line.
    """
    seen = set()
    operation = []
    with codecs.open(operation1, 'r', 'utf8') as f:
        for line in f:
            word = line.strip()
            # Skip blank entries: '' is "in" every string, and
            # str.replace('', tag) inserts the tag between every character.
            if word and word not in seen:
                seen.add(word)
                operation.append(word)
    # Longest terms first so they win over shorter terms they contain.
    operation.sort(key=len, reverse=True)

    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for line in f:
            for op in operation:
                if op in line:
                    line = line.replace(op, '[operation]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was read above, so it is safe to overwrite it now.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        for line in data:
            f.write(line + '\n')
def word_replace1(xianbingshi2,disease1):
    """Tag multi-character disease terms in ``xianbingshi2`` as ``[disease]``.

    Terms are read from ``disease1`` (UTF-8, one per line), de-duplicated
    and applied longest-first; terms of one character or less are ignored.
    Each resulting line is stripped, printed, and written back to
    ``xianbingshi2``, which is overwritten.
    """
    known = set()
    vocabulary = []
    with codecs.open(disease1, 'r', 'utf8') as word_file:
        for raw in word_file:
            term = raw.strip()
            if term in known:
                continue
            known.add(term)
            vocabulary.append(term)

    # Longest terms first; only terms longer than one character are applied.
    ordered = sorted(vocabulary, key=len, reverse=True)
    usable = [term for term in ordered if len(term) > 1]

    rewritten = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as text_file:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for raw in text_file:
            tagged = raw
            for term in usable:
                tagged = tagged.replace(term, '[disease]')
            tagged = tagged.strip()
            rewritten.append(tagged)
            print(tagged)

    with codecs.open(xianbingshi2, 'w', 'utf8') as out_file:
        out_file.write(''.join(entry + '\n' for entry in rewritten))
def word_replace2(xianbingshi2, symptom1):
    """Tag multi-character symptom terms in ``xianbingshi2`` as ``[symptom]``.

    Terms come from ``symptom1`` (UTF-8, one per line). Duplicates are
    dropped, the remaining terms are applied longest-first, and terms of
    one character or less are skipped. Every processed line is stripped,
    printed, and written back over ``xianbingshi2``.
    """
    already_listed = set()
    terms = []
    with codecs.open(symptom1, 'r', 'utf8') as word_file:
        for entry in word_file:
            entry = entry.strip()
            if entry not in already_listed:
                already_listed.add(entry)
                terms.append(entry)

    # Sort longest-first, then keep only terms longer than one character.
    terms.sort(key=len, reverse=True)
    multi_char = [entry for entry in terms if len(entry) > 1]

    output_lines = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as text_file:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for text in text_file:
            for entry in multi_char:
                text = text.replace(entry, '[symptom]')
            text = text.strip()
            output_lines.append(text)
            print(text)

    with codecs.open(xianbingshi2, 'w', 'utf8') as out_file:
        for text in output_lines:
            out_file.write(text + '\n')

def word_replace4(xianbingshi2, test1):
    """Replace examination/test terms in ``xianbingshi2`` with ``[test]``, in place.

    The word list is de-duplicated and sorted longest-first. Each line of
    the text is tagged, stripped, echoed to stdout, and the file is then
    overwritten with the tagged lines (one per line).

    Args:
        xianbingshi2: Path of the UTF-8 text file to read and overwrite.
        test1: Path of a UTF-8 word list, one term per line.
    """
    seen = set()
    test = []
    with codecs.open(test1, 'r', 'utf8') as f:
        for line in f:
            word = line.strip()
            # Skip blank entries: '' is "in" every string, and
            # str.replace('', tag) inserts the tag between every character.
            if word and word not in seen:
                seen.add(word)
                test.append(word)
    # Longest terms first so they win over shorter terms they contain.
    test.sort(key=len, reverse=True)

    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for line in f:
            for te in test:
                if te in line:
                    line = line.replace(te, '[test]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was read above, so it is safe to overwrite it now.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        for line in data:
            f.write(line + '\n')
def word_replace5(xianbingshi2, time1):
    """Replace time expressions in ``xianbingshi2`` with ``[time]``, in place.

    The word list is de-duplicated and sorted longest-first. Each line of
    the text is tagged, stripped, echoed to stdout, and the file is then
    overwritten with the tagged lines (one per line).

    Args:
        xianbingshi2: Path of the UTF-8 text file to read and overwrite.
        time1: Path of a UTF-8 word list, one time expression per line.
    """
    seen = set()
    # Named time_words rather than `time` to avoid shadowing the stdlib
    # module name.
    time_words = []
    with codecs.open(time1, 'r', 'utf8') as f:
        for line in f:
            word = line.strip()
            # Skip blank entries: '' is "in" every string, and
            # str.replace('', tag) inserts the tag between every character.
            if word and word not in seen:
                seen.add(word)
                time_words.append(word)
    # Longest expressions first so they win over shorter ones they contain.
    time_words.sort(key=len, reverse=True)

    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for line in f:
            for t in time_words:
                if t in line:
                    line = line.replace(t, '[time]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was read above, so it is safe to overwrite it now.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        for line in data:
            f.write(line + '\n')
def word_replace6(xianbingshi2, organ1):
    """Tag multi-character organ/body-part terms in ``xianbingshi2`` as ``[organ]``.

    Terms are loaded from ``organ1`` (UTF-8, one per line), de-duplicated,
    and applied longest-first; terms of one character or less are skipped.
    Each processed line is stripped, printed, and the file at
    ``xianbingshi2`` is overwritten with the result.
    """
    loaded = set()
    candidates = []
    with codecs.open(organ1, 'r', 'utf8') as word_file:
        for raw in word_file:
            name = raw.strip()
            if name in loaded:
                continue
            loaded.add(name)
            candidates.append(name)

    # Longest names first, restricted to names longer than one character.
    by_length = sorted(candidates, key=len, reverse=True)
    replaceable = [name for name in by_length if len(name) > 1]

    results = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as text_file:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for current in text_file:
            for name in replaceable:
                current = current.replace(name, '[organ]')
            current = current.strip()
            results.append(current)
            print(current)

    with codecs.open(xianbingshi2, 'w', 'utf8') as out_file:
        out_file.write(''.join(item + '\n' for item in results))
def word_replace7(xianbingshi2, symptom1):
    """Tag single-character symptom terms in ``xianbingshi2`` as ``[symptom]``.

    Only entries of ``symptom1`` (UTF-8, one per line) that are exactly one
    character long after stripping are used; each accepted entry is printed
    once. Every processed text line is stripped, printed, and written back
    over ``xianbingshi2``.
    """
    single_chars = []
    with codecs.open(symptom1, 'r', 'utf8') as word_file:
        for raw in word_file:
            char = raw.strip()
            # Keep only unseen one-character entries.
            if len(char) != 1 or char in single_chars:
                continue
            single_chars.append(char)
            print(char)

    # All entries have length one, so this keeps first-seen order.
    single_chars = sorted(single_chars, key=len, reverse=True)

    tagged_lines = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as text_file:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for content in text_file:
            for char in single_chars:
                content = content.replace(char, '[symptom]')
            content = content.strip()
            tagged_lines.append(content)
            print(content)

    with codecs.open(xianbingshi2, 'w', 'utf8') as out_file:
        for content in tagged_lines:
            out_file.write(content + '\n')
def word_replace8(xianbingshi2, disease1):
    """Replace single-character disease terms in ``xianbingshi2`` with ``[disease]``.

    Only word-list entries that are exactly one character long (after
    stripping) are used. Each line of the text is tagged, stripped, echoed
    to stdout, and the file is then overwritten with the tagged lines.

    Args:
        xianbingshi2: Path of the UTF-8 text file to read and overwrite.
        disease1: Path of a UTF-8 word list, one term per line.
    """
    seen = set()
    disease = []
    with codecs.open(disease1, 'r', 'utf8') as f:
        for line in f:
            word = line.strip()
            # The original compared the string itself to 1 (always False),
            # so no term was ever collected; the length is what is meant.
            if len(word) == 1 and word not in seen:
                seen.add(word)
                disease.append(word)

    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for line in f:
            for di in disease:
                line = line.replace(di, '[disease]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was read above, so it is safe to overwrite it now.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        for line in data:
            f.write(line + '\n')
def word_replace9(xianbingshi2, organ1):
    """Replace single-character organ terms in ``xianbingshi2`` with ``[organ]``.

    Only word-list entries that are exactly one character long (after
    stripping) are used. Each line of the text is tagged, stripped, echoed
    to stdout, and the file is then overwritten with the tagged lines.

    Args:
        xianbingshi2: Path of the UTF-8 text file to read and overwrite.
        organ1: Path of a UTF-8 word list, one term per line.
    """
    seen = set()
    organ = []
    with codecs.open(organ1, 'r', 'utf8') as f:
        for line in f:
            word = line.strip()
            # The original compared the string itself to 1 (always False),
            # so no term was ever collected; the length is what is meant.
            if len(word) == 1 and word not in seen:
                seen.add(word)
                organ.append(word)

    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        # Stated tagging priority: hospital, operation, test, symptom,
        # disease, organ, time.
        for line in f:
            for o in organ:
                line = line.replace(o, '[organ]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was read above, so it is safe to overwrite it now.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        for line in data:
            f.write(line + '\n')
if __name__ == '__main__':
    # Source text and the working copy that every later step rewrites.
    xianbingshi = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\xianbingshi_write.txt'
    xianbingshi2 =r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\xianbingshi_write_sub.txt'

    # Word lists, one term per line.
    hospital1 = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\hospital_0903.txt'
    operation1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\operation_0903.txt"
    disease1 =r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\disease_0903.txt'
    symptom1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\症状.txt"
    test1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\test_0903.txt"
    time1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\time1.txt"
    organ1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\organ_0903.txt"

    # First pass reads the source text and tags hospital names.
    word_replace(xianbingshi, hospital1)

    # Remaining passes run in this exact order, each one reading and
    # overwriting the working copy left by the previous pass.
    # word_replace5 (time expressions, time1) is currently not run.
    for step, word_list in (
        (word_replace3, operation1),
        (word_replace1, disease1),
        (word_replace2, symptom1),
        (word_replace4, test1),
        (word_replace6, organ1),
        (word_replace7, symptom1),
        (word_replace8, disease1),
        (word_replace9, organ1),
    ):
        step(xianbingshi2, word_list)

 

o
粉丝 0
博文 500
码字总数 0
作品 0
私信 提问
加载中
请先登录后再评论。
手把手教你使用Python批量创建复工证明 - 知乎

/1 前言/ 受疫情影响,2020年2月份、3月份企业复工需为员工开具复工证明。因本公司人数较多,复制粘贴工作量巨大,特此为行政同事写此小工具,现与诸位共享。 /2 目标/ 1.实现批量将 Excel ...

osc_4hct2n4z
04/22
19
0
学Python需要天赋吗?看完弟弟编写的爬虫与爆破脚本,只有汗水!

Python学习记录脚本,希望弟弟通过练习编写脚本一点点提升自己很菜的编程水平~~明天会更好,希望自己越来越强吧。 实现原理 Python3 实现批量查询网站的百度权重以及收录情况,利用的是站长之...

Python新世界
2018/07/24
0
0
ansible批量管理服务工具

ansible批量管理服务工具 批量管理服务器的工具 三种批量管理工具 Jenkins简介 Ansible服务器简单的综合安全管理策略 ansible查看帮助 安装ansible流程 如果Centos7版本 需要安装yum -y in...

osc_9fk93pk2
2019/07/24
3
0
手把手教你使用Python批量创建复工证明 - 知乎

/1 前言/ 受疫情影响,2020年2月份、3月份企业复工需为员工开具复工证明。因本公司人数较多,复制粘贴工作量巨大,特此为行政同事写此小工具,现与诸位共享。 /2 目标/ 1.实现批量将 Excel ...

Python的那些事儿
04/21
0
0
手把手教你使用Python批量创建复工证明

/1 前言/ 受疫情影响,2020年2月份、3月份企业复工需为员工开具复工证明。因本公司人数较多,复制粘贴工作量巨大,特此为行政同事写此小工具,现与诸位共享。 /2 目标/ 1.实现批量将 Excel ...

pengdongcheng
04/21
0
0

没有更多内容

加载失败,请刷新页面

加载更多

阻塞锁,非阻塞锁,自旋锁,互斥锁

1.阻塞锁 多个线程同时调用同一个方法的时候,所有线程都被排队处理了。让线程进入阻塞状态进行等待,当获得相应的信号(唤醒,时间) 时,才可以进入线程的准备就绪状态,准备就绪状态的所有...

osc_umiwij2c
14分钟前
0
0
Asp.NetCore3.1 WebApi中模型验证

前言   不管是前端,还是后端,做数据合法性验证是避免不了的,这边文章就记录一下Asp.NetCore3.1 WebApi中的模型验证; 传统写法--不使用模型验证   来,先上图:   我相信,应该绝大...

osc_qgfjs4a5
16分钟前
21
0
龙芯开源社区上线.NET主页

龙芯团队从2019年7 月份开始着手.NET Core的MIPS64支持研发,经过将近一年的研发,在2020年6月18日完成了里程碑性的工作,在github CoreCLR 仓库:https://github.com/gsvm/coreclr, 随后受...

osc_bj12kvua
16分钟前
11
0
高并发下浏览量入库设计

一、背景 文章浏览量统计,low的做法是:用户每次浏览,前端会发送一个GET请求获取一篇文章详情时,会把这篇文章的浏览量+1,存进数据库里。 1.1 这么做,有几个问题: 在GET请求的业务逻辑里...

osc_uj3h5gt9
17分钟前
17
0
nginx timeout 配置 全局timeout 局部timeout web timeout

nginx比较强大,可以针对单个域名请求做出单个连接超时的配置. 比如些动态解释和静态解释可以根据业务的需求配置 proxy_connect_timeout :后端服务器连接的超时时间_发起握手等候响应超时时间...

osc_5cok9i01
19分钟前
3
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部