Python 爬取音标

原创
2019/07/20 10:09
阅读数 904

 

# -*- coding: UTF-8 -*-
import requests
import time
from bs4 import BeautifulSoup
# Scrape phonetic transcriptions from Oxford Learner's Dictionaries.
#
# Reads ./words.txt one line at a time, requests the dictionary page whose
# URL ends with the stripped line, collects the text of every
# "span.pron-g > span.phon" element containing the label 'NAmE'
# (presumably the North American pronunciation -- confirm against the
# site's markup), and appends them, each wrapped in [...], as one line of
# ./result.txt per input line.
#
# Targets Python 3: the result file is opened as UTF-8 text and receives
# str, instead of writing encoded bytes to a text-mode handle (which raises
# TypeError on Python 3).

BASE_URL = "https://www.oxfordlearnersdictionaries.com/definition/english/"

# Loop-invariant request headers; a browser-like User-Agent is sent with
# every request.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36'}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# `with` guarantees both files are closed even if a request or parse raises.
with open('./words.txt') as f, \
        open('./result.txt', 'a', encoding='utf-8') as fw:
    for index, line in enumerate(f, 1):
        url = BASE_URL + line.strip()
        print(str(index) + ":" + url)

        wbdata = requests.get(
            url, headers=HEADERS, timeout=REQUEST_TIMEOUT
        ).text
        soup = BeautifulSoup(wbdata, 'html.parser')
        phon_texts = (
            span.get_text()
            for span in soup.select("span.pron-g > span.phon")
        )

        # Keep only entries carrying the 'NAmE' label, then drop the label
        # and the '//' delimiters so just the transcription remains.
        result = ''.join(
            '[' + text.replace('NAmE', '').replace('//', '') + ']'
            for text in phon_texts
            if 'NAmE' in text
        )
        print(result)

        # One output line per input line (empty when nothing matched), so
        # result.txt stays aligned with words.txt.
        fw.write(result + "\n")

        # Short pause between requests to avoid hammering the server.
        time.sleep(0.1)

 

 

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部