北京理工大学嵩天老师编写的小课件，很不完善，放进来只是为了方便自己用手机查看
1 import requests
2 import re
3 import bs4
4 import traceback
5
def getHTMLText(url, code="utf-8", timeout=30):
    """Download a page and return its text decoded with a fixed encoding.

    Args:
        url: Address of the page to fetch.
        code: Encoding assigned to the response before its text is read.
            It is set explicitly instead of using ``r.apparent_encoding``,
            which the original author noted is slower.
        timeout: Seconds to wait for the server. Without a timeout a single
            unresponsive host would block the whole crawl indefinitely.

    Returns:
        The decoded page text, or ``""`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are turned into an empty page; Ctrl-C
        # and programming errors are no longer silently swallowed.
        return ""
17
def getStockList(lst, stockURL):
    """Append every stock code found in the links of a listing page to ``lst``.

    The page at ``stockURL`` is fetched, each ``<a>`` tag's ``href`` is
    searched for a code of the form ``sh``/``sz`` followed by six digits, and
    the first such code in each link is appended. Duplicates are kept, and
    ``lst`` is modified in place.

    Args:
        lst: List that receives the stock codes.
        stockURL: URL of the page listing the stocks.
    """
    # The listing page is decoded as GB2312 (per the original author's note
    # about the East Money site).
    html = getHTMLText(stockURL, "GB2312")
    soup = bs4.BeautifulSoup(html, "html.parser")
    code_pattern = re.compile(r"[s][hz]\d{6}")
    for anchor in soup.find_all("a"):
        href = anchor.attrs.get("href")
        if not href:
            # Anchor without a target: nothing to search.
            continue
        match = code_pattern.search(href)
        if match:
            lst.append(match.group(0))
30
def _parseStockInfo(html):
    """Turn one stock page into a dict, or return None if the markup is unexpected."""
    soup = bs4.BeautifulSoup(html, "html.parser")
    stockInfo = soup.find("div", attrs={"class": "stock-bets"})
    if stockInfo is None:
        return None

    nameTags = stockInfo.find_all(attrs={"class": "bets-name"})
    if not nameTags:
        return None
    nameParts = nameTags[0].text.split()
    if not nameParts:
        return None

    # <dt> tags hold the field names and <dd> tags the matching values.
    keyList = stockInfo.find_all("dt")
    valueList = stockInfo.find_all("dd")
    if len(valueList) < len(keyList):
        # A name without a value would misalign the record; skip the page.
        return None

    infoDict = {"股票名称": nameParts[0]}
    for keyTag, valueTag in zip(keyList, valueList):
        infoDict[keyTag.text] = valueTag.text
    return infoDict


def getStockInfo(lst, stockURL, fpath):
    """Fetch the page of every stock in ``lst`` and append its data to a file.

    For each code the page ``stockURL + code + ".html"`` is downloaded and
    parsed; each successfully parsed stock is written to ``fpath`` as one
    line holding the ``str()`` of its dict. Pages that cannot be fetched or
    parsed are skipped. A progress percentage is printed after every stock,
    including skipped ones, so it reaches 100% at the end of the run.

    Args:
        lst: Stock codes to look up.
        stockURL: Base URL that the code and ``".html"`` are appended to.
        fpath: Output file, opened in append mode with UTF-8 encoding.

    Returns:
        Always an empty string.
    """
    total = len(lst)
    for count, stock in enumerate(lst, start=1):
        html = getHTMLText(stockURL + stock + ".html")
        if html:
            try:
                infoDict = _parseStockInfo(html)
                if infoDict is not None:
                    with open(fpath, "a", encoding="utf-8") as f:
                        f.write(str(infoDict) + "\n")
            except Exception:
                # Best-effort crawl: one bad page or failed write must not
                # abort the remaining stocks. Ctrl-C still propagates.
                pass
        print("\r当前进度:{:.2f}%".format(count * 100 / total), end="")
    return ""
82
if __name__ == '__main__':
    # File that receives the crawled records.
    result_path = "C:\\Users\\W419L\\Desktop\\股票爬取.txt"

    # Stock codes collected from the listing page.
    codes = []

    # First gather the codes from the page that links to each stock...
    getStockList(codes, "http://quote.eastmoney.com/stocklist.html")
    # ...then fetch each stock's page under the base URL and save it.
    getStockInfo(codes, "https://gupiao.baidu.com/stock/", result_path)