获取单位的招标信息

原创
08/15 21:41
阅读数 54

用 Hutool 下载网页，用 Jsoup 解析 HTML 页面。

package com.yh.zhaobiao;

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.swing.DesktopUtil;
import cn.hutool.core.thread.ThreadUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import cn.hutool.log.StaticLog;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


/**
 * Downloads every result page of a tender-notice search on yfbzb.com for one
 * organisation, extracts the result table from each page and merges the tables
 * into a single HTML file that is then opened with the desktop's default handler.
 *
 * <p>Each downloaded page is cached as {@code <tmpDir>/<view>/<pageNo>.html}; a page
 * that is already cached is read from disk instead of being requested again.
 */
public class yfbzb {

    /** Endpoint that receives the form-encoded search query as the POST body. */
    private static final String SEARCH_URL = "http://www.yfbzb.com/yfbsemsite/yfb_360so2";

    /** Placeholder inside the stored query strings that is replaced by the page number. */
    private static final String PAGE_PLACEHOLDER = "{pageNo}";

    /** Organisation name -> form-encoded query template containing {@code {pageNo}}. */
    static Map<String, String> ppp = new HashMap<>();


    /**
     * Runs the download for the organisation hard-coded in {@code view}.
     *
     * @param args ignored
     * @throws RuntimeException if no query template is registered for {@code view}
     */
    public static void main(String[] args) {

        String view = "潍坊市农村信用合作社";

        // The query strings below can be captured from http://www.yfbzb.com/yfbsemsite/yfb_360so
        ppp.put("工商银行", "pageNo={pageNo}&pageSize=15&ipAddress=&count=450&searchword=%E5%B7%A5%E5%95%86%E9%93%B6%E8%A1%8C&searchword2=&hotword=&provinceId=&provinceName=&areaId=&areaName=&infoType=1&infoTypeName=&noticeTypes=0&noticeTypesName=&secondInfoType=&secondInfoTypeName=&timeType=5&timeTypeName=%E8%BF%91%E4%B8%80%E5%B9%B4&searchType=2&clearAll=false&e_keywordid=29600554411&e_creative=6638038917&flag=1&source=360so&qhclickid=8594d61ce9eb97fa&firstTime=1");
        ppp.put("台州银行", "pageNo={pageNo}&pageSize=15&ipAddress=&count=450&searchword=%E5%8F%B0%E5%B7%9E%E9%93%B6%E8%A1%8C&searchword2=&hotword=&provinceId=&provinceName=&areaId=&areaName=&infoType=1&infoTypeName=&noticeTypes=0&noticeTypesName=&secondInfoType=&secondInfoTypeName=&timeType=5&timeTypeName=%E8%BF%91%E4%B8%80%E5%B9%B4&searchType=2&clearAll=false&e_keywordid=29600554411&e_creative=6638038917&flag=1&source=360so&qhclickid=8594d61ce9eb97fa&firstTime=1");
        ppp.put("潍坊市农村信用合作社", "pageNo={pageNo}&pageSize=15&ipAddress=&count=57&searchword=%E6%BD%8D%E5%9D%8A%E5%B8%82%E5%86%9C%E6%9D%91%E4%BF%A1%E7%94%A8%E5%90%88%E4%BD%9C%E7%A4%BE&searchword2=&hotword=&provinceId=&provinceName=&areaId=&areaName=&infoType=1&infoTypeName=&noticeTypes=0&noticeTypesName=&secondInfoType=&secondInfoTypeName=&timeType=5&timeTypeName=%E8%BF%91%E4%B8%80%E5%B9%B4&searchType=2&clearAll=false&e_keywordid=29600554411&e_creative=6638038917&flag=1&source=360so&qhclickid=8594d61ce9eb97fa&firstTime=1");

        if (!ppp.containsKey(view)) {
            throw new RuntimeException(" error! " + view);
        }
        StaticLog.info(" {}", view);

        // One cache directory per organisation; mkdirs() also succeeds when the
        // directory already exists or intermediate directories are missing.
        File cacheDir = new File(FileUtil.getTmpDir() + File.separator + view);
        cacheDir.mkdirs();

        List<String> data = new ArrayList<>();
        for (int pageNo = 1; ; pageNo++) {
            File tmpFile = new File(cacheDir, pageNo + ".html");

            String html;
            if (tmpFile.exists()) {
                html = FileUtil.readUtf8String(tmpFile);
            } else {
                String body = StrUtil.replace(ppp.get(view), PAGE_PLACEHOLDER, String.valueOf(pageNo));
                html = HttpUtil.post(SEARCH_URL, body);
                if (StrUtil.isBlank(html)) {
                    // Do not cache an empty response: a later run would otherwise
                    // keep reading the bad file instead of downloading again.
                    StaticLog.warn("empty response for page {}, stop paging", pageNo);
                    break;
                }
                FileUtil.writeUtf8String(html, tmpFile);
                StaticLog.info("download {} ok!", tmpFile.getAbsolutePath());

                // Pause between real requests only; cached pages need no throttling.
                ThreadUtil.safeSleep(2000L);
            }

            data.add(getData(html));

            if (!hasNext(html, pageNo)) {
                break;
            }
        }

        StaticLog.info(" download ok! {}", data.size());

        File all = new File(cacheDir, view + ".html");
        FileUtil.writeLines(data, all, "UTF-8");

        ThreadUtil.safeSleep(1000L);
        DesktopUtil.open(all);

    }

    /**
     * Extracts the result table from one search result page.
     *
     * @param html full HTML of the page
     * @return outer HTML of the elements matching {@code table#contentTable}
     */
    private static String getData(String html) {
        return Jsoup.parse(html).select("table#contentTable").outerHtml();
    }

    /**
     * Decides whether another result page follows, based on the text of the last
     * {@code li.controls} element of the page.
     *
     * <p>Each of the characters 当, 前, 页, 共, 条 in that text is replaced by {@code "/"},
     * spaces are removed, {@code "//"} is collapsed to {@code "/"} once, and the text is
     * split on {@code "/"}; fields 1 and 2 are read as the current and total page numbers.
     *
     * <p>If the element is missing or its text cannot be parsed this way, a warning is
     * logged and {@code false} is returned, so the pages collected so far are still
     * written out instead of being lost to an exception.
     *
     * @param html   full HTML of the page
     * @param pageNo page number that was requested; used only in log messages
     * @return {@code true} if the current page number is smaller than the total page count
     */
    private static boolean hasNext(String html, int pageNo) {
        Element pager = Jsoup.parse(html).select("li.controls").last();
        if (pager == null) {
            StaticLog.warn("page {}: no li.controls element found, treat as last page", pageNo);
            return false;
        }

        String text = pager.text();
        // Pass the text as an argument, not as the format string.
        StaticLog.info("{}", text);

        String[] arr = StrUtil.replaceChars(text, "当前页共条", "/")//
                .replace(" ", "").replace("//", "/")//
                .split("/");
        if (arr.length < 3) {
            StaticLog.warn("page {}: unexpected pager text '{}', treat as last page", pageNo, text);
            return false;
        }

        try {
            int thispage = Integer.parseInt(arr[1]);
            int totalpage = Integer.parseInt(arr[2]);
            return thispage < totalpage;
        } catch (NumberFormatException e) {
            StaticLog.warn("page {}: non-numeric pager text '{}', treat as last page", pageNo, text);
            return false;
        }
    }
}

pom.xml

    <!-- Third-party libraries imported by the Java class above (cn.hutool.* and org.jsoup.*). -->
    <dependencies>
        <!-- Hutool: FileUtil, HttpUtil, StrUtil, ThreadUtil, DesktopUtil and StaticLog are used. -->
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.8.5</version>
        </dependency>
        <!-- jsoup: parses the downloaded HTML and selects elements by CSS selector. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.15.2</version>
        </dependency>
    </dependencies>

 

 

 

展开阅读全文
加载中

作者的其它热门文章

打赏
0
0 收藏
分享
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部