Java 模拟登录爬虫

原创
2018/08/16 15:14
阅读数 779

1、添加依赖

<dependency>
	<groupId>org.jsoup</groupId>
	<artifactId>jsoup</artifactId>
	<version>1.9.2</version>
</dependency>

2、示例代码

package com.neo;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Minimal example of a crawler that logs in to a site with jsoup and then
 * requests a page using the cookies obtained from the login response.
 *
 * @author weixiang.wu
 */
public class CrawlerTest {

	/** User-Agent header sent with every request. */
	private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

	/** Timeout passed to jsoup for every request, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 10000;

	/**
	 * Logs in, then posts to the target URL with the session cookies.
	 *
	 * @param args the input arguments (unused)
	 * @throws IOException if either HTTP request fails
	 */
	public static void main(String[] args) throws IOException {
		// URL of the page to crawl
		String url = "https://www.?.com/at/api.html?url=admin_member_list";
		String username = "?";
		String password = "?";
		Map<String, String> sessionCookies = getSessionInfo(username, password);
		// httpPost prints the response; its return value is available for further processing.
		httpPost(sessionCookies, url);
	}

	/**
	 * Posts the credentials to the login page and collects the cookies set by the response.
	 *
	 * @param username the login name, sent as the {@code username} form field
	 * @param password the password, sent as the {@code password} form field
	 * @return a mutable copy of every cookie set by the login response
	 * @throws IOException if the login request fails
	 */
	private static Map<String, String> getSessionInfo(String username, String password) throws IOException {
		// Log in to the site
		Connection.Response res = Jsoup.connect("https://www.?.com/at/login.html")
				.userAgent(USER_AGENT)
				.data("username", username, "password", password)
				.method(Connection.Method.POST)
				.timeout(TIMEOUT_MILLIS)
				.execute();
		// Take all cookies the server actually set. Looking up fixed names with
		// res.cookie(name) yields null for any cookie that is absent, and a null
		// value is rejected later by Connection.cookies(Map).
		return new HashMap<>(res.cookies());
	}


	/**
	 * Sends a POST request carrying the given cookies and returns the parsed response.
	 *
	 * @param sessionId cookies to attach to the request
	 * @param url       the URL to post to
	 * @return the response, parsed by jsoup as a document and rendered as a string
	 * @throws IOException if the request fails
	 */
	private static String httpPost(Map<String, String> sessionId, String url) throws IOException {
		// Build the request connection
		Connection con = Jsoup.connect(url)
				.userAgent(USER_AGENT)
				.cookies(sessionId)
				.timeout(TIMEOUT_MILLIS)
				.ignoreContentType(true);
		// Request parameters
		con.data("page", "1");
		con.data("pid", "0");
		con.data("level", "[1,2,3]");
		Document doc = con.post();
		System.out.println(doc);
		return doc.toString();
	}
}

 

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部