OkHttp3爬虫+FastJson解析JSON+Jsoup解析Document节点+文本写入文件
爬虫 + JSON + Document + 文件写入
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| // Fetch a URL with OkHttp, extract an HTML fragment from the JSON reply with FastJson,
// save that fragment to a text file, then inspect its first <h3> element with Jsoup.

// OkHttp only accepts http/https URLs, so the scheme has to be written out explicitly.
String url = "https://www.baidu.com";

OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
        .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36")
        .url(url)
        .build();

// A Response must be closed to release its connection; try-with-resources does so
// even when reading the body throws.
String responseString;
try (Response response = httpClient.newCall(request).execute()) {
    responseString = response.body().string();
}
System.out.println(responseString);

// The reply is read as JSON shaped like {"html": {"list": "<html fragment>"}}.
JSONObject jsonObject = JSONObject.parseObject(responseString, JSONObject.class);
JSONObject htmlNode = jsonObject == null ? null : jsonObject.getJSONObject("html");
String html = htmlNode == null ? null : htmlNode.getString("list");
if (html == null) {
    // Fail with a descriptive message instead of a bare NullPointerException further down.
    throw new IllegalStateException("Response JSON has no html.list string: " + responseString);
}

// Write the fragment to disk. The charset is explicit so non-ASCII text is not
// mangled by the platform default encoding; the stream is closed even if write() fails.
File file = new File("C:\\Users\\用户名\\Desktop\\新建文本文档.txt");
try (FileOutputStream fileOutputStream = new FileOutputStream(file)) {
    fileOutputStream.write(html.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}

// Parse the fragment and look at the first <h3> element, if there is one.
Document document = Jsoup.parse(html);
Elements headings = document.getElementsByTag("h3");
if (headings.isEmpty()) {
    System.out.println("No <h3> element found in the HTML fragment");
} else {
    Element element = headings.get(0);
    System.out.println(element.text());
    System.out.println(element.nodeName());
    System.out.println(element.tagName());
    // Renames the tag in place; this is done only after the original names were printed,
    // so the parent/ancestor output below shows the element under its new tag name.
    element.tagName("lry");
    System.out.println(element.isBlock());
    System.out.println(element.parent());
    System.out.println(element.parents());
    System.out.println(element.children());
}
|