jsoup 爬取案例 jsoup

应煌
2023-12-01
package jousp;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Minimal jsoup scraping example: fetches a web page, locates the first
 * {@code <img>} element on it, downloads that image over HTTP and stores it
 * in a local file.
 */
public class JSoupP {

	/** Page whose first image is downloaded. */
	private static final String PAGE_URL = "https://book.douban.com";

	/** Location the downloaded image is written to. */
	private static final String OUTPUT_PATH = "D://BeautyGirl.jpg";

	/** Connect and read timeout for the image download, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 5 * 1000;

	/** Size of the chunk buffer used when draining a stream. */
	private static final int BUFFER_SIZE = 1024;

	/**
	 * Downloads the first image found on {@link #PAGE_URL} and writes it to
	 * {@link #OUTPUT_PATH}.
	 *
	 * @param args ignored
	 * @throws IllegalStateException if the page has no {@code <img>} element
	 *         or the first one has no usable {@code src}
	 * @throws Exception if fetching the page, downloading the image or
	 *         writing the file fails
	 */
	public static void main(String[] args) throws Exception {
		Document document = Jsoup.connect(PAGE_URL).get();
		Elements elements = document.getElementsByTag("img");
		if (elements.isEmpty()) {
			throw new IllegalStateException("No <img> element found on " + PAGE_URL);
		}
		Element element = elements.get(0);

		// absUrl resolves relative and protocol-relative paths against the
		// page's base URI; the raw attribute would make new URL(...) fail.
		String url = element.absUrl("src");
		if (url.isEmpty()) {
			throw new IllegalStateException(
					"First <img> element has no resolvable src: " + element.attr("src"));
		}

		HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
		try {
			connection.setRequestMethod("GET");
			connection.setConnectTimeout(TIMEOUT_MILLIS);
			// Without a read timeout a stalled server would block forever.
			connection.setReadTimeout(TIMEOUT_MILLIS);

			// Read the whole image into memory (readInputStream closes the stream).
			byte[] data = readInputStream(connection.getInputStream());

			// Write the image; try-with-resources closes the file even if write fails.
			File imageFile = new File(OUTPUT_PATH);
			try (FileOutputStream outStream = new FileOutputStream(imageFile)) {
				outStream.write(data);
				outStream.flush();
			}
		} finally {
			connection.disconnect();
		}
	}

	/**
	 * Reads every remaining byte from the given stream and then closes it.
	 * The stream is closed even when reading fails part-way through.
	 *
	 * @param inStream stream to drain; must not be {@code null}
	 * @return all bytes read from the stream (empty array if the stream was
	 *         already at end-of-file)
	 * @throws NullPointerException if {@code inStream} is {@code null}
	 * @throws Exception if reading from or closing the stream fails
	 */
	public static byte[] readInputStream(InputStream inStream) throws Exception {
		if (inStream == null) {
			throw new NullPointerException("inStream");
		}
		try (InputStream in = inStream;
				ByteArrayOutputStream outStream = new ByteArrayOutputStream()) {
			byte[] buffer = new byte[BUFFER_SIZE];
			// read returns the number of bytes placed in buffer, or -1 at end-of-stream.
			int len;
			while ((len = in.read(buffer)) != -1) {
				// Copy only the bytes actually read in this pass.
				outStream.write(buffer, 0, len);
			}
			return outStream.toByteArray();
		}
	}

}

 类似资料: