package sometry;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.util.NodeList;
/**
 * Downloads every image referenced by the {@code <img>} tags of a web page.
 *
 * <p>Each image is written to {@link #DOWNLOAD_DIR} under the name
 * {@code <index>_<currentTimeMillis><extension>}. The directory is not created
 * by this class.
 */
public class ExtractPicture {
    /** Directory prefix (including trailing separator) that downloaded images are written to. */
    private static final String DOWNLOAD_DIR = "D:\\360Downloads\\";

    /**
     * Parses the page at {@code url}, prints the URL and tag text of every image
     * tag found, and downloads each image to {@link #DOWNLOAD_DIR}.
     *
     * <p>A failure to download one image is reported on standard error and does
     * not prevent the remaining images from being processed. A failure to parse
     * the page itself is also reported on standard error; no exception is
     * propagated to the caller.
     *
     * @param url address of the HTML page to scan for images
     */
    public static void extractLinks(String url) {
        try {
            Parser parser = new Parser(url);
            parser.setEncoding("UTF-8");
            // Keep only the nodes that are image tags.
            NodeFilter imageFilter = new NodeClassFilter(ImageTag.class);
            NodeList nodeList = parser.extractAllNodesThatMatch(imageFilter);
            for (int i = 0; i < nodeList.size(); i++) {
                Node tag = nodeList.elementAt(i);
                ImageTag image = (ImageTag) tag;
                String imageURL = image.getImageURL();
                String imageText = image.getText();
                System.out.println("imageURL:" + imageURL);
                System.out.println("imageText:" + imageText);
                try {
                    downloadImage(imageURL, i);
                } catch (IOException e) {
                    // One broken image must not abort the rest of the page.
                    System.err.println("Failed to download image: " + imageURL);
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            // Print the actual trace; printing e.getStackTrace() only shows the array identity.
            e.printStackTrace();
        }
    }

    /**
     * Downloads a single image over HTTP(S) into {@link #DOWNLOAD_DIR}.
     *
     * @param imageURL absolute URL of the image
     * @param index    position of the image on the page, used as file-name prefix
     * @throws IOException if the URL is malformed, is not HTTP(S), or the transfer fails
     */
    private static void downloadImage(String imageURL, int index) throws IOException {
        URL source = new URL(imageURL);
        String protocol = source.getProtocol();
        // Guard the HttpURLConnection cast below against ftp:, file:, etc.
        if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
            throw new IOException("Unsupported image URL protocol: " + imageURL);
        }

        File target = new File(DOWNLOAD_DIR + index + "_" + System.currentTimeMillis()
                + extensionOf(source.getPath()));

        HttpURLConnection con = (HttpURLConnection) source.openConnection();
        try {
            con.connect();
            // The input stream is opened first so no file is created when the request fails.
            try (BufferedInputStream bis = new BufferedInputStream(con.getInputStream());
                 OutputStream out = new FileOutputStream(target)) {
                byte[] buf = new byte[1024];
                int size;
                while ((size = bis.read(buf)) != -1) {
                    out.write(buf, 0, size);
                }
            }
        } finally {
            con.disconnect();
        }
    }

    /**
     * Returns the file extension (including the leading dot) of the last segment
     * of a URL path, or an empty string when that segment has no dot.
     *
     * @param path the path component of a URL, without query or fragment
     * @return the extension such as {@code ".png"}, or {@code ""}
     */
    private static String extensionOf(String path) {
        int lastSlash = path.lastIndexOf('/');
        int lastDot = path.lastIndexOf('.');
        // A dot before the last slash belongs to a directory name, not the file.
        if (lastDot > lastSlash) {
            return path.substring(lastDot);
        }
        return "";
    }

    /**
     * Runs the extractor against a fixed sample page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ExtractPicture.extractLinks("http://www.cb.cityu.edu.hk/staff/issliao/");
    }
}