// Requires the jsoup library: jsoup-1.7.3.jar
// Original article: http://www.cnblogs.com/zyw-205520/p/3421687.html
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small jsoup demonstration: parses HTML held in a string, downloads a page
 * over HTTP, and prints selected elements of the downloaded pages.
 */
public class JsoupTest {

    /** Address of the single blog post that {@link #Blog()} and the body demo download. */
    static String url = "http://www.cnblogs.com/zyw-205520/archive/2012/12/20/2826402.html";

    /** Address of the blog index page scanned by {@link #article()}. */
    private static final String BLOG_INDEX_URL = "http://www.cnblogs.com/zyw-205520/";

    /**
     * Runs one of the demos.
     *
     * @param args optional; {@code args[0]} selects the demo: {@code "body"} runs the
     *             parse/download body demo, {@code "article"} lists post titles and
     *             links; anything else (or no argument) prints the blog post content
     * @throws Exception if the body demo fails to download its page
     */
    public static void main(String[] args) throws Exception {
        String demo = (args != null && args.length > 0) ? args[0] : "blog";
        if ("body".equals(demo)) {
            blogBody();
        } else if ("article".equals(demo)) {
            article();
        } else {
            // Default keeps the historical behaviour of this entry point.
            Blog();
        }
    }

    /**
     * Prints the {@code <body>} of a document parsed from an in-memory HTML string,
     * then the {@code <body>} of the document downloaded from {@link #url}.
     *
     * @throws Exception if downloading {@link #url} fails
     */
    private static void blogBody() throws Exception {
        String html = "<html><head><title> 开源中国社区 </title></head>"
                + "<body><p> 这里是 jsoup 项目的相关文章 </p></body></html>";
        Document parsed = Jsoup.parse(html);
        System.out.println(parsed.body());

        // Load an HTML document directly from a URL.
        Document fetched = Jsoup.connect(url).get();
        System.out.println(fetched.body());
    }

    /**
     * Downloads the blog index page and prints, for every link found inside an
     * element with the CSS class {@code postTitle}, the link's {@code href}
     * followed by its trimmed text, each on its own line.
     * <p>
     * A download failure is reported on standard error; nothing is thrown.
     */
    public static void article() {
        try {
            Document doc = Jsoup.connect(BLOG_INDEX_URL).get();
            // Class lookup also matches elements that carry additional classes,
            // unlike an exact comparison against the whole "class" attribute.
            Elements titleBlocks = doc.getElementsByClass("postTitle");
            for (Element titleBlock : titleBlocks) {
                for (Element link : titleBlock.getElementsByTag("a")) {
                    System.out.println(link.attr("href"));
                    System.out.println(link.text().trim());
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch " + BLOG_INDEX_URL + ": " + e);
        }
    }

    /**
     * Downloads the blog post at {@link #url} and prints the inner HTML of every
     * element with the CSS class {@code postBody}.
     * <p>
     * A download failure is reported on standard error; nothing is thrown.
     */
    public static void Blog() {
        // Read the field once so the error message names the address actually requested.
        String postUrl = url;
        try {
            Document doc = Jsoup.connect(postUrl).get();
            // Class lookup also matches elements that carry additional classes.
            Elements bodies = doc.getElementsByClass("postBody");
            for (Element body : bodies) {
                System.out.println(body.html());
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch " + postUrl + ": " + e);
        }
    }
}