用jericho jar包来解析HTML页面示例
柯默
2023-12-01
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import java.io.File;
import java.io.IOException;
/**
 * Example of parsing an HTML page with the Jericho HTML Parser.
 */
public class HtmlBeanUtil {

    /**
     * Parses the HTML file at a hard-coded local path and prints to standard output:
     * first the text of the document's first {@code <title>} element (when one exists),
     * then the text extracted from the whole document.
     *
     * <p>An {@link IOException} raised while reading the file is not propagated;
     * its stack trace is printed instead.
     */
    public void parseHtml() {
        String path = "G:\\data\\index.html";
        try {
            Source sc = new Source(new File(path));

            // Keep the looked-up element in a local so the next statement can use it.
            Element firstElement = sc.getFirstElement(HTMLElementName.TITLE);

            // getFirstElement returns null when the document has no <title> element;
            // skip the title line rather than fail with a NullPointerException.
            if (firstElement != null) {
                System.out.println(firstElement.getTextExtractor().toString());
            }

            System.out.println(sc.getTextExtractor().toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}