简单的爬虫工具:抓取数据

芮琛
2023-12-01

本例抓取以下页面中的数据:https://luoyang.anjuke.com/sale/m530/?pi=baidu-cpc-luoyang-tyong1&kwid=90923896685

前台:<asp:Button ID="Button1" runat="server" Text="爬虫" OnClick="Button1_Click" />

后台(需引用 HtmlAgilityPack,并引入 System.IO、System.Linq、System.Net、System.Text 命名空间):

/// <summary>
/// Requests <paramref name="url"/> and returns the response body decoded as UTF-8 text.
/// </summary>
/// <param name="url">The URL to request.</param>
/// <returns>
/// The full response body, or <c>null</c> when the request fails with a
/// <see cref="WebException"/>. Callers must check for <c>null</c>.
/// </returns>
public static string GetHtmlStr(string url)
    {
        try
        {
            WebRequest request = WebRequest.Create(url);

            // Dispose the response, its stream and the reader deterministically;
            // leaving them open keeps the underlying connection held until finalization.
            using (WebResponse response = request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }
        catch (WebException)
        {
            // Connection failed: signal the failure to the caller with null.
            return null;
        }
    }
    protected void Button1_Click(object sender, EventArgs e)
    {
        string url = "https://luoyang.anjuke.com/sale/m530/?pi=baidu-cpc-luoyang-tyong1&kwid=90923896685";
        string htmlstr = GetHtmlStr(url);
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

        doc.LoadHtml(htmlstr);
        HtmlNode rootnode = doc.DocumentNode;    //XPath路径表达式,这里表示选取所有span节点中的font最后一个子节点,其中span节点的class属性值为num
        //根据网页的内容设置XPath路径表达式

        string xpathstringting = "//div[@class='details-item']/span";
        //string xpathstringping = "//div[@class='details-item']/em[@class='spe-lines']";
        HtmlNodeCollection aa = rootnode.SelectNodes(xpathstringting);    //所有找到的节点都是一个集合
        //HtmlNodeCollection bb = rootnode.SelectNodes(xpathstringping); 

        string sPath = @"C:\Users\Administrator\Desktop\JQuery练习4.14爬虫高德支付宝\Weeb\PaTwo" + "\\";
            Directory.CreateDirectory(sPath);
            for (int i = 0; i < aa.Count(); i++)
            {
                string innertext = aa[i].InnerText;
                Response.Write("<br/>");
                Response.Write(innertext);
            }
        
    }

 类似资料: