使用GROOVY脚本语言发送get和post请求

谷梁永年
2023-12-01

使用GROOVY脚本语言发送get和post请求

2018年9月10日


用到的包

  • import org.apache.http.NameValuePair
  • import org.apache.http.client.entity.UrlEncodedFormEntity
  • import org.apache.http.client.methods.CloseableHttpResponse
  • import org.apache.http.client.methods.HttpGet
  • import org.apache.http.client.methods.HttpPost
  • import org.apache.http.impl.client.CloseableHttpClient
  • import org.apache.http.impl.client.DefaultHttpClient
  • import org.apache.http.message.BasicNameValuePair
  • import org.apache.http.util.EntityUtils
  • import org.jsoup.Jsoup
  • import org.jsoup.nodes.Document

    这里给一个pom文件,方便大家获取这些包。把下面代码放在新建的pom文件中去就可以自动加载这些用到的包了,下面是pom的内容:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Minimal Maven project that pulls in the libraries used by the Groovy
  GET/POST examples: Apache HttpClient/HttpCore for the HTTP calls and
  Jsoup for HTML parsing, plus the Groovy runtime itself.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.jc</groupId>
    <artifactId>com.jc</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- HtmlUnit is published under the net.sourceforge.htmlunit groupId;
             the previous value was a class name and could not be resolved. -->
        <dependency>
            <groupId>net.sourceforge.htmlunit</groupId>
            <artifactId>htmlunit</artifactId>
            <version>2.18</version>
        </dependency>
        <!-- HTML parser used to turn the response body into a Document. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.8.3</version>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.4</version>
        </dependency>
        <!-- HTTP client: HttpGet, HttpPost, CloseableHttpClient, ... -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>
        <!-- HttpCore shares the org.apache.httpcomponents groupId with HttpClient;
             "httpcore" alone is not a valid groupId for this artifact. -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpcore</artifactId>
            <version>4.4.3</version>
        </dependency>
        <!-- Groovy runtime used to run the example scripts. -->
        <dependency>
            <groupId>org.codehaus.groovy</groupId>
            <artifactId>groovy</artifactId>
            <version>2.1.6</version>
        </dependency>
    </dependencies>
</project>

get请求

先上代码

import org.apache.http.HttpResponse
import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.CloseableHttpClient
import org.apache.http.impl.client.DefaultHttpClient
import org.apache.http.util.EntityUtils
import org.jsoup.Jsoup
import org.jsoup.nodes.Document

// GET example: fetch a page, decode the body as UTF-8, parse it with Jsoup
// and print the resulting Document.
// NOTE: DefaultHttpClient has been deprecated since HttpClient 4.3; it is kept
// here so the script still matches the imports listed above.
CloseableHttpClient httpClient = new DefaultHttpClient()
try {
    // URL to fetch
    url = "http://news.baidu.com/"
    // Build the GET request and send a browser-like User-Agent header
    HttpGet method1 = new HttpGet(url)
    method1.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)" +
            " Chrome/56.0.2924.87 Safari/537.36")
    // Execute the request; EntityUtils.toString reads the body to the end and
    // closes the entity's content stream, which releases the connection
    HttpResponse result1 = httpClient.execute(method1)
    String retVal = EntityUtils.toString(result1.getEntity(), "utf-8")
    // Parse the HTML source into a Document that supports select(...) queries
    Document doc = Jsoup.parse(retVal)
    print doc
} finally {
    // Always shut the client down so its pooled connections are not leaked
    httpClient.close()
}

运行代码,返回一个包含百度新闻首页HTML源码的对象,可进行select操作选择节点。例如:doc.select("#table615 > tbody > tr > td > p > a").text()将返回对应节点下的文字内容。

post请求

同样也是先上代码

import org.apache.http.NameValuePair
import org.apache.http.client.entity.UrlEncodedFormEntity
import org.apache.http.client.methods.CloseableHttpResponse
import org.apache.http.client.methods.HttpPost
import org.apache.http.impl.client.CloseableHttpClient
import org.apache.http.impl.client.DefaultHttpClient
import org.apache.http.message.BasicNameValuePair
import org.apache.http.util.EntityUtils
import org.jsoup.Jsoup
import org.jsoup.nodes.Document

// POST example, part 1: create the client, the request and the (still empty)
// list of form fields that the following lines fill in.
String targetUrl = "http://www.mxw.gov.cn/pingyi/wlhyr/py_list.aspx?type=bmhf&hymc=%CA%A1%BB%B7%B1%A3%CC%FC"
String userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"

CloseableHttpClient httpClient = new DefaultHttpClient()

// Request object for the target page, sent with a browser-like User-Agent
HttpPost method = new HttpPost(targetUrl)
method.setHeader("User-Agent", userAgent)

// Name/value pairs that will make up the url-encoded form body
List<NameValuePair> params = new ArrayList<NameValuePair>()
// __VIEWSTATE form field. The value is one long token; it is written as two
// concatenated literals because a double-quoted Groovy string cannot contain a
// raw line break. NOTE(review): presumably captured from the page's hidden form
// field and therefore stale - capture a fresh value before running the script.
params.add(new BasicNameValuePair("__VIEWSTATE", "9dHvBu5S1jqyerKLADNM+hRpAkRbBuSshVM19IkkX8+rLqQHfhiXmpNz+kGvpKFANjNHB0+oRNs8tkdwdfwe6wKYOOkvD3ums5OUU03BE8F7yalvgaaJQb0hIQthRF2OrP0FuA3z8uVFvN4O1IoDHagLcsm7JPa+5AOQ3UWzwgh8pqKiNo7kiIm6ofRJ6W6261Q+/pV1WSqrp5PyI19tpsObKLRH6p1kx2iCU9T4wNyhmmtpY5MZ6YHMxjyAzPMn4loEYJUiSPxJdoH9nJ9c5wqqKXcg5NxASGXpT1YaqdZdpzDq4UHLb7A+TLIf6kdCmkBjEHWD160jMzLx7XfKGfJAmRUPaCXcpnTD9VnmYvJlfzejfhz07ihdqJ5hXDh254oJT8+h1h5scCEnMjc3bheY7ZZn1TnUrHhEpDfpOb/LzPDvy/pZglcWZdwiuqDOzBYKctGkkcUy+gJq4TCJnQeX+ywkOTBnb/NAa2YvyW2BBcH0ZFXcORelDp1P91gl5n8rL0NxY5yo5MWxDxQyppvr2v9w9ITB6PQ8rAxBfsGp898tIeq5Ih8VGmCGS4RcsX+iEjxpEl3bFsg4X3n8EEwGIZOIwe4F5uDz5W8t2Uc6ZnWw2XvhtW+LSWObNFGyIvD1Pg9JahY12pnBMdM0FHzFJ6VbjaylLSj13TMNayRhLGnJww3zTomUe0nfHwZuJuudOjyTH6fSp7BSthH83FMwgAm6QSuHqDn0QNe3oOPIXw9Gs3M5+6Um+lHHdDvGKLSJh+XTLIDRilHMkCBsDFOuTttFEajdKmBGKpmN99fcM+wNBhQYSTnIJ6He7YwHl8QDO+fsmYtMFdphT0iICnLoTTF44zULLbAvCMiwNV1+qIsL4APhv1+0lJenbPk0zD8Xt0A6LK4MOguuj33gsOgF5gkKojUMjSOaBfeJqkLOU82/7NBeYxqaOviA5Q6jKdyMVs1RtH2tkrvindHvstV5oRPV4X7JYuneHPPvYSuwQmoJ1o7zQ3m94iHILQC9z/tXmkPLQWXj/Q/u/vqeskrxqSOIOEps53lcVPb2BBzIsSVkokW5yGdI75VBjalMEFdrl5bigVdGklj3wtxxdStVXm1EMv6jrt8QRvcddtElrdw+rvegOMyfbFHbkEvPPL6qnKrtX0lkBtjG9HbqMUDr/D+pp3V78MHb5E2eh1ReTVzHvdXONTrm9/XMyK5TLykiOzjWf9DTuxNEfygcEqRViMj207tyqnKtpzBsuHOkiC62TpqlRbEBPdHeNXwcWvWNRpERrLp9Zj3e79saJrqEAeZpibWdVM5ThDeAkIIajcSfpyE3HX1OdsSotCtAppzxpKJYWM1pOP6Xyg7jw1cHvl6i0K13Twqv1jBJrAl6j3dfIiG4Epzc2wYYvXRRj4FxDW4/u81eLdDLoCn/I4xoahNW5Wu4ZaRn+6rAUvzPPyfl5JZIQc7H/07/9Uo4paBjq3jp32IX9KCyL5xLl6YUCN56lYV5/gbk2QxQNEfVQJnaA8cM7HzuJ1HVgsUjZKGCtSD3iEJYUf/yeBCYiDbKD1PCBNKpJp6itkeXoE7W2uDYpf6hbSMK30WYCvsVdnAlGOXd91OWUwEaU07pugsqby48PQxoqZxlCKu4x9JecajxY5IJO/DcSjtwSJjeBzGbeZHLg3dEmysvDet1wq8AcgZlHwcvpR/QxzMYTe3H2j9u5rOf/SvTybUQ0D7BMefuvnXzVVh8t+ViVO8s+bo3vFuRnLjniq0sP3vXtcfVAhd653s9DcSTfIDGiFSMRLER3s80+sNeIxO9GVPIQT7QJXjxICdc2GSZab85xAL2xrsbXMPH13QIn2++s6Pj1cITIeRi2DTCTEnNTxDEPDfFzbccUeFLUFbYMYrrpQnQMih9GLdqZO+F40hTqNuTRGViC/NrG9PULnrpoXPQRKiSaqAl7x" +
        "U9oc2Fq/IV99pCdmmRQNvehrNpQSFjjfLx7mr+cd1veHUpqmtz7JSPSzgDuSomE5XJA9AcxtnmFupGw9jpQbpxSMfXIcqFayBy17wGTORcFjFbp3krpZaO16dWe4NLUEMARFrORzRp9+E+8htRAMw8bKo6WawI2G00/HnYL0AM6uDjhvWQyhyQGcmbaq56TqVUwprPdwL4/oL+9YDXpZWgoFKlIuJiYqzTf0Ws1JyIwiDDvezMbx+09e3R8LgYVaT+KM88L9o11G7bFkmYAzeM+B68t9Pk/zbkQmVpm9nWo7m1WfAJhVgUJHYhXvzsOskaksVofzW+42BawsWxDE+5hZp8iPyiJg+xZJbjTnKDnvxDklVD8jbwSlOmzII7GVdlpxV3ncCiUdJglQrLEL32NmehdD2k7MP5plvJfc6GfFsNt9KHBIOO8YtiPu6twWNoxVDnx6zSwo0Nl5yrZ0rX+cA31h2wwXglBWlbMb46V2S+rNi5ujJySi0yB1TkL9msIUWc8833Pj/SmJwN/XIYnZIs/Rtz4YuDb5TarvZmXB0xTJCXrhEGVqrJlrxA+oe/Pbc8FaAUNjQvToXPGPGSrXBtpVA1y4nrAuS++GOZweDF40dn4OZxBqFj32R+F3deGQZaOUN7pP1ylMNdxz3hvNF+DmYnqD4kfaVlZZEu9b5EDLbgcT3t0WL9WM20hHWE2a8ksRUPbX8+8rhqMGxQqnXmR8VUA16KsDKHPBONzeAc8piMz1xUuiSK4Qpsj2/lWwWAkkIr+nP1/72dWLW7Z6hi8T03tCP9qbG9G7k3beLXt5qhIZzeVlF+cyMkPJ5tigh+wzxJEQubmOQ41HWKb99fJ7n3M3fVL5dHTsTy7pLbxiX5ITHZyKR9KfjbEag4MMBEuhtrTYwcUgwwvtSARhavHIHLtPYpaVcB2m6Qsu1Ap8GYCKHqyYgdvZ/fZSECnCOh0EVClF9JhDTQZEyr6DLplAKgPPujEjAgVDpAXjjCOAMVDBSxxeaaegJ1fBLiZWPYT7Cuey6fYJFaCGrathQtdebI30N5Kt7Ay0r1U8gzkG3rJuOVGWJ7jKKuulENsU6tTh45DqVrB/cZugA0AXXDVSvMtyYrkAToL0EKbOl8IW2oX2n5pnMHhEiKityExCS3CqNyuEnxVZ/QJoknm1ud21G4/XzNVwUJ7uQd4sV7wrZOb+ZI05Fmd6vLTHqfbxYLpIEjXAG/df4w0jVGbI2gfAMAmE9fTTgEAukuKkJCTXNk"))
// POST example, part 2: add the remaining form fields, send the request and
// parse the returned HTML.
params.add(new BasicNameValuePair("__VIEWSTATEGENERATOR", "85F75307"))
params.add(new BasicNameValuePair("__EVENTTARGET", "Pg"))
// NOTE(review): "5" looks like the page number being requested - confirm
params.add(new BasicNameValuePair("__EVENTARGUMENT", "5"))
try {
    // Encode the form fields as the request body using gb2312
    method.setEntity(new UrlEncodedFormEntity(params, "gb2312"))
    // Send the POST and obtain the response
    CloseableHttpResponse response = httpClient.execute(method)
    try {
        // "gb2312" is only a fallback used when the response does not declare
        // its own charset; it matches the encoding used for the request form.
        String result = EntityUtils.toString(response.getEntity(), "gb2312")
        // Parse the HTML source into a Document that supports select(...) queries
        Document doc1 = Jsoup.parse(result)
        print doc1
    } finally {
        // Release the connection held by this response
        response.close()
    }
} finally {
    // Always shut the client down so its pooled connections are not leaked
    httpClient.close()
}

这里是访问一个用post请求翻页的网站的示例,上面的列表对象params中保存的一组键值对就是post请求提交的表单内容,每个键值对的用法和Python字典中的一项很像。使用时,只要找到post请求的地址和表单数据的内容,用这样的方法就可以获取请求的内容。同样,因为post返回的对象是一个html页面的源码,所以这里已经转换成了一个可选择节点的document对象。
以上就是使用groovy脚本语言的get和post方法。总的来说,使用起来比起用java直接开发爬虫要简单得多,而且Groovy语言中还包含了很多高级语言的特性,例如闭包等用起来十分方便的概念。

 类似资料: