当前位置: 首页 > 工具软件 > Colly > 使用案例 >

go的爬虫工具教你如何去翻译(go调用js,colly的使用)

萧凡
2023-12-01

go的爬虫工具教你如何去翻译(go调用js,colly的使用)

分析过程

https://blog.csdn.net/a1309525802/article/details/108394021

go代码

package main

import (
	"encoding/json"
	"fmt"
	"regexp"

	"github.com/dop251/goja"
	"github.com/gocolly/colly"
)

// token holds the value captured by the `token: '...'` pattern in main's
// response handler; it is stored here and then sent as the "token" form
// field of the translation request.
var token string

// CallJsCode computes the "sign" form value that main sends with the
// translation request, by running the embedded JavaScript signing routine
// in a fresh goja runtime and calling its function e with keyword.
//
// The JS function e returns a string of the form "<p>.<p ^ m>", where m is
// the integer part of the seed i; goja converts that string to the float64
// returned here.
//
// It returns 0 (after printing a diagnostic) when the script fails to
// evaluate, when e cannot be bound to a Go func, or when e throws.
//
// NOTE(review): because the result travels as a float64, a signature whose
// second component ends in "0" will presumably lose that digit when the
// caller formats it with fmt.Sprint. Fixing that needs a string-returning
// variant, which would change this function's signature — confirm with
// callers before changing.
func CallJsCode(keyword string) float64 {
	// The script is kept as the original routine with one fix: the
	// surrogate-pair branch used to call an undefined helper a(...) around
	// e[C].split(""), which raised a ReferenceError for any keyword that
	// contains an astral character (e.g. an emoji). split("") already
	// yields a fresh array, so the wrapper is simply dropped.
	const script = `
    var i = "320305.131321201"
	function n(r, o) {
		for (var t = 0; t < o.length - 2; t += 3) {
			var a = o.charAt(t + 2);
			a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
		}
		return r
	}


	function e(r) {
		var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
		if (null === o) {
			var t = r.length;
			t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
		} else {
			for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, e[C].split("")), C !== h - 1 && f.push(o[C]);
			var g = f.length;
			g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
		}
		var u = void 0, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
		u = null !== i ? i : (i = window[l] || "") || "";
		for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
			var A = r.charCodeAt(v);
			128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
		}
		for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
		return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
	}
    `
	vm := goja.New()
	if _, err := vm.RunString(script); err != nil {
		fmt.Println("JS代码有问题!", err)
		return 0
	}

	// Binding with a trailing error result makes goja hand JS exceptions
	// back as an error; without it an exception thrown by e would surface
	// as a Go panic.
	var sign func(string) (float64, error)
	if err := vm.ExportTo(vm.Get("e"), &sign); err != nil {
		fmt.Println("Js函数映射到 Go 函数失败!", err)
		return 0
	}

	result, err := sign(keyword)
	if err != nil {
		fmt.Println("Js函数执行失败!", err)
		return 0
	}
	return result
}

// JSON shapes that main decodes the translation response body into. Only
// the fields listed here are mapped; anything else in the payload is
// ignored by encoding/json.
type (
	// TransData is one entry of the "data" array; presumably Src is the
	// submitted text and Dst its translation.
	TransData struct {
		Dst string `json:"dst"`
		Src string `json:"src"`
	}

	// Trans is the object stored under "trans_result": the data entries
	// together with the "from" and "to" values reported by the service.
	Trans struct {
		Data []TransData `json:"data"`
		From string      `json:"from"`
		To   string      `json:"to"`
	}

	// Result is the top-level response object.
	Result struct {
		TransResult Trans `json:"trans_result"`
	}
)

// main sends a fixed English phrase through the fanyi.baidu.com web
// endpoints and prints the decoded response. A single colly collector
// chains three requests, each triggered from the previous response:
//
//  1. GET  /langdetect — entry request; its response triggers step 2.
//  2. POST /           — the body is searched for the token pattern.
//  3. POST /v2transapi — carries the token plus the sign computed by
//     CallJsCode; its JSON body is decoded into Result and printed.
//
// Failures (request errors, missing token, undecodable JSON) are reported
// on stdout and stop the chain instead of panicking or printing an empty
// result.
func main() {
	const (
		langDetectURL = "https://fanyi.baidu.com/langdetect"
		homeURL       = "https://fanyi.baidu.com/"
		translateURL  = "https://fanyi.baidu.com/v2transapi?from=en&to=zh"
	)
	keyword := "hello world!!!"
	// Compiled once here rather than on every response.
	tokenRe := regexp.MustCompile(`token: '(.*?)'`)

	c := colly.NewCollector()
	c.OnRequest(func(r *colly.Request) {
		r.Headers.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36")
		r.Headers.Set("x-requested-with", "XMLHttpRequest")
		r.Headers.Set("origin", "https://fanyi.baidu.com")
		r.Headers.Set("referer", "https://fanyi.baidu.com/?aldtype=16047")
		fmt.Println("Visiting", r.URL.String())
	})
	c.OnResponse(func(r *colly.Response) {
		switch r.Request.URL.String() {
		case langDetectURL:
			requestData := map[string]string{
				"query": keyword,
			}
			if err := c.Post(homeURL, requestData); err != nil {
				fmt.Println("request failed:", homeURL, err)
			}
		case homeURL:
			// FindSubmatch returns nil when the pattern is absent, so the
			// capture group must be checked before it is indexed.
			match := tokenRe.FindSubmatch(r.Body)
			if len(match) < 2 {
				fmt.Println("token not found in response from", homeURL)
				return
			}
			token = string(match[1])
			requestData := map[string]string{
				"from":              "en",
				"to":                "zh",
				"query":             keyword,
				"transtype":         "realtime",
				"simple_means_flag": "3",
				"sign":              fmt.Sprint(CallJsCode(keyword)),
				"token":             token,
				"domain":            "common",
			}
			if err := c.Post(translateURL, requestData); err != nil {
				fmt.Println("request failed:", translateURL, err)
			}
		default:
			var res Result
			if err := json.Unmarshal(r.Body, &res); err != nil {
				fmt.Println("decoding translation response:", err)
				return
			}
			fmt.Printf("%+v\n", res)
		}
	})
	if err := c.Visit(langDetectURL); err != nil {
		fmt.Println("request failed:", langDetectURL, err)
	}
}

 类似资料: