• Go 爬虫工具教你如何实现翻译(Go 调用 JS、colly 的使用)


    Go 爬虫工具教你如何实现翻译(Go 调用 JS、colly 的使用)

    分析过程

    https://blog.csdn.net/a1309525802/article/details/108394021

    Go 代码

    package main
    
    import (
    	"encoding/json"
    	"fmt"
    	"regexp"
    
    	"github.com/dop251/goja"
    	"github.com/gocolly/colly"
    )
    
    var (
    	// token is package-level state written by main's response handler with
    	// the first capture group of the `token: '...'` pattern found in the
    	// page body, and then sent back as the "token" form field.
    	token string
    )
    
    // CallJsCode evaluates the embedded JavaScript signing routine `e` on
    // keyword inside a fresh goja runtime and returns its result converted to
    // float64.
    //
    // It returns 0 (after printing a message) when the script fails to load,
    // when `e` cannot be mapped onto a Go function, or when `e` throws.
    //
    // NOTE(review): the script itself returns a string of the form
    // "<p>.<p^m>". Converting that to float64 cannot preserve trailing zeros
    // of the part after the dot, so formatting the returned number may not
    // reproduce the script's exact string. The return type is kept for
    // compatibility with existing callers.
    func CallJsCode(keyword string) float64 {
    	// The script is kept verbatim except for the surrogate-pair branch of
    	// `e`: it used to wrap the split result in a helper `a(...)` that is
    	// not defined anywhere in this script, which raised a ReferenceError
    	// for any keyword containing a surrogate pair. split("") already
    	// returns an array, so it is passed to push.apply directly.
    	const script = `
        var i = "320305.131321201"
    	function n(r, o) {
    		for (var t = 0; t < o.length - 2; t += 3) {
    			var a = o.charAt(t + 2);
    			a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
    		}
    		return r
    	}
    
    
    	function e(r) {
    		var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    		if (null === o) {
    			var t = r.length;
    			t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
    		} else {
    			for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, e[C].split("")), C !== h - 1 && f.push(o[C]);
    			var g = f.length;
    			g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
    		}
    		var u = void 0, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
    		u = null !== i ? i : (i = window[l] || "") || "";
    		for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
    			var A = r.charCodeAt(v);
    			128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
    		}
    		for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
    		return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
    	}
        `
    	vm := goja.New()
    	if _, err := vm.RunString(script); err != nil {
    		fmt.Println("JS代码有问题!")
    		return 0
    	}
    	// Declaring an error result makes the goja gateway hand JavaScript
    	// exceptions back as a Go error; without it a throw inside `e` would
    	// panic in the caller.
    	var fn func(string) (float64, error)
    	if err := vm.ExportTo(vm.Get("e"), &fn); err != nil {
    		fmt.Println("Js函数映射到 Go 函数失败!")
    		return 0
    	}
    	sign, err := fn(keyword)
    	if err != nil {
    		fmt.Println("JS函数执行失败:", err)
    		return 0
    	}
    	return sign
    }
    
    // Types used by main to decode the JSON body of the translation response.
    type (
    	// Result is the top-level object; only its "trans_result" member is
    	// decoded.
    	Result struct {
    		TransResult Trans `json:"trans_result"`
    	}
    
    	// Trans carries the "data" entries together with the "from" and "to"
    	// values of the response.
    	Trans struct {
    		Data []TransData `json:"data"`
    		From string      `json:"from"`
    		To   string      `json:"to"`
    	}
    
    	// TransData is one element of "data": a "src" string and its "dst"
    	// counterpart.
    	TransData struct {
    		Dst string `json:"dst"`
    		Src string `json:"src"`
    	}
    )
    
    // main translates a fixed English keyword to Chinese through
    // fanyi.baidu.com using a chain of three requests driven from a single
    // colly response handler:
    //
    //  1. visit /langdetect;
    //  2. on that response, POST the keyword to the home page;
    //  3. on the home-page response, extract the page token, compute the
    //     sign with CallJsCode and POST the translation request.
    //
    // Any other response is decoded as a Result and printed. Failures are
    // printed and stop the chain instead of panicking or being ignored.
    func main() {
    	const (
    		detectURL = "https://fanyi.baidu.com/langdetect"
    		homeURL   = "https://fanyi.baidu.com/"
    		transURL  = "https://fanyi.baidu.com/v2transapi?from=en&to=zh"
    	)
    	keyword := "hello world!!!"
    	// Compiled once here rather than on every response.
    	tokenRe := regexp.MustCompile(`token: '(.*?)'`)
    
    	c := colly.NewCollector()
    	c.OnRequest(func(r *colly.Request) {
    		r.Headers.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36")
    		r.Headers.Set("x-requested-with", "XMLHttpRequest")
    		r.Headers.Set("origin", "https://fanyi.baidu.com")
    		r.Headers.Set("referer", "https://fanyi.baidu.com/?aldtype=16047")
    		fmt.Println("Visiting", r.URL.String())
    	})
    	c.OnResponse(func(r *colly.Response) {
    		switch r.Request.URL.String() {
    		case detectURL:
    			if err := c.Post(homeURL, map[string]string{"query": keyword}); err != nil {
    				fmt.Println("post", homeURL, "failed:", err)
    			}
    		case homeURL:
    			// Guard the capture group: indexing match[1] on a page
    			// without the token would panic.
    			match := tokenRe.FindStringSubmatch(string(r.Body))
    			if len(match) < 2 {
    				fmt.Println("token not found in response from", homeURL)
    				return
    			}
    			token = match[1]
    			requestData := map[string]string{
    				"from":              "en",
    				"to":                "zh",
    				"query":             keyword,
    				"transtype":         "realtime",
    				"simple_means_flag": "3",
    				"sign":              fmt.Sprint(CallJsCode(keyword)),
    				"token":             token,
    				"domain":            "common",
    			}
    			if err := c.Post(transURL, requestData); err != nil {
    				fmt.Println("post", transURL, "failed:", err)
    			}
    		default:
    			var res Result
    			if err := json.Unmarshal(r.Body, &res); err != nil {
    				fmt.Println("decode translation response failed:", err)
    				return
    			}
    			fmt.Printf("%+v\n", res)
    		}
    	})
    	if err := c.Visit(detectURL); err != nil {
    		fmt.Println("visit", detectURL, "failed:", err)
    	}
    }
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
  • 相关阅读:
    5.使用日志+自定义全局异常过滤器
    计算机组成原理---第四章指令系统---指令格式
    React之一些函数或者方法的扩展
    Node18.x基础使用总结(二)
    有哪些强化学习的算法以及它们的原理及优缺点
    echarts图表 实现高度按照 内容撑起来或者超出部分滚动展示效果
    Go | 函数(包)的使用
    【Vue3】全局组件,递归组件,动态组件,传送组件,缓存组件,异步组件等
    LVGL | 1.LVGL PC模拟器之CodeBlocks
    企业数据分析的维度一般有哪些?
  • 原文地址:https://blog.csdn.net/a1309525802/article/details/126973191