ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

爬取网易云评论--涉及加密问题、程序断点问题

2021-12-05 09:33:06  阅读:127  来源: 互联网

标签:bsG7z Y6S -- data e6c 爬取 params 断点 encSecKey


  1 #!/usr/bin/env python
  2 # -*- coding:utf-8 -*-
  3 # Author:woshinidaye
  4 
  5 #抓取网易云歌曲的热评,为了简单,不要登录了
  6 #1、找到未加密的参数
  7 #2、想办法把参数进行加密,加密逻辑与网易一致,params,encSecKey
  8 #3、请求,拿到数据
  9 #加密
 10 # var
 11 # bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
 12 # e6c.data = j6d.cs6m({
 13 #     params: bUM2x.encText,
 14 #     encSecKey: bUM2x.encSecKey
 15 # })
 16 
 17 import requests,re,json,base64
 18 from lxml import html
 19 from Crypto.Cipher import AES           #pip install pycryptodome
 20 etree = html.etree
 21 
# Endpoint that the script posts the encrypted form to (csrf_token left empty).
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
# Request method: POST (the script calls requests.post on this url).
# e and f mirror the 2nd and 3rd arguments of the site's JS encryption routine,
# where they are passed to RSAKeyPair; presumably the RSA public exponent and
# modulus in hex -- TODO confirm. Neither is referenced by the Python code in
# this script.
e = '010001'
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
# AES key used by get_params() for the first encryption pass.
g = "0CoJUm6Qyw8W8jud"
# AES key used by get_params() for the second encryption pass. The site's JS
# generates 16 random characters for this value; here it is pinned to a
# constant so that the encSecKey returned by get_encSecKey() can stay fixed too.
i = 'hjbiwTejTo17235R'
 28 def get_encSecKey():
 29     return '6c11f64c829ec94df8ce7d711932c15c4c6e46daf00674f0f22dc1170ba68e809047ee5a7e12c3e07d8c1c3f66b76e4518201b1d4679bd1659a747856f16ac17c32286fba6a82034fa2597004dcca90ca9bfce49bd1a85d09fac162d7b40b390fe8d4c4be15bcc65788d0002fdbd91fb529a71d4d42aa702170fd8e92f1ed87e'
 30 def to_16 (data):
 31     pad = 16 -len(data)%16
 32     data = data + chr(pad)*pad
 33     return data
 34 def enc_params(data,key):
 35     iv = '0102030405060708'
 36     data = to_16(data)
 37     aes = AES.new(key=key.encode('utf-8'),IV=iv.encode('utf-8'),mode=AES.MODE_CBC)
 38     bs = aes.encrypt(data.encode('utf-8'))
 39     return str(base64.b64encode(bs),'utf-8')       #返回params
 40 def get_params(data):       #默认data是字符串
 41     first = enc_params(data,g)
 42     second = enc_params(first,i)
 43     return second
 44 
 45 
 46 # "c6aaef7d7fe54edc416de03808f94c8de2590f943d4f334d8bc485e53f00b95acdfbe704330a01d81bfe666c00b5d681321ab4b04147d0ba1683877e4350b1310e3ad67465ffa1dc9ea57b9d682f1efffbe14ad734a9454faf8e28464491542226109de2fdce6751b63426bd3b18543108c5076ef2b8eab03358ea7a88ce90e9"
# Plaintext request payload. It is serialized with json.dumps() and encrypted
# by get_params() before being posted as the 'params' form field.
data = {
    'csrf_token': "",  # left empty: the script does not log in
    # Paging / ordering fields, all sent as strings.
    # NOTE(review): exact server-side meaning is not shown here -- confirm.
    'cursor': '-1',
    'offset': '0',
    'orderType': '1',
    'pageNo': '1',
    'pageSize': '20',
    # Both fields carry the same resource id. To fetch another song's comments,
    # change the song id here (presumably the trailing digits -- verify).
    'rid': "R_SO_4_1881521546",
    'threadId': "R_SO_4_1881521546"
}
 57 #加密方式
 58 '''
 59     function a(a) {             #随机产生16位字符串
 60         var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
 61         for (d = 0; a > d; d += 1)      #循环16次
 62             e = Math.random() * b.length,       #随机数
 63             e = Math.floor(e),              #取整
 64             c += b.charAt(e);                   #取字符串中的xxxx位置
 65         return c
 66     }
 67     function b(a, b) {          #a是要加密的数据
 68         var c = CryptoJS.enc.Utf8.parse(b)
 69           , d = CryptoJS.enc.Utf8.parse("0102030405060708")
 70           , e = CryptoJS.enc.Utf8.parse(a)
 71           , f = CryptoJS.AES.encrypt(e, c, {
 72             iv: d,              #偏移量
 73             mode: CryptoJS.mode.CBC         #模式CBC
 74         });
 75         return f.toString()
 76     }
 77     function c(a, b, c) {
 78         var d, e;
 79         return setMaxDigits(131),
 80         d = new RSAKeyPair(b,"",c),
 81         e = encryptedString(d, a)
 82     } 
 83     function d(d, e, f, g) {        #d:data   e:010001    f:bsG7z(WW3x.md)  g:bsG7z(["爱心", "女孩", "惊恐", "大笑"])
 84         var h = {}
 85           , i = a(16);          #i就是16位随机字符
 86         return h.encText = b(d, g), 
 87         h.encText = b(h.encText, i),           #得到params,       做了两次加密,第一次 data+g
 88         h.encSecKey = c(i, e, f),              #得到encSecKey  
 89         h
 90     }
 91 '''
 92 # var bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
 93 '''
 94   u6o.be6Y = function(Y6S, e6c) {
 95         var i6c = {}
 96           , e6c = NEJ.X({}, e6c)
 97           , mo0x = Y6S.indexOf("?");
 98         if (window.GEnc && /(^|\.com)\/api/.test(Y6S) && !(e6c.headers && e6c.headers[eu7n.AI4M] == eu7n.FD6x) && !e6c.noEnc) {
 99             if (mo0x != -1) {
100                 i6c = j6d.gW8O(Y6S.substring(mo0x + 1));
101                 Y6S = Y6S.substring(0, mo0x)
102             }
103             if (e6c.query) {
104                 i6c = NEJ.X(i6c, j6d.fT8L(e6c.query) ? j6d.gW8O(e6c.query) : e6c.query)
105             }
106             if (e6c.data) {
107                 i6c = NEJ.X(i6c, j6d.fT8L(e6c.data) ? j6d.gW8O(e6c.data) : e6c.data)
108             }
109             i6c["csrf_token"] = u6o.gQ8I("__csrf");
110             Y6S = Y6S.replace("api", "weapi");
111             e6c.method = "post";
112             delete e6c.query;
113             var bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
114             e6c.data = j6d.cs6m({
115                 params: bUM2x.encText,
116                 encSecKey: bUM2x.encSecKey
117             })
118         }
119         var cdnHost = "y.music.163.com";
120         var apiHost = "interface.music.163.com";
121         if (location.host === cdnHost) {
122             Y6S = Y6S.replace(cdnHost, apiHost);
123             if (Y6S.match(/^\/(we)?api/)) {
124                 Y6S = "//" + apiHost + Y6S
125             }
126             e6c.cookie = true
127         }
128         cxg5l(Y6S, e6c)
129 '''
130 
# Post the encrypted payload as form fields and print the raw response body.
# A timeout is set because requests waits indefinitely by default, which would
# leave the script hanging if the server stops responding.
resp = requests.post(
    url,
    data={
        'params': get_params(json.dumps(data)),
        "encSecKey": get_encSecKey(),
    },
    timeout=10,
)
print(resp.text)
136 
137 
138 #上面是获取某一首歌的评论,变量主要在data里面,更换歌曲ID,可以通过页面查找获取
139 '''
140 url = 'https://music.163.com/playlist?id=6920064959'
141 resp = requests.get(url=url,headers=headers)
142 resp.encoding = 'utf-8'
143 # print(resp.text)
144 # 用RE
145 # obj = re.compile(r'<li><a href="/(?P<song_id>.*?)">(?P<song_title>.*?)</a></li>',re.S)
146 # songs = obj.finditer(resp.text)
147 # for my_list in songs:
148 #     aa = my_list.group('song_id').split('=')[-1]
149 #     print(aa,'\t',my_list.group('song_title'))
150 
151 #用Xpath
152 # etree = html.etree
153 # # print(resp.text)
154 # html = etree.HTML(resp.text)
155 # test = html.xpath('//html/body/div[3]/div[1]/div/div/div[2]/div[2]//a/@href')
156 # #这个地方试了好久,写全的话取不出来,感觉是跟页面有嵌套有关系
157 # print(test)
158 
159 #用bs4
160 # from bs4 import BeautifulSoup
161 # html = BeautifulSoup(resp.text,'html.parser')
162 # test = html.find('ul',class_='f-hide').find_all('a')
163 # print(test)
164 '''

 

标签:bsG7z,Y6S,--,data,e6c,爬取,params,断点,encSecKey
来源: https://www.cnblogs.com/woshinidaye123/p/15644060.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有