0%

python 爬虫之模拟登陆实战(一)微人大

Python 爬虫实战篇:模拟登录微人大,登录之后才能查看成绩等信息。2015.5.3 更新:

  • 更新使用requests的代码

战前准备

打开fiddler2,然后用chrome打开微人大首页,v.ruc.edu.cn

尝试进行正确的登陆,发现发送包如下:

登陆发送的数据

发现它用 POST 方法发送至 https://uc.tiup.cn/account/login,并且发送了如下五个字段:

  • csrf_token  : 这是一个随机值
  • school_code : 学校代码
  • username  : 用户名
  • password  : 密码
  • remember_me : 是否记住登陆状态(显然就是登陆旁边的那个保持登录状态)

那么,我们首先要获取的是csrf_token  这个玩意,我们往前看,发现刚进页面还没登录时,response有如下字段:

响应头中有 Set-Cookie,设置了一个名为 csrf_token 的 cookie,并且该 cookie 带有 HttpOnly 标记(即页面中的 JavaScript 无法读取它,只能由浏览器在发送请求时自动带上)。

csrf_token是如何建立的

那么我们可以想象登陆过程如下:

  • 打开登陆页面被设置了一个名为csrf_token 的cookie
  • 输入用户名密码登陆,发送五个字段,包括csrf_token
  • 验证通过后进行重定向

 

最终code

这里只是登录的演练,所以没有从页面里解析出具体的成绩,只是把整个页面的 HTML 打印出来(下面的示例默认调用 getMyCourse 打印选课结果页面,换成 getScore 即可打印成绩页面)。

而且服务器太弱,竟然连头部验证什么的都没有。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
# author: hrwhisper
# blog : hrwhisper.me
# date : 2015.5.3

import requests
import re

class vRuc(object):
    """Log in through uc.tiup.cn and fetch pages from app.ruc.edu.cn.

    Each instance owns its own HTTP session (and therefore its own cookies),
    so two instances logged in as different users do not interfere.

    Args:
        session: Optional object exposing ``get``/``post`` like
            ``requests.Session``. When omitted, a fresh ``requests.Session``
            is created for this instance.
    """

    # Login page; the response to this GET carries the ``csrf_token`` cookie.
    url_token = 'https://uc.tiup.cn/account/login?client_id=uc.tiup.cn&redirect_uri=%2Foauth%2Fauthorize%3Fclient_id%3Duc.tiup.cn%26redirect_uri%3Dhttp%253A%252F%252Fv.ruc.edu.cn%252Fsso%252Flogin%253Fredirect_uri%253D%25252FUser%2526school_code%253Druc%2526theme%253Dschools%26response_type%3Dcode%26school_code%3Druc%26scope%3Dall%26sso%3Dtrue%26state%3DZ08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=Z08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx&theme=schools'
    # Endpoint the login form is POSTed to.
    url_login = 'https://uc.tiup.cn/account/login'
    # Authorize URL requested right after the login POST.
    url_redirect = 'https://uc.tiup.cn/oauth/authorize?client_id=uc.tiup.cn&redirect_uri=http%3A%2F%2Fv.ruc.edu.cn%2Fsso%2Flogin%3Fredirect_uri%3D%252FUser%26school_code%3Druc%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=vhfL6zHS_J4cH88Z_A-ZHl7RIUxmPrZEnffvYdtsCf7tWpz7&theme=schools'
    # Score report page; note the ``xh`` query parameter is sent empty.
    url_score = 'http://app.ruc.edu.cn/idc/education/report/xscjreport/XscjReportAction.do?method=printXscjReport&xh='
    # Course-selection action queried by getMyCourse().
    url_myCourse = 'http://app.ruc.edu.cn/idc/education/selectcourses/studentselectcourse/StudentSelectCourseAction.do'

    # Browser-like headers sent with the app.ruc.edu.cn requests only.
    myheader = {
        'Referer': 'http://app.ruc.edu.cn/idc/education/report/xscjreport/XscjReportAction.do?method=queryXscjReport',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    }

    def __init__(self, session=None):
        # A class-level Session would be shared by every instance and leak
        # cookies between logins, so the session is created per instance.
        self.s = requests.Session() if session is None else session

    def startLogin(self, username, password):
        """Run the whole login flow: fetch token, post credentials, redirect."""
        self.login(self.getToken(), username, password)
        self.redirect()

    def getScore(self):
        """Fetch the score report page, print it and return its text.

        The body is decoded as GBK before printing.
        """
        r = self.s.get(self.url_score, headers=self.myheader)
        r.encoding = 'gbk'
        html = r.text
        print(html)
        return html

    def redirect(self):
        """Request the authorize URL with the session's current cookies."""
        self.s.get(self.url_redirect)

    def login(self, csrf_token, username, password):
        """POST the five login form fields to ``url_login``.

        The response is not inspected, so a rejected login is not reported.
        """
        postData = {
            'csrf_token': csrf_token,
            'school_code': 'ruc',
            'username': username,
            'password': password,
            'remember_me': 'true',
        }
        self.s.post(self.url_login, data=postData)

    def getToken(self):
        """Fetch the login page and return the ``csrf_token`` cookie value.

        Raises:
            KeyError: if the response did not set a ``csrf_token`` cookie.
        """
        r = self.s.get(self.url_token)
        return r.cookies['csrf_token']

    def getMyCourse(self, xnd='2014-2015', xq='2'):
        """Query the course-selection result page, print it and return its text.

        Args:
            xnd: Value sent as both ``xnd`` and ``condition_xnd``
                (presumably the academic year -- confirm against the site).
            xq: Value sent as both ``xq`` and ``condition_xq``
                (presumably the term number -- confirm against the site).

        The defaults reproduce the previously hard-coded request.
        """
        postData = {
            'method': 'queryXkjg',
            'isNeedInitSQL': 'true',
            'xnd': xnd,
            'xq': xq,
            'condition_xnd': xnd,
            'condition_xq': xq,
            'condition_kclb': '',
            'condition_spbz': '',
        }
        r = self.s.post(self.url_myCourse, data=postData, headers=self.myheader)
        r.encoding = 'gbk'
        html = r.text
        print(html)
        return html

# Fill in your own username and password before running.
username, password = '', ''

ruc = vRuc()
ruc.startLogin(username, password)
# Prints the course-selection page; call ruc.getScore() instead to print
# the score report page.
ruc.getMyCourse()


 

使用urllib的老版本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- coding: utf-8 -*-
# author:hrwhisper
# date :2015.3.17

import urllib
import urllib2
import re
import cookielib

class vRuc(object):
    """Log in through uc.tiup.cn and fetch pages from app.ruc.edu.cn (urllib2).

    Each instance owns its own cookie jar and opener, so two instances
    logged in as different users do not share cookies.
    """

    # Login page; the response to this GET carries the ``csrf_token`` cookie.
    url_token = 'https://uc.tiup.cn/account/login?client_id=uc.tiup.cn&redirect_uri=%2Foauth%2Fauthorize%3Fclient_id%3Duc.tiup.cn%26redirect_uri%3Dhttp%253A%252F%252Fv.ruc.edu.cn%252Fsso%252Flogin%253Fredirect_uri%253D%25252FUser%2526school_code%253Druc%2526theme%253Dschools%26response_type%3Dcode%26school_code%3Druc%26scope%3Dall%26sso%3Dtrue%26state%3DZ08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=Z08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx&theme=schools'
    # Endpoint the login form is POSTed to.
    url_login = 'https://uc.tiup.cn/account/login'
    # Authorize URL requested right after the login POST.
    url_redirect = 'https://uc.tiup.cn/oauth/authorize?client_id=uc.tiup.cn&redirect_uri=http%3A%2F%2Fv.ruc.edu.cn%2Fsso%2Flogin%3Fredirect_uri%3D%252FUser%26school_code%3Druc%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=vhfL6zHS_J4cH88Z_A-ZHl7RIUxmPrZEnffvYdtsCf7tWpz7&theme=schools'
    # Score report page; note the ``xh`` query parameter is sent empty.
    url_score = 'http://app.ruc.edu.cn/idc/education/report/xscjreport/XscjReportAction.do?method=printXscjReport&xh='
    # Course-selection action queried by getMyCourse().
    url_myCourse = 'http://app.ruc.edu.cn/idc/education/selectcourses/studentselectcourse/StudentSelectCourseAction.do'

    def __init__(self):
        # Class-level jar/opener would be shared by every instance and leak
        # cookies between logins, so both are created per instance.
        self.cookie = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))

    def startLogin(self, username, password):
        """Run the whole login flow: fetch token, post credentials, redirect."""
        self.login(self.getToken(), username, password)
        self.redirect()

    def getScore(self):
        """Fetch the score report page; print its final URL and its HTML.

        The body is decoded as GBK. Returns the decoded HTML.
        """
        request = urllib2.Request(self.url_score)
        response = self.opener.open(request)
        print(response.geturl())
        html = response.read().decode('gbk')

        print(html)
        return html

    def redirect(self):
        """Request the authorize URL with the instance's current cookies."""
        request = urllib2.Request(self.url_redirect)
        self.opener.open(request)

    def login(self, csrf_token, username, password):
        """POST the five login form fields to ``url_login``.

        The response is not inspected, so a rejected login is not reported.
        """
        postData = {
            'csrf_token': csrf_token,
            'school_code': 'ruc',
            'username': username,
            'password': password,
            'remember_me': 'true',
        }
        # Supplying ``data`` is what makes urllib2 issue a POST.
        loginRequest = urllib2.Request(self.url_login, data=urllib.urlencode(postData))
        self.opener.open(loginRequest)

    def getToken(self):
        """Fetch the login page and return the ``csrf_token`` cookie value.

        Returns an empty string when the jar holds no cookie of that name;
        if several match, the last one iterated wins.
        """
        request = urllib2.Request(self.url_token)
        self.opener.open(request)
        csrf_token = ''
        for item in self.cookie:
            if item.name == 'csrf_token':
                csrf_token = item.value
        return csrf_token

    def getMyCourse(self, xnd='2014-2015', xq='2'):
        """Query the course-selection result page, print it and return its text.

        Args:
            xnd: Value sent as both ``xnd`` and ``condition_xnd``
                (presumably the academic year -- confirm against the site).
            xq: Value sent as both ``xq`` and ``condition_xq``
                (presumably the term number -- confirm against the site).

        The defaults reproduce the previously hard-coded request.
        """
        postData = {
            'method': 'queryXkjg',
            'isNeedInitSQL': 'true',
            'xnd': xnd,
            'xq': xq,
            'condition_xnd': xnd,
            'condition_xq': xq,
            'condition_kclb': '',
            'condition_spbz': '',
        }
        request = urllib2.Request(self.url_myCourse, data=urllib.urlencode(postData))
        html = self.opener.open(request).read().decode('gbk')
        print(html)
        return html

# Fill in your own username and password before running.
username, password = '', ''

ruc = vRuc()
ruc.startLogin(username, password)
# Prints the course-selection result page.
ruc.getMyCourse()


请我喝杯咖啡吧~