python 爬虫之模拟登陆实战(一)微人大

python爬虫实战篇:模拟登陆微人大。只有登陆之后,才能查看成绩等信息。

2015.5.3更新:

  • 更新使用requests的代码

战前准备

打开fiddler2,然后用chrome打开微人大首页,v.ruc.edu.cn

尝试进行正确的登陆,发现发送包如下:

登陆发送的数据

发现它用post方法发送至https://uc.tiup.cn/account/login,且发送了如下五个字段:

  • csrf_token  : 这是一个随机值
  • school_code : 学校代码
  • username  : 用户名
  • password  : 密码
  • remember_me : 是否记住登陆状态(显然就是登陆旁边的那个保持登录状态)

那么,我们首先要获取的是csrf_token  这个玩意,我们往前看,发现刚进页面还没登录时,response有如下字段:

set-cookie,设置了一个名为csrf_token的cookie,并且该cookie带有HttpOnly标记(即只会随HTTP请求发送,页面中的JavaScript无法读取它)

csrf_token是如何建立的

那么我们可以想象登陆过程如下:

  • 打开登陆页面被设置了一个名为csrf_token 的cookie
  • 输入用户名密码登陆,发送五个字段,包括csrf_token
  • 验证通过后进行重定向

 

最终code

只是登陆的演练,所以没有从里面抠出成绩。只是打印出成绩的整个页面的HTML

而且服务器太弱,竟然连头部验证什么的都没有。

# -*- coding: utf-8 -*-
# author: hrwhisper
# blog  : hrwhisper.me
# date  : 2015.5.3

import requests
import re

class vRuc:
    """Small client that logs in to v.ruc.edu.cn via the uc.tiup.cn login form.

    Typical use: call ``startLogin(username, password)`` once, then
    ``getMyCourse()`` or ``getScore()``; both print the raw HTML of the
    page they fetch instead of parsing it.
    """

    # Login page. getToken() fetches it to obtain the csrf_token cookie.
    url_token = 'https://uc.tiup.cn/account/login?client_id=uc.tiup.cn&redirect_uri=%2Foauth%2Fauthorize%3Fclient_id%3Duc.tiup.cn%26redirect_uri%3Dhttp%253A%252F%252Fv.ruc.edu.cn%252Fsso%252Flogin%253Fredirect_uri%253D%25252FUser%2526school_code%253Druc%2526theme%253Dschools%26response_type%3Dcode%26school_code%3Druc%26scope%3Dall%26sso%3Dtrue%26state%3DZ08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=Z08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx&theme=schools'
    # Endpoint that receives the login form (POST).
    url_login = 'https://uc.tiup.cn/account/login'
    # Requested after a login attempt; presumably completes the SSO hand-over
    # to v.ruc.edu.cn -- verify against a captured browser session.
    url_redirect = 'https://uc.tiup.cn/oauth/authorize?client_id=uc.tiup.cn&redirect_uri=http%3A%2F%2Fv.ruc.edu.cn%2Fsso%2Flogin%3Fredirect_uri%3D%252FUser%26school_code%3Druc%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=vhfL6zHS_J4cH88Z_A-ZHl7RIUxmPrZEnffvYdtsCf7tWpz7&theme=schools'
    # Score report page. The trailing ``xh=`` parameter is sent empty.
    url_score = 'http://app.ruc.edu.cn/idc/education/report/xscjreport/XscjReportAction.do?method=printXscjReport&xh='
    # Course selection query endpoint (POST).
    url_myCourse = 'http://app.ruc.edu.cn/idc/education/selectcourses/studentselectcourse/StudentSelectCourseAction.do'

    # NOTE: class-level attribute, so all instances share one session and
    # therefore one cookie jar.
    s = requests.Session()

    # Extra headers sent with the app.ruc.edu.cn requests.
    myheader = {
        'Referer': 'http://app.ruc.edu.cn/idc/education/report/xscjreport/XscjReportAction.do?method=queryXscjReport',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    }

    def startLogin(self, username, password):
        """Run the whole login sequence: fetch token, post form, follow redirect.

        Raises:
            KeyError: if the login page did not provide a csrf_token cookie.
        """
        self.login(self.getToken(), username, password)
        self.redirect()

    def getScore(self):
        """Fetch the score report page and print its HTML (decoded as GBK)."""
        r = self.s.get(self.url_score, headers=self.myheader)
        r.encoding = 'gbk'
        # Parenthesised form behaves the same on Python 2 and is valid Python 3.
        print(r.text)

    def redirect(self):
        """Request ``url_redirect`` with the session; the response is ignored."""
        self.s.get(self.url_redirect)

    def login(self, csrf_token, username, password):
        """POST the five-field login form to ``url_login``.

        The response is not inspected, so a rejected login is not reported.
        """
        postData = {
            'csrf_token': csrf_token,
            'school_code': 'ruc',
            'username': username,
            'password': password,
            'remember_me': 'true'
        }
        self.s.post(self.url_login, data=postData)

    def getToken(self):
        """Load the login page and return the value of the csrf_token cookie.

        Raises:
            KeyError: if neither the response nor the session holds the cookie.
        """
        r = self.s.get(self.url_token)
        csrf_token = r.cookies.get('csrf_token')
        if csrf_token is None:
            # r.cookies only holds cookies set by the final response; if the
            # request was redirected, the cookie may have been set on an
            # earlier hop and is then found only in the session's jar.
            csrf_token = self.s.cookies.get('csrf_token')
        if csrf_token is None:
            raise KeyError('csrf_token cookie was not set by the login page')
        return csrf_token

    def getMyCourse(self):
        """POST the course query form and print the resulting HTML (GBK).

        The form values are hard-coded ('2014-2015' and '2'); presumably these
        are the academic year and term -- confirm against the site.
        """
        postData = {
            'method': 'queryXkjg',
            'isNeedInitSQL': 'true',
            'xnd': '2014-2015',
            'xq': '2',
            'condition_xnd': '2014-2015',
            'condition_xq': '2',
            'condition_kclb': '',
            'condition_spbz': ''
        }
        r = self.s.post(self.url_myCourse, data=postData, headers=self.myheader)
        r.encoding = 'gbk'
        print(r.text)

# Fill in your own account credentials before running.
username = ''
password = ''

# Log in once, then print the course-selection page.
ruc = vRuc()
ruc.startLogin(username, password)
ruc.getMyCourse()
# To print the score report page as well, uncomment the next line:
# ruc.getScore()

 

使用urllib的老版本

# -*- coding: utf-8 -*-
# author:hrwhisper
# date  :2015.3.17


import urllib
import urllib2
import re
import cookielib  

class vRuc:
    """Legacy urllib2/cookielib client for logging in to v.ruc.edu.cn.

    Call ``startLogin(username, password)`` first; ``getMyCourse()`` and
    ``getScore()`` then print the raw HTML of the pages they fetch.
    """

    # Login page, fetched by getToken() to obtain the csrf_token cookie.
    url_token = 'https://uc.tiup.cn/account/login?client_id=uc.tiup.cn&redirect_uri=%2Foauth%2Fauthorize%3Fclient_id%3Duc.tiup.cn%26redirect_uri%3Dhttp%253A%252F%252Fv.ruc.edu.cn%252Fsso%252Flogin%253Fredirect_uri%253D%25252FUser%2526school_code%253Druc%2526theme%253Dschools%26response_type%3Dcode%26school_code%3Druc%26scope%3Dall%26sso%3Dtrue%26state%3DZ08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=Z08-C1r99RVtBwYVkEm12XxfhvVlW680YDKwfAaPU1DG-_mx&theme=schools'
    # Endpoint that receives the login form (POST).
    url_login = 'https://uc.tiup.cn/account/login'
    # Requested right after the login POST.
    url_redirect = 'https://uc.tiup.cn/oauth/authorize?client_id=uc.tiup.cn&redirect_uri=http%3A%2F%2Fv.ruc.edu.cn%2Fsso%2Flogin%3Fredirect_uri%3D%252FUser%26school_code%3Druc%26theme%3Dschools&response_type=code&school_code=ruc&scope=all&sso=true&state=vhfL6zHS_J4cH88Z_A-ZHl7RIUxmPrZEnffvYdtsCf7tWpz7&theme=schools'
    # Score report page; the trailing ``xh=`` parameter is sent empty.
    url_score = 'http://app.ruc.edu.cn/idc/education/report/xscjreport/XscjReportAction.do?method=printXscjReport&xh='
    # Course selection query endpoint (POST).
    url_myCourse = 'http://app.ruc.edu.cn/idc/education/selectcourses/studentselectcourse/StudentSelectCourseAction.do'

    # Class-level cookie jar and opener: shared by every instance.
    cookie = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

    def startLogin(self, username, password):
        """Fetch the CSRF token, submit the login form, then hit the redirect URL."""
        token = self.getToken()
        self.login(token, username, password)
        self.redirect()

    def getScore(self):
        """Open the score page, print its final URL, then print its HTML (GBK)."""
        response = self.opener.open(urllib2.Request(self.url_score))
        print(response.geturl())
        print(response.read().decode('gbk'))

    def redirect(self):
        """Request ``url_redirect`` through the cookie-aware opener."""
        self.opener.open(urllib2.Request(self.url_redirect))

    def login(self, csrf_token, username, password):
        """POST the url-encoded login form to ``url_login``; response is ignored."""
        form = urllib.urlencode({
            'csrf_token': csrf_token,
            'school_code': 'ruc',
            'username': username,
            'password': password,
            'remember_me': 'true'
        })
        self.opener.open(urllib2.Request(self.url_login, data=form))

    def getToken(self):
        """Load the login page and return the csrf_token cookie value.

        Returns an empty string when the jar holds no such cookie; if several
        cookies carry that name, the last one iterated wins.
        """
        self.opener.open(urllib2.Request(self.url_token))
        found = [c.value for c in self.cookie if c.name == 'csrf_token']
        if not found:
            return ''
        return found[-1]

    def getMyCourse(self):
        """POST the hard-coded course query and print the HTML reply (GBK)."""
        form = urllib.urlencode({
            'method': 'queryXkjg',
            'isNeedInitSQL': 'true',
            'xnd': '2014-2015',
            'xq': '2',
            'condition_xnd': '2014-2015',
            'condition_xq': '2',
            'condition_kclb': '',
            'condition_spbz': ''
        })
        reply = self.opener.open(urllib2.Request(self.url_myCourse, data=form))
        print(reply.read().decode('gbk'))

# Put your own account credentials here before running.
username = ''
password = ''

# Log in, then print the course-selection page.
ruc = vRuc()
ruc.startLogin(username, password)
ruc.getMyCourse()

 

本博客若无特殊说明则由 hrwhisper 原创发布
转载请注明出处:细语呢喃 > python 爬虫之模拟登陆实战(一)微人大
本文地址:https://www.hrwhisper.me/python-web-crawler-login-vruc/

打赏一杯咖啡钱呗

python learning, 学习 , . permalink.

Leave a Reply

Your email address will not be published. Required fields are marked *