-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScoreSpider.py
More file actions
113 lines (106 loc) · 3.96 KB
/
ScoreSpider.py
File metadata and controls
113 lines (106 loc) · 3.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#-*-coding:utf-8 -*-
#!/usr/bin/python
import urllib
import urllib2
import cookielib
import requests
import re
import os
def login(loginURL):
    """Prompt for credentials and submit the login form at *loginURL*.

    The login page is fetched first so that its ``__VIEWSTATE`` value can be
    sent back together with the student ID and password.

    Args:
        loginURL: Full URL of the login page.

    Returns:
        tuple: ``(request, name, ID)`` -- the ``urllib2.Request`` used for the
        login POST, the name extracted from the response page, and the
        student ID that was typed in.

    Raises:
        SystemExit: After a failed login, once the retry started through
            ``main()`` has finished.
    """
    # Imported locally so this function brings its own dependency with it.
    import getpass

    ID = raw_input("Please input your student ID:")
    # getpass keeps the password from being echoed to the terminal.
    Password = getpass.getpass("Please input your password:")
    print('Loading........')
    page = urllib2.urlopen(loginURL).read()
    postdata = urllib.urlencode({
        '__VIEWSTATE': getVIEW(page),
        'txtYhm': ID,  # student ID
        'txtMm': Password,  # password
        'rblJs': '学生',
        'btnDl': ' 登录'})
    myRequest = urllib2.Request(loginURL, postdata, getheaders())
    loginPage = urllib2.urlopen(myRequest).read()
    page = unicode(loginPage, 'gb2312').encode("utf-8")
    # Only the name lookup is guarded: getName raises IndexError when the
    # response page does not contain the greeting span, which is treated here
    # as a rejected login.
    try:
        name = getName(page)
    except IndexError:
        print("User-name or password error, try again!")
        # Start the whole flow again, then stop this (failed) attempt so the
        # caller never receives a missing result.
        main()
        raise SystemExit
    return (myRequest, name, ID)
def getheaders(Request=None):
    """Build the HTTP headers that accompany every request to the server.

    Args:
        Request: Optional previous request object exposing
            ``get_full_url()``. When given, its URL is used as the
            ``Referer`` of the next request.

    Returns:
        dict: A new header dictionary on every call. ``Referer`` is the empty
        string when no previous request is supplied.
    """
    # NOTE(review): 'Accept-Encoding' advertises gzip/deflate, but the callers
    # in this file read the response body as-is -- confirm the server never
    # compresses its replies.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': '202.200.112.200',
        'Pragma': 'no-cache',
        'Referer': '',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:44.0) Gecko/20100101 Firefox/44.0 Iceweasel/44.7.36'
    }
    # Identity check: ``!= None`` can be overridden by the object being
    # compared, ``is not None`` cannot.
    if Request is not None:
        # Use the address of the previous request as this request's Referer.
        headers['Referer'] = Request.get_full_url()
    return headers
def gethash(url):
    """Return the random hash segment issued when the login page is first opened.

    A HEAD request is sent to *url* and the first 28 characters of the
    ``location`` response header are returned.

    Args:
        url: Address of the login page.

    Returns:
        The first 28 characters of the ``location`` header.

    Raises:
        KeyError: If the response carries no ``location`` header.
    """
    # NOTE(review): the 28-character cut presumably covers the session
    # segment including its surrounding slashes, since main() appends the
    # page name directly after it -- confirm against a live response.
    redirect_target = requests.head(url).headers['location']
    return redirect_target[:28]
def getVIEW(Page):
    """Extract the ``__VIEWSTATE`` form-field value from a page.

    Args:
        Page: HTML text of the page.

    Returns:
        The value of the first ``__VIEWSTATE`` field found in *Page*.

    Raises:
        IndexError: If *Page* contains no ``__VIEWSTATE`` field.
    """
    # ``[^"]+`` stops at the attribute's closing quote. A greedy ``.+`` would
    # run on to the last ``" `` on the same line and swallow any attributes
    # or tags that follow the value.
    pattern = re.compile(r'name="__VIEWSTATE" value="([^"]+)" ')
    return pattern.findall(Page)[0]
def Print(Score_html):
    """Print the score rows found in *Score_html*, grouped by year and term.

    Every table row matched by the pattern yields six captured fields. The
    first two are shown in a header (labelled 学年 and 学期) that is printed
    whenever they differ from those of the previous row; the remaining four
    are printed tab-separated on a line of their own.

    Args:
        Score_html: HTML text of the score page.

    Returns:
        None. Output goes to standard output.
    """
    row_pattern = re.compile(
        r"<tr.*?<td>([0-9\-]{9}).*?([1-2]).*?[0-9].*?<td>(.*?)</td.*?<td>.*?</td><td>([0-9\.]*)</td><td>.*?([0-9\.]*)</td><td>(.*?)</td>.*?</tr>",
        re.S)
    separator = '-------------------------------------------------------------------'
    current_term = ('', '')
    for row in row_pattern.findall(Score_html):
        term = row[0:2]
        if term != current_term:
            # A new (year, term) pair starts a new section.
            current_term = term
            print(separator)
            print('\t\t学年:%s\t\t学期:%s' % current_term)
            print(separator)
        print('\t%s\t%s\t%s\t%s' % row[2:6])
def getName(loginPage):
    """Extract the student's name from the page returned after logging in.

    The name is the text between ``<span id="xhxm">`` and the trailing
    ``同学</span>``.

    Args:
        loginPage: HTML text of the page returned by the login request.

    Returns:
        The captured text of the first matching span.

    Raises:
        IndexError: If the span is absent. ``login`` catches this to detect a
            rejected login.
    """
    # The IndexError from an empty match list is allowed to propagate: the
    # caller handles the retry, so nothing is caught or printed here.
    pattern = re.compile(r'<span id="xhxm">(.+)同学</span>')
    return pattern.findall(loginPage)[0]
def main():
    """Log in, request the score page and print the scores.

    Steps, in order:
      1. Obtain the random URL segment from the server via ``gethash``.
      2. Log in through ``login``, which prompts for the credentials.
      3. Open the score page to collect its ``__VIEWSTATE`` value.
      4. Post the "query by academic year" button (``Button5``) back to the
         same page and print the resulting table with ``Print``.
    """
    def fetch_utf8(request):
        # Responses are decoded as gb2312 and re-encoded as UTF-8 so they
        # match the UTF-8 literals used by the regexes in this file.
        return unicode(urllib2.urlopen(request).read(), 'gb2312').encode("utf-8")

    base_url = 'http://202.200.112.200'  # login site
    hashcode = gethash(base_url + '/default6.aspx')

    # Data handed back by a successful login.
    request, name, student_id = login(base_url + hashcode + 'default6.aspx')
    print(name)

    query = urllib.urlencode({
        'xh': student_id,
        'xm': name,
        'gnmkdm': 'N121605'
    })
    score_url = base_url + hashcode + 'xscj_gc.aspx?' + query

    # Score query page; the previous request supplies the Referer.
    request = urllib2.Request(score_url, None, getheaders(request))
    score_page = fetch_utf8(request)

    form = urllib.urlencode({
        "__VIEWSTATE": getVIEW(score_page),
        "Button5": "按学年查询"
    })

    # "Query by academic year" result page.
    request = urllib2.Request(score_url, form, getheaders(request))
    Print(fetch_utf8(request))  # scores
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()