-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiemthi.py
More file actions
45 lines (45 loc) · 1.9 KB
/
diemthi.py
File metadata and controls
45 lines (45 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import logging
from urllib.parse import parse_qs, urlparse

import scrapy
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
# Where the collected rows are written; the spider saves to this path.
dest_filename = 'C:/Users/vohai2003/Pythonproject/crawled_data.xlsx'

# A single in-memory workbook shared by every parse() call.
wb = Workbook()
ws = wb.active
ws.title = "Crawled_data"

# Header row: the identifier column followed by one column per subject.
# The column order here fixes the indices the spider writes scores into.
ws.append([
    "SBD",
    "Toán",
    "Văn",
    "Lý",
    "Hóa",
    "Sinh",
    "Sử",
    "Địa",
    "Ngoại ngữ",
    "GDCD",
])

# Number of responses handled so far; incremented by DiemthiSpider.parse.
j = 0
class DiemthiSpider(scrapy.Spider):
    """Fetch score lookup pages and append one worksheet row per response.

    Each start URL carries an ``sbd`` query parameter. The row written for a
    response takes its first cell from that parameter rather than from an
    arrival counter: Scrapy processes requests concurrently, so responses are
    not guaranteed to come back in ``start_urls`` order, and a counter would
    attach scores to the wrong identifier.

    The spider relies on the module-level ``ws``, ``wb``, ``dest_filename``
    and ``j`` objects defined above this class.
    """

    name = 'diemthi'

    # One lookup URL per identifier, 33000001 through 33022000.
    start_urls = [
        "https://www.vietnamnet.vn/vn/giao-duc/tra-cuu-diem-thi-thpt/?y=2021&sbd=33"
        + "{:06}".format(i)
        for i in range(1, 22001)
    ]

    # When this node is present the page is treated as having no scores.
    # NOTE(review): presumably a "not found" notice - confirm on a live page.
    _NOTICE_XPATH = '/html/body/div[1]/div[2]/div[3]/h5[1]'
    _RESULT_LINE_XPATH = '//div[@class="d-flex justify-content-between search-result-line py-3 px-3"]'

    # Worksheet column index for each subject label (exact match on the first
    # text node of a result line). The score is the second text node.
    _SUBJECT_COLUMNS = {
        "Toán": 1,
        "Văn": 2,
        "Lý": 3,
        "Hóa": 4,
        "Sinh": 5,
        "Sử": 6,
        "Địa": 7,
        "GDCD": 9,
    }
    # The foreign-language line is matched by substring and its score is read
    # from the fourth text node instead of the second.
    _FOREIGN_LANGUAGE_LABEL = "Ngoại ngữ"
    _FOREIGN_LANGUAGE_COLUMN = 8
    _FOREIGN_LANGUAGE_VALUE_INDEX = 3

    # The workbook is written to disk every this-many handled responses.
    _SAVE_EVERY = 100

    def parse(self, response):
        """Turn one lookup page into a worksheet row.

        Args:
            response: the Scrapy response for one lookup URL.

        Side effects:
            Increments the module-level counter ``j``, appends a row to
            ``ws`` and periodically saves ``wb`` to ``dest_filename``.
        """
        global j
        j += 1

        sbd = self._sbd_from_url(response.url)
        if sbd is None:
            # Without an identifier the row cannot be attributed to anyone.
            self.logger.warning("No sbd parameter in %s; row skipped", response.url)
        else:
            # Subjects without a score keep the "x" placeholder.
            row = [sbd] + ["x"] * 9
            if not response.xpath(self._NOTICE_XPATH).getall():
                for line in response.xpath(self._RESULT_LINE_XPATH):
                    self._fill_score(row, line.xpath('.//text()').getall())
            # NOTE(review): a row is appended for every response, including
            # pages without scores (all placeholders) - confirm this matches
            # the intended output.
            ws.append(row)

        if j % self._SAVE_EVERY == 0:
            wb.save(filename=dest_filename)

    def closed(self, reason):
        """Save the workbook when the spider closes.

        The periodic save in ``parse`` only fires on multiples of 100 handled
        responses, so rows collected after the last multiple would otherwise
        never reach disk (for example when some requests fail or the crawl is
        interrupted).

        Args:
            reason: the close reason string supplied by Scrapy (unused).
        """
        wb.save(filename=dest_filename)

    @staticmethod
    def _sbd_from_url(url):
        """Return the ``sbd`` query parameter of *url*, or None if absent."""
        values = parse_qs(urlparse(url).query).get("sbd")
        return values[0] if values else None

    def _fill_score(self, row, data):
        """Store the score found in one result line into *row*.

        Args:
            row: the worksheet row being built; modified in place.
            data: the text nodes of one result line, label first.

        Lines with an unknown label, too few text nodes, or a non-numeric
        score leave the placeholder in *row* untouched instead of raising,
        so one malformed line does not discard the whole row.
        """
        if not data:
            return

        label = data[0]
        if self._FOREIGN_LANGUAGE_LABEL in label:
            column = self._FOREIGN_LANGUAGE_COLUMN
            value_index = self._FOREIGN_LANGUAGE_VALUE_INDEX
        else:
            column = self._SUBJECT_COLUMNS.get(label)
            value_index = 1

        if column is None or len(data) <= value_index:
            return

        try:
            row[column] = float(data[value_index])
        except ValueError:
            self.logger.warning(
                "Non-numeric score %r for %r (sbd %s)",
                data[value_index], label, row[0],
            )