-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathapi.py
More file actions
173 lines (145 loc) · 5.6 KB
/
api.py
File metadata and controls
173 lines (145 loc) · 5.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
'''lupro api'''
import gevent
from .config import HTTP_ENGINE
from .typing import Union, Response
from .hooks import lupros
from .HTTPengine import lupro, analyze, requests
from .publictool import original
from copy import copy
# 实例化参数字典
def requests_dict(luprodir,url,no) -> dict:
    '''Build the constructor arguments for one lupro instance from a template.

    The template is never modified: the request triple stored under
    `'lupros'` is rebuilt around a copy of its keyword mapping instead of
    being edited in place, so dictionaries produced by successive calls do
    not share (and overwrite) each other's request keywords.

    Args:
        `luprodir` : `dict` template arguments; must provide `'lupros'`
            (a sequence of first argument, positional arguments, keyword
            arguments) and `'filename'`
        `url` : `str` URL to request
        `no` : filename suffix; `str(no)` is appended to the template filename
    Returns:
        dict : shallow copy of `luprodir` with the URL and filename replaced
    '''
    luprodict = copy(luprodir)
    request = luprodict['lupros']
    head, positional = request[0], request[1]
    # Copy the keyword mapping so the template and sibling results stay independent.
    keywords = copy(request[2])
    if positional:
        # The URL travels as the first positional argument: swap it out.
        positional = (url, *positional[1:])
    else:
        # No positional arguments: the URL travels as the `url` keyword.
        keywords['url'] = url
    luprodict['lupros'] = (head, positional, keywords)
    luprodict['filename'] += str(no)
    return luprodict
# 实例化生成器
def generator(instantiation : lupro, url : list, filenameNo : Union[list, None] = None) -> list:
    '''Create one lupro instance per URL from a template instance.

    Args:
        `instantiation` : `lupro` template instance; its `__reprs__()` result
            supplies the constructor arguments shared by every new instance
        `url` : `list` URLs, one per instance to create
        `filenameNo` : `list` filename suffixes, one per URL; each is appended
            to the template filename. When omitted or empty, the positions
            `0 .. len(url) - 1` are used instead
    Returns:
        list : lupro instances, in the order of `url`
    Raises:
        ValueError : `filenameNo` is non-empty and its length differs from `url`
    '''
    if not filenameNo:
        # No suffixes supplied: number the files by position.
        filenameNo = range(len(url))
    elif len(url) != len(filenameNo):
        raise ValueError('"url" needs to be consistent with "filenameNo"!')
    template = instantiation.__reprs__()
    return [
        lupro(**requests_dict(template, link, suffix))
        for link, suffix in zip(url, filenameNo)
    ]
# 批量请求
def Batchsubmission(generator) -> list:
    '''Run the `task` method of every instance through gevent and collect the results.

    Args:
        `generator` : `list[lupro]` instances whose `task` method is spawned
    Returns:
        list : the `value` of each spawned greenlet, in the order of `generator`
    '''
    jobs = []
    for instance in generator:
        jobs.append(gevent.spawn(instance.task))
    # Block until every greenlet has finished before reading its value.
    gevent.joinall(jobs)
    return [job.value for job in jobs]
# 批量下载
def BulkDownload(generator) -> list:
    '''Run the `save_file` method of every instance through gevent and collect the results.

    Args:
        `generator` : `list[lupro]` instances whose `save_file` method is spawned
    Returns:
        list : the `value` of each spawned greenlet, in the order of `generator`
            (file paths, according to the original documentation)
    '''
    downloads = []
    for instance in generator:
        downloads.append(gevent.spawn(instance.save_file))
    # Block until every greenlet has finished before reading its value.
    gevent.joinall(downloads)
    return [download.value for download in downloads]
# xpath 批量解析
def xpath_Batchanalysis(generator : Union["list[lupro]", "list[Response]"], analytic : dict, auxiliary = original) -> list:
    '''Apply one xpath parsing dictionary to a batch of lupro instances or responses.

    The type of the first element decides how the whole batch is handled:
    for lupro instances, `xpath_analysis` is spawned through gevent for each
    one; anything else is passed element by element to `analyze.xpath`.

    Args:
        `generator` : `Union[list[lupro], list[Response]]` batch to parse
        `analytic` : `dict` parsing dictionary
        `auxiliary` : `function` custom post-processing hook, forwarded unchanged
    Returns:
        list : one parsing result per element, in input order; `[]` for an
            empty batch
    '''
    # An empty batch has no first element to inspect and nothing to parse.
    if not generator:
        return []
    if not isinstance(generator[0], lupro):
        return [analyze.xpath(response, analytic, auxiliary) for response in generator]
    jobs = [gevent.spawn(item.xpath_analysis, analytic, auxiliary) for item in generator]
    gevent.joinall(jobs)
    return [job.value for job in jobs]
# json 批量解析
def json_Batchanalysis(generator : Union["list[lupro]", "list[Response]"], analytic : dict, auxiliary = original) -> list:
    '''Apply one JSON parsing dictionary to a batch of lupro instances or responses.

    The type of the first element decides how the whole batch is handled:
    for lupro instances, `json_analysis` is spawned through gevent for each
    one; anything else is passed element by element to `analyze.json`.
    (The original documentation names `dtanys` as the JSON parser.)

    Args:
        `generator` : `Union[list[lupro], list[Response]]` batch to parse
        `analytic` : `dict` parsing dictionary
        `auxiliary` : `function` custom post-processing hook, forwarded unchanged
    Returns:
        list : one parsing result per element, in input order; `[]` for an
            empty batch
    '''
    # An empty batch has no first element to inspect and nothing to parse.
    if not generator:
        return []
    if not isinstance(generator[0], lupro):
        return [analyze.json(response, analytic, auxiliary) for response in generator]
    jobs = [gevent.spawn(item.json_analysis, analytic, auxiliary) for item in generator]
    gevent.joinall(jobs)
    return [job.value for job in jobs]
# 正则 批量解析
def re_Batchanalysis(generator : Union["list[lupro]", "list[Response]"], analytic : dict, auxiliary = original) -> list:
    '''Apply one regular-expression parsing dictionary to a batch of lupro instances or responses.

    The type of the first element decides how the whole batch is handled:
    for lupro instances, `re_analysis` is spawned through gevent for each
    one; anything else is passed element by element to `analyze.re`.

    Args:
        `generator` : `Union[list[lupro], list[Response]]` batch to parse
        `analytic` : `dict` regex parsing dictionary (`str` keys mapped to
            functions, according to the original documentation)
        `auxiliary` : `function` custom post-processing hook, forwarded unchanged
    Returns:
        list : one parsing result per element, in input order; `[]` for an
            empty batch
    '''
    # An empty batch has no first element to inspect and nothing to parse.
    if not generator:
        return []
    if not isinstance(generator[0], lupro):
        return [analyze.re(response, analytic, auxiliary) for response in generator]
    jobs = [gevent.spawn(item.re_analysis, analytic, auxiliary) for item in generator]
    gevent.joinall(jobs)
    return [job.value for job in jobs]
# 批量解析
def Batchanalysis(mold : str ,generator : Union["list[lupro]", "list[Response]"], analytic : dict, auxiliary = original) -> list:
    '''Dispatch a batch to the xpath, json or regex batch parser selected by name.

    Args:
        `mold` : `str` parsing method: `'xpath'`, `'json'` or `'re'`
        `generator` : `Union[list[lupro], list[Response]]` batch to parse
        `analytic` : `dict` parsing dictionary
        `auxiliary` : `function` custom post-processing hook, forwarded unchanged
    Returns:
        list : whatever the selected batch parser returns
    Raises:
        TypeError : `mold` is not one of the supported method names
    '''
    parsers = (
        ('xpath', xpath_Batchanalysis),
        ('json', json_Batchanalysis),
        ('re', re_Batchanalysis),
    )
    # Compare in the fixed order above and hand off to the first match.
    for label, parser in parsers:
        if mold == label:
            return parser(generator, analytic, auxiliary)
    raise TypeError('No corresponding parsing method!')
# 批量异步请求
def async_lupro(generator : "list[lupros]") -> list:
    '''Issue every request description through `HTTP_ENGINE.request` using gevent.

    Each entry is indexed as `entry[0]` (first positional argument),
    `entry[1]` (further positional arguments, unpacked) and `entry[2]`
    (keyword arguments, unpacked).

    Args:
        `generator` : `list[lupros]` request descriptions
    Returns:
        list : the `value` of each spawned greenlet, in the order of `generator`
            (Response objects, according to the original documentation)
    '''
    pending = []
    for entry in generator:
        pending.append(
            gevent.spawn(HTTP_ENGINE.request, entry[0], *entry[1], **entry[2])
        )
    # Block until every greenlet has finished before reading its value.
    gevent.joinall(pending)
    return [job.value for job in pending]