-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspider.py
More file actions
68 lines (55 loc) · 2.17 KB
/
spider.py
File metadata and controls
68 lines (55 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import sys
import time
import json
import requests
from spider_ikanhm_top import Ikanhm
from spider_se8_us import Se8us
from spider_rouman5 import Rouman
class Spider():
    """Drive the per-site comic spiders over the books listed in ``books.json``.

    ``books.json`` is read from the storage root and is expected to hold a
    list of objects with at least the keys ``id``, ``name`` and ``spiderby``.
    Images for each book go to ``<store_dir_path>/images/<book name>``.
    """

    def __init__(self, store_dir_path="/var/www/HanMan"):
        """Remember the storage root and make sure the image folder exists.

        Args:
            store_dir_path: Directory holding ``books.json`` and the
                ``images`` folder. Defaults to the production location.
        """
        self.store_dir_path = store_dir_path
        self.book_img_folder = self.init_dir()

    def init_dir(self):
        """Create ``<store_dir_path>/images`` if needed and return its path."""
        book_img_folder = os.path.join(self.store_dir_path, 'images')
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(book_img_folder, exist_ok=True)
        return book_img_folder

    def run(self, id=''):
        """Crawl every book, or only the book whose id equals *id*.

        Args:
            id: Book id to crawl. The empty string (default) means "all
                books". The name shadows the builtin but is kept because it
                is part of the public signature.

        Raises:
            OSError: ``books.json`` cannot be opened.
            json.JSONDecodeError: ``books.json`` is not valid JSON.
        """
        crawl_all = id == ''
        print(f' run at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} --------------------------')
        json_path = os.path.join(self.store_dir_path, 'books.json')
        # Explicit encoding: the platform default is locale dependent and
        # breaks on non-ASCII book names when the locale is not UTF-8.
        with open(json_path, "r", encoding="utf-8") as f:
            books = json.load(f)
        for bookitem in books:
            if crawl_all or id == bookitem["id"].strip():
                self.spider_book(bookitem, not crawl_all)
                if crawl_all:
                    # Pause between books during a full crawl.
                    time.sleep(10)
        print("end.")
        print(f'stop at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} -------------------------')

    def spider_book(self, bookitem, specify = False):
        """Crawl one book with the spider named by its ``spiderby`` field.

        Args:
            bookitem: One entry of ``books.json`` (keys ``id``, ``name``,
                ``spiderby``).
            specify: True when the book was requested explicitly by id. The
                'ikan' and 'rouman' spiders then receive 0 instead of 5 as
                their third argument -- presumably a limit on how many
                chapters to check; confirm against ``do_book``.
        """
        bookname = bookitem["name"].strip()
        spiderby = bookitem["spiderby"].strip()
        book_img_path = os.path.join(self.book_img_folder, bookname)
        os.makedirs(book_img_path, exist_ok=True)

        limit = 0 if specify else 5
        if spiderby == 'ikan':
            Ikanhm().do_book(bookname, book_img_path, limit)
        elif spiderby == 'rouman':
            Rouman().do_book(bookname, book_img_path, limit)
        elif spiderby == 'se8':
            bookid = bookitem["id"].strip()
            Se8us().do_book(bookid, bookname, book_img_path)
        else:
            # Previously an unknown spider name was ignored without a trace.
            print(f'skip "{bookname}": unknown spiderby "{spiderby}"')
def _next_daily_run(now, hour=4, minute=0):
    """Return the epoch time of the first local ``hour:minute:00`` after *now*."""
    today = time.localtime(now)
    # tm_isdst=-1 lets mktime work out whether DST applies at that moment.
    target = time.mktime((today.tm_year, today.tm_mon, today.tm_mday,
                          hour, minute, 0, 0, 0, -1))
    if target <= now:
        # Step to tomorrow via "today noon + 24h": that always lands inside
        # the next calendar day, even across a DST change.
        noon = time.mktime((today.tm_year, today.tm_mon, today.tm_mday,
                            12, 0, 0, 0, 0, -1))
        tomorrow = time.localtime(noon + 86400)
        target = time.mktime((tomorrow.tm_year, tomorrow.tm_mon, tomorrow.tm_mday,
                              hour, minute, 0, 0, 0, -1))
    return target


def timer():
    """Run a full crawl every day at 04:00 local time; never returns.

    The next run time is computed up front and the loop sleeps until it is
    reached. Polling for the exact second 04:00:00 with ``sleep(1)`` can skip
    that second (sleep may overshoot) and miss the whole day, and would
    re-run the crawl in a tight loop if it finished within the same second.
    """
    next_run = _next_daily_run(time.time())
    while True:
        remaining = next_run - time.time()
        if remaining > 0:
            # Sleep in short slices so wall-clock adjustments are picked up.
            time.sleep(min(remaining, 60))
            continue
        Spider().run()
        next_run = _next_daily_run(time.time())
if __name__ == '__main__':
    # Usage: spider.py [book_id]
    # Without an argument every book is crawled (run() treats '' as "all");
    # indexing sys.argv[1] unconditionally raised IndexError in that case.
    book_id = sys.argv[1] if len(sys.argv) > 1 else ''
    spider = Spider()
    spider.run(book_id)