-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetLink.py
More file actions
75 lines (53 loc) · 2.21 KB
/
getLink.py
File metadata and controls
75 lines (53 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import os
from datetime import datetime
from random import randrange
from time import sleep
from urllib.parse import urlencode

from selenium import webdriver
from selenium.webdriver.common.by import By

from config import CATEGORY_DICT, KEYWORD_XPATH, CATEGORY_XPATH
# Chrome launch options used by both link collectors in this module; the
# browser window is requested at 1920x1080.
options = webdriver.ChromeOptions()
options.add_argument('window-size=1920,1080')
def getLinkByKeyword(keyword, pages):
    """Collect question links from the kin.naver.com search listing.

    Loads result pages 1..``pages`` for ``keyword``, reads every anchor
    matched by ``KEYWORD_XPATH`` and dumps the collected entries to a
    timestamped JSON file under ``./link/keyword_link/``.

    Args:
        keyword: Search term. It is URL-encoded before being placed in the
            query string and used verbatim in the output file name.
        pages: Number of result pages to visit, starting from page 1.

    Returns:
        The path of the JSON file that was written, as a string. Each entry
        in the file has the keys ``id``, ``title`` and ``link``.
    """
    out_dir = "./link/keyword_link/"
    result = []

    driver = webdriver.Chrome(options=options)
    try:
        driver.implicitly_wait(5)
        for page in range(pages):
            # urlencode escapes characters such as '&', '#' and '+' that
            # would otherwise change the meaning of the query string.
            query = urlencode({"query": keyword, "page": page + 1})
            driver.get(url="https://kin.naver.com/search/list.naver?" + query)

            for q in driver.find_elements(By.XPATH, KEYWORD_XPATH):
                # One WebDriver round trip per element instead of two.
                href = q.get_attribute("href")
                if not href:
                    # Anchor without an href: nothing to record.
                    continue
                result.append(
                    {
                        "id": href.split("&docId=")[-1].split("&")[0],
                        "title": q.text,
                        "link": href,
                    }
                )

            # Randomised 0.5-1.0 s pause between page loads.
            sleep(0.3 + randrange(2, 8) / 10)
    finally:
        # quit() ends the whole WebDriver session; running it in ``finally``
        # keeps a failed scrape from leaving the browser/driver behind.
        driver.quit()

    # Create the target directory on demand so the scraped data is not lost
    # to a FileNotFoundError at write time.
    os.makedirs(out_dir, exist_ok=True)
    link_path = out_dir + datetime.today().strftime("%y%m%d.%H%M%S.") + keyword + "link.json"
    with open(link_path, 'w', encoding='utf-8') as file:
        json.dump(result, file, ensure_ascii=False, indent=2)
    return link_path
def getLinkByCategory(category, pages):
    """Collect question links from a kin.naver.com category listing.

    Looks up the directory id for ``category`` in ``CATEGORY_DICT``, loads
    listing pages 1..``pages``, reads every anchor matched by
    ``CATEGORY_XPATH`` and dumps the collected entries to a timestamped JSON
    file under ``./link/category_link/``.

    Args:
        category: Key into ``CATEGORY_DICT``. It is converted with ``str()``
            for the output file name, so non-string keys are accepted.
        pages: Number of listing pages to visit, starting from page 1.

    Returns:
        The path of the JSON file that was written, as a string. Each entry
        in the file has the keys ``id``, ``title`` and ``link``.

    Raises:
        KeyError: If ``category`` is not a key of ``CATEGORY_DICT``.
    """
    # Resolve the category first so an unknown key fails before a browser
    # is launched.
    dir_id = str(CATEGORY_DICT[category])
    out_dir = "./link/category_link/"
    result = []

    driver = webdriver.Chrome(options=options)
    try:
        driver.implicitly_wait(5)
        for page in range(pages):
            driver.get(
                url="https://kin.naver.com/qna/kinupList.naver?dirId=" + dir_id
                + "&page=" + str(page + 1)
            )

            for q in driver.find_elements(By.XPATH, CATEGORY_XPATH):
                # One WebDriver round trip per element instead of two.
                href = q.get_attribute("href")
                if not href:
                    # Anchor without an href: nothing to record.
                    continue
                result.append(
                    {
                        "id": href.split("&docId=")[-1].split("&")[0],
                        "title": q.text,
                        "link": href,
                    }
                )

            # Randomised 0.5-1.0 s pause between page loads.
            sleep(0.3 + (randrange(2, 8) / 10))
    finally:
        # quit() ends the whole WebDriver session; running it in ``finally``
        # keeps a failed scrape from leaving the browser/driver behind.
        driver.quit()

    # Create the target directory on demand so the scraped data is not lost
    # to a FileNotFoundError at write time.
    os.makedirs(out_dir, exist_ok=True)
    # str(category): plain concatenation raised TypeError for integer keys.
    link_path = out_dir + datetime.today().strftime("%y%m%d.%H%M%S.") + str(category) + ".json"
    with open(link_path, 'w', encoding='utf-8') as file:
        json.dump(result, file, ensure_ascii=False, indent=2)
    return link_path
if __name__ == "__main__":
    # Script entry point: collect 5 listing pages for the category keyed by 1.
    # NOTE(review): CATEGORY_DICT lives in config and is not visible here;
    # confirm that 1 is one of its keys.
    getLinkByCategory(category=1, pages=5)