-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScraper.py
More file actions
57 lines (46 loc) · 1.83 KB
/
Scraper.py
File metadata and controls
57 lines (46 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import sqlite3
import requests
# Open the SQLite database file 'database.db3' and keep one cursor that the
# rest of the script uses for every statement.
db = sqlite3.connect('database.db3')
cursor = db.cursor()

# Schema for the two tables, executed in order.  IF NOT EXISTS makes each
# statement a no-op when the table is already present.
#   post    - one row per post, keyed by its permalink.
#   comment - one row per comment, keyed by its id; "postlink" is declared as
#             a foreign key referencing the "post" table.
_SCHEMA_STATEMENTS = (
    '''
CREATE TABLE IF NOT EXISTS "post" (
"subreddit" TEXT,
"title" TEXT,
"content" TEXT,
"permalink" TEXT,
"attachment" TEXT,
PRIMARY KEY("permalink")
)''',
    '''
CREATE TABLE IF NOT EXISTS "comment" (
"id" TEXT,
"content" TEXT,
"postlink" TEXT,
"permalink" TEXT,
FOREIGN KEY("postlink") REFERENCES "post",
PRIMARY KEY("id")
)''',
)
for _ddl in _SCHEMA_STATEMENTS:
    cursor.execute(_ddl)
# Settings shared by every HTTP request made by the scrape loop below.
REQUEST_HEADERS = {'User-agent': 'Hacksu Scraper'}
REQUEST_TIMEOUT = 10  # seconds; requests waits indefinitely when no timeout is given

# Interactive scrape loop.
#
# Each iteration prompts for a subreddit name, downloads that subreddit's JSON
# listing, stores every listed post in the "post" table, then downloads each
# post's own JSON page and stores the comments listed directly in it (nested
# replies are not followed) in the "comment" table.  The work for one
# subreddit is committed once it has been processed.
#
# The loop ends on end-of-input or Ctrl-C at the prompt, after which the
# database connection is closed.
while True:
    try:
        sub = input("Enter a subreddit name: ").strip()
    except (EOFError, KeyboardInterrupt):
        print()
        break
    if not sub:
        # Nothing typed: ask again instead of requesting ".../r/.json".
        continue

    url = "https://www.reddit.com/r/" + sub + ".json"
    try:
        response = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # Connection problems and timeouts should not end the whole session.
        print("Request failed: " + str(err))
        continue
    if not response.ok:
        print("Got Error code: " + str(response.status_code))
        continue

    # A payload without "data"/"children" is treated as an empty listing.
    listing = (response.json().get('data') or {}).get('children', [])
    for child in listing:
        info = child["data"]
        # "permalink" is the primary key of "post", so a plain INSERT raises
        # IntegrityError when a post is scraped a second time; OR REPLACE
        # refreshes the stored row instead.
        cursor.execute(
            '''INSERT OR REPLACE INTO post(subreddit, title, content, permalink, attachment) VALUES(?,?,?,?,?)''',
            (info["subreddit"], info["title"], info["selftext"], info["permalink"], info["url"]),
        )

        # lstrip avoids a "//" in the URL when the permalink already starts
        # with a slash.
        comments_url = "https://www.reddit.com/" + info['permalink'].lstrip('/') + ".json"
        try:
            comments_resp = requests.get(
                comments_url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as err:
            print("Request failed: " + str(err))
            continue
        if not comments_resp.ok:
            print("Got Error code: " + str(comments_resp.status_code))
            continue

        # Element [1] of the post page holds the comment listing.
        for comment in comments_resp.json()[1]['data']['children']:
            if comment['kind'] == 'more':
                # 'more' entries are skipped, exactly as before.
                continue
            cinfo = comment['data']
            # "id" is the primary key of "comment"; OR REPLACE keeps re-runs
            # from failing on comments that are already stored.
            cursor.execute(
                '''INSERT OR REPLACE INTO comment(id, content, postlink, permalink) VALUES(?,?,?,?)''',
                (cinfo['id'], cinfo['body'], info['permalink'], cinfo['permalink']),
            )

    db.commit()
    print("Done!")

# Reached only when the prompt loop above was ended by the user.
db.close()