bpb27 · 4sakura · Mar 23, 2018
diff --git a/README.md b/README.md
@@ -23,8 +23,11 @@ The `scrape.py` script collects tweet ids. If you know a tweet's id number, you
 
 ## Running the scraper
 
-- open up `scrape.py` and edit the user, start, and end variables (and save the file)
-- run `python3 scrape.py`
+- <del>~~open up `scrape.py` and edit the user, start, and end variables (and save the file)~~</del>
+- run `python3 scrape.py` and add the arguments you need
+    - `-u` followed by the username
+    - `--since` followed by a date string, e.g. `2017-01-01`; otherwise the default value declared in `start` (2010-01-01) is used
+    - `--until` followed by a date string, e.g. `2018-01-01`; otherwise the default value declared in `end` (the current date) is used
 - you'll see a browser pop up and output in the terminal
 - do some fun other task until it finishes
 - once it's done, it outputs all the tweet ids it found into `all_ids.json`

diff --git a/scrape.py b/scrape.py
@@ -4,24 +4,33 @@
 from time import sleep
 import json
 import datetime
-
-
-# edit these three variables
-user = 'realdonaldtrump'
-start = datetime.datetime(2010, 1, 1)  # year, month, day
-end = datetime.datetime(2016, 12, 7)  # year, month, day
+import argparse
+
+parser = argparse.ArgumentParser(prog="scrape.py", usage="python3 %(prog)s [options]", description="scrape.py - Twitter Scraping Tool")
+parser.add_argument("-u", help="Scrape this user's Tweets")
+parser.add_argument("--since", help="Get Tweets after this date (Example: 2010-01-01).")
+parser.add_argument("--until", help="Get Tweets before this date (Example: 2018-12-07).")
+args = parser.parse_args()
+
+if args.since is not None:
+    start = datetime.datetime(int(args.since[:4]), int(args.since[5:7]), int(args.since[8:10]))
+else:
+    start = datetime.datetime(2010, 1, 1)  #year, month, day; this is the fallback date; only used when there is no argument `--since`
+if args.until is not None:
+    end = datetime.datetime(int(args.until[:4]), int(args.until[5:7]), int(args.until[8:10]))
+else:
+    end = datetime.datetime.now()
 
 # only edit these if you're having problems
 delay = 1  # time to wait on each page load before reading the page
-driver = webdriver.Safari()  # options are Chrome() Firefox() Safari()
+driver = webdriver.Firefox()  # options are Chrome() Firefox() Safari()
 
 
 # don't mess with this stuff
 twitter_ids_filename = 'all_ids.json'
 days = (end - start).days + 1
 id_selector = '.time a.tweet-timestamp'
 tweet_selector = 'li.js-stream-item'
-user = user.lower()
 ids = []
 
 def format_day(date):
@@ -31,8 +40,10 @@ def format_day(date):
     return '-'.join([year, month, day])
 
 def form_url(since, until):
-    p1 = 'https://twitter.com/search?f=tweets&vertical=default&q=from%3A'
-    p2 =  user + '%20since%3A' + since + '%20until%3A' + until + 'include%3Aretweets&src=typd'
+    p1 = 'https://twitter.com/search?f=tweets&vertical=default&q='
+    if args.u is not None:
+        p1 += "from%3A{0.u}".format(args)
+    p2 ='%20since%3A' + since + '%20until%3A' + until + 'include%3Aretweets&src=typd'
     return p1 + p2
 
 def increment_day(date, i):