-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstreamLocations.py
More file actions
87 lines (75 loc) · 2.85 KB
/
streamLocations.py
File metadata and controls
87 lines (75 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import sys
import tweepy
import urllib3
import http
import requests
import logging
import string
import time
import pandas as pd
from geo import *
import argparse
# Command-line interface: one positional argument holding the search terms,
# separated by commas (the help text asks for no spaces between them).
parser = argparse.ArgumentParser()
parser.add_argument('query', help='Escribí los términos de búsqueda sin espacios')
args = parser.parse_args()
query = args.query.split(',')
print(query)

# Twitter API credentials (blank here; they must be filled in before running).
consumerKey = ""
consumerSecret = ""
accessToken = ""
accessTokenSecret = ""

auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)
api = tweepy.API(auth,
                 retry_count=5,  # retry 5 times
                 retry_delay=5,  # seconds to wait for retry
                 wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)

# Counter of accepted tweets; the stream listener uses it to decide when to
# geocode and write the collected rows to disk.
count = 1

# Rows collected so far. On a first run the CSV does not exist yet (or may be
# a zero-byte file), so start from an empty frame that already has the columns
# the stream listener reads and writes instead of crashing at start-up.
try:
    df = pd.read_csv('myLocations.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    df = pd.DataFrame(columns=['text', 'raw_location', 'location'])

# The listener selects rows by df['location']; make sure the column exists so
# rows that were never geocoded simply show up as null.
if 'location' not in df.columns:
    df['location'] = None
class MyStreamListener(tweepy.StreamListener):
    """Collect streamed tweets whose author has a profile location.

    Accepted tweets are accumulated in the module-level ``df`` frame. Each
    time the module-level ``count`` reaches a multiple of 10, the rows that
    still have no ``location`` value are passed to ``addGeoData`` and the
    whole frame is written to ``myLocations.csv``.
    """

    def on_status(self, status):
        """Store one incoming status and periodically geocode and save.

        Statuses whose ``status.user.location`` is empty are ignored.
        Connection-related exceptions raised while processing are logged as
        warnings and otherwise ignored so the stream keeps running.
        """
        global count, df
        try:
            if status.user.location:
                count += 1
                row = pd.DataFrame({'text': [status.text],
                                    'raw_location': [status.user.location],
                                    })
                # DataFrame.append was removed in pandas 2.0; pd.concat with
                # ignore_index=True builds the same frame on old and new pandas.
                df = pd.concat([df, row], ignore_index=True)
                if count % 10 == 0:
                    # Split rows by whether they already carry a location.
                    needsGeocoding = df['location'].isnull()
                    dfWithLocations = df[~needsGeocoding]
                    dfToGeocode = df[needsGeocoding]
                    if len(dfToGeocode) > 0:
                        # addGeoData is not defined in this file (presumably it
                        # comes from the `geo` star import) and is expected to
                        # return the rows with geo data added - TODO confirm.
                        dfToGeocode = addGeoData(dfToGeocode)
                    df = pd.concat([dfWithLocations, dfToGeocode])
                    df.to_csv('myLocations.csv', index=False)
        except http.client.IncompleteRead:
            logging.warning('http.client.IncompleteRead')
        except urllib3.exceptions.ProtocolError:
            logging.warning('urllib3.exceptions.ProtocolError')
        except urllib3.exceptions.ReadTimeoutError:
            logging.warning('urllib3.exceptions.ReadTimeoutError')
        # ConnectionResetError is a subclass of ConnectionError, so it has to
        # be listed first to get its own log message.
        except ConnectionResetError:
            logging.warning('ConnectionResetError')
        except ConnectionError:
            logging.warning('ConnectionError')
        except requests.exceptions.ConnectionError:
            logging.warning('requests.exceptions.ConnectionError')

    def on_error(self, status_code):
        """Report an error status code on stderr and keep streaming."""
        # Python 3 print function; the former `print >> sys.stderr, ...`
        # statement form raises TypeError on Python 3.
        print('Encountered error with status code:', status_code, file=sys.stderr)
        return True  # Don't kill the stream

    def on_timeout(self):
        """Report a timeout on stderr and keep streaming."""
        print('Timeout...', file=sys.stderr)
        return True  # Don't kill the stream

    def on_exception(self, exception):
        """Print the exception that was passed in and return True."""
        print(exception)
        return True
# Seconds to pause before re-opening the stream after it drops, so a
# persistent failure does not turn into a tight reconnect loop.
RECONNECT_DELAY_SECONDS = 5

myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)

# Keep the filtered stream running indefinitely: whenever filter() returns or
# fails with one of the errors below, wait briefly and connect again.
while True:
    try:
        myStream.filter(track=query, stall_warnings=True)
    except (urllib3.exceptions.ProtocolError, AttributeError) as error:
        # Record why the stream dropped instead of discarding it silently.
        logging.warning('Stream interrupted (%s: %s); reconnecting',
                        type(error).__name__, error)
    time.sleep(RECONNECT_DELAY_SECONDS)