# stock-analysis/dataCollection.py at develop · silicon-beach/stock-analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from alpha_vantage.timeseries import TimeSeries
from datetime import datetime
import csv
import pandas as pd
import requests
import os
import glob

# Download URL for one exchange's company listing. The "{}" placeholder is
# filled with an entry of STOCK_EXCHANGES by get_symbols(), which parses the
# response body as comma-separated text.
# NOTE(review): plain http and an .aspx path -- confirm the endpoint still
# serves CSV before relying on it.
SYMBOL_URL = "http://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange={}&render=download"
# Exchanges whose listings get_symbols() downloads, one output file per entry.
STOCK_EXCHANGES = ["nasdaq", "nyse"]


# Download the full intraday history for one symbol and store it with a VWAP column
def downloadHistory_stocks(symbol, interval='1min', api_key=None):
    """Fetch intraday bars for *symbol* and write them to data/<symbol>.csv.

    The frame returned by the Alpha Vantage client is rendered to text (and
    still dumped to data/output.txt), split back into whitespace-separated
    rows, and every row is extended with a running volume-weighted average
    price before being written out through write_csv().

    Args:
        symbol: Ticker to download; also used as the CSV file name.
        interval: Bar size, passed straight to ``TimeSeries.get_intraday``.
        api_key: Optional Alpha Vantage key. When omitted, the key that used
            to be hard-coded in this function is used.

    Returns:
        None. The results only exist on disk.

    A ``ValueError`` raised anywhere in the download/parse step is reported
    on stdout and the symbol is skipped, so a bulk run over many symbols
    keeps going instead of stopping (or failing silently).
    """
    # NOTE(review): a credential committed to source control should be
    # rotated and supplied via configuration; it is kept only as a fallback
    # so existing callers keep working.
    key = api_key if api_key else '055UMQXJRDY71RG3'
    try:
        ts = TimeSeries(key=key, output_format='pandas')
        data, _meta_data = ts.get_intraday(
            symbol=symbol, interval=interval, outputsize='full')
        # to_string() renders every row without touching pandas' global
        # display options, so long histories are never truncated with "...".
        rendered = pd.DataFrame(data).to_string()
        if not os.path.isdir('data'):
            os.makedirs('data')
        with open('data/output.txt', "w") as dump:
            dump.write(rendered)
        # The first two rendered lines are skipped, as before: they are
        # expected to hold the column header and the index-name row.
        rows = [line.split() for line in rendered.splitlines()[2:]]
        write_csv(file_name="data/" + symbol + ".csv",
                  data=_vwap_csv_lines(rows))
    except ValueError as err:
        print("Skipping", symbol, "-", err)


def _vwap_csv_lines(rows):
    """Turn whitespace-split bar rows into CSV lines with a running VWAP.

    Each row is ``[date, time, open, high, low, close, volume]``, matching
    the header written as the first output line. The original tokens are
    written back unchanged; only the vwap column is computed.

    Raises:
        ValueError: if a row has too few fields or a non-numeric price.
    """
    lines = ["timestamp,open,high,low,close,volume,vwap\n"]
    cumulative_total = 0.0
    cumulative_volume = 0.0
    for row in rows:
        if not row:
            continue
        # Tokens 0 and 1 are the two halves of the timestamp,
        # e.g. "2017-10-30" and "09:30:00".
        timestamp = row[0] + ' ' + row[1]
        fields = row[2:]
        if len(fields) < 5:
            raise ValueError("unexpected row: %r" % (row,))
        high, low, close, volume = (float(value) for value in fields[1:5])
        # Typical price is the mean of high, low and close, weighted by the
        # bar's volume (the last price column, not the close).
        typical_price = (high + low + close) / 3
        cumulative_total += typical_price * volume
        cumulative_volume += volume
        if cumulative_volume:
            vwap = cumulative_total / cumulative_volume
        else:
            # Nothing has traded yet; fall back to the bar's typical price
            # rather than dividing by zero.
            vwap = typical_price
        lines.append(",".join([timestamp] + fields + [str(vwap)]) + "\n")
    return lines


# Get the list of symbols automatically
def get_symbols(directory_name, timeout=30):
    """Download each exchange's company listing into *directory_name*.

    For every entry of STOCK_EXCHANGES the listing is fetched from
    SYMBOL_URL, parsed as comma-separated text, and the first eight fields
    of each row are written, joined by ';', to ``<exchange>.csv``.

    Args:
        directory_name: Folder that receives the per-exchange files; it is
            created when missing.
        timeout: Seconds to wait for the server before giving up on one
            exchange.

    Returns:
        None.

    A failed download (network error, timeout, or HTTP error status) is
    reported on stdout and that exchange is skipped, so no error page is
    ever written out as a symbol list and any existing file is kept.
    """
    if directory_name and not os.path.isdir(directory_name):
        os.makedirs(directory_name)
    # One session for all exchanges lets the connection be reused.
    with requests.Session() as session:
        for exchange in STOCK_EXCHANGES:
            try:
                response = session.get(
                    SYMBOL_URL.format(exchange), timeout=timeout)
                response.raise_for_status()
            except requests.RequestException as err:
                print("Could not download symbols for", exchange, "-", err)
                continue
            decoded_content = response.content.decode('utf-8')
            reader = csv.reader(decoded_content.splitlines(), delimiter=',')
            # Only the first eight fields are kept per row.
            data_list = [';'.join(row[:8]) + '\n' for row in reader]
            write_csv(os.path.join(directory_name, exchange + ".csv"),
                      data_list)

# Download intraday history for every symbol in the exchange listings
def get_data():
    """Refresh the symbol listings, then download history for each symbol.

    The listings are first re-downloaded into data/symbols/. Every ``*.csv``
    file found there is then read in chunks, and downloadHistory_stocks() is
    called once per value of its "Symbol" column. No price filter is applied:
    every listed symbol is requested.

    Returns:
        None.
    """
    symbols_dir = "data/symbols/"
    listing_columns = ["Symbol", "Name", "LastSale", "MarketCap",
                       "IPOyear", "Sector", "industry", "Summary Quote"]

    get_symbols(symbols_dir)

    listing_pattern = os.path.join(symbols_dir, '*.csv')
    for listing_path in glob.glob(listing_pattern):
        chunks = read_csv(file_name=listing_path,
                          names=listing_columns, sep=";")
        for chunk in chunks:
            for ticker in chunk["Symbol"].values.tolist():
                print("Downloading data for ", ticker)
                downloadHistory_stocks(ticker)


# Default column names, kept in a tuple so the default argument of read_csv()
# is not a shared mutable list.
_PRICE_COLUMNS = ("timestamp", "open", "high", "low",
                  "close", "volume", "vwap")


def read_csv(file_name, names=_PRICE_COLUMNS, sep=',', chunksize=29):
    """Open a delimited file for chunked reading with pandas.

    Args:
        file_name: Path of the file to read.
        names: Column names to assign. The file's first line is treated as
            a header row and replaced by these names.
        sep: Field delimiter.
        chunksize: Number of rows per chunk.

    Returns:
        The iterator returned by ``pd.read_csv`` when ``chunksize`` is set;
        iterating it yields one DataFrame per chunk.
    """
    # Hand pandas a fresh list each call; an explicit None is passed through.
    columns = None if names is None else list(names)
    return pd.read_csv(file_name, names=columns, sep=sep,
                       header=0, chunksize=chunksize)


def write_csv(file_name="result.csv", data=()):
    """Write already-formatted lines to *file_name*, replacing its contents.

    Args:
        file_name: Destination path; the file is truncated if it exists.
        data: Iterable of strings written verbatim, so each entry must carry
            its own line terminator. Defaults to nothing, which leaves an
            empty file.

    Returns:
        None.
    """
    # The context manager closes the file even if writelines() raises.
    with open(file_name, "w") as out:
        out.writelines(data)


if __name__ == '__main__':
    # Smoke run: download one symbol, which writes data/SLV.csv. The call
    # returns None, so its result is not bound to a name. Swap in another
    # ticker (for example 'MBI' or 'GOOGL') to fetch that one instead.
    downloadHistory_stocks('SLV')