From bc61510e08492be91a491a89be1fd651b3f52b9f Mon Sep 17 00:00:00 2001 From: mhawry <1245767+mhawry@users.noreply.github.com> Date: Wed, 9 Mar 2022 11:07:08 -0500 Subject: [PATCH 1/4] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b8a3e3e..0fcbbfd 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ auquan_toolbox.egg-info/* *Data/* historicalData/* *.json +.idea \ No newline at end of file From 7c7fddf7bba4af6c6dc479ac1234f3bc2d6a796a Mon Sep 17 00:00:00 2001 From: mhawry <1245767+mhawry@users.noreply.github.com> Date: Wed, 9 Mar 2022 11:08:38 -0500 Subject: [PATCH 2/4] Update Yahoo data source to use yfinance library instead of requests --- backtester/dataSource/data_source_utils.py | 55 ++-------------------- backtester/dataSource/yahoo_data_source.py | 14 +----- 2 files changed, 6 insertions(+), 63 deletions(-) diff --git a/backtester/dataSource/data_source_utils.py b/backtester/dataSource/data_source_utils.py index 5c62051..8203f93 100644 --- a/backtester/dataSource/data_source_utils.py +++ b/backtester/dataSource/data_source_utils.py @@ -7,35 +7,14 @@ import re from time import mktime as mktime from itertools import groupby +import yfinance as yf -def getCookieForYahoo(instrumentId): - """Returns a tuple pair of cookie and crumb used in the request""" - url = 'https://finance.yahoo.com/quote/%s/history' % (instrumentId) - req = requests.get(url) - txt = req.content - cookie = req.cookies['B'] - pattern = re.compile('.*"CrumbStore":\{"crumb":"(?P<crumb>[^"]+)"\}') - - for line in txt.splitlines(): - m = pattern.match(line.decode("utf-8")) - if m is not None: - crumb = m.groupdict()['crumb'] - crumb = crumb.replace(u'\\u002F', '/') - return cookie, crumb # return a tuple of crumb and cookie - - -def downloadFileFromYahoo(startDate, endDate, instrumentId, fileName, event='history'): +def downloadFileFromYahoo(startDate, endDate, instrumentId, fileName, adjustPrice=False): logInfo('Downloading %s' % 
fileName) - cookie, crumb = getCookieForYahoo(instrumentId) - start = int(mktime(startDate.timetuple())) - end = int(mktime(endDate.timetuple())) - url = 'https://query1.finance.yahoo.com/v7/finance/download/%s?period1=%s&period2=%s&interval=1d&events=%s&crumb=%s' % (instrumentId, start, end, event, crumb) - data = requests.get(url, cookies={'B': cookie}) - with open(fileName, 'wb') as f: - f.write(data.content) - return True - return False + data = yf.download(instrumentId, start=startDate, end=endDate, auto_adjust=adjustPrice) + data.to_csv(fileName) + return True ''' Takes list of instruments. @@ -62,27 +41,3 @@ def getAllTimeStamps(groupedInstrumentUpdates): for timeOfUpdate, instrumentUpdates in groupedInstrumentUpdates: timeUpdates.append(timeOfUpdate) return timeUpdates - -def getMultipliers(self,instrumentId, fileName, downloadId): - divFile = self.getFileName('div', instrumentId) - splitFile = self.getFileName('split', instrumentId) - if not (os.path.isfile(divFile) and os.path.isfile(splitFile)): - self.ensureDirectoryExists('div') - self.ensureDirectoryExists('split') - downloadFileFromYahoo(self.startDate, self.endDate, '%s%s'%(instrumentId,downloadId), divFile, event='div') - downloadFileFromYahoo(self.startDate, self.endDate, '%s%s'%(instrumentId,downloadId), splitFile, event='split') - div = pd.read_csv(divFile, engine='python', index_col='Date', parse_dates=True) - split = pd.read_csv(splitFile, engine='python', index_col='Date', parse_dates=True) - prices = pd.read_csv(fileName, engine='python', index_col='Date', parse_dates=True) - temp = pd.concat([div, prices], axis=1).fillna(0) - interim = (temp['Close'] - temp['Dividends']) / temp['Close'] - multiplier1 = interim.sort_index(ascending=False).cumprod().sort_index(ascending=True) - temp2 = split['Stock Splits'].str.split('/', expand=True) - if len(temp2.index) > 0: - temp_mult = pd.to_numeric(temp2[1]) / pd.to_numeric(temp2[0]) - multiplier2 = 
temp_mult.sort_index(ascending=False).cumprod().sort_index(ascending=True) - else: - multiplier2 = pd.Series(1, index=multiplier1.index) - multiplier = pd.concat([multiplier1, multiplier2], axis=1).fillna(method='bfill').fillna(1) - multiplier[1] = multiplier[1].shift(-1).fillna(1) - return multiplier diff --git a/backtester/dataSource/yahoo_data_source.py b/backtester/dataSource/yahoo_data_source.py index 2a2c319..6fb86c5 100644 --- a/backtester/dataSource/yahoo_data_source.py +++ b/backtester/dataSource/yahoo_data_source.py @@ -130,11 +130,9 @@ def getFileName(self, instrumentId): def downloadAndAdjustData(self, instrumentId, fileName): if not os.path.isfile(fileName): - if not downloadFileFromYahoo(self._startDate, self._endDate, instrumentId, fileName): + if not downloadFileFromYahoo(self._startDate, self._endDate, instrumentId, fileName, adjustPrice=self.__adjustPrice): logError('Skipping %s:' % (instrumentId)) return False - if(self.__adjustPrice): - self.adjustPriceForSplitAndDiv(instrumentId, fileName) return True def processGroupedInstrumentUpdates(self): @@ -181,16 +179,6 @@ def getBookDataByFeature(self): def getClosingTime(self): return self._allTimes[-1] - def adjustPriceForSplitAndDiv(self, instrumentId, fileName): - multiplier = data_source_utils.getMultipliers(self,instrumentId,fileName,self.__downloadId) - temp['close'] = temp['close'] * multiplier[0] * multiplier[1] - temp['open'] = temp['open'] * multiplier[0] * multiplier[1] - temp['high'] = temp['high'] * multiplier[0] * multiplier[1] - temp['low'] = temp['low'] * multiplier[0] * multiplier[1] - - del temp['dividends'] - temp.to_csv(fileName) - if __name__ == "__main__": instrumentIds = ['IBM', 'AAPL'] From d9f68b3cc26c483e450c053997d29d702c201504 Mon Sep 17 00:00:00 2001 From: mhawry <1245767+mhawry@users.noreply.github.com> Date: Wed, 9 Mar 2022 11:12:01 -0500 Subject: [PATCH 3/4] Remove unused imports --- backtester/dataSource/data_source_utils.py | 5 ----- 1 file changed, 5 deletions(-) 
diff --git a/backtester/dataSource/data_source_utils.py b/backtester/dataSource/data_source_utils.py index 8203f93..ad78297 100644 --- a/backtester/dataSource/data_source_utils.py +++ b/backtester/dataSource/data_source_utils.py @@ -1,11 +1,6 @@ from backtester.instrumentUpdates import * from backtester.constants import * from backtester.logger import * -import os -import os.path -import requests -import re -from time import mktime as mktime from itertools import groupby import yfinance as yf From 57acc07fc6d98c3cec19b6b8f01edb20c899e94e Mon Sep 17 00:00:00 2001 From: mhawry <1245767+mhawry@users.noreply.github.com> Date: Wed, 9 Mar 2022 11:13:07 -0500 Subject: [PATCH 4/4] Update README.md --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 171ae9b..2e14e84 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # **Quick Startup Guide** # 1. Packages to install. Easiest way is via pip - * numpy - * pandas - * pandas-datareader - * plotly + * numpy + * pandas + * pandas-datareader + * plotly + * yfinance 2. Clone/Download this repository. git clone https://{your_username}@bitbucket.org/auquan/auquantoolbox.git