From bc61510e08492be91a491a89be1fd651b3f52b9f Mon Sep 17 00:00:00 2001
From: mhawry <1245767+mhawry@users.noreply.github.com>
Date: Wed, 9 Mar 2022 11:07:08 -0500
Subject: [PATCH 1/4] Add .idea to .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index b8a3e3e..0fcbbfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ auquan_toolbox.egg-info/*
 *Data/*
 historicalData/*
 *.json
+.idea

From 7c7fddf7bba4af6c6dc479ac1234f3bc2d6a796a Mon Sep 17 00:00:00 2001
From: mhawry <1245767+mhawry@users.noreply.github.com>
Date: Wed, 9 Mar 2022 11:08:38 -0500
Subject: [PATCH 2/4] Update Yahoo data source to use yfinance library instead
 of requests

---
 backtester/dataSource/data_source_utils.py | 63 ++++------------------
 backtester/dataSource/yahoo_data_source.py | 14 +-----
 2 files changed, 14 insertions(+), 63 deletions(-)

diff --git a/backtester/dataSource/data_source_utils.py b/backtester/dataSource/data_source_utils.py
index 5c62051..8203f93 100644
--- a/backtester/dataSource/data_source_utils.py
+++ b/backtester/dataSource/data_source_utils.py
@@ -7,35 +7,22 @@
 import re
 from time import mktime as mktime
 from itertools import groupby
+import yfinance as yf
 
 
-def getCookieForYahoo(instrumentId):
-    """Returns a tuple pair of cookie and crumb used in the request"""
-    url = 'https://finance.yahoo.com/quote/%s/history' % (instrumentId)
-    req = requests.get(url)
-    txt = req.content
-    cookie = req.cookies['B']
-    pattern = re.compile('.*"CrumbStore":\{"crumb":"(?P<crumb>[^"]+)"\}')
-
-    for line in txt.splitlines():
-        m = pattern.match(line.decode("utf-8"))
-        if m is not None:
-            crumb = m.groupdict()['crumb']
-            crumb = crumb.replace(u'\\u002F', '/')
-    return cookie, crumb  # return a tuple of crumb and cookie
-
-
-def downloadFileFromYahoo(startDate, endDate, instrumentId, fileName, event='history'):
+def downloadFileFromYahoo(startDate, endDate, instrumentId, fileName, adjustPrice=False):
+    """Download price history for instrumentId through yfinance and save it to fileName as CSV.
+
+    adjustPrice is forwarded to yfinance as auto_adjust. Returns True once the
+    CSV has been written and False when yfinance returned no rows.
+    """
     logInfo('Downloading %s' % fileName)
-    cookie, crumb = getCookieForYahoo(instrumentId)
-    start = int(mktime(startDate.timetuple()))
-    end = int(mktime(endDate.timetuple()))
-    url = 'https://query1.finance.yahoo.com/v7/finance/download/%s?period1=%s&period2=%s&interval=1d&events=%s&crumb=%s' % (instrumentId, start, end, event, crumb)
-    data = requests.get(url, cookies={'B': cookie})
-    with open(fileName, 'wb') as f:
-        f.write(data.content)
-        return True
-    return False
+    data = yf.download(instrumentId, start=startDate, end=endDate, auto_adjust=adjustPrice)
+    if data.empty:
+        # Do not cache an empty CSV: downloadAndAdjustData skips the download once fileName exists.
+        return False
+    data.to_csv(fileName)
+    return True
 
 '''
 Takes list of instruments.
@@ -62,27 +49,3 @@ def getAllTimeStamps(groupedInstrumentUpdates):
     for timeOfUpdate, instrumentUpdates in groupedInstrumentUpdates:
         timeUpdates.append(timeOfUpdate)
     return timeUpdates
-
-def getMultipliers(self,instrumentId, fileName, downloadId):
-        divFile = self.getFileName('div', instrumentId)
-        splitFile = self.getFileName('split', instrumentId)
-        if not (os.path.isfile(divFile) and os.path.isfile(splitFile)):
-            self.ensureDirectoryExists('div')
-            self.ensureDirectoryExists('split')
-            downloadFileFromYahoo(self.startDate, self.endDate, '%s%s'%(instrumentId,downloadId), divFile, event='div')
-            downloadFileFromYahoo(self.startDate, self.endDate, '%s%s'%(instrumentId,downloadId), splitFile, event='split')
-        div = pd.read_csv(divFile, engine='python', index_col='Date', parse_dates=True)
-        split = pd.read_csv(splitFile, engine='python', index_col='Date', parse_dates=True)
-        prices = pd.read_csv(fileName, engine='python', index_col='Date', parse_dates=True)
-        temp = pd.concat([div, prices], axis=1).fillna(0)
-        interim = (temp['Close'] - temp['Dividends']) / temp['Close']
-        multiplier1 = interim.sort_index(ascending=False).cumprod().sort_index(ascending=True)
-        temp2 = split['Stock Splits'].str.split('/', expand=True)
-        if len(temp2.index) > 0:
-            temp_mult = pd.to_numeric(temp2[1]) / pd.to_numeric(temp2[0])
-            multiplier2 = temp_mult.sort_index(ascending=False).cumprod().sort_index(ascending=True)
-        else:
-            multiplier2 = pd.Series(1, index=multiplier1.index)
-        multiplier = pd.concat([multiplier1, multiplier2], axis=1).fillna(method='bfill').fillna(1)
-        multiplier[1] = multiplier[1].shift(-1).fillna(1)
-        return multiplier
diff --git a/backtester/dataSource/yahoo_data_source.py b/backtester/dataSource/yahoo_data_source.py
index 2a2c319..6fb86c5 100644
--- a/backtester/dataSource/yahoo_data_source.py
+++ b/backtester/dataSource/yahoo_data_source.py
@@ -130,11 +130,9 @@ def getFileName(self, instrumentId):
 
     def downloadAndAdjustData(self, instrumentId, fileName):
         if not os.path.isfile(fileName):
-            if not downloadFileFromYahoo(self._startDate, self._endDate, instrumentId, fileName):
+            if not downloadFileFromYahoo(self._startDate, self._endDate, instrumentId, fileName, adjustPrice=self.__adjustPrice):
                 logError('Skipping %s:' % (instrumentId))
                 return False
-            if(self.__adjustPrice):
-                self.adjustPriceForSplitAndDiv(instrumentId, fileName)
         return True
 
     def processGroupedInstrumentUpdates(self):
@@ -181,16 +179,6 @@ def getBookDataByFeature(self):
     def getClosingTime(self):
         return self._allTimes[-1]
 
-    def adjustPriceForSplitAndDiv(self, instrumentId, fileName):
-        multiplier = data_source_utils.getMultipliers(self,instrumentId,fileName,self.__downloadId)
-        temp['close'] = temp['close'] * multiplier[0] * multiplier[1]
-        temp['open'] = temp['open'] * multiplier[0] * multiplier[1]
-        temp['high'] = temp['high'] * multiplier[0] * multiplier[1]
-        temp['low'] = temp['low'] * multiplier[0] * multiplier[1]
-
-        del temp['dividends']
-        temp.to_csv(fileName)
-
 
 if __name__ == "__main__":
     instrumentIds = ['IBM', 'AAPL']

From d9f68b3cc26c483e450c053997d29d702c201504 Mon Sep 17 00:00:00 2001
From: mhawry <1245767+mhawry@users.noreply.github.com>
Date: Wed, 9 Mar 2022 11:12:01 -0500
Subject: [PATCH 3/4] Remove unused imports

---
 backtester/dataSource/data_source_utils.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/backtester/dataSource/data_source_utils.py b/backtester/dataSource/data_source_utils.py
index 8203f93..ad78297 100644
--- a/backtester/dataSource/data_source_utils.py
+++ b/backtester/dataSource/data_source_utils.py
@@ -1,11 +1,6 @@
 from backtester.instrumentUpdates import *
 from backtester.constants import *
 from backtester.logger import *
-import os
-import os.path
-import requests
-import re
-from time import mktime as mktime
 from itertools import groupby
 import yfinance as yf
 

From 57acc07fc6d98c3cec19b6b8f01edb20c899e94e Mon Sep 17 00:00:00 2001
From: mhawry <1245767+mhawry@users.noreply.github.com>
Date: Wed, 9 Mar 2022 11:13:07 -0500
Subject: [PATCH 4/4] Add yfinance to README package list

---
 README.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 171ae9b..2e14e84 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
 # **Quick Startup Guide** #
 
 1. Packages to install. Easiest way is via pip  
-    * numpy  
-    * pandas  
-    * pandas-datareader  
-    * plotly  
+    * numpy
+    * pandas
+    * pandas-datareader
+    * plotly
+    * yfinance
 2. Clone/Download this repository.
 
         git clone https://{your_username}@bitbucket.org/auquan/auquantoolbox.git