From 9186bad8ff0d4e9039505100498c08b7ec35d53d Mon Sep 17 00:00:00 2001 From: Andrew Burks Date: Sun, 25 Jun 2023 15:35:33 -0700 Subject: [PATCH 1/5] Update to use Selenium 4 and make OTP selection page optional. --- requirements.txt | 2 +- src/amazon_client/amazon_selenium_client.py | 38 ++++++++++++--------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index 52f81de..7dae38c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ bs4 html5lib requests -selenium +selenium ~= 4.10.0 webdriver-manager pyotp diff --git a/src/amazon_client/amazon_selenium_client.py b/src/amazon_client/amazon_selenium_client.py index 90af028..fc892bd 100644 --- a/src/amazon_client/amazon_selenium_client.py +++ b/src/amazon_client/amazon_selenium_client.py @@ -4,6 +4,9 @@ from selenium import webdriver from webdriver_manager.chrome import ChromeDriverManager from selenium.webdriver.chrome.options import Options as ChromeOptions +from selenium.webdriver.chrome.service import Service as ChromeService +from selenium.common.exceptions import NoSuchElementException +from selenium.webdriver.common.by import By from amazon_client.amazon_client import AmazonClient import platform @@ -24,7 +27,7 @@ def __init__(self, userEmail, userPassword, otpSecret): print(f"Attempting to initialize Chrome Selenium Webdriver on platform {platformMachine}...") options = ChromeOptions() options.add_argument('--headless') - self.driver = webdriver.Chrome(ChromeDriverManager().install(), options=options) + self.driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) print("Successfully initialized Chrome Selenium Webdriver") self.signIn() @@ -44,38 +47,41 @@ def doSignIn(self): self.driver.get("https://amazon.com") time.sleep(1) - accountNav = self.driver.find_element_by_xpath("//a[@data-nav-role ='signin']") + accountNav = self.driver.find_element(By.XPATH, "//a[@data-nav-role ='signin']") 
accountNav.click() time.sleep(1) - emailEntry = self.driver.find_element_by_id("ap_email") + emailEntry = self.driver.find_element(By.ID, "ap_email") emailEntry.clear() emailEntry.send_keys(self.userEmail) - self.driver.find_element_by_id("continue").click() + self.driver.find_element(By.ID, "continue").click() time.sleep(1) - passwordEntry =self.driver.find_element_by_id("ap_password") + passwordEntry =self.driver.find_element(By.ID, "ap_password") passwordEntry.clear() passwordEntry.send_keys(self.userPassword) - self.driver.find_element_by_name("rememberMe").click() - self.driver.find_element_by_id("signInSubmit").click() + self.driver.find_element(By.NAME, "rememberMe").click() + self.driver.find_element(By.ID, "signInSubmit").click() time.sleep(1) - totpSelect = self.driver.find_element_by_xpath("//input[contains(@value,'TOTP')]") - totpSelect.click() + try: + totpSelect = self.driver.find_element(By.XPATH, "//input[contains(@value,'TOTP')]") + totpSelect.click() - sendCode = self.driver.find_element_by_xpath("//input[@id = 'auth-send-code']") - sendCode.click() + sendCode = self.driver.find_element(By.XPATH, "//input[@id = 'auth-send-code']") + sendCode.click() - time.sleep(1) + time.sleep(1) + except NoSuchElementException: + pass - otpEntry = self.driver.find_element_by_id("auth-mfa-otpcode") + otpEntry = self.driver.find_element(By.ID, "auth-mfa-otpcode") otpEntry.clear() otpEntry.send_keys(totp.now()) - self.driver.find_element_by_id("auth-mfa-remember-device").click() - self.driver.find_element_by_id("auth-signin-button").click() + self.driver.find_element(By.ID, "auth-mfa-remember-device").click() + self.driver.find_element(By.ID, "auth-signin-button").click() time.sleep(1) def signIn(self): @@ -90,7 +96,7 @@ def signIn(self): def interpretDriverErrorPage(self): try: - failElem = self.driver.find_element_by_xpath("//*[contains(text(),'not a robot')]") + failElem = self.driver.find_element(By.XPATH, "//*[contains(text(),'not a robot')]") print("Blocked 
by Amazon anti-robot. Circumnavigating this is unsupported. Please try again later.") except: pass From be812d12551e6048e0f1700b12a10c682f826bfa Mon Sep 17 00:00:00 2001 From: Andrew Burks Date: Wed, 5 Jul 2023 16:34:25 -0700 Subject: [PATCH 2/5] Get digital orders. --- src/amazon_client/amazon_selenium_client.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/amazon_client/amazon_selenium_client.py b/src/amazon_client/amazon_selenium_client.py index fc892bd..7e64e4a 100644 --- a/src/amazon_client/amazon_selenium_client.py +++ b/src/amazon_client/amazon_selenium_client.py @@ -11,6 +11,7 @@ import platform ORDERS_PAGE = "https://www.amazon.com/gp/css/summary/print.html/ref=ppx_yo_dt_b_invoice_o00?ie=UTF8&orderID={}" +DIGITAL_ORDERS_PAGE = "https://www.amazon.com/gp/digital/your-account/order-summary.html?ie=UTF8&orderID={}&print=1&ref_=ppx_yo_dt_b_dpi_o00" class AmazonSeleniumClient(AmazonClient): def __init__(self, userEmail, userPassword, otpSecret): @@ -34,11 +35,16 @@ def __init__(self, userEmail, userPassword, otpSecret): def getAllOrderIDs(self, pages=1): orderPage = "https://www.amazon.com/gp/your-account/order-history/ref=ppx_yo_dt_b_pagination_1_2?ie=UTF8&orderFilter=months-6&search=&startIndex={}" + digitalOrderPage = "https://www.amazon.com/gp/your-account/order-history/ref=ppx_yo_dt_b_pagination_1_2?ie=UTF8&orderFilter=months-6&search=&startIndex={}&unifiedOrders=0" orderIDs = [] for pageNumber in range(pages): self.driver.get(orderPage.format(pageNumber * 10)) soup = BeautifulSoup(self.driver.page_source, 'html.parser') orderIDs.extend([i.getText() for i in soup.find_all("bdi")]) + + self.driver.get(digitalOrderPage.format(pageNumber * 10)) + soup = BeautifulSoup(self.driver.page_source, 'html.parser') + orderIDs.extend([i.getText() for i in soup.find_all("bdi")]) return orderIDs def doSignIn(self): @@ -102,6 +108,9 @@ def interpretDriverErrorPage(self): pass def getInvoicePage(self, orderID): - myOrderPage = 
ORDERS_PAGE.format(orderID) + if(orderID[0] == 'D'): + myOrderPage = DIGITAL_ORDERS_PAGE.format(orderID) + else: + myOrderPage = ORDERS_PAGE.format(orderID) self.driver.get(myOrderPage) return self.driver.page_source From d4d62a3b98f3914689b0ecfec5a4412cbc98c912 Mon Sep 17 00:00:00 2001 From: Andrew Burks Date: Fri, 7 Jul 2023 21:04:47 -0700 Subject: [PATCH 3/5] Fetching transactions page now. --- src/amazon_client/amazon_selenium_client.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/amazon_client/amazon_selenium_client.py b/src/amazon_client/amazon_selenium_client.py index 7e64e4a..8e9cbd0 100644 --- a/src/amazon_client/amazon_selenium_client.py +++ b/src/amazon_client/amazon_selenium_client.py @@ -12,6 +12,7 @@ ORDERS_PAGE = "https://www.amazon.com/gp/css/summary/print.html/ref=ppx_yo_dt_b_invoice_o00?ie=UTF8&orderID={}" DIGITAL_ORDERS_PAGE = "https://www.amazon.com/gp/digital/your-account/order-summary.html?ie=UTF8&orderID={}&print=1&ref_=ppx_yo_dt_b_dpi_o00" +TRANSACTIONS_PAGE = "https://www.amazon.com/cpe/yourpayments/transactions" class AmazonSeleniumClient(AmazonClient): def __init__(self, userEmail, userPassword, otpSecret): @@ -27,7 +28,7 @@ def __init__(self, userEmail, userPassword, otpSecret): else: print(f"Attempting to initialize Chrome Selenium Webdriver on platform {platformMachine}...") options = ChromeOptions() - options.add_argument('--headless') + # options.add_argument('--headless') self.driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) print("Successfully initialized Chrome Selenium Webdriver") @@ -114,3 +115,18 @@ def getInvoicePage(self, orderID): myOrderPage = ORDERS_PAGE.format(orderID) self.driver.get(myOrderPage) return self.driver.page_source + + def getTransactionsPage(self, pages=3): + page_sources = [] + + self.driver.get(TRANSACTIONS_PAGE) + for i in range(pages): + page_sources.append(self.driver.page_source) + try: + nextPageButton = 
self.driver.find_element(By.XPATH, '// *[ @ id = "a-autoid-1"] / span / input') + except NoSuchElementException: + nextPageButton = self.driver.find_element(By.XPATH, '// *[ @ id = "cpefront-mpo-widget"] / div / form / div[2] / div[2] / span / span / input') + nextPageButton.click() + time.sleep(2) + + return page_sources \ No newline at end of file From 7fc4be7e39a34a13bce870a5ad789df751199e23 Mon Sep 17 00:00:00 2001 From: Andrew Burks Date: Sun, 9 Jul 2023 15:20:26 -0700 Subject: [PATCH 4/5] Handle phone skip button in sign in. --- src/amazon_client/amazon_selenium_client.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/amazon_client/amazon_selenium_client.py b/src/amazon_client/amazon_selenium_client.py index 8e9cbd0..4c16bd6 100644 --- a/src/amazon_client/amazon_selenium_client.py +++ b/src/amazon_client/amazon_selenium_client.py @@ -91,6 +91,14 @@ def doSignIn(self): self.driver.find_element(By.ID, "auth-signin-button").click() time.sleep(1) + try: + phoneSkipButton = self.driver.find_element(By.XPATH, '//*[@id="ap-account-fixup-phone-skip-link"]') + phoneSkipButton.click() + time.sleep(1) + except NoSuchElementException: + pass + + def signIn(self): try: self.doSignIn() From 5ef5055789cdffefef18f964fd9fa573b3f6f15e Mon Sep 17 00:00:00 2001 From: Andrew Burks Date: Mon, 10 Jul 2023 19:18:03 -0700 Subject: [PATCH 5/5] Reenable headless mode. 
--- src/amazon_client/amazon_selenium_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amazon_client/amazon_selenium_client.py b/src/amazon_client/amazon_selenium_client.py index 4c16bd6..50a8f51 100644 --- a/src/amazon_client/amazon_selenium_client.py +++ b/src/amazon_client/amazon_selenium_client.py @@ -28,7 +28,7 @@ def __init__(self, userEmail, userPassword, otpSecret): else: print(f"Attempting to initialize Chrome Selenium Webdriver on platform {platformMachine}...") options = ChromeOptions() - # options.add_argument('--headless') + options.add_argument('--headless') self.driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) print("Successfully initialized Chrome Selenium Webdriver")