From 5df61e912be1efbbdf397d0a4b4fb65230d6aed5 Mon Sep 17 00:00:00 2001 From: farrellw Date: Tue, 14 Jan 2025 20:04:49 -0600 Subject: [PATCH 01/14] Count downrange posts. --- monthly_charts/PAXcharter.py | 197 ++++++++++++++++++++--------------- 1 file changed, 111 insertions(+), 86 deletions(-) diff --git a/monthly_charts/PAXcharter.py b/monthly_charts/PAXcharter.py index 3f90f8a..2d403ed 100755 --- a/monthly_charts/PAXcharter.py +++ b/monthly_charts/PAXcharter.py @@ -1,9 +1,4 @@ #!/usr/bin/env python3 -''' -This script was written by Beaker from F3STL. Questions? @srschaecher on twitter or srschaecher@gmail.com. -This script queries Slack for all PAX Users and their respective beatdown attendance. It then generates bar graphs -on attendance for each member and sends it to them in a private Slack message. -''' from slack_sdk import WebClient from slack_sdk.errors import SlackApiError @@ -17,10 +12,12 @@ import sys import os import logging +import hashlib +import numpy as np +import calendar # This handler does retries when HTTP status 429 is returned from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler - # Configure AWS credentials config = configparser.ConfigParser(); config.read('../config/credentials.ini'); @@ -28,7 +25,7 @@ port = int(config['aws']['port']) user = config['aws']['user'] password = config['aws']['password'] -#db = config['aws']['db'] + db = sys.argv[1] # Set Slack token @@ -50,7 +47,7 @@ #Get Current Year, Month Number and Name d = datetime.datetime.now() -d = d - datetime.timedelta(days=7) +d = d - datetime.timedelta(days=15) thismonth = d.strftime("%m") thismonthname = d.strftime("%b") thismonthnamelong = d.strftime("%B") @@ -66,8 +63,15 @@ print('Looking for all Slack Users for ' + db + '. 
Stand by...') + +def hash_email(email): + if isinstance(email, str): + return hashlib.md5(email.encode('utf-8')).hexdigest() + else: + return None # Or return a default value if needed + # Make users Data Frame -column_names = ['user_id', 'user_name', 'real_name'] +column_names = ['user_id', 'user_name', 'real_name', 'email'] users_df = pd.DataFrame(columns = column_names) data = '' while True: @@ -76,9 +80,12 @@ next_cursor = response_metadata.get('next_cursor') users = users_response.data['members'] users_df_tmp = pd.json_normalize(users) - users_df_tmp = users_df_tmp[['id', 'profile.display_name', 'profile.real_name']] - users_df_tmp = users_df_tmp.rename(columns={'id' : 'user_id', 'profile.display_name' : 'user_name', 'profile.real_name' : 'real_name'}) + users_df_tmp = users_df_tmp[['id', 'profile.display_name', 'profile.real_name', 'profile.email']] + users_df_tmp = users_df_tmp.rename(columns={'id' : 'user_id', 'profile.display_name' : 'user_name', 'profile.real_name' : 'real_name', 'profile.email' : 'email'}) users_df = users_df.append(users_df_tmp, ignore_index=True) + + # Apply the hash function to the email column + users_df['email'] = users_df['email'].apply(hash_email) if next_cursor: # Keep going from next offset. 
#print('next_cursor =' + next_cursor) @@ -88,6 +95,7 @@ for index, row in users_df.iterrows(): un_tmp = row['user_name'] rn_tmp = row['real_name'] + row['email'] if un_tmp == "" : row['user_name'] = rn_tmp @@ -124,88 +132,105 @@ def success_message_sent(user_id_tmp, pax, db): # Query AWS by user ID for attendance history #users_df = users_df.iloc[:10] # THIS LINE IS FOR TESTING PURPOSES, THIS FORCES ONLY n USER ROWS TO BE SENT THROUGH THE PIPE -total_graphs = 0 # Sets a counter for the total number of graphs made (users with posting data) pause_on = [ 50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000 ] +def savePlot(aggregated_data, total_count_for_year, title, file_path): + # Plot the stacked bar chart + ax = aggregated_data.plot(kind='bar', stacked=True) + # Add the total count as text on the chart + ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes, + fontsize=12, verticalalignment='top', horizontalalignment='right') + plt.title(title) + plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False) + plt.ioff() + plt.savefig(file_path, bbox_inches='tight') #save the figure to a file + plt.close() + +def execute_user_chart(attendance_tmp, user_id_tmp, db, pax, d, thismonthname, yearnum, rm): + if attendance_tmp and len(attendance_tmp) > 0: + attendance_tmp_df = pd.DataFrame(attendance_tmp) + thismonth = d.strftime("%m") + send_chart = attendance_tmp_df[(attendance_tmp_df['table_schema'] == db) & (attendance_tmp_df['Month'] == int(thismonth))].shape[0] + if send_chart > 0: # This sends a graph to ALL PAX who have attended at least 1 beatdown + rgion_method = rm + # Modify the 'AO' column based on the condition where 'table_schema' is not equal to 'db' + attendance_tmp_df['AO'] = np.where(attendance_tmp_df['table_schema'] != db, "DR: " + attendance_tmp_df['region'], attendance_tmp_df['AO']) + + attendance_tmp_df.sort_values(by=['Month'], inplace=True) + 
attendance_tmp_df['Month'] = attendance_tmp_df['Month'].map(lambda x: calendar.month_abbr[x]) + + # Group by 'Month' and 'AO', and aggregate the 'cnt' column by summing it + aggregated_data = attendance_tmp_df.groupby(['Month', 'AO'], sort=False)['cnt'].sum().unstack() + + # Calculate the total count for the year from the 'cnt' column + total_count_for_year = attendance_tmp_df['cnt'].sum() + + file_path = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg' + + savePlot(aggregated_data, total_count_for_year, 'Number of posts by '+ pax + ' by AO/Month for ' + yearnum, file_path) + + message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + "! SYITG!" + + # user_id_tmp = 'U03QFC2S2LX' + print('PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!') + + # The current method v2, and legacy method, can both be invoked here depending on the region_method variable. + # Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025. + # The main difference is that v2 requires an additional conversation scope. + # New regions will all use v2. + # user_id_override = "U06GDMGJKNE" + if rgion_method == "v2": + try: + response = send_slack_message_v2(user_id_tmp, message, file_path) + + success_message_sent(user_id_tmp, pax, db) + except Exception as e: + # If the error is missing scope, then + if e.response['error'] == 'missing_scope': + print("Error: The app is missing required scopes. 
Please add the 'im:write' scope.") + rgion_method = "v1" + else: + log_message_sent_error(user_id_tmp, db, pax) + raise e + + if rgion_method != "v2": + try: + channel = user_id_tmp + response = send_slack_message(channel, message, file_path) + + success_message_sent(user_id_tmp, pax, db) + except Exception as e: + log_message_sent_error(user_id_tmp, db, pax) + raise e + + return True, rgion_method + else: + print(pax + ' skipped') + return False, region_method + + +total_graphs = 0 region_method = "v2" -for user_id in users_df['user_id']: +for _, row in users_df.iterrows(): + user_id = row['user_id'] + email = row['email'] + pax = row['user_name'] try: attendance_tmp_df = pd.DataFrame([]) # creates an empty dataframe to append to with mydb.cursor() as cursor: - sql = "SELECT * FROM attendance_view WHERE PAX = (SELECT user_name FROM users WHERE user_id = %s) AND YEAR(Date) = %s ORDER BY Date" - user_id_tmp = user_id - val = (user_id_tmp, yearnum) - cursor.execute(sql, val) - attendance_tmp = cursor.fetchall() - attendance_tmp_df = pd.DataFrame(attendance_tmp) - month = [] - day = [] - year = [] - count = attendance_tmp_df.shape[0] - #if user_id_tmp == 'U0187M4NWG4': #Use this to send a graph to only 1 specific PAX - if count > 0: # This sends a graph to ALL PAX who have attended at least 1 beatdown - for Date in attendance_tmp_df['Date']: - #for index, row in attendance_tmp_df.iterrows(): - datee = datetime.datetime.strptime(Date, "%Y-%m-%d") - month.append(datee.strftime("%B")) - day.append(datee.day) - year.append(datee.year) - pax = attendance_tmp_df.iloc[0]['PAX'] - attendance_tmp_df['Month'] = month - attendance_tmp_df['Day'] = day - attendance_tmp_df['Year'] = year - attendance_tmp_df.sort_values(by=['Date'], inplace=True) - attendance_tmp_df.groupby(['Month', 'AO'], sort=False).size().unstack().plot(kind='bar',stacked=True) - - ax = attendance_tmp_df.groupby(['Month', 'AO'], sort=False).size().unstack().plot(kind='bar', stacked=True) - total_count_for_year = 
attendance_tmp_df.shape[0] - - # Add the total count as text on the chart - ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes, - fontsize=12, verticalalignment='top', horizontalalignment='right') - - plt.title('Number of posts by '+ pax + ' by AO/Month for ' + yearnum) - plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False) - plt.ioff() - plt.savefig('../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg', bbox_inches='tight') #save the figure to a file + sql = "SELECT table_schema, region, year as Year, month as Month, ao as AO, email_hash, cnt FROM f3stlcity.user_monthly_aggregates WHERE email_hash=%s AND year = %s" + user_id_tmp = user_id + val = (email, yearnum) + cursor.execute(sql, val) + attendance_tmp = cursor.fetchall() + if attendance_tmp : + graph_or_not, rm = execute_user_chart(attendance_tmp, user_id_tmp, db, pax, d, thismonthname, yearnum, region_method) + region_method = rm + + if graph_or_not: total_graphs = total_graphs + 1 - message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + ". \nPush yourself, get those bars higher every month! SYITG!" - file = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg' - - #manual_graphs = [240,241,242,244,245,246,247,249,250] - if total_graphs > 0: # This is a count of total users processed, in case of error during processing. Set the total_graphs > to whatever # comes next in the log file row count. - print(total_graphs, 'PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!') - - # The current method v2, and legacy method, can both be invoked here depending on the region_method variable. - # Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025. - # The main difference is that v2 requires an additional conversation scope. - # New regions will all use v2. 
- # user_id_override = "U06GDMGJKNE" - if region_method == "v2": - try: - response = send_slack_message_v2(user_id_tmp, message, file) - - success_message_sent(user_id_tmp, pax, db) - except Exception as e: - # If the error is missing scope, then - if e.response['error'] == 'missing_scope': - print("Error: The app is missing required scopes. Please add the 'im:write' scope.") - region_method = "v1" - else: - log_message_sent_error(user_id_tmp, db, pax) - raise e - - if region_method != "v2": - try: - channel = user_id_tmp - response = send_slack_message(channel, message, file) - - success_message_sent(user_id_tmp, pax, db) - except: - log_message_sent_error(user_id_tmp, db, pax) - raise e - else: - print(pax + ' skipped') + else: + print("No attendance this year", user_id) except Exception as e: print(e) print("An exception occurred for User ID " + user_id) From ebd7341fd05d407279faad076062ded7f662ccc9 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 13:30:33 -0600 Subject: [PATCH 02/14] Move paxminer scraping to a cloud function. Clean up some errors and upgrade necessary warnings from python libraries. 
--- backblast_scraping/.dockerignore | 2 + backblast_scraping/Dockerfile | 13 + backblast_scraping/PAXMiner_Cloud_Run.py | 45 + backblast_scraping/PAX_BD_Miner.py | 840 +++++++++--------- .../PAXminer_Daily_Execution.py | 10 +- .../PAXminer_Manual_Execution.py | 18 +- backblast_scraping/deploy.sh | 5 + backblast_scraping/requirements.txt | 7 + 8 files changed, 500 insertions(+), 440 deletions(-) create mode 100644 backblast_scraping/.dockerignore create mode 100644 backblast_scraping/Dockerfile create mode 100644 backblast_scraping/PAXMiner_Cloud_Run.py create mode 100755 backblast_scraping/deploy.sh create mode 100644 backblast_scraping/requirements.txt diff --git a/backblast_scraping/.dockerignore b/backblast_scraping/.dockerignore new file mode 100644 index 0000000..27a1106 --- /dev/null +++ b/backblast_scraping/.dockerignore @@ -0,0 +1,2 @@ +config/* +venv \ No newline at end of file diff --git a/backblast_scraping/Dockerfile b/backblast_scraping/Dockerfile new file mode 100644 index 0000000..9cd77c3 --- /dev/null +++ b/backblast_scraping/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim-buster + +WORKDIR /app + +# Copy the requirements file and install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application files +COPY . . + +# Set the entrypoint for the container +CMD ["python", "PAXMiner_Cloud_Run.py", "A-Z"] \ No newline at end of file diff --git a/backblast_scraping/PAXMiner_Cloud_Run.py b/backblast_scraping/PAXMiner_Cloud_Run.py new file mode 100644 index 0000000..aa22d95 --- /dev/null +++ b/backblast_scraping/PAXMiner_Cloud_Run.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +import pandas as pd +import os +import sys +from PAX_BD_Miner import run_pax_bd_miner, create_database_connection +from paxminer_models import Config + +# Set RegEx range for which regions will be queried. Command line input parameter 1 should be a regex range (e.g. 
A-M) which will search for all regions starting with A through M. +region_regex = sys.argv[1] + +paxminer_db = None + +# Get list of regions and Slack tokens for PAXminer execution +try: + host = os.environ['host'] + port = 3306 + user = os.environ['user'] + password = os.environ['password'] + db = "paxminer" + #Define AWS Database connection criteria + paxminer_db = create_database_connection(host, port, user, password, db) + + with paxminer_db.cursor() as cursor: + sql = "SELECT * from paxminer.regions WHERE active = 1 AND scrape_backblasts = 1" + cursor.execute(sql) + regions = cursor.fetchall() + regions_df = pd.DataFrame(regions, columns=['region', 'slack_token', 'schema_name']) +finally: + print('Getting list of regions that use PAXminer...') + if paxminer_db: + paxminer_db.close() + +for index, row in regions_df.iterrows(): + region = row['region'] + key = row['slack_token'] + db = row['schema_name'] + print(f'Executing user updates for region {region}') + + try: + run_pax_bd_miner(host, port, user, password, db, key) + except Exception as e: + print(f'Error in PAXminer execution for region {region}') + print(e) + finally: + print(f'-------- PAXMiner Coud Run Complete {region}-------------') \ No newline at end of file diff --git a/backblast_scraping/PAX_BD_Miner.py b/backblast_scraping/PAX_BD_Miner.py index 90a08b5..3f4b111 100755 --- a/backblast_scraping/PAX_BD_Miner.py +++ b/backblast_scraping/PAX_BD_Miner.py @@ -1,9 +1,4 @@ #!/usr/bin/env python3 -''' -This script was written by Beaker from F3STL. Questions? @srschaecher on twitter or srschaecher@gmail.com. -This script queries Slack for User, Channel, and Conversation (channel) history and then parses all conversations to find Backblasts. -All Backblasts are then parsed to collect the BEATDOWN information for any given workout and puts those attendance records into the AWS F3STL database for recordkeeping. 
-''' import warnings from slack_sdk import WebClient @@ -14,12 +9,11 @@ import pytz import re import pymysql.cursors -import configparser -import sys import logging import math import warnings from BD_Update_Utils import determine_db_action, find_match, retrievePreviousBackblasts, DbAction +from paxminer_models import Config warnings.simplefilter(action='ignore', category=FutureWarning) warnings.filterwarnings( @@ -28,175 +22,16 @@ ) from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler + MIN_BACKBLAST = 'Backblast:AO:PAX:@x@yQ:@xCount:0' SECONDS_PER_DAY = 86400 LOOKBACK_DAYS = 7 LOOKBACK_SECONDS = SECONDS_PER_DAY * LOOKBACK_DAYS ALLOWABLE_DAYS_BACKBLAST_DATE_VALID = 30 +pat = r'(?<=\<).+?(?=>)' pd.options.mode.chained_assignment = None # default='warn' -# Configure AWS credentials -config = configparser.ConfigParser(); -config.read('../config/credentials.ini'); -host = config['aws']['host'] -port = int(config['aws']['port']) -user = config['aws']['user'] -password = config['aws']['password'] -db = sys.argv[1] # Use this for the multi-region automated update - -# Set Slack token -key = sys.argv[2] # Use this for the multi-region automated update -slack = WebClient(token=key) - -# Enable rate limited error retries -rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=5) -slack.retry_handlers.append(rate_limit_handler) - -#Define AWS Database connection criteria -mydb = pymysql.connect( - host=host, - port=port, - user=user, - password=password, - db=db, - charset='utf8mb4', - cursorclass=pymysql.cursors.DictCursor) - -# Set epoch and yesterday's timestamp for datetime calculations -epoch = datetime(1970, 1, 1) -yesterday = datetime.now() - timedelta(days = 1) -today = datetime.now() -cutoff_date = today - timedelta(days = 7) # This tells BDminer to go back up to N days for message age -current_ts = time.time() -cutoff_ts = current_ts - LOOKBACK_SECONDS -cutoff_date = cutoff_date.strftime('%Y-%m-%d') -date_time = 
today.strftime("%m/%d/%Y, %H:%M:%S") - -# Set up logging -logging.basicConfig(filename='../logs/BD_PAX_miner.log', - filemode = 'a', - format=f'%(asctime)s [{db}] %(levelname)-8s %(message)s', - datefmt = '%Y-%m-%d %H:%M:%S', - level = logging.INFO) -logging.info(f"Beginning BD+Paxminer {current_ts}") -logging.info("Running combined BD+PAXminer for " + db) -pm_log_text = date_time + " CDT: Executing hourly PAXminer run for " + db + "\n" - -# Make users Data Frame -column_names = ['user_id', 'user_name', 'real_name'] -users_df = pd.DataFrame(columns = column_names) -users_df.loc[len(users_df.index)] = ['APP', 'BackblastApp', 'BackblastApp'] -data = '' -while True: - users_response = slack.users_list(limit=1000, cursor=data) - response_metadata = users_response.get('response_metadata', {}) - next_cursor = response_metadata.get('next_cursor') - users = users_response.data['members'] - users_df_tmp = pd.json_normalize(users) - users_df_tmp = users_df_tmp[['id', 'profile.display_name', 'profile.real_name']] - users_df_tmp = users_df_tmp.rename(columns={'id' : 'user_id', 'profile.display_name' : 'user_name', 'profile.real_name' : 'real_name'}) - users_df = users_df.append(users_df_tmp, ignore_index=True) - if next_cursor: - # Keep going from next offset. 
- #print('next_cursor =' + next_cursor) - data = next_cursor - else: - break -for index, row in users_df.iterrows(): - un_tmp = row['user_name'] - rn_tmp = row['real_name'] - if un_tmp == "" : - row['user_name'] = rn_tmp - -# Retrieve Channel List from AWS database (backblast = 1 denotes which channels to search for backblasts) -try: - with mydb.cursor() as cursor: - sql = "SELECT channel_id, ao FROM aos WHERE backblast = 1 AND archived = 0" - cursor.execute(sql) - channels = cursor.fetchall() - channels_df = pd.DataFrame(channels, columns=['channel_id', 'ao']) -finally: - print('Pulling current beatdown records...') - -users_dict = users_df[["user_id", "user_name"]].set_index("user_id").to_dict()["user_name"] -aos_dict = channels_df[["channel_id", "ao"]].set_index("channel_id").to_dict()["ao"] - -# Retrieve backblast entries from regional database for comparison to identify new or updated posts -try: - previously_saved_beatdowns = retrievePreviousBackblasts(mydb, cutoff_ts) -finally: - print('Looking for new backblasts from Slack...') - -# Get all channel conversation -# messages_df = pd.DataFrame([]) #creates an empty dataframe to append to -messages_df = pd.DataFrame([], columns=['user_id', 'message_type', 'timestamp', 'ts_edited', 'text', 'channel_id']) #creates an empty dataframe to append to -for id in channels_df['channel_id']: - data = '' - pages = 1 - while True: - try: - #print("Checking channel " + id) # <-- Use this if debugging any slack channels throwing errors - response = slack.conversations_history(channel=id, cursor=data) - response_metadata = response.get('response_metadata', {}) - try: - next_cursor = response_metadata.get('next_cursor') - except: - pass - messages = response.data['messages'] - temp_df = pd.json_normalize(messages) - try: - temp_df = temp_df[['user', 'type', 'text', 'ts', 'edited.ts']] - except: - temp_df = temp_df[['user', 'type', 'text', 'ts']] - temp_df['edited.ts'] = "NA" - finally: - 
temp_df["user"]=temp_df["user"].fillna("APP") - temp_df = temp_df.rename(columns={'user' : 'user_id', 'type' : 'message_type', 'ts' : 'timestamp', 'edited.ts' : 'ts_edited'}) - temp_df["channel_id"] = id - messages_df = messages_df.append(temp_df, ignore_index=True) - except: - print("Error: Unable to access Slack channel:", id, "in region:",db) - logging.warning("Error: Unable to access Slack channel %s in region %s", id, db) - pm_log_text += "Error: Unable to access Slack channel " + id + " in region " + db + "\n" - if next_cursor != "None": - # Keep going from next offset. - data = next_cursor - if pages == 1: ## Total number of pages to query from Slack - break - pages = pages + 1 - else: - break - -# Calculate Date and Time columns -msg_date = [] -msg_time = [] -for ts in messages_df['timestamp']: - seconds_since_epoch = float(ts) - dt = epoch + timedelta(seconds=seconds_since_epoch) - dt = dt.replace(tzinfo=pytz.utc) - dt = dt.astimezone(pytz.timezone('America/Chicago')) - msg_date.append(dt.strftime('%Y-%m-%d')) - msg_time.append(dt.strftime('%H:%M:%S')) -messages_df['msg_date'] = msg_date -messages_df['time'] = msg_time - - -# Merge the data frames into 1 joined DF -f3_df = pd.merge(messages_df, users_df) -f3_df = pd.merge(f3_df,channels_df) -f3_df = f3_df[['timestamp', 'ts_edited', 'msg_date', 'time', 'channel_id', 'ao', 'user_id', 'user_name', 'real_name', 'text']] -f3_df['ts_edited'] = f3_df['ts_edited'].fillna('NA') - -# Now find only backblast messages (either "Backblast" or "Back Blast") - note .casefold() denotes case insensitivity - and pull out the PAX user ID's identified within -# This pattern finds username links followed by commas: pat = r'(?<=\\xa0).+?(?=,)' -pat = r'(?<=\<).+?(?=>)' # This pattern finds username links within brackets <> -bd_df = pd.DataFrame([]) -pax_attendance_df = pd.DataFrame([]) -warnings.filterwarnings("ignore", category=DeprecationWarning) #This prevents displaying the Deprecation Warning that is present for the RegEx 
lookahead function used below - - - def retrieve_q_line(backblast): # Find the Q information qline = re.findall(r'(?<=\n)\*?V?Qs?\*?:.+?(?=\n)', str(backblast), re.MULTILINE) #This is regex looking for \nQ: with or without an * before Q @@ -215,7 +50,7 @@ def retrieve_q_line(backblast): def retrieve_count_line(backblast): # Combine the regex patterns for 'Count' and 'Total' - patterns = [r'(?<=\n)\*?(?i)Count\*?:\*?.+?(?:$|\n)', r'(?<=\n)\*?(?i)Total\*?:\*?.+?(?=\n)'] + patterns = [r'(?i)(?<=\n)\*?Count\*?:\*?.+?(?:$|\n)', r'(?i)(?<=\n)\*?Total\*?:\*?.+?(?=\n)'] pax_count = None # Search for the patterns in the backblast text @@ -234,23 +69,23 @@ def retrieve_count_line(backblast): return False, -1 def retrieve_fng_line(backblast): - # Find the FNGs line + # Find the FNGs line fngline = re.findall(r'(?<=\n)\*?FNGs\*?:\*?.+?(?=\n)', str(backblast), re.MULTILINE) # This is regex looking for \nFNGs: with or without an * before Q if fngline: fngline = fngline[0] - fngs = re.sub('\*?FNGs\*?:\s?', '', str(fngline)) + fngs = re.sub(r'\*?FNGs\*?:\s?', '', str(fngline)) fngs = fngs.strip() return True, fngs else: fngs = 'None listed' return False, fngs -def retrieve_date_line(backblast): +def retrieve_date_line(backblast, msg_date): #Find the Date: dateline = re.findall(r'(?<=\n)Date:.+?(?=\n)', str(backblast), re.IGNORECASE) if dateline: dateline = re.sub('xa0', ' ', str(dateline), flags=re.I) - dateline = re.sub("Date:\s?", '', str(dateline), flags=re.I) + dateline = re.sub(r"Date:\s?", '', str(dateline), flags=re.I) dateline = dateparser.parse(dateline) #dateparser is a flexible date module that can understand many different date formats if dateline is None: date_tmp = '2099-12-31' #sets a date many years in the future just to catch this error later (needs to be a future date) @@ -280,14 +115,14 @@ def retrieve_ao_line(backblast): return False, 'Unknown' -def bd_info(text_tmp): +def bd_info(text_tmp, users_dict, aos_dict, timestamp, ts_edited, msg_date, ao_tmp, 
user_name, user_id): q_found, qid, coqid = retrieve_q_line(text_tmp) count_found, pax_count = retrieve_count_line(text_tmp) - + fng_found, fngs = retrieve_fng_line(text_tmp) - date_found, date_tmp = retrieve_date_line(text_tmp) + date_found, date_tmp = retrieve_date_line(text_tmp, msg_date) ao_found, parsed_ao_channel = retrieve_ao_line(text_tmp) @@ -304,8 +139,7 @@ def bd_info(text_tmp): # Adds the Q lines to the pax list as well. # Deduplicates the list to account for PAX listed multiple lines, or a Q who is also listed in the PAX list. def list_pax(beatdown_text): - #paxline = [line for line in beatdown_text.split('\n') if 'pax'.casefold() in line.casefold()] - paxline = re.findall(r'(?<=\n)\*?(?i)PAX\*?:\*?.+?(?=\n)', str(beatdown_text), re.MULTILINE) #This is a case insensitive regex looking for \nPAX with or without an * before PAX + paxline = re.findall(r'(?i)(?<=\n)\*?PAX\*?:\*?.+?(?=\n)', str(beatdown_text), re.MULTILINE) #This is a case insensitive regex looking for \nPAX with or without an * before PAX pax = re.findall(pat, str(paxline), re.MULTILINE) pax = [re.sub(r'@','', i) for i in pax] @@ -360,23 +194,23 @@ def parse_backblast(backblast: str, users_dict, aos_dict) -> str: def containsBackblastKeyword(potential_backblast): return ( re.findall('^Slackblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^\*Backblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^Backblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^\*Back blast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^\*Back Blast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^Slack blast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^Sackblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^\*Slackblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^\*Slack blast', potential_backblast, re.IGNORECASE | 
re.MULTILINE) or - re.findall('^\*Sackblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^\*Slackbast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^Slackbast', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^Sackdraft', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^\*Sackdraft', potential_backblast, re.IGNORECASE | re.MULTILINE) or - re.findall('^Back Blast', potential_backblast, re.IGNORECASE | re.MULTILINE) + re.findall(r'^\*Backblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^Backblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^\*Back blast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^\*Back Blast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^Slack blast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^Sackblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^\*Slackblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^\*Slack blast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^\*Sackblast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^\*Slackbast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^Slackbast', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^Sackdraft', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^\*Sackdraft', potential_backblast, re.IGNORECASE | re.MULTILINE) or + re.findall(r'^Back Blast', potential_backblast, re.IGNORECASE | re.MULTILINE) ) -def isValidDate(date): +def isValidDate(date, today): lookback_valid_date = (today - timedelta(days = ALLOWABLE_DAYS_BACKBLAST_DATE_VALID )).strftime('%Y-%m-%d') forward_valid_date = (today + timedelta(days = 2)).strftime('%Y-%m-%d') @@ -392,7 +226,7 @@ def isValidDate(date): # Taking a dataframe of beatdown 
attendance, inserts these records into the database. # If the database action is 'UPDATE', we clear out all the records that were associated with this beatdown previous to inserting. # This prevents extra entries, in the scenario where 1) PAX that were mistakenly added, or 2) if one of the compound keys changed ( such as ao or q ) -def insert_beatdown_attendance(dbConn, cursor, beatdown_attendance, database_action, beatdownkey): +def insert_beatdown_attendance(dbConn, cursor, beatdown_attendance, database_action, beatdownkey, cutoff_date): inserts = 0 try: if database_action == DbAction.UPDATE: @@ -443,236 +277,398 @@ def safe_cast(val, to_type, default=None): except (ValueError, TypeError): return default -# Iterate through the new bd_df dataframe, pull out the channel_name, date, and text line from Slack. Process the text line to find the beatdown info -for index, row in f3_df.iterrows(): - ao_tmp = row['channel_id'] - timestamp = row['timestamp'] - ts_edited = row['ts_edited'] - msg_date = row['msg_date'] - text_tmp = row['text'] - text_tmp = re.sub('_\\xa0', ' ', str(text_tmp)) - text_tmp = re.sub('\\xa0', ' ', str(text_tmp)) - text_tmp = re.sub('_\*', '', str(text_tmp)) - text_tmp = re.sub('\*_', '', str(text_tmp)) - text_tmp = re.sub('\*', '', str(text_tmp)) - user_name = row['user_name'] - user_id = row['user_id'] - - # Backblast criteria one. Be over the minimum length and contain the backblast keyword - if (len(str(text_tmp)) >= len(MIN_BACKBLAST)) and containsBackblastKeyword(text_tmp): - line_matches, new_row = bd_info(text_tmp) - - # Backblast criteria two. Besides the backblast keyword, contain one other properly formatted line. 
- if line_matches >= 1: - if float(new_row["timestamp"]) > float(cutoff_ts): - upsertAction = determine_db_action(new_row, find_match(new_row, previously_saved_beatdowns)) - - new_row["database_action"] = upsertAction - - bd_df = bd_df.append(new_row, ignore_index = True) - -# Now connect to the AWS database and insert some rows! -try: - with mydb.cursor() as cursor: - for index, row in bd_df.iterrows(): - qc = 1 - send_q_msg = 0 - - update_sql = """ - UPDATE beatdowns - SET timestamp=%s, ts_edited=%s, ao_id=%s, bd_date=%s, q_user_id=%s, coq_user_id=%s, pax_count=%s, backblast=%s, fngs=%s, backblast_parsed=%s - WHERE timestamp=%s - LIMIT 1 - """ - insert_sql = "INSERT into beatdowns (timestamp, ts_edited, ao_id, bd_date, q_user_id, coq_user_id, pax_count, backblast, fngs, backblast_parsed) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" - - timestamp = row['timestamp'] - ts_edited = row['ts_edited'] - channel_id = row['channel_id'] - msg_date = row['msg_date'] - bd_date = row['bd_date'] - q_user_id = row['q_user_id'] - coq_user_id = row['coq_user_id'] - pax_count = row['pax_count'] - backblast = row['backblast'] - user_name = row['user_name'] - user_id = row['user_id'] - fngs = row['fngs'] - msg_link = slack.chat_getPermalink(channel=channel_id, message_ts=timestamp)["permalink"] - parsed_ao_channel = row['parsed_ao_channel_id'] - database_action = row["database_action"] - backblast_parsed = row['backblast_parsed'] - - if parsed_ao_channel in channels_df["channel_id"].values : - ao_id = parsed_ao_channel - else : - ao_id = channel_id - - val = (timestamp, ts_edited, ao_id, bd_date, q_user_id, coq_user_id, pax_count, backblast, fngs, backblast_parsed) - # for Slackblast users, set the user_id as the Q - appnames = ['slackblast', 'Slackblast'] - if user_name in appnames: - user_id = q_user_id - user_name = 'Q' - q_error_text = "Paxminer failed to import your latest backblast.\n" - q_message_end = "Backblast message posted on " + msg_date + " at <#" + ao_id + "> (<" 
+ msg_link + "|link>)\n" - # pm_log_text += "- Processing <" + msg_link + "|this> backblast." - - if database_action == DbAction.IGNORE: - logging.debug("Encountered a bblast already recorded and has not been modified. Skipping import") - continue - - if q_user_id == 'NA': - logging.warning("Q error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) - print('Backblast error on Q at AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". Slack message sent to Q. bd: ", bd_date, "cutoff:", cutoff_date) - pm_log_text += " - Backblast error on Q at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" - if user_id != 'APP': - q_error_text += " - ERROR: The Q is not present or not tagged correctly. Please ensure the Q is tagged using @PAX_NAME \n" - send_q_msg = 2 - qc = 0 - else: - pass - if pax_count == -1: - logging.warning("Count error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) - print('Backblast error on Count - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". Slack message sent to Q.") - pm_log_text += " - Backblast error on Count at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" - if user_id != 'APP': - q_error_text += " - ERROR: The Count is not present or not entered correctly. The correct syntax is Count: XX - Use digits please. 
\n" - send_q_msg = 2 - qc = 0 +def create_database_connection(host, port, user, password, db): + """Creates and returns a database connection.""" + try: + mydb = pymysql.connect( + host=host, + port=port, + user=user, + password=password, + db=db, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor + ) + return mydb + except pymysql.MySQLError as e: + logging.error(f"Database connection error: {e}") + raise + +def run_pax_bd_miner(host, port, user, password, db, key): + + slack = WebClient(token=key) + + # Enable rate limited error retries + rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=5) + slack.retry_handlers.append(rate_limit_handler) + + #Define AWS Database connection criteria + mydb = create_database_connection(host, port, user, password, db) + + # Set epoch and yesterday's timestamp for datetime calculations + epoch = datetime(1970, 1, 1) + today = datetime.now() + cutoff_date = today - timedelta(days = 7) + current_ts = time.time() + cutoff_ts = current_ts - LOOKBACK_SECONDS + cutoff_date = cutoff_date.strftime('%Y-%m-%d') + date_time = today.strftime("%m/%d/%Y, %H:%M:%S") + + # Set up logging + logging.basicConfig(format=f'%(asctime)s [{db}] %(levelname)-8s %(message)s', + datefmt = '%Y-%m-%d %H:%M:%S', + level = logging.INFO) + logging.info(f"Beginning BD+Paxminer {current_ts}") + logging.info("Running combined BD+PAXminer for " + db) + pm_log_text = date_time + " CDT: Executing hourly PAXminer run for " + db + "\n" + + # Make users Data Frame + column_names = ['user_id', 'user_name', 'real_name'] + users_df = pd.DataFrame(columns = column_names) + users_df.loc[len(users_df.index)] = ['APP', 'BackblastApp', 'BackblastApp'] + data = '' + while True: + try: + users_response = slack.users_list(limit=1000, cursor=data) + except Exception as e: + logging.error(e) + return + response_metadata = users_response.get('response_metadata', {}) + next_cursor = response_metadata.get('next_cursor') + users = users_response.data['members'] + 
users_df_tmp = pd.json_normalize(users) + users_df_tmp = users_df_tmp[['id', 'profile.display_name', 'profile.real_name']] + users_df_tmp = users_df_tmp.rename(columns={'id' : 'user_id', 'profile.display_name' : 'user_name', 'profile.real_name' : 'real_name'}) + users_df = pd.concat([users_df, users_df_tmp], ignore_index=True) + if next_cursor: + # Keep going from next offset. + data = next_cursor + else: + break + for index, row in users_df.iterrows(): + un_tmp = row['user_name'] + rn_tmp = row['real_name'] + if un_tmp == "" : + row['user_name'] = rn_tmp + + # Retrieve Channel List from AWS database (backblast = 1 denotes which channels to search for backblasts) + try: + with mydb.cursor() as cursor: + sql = "SELECT channel_id, ao FROM aos WHERE backblast = 1 AND archived = 0" + cursor.execute(sql) + channels = cursor.fetchall() + channels_df = pd.DataFrame(channels, columns=['channel_id', 'ao']) + finally: + print('Pulling current beatdown records...') + + users_dict = users_df[["user_id", "user_name"]].set_index("user_id").to_dict()["user_name"] + aos_dict = channels_df[["channel_id", "ao"]].set_index("channel_id").to_dict()["ao"] + + # Retrieve backblast entries from regional database for comparison to identify new or updated posts + try: + previously_saved_beatdowns = retrievePreviousBackblasts(mydb, cutoff_ts) + finally: + print('Looking for new backblasts from Slack...') + + # Get all channel conversation + messages_df = pd.DataFrame([], columns=['user_id', 'message_type', 'timestamp', 'ts_edited', 'text', 'channel_id']) #creates an empty dataframe to append to + + for row in channels_df.itertuples(index=False): + id, ao = row.channel_id, row.ao + data = '' + pages = 1 + while True: + try: + response = slack.conversations_history(channel=id, cursor=data) + response_metadata = response.get('response_metadata', {}) + try: + next_cursor = response_metadata.get('next_cursor') + except: + print("Hello World") + pass + messages = response.data['messages'] + 
temp_df = pd.json_normalize(messages) + try: + temp_df = temp_df[['user', 'type', 'text', 'ts', 'edited.ts']] + except Exception as e: + temp_df = temp_df[['user', 'type', 'text', 'ts']] + temp_df['edited.ts'] = "NA" + finally: + temp_df["user"]=temp_df["user"].fillna("APP") + temp_df = temp_df.rename(columns={'user' : 'user_id', 'type' : 'message_type', 'ts' : 'timestamp', 'edited.ts' : 'ts_edited'}) + temp_df["channel_id"] = id + messages_df = pd.concat([messages_df, temp_df], ignore_index=True) + except: + logging.warning("Error: Unable to access Slack channel %s in region %s", id, db) + pm_log_text += "Error: Unable to access Slack channel " + id + ", " + ao + " in region " + db + "\n" + if next_cursor != "None": + # Keep going from next offset. + data = next_cursor + if pages == 1: ## Total number of pages to query from Slack + break + pages = pages + 1 else: - pass - - if not isValidDate(bd_date): - logging.warning("Date error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) - print('Backblast error on Date - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name,". Slack message sent to Q. bd: ", bd_date, "cutoff:", cutoff_date) - pm_log_text += " - Backblast error on Date - AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" - if user_id != 'APP': - q_error_text += " - ERROR: The Date is not entered correctly. I can understand most common date formats like Date: 12-25-2020, Date: 2021-12-25, Date: 12/25/21, or Date: December 25, 2021. 
Common mistakes include a date from the future, a date with the time appended, or a date more than one month on the past.\n" - send_q_msg = 2 - qc = 0 - - if qc == 1: - try : - if database_action == DbAction.UPDATE : - cursor.execute(update_sql, val + (timestamp,)) - else: - cursor.execute(insert_sql, val) - - mydb.commit() - except Exception as error: - print("An error occurred writing to the datastore", error) - logging.error("An error occured writing to the datastore: %s", error) - - if cursor.rowcount == 1: - print(cursor.rowcount, "records inserted.") - print('Beatdown Date:', bd_date) - print('Message Posting Date:', msg_date) - print('AO:', ao_id) - print('Q:', q_user_id) - print('Co-Q', coq_user_id) - print('Pax Count:',pax_count) - print('fngs:', fngs) - if database_action == DbAction.UPDATE : - pm_log_text += " - Backblast successfully updated for AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + "\n" - if user_id != 'APP': - q_success_text = "Successfully updated your backblast after it had been changed for " + bd_date + " at <#" + ao_id + ">. I see you had " + str(math.trunc(pax_count)) + " PAX in attendance and FNGs were: " + str(fngs) + ". Thanks for posting and updating your BB! \n" - send_q_msg = 1 - else: - pm_log_text += " - Backblast successfully imported for AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + "\n" - if user_id != 'APP': - q_success_text = "Successfully imported your backblast for " + bd_date + " at <#" + ao_id + ">. I see you had " + str(math.trunc(pax_count)) + " PAX in attendance and FNGs were: " + str(fngs) + ". Thanks for posting your BB! 
\n" - send_q_msg = 1 + break + + # Calculate Date and Time columns + msg_date = [] + msg_time = [] + for ts in messages_df['timestamp']: + seconds_since_epoch = float(ts) + dt = epoch + timedelta(seconds=seconds_since_epoch) + dt = dt.replace(tzinfo=pytz.utc) + dt = dt.astimezone(pytz.timezone('America/Chicago')) + msg_date.append(dt.strftime('%Y-%m-%d')) + msg_time.append(dt.strftime('%H:%M:%S')) + messages_df['msg_date'] = msg_date + messages_df['time'] = msg_time + + + # Merge the data frames into 1 joined DF + f3_df = pd.merge(messages_df, users_df) + f3_df = pd.merge(f3_df,channels_df) + f3_df = f3_df[['timestamp', 'ts_edited', 'msg_date', 'time', 'channel_id', 'ao', 'user_id', 'user_name', 'real_name', 'text']] + f3_df['ts_edited'] = f3_df['ts_edited'].fillna('NA') + + # Now find only backblast messages (either "Backblast" or "Back Blast") - note .casefold() denotes case insensitivity - and pull out the PAX user ID's identified within + # This pattern finds username links followed by commas: pat = r'(?<=\\xa0).+?(?=,)' + # This pattern finds username links within brackets <> + bd_df = pd.DataFrame([]) + pax_attendance_df = pd.DataFrame([]) + warnings.filterwarnings("ignore", category=DeprecationWarning) #This prevents displaying the Deprecation Warning that is present for the RegEx lookahead function used below + # Iterate through the new bd_df dataframe, pull out the channel_name, date, and text line from Slack. 
Process the text line to find the beatdown info + for index, row in f3_df.iterrows(): + ao_tmp = row['channel_id'] + timestamp = row['timestamp'] + ts_edited = row['ts_edited'] + msg_date = row['msg_date'] + text_tmp = row['text'] + text_tmp = re.sub('_\\xa0', ' ', str(text_tmp)) + text_tmp = re.sub('\\xa0', ' ', str(text_tmp)) + text_tmp = re.sub(r'_\*', '', str(text_tmp)) + text_tmp = re.sub(r'\*_', '', str(text_tmp)) + text_tmp = re.sub(r'\*', '', str(text_tmp)) + user_name = row['user_name'] + user_id = row['user_id'] + + # Backblast criteria one. Be over the minimum length and contain the backblast keyword + if (len(str(text_tmp)) >= len(MIN_BACKBLAST)) and containsBackblastKeyword(text_tmp): + line_matches, new_row = bd_info(text_tmp, users_dict, aos_dict, timestamp, ts_edited, msg_date, ao_tmp, user_name, user_id) + + # Backblast criteria two. Besides the backblast keyword, contain one other properly formatted line. + if line_matches >= 1: + if float(new_row["timestamp"]) > float(cutoff_ts): + upsertAction = determine_db_action(new_row, find_match(new_row, previously_saved_beatdowns)) + + new_row["database_action"] = upsertAction - logging.info("Backblast imported for AO: %s, Date: %s", ao_id, bd_date) - - pax = list_pax(backblast) - - if pax: - logging.info("Inserting PAX attendance for AO: %s, Date: %s", ao_id, bd_date) - pax_df = pd.DataFrame(pax) - pax_df['timestamp'] = timestamp - pax_df['ts_edited'] = ts_edited - pax_df.columns =['user_id', 'timestamp', 'ts_edited'] - - pax_df['ao'] = ao_id - pax_df['bd_date'] = bd_date - pax_df['msg_date'] = msg_date - pax_df['q_user_id'] = q_user_id - pax_df["coq_user_id"] = coq_user_id - inserts = insert_beatdown_attendance(mydb, cursor, pax_df, database_action, timestamp) - mydb.commit() + # Ensure temp_df is a DataFrame + if isinstance(new_row, dict): + new_row = pd.DataFrame([new_row]) - logging.info("PAX attendance updates complete: Inserted %s new PAX attendance records for AO: %s, Date: %s", inserts, ao_id, 
bd_date) - else: - logging.info("No PAX Found in Attendance for AO: %s, Date: %s", ao_id, bd_date) - else: - pass - - if send_q_msg == 2: - q_error_text += "You can also check for other common mistakes that cause errors - such as spaces at the beginning of Date:, Q:, AO:, or other lines, or even other messages you may have posted that begin with the word Backblast." + bd_df = pd.concat([bd_df, new_row], ignore_index= True) - # Only send the message at 3pm each day or if the backblast was posted in the last hour - try: - if today.hour == 15 or ( safe_cast(ts_edited, float) or safe_cast(timestamp, float) >= ( current_ts - 3600)): + # Now connect to the AWS database and insert some rows! + try: + with mydb.cursor() as cursor: + for index, row in bd_df.iterrows(): + qc = 1 + send_q_msg = 0 + + update_sql = """ + UPDATE beatdowns + SET timestamp=%s, ts_edited=%s, ao_id=%s, bd_date=%s, q_user_id=%s, coq_user_id=%s, pax_count=%s, backblast=%s, fngs=%s, backblast_parsed=%s + WHERE timestamp=%s + LIMIT 1 + """ + insert_sql = "INSERT into beatdowns (timestamp, ts_edited, ao_id, bd_date, q_user_id, coq_user_id, pax_count, backblast, fngs, backblast_parsed) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" + + timestamp = row['timestamp'] + ts_edited = row['ts_edited'] + channel_id = row['channel_id'] + msg_date = row['msg_date'] + bd_date = row['bd_date'] + q_user_id = row['q_user_id'] + coq_user_id = row['coq_user_id'] + pax_count = row['pax_count'] + backblast = row['backblast'] + user_name = row['user_name'] + user_id = row['user_id'] + fngs = row['fngs'] + msg_link = slack.chat_getPermalink(channel=channel_id, message_ts=timestamp)["permalink"] + parsed_ao_channel = row['parsed_ao_channel_id'] + database_action = row["database_action"] + backblast_parsed = row['backblast_parsed'] + + if parsed_ao_channel in channels_df["channel_id"].values : + ao_id = parsed_ao_channel + else : + ao_id = channel_id + + val = (timestamp, ts_edited, ao_id, bd_date, q_user_id, coq_user_id, 
pax_count, backblast, fngs, backblast_parsed) + # for Slackblast users, set the user_id as the Q + appnames = ['slackblast', 'Slackblast'] + if user_name in appnames: + user_id = q_user_id + user_name = 'Q' + q_error_text = "Paxminer failed to import your latest backblast.\n" + q_message_end = "Backblast message posted on " + msg_date + " at <#" + ao_id + "> (<" + msg_link + "|link>)\n" + # pm_log_text += "- Processing <" + msg_link + "|this> backblast." + + if database_action == DbAction.IGNORE: + logging.debug("Encountered a bblast already recorded and has not been modified. Skipping import") + continue + + if q_user_id == 'NA': + logging.warning("Q error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) + print('Backblast error on Q at AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". Slack message sent to Q. bd: ", bd_date, "cutoff:", cutoff_date) + pm_log_text += " - Backblast error on Q at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" + if user_id != 'APP': + q_error_text += " - ERROR: The Q is not present or not tagged correctly. Please ensure the Q is tagged using @PAX_NAME \n" + send_q_msg = 2 + qc = 0 + else: + pass + if pax_count == -1: + logging.warning("Count error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) + print('Backblast error on Count - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". Slack message sent to Q.") + pm_log_text += " - Backblast error on Count at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" + if user_id != 'APP': + q_error_text += " - ERROR: The Count is not present or not entered correctly. The correct syntax is Count: XX - Use digits please. 
\n" + send_q_msg = 2 + qc = 0 + else: + pass + + if not isValidDate(bd_date, today): + logging.warning("Date error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) + print('Backblast error on Date - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name,". Slack message sent to Q. bd: ", bd_date, "cutoff:", cutoff_date) + pm_log_text += " - Backblast error on Date - AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" + if user_id != 'APP': + q_error_text += " - ERROR: The Date is not entered correctly. I can understand most common date formats like Date: 12-25-2020, Date: 2021-12-25, Date: 12/25/21, or Date: December 25, 2021. Common mistakes include a date from the future, a date with the time appended, or a date more than one month on the past.\n" send_q_msg = 2 - else: - send_q_msg = 0 - except Exception as error: - print("An error occurred determining to send an error message", error) - logging.error("An error occured determining to send an error message: %s", error) - - if send_q_msg == 1: - slack.chat_postMessage(channel=user_id, text=q_success_text + q_message_end) - if send_q_msg == 2: - slack.chat_postMessage(channel=user_id, text=q_error_text + q_message_end) - - sql3 = "UPDATE beatdowns SET coq_user_id=NULL where coq_user_id = 'NA'" - cursor.execute(sql3) - mydb.commit() - - sql4 = "UPDATE beatdowns SET fng_count=0 where fngs in ('none', 'None', 'None listed', 'NA', 'zero', '-', '') AND fng_count IS NULL" - cursor.execute(sql4) - mydb.commit() - - sql5 = "UPDATE beatdowns SET fng_count = 0 where fngs like '0%' AND fng_count IS NULL" - cursor.execute(sql5) - mydb.commit() - - sql6 = "UPDATE beatdowns SET fng_count = 1 where fngs like '1%' AND fng_count IS NULL" - cursor.execute(sql6) - mydb.commit() - - sql7 = "UPDATE beatdowns SET fng_count = 2 where fngs like '2%' AND fng_count IS NULL" - cursor.execute(sql7) - mydb.commit() - - sql8 = "UPDATE 
beatdowns SET fng_count = 3 where fngs like '3%' AND fng_count IS NULL" - cursor.execute(sql8) - mydb.commit() - - sql9 = "UPDATE beatdowns SET fng_count = 4 where fngs like '4%' AND fng_count IS NULL" - cursor.execute(sql9) - mydb.commit() - - sql10 = "UPDATE beatdowns SET fng_count = 5 where fngs like '5%' AND fng_count IS NULL" - cursor.execute(sql10) - mydb.commit() -finally: - pass - -mydb.close() - - -pm_log_text += "End of PAXminer hourly run" - -logging.info("Beatdown execution complete for region " + db) -logging.info(f"Time elapsed: {time.time() - current_ts}") - -try: - slack.chat_postMessage(channel='paxminer_logs', text=pm_log_text) -except: - print("Slack log message error - not posted") - logging.error("Slack log message error - not posted") - pass -print('Finished. You may go back to your day!') \ No newline at end of file + qc = 0 + + if qc == 1: + try : + if database_action == DbAction.UPDATE : + cursor.execute(update_sql, val + (timestamp,)) + else: + cursor.execute(insert_sql, val) + + mydb.commit() + except Exception as error: + logging.error("An error occured writing to the datastore: %s") + logging.error(error) + + if cursor.rowcount == 1: + print(cursor.rowcount, "records inserted.") + print('Beatdown Date:', bd_date) + print('Message Posting Date:', msg_date) + print('AO:', ao_id) + print('Q:', q_user_id) + print('Co-Q', coq_user_id) + print('Pax Count:',pax_count) + print('fngs:', fngs) + if database_action == DbAction.UPDATE : + pm_log_text += " - Backblast successfully updated for AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + "\n" + if user_id != 'APP': + q_success_text = "Successfully updated your backblast after it had been changed for " + bd_date + " at <#" + ao_id + ">. I see you had " + str(math.trunc(pax_count)) + " PAX in attendance and FNGs were: " + str(fngs) + ". Thanks for posting and updating your BB! 
\n" + send_q_msg = 1 + else: + pm_log_text += " - Backblast successfully imported for AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + "\n" + if user_id != 'APP': + q_success_text = "Successfully imported your backblast for " + bd_date + " at <#" + ao_id + ">. I see you had " + str(math.trunc(pax_count)) + " PAX in attendance and FNGs were: " + str(fngs) + ". Thanks for posting your BB! \n" + send_q_msg = 1 + + logging.info("Backblast imported for AO: %s, Date: %s", ao_id, bd_date) + + pax = list_pax(backblast) + + if pax: + logging.info("Inserting PAX attendance for AO: %s, Date: %s", ao_id, bd_date) + pax_df = pd.DataFrame(pax) + pax_df['timestamp'] = timestamp + pax_df['ts_edited'] = ts_edited + pax_df.columns =['user_id', 'timestamp', 'ts_edited'] + + pax_df['ao'] = ao_id + pax_df['bd_date'] = bd_date + pax_df['msg_date'] = msg_date + pax_df['q_user_id'] = q_user_id + pax_df["coq_user_id"] = coq_user_id + inserts = insert_beatdown_attendance(mydb, cursor, pax_df, database_action, timestamp, cutoff_date) + mydb.commit() + + logging.info("PAX attendance updates complete: Inserted %s new PAX attendance records for AO: %s, Date: %s", inserts, ao_id, bd_date) + else: + logging.info("No PAX Found in Attendance for AO: %s, Date: %s", ao_id, bd_date) + else: + pass + + if send_q_msg == 2: + q_error_text += "You can also check for other common mistakes that cause errors - such as spaces at the beginning of Date:, Q:, AO:, or other lines, or even other messages you may have posted that begin with the word Backblast." 
+ + # Only send the message at 3pm each day or if the backblast was posted in the last hour + try: + if today.hour == 15 or ( safe_cast(ts_edited, float) or safe_cast(timestamp, float) >= ( current_ts - 3600)): + send_q_msg = 2 + else: + send_q_msg = 0 + except Exception as error: + logging.error("An error occured determining to send an error message: %s") + logging.error(error) + + # if send_q_msg == 1: + # slack.chat_postMessage(channel=user_id, text=q_success_text + q_message_end) + # if send_q_msg == 2: + # slack.chat_postMessage(channel=user_id, text=q_error_text + q_message_end) + + sql3 = "UPDATE beatdowns SET coq_user_id=NULL where coq_user_id = 'NA'" + cursor.execute(sql3) + mydb.commit() + + sql4 = "UPDATE beatdowns SET fng_count=0 where fngs in ('none', 'None', 'None listed', 'NA', 'zero', '-', '') AND fng_count IS NULL" + cursor.execute(sql4) + mydb.commit() + + sql5 = "UPDATE beatdowns SET fng_count = 0 where fngs like '0%' AND fng_count IS NULL" + cursor.execute(sql5) + mydb.commit() + + sql6 = "UPDATE beatdowns SET fng_count = 1 where fngs like '1%' AND fng_count IS NULL" + cursor.execute(sql6) + mydb.commit() + + sql7 = "UPDATE beatdowns SET fng_count = 2 where fngs like '2%' AND fng_count IS NULL" + cursor.execute(sql7) + mydb.commit() + + sql8 = "UPDATE beatdowns SET fng_count = 3 where fngs like '3%' AND fng_count IS NULL" + cursor.execute(sql8) + mydb.commit() + + sql9 = "UPDATE beatdowns SET fng_count = 4 where fngs like '4%' AND fng_count IS NULL" + cursor.execute(sql9) + mydb.commit() + + sql10 = "UPDATE beatdowns SET fng_count = 5 where fngs like '5%' AND fng_count IS NULL" + cursor.execute(sql10) + mydb.commit() + finally: + pass + + mydb.close() + + pm_log_text += "End of PAXminer hourly run" + + logging.info("Beatdown execution complete for region " + db) + logging.info(f"Time elapsed: {time.time() - current_ts}") + + # try: + # slack.chat_postMessage(channel='paxminer_logs', text=pm_log_text) + # except: + # print("Slack log message 
error - not posted") + # logging.error("Slack log message error - not posted") + # pass + logging.info('PAX_BD_Miner.py Finished') \ No newline at end of file diff --git a/backblast_scraping/PAXminer_Daily_Execution.py b/backblast_scraping/PAXminer_Daily_Execution.py index 601d5eb..20e8fda 100755 --- a/backblast_scraping/PAXminer_Daily_Execution.py +++ b/backblast_scraping/PAXminer_Daily_Execution.py @@ -1,9 +1,4 @@ #!/usr/bin/env python3 -''' -This script was written by Beaker from F3STL. Questions? @srschaecher on twitter or srschaecher@gmail.com. -This script executes the daily PAXminer backblast queries and data updates for all F3 regions using PAXminer. -''' - import pandas as pd import pymysql.cursors import configparser @@ -20,7 +15,7 @@ # Configure AWS credentials config = configparser.ConfigParser(); -config.read('../config/credentials.ini'); +config.read('config/credentials.ini'); # Configure AWS Credentials host = config['aws']['host'] @@ -57,6 +52,5 @@ print('Executing user updates for region ' + region) #os.system("./F3SlackUserLister.py " + db + " " + key) #os.system("./F3SlackChannelLister.py " + db + " " + key) - #os.system("./BDminer.py " + db + " " + key) - #os.system("./PAXminer.py " + db + " " + key) + os.system("./PAX_BD_Miner.py " + db + " " + key) print('----------------- End of Region Update -----------------\n') \ No newline at end of file diff --git a/backblast_scraping/PAXminer_Manual_Execution.py b/backblast_scraping/PAXminer_Manual_Execution.py index 03a1c0a..7d57ccc 100755 --- a/backblast_scraping/PAXminer_Manual_Execution.py +++ b/backblast_scraping/PAXminer_Manual_Execution.py @@ -1,15 +1,10 @@ #!/usr/bin/env python3 -''' -This script was written by Beaker from F3STL. Questions? @srschaecher on twitter or srschaecher@gmail.com. -This script executes the daily PAXminer backblast queries and data updates for all F3 regions using PAXminer. 
-''' - -from slacker import Slacker import pandas as pd import pymysql.cursors import configparser import os import warnings +from PAX_BD_Miner import run_pax_bd_miner warnings.simplefilter(action='ignore', category=FutureWarning) # Configure AWS credentials @@ -36,7 +31,7 @@ # Get list of regions and Slack tokens for PAXminer execution try: with mydb1.cursor() as cursor: - sql = "SELECT * FROM paxminer.regions where region = 'Mobile'" # <-- Update this for whatever region is being tested + sql = "SELECT * FROM paxminer.regions where schema_name = 'f3stlcity'" # <-- Update this for whatever region is being tested cursor.execute(sql) regions = cursor.fetchall() regions_df = pd.DataFrame(regions, columns=['region', 'slack_token', 'schema_name']) @@ -48,8 +43,11 @@ key = row['slack_token'] db = row['schema_name'] print('Executing user updates for region ' + region) - os.system("./F3SlackUserLister.py " + db + " " + key) - os.system("./F3SlackChannelLister.py " + db + " " + key) - #os.system("./PAX_BD_Miner.py " + db + " " + key) + + # os.system("./F3SlackUserLister.py " + db + " " + key) + # os.system("./F3SlackChannelLister.py " + db + " " + key) + # os.system("python3 PAX_BD_Miner.py " + db + " " + key) + run_pax_bd_miner(host, port, user, password, db, key) print('----------------- End of Region Update -----------------\n') +mydb1.close() print('\nPAXminer execution complete.') \ No newline at end of file diff --git a/backblast_scraping/deploy.sh b/backblast_scraping/deploy.sh new file mode 100755 index 0000000..df6a435 --- /dev/null +++ b/backblast_scraping/deploy.sh @@ -0,0 +1,5 @@ +gcloud auth configure-docker us-central1-docker.pkg.dev + +docker build . 
--platform linux/amd64 --tag us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/paxminer-scraping:latest + +docker push us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/paxminer-scraping:latest \ No newline at end of file diff --git a/backblast_scraping/requirements.txt b/backblast_scraping/requirements.txt new file mode 100644 index 0000000..f97586d --- /dev/null +++ b/backblast_scraping/requirements.txt @@ -0,0 +1,7 @@ +slack-sdk==3.26.1 +pandas==2.2.3 +pymysql==1.1.0 +numpy==1.25.2 +matplotlib==3.8.2 +typing-extensions==4.7.1 +dateparser==1.2.1 \ No newline at end of file From 6a8917f8daf863aa1d15dce31d7b9ac188ee0432 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 13:39:11 -0600 Subject: [PATCH 03/14] Add readme with running local instructions. --- backblast_scraping/PAXMiner_Cloud_Run.py | 1 - backblast_scraping/PAX_BD_Miner.py | 1 - .../PAXminer_Manual_Execution.py | 7 +- backblast_scraping/README.md | 67 +++++++++++++++++++ 4 files changed, 69 insertions(+), 7 deletions(-) create mode 100644 backblast_scraping/README.md diff --git a/backblast_scraping/PAXMiner_Cloud_Run.py b/backblast_scraping/PAXMiner_Cloud_Run.py index aa22d95..bd673dd 100644 --- a/backblast_scraping/PAXMiner_Cloud_Run.py +++ b/backblast_scraping/PAXMiner_Cloud_Run.py @@ -3,7 +3,6 @@ import os import sys from PAX_BD_Miner import run_pax_bd_miner, create_database_connection -from paxminer_models import Config # Set RegEx range for which regions will be queried. Command line input parameter 1 should be a regex range (e.g. A-M) which will search for all regions starting with A through M. 
region_regex = sys.argv[1] diff --git a/backblast_scraping/PAX_BD_Miner.py b/backblast_scraping/PAX_BD_Miner.py index 3f4b111..caff365 100755 --- a/backblast_scraping/PAX_BD_Miner.py +++ b/backblast_scraping/PAX_BD_Miner.py @@ -13,7 +13,6 @@ import math import warnings from BD_Update_Utils import determine_db_action, find_match, retrievePreviousBackblasts, DbAction -from paxminer_models import Config warnings.simplefilter(action='ignore', category=FutureWarning) warnings.filterwarnings( diff --git a/backblast_scraping/PAXminer_Manual_Execution.py b/backblast_scraping/PAXminer_Manual_Execution.py index 7d57ccc..c46e587 100755 --- a/backblast_scraping/PAXminer_Manual_Execution.py +++ b/backblast_scraping/PAXminer_Manual_Execution.py @@ -31,7 +31,7 @@ # Get list of regions and Slack tokens for PAXminer execution try: with mydb1.cursor() as cursor: - sql = "SELECT * FROM paxminer.regions where schema_name = 'f3stlcity'" # <-- Update this for whatever region is being tested + sql = "SELECT * FROM paxminer.regions where schema_name = 'f3stlcity'" cursor.execute(sql) regions = cursor.fetchall() regions_df = pd.DataFrame(regions, columns=['region', 'slack_token', 'schema_name']) @@ -43,10 +43,7 @@ key = row['slack_token'] db = row['schema_name'] print('Executing user updates for region ' + region) - - # os.system("./F3SlackUserLister.py " + db + " " + key) - # os.system("./F3SlackChannelLister.py " + db + " " + key) - # os.system("python3 PAX_BD_Miner.py " + db + " " + key) + run_pax_bd_miner(host, port, user, password, db, key) print('----------------- End of Region Update -----------------\n') mydb1.close() diff --git a/backblast_scraping/README.md b/backblast_scraping/README.md new file mode 100644 index 0000000..221dc23 --- /dev/null +++ b/backblast_scraping/README.md @@ -0,0 +1,67 @@ +# PAXminer + +PAXminer Backblast Scraping is a tool for automatically extracting and storing backblasts (workout summaries) from F3 Slack channels. 
It scrapes Slack channels, parses the backblast text, and stores the information in a database for analysis and reporting. + +## Deployment to Google Cloud Run + +This section outlines the steps for deploying PAXminer to Google Cloud Run. The `deploy.sh` script runs the following three steps in one go. + +### Deployment Steps + +1. **Authenticate Docker with Google Cloud:** + + ```bash + gcloud auth configure-docker us-central1-docker.pkg.dev + ``` + +2. **Build the Docker Image:** + + ```bash + docker build . --platform linux/amd64 --tag us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/paxminer-scraping:latest + ``` + +3. **Push the Docker Image to Google Artifact Registry:** + + ```bash + docker push us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/paxminer-scraping:latest + ``` + +## Running PAXminer Locally + +This section describes how to run PAXminer locally for development or testing. + +### Installation + +1. Clone the repository +2. Create a virtual environment (recommended) +3. Install the dependencies + +### Configuration + +1. **Set Config Variables:** + + Set the following variables in your `config/credentials.ini` file: + + * `host`: The database host. + * `port`: The database port (typically 3306). + * `user`: The database user. + * `password`: The database password. + * `db`: The database name (e.g., `paxminer`). + +### Execution + +1. **Run `PAXminer_Manual_Execution.py`:** + + ```bash + python PAXminer_Manual_Execution.py + ``` + + This will execute PAXminer for the region hardcoded in the script query on line 34. + +2. **Run `PAXMiner_Cloud_Run.py`:** + + ```bash + python PAXMiner_Cloud_Run.py A-Z + ``` + + This will execute PAXminer for all regions whose names start with a letter in the range `A-Z`. 
It also requires the configuration values to be provided as environment variables instead of a config file. \ No newline at end of file From d899d6122ad19cd3d80c8ced01b4b02da1e9ecb4 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 13:46:36 -0600 Subject: [PATCH 04/14] Add logging to errors where previously ignored. --- backblast_scraping/PAX_BD_Miner.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/backblast_scraping/PAX_BD_Miner.py b/backblast_scraping/PAX_BD_Miner.py index caff365..bc5b4fb 100755 --- a/backblast_scraping/PAX_BD_Miner.py +++ b/backblast_scraping/PAX_BD_Miner.py @@ -366,8 +366,11 @@ def run_pax_bd_miner(host, port, user, password, db, key): # Retrieve backblast entries from regional database for comparison to identify new or updated posts try: previously_saved_beatdowns = retrievePreviousBackblasts(mydb, cutoff_ts) + except Exception as e: + logging.error("Error: Unable to retrieve previously saved beatdowns") + logging.error(e) finally: - print('Looking for new backblasts from Slack...') + logging.info('Looking for new backblasts from Slack...') # Get all channel conversation messages_df = pd.DataFrame([], columns=['user_id', 'message_type', 'timestamp', 'ts_edited', 'text', 'channel_id']) #creates an empty dataframe to append to @@ -382,7 +385,9 @@ def run_pax_bd_miner(host, port, user, password, db, key): response_metadata = response.get('response_metadata', {}) try: next_cursor = response_metadata.get('next_cursor') - except: + except Exception as e: + logging.error("Error: An unknown error occurred") + logging.error(e) print("Hello World") pass messages = response.data['messages'] @@ -397,8 +402,9 @@ def run_pax_bd_miner(host, port, user, password, db, key): temp_df = temp_df.rename(columns={'user' : 'user_id', 'type' : 'message_type', 'ts' : 'timestamp', 'edited.ts' : 'ts_edited'}) temp_df["channel_id"] = id messages_df = pd.concat([messages_df, temp_df], ignore_index=True) - except: + except Exception as e: 
logging.warning("Error: Unable to access Slack channel %s in region %s", id, db) + logging.error(e) pm_log_text += "Error: Unable to access Slack channel " + id + ", " + ao + " in region " + db + "\n" if next_cursor != "None": # Keep going from next offset. From a158f2ece23c3d663ec3d21879e6a31d5191d811 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 13:49:13 -0600 Subject: [PATCH 05/14] Small logging adjustments. --- backblast_scraping/PAX_BD_Miner.py | 33 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/backblast_scraping/PAX_BD_Miner.py b/backblast_scraping/PAX_BD_Miner.py index bc5b4fb..3df3ab0 100755 --- a/backblast_scraping/PAX_BD_Miner.py +++ b/backblast_scraping/PAX_BD_Miner.py @@ -518,7 +518,6 @@ def run_pax_bd_miner(host, port, user, password, db, key): user_name = 'Q' q_error_text = "Paxminer failed to import your latest backblast.\n" q_message_end = "Backblast message posted on " + msg_date + " at <#" + ao_id + "> (<" + msg_link + "|link>)\n" - # pm_log_text += "- Processing <" + msg_link + "|this> backblast." if database_action == DbAction.IGNORE: logging.debug("Encountered a bblast already recorded and has not been modified. Skipping import") @@ -526,8 +525,8 @@ def run_pax_bd_miner(host, port, user, password, db, key): if q_user_id == 'NA': logging.warning("Q error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) - print('Backblast error on Q at AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". Slack message sent to Q. bd: ", bd_date, "cutoff:", cutoff_date) - pm_log_text += " - Backblast error on Q at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" + print('Backblast error on Q at AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". 
bd: ", bd_date, "cutoff:", cutoff_date) + pm_log_text += " - Backblast error on Q at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ".\n" if user_id != 'APP': q_error_text += " - ERROR: The Q is not present or not tagged correctly. Please ensure the Q is tagged using @PAX_NAME \n" send_q_msg = 2 @@ -536,8 +535,8 @@ def run_pax_bd_miner(host, port, user, password, db, key): pass if pax_count == -1: logging.warning("Count error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) - print('Backblast error on Count - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". Slack message sent to Q.") - pm_log_text += " - Backblast error on Count at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" + print('Backblast error on Count - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ".") + pm_log_text += " - Backblast error on Count at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ".\n" if user_id != 'APP': q_error_text += " - ERROR: The Count is not present or not entered correctly. The correct syntax is Count: XX - Use digits please. \n" send_q_msg = 2 @@ -547,8 +546,8 @@ def run_pax_bd_miner(host, port, user, password, db, key): if not isValidDate(bd_date, today): logging.warning("Date error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) - print('Backblast error on Date - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name,". Slack message sent to Q. bd: ", bd_date, "cutoff:", cutoff_date) - pm_log_text += " - Backblast error on Date - AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ". Slack message sent to Q.\n" + print('Backblast error on Date - AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name,". 
bd: ", bd_date, "cutoff:", cutoff_date) + pm_log_text += " - Backblast error on Date - AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ".\n" if user_id != 'APP': q_error_text += " - ERROR: The Date is not entered correctly. I can understand most common date formats like Date: 12-25-2020, Date: 2021-12-25, Date: 12/25/21, or Date: December 25, 2021. Common mistakes include a date from the future, a date with the time appended, or a date more than one month on the past.\n" send_q_msg = 2 @@ -624,10 +623,10 @@ def run_pax_bd_miner(host, port, user, password, db, key): logging.error("An error occured determining to send an error message: %s") logging.error(error) - # if send_q_msg == 1: - # slack.chat_postMessage(channel=user_id, text=q_success_text + q_message_end) - # if send_q_msg == 2: - # slack.chat_postMessage(channel=user_id, text=q_error_text + q_message_end) + if send_q_msg == 1: + slack.chat_postMessage(channel=user_id, text=q_success_text + q_message_end) + if send_q_msg == 2: + slack.chat_postMessage(channel=user_id, text=q_error_text + q_message_end) sql3 = "UPDATE beatdowns SET coq_user_id=NULL where coq_user_id = 'NA'" cursor.execute(sql3) @@ -670,10 +669,10 @@ def run_pax_bd_miner(host, port, user, password, db, key): logging.info("Beatdown execution complete for region " + db) logging.info(f"Time elapsed: {time.time() - current_ts}") - # try: - # slack.chat_postMessage(channel='paxminer_logs', text=pm_log_text) - # except: - # print("Slack log message error - not posted") - # logging.error("Slack log message error - not posted") - # pass + try: + slack.chat_postMessage(channel='paxminer_logs', text=pm_log_text) + except: + print("Slack log message error - not posted") + logging.error("Slack log message error - not posted") + pass logging.info('PAX_BD_Miner.py Finished') \ No newline at end of file From accd4695cf49c1d252ad8cd05ad50239ea284b26 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 19:19:12 -0600 Subject: 
[PATCH 06/14] Add nested config to gitignore. --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ebbd5bd..02c826a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ __pycache__ plots logs config/credentials.ini -.DS_Store \ No newline at end of file +.DS_Store +backblast_scraping/config/* \ No newline at end of file From 3e8aa2980d239ec1fbea39ec36875b315a2fea2b Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 19:19:34 -0600 Subject: [PATCH 07/14] Revert PAXcharter.py back to server code. --- monthly_charts/PAXcharter.py | 198 ++++++++++++++++------------------- 1 file changed, 88 insertions(+), 110 deletions(-) diff --git a/monthly_charts/PAXcharter.py b/monthly_charts/PAXcharter.py index 2d403ed..88f79a3 100755 --- a/monthly_charts/PAXcharter.py +++ b/monthly_charts/PAXcharter.py @@ -1,4 +1,9 @@ #!/usr/bin/env python3 +''' +This script was written by Beaker from F3STL. Questions? @srschaecher on twitter or srschaecher@gmail.com. +This script queries Slack for all PAX Users and their respective beatdown attendance. It then generates bar graphs +on attendance for each member and sends it to them in a private Slack message. +''' from slack_sdk import WebClient from slack_sdk.errors import SlackApiError @@ -12,9 +17,6 @@ import sys import os import logging -import hashlib -import numpy as np -import calendar # This handler does retries when HTTP status 429 is returned from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler @@ -63,15 +65,8 @@ print('Looking for all Slack Users for ' + db + '. 
Stand by...') - -def hash_email(email): - if isinstance(email, str): - return hashlib.md5(email.encode('utf-8')).hexdigest() - else: - return None # Or return a default value if needed - # Make users Data Frame -column_names = ['user_id', 'user_name', 'real_name', 'email'] +column_names = ['user_id', 'user_name', 'real_name'] users_df = pd.DataFrame(columns = column_names) data = '' while True: @@ -80,12 +75,9 @@ def hash_email(email): next_cursor = response_metadata.get('next_cursor') users = users_response.data['members'] users_df_tmp = pd.json_normalize(users) - users_df_tmp = users_df_tmp[['id', 'profile.display_name', 'profile.real_name', 'profile.email']] - users_df_tmp = users_df_tmp.rename(columns={'id' : 'user_id', 'profile.display_name' : 'user_name', 'profile.real_name' : 'real_name', 'profile.email' : 'email'}) + users_df_tmp = users_df_tmp[['id', 'profile.display_name', 'profile.real_name']] + users_df_tmp = users_df_tmp.rename(columns={'id' : 'user_id', 'profile.display_name' : 'user_name', 'profile.real_name' : 'real_name'}) users_df = users_df.append(users_df_tmp, ignore_index=True) - - # Apply the hash function to the email column - users_df['email'] = users_df['email'].apply(hash_email) if next_cursor: # Keep going from next offset. 
#print('next_cursor =' + next_cursor) @@ -95,7 +87,6 @@ def hash_email(email): for index, row in users_df.iterrows(): un_tmp = row['user_name'] rn_tmp = row['real_name'] - row['email'] if un_tmp == "" : row['user_name'] = rn_tmp @@ -132,105 +123,92 @@ def success_message_sent(user_id_tmp, pax, db): # Query AWS by user ID for attendance history #users_df = users_df.iloc[:10] # THIS LINE IS FOR TESTING PURPOSES, THIS FORCES ONLY n USER ROWS TO BE SENT THROUGH THE PIPE +total_graphs = 0 # Sets a counter for the total number of graphs made (users with posting data) pause_on = [ 50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000 ] -def savePlot(aggregated_data, total_count_for_year, title, file_path): - # Plot the stacked bar chart - ax = aggregated_data.plot(kind='bar', stacked=True) - # Add the total count as text on the chart - ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes, - fontsize=12, verticalalignment='top', horizontalalignment='right') - plt.title(title) - plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False) - plt.ioff() - plt.savefig(file_path, bbox_inches='tight') #save the figure to a file - plt.close() - -def execute_user_chart(attendance_tmp, user_id_tmp, db, pax, d, thismonthname, yearnum, rm): - if attendance_tmp and len(attendance_tmp) > 0: - attendance_tmp_df = pd.DataFrame(attendance_tmp) - thismonth = d.strftime("%m") - send_chart = attendance_tmp_df[(attendance_tmp_df['table_schema'] == db) & (attendance_tmp_df['Month'] == int(thismonth))].shape[0] - if send_chart > 0: # This sends a graph to ALL PAX who have attended at least 1 beatdown - rgion_method = rm - # Modify the 'AO' column based on the condition where 'table_schema' is not equal to 'db' - attendance_tmp_df['AO'] = np.where(attendance_tmp_df['table_schema'] != db, "DR: " + attendance_tmp_df['region'], attendance_tmp_df['AO']) - - attendance_tmp_df.sort_values(by=['Month'], inplace=True) - 
attendance_tmp_df['Month'] = attendance_tmp_df['Month'].map(lambda x: calendar.month_abbr[x]) - - # Group by 'Month' and 'AO', and aggregate the 'cnt' column by summing it - aggregated_data = attendance_tmp_df.groupby(['Month', 'AO'], sort=False)['cnt'].sum().unstack() - - # Calculate the total count for the year from the 'cnt' column - total_count_for_year = attendance_tmp_df['cnt'].sum() - - file_path = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg' - - savePlot(aggregated_data, total_count_for_year, 'Number of posts by '+ pax + ' by AO/Month for ' + yearnum, file_path) - - message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + "! SYITG!" - - # user_id_tmp = 'U03QFC2S2LX' - print('PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!') - - # The current method v2, and legacy method, can both be invoked here depending on the region_method variable. - # Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025. - # The main difference is that v2 requires an additional conversation scope. - # New regions will all use v2. - # user_id_override = "U06GDMGJKNE" - if rgion_method == "v2": - try: - response = send_slack_message_v2(user_id_tmp, message, file_path) - - success_message_sent(user_id_tmp, pax, db) - except Exception as e: - # If the error is missing scope, then - if e.response['error'] == 'missing_scope': - print("Error: The app is missing required scopes. 
Please add the 'im:write' scope.") - rgion_method = "v1" - else: - log_message_sent_error(user_id_tmp, db, pax) - raise e - - if rgion_method != "v2": - try: - channel = user_id_tmp - response = send_slack_message(channel, message, file_path) - - success_message_sent(user_id_tmp, pax, db) - except Exception as e: - log_message_sent_error(user_id_tmp, db, pax) - raise e - - return True, rgion_method - else: - print(pax + ' skipped') - return False, region_method - - -total_graphs = 0 region_method = "v2" -for _, row in users_df.iterrows(): - user_id = row['user_id'] - email = row['email'] - pax = row['user_name'] +for user_id in users_df['user_id']: try: attendance_tmp_df = pd.DataFrame([]) # creates an empty dataframe to append to with mydb.cursor() as cursor: - sql = "SELECT table_schema, region, year as Year, month as Month, ao as AO, email_hash, cnt FROM f3stlcity.user_monthly_aggregates WHERE email_hash=%s AND year = %s" - user_id_tmp = user_id - val = (email, yearnum) - cursor.execute(sql, val) - attendance_tmp = cursor.fetchall() - if attendance_tmp : - graph_or_not, rm = execute_user_chart(attendance_tmp, user_id_tmp, db, pax, d, thismonthname, yearnum, region_method) - region_method = rm - - if graph_or_not: - total_graphs = total_graphs + 1 - else: - print("No attendance this year", user_id) + sql = "SELECT * FROM attendance_view WHERE PAX = (SELECT user_name FROM users WHERE user_id = %s) AND YEAR(Date) = %s ORDER BY Date" + user_id_tmp = user_id + val = (user_id_tmp, yearnum) + cursor.execute(sql, val) + attendance_tmp = cursor.fetchall() + attendance_tmp_df = pd.DataFrame(attendance_tmp) + month = [] + day = [] + year = [] + count = attendance_tmp_df.shape[0] + #if user_id_tmp == 'U0187M4NWG4': #Use this to send a graph to only 1 specific PAX + if count > 0: # This sends a graph to ALL PAX who have attended at least 1 beatdown + for Date in attendance_tmp_df['Date']: + #for index, row in attendance_tmp_df.iterrows(): + datee = 
datetime.datetime.strptime(Date, "%Y-%m-%d") + month.append(datee.strftime("%B")) + day.append(datee.day) + year.append(datee.year) + pax = attendance_tmp_df.iloc[0]['PAX'] + attendance_tmp_df['Month'] = month + attendance_tmp_df['Day'] = day + attendance_tmp_df['Year'] = year + attendance_tmp_df.sort_values(by=['Date'], inplace=True) + attendance_tmp_df.groupby(['Month', 'AO'], sort=False).size().unstack().plot(kind='bar',stacked=True) + + ax = attendance_tmp_df.groupby(['Month', 'AO'], sort=False).size().unstack().plot(kind='bar', stacked=True) + total_count_for_year = attendance_tmp_df.shape[0] + + # Calculate total count for the last month + last_month_start = datetime.date(int(yearnum), int(thismonth), 1) + attendance_last_month_df = attendance_tmp_df[attendance_tmp_df['Date'] >= str(last_month_start)] + total_count_last_month = attendance_last_month_df.shape[0] + + if total_count_last_month > 0: + # Add the total count as text on the chart + ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes, + fontsize=12, verticalalignment='top', horizontalalignment='right') + + plt.title('Number of posts by '+ pax + ' by AO/Month for ' + yearnum) + plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False) + plt.ioff() + plt.savefig('../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg', bbox_inches='tight') #save the figure to a file + + message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + ". \nPush yourself, get those bars higher every month! SYITG!" + file = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg' + + print('PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!') + + # The current method v2, and legacy method, can both be invoked here depending on the region_method variable. + # Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025. 
+ # The main difference is that v2 requires an additional conversation scope. + # New regions will all use v2. + # user_id_override = "U06GDMGJKNE" + if region_method == "v2": + try: + response = send_slack_message_v2(user_id_tmp, message, file) + + success_message_sent(user_id_tmp, pax, db) + except Exception as e: + # If the error is missing scope, then + if e.response['error'] == 'missing_scope': + print("Error: The app is missing required scopes. Please add the 'im:write' scope.") + region_method = "v1" + else: + log_message_sent_error(user_id_tmp, db, pax) + raise e + + if region_method != "v2": + try: + channel = user_id_tmp + response = send_slack_message(channel, message, file) + + success_message_sent(user_id_tmp, pax, db) + except: + log_message_sent_error(user_id_tmp, db, pax) + raise e + else: + print(pax + ' skipped') except Exception as e: print(e) print("An exception occurred for User ID " + user_id) From d946228416ae8daaee1558b7afe9afb29aba7ffe Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 19:20:35 -0600 Subject: [PATCH 08/14] Specify paxcharter by name in manual script. 
--- monthly_charts/PAXcharter_Manual_Execution.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/monthly_charts/PAXcharter_Manual_Execution.py b/monthly_charts/PAXcharter_Manual_Execution.py index 98362b1..31ba253 100755 --- a/monthly_charts/PAXcharter_Manual_Execution.py +++ b/monthly_charts/PAXcharter_Manual_Execution.py @@ -35,8 +35,8 @@ # Get list of regions and Slack tokens for PAXminer execution try: with mydb1.cursor() as cursor: - #sql = "SELECT * FROM paxminer.regions where send_region_stats = 1" # <-- Update this for whatever region is being tested - sql = "SELECT * FROM paxminer.regions where region = 'Geneva'" # <-- Update this for whatever region is being tested + # sql = "SELECT * FROM paxminer.regions where send_region_stats = 1" # <-- Update this for whatever region is being tested + sql = "SELECT * FROM paxminer.regions where region = 'AAAAAAA'" # <-- Update this for whatever region is being tested cursor.execute(sql) regions = cursor.fetchall() regions_df = pd.DataFrame(regions) @@ -50,12 +50,12 @@ firstf = row['firstf_channel'] #firstf = 'U0187M4NWG4' # <--- Use this if sending a test msg to a specific user print('Processing statistics for region ' + region) - #os.system("./PAXcharter.py " + db + " " + key) + os.system("./PAXcharter.py " + db + " " + key) #os.system("./UniquePAXCharter.py " + db + " " + key + " " + region + " " + firstf) #os.system("./QCharter.py " + db + " " + key + " " + region + " " + firstf) #os.system("./Leaderboard_Charter.py " + db + " " + key + " " + region + " " + firstf) #os.system("./LeaderboardByAO_Charter.py " + db + " " + key + " " + region + " " + firstf) #os.system("./Join_Channels_and_Create_Directories.py " + db + " " + key + " " + region + " " + firstf) - os.system("./AOCharter.py " + db + " " + key + " " + region + " " + firstf) + # os.system("./AOCharter.py " + db + " " + key + " " + region + " " + firstf) print('----------------- End of Region Update -----------------\n') 
print('\nPAXcharter execution complete.') \ No newline at end of file From f4da3dbc5dd1377df3c590ef2e137268f2d94859 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 19:21:00 -0600 Subject: [PATCH 09/14] Add charts descriptions --- monthly_charts/CHARTS.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 monthly_charts/CHARTS.md diff --git a/monthly_charts/CHARTS.md b/monthly_charts/CHARTS.md new file mode 100644 index 0000000..95972a8 --- /dev/null +++ b/monthly_charts/CHARTS.md @@ -0,0 +1,28 @@ +# CHARTS + +## Chart Types +### AO +- Controlled by the send_ao_leaderboard flag in the region database. +- LeaderboardByAO_Charter.py +- Two Graphs Sent + - PAX posts in the last month + - PAX posts YTD + +### Q +- Controlled by the send_q_charts flag in the database. +- QCharter.py +- Q's in the last month to each AO +- Q's in the last month to the firstf channel, broken down by AO. + +### Region +- Controlled by the send_region_leaderboard flag in the database +- Leaderboard_Charter.py +- Two Graphs Sent to the firstf channel + - PAX posts in the last month + - PAX posts YTD + +### PAX +- Controlled by the send_pax_charts flag in the database +- PAXcharter.py +- One graph sent to each PAX. + - Monthly posting summary broken down by AO with a total in the upper right. \ No newline at end of file From 25e7ac3eb23fb180291613d8f2c73f4c7d1d978e Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 19:22:58 -0600 Subject: [PATCH 10/14] Add some charts to the new structure. Nest these folders as lots of files with old way and new way. 
--- monthly_charts/new-structure/.dockerignore | 2 + monthly_charts/new-structure/Dockerfile | 21 ++ .../new-structure/Manual_AO_Leaderboard.py | 34 +++ .../new-structure/ao_leaderboard_helper.py | 243 ++++++++++++++++++ monthly_charts/new-structure/requirements.txt | 6 + .../new-structure/run_ao_leaderboard.py | 7 + 6 files changed, 313 insertions(+) create mode 100644 monthly_charts/new-structure/.dockerignore create mode 100644 monthly_charts/new-structure/Dockerfile create mode 100644 monthly_charts/new-structure/Manual_AO_Leaderboard.py create mode 100644 monthly_charts/new-structure/ao_leaderboard_helper.py create mode 100644 monthly_charts/new-structure/requirements.txt create mode 100644 monthly_charts/new-structure/run_ao_leaderboard.py diff --git a/monthly_charts/new-structure/.dockerignore b/monthly_charts/new-structure/.dockerignore new file mode 100644 index 0000000..1d9915f --- /dev/null +++ b/monthly_charts/new-structure/.dockerignore @@ -0,0 +1,2 @@ +deploy.sh +ManualSlackUserLister.py \ No newline at end of file diff --git a/monthly_charts/new-structure/Dockerfile b/monthly_charts/new-structure/Dockerfile new file mode 100644 index 0000000..428a78d --- /dev/null +++ b/monthly_charts/new-structure/Dockerfile @@ -0,0 +1,21 @@ +# Use an official Python runtime as a parent image +FROM python:3.9.6-slim AS build + +# Set the working directory in the container +WORKDIR /app + +# Copy the current directory contents into the container at /app +COPY . 
/app + +# Ensure the script is executable +RUN chmod +x /app/run_ao_leaderboard.py + +RUN python3 -m pip install --no-cache-dir -r requirements.txt + +RUN mkdir /app/plots + +ENV cloud_run=true + +ENTRYPOINT ["python3"] + +CMD ["/app/run_ao_leaderboard.py"] \ No newline at end of file diff --git a/monthly_charts/new-structure/Manual_AO_Leaderboard.py b/monthly_charts/new-structure/Manual_AO_Leaderboard.py new file mode 100644 index 0000000..c027432 --- /dev/null +++ b/monthly_charts/new-structure/Manual_AO_Leaderboard.py @@ -0,0 +1,34 @@ +from ao_leaderboard_helper import region_run + +import pymysql.cursors +import configparser +import sys +from slack_sdk import WebClient + + +# Configure AWS credentials +config = configparser.ConfigParser(); +config.read('../../config/credentials.ini'); +host = config['aws']['host'] +port = int(config['aws']['port']) +user = config['aws']['user'] +password = config['aws']['password'] +#db = config['aws']['db'] +db = sys.argv[1] + +# Set Slack token +key = sys.argv[2] +slack = WebClient(token=key) + +def init_db(host, port, user, password, region_db): + return pymysql.connect( + host=host, + port=port, + user=user, + password=password, + db=region_db, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor + ) + +region_run(init_db(host, port, user, password, db), db, key) \ No newline at end of file diff --git a/monthly_charts/new-structure/ao_leaderboard_helper.py b/monthly_charts/new-structure/ao_leaderboard_helper.py new file mode 100644 index 0000000..df0b4d1 --- /dev/null +++ b/monthly_charts/new-structure/ao_leaderboard_helper.py @@ -0,0 +1,243 @@ +import pandas as pd +import pymysql.cursors +import os +import logging + +import pandas as pd +import time +import pymysql.cursors +import datetime +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError +from slack_sdk.http_retry.builtin_handlers import 
RateLimitErrorRetryHandler +# This handler does retries when HTTP status 429 is returned +# from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler + +def create_directory(db, subdirectory): + # print(('hostname')) + if os.environ.get('cloud_run') : + base_directory = f'/mnt/images_volume/plots/{db}' + else : + base_directory = f'../plots/{db}' + + # List of subdirectories to ensure exist + subdirectories = [base_directory, f'{base_directory}/{subdirectory}'] + + # Create all necessary directories + for directory in subdirectories: + os.makedirs(directory, exist_ok=True) + + return base_directory + +def region_run(mydb, db, slack_key, send_charts=False): + slack = WebClient(token=slack_key) + rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=7) + slack.retry_handlers.append(rate_limit_handler) + + base_directory = create_directory(db, 'ao_charts') + + #Graph Counter Reset + total_graphs = 0 # Sets a counter for the total number of graphs made (users with posting data) + + #Get Current Year, Month Number and Name + d = datetime.datetime.now() + d = d - datetime.timedelta(days=9) + thismonth = d.strftime("%m") + thismonthname = d.strftime("%b") + thismonthnamelong = d.strftime("%B") + yearnum = d.strftime("%Y") + + try: + with mydb.cursor() as cursor: + sql = "SELECT ao, channel_id FROM aos WHERE backblast = 1 and archived = 0" + cursor.execute(sql) + aos = cursor.fetchall() + aos_df = pd.DataFrame(aos, columns=['ao', 'channel_id']) + finally: + logging.info('Now pulling all beatdown records... 
Stand by...') + + total_graphs = 0 # Sets a counter for the total number of graphs made (users with posting data) + + # Query AWS by for beatdown history + for index, row in aos_df.iterrows(): + ao = row['ao'] + channel_id = row['channel_id'] + month = [] + day = [] + year = [] + try: + with mydb.cursor() as cursor: + sql = """ + select PAX, count(1) as Posts FROM ( + select + `bd`.`date` AS `Date`, + `ao`.`ao` AS `AO`, + `u`.`user_name` AS `PAX` + from + (((`bd_attendance` `bd` + left join `aos` `ao` on + ((`bd`.`ao_id` = `ao`.`channel_id`))) + left join `users` `u` on + ((`bd`.`user_id` = `u`.`user_id`)))) + where `u`.app != 1 + order by + `bd`.`date` desc, + `ao`.`ao` + ) a + where + MONTH(Date) = %s + AND YEAR(Date) = %s + AND ao= %s + group by PAX + order by count(1) desc + limit 20 + """ + val = (thismonth, yearnum, ao) + cursor.execute(sql, val) + posts = cursor.fetchall() + posts_df = pd.DataFrame(posts, columns=['PAX', 'Posts']) + finally: + logging.info(f'Now pulling all posting records for {ao} ... Stand by...') + + if not posts_df.empty: + ax = posts_df.plot.bar(x='PAX', color={"Posts": "orange"}) + plt.title("Monthly Leaderboard - " + thismonthnamelong + ", " + yearnum) + plt.xlabel("") + plt.ylabel("# Posts for " + thismonthname + ", " + yearnum) + plt.savefig(f'{base_directory}/ao_charts/PAX_Leaderboard_{ao}{thismonthname}{yearnum}.jpg', bbox_inches='tight') + plt.close() + logging.info(f'Monthly Leaderboard Graph created for AO {ao} Sending to Slack now... hang tight!') + + if send_charts: + max_attempts = 5 + for attempt in range(max_attempts): + try: + response = slack.files_upload_v2(channel=channel_id, initial_comment='Hey ' + ao + "! Here are the posting leaderboards for " + thismonthnamelong + ", " + yearnum + " as well as for Year to Date with the top 20 posters! 
T-CLAPS to these HIMs.", file=f'{base_directory}/PAX_Leaderboard_{ao}{thismonthname}{yearnum}.jpg') + total_graphs = total_graphs + 1 + break #exit the loop if upload is successful + except SlackApiError as e: + if e.response.status_code == 429: + delay = int(e.response.headers['Retry-After']) + print(f"Rate limited. Retrying in {delay} seconds") + time.sleep(delay) + else: + # other errors + raise e + + try: + with mydb.cursor() as cursor: + sql = sql = """ + select PAX, count(1) as Posts FROM ( + select + `bd`.`date` AS `Date`, + `ao`.`ao` AS `AO`, + `u`.`user_name` AS `PAX` + from + (((`bd_attendance` `bd` + left join `aos` `ao` on + ((`bd`.`ao_id` = `ao`.`channel_id`))) + left join `users` `u` on + ((`bd`.`user_id` = `u`.`user_id`)))) + where `u`.app != 1 + order by + `bd`.`date` desc, + `ao`.`ao` + ) a + where + YEAR(Date) = %s + AND ao= %s + group by PAX + order by count(1) desc + limit 20 + """ + val = (yearnum, ao) + cursor.execute(sql, val) + posts = cursor.fetchall() + posts_df = pd.DataFrame(posts, columns=['PAX', 'Posts']) + finally: + logging.info(f'Now pulling all posting records for {ao} Stand by...') + if not posts_df.empty: + ax = posts_df.plot.bar(x='PAX', color={"Posts": "green"}) + plt.title("Year to Date Leaderboard - " + yearnum) + plt.xlabel("") + plt.ylabel("# Posts for " + yearnum + " - Year To Date") + plt.savefig(f'{base_directory}/ao_charts/PAX_Leaderboard_YTD_{ao}{yearnum}.jpg', bbox_inches='tight') # save the figure to a file + plt.close() + logging.info(f'YTD Leaderboard Graph created for region {db} ... Sending to Slack now... 
hang tight!') + if send_charts: + max_attempts = 5 + for attempt in range(max_attempts): + try: + slack.files_upload_v2(file=f'{base_directory}/PAX_Leaderboard_YTD_{ao}{yearnum}.jpg', channel=channel_id) + total_graphs = total_graphs + 1 + break # exit the loop if upload is successful + except SlackApiError as e: + if e.response.status_code == 429: + delay = int(e.response.headers['Retry-After']) + logging.info(f"Rate limited. Retrying in {delay} seconds") + time.sleep(delay) + else: + # other errors + raise e + # After all AOs have been processed, logging.info the total number of graphs made + logging.info(f'Total graphs made: {total_graphs}') + + +def ao_leaderboard_charts(): + logging.basicConfig(format=f'%(asctime)s %(levelname)-8s %(message)s', + datefmt = '%Y-%m-%d %H:%M:%S', + level = logging.INFO) + + host = os.environ['host'] + port = 3306 + user = os.environ['user'] + user = os.environ.get('custom_region') + password = os.environ['password'] + db = "paxminer" + + #Define AWS Database connection criteria + mydb1 = pymysql.connect( + host=host, + port=port, + user=user, + password=password, + db=db, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + # Get list of regions and Slack tokens for PAXminer execution + try: + with mydb1.cursor() as cursor: + sql = "SELECT * FROM paxminer.regions where firstf_channel IS NOT NULL AND send_ao_leaderboard = 1" + cursor.execute(sql) + regions = cursor.fetchall() + regions_df = pd.DataFrame(regions) + finally: + logging.info('Getting list of regions that use PAXminer...') + cursor.close() + + count = 0 + for index, row in regions_df.iterrows(): + region = row['region'] + key = row['slack_token'] + db = row['schema_name'] + + #Define AWS Database connection criteria + region_db = pymysql.connect( + host=host, + port=port, + user=user, + password=password, + db=db, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + logging.info(f'Processing statistics for region {region}') + count = count + 
1 + if count < 5: + region_run(region_db, db, key, False) + logging.info('----------------- End of Region Update -----------------\n') \ No newline at end of file diff --git a/monthly_charts/new-structure/requirements.txt b/monthly_charts/new-structure/requirements.txt new file mode 100644 index 0000000..6c9447e --- /dev/null +++ b/monthly_charts/new-structure/requirements.txt @@ -0,0 +1,6 @@ +slack-sdk==3.26.1 +pandas==1.4.0 +pymysql==1.1.0 +numpy==1.25.2 +matplotlib==3.8.2 +typing-extensions==4.7.1 \ No newline at end of file diff --git a/monthly_charts/new-structure/run_ao_leaderboard.py b/monthly_charts/new-structure/run_ao_leaderboard.py new file mode 100644 index 0000000..dfc4771 --- /dev/null +++ b/monthly_charts/new-structure/run_ao_leaderboard.py @@ -0,0 +1,7 @@ +from ao_leaderboard_helper import ao_leaderboard_charts + +def main(): + ao_leaderboard_charts() + +if __name__ == '__main__': + main() \ No newline at end of file From 95a5b838fd99409ab2efc1912947a25035e59813 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 19:23:25 -0600 Subject: [PATCH 11/14] Add region leaderboard and deploy script. 
--- .../Manual_Region_Leaderboard.py | 33 ++++ monthly_charts/new-structure/deploy.sh | 5 + .../region_leaderboard_helper.py | 169 ++++++++++++++++++ .../new-structure/run_region_leaderboard.py | 7 + 4 files changed, 214 insertions(+) create mode 100644 monthly_charts/new-structure/Manual_Region_Leaderboard.py create mode 100755 monthly_charts/new-structure/deploy.sh create mode 100644 monthly_charts/new-structure/region_leaderboard_helper.py create mode 100644 monthly_charts/new-structure/run_region_leaderboard.py diff --git a/monthly_charts/new-structure/Manual_Region_Leaderboard.py b/monthly_charts/new-structure/Manual_Region_Leaderboard.py new file mode 100644 index 0000000..6889477 --- /dev/null +++ b/monthly_charts/new-structure/Manual_Region_Leaderboard.py @@ -0,0 +1,33 @@ +from region_leaderboard_helper import region_leaderboard_run + +import pymysql.cursors +import configparser +import sys +from slack_sdk import WebClient + +# Configure AWS credentials +config = configparser.ConfigParser(); +config.read('../../config/credentials.ini'); +host = config['aws']['host'] +port = int(config['aws']['port']) +user = config['aws']['user'] +password = config['aws']['password'] + +db = sys.argv[1] +key = sys.argv[2] +firstf = sys.argv[3] +slack = WebClient(token=key) +region_name = sys.argv[4] + +def init_db(host, port, user, password, region_db): + return pymysql.connect( + host=host, + port=port, + user=user, + password=password, + db=region_db, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor + ) + +region_leaderboard_run(region_name, init_db(host, port, user, password, db), key, firstf, db) \ No newline at end of file diff --git a/monthly_charts/new-structure/deploy.sh b/monthly_charts/new-structure/deploy.sh new file mode 100755 index 0000000..32c0633 --- /dev/null +++ b/monthly_charts/new-structure/deploy.sh @@ -0,0 +1,5 @@ +gcloud auth configure-docker us-central1-docker.pkg.dev + +docker build . 
--platform linux/amd64 --tag us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/charts-publisher:latest + +docker push us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/charts-publisher:latest \ No newline at end of file diff --git a/monthly_charts/new-structure/region_leaderboard_helper.py b/monthly_charts/new-structure/region_leaderboard_helper.py new file mode 100644 index 0000000..19d4f85 --- /dev/null +++ b/monthly_charts/new-structure/region_leaderboard_helper.py @@ -0,0 +1,169 @@ +from slack_sdk import WebClient +from ao_leaderboard_helper import create_directory +import pandas as pd +import pymysql.cursors +import datetime +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import logging +import os +# This handler does retries when HTTP status 429 is returned +from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler + +def region_leaderboard_charts(): + host = os.environ['host'] + port = 3306 + user = os.environ['user'] + password = os.environ['password'] + schema_name = "paxminer" + + #Define AWS Database connection criteria + mydb1 = pymysql.connect( + host=host, + port=port, + user=user, + password=password, + db=schema_name, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + # Get list of regions and Slack tokens for PAXminer execution + try: + logging.info('Getting list of regions that use PAXminer...') + with mydb1.cursor() as cursor: + sql = "SELECT * FROM paxminer.regions where firstf_channel IS NOT NULL AND send_region_leaderboard = 1" + cursor.execute(sql) + regions = cursor.fetchall() + regions_df = pd.DataFrame(regions) + finally: + cursor.close() + + count = 0 + for index, row in regions_df.iterrows(): + region = row['region'] + key = row['slack_token'] + db = row['schema_name'] + firstf = row['firstf_channel'] + + #Define AWS Database connection criteria + region_db = pymysql.connect( + host=host, + port=port, + user=user, + password=password, + db=db, + 
charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + logging.info(f'Processing statistics for region {region}') + count = count + 1 + # if count < 5: + region_leaderboard_run(region, region_db, key, firstf, row['schema_name']) + logging.info('----------------- End of Region Update -----------------\n') + +def region_leaderboard_run(region_name, region_db, key, firstf, schema_name): + total_graphs = 0 + slack = WebClient(token=key) + # Enable rate limited error retries + rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=5) + slack.retry_handlers.append(rate_limit_handler) + + base_directory = create_directory(schema_name, 'region_charts') + + #Get Current Year, Month Number and Name + d = datetime.datetime.now() + d = d - datetime.timedelta(days=7) + thismonth = d.strftime("%m") + thismonthname = d.strftime("%b") + thismonthnamelong = d.strftime("%B") + yearnum = d.strftime("%Y") + + try: + print(f'Now pulling all posting records for {region_name}... Stand by...') + with region_db.cursor() as cursor: + sql = """ + select PAX, count(distinct AO) as UniqueAOs, count(1) as Posts FROM ( + select + `bd`.`date` AS `Date`, + `ao`.`ao` AS `AO`, + `u`.`user_name` AS `PAX` + from + (((`bd_attendance` `bd` + left join `aos` `ao` on + ((`bd`.`ao_id` = `ao`.`channel_id`))) + left join `users` `u` on + ((`bd`.`user_id` = `u`.`user_id`)))) + where `u`.app != 1 + order by + `bd`.`date` desc, + `ao`.`ao` + ) a + where MONTH(Date) = %s + AND YEAR(Date) = %s + group by PAX + order by count(1) desc + limit 20 + """ + val = (thismonth, yearnum) + cursor.execute(sql, val) + posts = cursor.fetchall() + posts_df = pd.DataFrame(posts, columns=['PAX', 'UniqueAOs', 'Posts']) + except Exception as e: + logging.error(e) + + if not posts_df.empty: + print(f'Sending Region Leaderboard Graph for {region_name} ... 
Stand by...') + ax = posts_df.plot.bar(x='PAX', color={'UniqueAOs' : "blue", "Posts" : "orange"}) + plt.title("Monthly Leaderboard - " + thismonthnamelong + ", " + yearnum) + plt.xlabel("") + plt.ylabel("# Posts for " + thismonthname + ", " + yearnum) + plt.savefig(f'{base_directory}/PAX_Leaderboard_' + region_name + thismonthname + yearnum + '.jpg', bbox_inches='tight') # save the figure to a file + print('Monthly Leaderboard Graph created for region_name ', region_name, 'Sending to Slack now... hang tight!') + plt.close() + # slack.files_upload_v2(channel=firstf, initial_comment='Hey ' + region_name + "! Check out the current posting leaderboards for " + thismonthnamelong + ", " + yearnum + " as well as for Year to Date (includes all beatdowns, rucks, Qsource, etc.). Here are the top 20 posters! T-CLAPS to these HIMs.", file=f'{base_directory}/{schema_name}/region_charts/PAX_Leaderboard_' + region_name + thismonthname + yearnum + '.jpg', ) + total_graphs = total_graphs + 1 + print(f'Total graphs made: {total_graphs}') + + try: + with region_db.cursor() as cursor: + sql = """ + select PAX, count(distinct AO) as UniqueAOs, count(1) as Posts FROM ( + select + `bd`.`date` AS `Date`, + `ao`.`ao` AS `AO`, + `u`.`user_name` AS `PAX` + from + (((`bd_attendance` `bd` + left join `aos` `ao` on + ((`bd`.`ao_id` = `ao`.`channel_id`))) + left join `users` `u` on + ((`bd`.`user_id` = `u`.`user_id`)))) + where `u`.app != 1 + order by + `bd`.`date` desc, + `ao`.`ao` + ) a + where YEAR(Date) = %s + group by PAX + order by count(1) desc + limit 20 + """ + val = (yearnum) + cursor.execute(sql, val) + posts = cursor.fetchall() + posts_df = pd.DataFrame(posts, columns=['PAX', 'UniqueAOs', 'Posts']) + finally: + print(f'Now pulling all posting records for {region_name}... 
Stand by...') + + if not posts_df.empty: + ax = posts_df.plot.bar(x='PAX', color={'UniqueAOs' : "purple", "Posts" : "green"}) + plt.title("Year to Date Leaderboard - " + yearnum) + plt.xlabel("") + plt.ylabel("# Posts for " + yearnum + " - Year To Date") + plt.savefig(f'{base_directory}/region_charts/PAX_Leaderboard_YTD_' + region_name + yearnum + '.jpg', bbox_inches='tight') # save the figure to a file + print('YTD Leaderboard Graph created for region_name', region_name, 'Sending to Slack now... hang tight!') + plt.close() + # slack.files_upload_v2(file=f'{base_directory}/region_charts/PAX_Leaderboard_YTD_' + region_name + yearnum + '.jpg', channel=firstf) + total_graphs = total_graphs + 1 + logging.info(f'Total graphs made: {total_graphs}') \ No newline at end of file diff --git a/monthly_charts/new-structure/run_region_leaderboard.py b/monthly_charts/new-structure/run_region_leaderboard.py new file mode 100644 index 0000000..0a7a99a --- /dev/null +++ b/monthly_charts/new-structure/run_region_leaderboard.py @@ -0,0 +1,7 @@ +from region_leaderboard_helper import region_leaderboard_charts + +def main(): + region_leaderboard_charts() + +if __name__ == '__main__': + main() \ No newline at end of file From 7831390996c8b47193690d3999781aef8d0db356 Mon Sep 17 00:00:00 2001 From: farrellw Date: Sat, 8 Mar 2025 19:28:26 -0600 Subject: [PATCH 12/14] Only send the pax chart if they posted in the last month. Close each plot after its been saved and sent. 
--- monthly_charts/PAXcharter.py | 92 ++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/monthly_charts/PAXcharter.py b/monthly_charts/PAXcharter.py index 88f79a3..4bdfc86 100755 --- a/monthly_charts/PAXcharter.py +++ b/monthly_charts/PAXcharter.py @@ -164,51 +164,59 @@ def success_message_sent(user_id_tmp, pax, db): attendance_last_month_df = attendance_tmp_df[attendance_tmp_df['Date'] >= str(last_month_start)] total_count_last_month = attendance_last_month_df.shape[0] - if total_count_last_month > 0: - # Add the total count as text on the chart - ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes, - fontsize=12, verticalalignment='top', horizontalalignment='right') - - plt.title('Number of posts by '+ pax + ' by AO/Month for ' + yearnum) - plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False) - plt.ioff() - plt.savefig('../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg', bbox_inches='tight') #save the figure to a file - - message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + ". \nPush yourself, get those bars higher every month! SYITG!" - file = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg' - - print('PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!') - - # The current method v2, and legacy method, can both be invoked here depending on the region_method variable. - # Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025. - # The main difference is that v2 requires an additional conversation scope. - # New regions will all use v2. 
- # user_id_override = "U06GDMGJKNE" - if region_method == "v2": - try: - response = send_slack_message_v2(user_id_tmp, message, file) - - success_message_sent(user_id_tmp, pax, db) - except Exception as e: - # If the error is missing scope, then - if e.response['error'] == 'missing_scope': - print("Error: The app is missing required scopes. Please add the 'im:write' scope.") - region_method = "v1" - else: + try : + if total_count_last_month > 0: + # Add the total count as text on the chart + ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes, + fontsize=12, verticalalignment='top', horizontalalignment='right') + + file_path = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg' + plt.title('Number of posts by '+ pax + ' by AO/Month for ' + yearnum) + plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False) + plt.ioff() + plt.savefig(file_path, bbox_inches='tight') #save the figure to a file + + message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + ". \nPush yourself, get those bars higher every month! SYITG!" + file = file_path + + print('PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!') + + # The current method v2, and legacy method, can both be invoked here depending on the region_method variable. + # Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025. + # The main difference is that v2 requires an additional conversation scope. + # New regions will all use v2. + # user_id_override = "U06GDMGJKNE" + if region_method == "v2": + try: + response = send_slack_message_v2(user_id_tmp, message, file) + + success_message_sent(user_id_tmp, pax, db) + except Exception as e: + # If the error is missing scope, then + if e.response['error'] == 'missing_scope': + print("Error: The app is missing required scopes. 
Please add the 'im:write' scope.") + region_method = "v1" + else: + log_message_sent_error(user_id_tmp, db, pax) + raise e + + if region_method != "v2": + try: + channel = user_id_tmp + response = send_slack_message(channel, message, file) + + success_message_sent(user_id_tmp, pax, db) + except: log_message_sent_error(user_id_tmp, db, pax) raise e - - if region_method != "v2": - try: - channel = user_id_tmp - response = send_slack_message(channel, message, file) - success_message_sent(user_id_tmp, pax, db) - except: - log_message_sent_error(user_id_tmp, db, pax) - raise e - else: - print(pax + ' skipped') + else: + print(pax + ' skipped') + except Exception as e: + print(e) + finally: + plt.close() + except Exception as e: print(e) print("An exception occurred for User ID " + user_id) From 72180d02fa57b5fea342ebf878f83b654a0a0551 Mon Sep 17 00:00:00 2001 From: farrellw Date: Mon, 10 Mar 2025 10:22:41 -0500 Subject: [PATCH 13/14] Switch back to only recent backblasts. --- backblast_scraping/PAXMiner_Cloud_Run.py | 2 +- backblast_scraping/PAX_BD_Miner.py | 11 ++++++----- backblast_scraping/PAXminer_Manual_Execution.py | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/backblast_scraping/PAXMiner_Cloud_Run.py b/backblast_scraping/PAXMiner_Cloud_Run.py index bd673dd..36b69e8 100644 --- a/backblast_scraping/PAXMiner_Cloud_Run.py +++ b/backblast_scraping/PAXMiner_Cloud_Run.py @@ -33,7 +33,7 @@ region = row['region'] key = row['slack_token'] db = row['schema_name'] - print(f'Executing user updates for region {region}') + print(f'Executing beatdown scraping for region {region}') try: run_pax_bd_miner(host, port, user, password, db, key) diff --git a/backblast_scraping/PAX_BD_Miner.py b/backblast_scraping/PAX_BD_Miner.py index 3df3ab0..111a8eb 100755 --- a/backblast_scraping/PAX_BD_Miner.py +++ b/backblast_scraping/PAX_BD_Miner.py @@ -24,7 +24,7 @@ MIN_BACKBLAST = 'Backblast:AO:PAX:@x@yQ:@xCount:0' SECONDS_PER_DAY = 86400 -LOOKBACK_DAYS = 7 
+LOOKBACK_DAYS = 10 LOOKBACK_SECONDS = SECONDS_PER_DAY * LOOKBACK_DAYS ALLOWABLE_DAYS_BACKBLAST_DATE_VALID = 30 pat = r'(?<=\<).+?(?=>)' @@ -307,14 +307,14 @@ def run_pax_bd_miner(host, port, user, password, db, key): # Set epoch and yesterday's timestamp for datetime calculations epoch = datetime(1970, 1, 1) today = datetime.now() - cutoff_date = today - timedelta(days = 7) + cutoff_date = today - timedelta(days = 10) current_ts = time.time() cutoff_ts = current_ts - LOOKBACK_SECONDS cutoff_date = cutoff_date.strftime('%Y-%m-%d') date_time = today.strftime("%m/%d/%Y, %H:%M:%S") # Set up logging - logging.basicConfig(format=f'%(asctime)s [{db}] %(levelname)-8s %(message)s', + logging.basicConfig(format=f'%(asctime)s %(levelname)-8s %(message)s', datefmt = '%Y-%m-%d %H:%M:%S', level = logging.INFO) logging.info(f"Beginning BD+Paxminer {current_ts}") @@ -404,7 +404,7 @@ def run_pax_bd_miner(host, port, user, password, db, key): messages_df = pd.concat([messages_df, temp_df], ignore_index=True) except Exception as e: logging.warning("Error: Unable to access Slack channel %s in region %s", id, db) - logging.error(e) + logging.warning(e) pm_log_text += "Error: Unable to access Slack channel " + id + ", " + ao + " in region " + db + "\n" if next_cursor != "None": # Keep going from next offset. @@ -525,7 +525,7 @@ def run_pax_bd_miner(host, port, user, password, db, key): if q_user_id == 'NA': logging.warning("Q error for AO: %s, Date: %s, backblast from Q %s (ID %s) not imported", ao_id, msg_date, user_name, user_id) - print('Backblast error on Q at AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". bd: ", bd_date, "cutoff:", cutoff_date) + print('Backblast error on Q at AO:', ao_id, 'Date:', msg_date, 'Posted By:', user_name, ". bd: ", bd_date) pm_log_text += " - Backblast error on Q at AO: <#" + ao_id + "> Date: " + msg_date + " Posted By: " + user_name + ".\n" if user_id != 'APP': q_error_text += " - ERROR: The Q is not present or not tagged correctly. 
Please ensure the Q is tagged using @PAX_NAME \n" @@ -671,6 +671,7 @@ def run_pax_bd_miner(host, port, user, password, db, key): try: slack.chat_postMessage(channel='paxminer_logs', text=pm_log_text) + logging.info("Slack log message posted") except: print("Slack log message error - not posted") logging.error("Slack log message error - not posted") diff --git a/backblast_scraping/PAXminer_Manual_Execution.py b/backblast_scraping/PAXminer_Manual_Execution.py index c46e587..0ece7c5 100755 --- a/backblast_scraping/PAXminer_Manual_Execution.py +++ b/backblast_scraping/PAXminer_Manual_Execution.py @@ -31,7 +31,7 @@ # Get list of regions and Slack tokens for PAXminer execution try: with mydb1.cursor() as cursor: - sql = "SELECT * FROM paxminer.regions where schema_name = 'f3stlcity'" + sql = "SELECT * FROM paxminer.regions where schema_name = ''" cursor.execute(sql) regions = cursor.fetchall() regions_df = pd.DataFrame(regions, columns=['region', 'slack_token', 'schema_name']) @@ -42,7 +42,7 @@ region = row['region'] key = row['slack_token'] db = row['schema_name'] - print('Executing user updates for region ' + region) + print('Executing paxminer backblast scraping for region ' + region) run_pax_bd_miner(host, port, user, password, db, key) print('----------------- End of Region Update -----------------\n') From 03553b2ba0a7a984059604acff3cdd62e2136319 Mon Sep 17 00:00:00 2001 From: farrellw Date: Mon, 10 Mar 2025 10:36:26 -0500 Subject: [PATCH 14/14] Change days back to 7 --- backblast_scraping/PAX_BD_Miner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backblast_scraping/PAX_BD_Miner.py b/backblast_scraping/PAX_BD_Miner.py index 111a8eb..4224569 100755 --- a/backblast_scraping/PAX_BD_Miner.py +++ b/backblast_scraping/PAX_BD_Miner.py @@ -24,7 +24,7 @@ MIN_BACKBLAST = 'Backblast:AO:PAX:@x@yQ:@xCount:0' SECONDS_PER_DAY = 86400 -LOOKBACK_DAYS = 10 +LOOKBACK_DAYS = 7 LOOKBACK_SECONDS = SECONDS_PER_DAY * LOOKBACK_DAYS 
ALLOWABLE_DAYS_BACKBLAST_DATE_VALID = 30 pat = r'(?<=\<).+?(?=>)' @@ -307,7 +307,7 @@ def run_pax_bd_miner(host, port, user, password, db, key): # Set epoch and yesterday's timestamp for datetime calculations epoch = datetime(1970, 1, 1) today = datetime.now() - cutoff_date = today - timedelta(days = 10) + cutoff_date = today - timedelta(days = LOOKBACK_DAYS) current_ts = time.time() cutoff_ts = current_ts - LOOKBACK_SECONDS cutoff_date = cutoff_date.strftime('%Y-%m-%d')