Skip to content
This repository was archived by the owner on Apr 2, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ __pycache__
plots
logs
config/credentials.ini
.DS_Store
.DS_Store
backblast_scraping/config/*
2 changes: 2 additions & 0 deletions backblast_scraping/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
config/*
venv
13 changes: 13 additions & 0 deletions backblast_scraping/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Container image for the PAXminer backblast scraper.
# NOTE(review): Debian "buster" is end-of-life; consider moving to a supported
# base (e.g. python:3.11-slim-bookworm) after confirming the pinned
# requirements still install there.
FROM python:3.11-slim-buster

# Write print() output straight through instead of holding it in a buffer, so
# the scraper's progress and error lines reach the container logs promptly
# (stdout is buffered when it is not attached to a terminal).
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Copy the requirements file and install dependencies first so this layer is
# reused when only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application files
COPY . .

# Default command: run the scraper, passing A-Z as the region range argument
CMD ["python", "PAXMiner_Cloud_Run.py", "A-Z"]
44 changes: 44 additions & 0 deletions backblast_scraping/PAXMiner_Cloud_Run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python3
import pandas as pd
import os
import sys
from PAX_BD_Miner import run_pax_bd_miner, create_database_connection

# Region range for this run. Command line input parameter 1 should be a regex
# range (e.g. A-M), intended to select all regions starting with A through M.
# Falls back to A-Z (the value the Dockerfile passes) when no argument is given.
# NOTE(review): region_regex is read here but never applied to the query below,
# so every active region is scraped regardless of its value - confirm whether
# the SQL is meant to filter on it.
region_regex = sys.argv[1] if len(sys.argv) > 1 else "A-Z"

paxminer_db = None

# Get list of regions and Slack tokens for PAXminer execution
print('Getting list of regions that use PAXminer...')
try:
    # Connection settings come from environment variables; a missing variable
    # raises KeyError and aborts the run before any region is processed.
    host = os.environ['host']
    port = 3306
    user = os.environ['user']
    password = os.environ['password']
    db = "paxminer"
    # Define AWS Database connection criteria
    paxminer_db = create_database_connection(host, port, user, password, db)

    with paxminer_db.cursor() as cursor:
        sql = "SELECT * from paxminer.regions WHERE active = 1 AND scrape_backblasts = 1"
        cursor.execute(sql)
        regions = cursor.fetchall()
        regions_df = pd.DataFrame(regions, columns=['region', 'slack_token', 'schema_name'])
finally:
    # Always release the connection, including when the lookup fails.
    if paxminer_db:
        paxminer_db.close()

# Scrape each region in turn. A failure in one region is reported and the loop
# moves on, so a single bad region does not stop the remaining ones.
for _, row in regions_df.iterrows():
    region = row['region']
    key = row['slack_token']
    db = row['schema_name']
    print(f'Executing beatdown scraping for region {region}')

    try:
        run_pax_bd_miner(host, port, user, password, db, key)
    except Exception as e:
        print(f'Error in PAXminer execution for region {region}')
        print(e)
    finally:
        print(f'-------- PAXMiner Cloud Run Complete {region}-------------')
845 changes: 423 additions & 422 deletions backblast_scraping/PAX_BD_Miner.py

Large diffs are not rendered by default.

10 changes: 2 additions & 8 deletions backblast_scraping/PAXminer_Daily_Execution.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
#!/usr/bin/env python3
'''
This script was written by Beaker from F3STL. Questions? @srschaecher on twitter or srschaecher@gmail.com.
This script executes the daily PAXminer backblast queries and data updates for all F3 regions using PAXminer.
'''

import pandas as pd
import pymysql.cursors
import configparser
Expand All @@ -20,7 +15,7 @@

# Configure AWS credentials
config = configparser.ConfigParser();
config.read('../config/credentials.ini');
config.read('config/credentials.ini');

# Configure AWS Credentials
host = config['aws']['host']
Expand Down Expand Up @@ -57,6 +52,5 @@
print('Executing user updates for region ' + region)
#os.system("./F3SlackUserLister.py " + db + " " + key)
#os.system("./F3SlackChannelLister.py " + db + " " + key)
#os.system("./BDminer.py " + db + " " + key)
#os.system("./PAXminer.py " + db + " " + key)
os.system("./PAX_BD_Miner.py " + db + " " + key)
print('----------------- End of Region Update -----------------\n')
17 changes: 6 additions & 11 deletions backblast_scraping/PAXminer_Manual_Execution.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
#!/usr/bin/env python3
'''
This script was written by Beaker from F3STL. Questions? @srschaecher on twitter or srschaecher@gmail.com.
This script executes the daily PAXminer backblast queries and data updates for all F3 regions using PAXminer.
'''

from slacker import Slacker
import pandas as pd
import pymysql.cursors
import configparser
import os
import warnings
from PAX_BD_Miner import run_pax_bd_miner
warnings.simplefilter(action='ignore', category=FutureWarning)

# Configure AWS credentials
Expand All @@ -36,7 +31,7 @@
# Get list of regions and Slack tokens for PAXminer execution
try:
with mydb1.cursor() as cursor:
sql = "SELECT * FROM paxminer.regions where region = 'Mobile'" # <-- Update this for whatever region is being tested
sql = "SELECT * FROM paxminer.regions where schema_name = ''"
cursor.execute(sql)
regions = cursor.fetchall()
regions_df = pd.DataFrame(regions, columns=['region', 'slack_token', 'schema_name'])
Expand All @@ -47,9 +42,9 @@
region = row['region']
key = row['slack_token']
db = row['schema_name']
print('Executing user updates for region ' + region)
os.system("./F3SlackUserLister.py " + db + " " + key)
os.system("./F3SlackChannelLister.py " + db + " " + key)
#os.system("./PAX_BD_Miner.py " + db + " " + key)
print('Executing paxminer backblast scraping for region ' + region)

run_pax_bd_miner(host, port, user, password, db, key)
print('----------------- End of Region Update -----------------\n')
mydb1.close()
print('\nPAXminer execution complete.')
67 changes: 67 additions & 0 deletions backblast_scraping/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# PAXminer

PAXminer Backblast Scraping is a tool for automatically extracting and storing backblasts (workout summaries) from F3 Slack channels. It scrapes Slack channels, parses the backblast text, and stores the information in a database for analysis and reporting.

## Deployment to Google Cloud Run

This section outlines the steps for deploying PAXminer to Google Cloud Run. The deploy.sh file can be run to do the following 3 steps together.

### Deployment Steps

1. **Authenticate Docker with Google Cloud:**

```bash
gcloud auth configure-docker us-central1-docker.pkg.dev
```

2. **Build the Docker Image:**

```bash
docker build . --platform linux/amd64 --tag us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/paxminer-scraping:latest
```

3. **Push the Docker Image to Google Artifact Registry:**

```bash
docker push us-central1-docker.pkg.dev/f3slackbot/paxminer-db-management/paxminer-scraping:latest
```

## Running PAXminer Locally

This section describes how to run PAXminer locally for development or testing.

### Installation

1. Clone the repository
2. Create a virtual environment (recommended)
3. Install the dependencies

### Configuration

1. **Set Config Variables:**

Set the following variables in your config/credentials.ini file:

* `host`: The database host.
* `port`: The database port (typically 3306).
* `user`: The database user.
* `password`: The database password.
* `db`: The database name (e.g., `paxminer`).

### Execution

1. **Run `PAXminer_Manual_Execution.py`:**

```bash
python PAXminer_Manual_Execution.py
```

This will execute PAXminer for the region hardcoded in the script query on line 34.

2. **Run `PAXMiner_Cloud_Run.py`:**

```bash
python PAXMiner_Cloud_Run.py A-Z
```

This will execute PAXminer for all regions that match the regex `A-Z`. Unlike the manual script, it reads the database settings (`host`, `user`, `password`) from environment variables rather than from `config/credentials.ini`.
5 changes: 5 additions & 0 deletions backblast_scraping/deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env sh
# Build the PAXminer scraping image and push it to the Docker registry.
# Stop at the first failing command (and on unset variables) so that a failed
# build can never be followed by a push of a stale image.
set -eu

REGISTRY="us-central1-docker.pkg.dev"
IMAGE="${REGISTRY}/f3slackbot/paxminer-db-management/paxminer-scraping:latest"

# Step 1: let Docker authenticate against the registry through gcloud
gcloud auth configure-docker "${REGISTRY}"

# Step 2: build the image for linux/amd64 regardless of the host architecture
docker build . --platform linux/amd64 --tag "${IMAGE}"

# Step 3: publish the image
docker push "${IMAGE}"
7 changes: 7 additions & 0 deletions backblast_scraping/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
slack-sdk==3.26.1
pandas==2.2.3
pymysql==1.1.0
numpy==1.25.2
matplotlib==3.8.2
typing-extensions==4.7.1
dateparser==1.2.1
28 changes: 28 additions & 0 deletions monthly_charts/CHARTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# CHARTS

## Chart Types
### AO
- Controlled by the send_ao_leaderboard flag in the region database.
- LeaderboardByAO_Charter.py
- Two Graphs Sent
- PAX posts in the last month
- PAX posts YTD

### Q
- Controlled by the send_q_charts flag in the database.
- QCharter.py
- Q's in the last month to each AO
- Q's in the last month to the firstf channel, broken down by AO.

### Region
- Controlled by the send_region_leaderboard flag in the database
- Leaderboard_Charter.py
- Two Graphs Sent to the firstf channel
- PAX posts in the last month
- PAX posts YTD

### PAX
- Controlled by the send_pax_charts flag in the database
- PAXcharter.py
- One graph sent to each PAX.
- Monthly posting summary broken down by AO, with the total in the upper right.
105 changes: 58 additions & 47 deletions monthly_charts/PAXcharter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,14 @@
# This handler does retries when HTTP status 429 is returned
from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler


# Configure AWS credentials
config = configparser.ConfigParser();
config.read('../config/credentials.ini');
host = config['aws']['host']
port = int(config['aws']['port'])
user = config['aws']['user']
password = config['aws']['password']
#db = config['aws']['db']

db = sys.argv[1]

# Set Slack token
Expand All @@ -50,7 +49,7 @@

#Get Current Year, Month Number and Name
d = datetime.datetime.now()
d = d - datetime.timedelta(days=7)
d = d - datetime.timedelta(days=15)
thismonth = d.strftime("%m")
thismonthname = d.strftime("%b")
thismonthnamelong = d.strftime("%B")
Expand Down Expand Up @@ -159,53 +158,65 @@ def success_message_sent(user_id_tmp, pax, db):

ax = attendance_tmp_df.groupby(['Month', 'AO'], sort=False).size().unstack().plot(kind='bar', stacked=True)
total_count_for_year = attendance_tmp_df.shape[0]

# Add the total count as text on the chart
ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes,
fontsize=12, verticalalignment='top', horizontalalignment='right')

plt.title('Number of posts by '+ pax + ' by AO/Month for ' + yearnum)
plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False)
plt.ioff()
plt.savefig('../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg', bbox_inches='tight') #save the figure to a file
total_graphs = total_graphs + 1
message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + ". \nPush yourself, get those bars higher every month! SYITG!"
file = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg'

#manual_graphs = [240,241,242,244,245,246,247,249,250]
if total_graphs > 0: # This is a count of total users processed, in case of error during processing. Set the total_graphs > to whatever # comes next in the log file row count.
print(total_graphs, 'PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!')

# The current method v2, and legacy method, can both be invoked here depending on the region_method variable.
# Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025.
# The main difference is that v2 requires an additional conversation scope.
# New regions will all use v2.
# user_id_override = "U06GDMGJKNE"
if region_method == "v2":
try:
response = send_slack_message_v2(user_id_tmp, message, file)

success_message_sent(user_id_tmp, pax, db)
except Exception as e:
# If the error is missing scope, then
if e.response['error'] == 'missing_scope':
print("Error: The app is missing required scopes. Please add the 'im:write' scope.")
region_method = "v1"
else:

# Calculate total count for the last month
last_month_start = datetime.date(int(yearnum), int(thismonth), 1)
attendance_last_month_df = attendance_tmp_df[attendance_tmp_df['Date'] >= str(last_month_start)]
total_count_last_month = attendance_last_month_df.shape[0]

try :
if total_count_last_month > 0:
# Add the total count as text on the chart
ax.text(0.95, 0.95, f"Total: {total_count_for_year}", transform=ax.transAxes,
fontsize=12, verticalalignment='top', horizontalalignment='right')

file_path = '../plots/' + db + '/' + user_id_tmp + "_" + thismonthname + yearnum + '.jpg'
plt.title('Number of posts by '+ pax + ' by AO/Month for ' + yearnum)
plt.legend(loc = 'center left', bbox_to_anchor=(1, 0.5), frameon = False)
plt.ioff()
plt.savefig(file_path, bbox_inches='tight') #save the figure to a file

message = 'Hey ' + pax + "! Here is your monthly posting summary for " + yearnum + ". \nPush yourself, get those bars higher every month! SYITG!"
file = file_path

print('PAX posting graph created for user', pax, 'Sending to Slack now... hang tight!')

# The current method v2, and legacy method, can both be invoked here depending on the region_method variable.
# Most regions still use the legacy method, but will need to migrate to v2 by Spring 2025.
# The main difference is that v2 requires an additional conversation scope.
# New regions will all use v2.
# user_id_override = "U06GDMGJKNE"
if region_method == "v2":
try:
response = send_slack_message_v2(user_id_tmp, message, file)

success_message_sent(user_id_tmp, pax, db)
except Exception as e:
# If the error is missing scope, then
if e.response['error'] == 'missing_scope':
print("Error: The app is missing required scopes. Please add the 'im:write' scope.")
region_method = "v1"
else:
log_message_sent_error(user_id_tmp, db, pax)
raise e

if region_method != "v2":
try:
channel = user_id_tmp
response = send_slack_message(channel, message, file)

success_message_sent(user_id_tmp, pax, db)
except:
log_message_sent_error(user_id_tmp, db, pax)
raise e

if region_method != "v2":
try:
channel = user_id_tmp
response = send_slack_message(channel, message, file)

success_message_sent(user_id_tmp, pax, db)
except:
log_message_sent_error(user_id_tmp, db, pax)
raise e
else:
print(pax + ' skipped')
else:
print(pax + ' skipped')
except Exception as e:
print(e)
finally:
plt.close()

except Exception as e:
print(e)
print("An exception occurred for User ID " + user_id)
Expand Down
Loading