-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
115 lines (104 loc) · 3.76 KB
/
Copy pathutils.py
File metadata and controls
115 lines (104 loc) · 3.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import re
import json
import csv
import os
from html import unescape
def html_to_text(html_content):
    """Convert an HTML fragment to plain text.

    Comments and ``<script>``/``<style>`` elements are dropped together with
    their contents, ``<br>`` and the closing tags of ``p``, ``pre``, ``div``,
    ``h1``-``h6``, ``li`` and ``tr`` become line breaks, every remaining tag
    is removed, and HTML entities are unescaped.

    Args:
        html_content: HTML markup as a string. Falsy values yield ``''``.

    Returns:
        The text with runs of spaces/tabs collapsed to one space, each line
        stripped, at most one blank line between paragraphs, and no leading
        or trailing whitespace.
    """
    if not html_content:
        return ''
    # Comments may legally contain '>' and script/style bodies are code, not
    # prose; remove them whole so the generic tag stripper below cannot leave
    # fragments of them in the output.
    text = re.sub(r'<!--.*?-->', '', html_content, flags=re.DOTALL)
    text = re.sub(
        r'<(script|style)\b[^>]*>.*?</\1\s*>',
        '',
        text,
        flags=re.IGNORECASE | re.DOTALL,
    )
    # Replace line breaks and block-level closing tags with newlines. The \b
    # anchors keep tag names from matching by prefix (e.g. </progress> is not
    # </p>, </link> is not </li>).
    text = re.sub(
        r'<\s*(?:br\b|/(?:p|pre|div|h[1-6]|li|tr)\b)[^>]*>',
        '\n',
        text,
        flags=re.IGNORECASE,
    )
    # Remove all other HTML tags
    text = re.sub(r'<[^>]+>', '', text)
    # Unescape entities only after tag removal so that escaped markup such as
    # "&lt;b&gt;" survives as literal text instead of being stripped.
    text = unescape(text)
    # Clean up whitespace line by line, then cap consecutive blank lines.
    cleaned_lines = [
        re.sub(r'[ \t]+', ' ', line).strip() for line in text.split('\n')
    ]
    text = re.sub(r'\n{3,}', '\n\n', '\n'.join(cleaned_lines))
    return text.strip()
# Typographic punctuation mapped to ASCII equivalents; NUL characters are
# deleted outright (a ``None`` target removes the character).
_PUNCTUATION_TABLE = str.maketrans({
    '’': "'",
    '‘': "'",
    '”': '"',
    '“': '"',
    '–': '-',
    '—': '-',
    '…': '...',
    '\x00': None,
})


def clean_text(text):
    """Return *text* as a stripped string with ASCII punctuation.

    Args:
        text: Any value. Falsy values yield ``''``; other non-string values
            are converted with ``str()``.

    Returns:
        The string with HTML entities unescaped, curly quotes, en/em dashes
        and ellipsis characters replaced by ASCII equivalents, NUL characters
        removed, and surrounding whitespace stripped.
    """
    if not text:
        return ''
    as_string = text if isinstance(text, str) else str(text)
    # Unescape first so entities such as &rdquo; are normalized as well.
    return unescape(as_string).translate(_PUNCTUATION_TABLE).strip()
def _read_json_object(path):
    """Return the JSON object stored at *path*, or ``{}`` if it is unusable.

    A missing file, malformed JSON, or a top-level value that is not a JSON
    object all yield an empty dict.
    """
    try:
        # utf-8-sig drops a leading byte-order mark, which json.load would
        # otherwise reject; undecodable bytes are ignored rather than fatal.
        with open(path, 'r', encoding='utf-8-sig', errors='ignore') as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}
    return data if isinstance(data, dict) else {}


def load_aliases():
    """Load the alias tables from the ``import/`` directory.

    Both files are optional and are read relative to the current working
    directory; loading is best-effort, so an absent or malformed file simply
    contributes no entries.

    Returns:
        A tuple ``(aliases, display_aliases)``.

        ``aliases`` comes from ``import/aliases.json``
        (e.g. "Bearded Millenial" -> "bm"). Keys and values are lowercased
        and reduced to ASCII letters and digits. Entries whose value is not a
        string, or whose key normalizes to the empty string, are skipped.

        ``display_aliases`` is the content of ``import/display_aliases.json``
        returned as loaded.
    """
    aliases = {}
    for raw_key, raw_value in _read_json_object('import/aliases.json').items():
        if not isinstance(raw_value, str):
            # Numbers, nulls or nested structures cannot be alias targets.
            continue
        key = re.sub(r'[^a-zA-Z0-9]', '', raw_key.lower())
        if not key:
            # An empty key would act as an alias for every name that
            # normalizes to nothing.
            continue
        aliases[key] = re.sub(r'[^a-zA-Z0-9]', '', raw_value.lower())

    # Display aliases are used for logging/reporting and are not normalized.
    display_aliases = _read_json_object('import/display_aliases.json')
    return aliases, display_aliases
def normalize_user(name, user_aliases):
    """Reduce a user name to its canonical lookup key.

    The name is passed through ``clean_text``, leading ``@`` characters are
    removed, parenthesized segments and the whole-word tokens ``qic``,
    ``fng`` and ``fngs`` (any case) are deleted, and the remainder is
    lowercased and reduced to ASCII letters and digits.

    Args:
        name: The raw user name. Falsy values yield ``''``.
        user_aliases: Mapping from normalized keys to their replacement.

    Returns:
        ``user_aliases[key]`` when the normalized key is present in
        *user_aliases*, otherwise the normalized key itself. ``''`` when the
        name is empty after cleaning and ``@`` removal.
    """
    if not name:
        return ''
    handle = clean_text(name).lstrip('@')
    if not handle:
        return ''
    # Applied in order: parentheticals first, then the standalone tokens.
    for pattern in (r'\(.*?\)', r'(?i)\bqic\b', r'(?i)\bfngs?\b'):
        handle = re.sub(pattern, '', handle)
    # Alphanumeric only
    key = re.sub(r'[^a-zA-Z0-9]', '', handle.lower())
    return user_aliases[key] if key in user_aliases else key
# Hour, optional minutes (either ":MM" or two directly adjacent digits, as in
# "0530"), optional am/pm marker with or without periods. The digit
# lookarounds stop the pattern from matching inside a longer run of digits.
_TIME_PATTERN = re.compile(
    r'(?<!\d)(\d{1,2})(?::(\d{2})|(\d{2}))?(?!\d)\s*(?:([ap])\.?m)?'
)


def format_time(time_str):
    """Convert a human-written clock time to a 24-hour ``HHMM`` string.

    Accepted forms include ``"5:30 PM"``, ``"5pm"``, ``"5:30 p.m."``,
    ``"17:30"``, ``"7"`` and compact ``"0530"`` / ``"530"``. The first
    time-like token found in the input is used.

    Args:
        time_str: The value to parse; converted with ``str()`` and matched
            case-insensitively. Falsy values yield ``''``.

    Returns:
        A zero-padded four-character string such as ``"1730"``, or ``''``
        when no time is found or the hour/minute is out of range
        (hour > 23 or minute > 59).
    """
    if not time_str:
        return ''
    match = _TIME_PATTERN.search(str(time_str).strip().lower())
    if not match:
        return ''
    hour = int(match.group(1))
    # Minutes come from whichever form matched; a bare hour means ":00".
    minute = int(match.group(2) or match.group(3) or 0)
    meridiem = match.group(4)
    if meridiem == 'p' and hour < 12:
        hour += 12
    elif meridiem == 'a' and hour == 12:
        # 12 am is midnight.
        hour = 0
    if hour > 23 or minute > 59:
        return ''
    return f"{hour:02d}{minute:02d}"
def _csv_field(row, key):
    """Return the stripped value of *key* in *row*, or ``''`` if absent.

    ``csv.DictReader`` fills columns missing from a short row with ``None``,
    so the value is coalesced before any string method is applied.
    """
    return (row.get(key) or '').strip()


def load_locations(locations_csv):
    """Load workout locations from a CSV file.

    The file is expected to have the header columns ``Workout``, ``orgId``,
    ``locationId``, ``startTime`` and ``weekDay``. Rows with an empty
    ``Workout`` are skipped; missing columns are treated as empty.

    Args:
        locations_csv: Path to the CSV file.

    Returns:
        A tuple ``(locations, weekday_map)``. Both are empty dicts when the
        file does not exist.

        ``locations`` maps each workout name to a dict with the keys
        ``org_id`` and ``location_id`` (commas removed), ``start_time``
        (the result of ``format_time``) and ``weekday``.

        ``weekday_map`` maps each non-empty weekday to the workout of the
        last row that listed it.
    """
    locations = {}
    weekday_map = {}
    if not os.path.exists(locations_csv):
        return locations, weekday_map
    # utf-8-sig drops a byte-order mark that would otherwise become part of
    # the first header name; newline='' lets the csv module handle line
    # endings itself, as its documentation requires.
    with open(locations_csv, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            workout = _csv_field(row, 'Workout')
            if not workout:
                continue
            weekday = _csv_field(row, 'weekDay')
            locations[workout] = {
                'org_id': _csv_field(row, 'orgId').replace(',', ''),
                'location_id': _csv_field(row, 'locationId').replace(',', ''),
                'start_time': format_time(_csv_field(row, 'startTime')),
                'weekday': weekday,
            }
            if weekday:
                weekday_map[weekday] = workout
    return locations, weekday_map