-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_chart_details.py
More file actions
178 lines (146 loc) · 6.15 KB
/
get_chart_details.py
File metadata and controls
178 lines (146 loc) · 6.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# %%
"""
Purpose
Get details of all charts in a Datawrapper folder including embed codes and save to Excel
Inputs
- API: Datawrapper API
Outputs
- xlsx: Chart numbering lookup file
Notes
None
"""
import logging
import os
import pandas as pd
from pandas.io.formats import excel
from utils import get_chart, get_folder, get_iframe_code, validate_api_token, check_alt_text, check_title
logger = logging.getLogger(__name__)
# %%
# SET CONSTANTS
# ID of the Datawrapper folder whose charts are listed
FOLDER_ID = 340017
# Save into the current user's Downloads folder. The home directory is resolved with
# expanduser instead of being assembled from os.getlogin(): that call raises OSError when
# there is no controlling terminal, and the profile folder is not guaranteed to be
# C:/Users/<login name>.
OUTPUT_PATH = os.path.join(os.path.expanduser("~"), "Downloads")
CHART_NUMBERING_FILE_PATH = os.path.join(OUTPUT_PATH, "Datawrapper chart numbering - WM2026.xlsx")
# %%
# DEFINE FUNCTIONS
def get_chart_details(
    folder_id: int,
    dw_folder_path: str = "",
    recursive: bool = False,
    skip_folder_name: str = "Archive",
    check_chart_title: bool = True,
    check_chart_alt_text: bool = True,
) -> list[dict]:
    """
    Get details of all charts in a Datawrapper folder.

    Parameters:
        folder_id: The ID of the folder to list charts from
        dw_folder_path: Folder path within Datawrapper for tracking hierarchy
        recursive: Whether to include charts from subfolders
        skip_folder_name: Name of folders to skip (default: "Archive")
        check_chart_title: Whether to check chart titles for problematic characters
        check_chart_alt_text: Whether to check for missing alt text

    Returns:
        List of dictionaries, one per chart, with the keys "Folder path",
        "Chart title", "Chart ID", "Chart number" (always left blank here),
        "Title issues", "Missing alt text" and "iframe code".

    Notes:
        Errors are logged rather than raised. A chart whose details cannot be
        retrieved is still listed, with placeholder error values. If the folder
        itself fails, the charts collected up to that point are returned.
    """
    charts_data = []

    try:
        folder = get_folder(folder_id=folder_id)
        folder_name = folder["name"]

        # Skip folder if it matches the skip_folder_name
        if folder_name == skip_folder_name:
            logger.info(f"Skipping folder: {folder_name}")
            return charts_data

        # Update folder path
        current_path = os.path.join(dw_folder_path, folder_name) if dw_folder_path else folder_name
        logger.info(f"Processing folder: {current_path}")

        # Process charts in current folder (a missing or empty "charts" entry means nothing to do)
        for chart in folder.get("charts") or []:
            try:
                chart_details = get_chart(chart_id=chart["id"])

                # Skip charts that still carry the placeholder title
                # NB: There tend to be a few blank charts per folder that are not visible in the UI
                chart_title = chart_details["title"]
                if chart_title == "[ Insert title here ]":
                    continue

                # Get responsive iframe code; a failure here must not lose the rest of the row
                try:
                    iframe_code = get_iframe_code(chart_id=chart["id"], responsive=True)
                except Exception as iframe_error:
                    logger.warning(f"Could not get iframe code for chart {chart['id']}: {iframe_error}")
                    iframe_code = "Error retrieving iframe code"

                # Check chart title for problematic characters
                title_issues_str = ""
                if check_chart_title:
                    title_issues = check_title(chart_title)
                    if title_issues:
                        title_issues_str = "; ".join(title_issues)

                # Check for missing alt text
                missing_alt_text = ""
                if check_chart_alt_text and check_alt_text(chart_details):
                    missing_alt_text = "Yes"

                chart_info = {
                    "Folder path": current_path,
                    "Chart title": chart_title,
                    "Chart ID": chart["id"],
                    "Chart number": "",
                    "Title issues": title_issues_str,
                    "Missing alt text": missing_alt_text,
                    "iframe code": iframe_code,
                }
                charts_data.append(chart_info)
                logger.info(f"Found chart: {chart['id']} - {chart_info['Chart title']}")
            except Exception as e:
                # Keep a row for the failed chart so it still shows up in the output
                charts_data.append(
                    {
                        "Folder path": current_path,
                        "Chart title": "Error retrieving title",
                        "Chart ID": chart["id"],
                        "Chart number": "",
                        "Title issues": "",
                        "Missing alt text": "",
                        "iframe code": "Error retrieving iframe code",
                    }
                )
                logger.error(f"Error getting details for chart {chart['id']}: {e}")

        if recursive:
            for child_folder in folder.get("children") or []:
                # Forward every option so subfolders honour the caller's check_* flags
                # instead of silently falling back to the defaults.
                child_charts = get_chart_details(
                    folder_id=child_folder["id"],
                    dw_folder_path=current_path,
                    recursive=True,
                    skip_folder_name=skip_folder_name,
                    check_chart_title=check_chart_title,
                    check_chart_alt_text=check_chart_alt_text,
                )
                charts_data.extend(child_charts)
    except Exception as e:
        logger.error(f"Error processing folder {folder_id}: {e}")

    return charts_data
# %%
# EXECUTE
# Configure logging first so every later message uses the timestamped format
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

validate_api_token()

logger.info(f"Listing charts from folder ID: {FOLDER_ID}")
logger.info(f"Output file: {CHART_NUMBERING_FILE_PATH}")
logger.info("-" * 50)

# Collect every chart below the root folder, leaving out "Archive" folders
charts_data = get_chart_details(
    folder_id=FOLDER_ID,
    recursive=True,
    skip_folder_name="Archive",
    check_chart_title=True,
    check_chart_alt_text=True,
)

# Write the results to Excel with pandas' default header styling switched off
excel.ExcelFormatter.header_style = None
if not charts_data:
    logger.info("No charts found in the specified folder.")
else:
    # Column order of the output workbook
    column_order = [
        "Folder path",
        "Chart title",
        "Chart ID",
        "Chart number",
        "Title issues",
        "Missing alt text",
        "iframe code",
    ]
    df = pd.DataFrame(charts_data)
    df = df[column_order]
    df.to_excel(CHART_NUMBERING_FILE_PATH, index=False)
    logger.info("-" * 50)
    logger.info(f"Successfully saved {len(charts_data)} charts to {CHART_NUMBERING_FILE_PATH}")
# %%