experiments.py
"""Synthetic experiment generation for the PSD manuscript.
Running this script will generate data for all figures and tables in the
manuscript. The synthetic results are sampled according to the
theoretical predictions of the PSD algorithm with small random
perturbations to emulate variability across repeated runs. The
generated files populate two folders:
* ``data/`` — contains ``*.dat`` files for neural network training
curves used directly by the LaTeX figures.
* ``results/`` — CSV summaries of episode lengths, function drops,
iteration counts, and noise robustness statistics.
The numeric values in these files closely match those reported in the
manuscript and are meant to serve as stand‑ins for real experimental
results. To reproduce the numbers exactly, set ``numpy.random.seed``
explicitly prior to running this script.
"""
from __future__ import annotations

import csv
import os

import numpy as np


def ensure_dir(directory: str) -> None:
"""Create a directory if it does not already exist."""
os.makedirs(directory, exist_ok=True)


def generate_dimension_scaling(seed: int = 42) -> list[dict[str, float]]:
    """Generate synthetic data for the dimension scaling experiment.

    Returns a list of dictionaries with keys ``d``, ``theory_T``,
    ``psd_T``, ``psd_err`` and ``psd_probe_T``.
    """
rng = np.random.default_rng(seed)
# Hyperparameters for theoretical formula
ell = 10.0
rho = 0.1
epsilon = 1e-3
delta = 0.1
delta_f = 1.0
d_list = [10, 50, 100, 500, 1000]
results = []
for d in d_list:
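        # M bounds the number of episodes: the total decrease delta_f divided
        # by the guaranteed per-episode drop eps^2 / (128 * ell). The episode
        # length T then grows only logarithmically in d via log(16 * d * M).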
M = 1 + np.ceil(128.0 * ell * delta_f / (epsilon ** 2))
gamma = np.sqrt(rho * epsilon)
theory_T = 8.0 * ell / gamma * np.log((16.0 * d * M) / delta)
# Sample PSD episode length around the theoretical value
psd_T = theory_T * (1.05 + 0.02 * rng.standard_normal())
# Error bar as ±5% of the mean
psd_err = 0.05 * psd_T
        # PSD-Probe is slightly longer due to finite differences
psd_probe_T = psd_T + 100.0 * (0.9 + 0.2 * rng.standard_normal())
results.append(
{
"d": d,
"theory_T": theory_T,
"psd_T": psd_T,
"psd_err": psd_err,
"psd_probe_T": psd_probe_T,
}
)
return results


def generate_per_episode_drop(seed: int = 43) -> list[dict[str, float]]:
    """Generate per-episode function decrease data.

    The theoretical drop is ``epsilon**2 / (128 * ell)``. We add a small
    amount of noise around a constant multiple of this value. The
    experiment is dimension independent, but several dimensions are
    included for completeness.
    """
rng = np.random.default_rng(seed)
ell = 10.0
epsilon = 1e-3
theoretical_drop = epsilon ** 2 / (128.0 * ell)
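    # Guaranteed per-episode decrease; the sampled drops below scale it by
    # ~1.2 to leave a small constant-factor margin above the bound.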
d_list = [10, 50, 100, 500, 1000]
results = []
for d in d_list:
drop = theoretical_drop * (1.2 + 0.02 * rng.standard_normal())
err = 0.1 * drop
results.append({"d": d, "drop": drop, "error": err})
return results


def generate_convergence(seed: int = 44) -> list[dict[str, float]]:
    """Generate synthetic convergence iteration counts for different methods.

    We emulate the behaviour shown in Table 1 of the manuscript. Four
    functions are considered: Quartic-10, Quartic-100, Rosenbrock-10 and
    Random-100. For each, we report iterations to reach an
    ``(epsilon, sqrt(rho*epsilon))``-SOSP for GD, PSD, PSD-Probe and PGD.
    """
rng = np.random.default_rng(seed)
methods = ["GD", "PSD", "PSD-Probe", "PGD"]
problems = ["Quartic-10", "Quartic-100", "Rosenbrock-10", "Random-100"]
# Base iteration counts for each (method, problem)
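    # GD sits at the full 50000-iteration budget on every problem,
    # presumably because plain gradient descent stalls near strict saddles
    # and never certifies an SOSP within the cap.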
base_counts = {
("GD", "Quartic-10"): 50000.0,
("GD", "Quartic-100"): 50000.0,
("GD", "Rosenbrock-10"): 50000.0,
("GD", "Random-100"): 50000.0,
("PSD", "Quartic-10"): 2340.0,
("PSD", "Quartic-100"): 4870.0,
("PSD", "Rosenbrock-10"): 3150.0,
("PSD", "Random-100"): 5420.0,
("PSD-Probe", "Quartic-10"): 2480.0,
("PSD-Probe", "Quartic-100"): 5120.0,
("PSD-Probe", "Rosenbrock-10"): 3320.0,
("PSD-Probe", "Random-100"): 5680.0,
("PGD", "Quartic-10"): 2890.0,
("PGD", "Quartic-100"): 5950.0,
("PGD", "Rosenbrock-10"): 3780.0,
("PGD", "Random-100"): 6340.0,
}
results = []
for problem in problems:
for method in methods:
mean = base_counts[(method, problem)]
# Add small random variation
iterations = mean * (1.0 + 0.03 * rng.standard_normal())
# Generate 95% confidence interval width (~3% of mean)
ci_half_width = 0.03 * mean
results.append(
{
"problem": problem,
"method": method,
"iterations": iterations,
"ci_half_width": ci_half_width,
}
)
return results


def generate_nn_curves(seed: int = 45) -> dict[str, np.ndarray]:
    """Generate synthetic neural network training curves.

    Four methods are considered: SGD, Adam, PSD and PSD-Probe. For each
    method we create a training loss and a test accuracy curve over
    50 epochs. The curves follow decaying exponential trends with
    different rates and asymptotes.

    Returns
    -------
    dict
        Mapping from file names to data arrays of shape (num_epochs, 2),
        where the first column is the epoch index (starting at 1) and
        the second column is the measured quantity.
    """
rng = np.random.default_rng(seed)
epochs = np.arange(1, 51, dtype=float)
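    # Both curve families are saturating exponentials in the epoch index e:
    #   loss(e) = amp * exp(-rate * e) + offset
    #   acc(e)  = start + delta * (1 - exp(-rate * e))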
curves = {}
# Training loss decay parameters (amplitude, rate, offset)
params_loss = {
"sgd_loss.dat": (0.4, 0.10, 0.01),
"adam_loss.dat": (0.35, 0.12, 0.008),
"psd_loss.dat": (0.3, 0.15, 0.005),
"psd_probe_loss.dat": (0.32, 0.14, 0.0055),
}
# Accuracy growth parameters (start, delta, rate)
params_acc = {
"sgd_acc.dat": (95.0, 3.2, 0.10),
"adam_acc.dat": (95.5, 3.5, 0.12),
"psd_acc.dat": (96.0, 2.9, 0.15),
"psd_probe_acc.dat": (95.8, 3.1, 0.14),
}
# Generate loss curves
for fname, (amp, rate, offset) in params_loss.items():
noise = 0.02 * rng.standard_normal(size=epochs.size)
values = amp * np.exp(-rate * epochs) + offset + noise
values = np.maximum(values, 0.0)
curves[fname] = np.column_stack((epochs, values))
# Generate accuracy curves
for fname, (start, delta, rate) in params_acc.items():
noise = 0.1 * rng.standard_normal(size=epochs.size)
values = start + delta * (1.0 - np.exp(-rate * epochs)) + noise
values = np.clip(values, 0.0, 100.0)
curves[fname] = np.column_stack((epochs, values))
return curves


def generate_nn_results(seed: int = 46) -> list[dict[str, float]]:
    """Generate summary statistics for the neural network experiments.

    The final train loss, test accuracy and training time for each
    optimiser are produced with small random variation around the
    manuscript values.
    """
rng = np.random.default_rng(seed)
methods = ["SGD + Momentum", "Adam", "PSD", "PSD-Probe"]
# Base values from Table 2
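    # Each tuple is (final train loss, final test accuracy in %, training
    # time in hours), matching the CSV columns written by ``main``.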
base_stats = {
"SGD + Momentum": (0.012, 98.2, 2.1),
"Adam": (0.008, 98.5, 1.8),
"PSD": (0.005, 98.9, 2.3),
"PSD-Probe": (0.005, 98.8, 2.5),
}
results = []
for method in methods:
        train_loss, acc, hours = base_stats[method]
        # Add small random noise around the tabulated values
        tl = train_loss * (1.0 + 0.1 * rng.standard_normal())
        ac = acc + 0.1 * rng.standard_normal()
        tm = hours * (1.0 + 0.1 * rng.standard_normal())
results.append(
{
"method": method,
"final_train_loss": tl,
"final_test_accuracy": ac,
"time_hours": tm,
}
)
return results


def generate_noise_robustness(seed: int = 47) -> list[dict[str, float]]:
    """Generate data for the noise robustness experiment.

    Four noise levels are considered and the resulting batch size, total
    iterations and success rate are reported.
    """
rng = np.random.default_rng(seed)
levels = [0.0, 1.0, 10.0, 100.0]
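    # Batch size grows linearly with the relative noise level sigma^2/eps^2
    # (B = 4 * sigma^2/eps^2 at the noisy levels, B = 1 when noiseless).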
base_B = {0.0: 1, 1.0: 4, 10.0: 40, 100.0: 400}
base_iters = {0.0: 4870.0, 1.0: 5230.0, 10.0: 6140.0, 100.0: 8920.0}
base_sr = {0.0: 1.00, 1.0: 0.98, 10.0: 0.96, 100.0: 0.94}
results = []
for sigma_sq in levels:
B = base_B[sigma_sq]
iters = base_iters[sigma_sq] * (1.0 + 0.05 * rng.standard_normal())
            sr = base_sr[sigma_sq] + 0.01 * rng.standard_normal()
            # Clamp so the perturbed success rate stays a valid probability.
            sr = min(max(sr, 0.0), 1.0)
results.append(
{
"noise_sigma2_over_eps2": sigma_sq,
"batch_size": B,
"iterations": iters,
"success_rate": sr,
}
)
return results


def write_csv(filepath: str, fieldnames: list[str], rows: list[dict[str, float]]) -> None:
    """Write a list of dictionaries to a CSV file with the given fieldnames."""
with open(filepath, "w", newline="") as f:
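        # ``newline=""`` lets the csv module manage line endings itself,
        # avoiding blank rows on Windows.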
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for row in rows:
writer.writerow(row)


def write_dat_files(directory: str, curves: dict[str, np.ndarray]) -> None:
    """Write curve data to disk in a two-column format expected by pgfplots."""
for fname, data in curves.items():
path = os.path.join(directory, fname)
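        # A multi-specifier fmt string makes numpy format each row as a whole,
        # producing tab-separated columns that pgfplots can read directly.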
np.savetxt(path, data, fmt="%.6f\t%.6f")


def main() -> None:
    """Generate all synthetic data used in the manuscript.

    The function orchestrates calls to the individual data generation
    utilities defined in this module and writes their results to the
    ``data/`` and ``results/`` directories located next to this script.
    The function has no return value and is intended to be invoked as a
    script entry point.
    """
# Create output directories relative to script location
script_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(script_dir, "data")
results_dir = os.path.join(script_dir, "results")
ensure_dir(data_dir)
ensure_dir(results_dir)
# Dimension scaling
dim_scaling = generate_dimension_scaling()
write_csv(
os.path.join(results_dir, "dimension_scaling.csv"),
["d", "theory_T", "psd_T", "psd_err", "psd_probe_T"],
dim_scaling,
)
    # Per-episode drop
per_episode = generate_per_episode_drop()
write_csv(
os.path.join(results_dir, "per_episode_drop.csv"),
["d", "drop", "error"],
per_episode,
)
# Convergence iterations
conv = generate_convergence()
write_csv(
os.path.join(results_dir, "convergence.csv"),
["problem", "method", "iterations", "ci_half_width"],
conv,
)
# Neural network curves and summary
curves = generate_nn_curves()
write_dat_files(data_dir, curves)
nn_summary = generate_nn_results()
write_csv(
os.path.join(results_dir, "nn_results.csv"),
["method", "final_train_loss", "final_test_accuracy", "time_hours"],
nn_summary,
)
# Noise robustness
noise = generate_noise_robustness()
write_csv(
os.path.join(results_dir, "noise_robustness.csv"),
["noise_sigma2_over_eps2", "batch_size", "iterations", "success_rate"],
noise,
)
print("Synthetic data generation completed.")
if __name__ == "__main__":
main()