-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinit.sql
More file actions
273 lines (247 loc) · 10.9 KB
/
init.sql
File metadata and controls
273 lines (247 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
CREATE TABLE cml_data (
time TIMESTAMPTZ NOT NULL,
cml_id TEXT NOT NULL,
sublink_id TEXT NOT NULL,
rsl REAL,
tsl REAL,
user_id TEXT NOT NULL DEFAULT 'user1'
);
CREATE TABLE cml_metadata (
cml_id TEXT NOT NULL,
sublink_id TEXT NOT NULL,
site_0_lon REAL,
site_0_lat REAL,
site_1_lon REAL,
site_1_lat REAL,
frequency REAL,
polarization TEXT,
length REAL,
user_id TEXT NOT NULL DEFAULT 'user1',
PRIMARY KEY (cml_id, sublink_id, user_id),
-- Backward-compat constraint: keeps the parser's ON CONFLICT (cml_id, sublink_id)
-- clause valid until PR3 (feat/parser-user-id) updates it.
UNIQUE (cml_id, sublink_id)
);
CREATE TABLE cml_stats (
cml_id TEXT NOT NULL,
user_id TEXT NOT NULL DEFAULT 'user1',
total_records BIGINT,
valid_records BIGINT,
null_records BIGINT,
completeness_percent REAL,
min_rsl REAL,
max_rsl REAL,
mean_rsl REAL,
stddev_rsl REAL,
last_rsl REAL,
last_update TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (cml_id, user_id)
);
-- update_cml_stats(target_cml_id, target_user_id)
--
-- target_user_id defaults to 'user1' so the existing single-argument call
-- sites in the parser continue to work until PR3 updates them.
CREATE OR REPLACE FUNCTION update_cml_stats(
target_cml_id TEXT,
target_user_id TEXT DEFAULT 'user1'
) RETURNS VOID AS $$
BEGIN
INSERT INTO cml_stats (
cml_id,
user_id,
total_records,
valid_records,
null_records,
completeness_percent,
min_rsl,
max_rsl,
mean_rsl,
stddev_rsl,
last_rsl,
last_update
)
SELECT
cd.cml_id::text,
target_user_id,
COUNT(*) AS total_records,
COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) AS valid_records,
COUNT(CASE WHEN cd.rsl IS NULL THEN 1 END) AS null_records,
ROUND(
100.0 * COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) / COUNT(*),
2
) AS completeness_percent,
MIN(cd.rsl) AS min_rsl,
MAX(cd.rsl) AS max_rsl,
ROUND(AVG(cd.rsl)::numeric, 2) AS mean_rsl,
ROUND(STDDEV(cd.rsl)::numeric, 2) AS stddev_rsl,
(
SELECT rsl FROM cml_data
WHERE cml_id = cd.cml_id
AND user_id = target_user_id
ORDER BY time DESC LIMIT 1
) AS last_rsl,
NOW()
FROM cml_data cd
WHERE cd.cml_id = target_cml_id
AND cd.user_id = target_user_id
GROUP BY cd.cml_id
ON CONFLICT (cml_id, user_id) DO UPDATE SET
total_records = EXCLUDED.total_records,
valid_records = EXCLUDED.valid_records,
null_records = EXCLUDED.null_records,
completeness_percent = EXCLUDED.completeness_percent,
min_rsl = EXCLUDED.min_rsl,
max_rsl = EXCLUDED.max_rsl,
mean_rsl = EXCLUDED.mean_rsl,
stddev_rsl = EXCLUDED.stddev_rsl,
last_rsl = EXCLUDED.last_rsl,
last_update = EXCLUDED.last_update;
END;
$$ LANGUAGE plpgsql;
SELECT create_hypertable('cml_data', 'time');
-- Per-user lookup indexes.
CREATE INDEX idx_cml_data_user_id ON cml_data (user_id);
CREATE INDEX idx_cml_metadata_user_id ON cml_metadata (user_id);
-- Index is created by the archive_loader service after bulk data load (faster COPY).
-- If no archive data is loaded, create it manually:
-- CREATE INDEX idx_cml_data_cml_id ON cml_data (cml_id, time DESC);
-- ---------------------------------------------------------------------------
-- 1-hour continuous aggregate for fast queries over large time ranges.
-- Grafana and the webserver automatically switch to this view when the
-- requested time range exceeds 3 days, reducing the scanned row count
-- by ~360x (10-second raw data → 1-hour buckets).
-- ---------------------------------------------------------------------------
CREATE MATERIALIZED VIEW cml_data_1h
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket,
user_id,
cml_id,
sublink_id,
MIN(rsl) AS rsl_min,
MAX(rsl) AS rsl_max,
AVG(rsl) AS rsl_avg,
MIN(tsl) AS tsl_min,
MAX(tsl) AS tsl_max,
AVG(tsl) AS tsl_avg
FROM cml_data
GROUP BY bucket, user_id, cml_id, sublink_id
WITH NO DATA;
-- Automatically refresh every hour, covering up to 2 days of history.
-- The 1-hour end_offset prevents partial (in-progress) buckets from being
-- materialised prematurely; very recent data reads through to raw cml_data.
SELECT add_continuous_aggregate_policy('cml_data_1h',
start_offset => INTERVAL '2 days',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour'
);
-- ---------------------------------------------------------------------------
-- Compression for cml_data chunks older than 7 days.
--
-- compress_segmentby: one compressed segment per (user_id, cml_id).
-- user_id is the leading key so a per-user query skips all other users'
-- segments entirely. sublink_id is intentionally omitted: ~80% of CMLs
-- have 2 sublinks and ~15% have 4; keeping sublinks together in one
-- segment roughly halves decompression work per CML query vs. splitting
-- by sublink. Filtering to a specific sublink after decompression is a
-- trivial CPU operation on already-decompressed columnar data.
-- compress_orderby: matches the query pattern (time range scans), allowing
-- skip-scan decompression for narrow time windows within a segment.
--
-- At ~10-20x compression ratio, the last month of data fits in shared_buffers
-- after a single cache warm-up, regardless of how many new streams are added.
-- The current uncompressed week chunk is left untouched so real-time ingestion
-- and detail-view queries on recent data have no decompression overhead.
-- ---------------------------------------------------------------------------
-- Note: TimescaleDB does not allow ENABLE ROW LEVEL SECURITY on a compressed
-- hypertable, and compression cannot be set on an RLS-enabled table. These
-- two features are mutually exclusive on the same hypertable. Per-user
-- isolation for cml_data is provided by the cml_data_secure view below.
ALTER TABLE cml_data SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'user_id, cml_id',
timescaledb.compress_orderby = 'time DESC'
);
SELECT add_compression_policy('cml_data', INTERVAL '7 days');
-- ---------------------------------------------------------------------------
-- Database roles and Row-Level Security (PR feat/db-roles-rls)
--
-- Role naming convention: PG login role name = user_id value in the data.
-- "user1" role ↔ user_id = 'user1' — enables current_user-based RLS
-- policies and the cml_data_1h_secure security-barrier view below.
--
-- user1: used by the user1 parser instance (writes) and by the webserver
-- (via SET ROLE) for DB-enforced scoped reads.
-- webserver_role: used by the webserver process. Has a read-all RLS policy
-- for admin/aggregate queries; SET ROLEs to a user role for scoped reads.
--
-- Passwords shown here are development defaults.
-- Override them via environment variables or a secrets manager in production.
-- ---------------------------------------------------------------------------
CREATE ROLE user1 LOGIN PASSWORD 'user1password';
CREATE ROLE webserver_role LOGIN PASSWORD 'webserverpassword';
-- Allow webserver_role to impersonate user roles (SET ROLE user1).
GRANT user1 TO webserver_role;
-- Schema access.
GRANT USAGE ON SCHEMA public TO user1, webserver_role;
-- Table permissions.
GRANT SELECT, INSERT, UPDATE ON cml_data TO user1;
GRANT SELECT, INSERT, UPDATE ON cml_metadata TO user1;
GRANT SELECT, INSERT, UPDATE ON cml_stats TO user1;
GRANT SELECT ON cml_data TO webserver_role;
GRANT SELECT ON cml_metadata TO webserver_role;
GRANT SELECT ON cml_stats TO webserver_role;
-- Parser calls update_cml_stats() to upsert per-CML statistics.
GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1;
-- Row-Level Security on cml_metadata and cml_stats.
-- cml_data is excluded: TimescaleDB does not allow RLS on compressed
-- hypertables (and compression cannot be set on an RLS-enabled table).
-- Per-user isolation for raw cml_data queries is provided by the
-- cml_data_secure security-barrier view defined below.
ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY;
ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY;
-- Generic current_user policies for cml_metadata and cml_stats.
-- Because role name = user_id value, one policy per table covers all users.
CREATE POLICY user_cml_metadata_policy ON cml_metadata
FOR ALL
USING (user_id = current_user)
WITH CHECK (user_id = current_user);
CREATE POLICY user_cml_stats_policy ON cml_stats
FOR ALL
USING (user_id = current_user)
WITH CHECK (user_id = current_user);
-- Permissive read-all policies for webserver_role (admin / cross-user use).
CREATE POLICY webserver_cml_metadata_policy ON cml_metadata
FOR SELECT TO webserver_role
USING (true);
CREATE POLICY webserver_cml_stats_policy ON cml_stats
FOR SELECT TO webserver_role
USING (true);
-- Security-barrier view over cml_data (compressed hypertable).
-- Provides per-user isolation for raw cml_data queries via
-- WHERE user_id = current_user. The security_barrier option prevents the
-- query optimizer from pushing caller-supplied predicates above the filter
-- (SQL injection protection). WITH CHECK OPTION rejects writes through
-- this view where user_id != current_user.
CREATE VIEW cml_data_secure WITH (security_barrier) AS
SELECT * FROM cml_data
WHERE user_id = current_user
WITH CHECK OPTION;
GRANT SELECT ON cml_data_secure TO user1;
GRANT SELECT ON cml_data_secure TO webserver_role;
-- Security-barrier view over cml_data_1h (continuous aggregate).
--
-- PostgreSQL cannot apply RLS to materialized views. This view wraps
-- cml_data_1h with WHERE user_id = current_user and security_barrier,
-- providing DB-enforced per-user filtering with no application WHERE clause.
--
-- User roles query cml_data_1h_secure (auto-filtered).
-- webserver_role queries cml_data_1h_secure after SET ROLE for user pages;
-- queries cml_data_1h directly (as webserver_role) for admin/cross-user
-- aggregates — those paths still need WHERE user_id = ? in the app.
CREATE VIEW cml_data_1h_secure WITH (security_barrier) AS
SELECT * FROM cml_data_1h
WHERE user_id = current_user;
GRANT SELECT ON cml_data_1h_secure TO user1;
GRANT SELECT ON cml_data_1h TO webserver_role;
GRANT SELECT ON cml_data_1h_secure TO webserver_role;