Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions db/migrations/20260619090000_create_moderation_case_table.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Names of the tables this migration creates; shared by `up` and `down`.
// One row per moderation case.
const caseTable = 'moderation_case'
// Links reporting users (and optionally a `report` row) to a case.
const reporterTable = 'moderation_case_reporter'
// Per-case event rows (see the `event_type` enum in `up`).
const eventTable = 'moderation_event'

export const up = async (knex) => {
await knex('entity_type').insert([
{ table: caseTable },
{ table: reporterTable },
{ table: eventTable },
])

await knex.schema.createTable(caseTable, (t) => {
t.bigIncrements('id').primary()
t.enu('source', [
'direct_report',
'community_watch',
'admin',
'system',
'model_assisted',
'automated',
]).notNullable()
t.enu('target_type', [
'article',
'comment',
'moment',
'user',
'tag',
'other',
]).notNullable()
t.bigInteger('target_id').unsigned().notNullable()
t.bigInteger('primary_reporter_id').unsigned()
t.string('reason').notNullable()
t.text('public_reason')
t.enu('status', [
'received',
'reviewing',
'action_taken',
'rejected',
'appealed',
'resolved',
'closed',
])
.notNullable()
.defaultTo('received')
t.enu('outcome', [
'no_action',
'content_collapsed',
'content_hidden',
'content_removed',
'account_limited',
'restored',
'partially_restored',
'upheld',
])
t.enu('automation_role', [
'none',
'suggested',
'assisted',
'automated',
])
.notNullable()
.defaultTo('none')
t.string('model_name')
t.string('model_version')
t.enu('notice_state', [
'not_required',
'pending',
'sent',
'delayed',
'prohibited',
'failed',
])
.notNullable()
.defaultTo('not_required')
t.timestamp('resolved_at')
t.timestamp('closed_at')
t.timestamp('created_at').defaultTo(knex.fn.now())
t.timestamp('updated_at').defaultTo(knex.fn.now())

t.foreign('primary_reporter_id').references('id').inTable('user')
t.index(['source', 'created_at'])
t.index(['target_type', 'target_id'])
t.index(['status', 'created_at'])
t.index(['outcome', 'created_at'])
t.index(['automation_role', 'created_at'])
t.unique(['source', 'target_type', 'target_id', 'reason'])
})

await knex.schema.createTable(reporterTable, (t) => {
t.bigIncrements('id').primary()
t.bigInteger('case_id').unsigned().notNullable()
t.bigInteger('reporter_id').unsigned().notNullable()
t.bigInteger('report_id').unsigned()
t.timestamp('reported_at').defaultTo(knex.fn.now())

t.foreign('case_id').references('id').inTable(caseTable).onDelete('CASCADE')
t.foreign('reporter_id').references('id').inTable('user')
t.foreign('report_id').references('id').inTable('report').onDelete('SET NULL')
t.unique(['case_id', 'reporter_id'])
t.index(['reporter_id', 'reported_at'])
t.index(['report_id'])
})

await knex.schema.createTable(eventTable, (t) => {
t.bigIncrements('id').primary()
t.bigInteger('case_id').unsigned().notNullable()
t.enu('event_type', [
'created',
'notified',
'reviewed',
'actioned',
'appealed',
'restored',
'closed',
'exported',
]).notNullable()
t.enu('actor_type', [
'user',
'community_watcher',
'admin',
'system',
'model',
]).notNullable()
t.bigInteger('actor_id').unsigned()
t.text('public_reason')
t.text('internal_note')
t.string('from_status')
t.string('to_status')
t.string('from_outcome')
t.string('to_outcome')
t.jsonb('metadata')
t.timestamp('created_at').defaultTo(knex.fn.now())

t.foreign('case_id').references('id').inTable(caseTable).onDelete('CASCADE')
t.foreign('actor_id').references('id').inTable('user')
t.index(['case_id', 'created_at'])
t.index(['event_type', 'created_at'])
t.index(['actor_type', 'created_at'])
})
}

/**
 * Reverts the migration.
 *
 * Drops the event and reporter tables before the case table they reference,
 * then deletes the `entity_type` rows that `up` inserted for the three tables.
 *
 * @param knex - Knex instance.
 * @returns {Promise<void>}
 */
export const down = async (knex) => {
  // Listed in creation order; dropping happens in the reverse order.
  const migrationTables = [caseTable, reporterTable, eventTable]

  for (const tableName of [...migrationTables].reverse()) {
    await knex.schema.dropTable(tableName)
  }

  const registeredRows = knex('entity_type').whereIn('table', migrationTables)
  await registeredRows.del()
}
51 changes: 51 additions & 0 deletions docs/NCCTransparencyExternalMetrics.example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"government_requests": {
"total": 2,
"by_jurisdiction": {
"TW": 2
},
"by_agency_type": {
"court": 1,
"law_enforcement": 1
},
"by_request_type": {
"data_request": 1,
"content_restriction": 1
},
"by_data_type": {
"account_information": 1,
"content": 1
},
"by_result": {
"rejected": 1,
"partially_complied": 1
},
"by_user_notice": {
"notified": 1,
"prohibited": 1
}
},
"privacy_requests": {
"total": 3,
"access": 1,
"correction": 0,
"deletion": 2,
"restriction": 0
},
"policy_changes": [
{
"date": "2026-03-01",
"category": "content_rules",
"summary": "Updated public content handling rules for spam reports.",
"public_url": "/transparency/automation"
}
],
"model_changes": [
{
"date": "2026-04-15",
"category": "spam_detection",
"summary": "Moved comment spam classifier to review-only monitoring."
}
],
"recommendation_changes": []
}
95 changes: 95 additions & 0 deletions docs/NCCTransparencyMetrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# NCC Transparency Metrics Export

This export produces aggregated moderation and Community Watch metrics for a
fixed reporting period. It is intended for transparency report drafting and
review, not for public case-level disclosure.

## Usage

Build the server first, then run the export command with an inclusive date
range.

Make sure the usual `MATTERS_PG_*` database environment variables point to the
database snapshot intended for the report before running the command.

```bash
npm run build
npm run transparency:export -- \
--start=2026-01-01 \
--end=2026-06-30 \
--timezone=Asia/Taipei \
--slug=2026-H1 \
--out-dir=./tmp/transparency-metrics
```

The command writes both of the files below, named after the `--slug` value.

```text
transparency-metrics-2026-H1.json
transparency-metrics-2026-H1.csv
```

## Optional External Structured Metrics

Government, legal, and privacy request logs may live outside the application
database. Policy, model, and recommendation change logs may also come from
reviewed public documentation instead of database tables. When those sources
have been reviewed and reduced to safe structured fields, pass a local JSON
file with `--external-metrics`.

```bash
npm run build
npm run transparency:export -- \
--start=2026-01-01 \
--end=2026-06-30 \
--timezone=Asia/Taipei \
--slug=2026-H1 \
--out-dir=./tmp/transparency-metrics \
--external-metrics=/path/to/private/aggregate-transparency-metrics.json
```

Request metrics in the external file must contain aggregate counts only.
Change logs may contain public-safe date, category, summary, and public URL
fields. The file must not include case records, requester names, email
addresses, IP addresses, original content, internal notes, reviewer notes,
legal document text, attachments, or private case identifiers.

See `docs/NCCTransparencyExternalMetrics.example.json` for the supported
schema. Unknown fields are rejected so accidental case-level data does not
enter the export.

## Included Metrics

- Moderation case totals and distributions by target type, source, reason,
status, outcome, automation role, and notice state.
- Appeal counts from moderation events and Community Watch review events.
- Handling-time aggregates for moderation cases with `resolved_at`.
- Community Watch action counts, reason distribution, appeals, restores, and
staff review counts.
- Explicit `not_recorded` fields for government requests, privacy requests,
policy changes, model changes, and recommendation changes whenever no
reviewed external metrics file is supplied.
- Optional government request, privacy request, policy change, model change,
and recommendation change metrics when a reviewed external metrics JSON file
is provided.

## Privacy Boundary

The export only includes aggregate bucket counts. It must not include user ids,
email addresses, IP addresses, original content, internal notes, reviewer notes,
report ids, legal document content, or case-level records.

The focused test at
`src/connectors/__test__/transparencyService.test.ts` asserts this boundary by
seeding sensitive source fields and checking that the generated JSON omits them.

## Data Status

The first version marks the overall dataset as `partial` because legacy reports
do not yet have complete structured sources. Government request, privacy
request, policy change, model change, and recommendation change metrics are
marked `not_recorded` unless a reviewed external metrics file is passed with
`--external-metrics`.

Do not convert `not_recorded` or `unknown` fields into `0` in the public report.
Zero means a fully recorded source had no matching events in the period.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"lint:fix": "eslint . --ext .ts --fix",
"format": "prettier --write \"{,!(node_modules|build|coverage)/**/}*.{js,jsx,ts,tsx,json}\"",
"format:check": "npm run format -- --list-different",
"transparency:export": "node build/handlers/bin/exportTransparencyMetrics.js",
"gen:schema": "tsc -p . && node build/common/utils/exportSchema.js",
"gen:types": "graphql-codegen-esm --config codegen.json # detail docs see https://the-guild.dev/graphql/codegen/plugins/typescript/typescript",
"gen": "npm run gen:schema && npm run gen:types",
Expand Down
Loading
Loading