Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,8 @@ MEM_LIMIT=1073741824 # 1GB

# Project namespace (defaults to the current folder name if not set)
#COMPOSE_PROJECT_NAME=myproject

# Postgres (all three values are required by src/configs/pg-config.ts; fill in the user and password)
POSTGRES_DB=text-indexing
POSTGRES_USER=
POSTGRES_PASSWORD=
10 changes: 9 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
# Empty currently
# services:
# setup:
# image: node:22.9.0-alpine
# volumes:
# - .:/app
# - /app/node_modules/
# working_dir: /app
# # keep the container running
# command: tail -f /dev/null
20 changes: 20 additions & 0 deletions docker-compose/postgres.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# PostgreSQL service definition.
services:
postgresql:
image: postgres:15.8-alpine
environment:
# Database name and credentials are substituted from the POSTGRES_* variables.
- POSTGRES_DB=${POSTGRES_DB}
- POSTGRES_USER=${POSTGRES_USER}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
# Time zone settings, both pinned to Asia/Hong_Kong.
- TZ=Asia/Hong_Kong
- PGTZ=Asia/Hong_Kong
ports:
# Maps host port 5432 to container port 5432.
- 5432:5432
volumes:
# The data directory is stored in the named volume declared at the bottom of this file.
- postgresql-data:/var/lib/postgresql/data
networks:
# NOTE(review): `shared` is not declared in this file; presumably it is defined in
# another compose file passed alongside this one with -f — confirm.
- shared
restart: always

volumes:
postgresql-data:
driver: local
22 changes: 18 additions & 4 deletions drizzle.config.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@
import "dotenv/config";
import { defineConfig } from "drizzle-kit";
import { ConnectionOptions } from "tls";

import config from "./src/configs";

/** String SSL modes accepted in the `ssl` field of {@link Credential}. */
type SslMode = "require" | "allow" | "prefer" | "verify-full";

/**
 * Host-based PostgreSQL connection settings.
 *
 * This is the shape the `dbCredentials` object below is asserted to.
 */
type Credential = {
  host: string;
  port: number;
  database: string;
  user: string;
  password: string;
  /** Optional: a boolean, one of the string modes, or TLS connection options. */
  ssl?: boolean | SslMode | ConnectionOptions;
};

export default defineConfig({
schema: "src/db/schema/index.ts",
out: "src/db/migrations",
dialect: "sqlite",
dbCredentials: {
url: `file:${config.databases.db}`,
},
dialect: "postgresql",
dbCredentials: { ...config.databases.db, ssl: false } as Credential,
// Print all statements
verbose: true,
// Always ask for my confirmation
Expand Down
14 changes: 7 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,25 @@
"db:studio": "drizzle-kit studio",
"test": "jest",
"test:debug": "node --inspect node_modules/.bin/jest --runInBand",
"docker:up": "docker compose -f docker-compose.yml -f docker-compose/elasticsearch.yml up -d",
"docker:down": "docker compose -f docker-compose.yml -f docker-compose/elasticsearch.yml down"
"docker:up": "docker compose -f docker-compose.yml -f docker-compose/elasticsearch.yml -f docker-compose/postgres.yml up -d",
"docker:down": "docker compose -f docker-compose.yml -f docker-compose/elasticsearch.yml -f docker-compose/postgres.yml down"
},
"dependencies": {
"better-sqlite3": "11.3.0",
"commander": "12.1.0",
"dotenv": "16.4.5",
"drizzle-orm": "0.33.0",
"env-var": "7.5.0",
"fast-xml-parser": "4.5.0",
"winston": "^3.14.2"
"pg": "8.13.0",
"winston": "3.14.2"
},
"devDependencies": {
"@elastic/elasticsearch": "8.15.0",
"@types/benchmark": "^2.1.5",
"@types/better-sqlite3": "7.6.11",
"@types/benchmark": "2.1.5",
"@types/jest": "29.5.12",
"@types/node": "22.5.5",
"benchmark": "^2.1.4",
"@types/pg": "8.11.10",
"benchmark": "2.1.4",
"drizzle-kit": "0.22.8",
"jest": "29.7.0",
"ts-jest": "29.2.5",
Expand Down
7 changes: 5 additions & 2 deletions src/configs/index.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import esConfig from "./elasticsearch-config";
import sqliteConfig from "./sqlite-config";
import pgConfig from "./pg-config";
// import sqliteConfig from "./sqlite-config";

const config = {
databases: {
elasticsearch: esConfig,
db: sqliteConfig,
// not in use anymore
// db: sqliteConfig,
db: pgConfig,
},
};

Expand Down
13 changes: 13 additions & 0 deletions src/configs/pg-config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { PoolConfig } from "pg";
import * as env from "env-var";

/**
 * Connection settings for the PostgreSQL pool.
 *
 * `POSTGRES_DB`, `POSTGRES_USER` and `POSTGRES_PASSWORD` are required; reading
 * them throws at module load when one is missing. `POSTGRES_HOST` and
 * `POSTGRES_PORT` are optional and fall back to the previously hard-coded
 * `localhost:5432`, so existing setups keep working without new variables.
 */
const pgConfig: PoolConfig = {
  // Overridable so the same code can reach a database that is not on localhost.
  host: env.get("POSTGRES_HOST").default("localhost").asString(),
  // asPortNumber() rejects values that are not a valid TCP port.
  port: env.get("POSTGRES_PORT").default("5432").asPortNumber(),
  database: env.get("POSTGRES_DB").required().asString(),
  user: env.get("POSTGRES_USER").required().asString(),
  password: env.get("POSTGRES_PASSWORD").required().asString(),
  // max: 1,
};

export default pgConfig;
16 changes: 0 additions & 16 deletions src/db/client.ts

This file was deleted.

2 changes: 1 addition & 1 deletion src/db/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import db, { DbClient } from "./client";
import db, { DbClient } from "./pg-client";

// Import all your schema definitions
import * as schema from "./schema";
Expand Down
117 changes: 71 additions & 46 deletions src/db/migrations/0000_init.sql
Original file line number Diff line number Diff line change
@@ -1,58 +1,83 @@
CREATE TABLE `doc` (
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
`type` text NOT NULL
CREATE TABLE IF NOT EXISTS "doc" (
"id" serial PRIMARY KEY NOT NULL,
"type" text NOT NULL
);
--> statement-breakpoint
CREATE TABLE `doc_meta` (
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
`doc_id` integer NOT NULL,
`hash` text NOT NULL,
`char_count` integer NOT NULL,
`word_count` integer NOT NULL,
`sentence_count` integer NOT NULL,
FOREIGN KEY (`doc_id`) REFERENCES `doc`(`id`) ON UPDATE no action ON DELETE no action
CREATE TABLE IF NOT EXISTS "doc_meta" (
"id" serial PRIMARY KEY NOT NULL,
"doc_id" integer NOT NULL,
"hash" text NOT NULL,
"char_count" integer NOT NULL,
"word_count" integer NOT NULL,
"sentence_count" integer NOT NULL
);
--> statement-breakpoint
CREATE TABLE `pubmed` (
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
`doc_id` integer NOT NULL,
`title` text NOT NULL,
`abstract` text NOT NULL,
FOREIGN KEY (`doc_id`) REFERENCES `doc`(`id`) ON UPDATE no action ON DELETE no action
CREATE TABLE IF NOT EXISTS "pubmed" (
"id" serial PRIMARY KEY NOT NULL,
"doc_id" integer NOT NULL,
"title" text NOT NULL,
"abstract" text NOT NULL
);
--> statement-breakpoint
CREATE TABLE `stem` (
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
`term` text NOT NULL
CREATE TABLE IF NOT EXISTS "stem" (
"id" serial PRIMARY KEY NOT NULL,
"term" text NOT NULL
);
--> statement-breakpoint
CREATE TABLE `stem_doc_stats` (
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
`stem_id` integer NOT NULL,
`doc_id` integer NOT NULL,
`doc_type` text NOT NULL,
`count` integer NOT NULL,
FOREIGN KEY (`stem_id`) REFERENCES `stem`(`id`) ON UPDATE no action ON DELETE no action,
FOREIGN KEY (`doc_id`) REFERENCES `doc`(`id`) ON UPDATE no action ON DELETE no action
CREATE TABLE IF NOT EXISTS "stem_doc_stats" (
"id" serial PRIMARY KEY NOT NULL,
"stem_id" integer NOT NULL,
"doc_id" integer NOT NULL,
"doc_type" text NOT NULL,
"count" integer NOT NULL
);
--> statement-breakpoint
CREATE TABLE `twitter` (
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
`doc_id` integer NOT NULL,
`content` text NOT NULL,
FOREIGN KEY (`doc_id`) REFERENCES `doc`(`id`) ON UPDATE no action ON DELETE no action
CREATE TABLE IF NOT EXISTS "twitter" (
"id" serial PRIMARY KEY NOT NULL,
"doc_id" integer NOT NULL,
"content" text NOT NULL
);
--> statement-breakpoint
CREATE INDEX `doc_type_idx` ON `doc` (`type`);--> statement-breakpoint
CREATE UNIQUE INDEX `doc_meta_doc_id_idx` ON `doc_meta` (`doc_id`);--> statement-breakpoint
CREATE INDEX `doc_meta_hash_idx` ON `doc_meta` (`hash`);--> statement-breakpoint
CREATE INDEX `doc_meta_char_count_idx` ON `doc_meta` (`char_count`);--> statement-breakpoint
CREATE INDEX `doc_meta_word_count_idx` ON `doc_meta` (`word_count`);--> statement-breakpoint
CREATE INDEX `doc_meta_sentence_count_idx` ON `doc_meta` (`sentence_count`);--> statement-breakpoint
CREATE UNIQUE INDEX `pubmed_doc_id_idx` ON `pubmed` (`doc_id`);--> statement-breakpoint
CREATE UNIQUE INDEX `stem_term_idx` ON `stem` (`term`);--> statement-breakpoint
CREATE INDEX `stem_doc_stats_doc_id_count_idx` ON `stem_doc_stats` (`doc_id`,`count`);--> statement-breakpoint
CREATE INDEX `stem_doc_stats_stem_id_count_idx` ON `stem_doc_stats` (`stem_id`,`count`);--> statement-breakpoint
CREATE INDEX `stem_doc_stats_doc_type_count_idx` ON `stem_doc_stats` (`doc_type`,`count`);--> statement-breakpoint
CREATE INDEX `stem_doc_stats_count_idx` ON `stem_doc_stats` (`count`);--> statement-breakpoint
CREATE UNIQUE INDEX `twitter_doc_id_idx` ON `twitter` (`doc_id`);
DO $$ BEGIN
ALTER TABLE "doc_meta" ADD CONSTRAINT "doc_meta_doc_id_doc_id_fk" FOREIGN KEY ("doc_id") REFERENCES "public"."doc"("id") ON DELETE no action ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "pubmed" ADD CONSTRAINT "pubmed_doc_id_doc_id_fk" FOREIGN KEY ("doc_id") REFERENCES "public"."doc"("id") ON DELETE no action ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "stem_doc_stats" ADD CONSTRAINT "stem_doc_stats_stem_id_stem_id_fk" FOREIGN KEY ("stem_id") REFERENCES "public"."stem"("id") ON DELETE no action ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "stem_doc_stats" ADD CONSTRAINT "stem_doc_stats_doc_id_doc_id_fk" FOREIGN KEY ("doc_id") REFERENCES "public"."doc"("id") ON DELETE no action ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "twitter" ADD CONSTRAINT "twitter_doc_id_doc_id_fk" FOREIGN KEY ("doc_id") REFERENCES "public"."doc"("id") ON DELETE no action ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "doc_type_idx" ON "doc" USING btree ("type");--> statement-breakpoint
CREATE UNIQUE INDEX IF NOT EXISTS "doc_meta_doc_id_idx" ON "doc_meta" USING btree ("doc_id");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "doc_meta_hash_idx" ON "doc_meta" USING btree ("hash");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "doc_meta_char_count_idx" ON "doc_meta" USING btree ("char_count");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "doc_meta_word_count_idx" ON "doc_meta" USING btree ("word_count");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "doc_meta_sentence_count_idx" ON "doc_meta" USING btree ("sentence_count");--> statement-breakpoint
CREATE UNIQUE INDEX IF NOT EXISTS "pubmed_doc_id_idx" ON "pubmed" USING btree ("doc_id");--> statement-breakpoint
CREATE UNIQUE INDEX IF NOT EXISTS "stem_term_idx" ON "stem" USING btree ("term");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "stem_doc_stats_doc_id_count_idx" ON "stem_doc_stats" USING btree ("doc_id","count");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "stem_doc_stats_stem_id_count_idx" ON "stem_doc_stats" USING btree ("stem_id","count");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "stem_doc_stats_doc_type_count_idx" ON "stem_doc_stats" USING btree ("doc_type","count");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "stem_doc_stats_count_idx" ON "stem_doc_stats" USING btree ("count");--> statement-breakpoint
CREATE UNIQUE INDEX IF NOT EXISTS "twitter_doc_id_idx" ON "twitter" USING btree ("doc_id");
9 changes: 9 additions & 0 deletions src/db/pg-client.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { drizzle } from "drizzle-orm/node-postgres";
import { Pool } from "pg";
import config from "../configs";

// Pool built from the application's database settings.
const pool = new Pool(config.databases.db);

/** Drizzle client bound to the pool above. */
const db = drizzle(pool);

/** Type of the exported drizzle client. */
export type DbClient = typeof db;

export default db;
9 changes: 5 additions & 4 deletions src/db/schema/doc-meta.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import {
index,
integer,
sqliteTable,
pgTable,
serial,
text,
uniqueIndex,
} from "drizzle-orm/sqlite-core";
} from "drizzle-orm/pg-core";
import doc from "./doc";

const docMeta = sqliteTable(
const docMeta = pgTable(
"doc_meta",
{
id: integer("id").primaryKey({ autoIncrement: true }),
id: serial("id").primaryKey(),
docId: integer("doc_id")
.references(() => doc.id)
.notNull(),
Expand Down
6 changes: 3 additions & 3 deletions src/db/schema/doc.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { sqliteTable, text, integer, index } from "drizzle-orm/sqlite-core";
import { pgTable, text, index, serial } from "drizzle-orm/pg-core";
import { DocTypeValues } from "../../constants/DocType";

const doc = sqliteTable(
const doc = pgTable(
"doc",
{
id: integer("id").primaryKey({ autoIncrement: true }),
id: serial("id").primaryKey(),
type: text("type", {
enum: DocTypeValues,
}).notNull(),
Expand Down
9 changes: 5 additions & 4 deletions src/db/schema/pubmed.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import {
integer,
sqliteTable,
pgTable,
serial,
text,
uniqueIndex,
} from "drizzle-orm/sqlite-core";
} from "drizzle-orm/pg-core";
import doc from "./doc";

const pubmed = sqliteTable(
const pubmed = pgTable(
"pubmed",
{
id: integer("id").primaryKey({ autoIncrement: true }),
id: serial("id").primaryKey(),
docId: integer("doc_id")
.references(() => doc.id)
.notNull(),
Expand Down
11 changes: 3 additions & 8 deletions src/db/schema/stem.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
import {
integer,
sqliteTable,
text,
uniqueIndex,
} from "drizzle-orm/sqlite-core";
import { serial, pgTable, text, uniqueIndex } from "drizzle-orm/pg-core";

const stem = sqliteTable(
const stem = pgTable(
"stem",
{
id: integer("id").primaryKey({ autoIncrement: true }),
id: serial("id").primaryKey(),
term: text("term").notNull(),
},
(table) => ({
Expand Down
7 changes: 3 additions & 4 deletions src/db/schema/stemDocStats.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import { index, integer, sqliteTable, text } from "drizzle-orm/sqlite-core";

import { index, integer, pgTable, serial, text } from "drizzle-orm/pg-core";
import doc from "./doc";
import stem from "./stem";
import { DocTypeValues } from "../../constants/DocType";

const stemDocStats = sqliteTable(
const stemDocStats = pgTable(
"stem_doc_stats",
{
id: integer("id").primaryKey({ autoIncrement: true }),
id: serial("id").primaryKey(),
stemId: integer("stem_id")
.references(() => stem.id)
.notNull(),
Expand Down
Loading