diff --git a/docker-compose.yml b/docker-compose.yml index 72be80c..abf3dcd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,5 @@ services: frontend: - user: node build: context: ./frontend @@ -10,44 +9,47 @@ services: - NODE_ENV=production networks: - rishu-app_network + user: node web: build: context: ./nginx dockerfile: Dockerfile restart: always + depends_on: + - frontend + - syllabus-backend ports: - 80:80 environment: - SERVER_NAME=${SERVER_NAME} networks: - rishu-app_network - depends_on: - - frontend - - syllabus-backend syllabus-backend: build: context: ./syllabus-backend dockerfile: Dockerfile restart: always + depends_on: + - syllabus-db + ports: + - 8080:8080 environment: POSTGRES_USER: ${POSTGRES_USER} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_DB: ${POSTGRES_DB} networks: - rishu-app_network - depends_on: - - syllabus-db syllabus-db: image: postgres:17.4-bullseye + volumes: + - syllabus-db:/var/lib/postgresql/data environment: POSTGRES_USER: ${POSTGRES_USER} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_DB: ${POSTGRES_DB} - volumes: - - syllabus-db:/var/lib/postgresql/data networks: - rishu-app_network @@ -56,18 +58,33 @@ services: context: ./syllabus-scrape dockerfile: Dockerfile restart: always + depends_on: + - syllabus-scrape-redis + - syllabus-backend environment: POSTGRES_USER: ${POSTGRES_USER} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_DB: ${POSTGRES_DB} NODE_ENV: production SYLLABUS_BACKEND_URL: http://syllabus-backend:8080 + REDIS_HOST: syllabus-scrape-redis + REDIS_PORT: 6379 networks: - rishu-app_network -networks: - rishu-app_network: - driver: bridge + syllabus-scrape-redis: + image: redis:8.0-alpine + restart: always + volumes: + - redis-data:/data + networks: + - rishu-app_network volumes: syllabus-db: + redis-data: + + +networks: + rishu-app_network: + driver: bridge diff --git a/syllabus-scrape/package.json b/syllabus-scrape/package.json index e91a32e..4074bcc 100644 --- a/syllabus-scrape/package.json +++ b/syllabus-scrape/package.json @@ -23,6 +23,7 @@ "typescript": "^5.9.2" }, "dependencies": { + "bullmq": "^5.74.1", "cheerio": "^1.1.2", "puppeteer": "^24.16.2" } diff --git a/syllabus-scrape/pnpm-lock.yaml b/syllabus-scrape/pnpm-lock.yaml index 6a63e4f..1e3fca4 100644 --- a/syllabus-scrape/pnpm-lock.yaml +++ b/syllabus-scrape/pnpm-lock.yaml @@ -8,6 +8,9 @@ importers: .: dependencies: + bullmq: + specifier: ^5.74.1 + version: 5.74.1 cheerio: specifier: ^1.1.2 version: 1.1.2 @@ -429,6 +432,9 @@ packages: cpu: [x64] os: [win32] + '@ioredis/commands@1.5.1': + resolution: {integrity: sha512-JH8ZL/ywcJyR9MmJ5BNqZllXNZQqQbnVZOqpPQqE1vHiFgAw4NHbvE0FOduNU8IX9babitBT46571OnPTT0Zcw==} + '@isaacs/cliui@8.0.2': resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} engines: {node: '>=12'} @@ -539,6 +545,36 @@ packages: '@jridgewell/trace-mapping@0.3.31': resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3': + resolution: {integrity: sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw==} + cpu: [arm64] + os: [darwin] + + '@msgpackr-extract/msgpackr-extract-darwin-x64@3.0.3': + resolution: {integrity: sha512-mdzd3AVzYKuUmiWOQ8GNhl64/IoFGol569zNRdkLReh6LRLHOXxU4U8eq0JwaD8iFHdVGqSy4IjFL4reoWCDFw==} + cpu: [x64] + os: [darwin] + + '@msgpackr-extract/msgpackr-extract-linux-arm64@3.0.3': + resolution: {integrity: sha512-YxQL+ax0XqBJDZiKimS2XQaf+2wDGVa1enVRGzEvLLVFeqa5kx2bWbtcSXgsxjQB7nRqqIGFIcLteF/sHeVtQg==} + cpu: [arm64] + os: [linux] + + '@msgpackr-extract/msgpackr-extract-linux-arm@3.0.3': + resolution: {integrity: sha512-fg0uy/dG/nZEXfYilKoRe7yALaNmHoYeIoJuJ7KJ+YyU2bvY8vPv27f7UKhGRpY6euFYqEVhxCFZgAUNQBM3nw==} + cpu: [arm] + os: [linux] + + '@msgpackr-extract/msgpackr-extract-linux-x64@3.0.3': + resolution: {integrity: sha512-cvwNfbP07pKUfq1uH+S6KJ7dT9K8WOE4ZiAcsrSes+UY55E/0jLYc+vq+DO7jlmqRb5zAggExKm0H7O/CBaesg==} + cpu: [x64] + os: [linux] + + '@msgpackr-extract/msgpackr-extract-win32-x64@3.0.3': + resolution: {integrity: sha512-x0fWaQtYp4E6sktbsdAqnehxDgEc/VwM7uLsRCYWaiGu0ykYdZPiS8zCWdnjHwyiumousxfBm4SO31eXqwEZhQ==} + cpu: [x64] + os: [win32] + '@napi-rs/wasm-runtime@0.2.12': resolution: {integrity: sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==} @@ -870,6 +906,9 @@ packages: buffer-from@1.1.2: resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} + bullmq@5.74.1: + resolution: {integrity: sha512-GfJEos2zoOGM9xqkB7VZouwwFuejKFqm667cBcmbBekJXKqqXWk4QYP3Uy2pzgUwCbg1cR7GgGmGczM7fnhWSA==} + callsites@3.1.0: resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} engines: {node: '>=6'} @@ -920,6 +959,10 @@ packages: resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} engines: {node: '>=12'} + cluster-key-slot@1.1.2: + resolution: {integrity: sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==} + engines: {node: '>=0.10.0'} + co@4.6.0: resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} @@ -953,6 +996,10 @@ packages: typescript: optional: true + cron-parser@4.9.0: + resolution: {integrity: sha512-p0SaNjrHOnQeR8/VnfGbmg9te2kfyYSQ7Sc/j/6DtPL3JQvKxmjO9TSjNFpujqV3vEYYBvNNvXSxzyksBWAx1Q==} + engines: {node: '>=12.0.0'} + cross-spawn@7.0.6: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} @@ -993,6 +1040,14 @@ packages: resolution: {integrity: sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==} engines: {node: '>= 14'} + denque@2.1.0: + resolution: {integrity: sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw==} + engines: {node: '>=0.10'} + + detect-libc@2.1.2: + resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} + engines: {node: '>=8'} + detect-newline@3.1.0: resolution: {integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==} engines: {node: '>=8'} @@ -1243,6 +1298,10 @@ packages: inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + ioredis@5.10.1: + resolution: {integrity: sha512-HuEDBTI70aYdx1v6U97SbNx9F1+svQKBDo30o0b9fw055LMepzpOOd0Ccg9Q6tbqmBSJaMuY0fB7yw9/vjBYCA==} + engines: {node: '>=12.22.0'} + ip-address@10.0.1: resolution: {integrity: sha512-NWv9YLW4PoW2B7xtzaS3NCot75m6nK7Icdv0o3lfMceJVRfSoQwqD4wEH5rLwoKJwUiZ/rfpiVBhnaF0FK4HoA==} engines: {node: '>= 12'} @@ -1467,6 +1526,12 @@ packages: resolution: {integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==} engines: {node: '>=8'} + lodash.defaults@4.2.0: + resolution: {integrity: sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==} + + lodash.isarguments@3.1.0: + resolution: {integrity: sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==} + lodash.memoize@4.1.2: resolution: {integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==} @@ -1480,6 +1545,10 @@ packages: resolution: {integrity: sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==} engines: {node: '>=12'} + luxon@3.7.2: + resolution: {integrity: sha512-vtEhXh/gNjI9Yg1u4jX/0YVPMvxzHuGgCm6tC5kZyb08yjGWGnqAjGJvcXbqQR2P3MyMEFnRbpcdFS6PBcLqew==} + engines: {node: '>=12'} + make-dir@4.0.0: resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} engines: {node: '>=10'} @@ -1525,6 +1594,13 @@ packages: ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + msgpackr-extract@3.0.3: + resolution: {integrity: sha512-P0efT1C9jIdVRefqjzOQ9Xml57zpOXnIuS+csaB4MdZbTdmGDLo8XhzBG1N7aO11gKDDkJvBLULeFTo46wwreA==} + hasBin: true + + msgpackr@1.11.5: + resolution: {integrity: sha512-UjkUHN0yqp9RWKy0Lplhh+wlpdt9oQBYgULZOiFhV3VclSF1JnSQWZ5r9gORQlNYaUKQoR8itv7g7z1xDDuACA==} + mylas@2.1.13: resolution: {integrity: sha512-+MrqnJRtxdF+xngFfUUkIMQrUUL0KsxbADUkn23Z/4ibGg192Q+z+CQyiYwvWTsYjJygmMR8+w3ZDa98Zh6ESg==} engines: {node: '>=12.0.0'} @@ -1544,6 +1620,13 @@ packages: resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==} engines: {node: '>= 0.4.0'} + node-abort-controller@3.1.1: + resolution: {integrity: sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==} + + node-gyp-build-optional-packages@5.2.2: + resolution: {integrity: sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw==} + hasBin: true + node-int64@0.4.0: resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} @@ -1702,6 +1785,14 @@ packages: resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} engines: {node: '>=8.10.0'} + redis-errors@1.2.0: + resolution: {integrity: sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w==} + engines: {node: '>=4'} + + redis-parser@3.0.0: + resolution: {integrity: sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==} + engines: {node: '>=4'} + require-directory@2.1.1: resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} engines: {node: '>=0.10.0'} @@ -1745,6 +1836,11 @@ packages: engines: {node: '>=10'} hasBin: true + semver@7.7.4: + resolution: {integrity: sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==} + engines: {node: '>=10'} + hasBin: true + shebang-command@2.0.0: resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} engines: {node: '>=8'} @@ -1790,6 +1886,9 @@ packages: resolution: {integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==} engines: {node: '>=10'} + standard-as-callback@2.1.0: + resolution: {integrity: sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A==} + streamx@2.22.1: resolution: {integrity: sha512-znKXEBxfatz2GBNK02kRnCXjV+AA4kjZIUxeWSr3UGirZMJfTE9uiwKHobnbgxWyL/JWro8tTq+vOqAK1/qbSA==} @@ -1938,6 +2037,10 @@ packages: peerDependencies: browserslist: '>= 4.21.0' + uuid@11.1.0: + resolution: {integrity: sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==} + hasBin: true + v8-to-istanbul@9.3.0: resolution: {integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==} engines: {node: '>=10.12.0'} @@ -2336,6 +2439,8 @@ snapshots: '@esbuild/win32-x64@0.25.9': optional: true + '@ioredis/commands@1.5.1': {} + '@isaacs/cliui@8.0.2': dependencies: string-width: 5.1.2 @@ -2553,6 +2658,24 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-darwin-x64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-linux-arm64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-linux-arm@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-linux-x64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-win32-x64@3.0.3': + optional: true + '@napi-rs/wasm-runtime@0.2.12': dependencies: '@emnapi/core': 1.7.1 @@ -2878,6 +3001,18 @@ snapshots: buffer-from@1.1.2: {} + bullmq@5.74.1: + dependencies: + cron-parser: 4.9.0 + ioredis: 5.10.1 + msgpackr: 1.11.5 + node-abort-controller: 3.1.1 + semver: 7.7.4 + tslib: 2.8.1 + uuid: 11.1.0 + transitivePeerDependencies: + - supports-color + callsites@3.1.0: {} camelcase@5.3.1: {} @@ -2944,6 +3079,8 @@ snapshots: strip-ansi: 6.0.1 wrap-ansi: 7.0.0 + cluster-key-slot@1.1.2: {} + co@4.6.0: {} collect-v8-coverage@1.0.3: {} @@ -2969,6 +3106,10 @@ snapshots: optionalDependencies: typescript: 5.9.2 + cron-parser@4.9.0: + dependencies: + luxon: 3.7.2 + cross-spawn@7.0.6: dependencies: path-key: 3.1.1 @@ -3001,6 +3142,11 @@ snapshots: escodegen: 2.1.0 esprima: 4.0.1 + denque@2.1.0: {} + + detect-libc@2.1.2: + optional: true + detect-newline@3.1.0: {} devtools-protocol@0.0.1475386: {} @@ -3297,6 +3443,20 @@ snapshots: inherits@2.0.4: {} + ioredis@5.10.1: + dependencies: + '@ioredis/commands': 1.5.1 + cluster-key-slot: 1.1.2 + debug: 4.4.1 + denque: 2.1.0 + lodash.defaults: 4.2.0 + lodash.isarguments: 3.1.0 + redis-errors: 1.2.0 + redis-parser: 3.0.0 + standard-as-callback: 2.1.0 + transitivePeerDependencies: + - supports-color + ip-address@10.0.1: {} is-arrayish@0.2.1: {} @@ -3694,6 +3854,10 @@ snapshots: dependencies: p-locate: 4.1.0 + lodash.defaults@4.2.0: {} + + lodash.isarguments@3.1.0: {} + lodash.memoize@4.1.2: {} lru-cache@10.4.3: {} @@ -3704,6 +3868,8 @@ snapshots: lru-cache@7.18.3: {} + luxon@3.7.2: {} + make-dir@4.0.0: dependencies: semver: 7.7.2 @@ -3741,6 +3907,22 @@ snapshots: ms@2.1.3: {} + msgpackr-extract@3.0.3: + dependencies: + node-gyp-build-optional-packages: 5.2.2 + optionalDependencies: + '@msgpackr-extract/msgpackr-extract-darwin-arm64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-darwin-x64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-linux-arm': 3.0.3 + '@msgpackr-extract/msgpackr-extract-linux-arm64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-linux-x64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-win32-x64': 3.0.3 + optional: true + + msgpackr@1.11.5: + optionalDependencies: + msgpackr-extract: 3.0.3 + mylas@2.1.13: {} napi-postinstall@0.3.4: {} @@ -3751,6 +3933,13 @@ snapshots: netmask@2.0.2: {} + node-abort-controller@3.1.1: {} + + node-gyp-build-optional-packages@5.2.2: + dependencies: + detect-libc: 2.1.2 + optional: true + node-int64@0.4.0: {} node-releases@2.0.27: {} @@ -3931,6 +4120,12 @@ snapshots: dependencies: picomatch: 2.3.1 + redis-errors@1.2.0: {} + + redis-parser@3.0.0: + dependencies: + redis-errors: 1.2.0 + require-directory@2.1.1: {} resolve-cwd@3.0.0: @@ -3957,6 +4152,8 @@ snapshots: semver@7.7.3: {} + semver@7.7.4: {} + shebang-command@2.0.0: dependencies: shebang-regex: 3.0.0 @@ -3997,6 +4194,8 @@ snapshots: dependencies: escape-string-regexp: 2.0.0 + standard-as-callback@2.1.0: {} + streamx@2.22.1: dependencies: fast-fifo: 1.3.2 @@ -4165,6 +4364,8 @@ snapshots: escalade: 3.2.0 picocolors: 1.1.1 + uuid@11.1.0: {} + v8-to-istanbul@9.3.0: dependencies: '@jridgewell/trace-mapping': 0.3.31 diff --git a/syllabus-scrape/src/main.ts b/syllabus-scrape/src/main.ts index 7cfcf14..fa34fa1 100644 --- a/syllabus-scrape/src/main.ts +++ b/syllabus-scrape/src/main.ts @@ -3,7 +3,7 @@ import type { Department } from "./course"; import { facultyMap } from "./course"; import { Logger } from "./logger"; import { PuppeteerClient } from "./resourceClient/browserClient/puppeteerClient"; -import { TimeRangeScheduler } from "./scheduler/timeRangeScheduler"; +import { BullMQScheduler } from "./scheduler/bullmqScheduler"; import { CheerioDOMParser } from "./scraping/adapters"; import { FacultyParser } from "./scraping/commonParser/facultyParser"; import { InstructorParser } from "./scraping/commonParser/instructorParser"; @@ -154,11 +154,10 @@ async function scrapeSyllabusSearchResult(department: Department) { logger.log("start scraping syllabus..."); -const timeRangeScheduler = new TimeRangeScheduler( - 23, - 3, - () => (50 + Math.random() * 20) * 1000 -); +const bullMQScheduler = new BullMQScheduler({ + host: process.env.REDIS_HOST || "localhost", + port: Number(process.env.REDIS_PORT) || 6379, +}); const courseService = new CourseService(); const courseRepositoryAdapter = new RestApiCourseRepositoryAdapter( @@ -166,40 +165,59 @@ const courseRepositoryAdapter = new RestApiCourseRepositoryAdapter( ); const courseRepository = new CourseRepository(courseRepositoryAdapter); -Object.keys(facultyMap).forEach((key) => { - timeRangeScheduler.addTask(async () => { +type syllabusSearchResult = Awaited< + ReturnType +>[number]; + +await bullMQScheduler.addWorker( + "scrapeSyllabus", + async (taskPayload) => { try { - const syllabusSearchResults = await scrapeSyllabusSearchResult( - key as Department + if (!taskPayload) { + logger.log( + `Invalid task payload: ${JSON.stringify(taskPayload, null, 2)}`, + "error" + ); + return; + } + const syllabusData = await scrapeSyllabus(taskPayload.japaneseUrl || ""); + if (!syllabusData) return; + const course = courseService.createCourseFromSyllabusData( + syllabusData.data, + taskPayload, + 2025, + syllabusData.data.courseDescription + ); + + await courseRepository.saveCourse(course); + } catch (error) { + logger.log(`Error scraping syllabus: ${error}`, "error"); + logger.log( + `Stack trace: ${error instanceof Error ? error.stack : "no stack"}`, + "error" ); + logger.log( + `Error occurred at: ${JSON.stringify(taskPayload, null, 2)}`, + "error" + ); + } + }, + { max: 1, duration: 10000 } +); + +await bullMQScheduler.addWorker( + "scrapeSyllabusSearchResult", + async (taskPayload) => { + const department = taskPayload; + try { + const syllabusSearchResults = + await scrapeSyllabusSearchResult(department); syllabusSearchResults .filter((e) => e !== null) .forEach((syllabusSearchResult) => { - timeRangeScheduler.addTask(async () => { - try { - const syllabusData = await scrapeSyllabus( - syllabusSearchResult?.japaneseUrl || "" - ); - if (!syllabusData) return; - const course = courseService.createCourseFromSyllabusData( - syllabusData.data, - syllabusSearchResult, - 2025, - syllabusData.data.courseDescription - ); - - await courseRepository.saveCourse(course); - } catch (error) { - logger.log(`Error scraping syllabus: ${error}`, "error"); - logger.log( - `Stack trace: ${error instanceof Error ? error.stack : "no stack"}`, - "error" - ); - logger.log( - `Error occurred at: ${JSON.stringify(syllabusSearchResult, null, 2)}`, - "error" - ); - } + bullMQScheduler.addTask({ + type: "scrapeSyllabus", + payload: { syllabusSearchResult }, }); }); } catch (error) { @@ -208,21 +226,29 @@ Object.keys(facultyMap).forEach((key) => { `Stack trace: ${error instanceof Error ? error.stack : "no stack"}`, "error" ); - logger.log(`Faculty: ${key}`, "error"); + logger.log(`Faculty: ${department}`, "error"); } + }, + { max: 1, duration: 10000 } +); + +Object.keys(facultyMap).forEach((department) => { + bullMQScheduler.addTask({ + type: "scrapeSyllabusSearchResult", + payload: { department }, }); }); -timeRangeScheduler.start(); +bullMQScheduler.start(); process.on("SIGINT", async () => { await logger.log("Received SIGINT. Shutting down..."); - timeRangeScheduler.stop(); + await bullMQScheduler.stop(); process.exit(); }); process.on("SIGTERM", async () => { await logger.log("Received SIGTERM. Shutting down..."); - timeRangeScheduler.stop(); + await bullMQScheduler.stop(); process.exit(); }); diff --git a/syllabus-scrape/src/scheduler/bullmqScheduler.ts b/syllabus-scrape/src/scheduler/bullmqScheduler.ts new file mode 100644 index 0000000..2d9b351 --- /dev/null +++ b/syllabus-scrape/src/scheduler/bullmqScheduler.ts @@ -0,0 +1,68 @@ +import { + Queue, + Worker, + type ConnectionOptions, + type RateLimiterOptions, +} from "bullmq"; +import type { Scheduler, TaskPayload } from "./core"; + +export class BullMQScheduler implements Scheduler { + private workers: Record = {}; + private queues: Record = {}; + private redisConfig: ConnectionOptions; + + constructor(connectionOpts: ConnectionOptions) { + this.redisConfig = connectionOpts; + } + + async addWorker( + name: string, + processor: (taskPayload: DataType) => Promise, + limiter?: RateLimiterOptions + ) { + if (this.workers[name]) { + throw new Error(`Worker with name ${name} already exists.`); + } + this.queues[name] = new Queue(name, { + connection: this.redisConfig, + }); + this.workers[name] = new Worker( + name, + (job) => processor(job.data), + { + limiter: limiter ?? { + max: 1, + duration: 10000, + }, + connection: this.redisConfig, + } + ); + await this.workers[name].pause(); + } + + addTask(task: TaskPayload): void { + const queue = this.queues[task.type]; + if (!queue) { + throw new Error("No workers available to add tasks."); + } + queue.add(task.type, task.payload); + } + start(): void { + for (const worker of Object.values(this.workers)) { + worker.resume(); + } + } + async stop(): Promise { + const workers = Object.values(this.workers); + const queues = Object.values(this.queues); + + await Promise.all(workers.map((worker) => worker.pause())); + await Promise.all([ + ...workers.map((worker) => worker.close()), + ...queues.map((queue) => queue.close()), + ]); + + this.workers = {}; + this.queues = {}; + } +} diff --git a/syllabus-scrape/src/scheduler/core.ts b/syllabus-scrape/src/scheduler/core.ts index e44ba81..c3cd032 100644 --- a/syllabus-scrape/src/scheduler/core.ts +++ b/syllabus-scrape/src/scheduler/core.ts @@ -1,7 +1,14 @@ -export type Task = () => Promise | void; +export type TaskPayload = { + type: string; + payload: unknown; +}; export interface Scheduler { - addTask(task: Task): void; + addWorker( + name: string, + processor: (taskPayload: unknown) => Promise + ): void; + addTask(task: TaskPayload): void; start(): void; stop(): void; } diff --git a/syllabus-scrape/src/scheduler/timeRangeScheduler.ts b/syllabus-scrape/src/scheduler/timeRangeScheduler.ts index 81ea05e..6136ca1 100644 --- a/syllabus-scrape/src/scheduler/timeRangeScheduler.ts +++ b/syllabus-scrape/src/scheduler/timeRangeScheduler.ts @@ -1,8 +1,9 @@ -import type { Scheduler, Task } from "./core"; +import type { Scheduler, TaskPayload } from "./core"; export class TimeRangeScheduler implements Scheduler { - private tasks: Task[] = []; + private tasks: TaskPayload[] = []; private isRunning: boolean = false; + private workers: Record Promise> = {}; constructor( private startHour: number, @@ -10,7 +11,7 @@ export class TimeRangeScheduler implements Scheduler { private intervalMs: number | (() => number) ) {} - addTask(task: Task): void { + addTask(task: TaskPayload): void { this.tasks.push(task); } @@ -23,13 +24,32 @@ export class TimeRangeScheduler implements Scheduler { this.isRunning = false; } + addWorker( + name: string, + processor: (taskPayload: unknown) => Promise + ): void { + if (this.workers[name]) { + throw new Error(`Worker with name ${name} already exists.`); + } + this.workers[name] = processor; + } + private async runTasks(): Promise { while (this.isRunning) { const now = new Date().getHours(); if (this.tasks.length > 0 && this.isInTimeRange(now)) { const task = this.tasks.shift(); if (task) { - await task(); + const worker = this.workers[task.type]; + if (worker) { + try { + await worker(task.payload); + } catch (error) { + console.error(`Error processing task: ${error}`); + } + } else { + console.warn(`No worker found for task type: ${task.type}`); + } } } await this.sleep(