-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.js
More file actions
408 lines (355 loc) · 16 KB
/
server.js
File metadata and controls
408 lines (355 loc) · 16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
const express = require('express');
const cheerio = require('cheerio');
const path = require('path');
const fs = require('fs').promises;
const mongoose = require('mongoose');
const { execSync } = require('child_process');
const { spawn } = require('child_process');
const { execFile } = require('child_process');
const { promisify } = require('util');
// Load environment variables FIRST so that every process.env read below
// (PORT, MONGODB_URI, COOKIE) also sees values defined in the .env file.
// Previously PORT was read before dotenv ran, so a PORT set in .env was ignored.
require('dotenv').config();

const app = express();
const port = process.env.PORT || 3000;

// MongoDB connection string
const mongoURI = process.env.MONGODB_URI;

// Connect to MongoDB; a failed connection is logged but does not stop the server.
mongoose.connect(mongoURI, {})
  .then(() => console.log('Connected to MongoDB'))
  .catch(err => console.error('MongoDB connection error:', err));

// Serve static files from the public directory
app.use(express.static(path.join(__dirname, 'public')));

// Parse JSON request bodies
app.use(express.json());

// Import the Course model
const Course = require('./models/course');

// State variables shared by the cookie-refresh and request functions below
let currentCookie = process.env.COOKIE;
let currentUserAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0';
let lastCookieRefresh = new Date();
let isRefreshingCookie = false; // Lock flag for cookie refresh
let cookieRefreshPromise = null; // Promise for ongoing refresh
let cookieRefreshQueue = []; // Queue for requests waiting for cookie refresh

// Python interpreter from the project's local virtual environment
const pythonPath = path.join(__dirname, '.venv', 'bin', 'python3');

// Constants for retry configurations
const CURL_MAX_RETRIES = 3;
const CURL_RETRY_DELAY = 2000; // 2 seconds
const COOKIE_REFRESH_INTERVAL = 10 * 60 * 1000; // 10 minutes
/**
 * Obtain a fresh clearance cookie and matching user agent by running the
 * Python helper script (`main.py --headed <url>`) and parsing its stdout for
 * the lines `Cookie: ...` and `User agent: ...`.
 *
 * Only one refresh runs at a time: callers that arrive while a refresh is in
 * progress receive the same promise. Callers that parked themselves on
 * `cookieRefreshQueue` are woken once the refresh settles (whether it
 * succeeded or failed) instead of each triggering another Python run.
 *
 * @param {string} url - Page the Python script should open to obtain clearance.
 * @returns {Promise<{cookie: string, userAgent: string}>} The extracted values.
 * @throws {Error} (as a rejection) if the process cannot be started or its
 *   output does not contain both a cookie and a user agent.
 */
async function getClearanceCookie(url) {
  // Coalesce concurrent callers onto the refresh that is already running.
  if (cookieRefreshPromise) {
    return cookieRefreshPromise;
  }

  // Set the lock
  isRefreshingCookie = true;

  cookieRefreshPromise = new Promise((resolve, reject) => {
    let settled = false;

    // Single exit point: releases the lock, settles the shared promise and
    // wakes queued waiters exactly once, whichever process event fires first.
    const finish = (error, clearance) => {
      if (settled) {
        return;
      }
      settled = true;
      isRefreshingCookie = false;
      cookieRefreshPromise = null;
      const waiters = cookieRefreshQueue.splice(0);

      if (error) {
        reject(error);
      } else {
        resolve(clearance);
      }

      // Wake waiters on a later event-loop turn so the caller that started
      // this refresh gets to run its continuation (where it stores the new
      // cookie) before the waiters carry on. On failure they receive
      // `undefined` and simply proceed with whatever cookie is current.
      setImmediate(() => {
        for (const { resolve: wakeWaiter } of waiters) {
          wakeWaiter(clearance);
        }
      });
    };

    console.log(`Using Python interpreter: ${pythonPath}`);

    let python;
    try {
      python = spawn(pythonPath, ['main.py', '--headed', url]);
    } catch (error) {
      // spawn() can throw synchronously (e.g. invalid arguments); make sure
      // the lock is still released in that case.
      console.error(`Failed to start Python process: ${error.message}`);
      finish(new Error(`Failed to start Python process: ${error.message}`));
      return;
    }

    let output = '';

    python.stdout.on('data', (data) => {
      output += data.toString();
    });

    // The script's stderr carries both informational and error lines; route
    // them to the matching console stream based on the `[INFO]` marker.
    python.stderr.on('data', (data) => {
      const message = data.toString();
      if (!message.includes('[INFO]')) {
        console.error(`Python error: ${message}`);
      } else {
        console.log(`Python info: ${message}`);
      }
    });

    python.on('error', (error) => {
      console.error(`Failed to start Python process: ${error.message}`);
      finish(new Error(`Failed to start Python process: ${error.message}`));
    });

    python.on('close', (code) => {
      console.log(`Python process exited with code ${code}`);
      const cookieMatch = output.match(/Cookie: ([^\n]*)/);
      const userAgentMatch = output.match(/User agent: ([^\n]*)/);

      if (cookieMatch && userAgentMatch) {
        console.log('Successfully obtained new cookie and user agent');
        finish(null, {
          cookie: cookieMatch[1].trim(),
          userAgent: userAgentMatch[1].trim()
        });
      } else {
        console.error('Failed to extract cookie or user agent from output');
        finish(new Error('Failed to extract cookie or user agent from output'));
      }
    });
  });

  return cookieRefreshPromise;
}
// Promise-returning execFile: runs curl without a shell and without blocking
// the event loop.
const execFileAsync = promisify(execFile);

// Endpoint used both for the search POST and for obtaining clearance.
const COURSE_OFFERINGS_URL = 'https://enroll.dlsu.edu.ph/dlsu/view_course_offerings';

/**
 * Refresh the shared cookie / user-agent state via getClearanceCookie.
 * Failures are logged and otherwise ignored so the caller can carry on with
 * the cookie it already has. Concurrent calls share one refresh because
 * getClearanceCookie returns the in-flight promise.
 *
 * @param {string} failureMessage - Prefix logged when the refresh fails.
 */
async function refreshClearance(failureMessage) {
  try {
    const clearance = await getClearanceCookie(COURSE_OFFERINGS_URL);
    currentCookie = clearance.cookie;
    currentUserAgent = clearance.userAgent;
    lastCookieRefresh = new Date();
  } catch (error) {
    console.error(failureMessage, error);
  }
}

/**
 * Build the curl argument vector for a course search. Each value is passed as
 * its own argv entry, so no shell quoting is involved.
 *
 * @param {string} courseCode - Course code to search for.
 * @returns {string[]} Arguments for the `curl` executable.
 */
function buildCurlArgs(courseCode) {
  const headers = [
    `User-Agent: ${currentUserAgent}`,
    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language: en-US,en;q=0.5',
    'Accept-Encoding: gzip, deflate, br',
    'Content-Type: application/x-www-form-urlencoded',
    'Origin: https://enroll.dlsu.edu.ph',
    'Connection: keep-alive',
    `Referer: ${COURSE_OFFERINGS_URL}`,
    `Cookie: ${currentCookie}`,
    'Upgrade-Insecure-Requests: 1',
    'Sec-Fetch-Dest: document',
    'Sec-Fetch-Mode: navigate',
    'Sec-Fetch-Site: same-origin',
    'Sec-Fetch-User: ?1',
    'Priority: u=0, i',
    'TE: trailers'
  ];
  const formBody = `p_course_code=${encodeURIComponent(courseCode)}&p_option=all&p_button=Search&p_id_no=12216496&p_button=Submit`;

  return [
    COURSE_OFFERINGS_URL,
    '--compressed',
    '-X', 'POST',
    ...headers.flatMap((header) => ['-H', header]),
    '--data-raw', formBody,
    '--silent',
    '--max-time', '10'
  ];
}

/**
 * Heuristic used to decide that a response is a block/challenge page (or
 * otherwise unusable) rather than a real course-offerings page.
 *
 * @param {string} html - Response body returned by curl.
 * @returns {boolean} True when a new cookie should be obtained and the request retried.
 */
function looksBlocked(html) {
  return (
    html.includes('403 Forbidden') ||
    html.includes('check your browser') ||
    html.includes('Security check') ||
    html.includes('Please wait') ||
    html.length < 500 ||
    !html.includes('table')
  );
}

/**
 * POST a course search to the course-offerings page via curl and return the
 * raw HTML. Refreshes the clearance cookie when it is missing, older than
 * COOKIE_REFRESH_INTERVAL, or when the response looks like a block page, and
 * retries up to CURL_MAX_RETRIES times with CURL_RETRY_DELAY between attempts.
 *
 * curl is started with execFile and an argument array rather than a shell
 * command string: encodeURIComponent leaves characters such as `'`, `(` and
 * `)` untouched, so a shell string built from the course code could be broken
 * out of. Running asynchronously also keeps the server responsive while curl
 * is in flight.
 *
 * @param {string} courseCode - Course code to search for.
 * @param {number} [retryCount=0] - Number of attempts already made (internal).
 * @returns {Promise<string>} Response HTML; after the last retry this may
 *   still be an invalid page, which callers must validate.
 * @throws {Error} If curl itself keeps failing after all retries.
 */
async function performCurlRequest(courseCode, retryCount = 0) {
  // Check if cookie needs refresh
  const needsRefresh = !currentCookie || new Date() - lastCookieRefresh > COOKIE_REFRESH_INTERVAL;
  if (needsRefresh) {
    // Continue with the existing cookie if the refresh fails.
    await refreshClearance('Failed to refresh cookie:');
  }

  // Remember which cookie this attempt used so we can tell whether another
  // request has already replaced it by the time the response comes back.
  const cookieUsed = currentCookie;

  let htmlContent;
  try {
    const { stdout } = await execFileAsync('curl', buildCurlArgs(courseCode), {
      encoding: 'utf8',
      timeout: 10000,
      maxBuffer: 16 * 1024 * 1024
    });
    htmlContent = stdout;
  } catch (error) {
    if (retryCount < CURL_MAX_RETRIES) {
      console.log(`Request failed, retrying (${retryCount + 1}/${CURL_MAX_RETRIES})...`);
      await new Promise(resolve => setTimeout(resolve, CURL_RETRY_DELAY));
      return performCurlRequest(courseCode, retryCount + 1);
    }
    throw error;
  }

  if (looksBlocked(htmlContent) && retryCount < CURL_MAX_RETRIES) {
    console.log(`Invalid response detected, attempting to get new cookie (attempt ${retryCount + 1}/${CURL_MAX_RETRIES})`);
    // Skip the (expensive) refresh if the cookie was already replaced while
    // this request was in flight; the retry will pick up the new one.
    if (currentCookie === cookieUsed) {
      await refreshClearance('Failed to get new cookie:');
    }
    // Wait before retrying
    await new Promise(resolve => setTimeout(resolve, CURL_RETRY_DELAY));
    return performCurlRequest(courseCode, retryCount + 1);
  }

  return htmlContent;
}
/**
 * Extract course sections from the course-offerings HTML using cheerio.
 *
 * Rows with exactly nine cells are treated as schedule rows: one with a bold
 * class number in the first cell starts a new section, one with neither a
 * class number nor a section code adds extra day/time/room values to the
 * section being built. A row whose first cell has colspan="6" supplies the
 * professor for the section being built.
 *
 * @param {string} html - Raw HTML of the results page.
 * @param {string} courseCode - Course code stored on every parsed section.
 * @returns {{courseCode: string, sections: object[]}} Sections that have a
 *   class number, a section code and at least one day.
 */
function parseHTML(html, courseCode) {
  const $ = cheerio.load(html);

  // Background colour the code uses to flag a section as open and to accept
  // values from continuation rows.
  const OPEN_COLOR = '#D2EED3';

  const textOf = (cell) => $(cell).text().trim();
  const boldTextOf = (cell) => $(cell).find('b').text().trim();
  const asList = (value) => (value ? [value] : []);

  const collected = [];
  let active = null;

  for (const row of $('table tr').toArray()) {
    const cells = $(row).find('td');

    if (cells.length !== 9) {
      // Professor information row
      const isProfessorRow =
        cells.length > 0 && Boolean(active) && $(cells[0]).attr('colspan') === '6';
      if (isProfessorRow) {
        const professorText = textOf(cells[0]);
        if (professorText && !active.professor) {
          active.professor = professorText;
        }
      }
      continue;
    }

    // Header row with column titles
    if (textOf(cells[6]) === 'Enrl Cap') {
      continue;
    }

    const classNbr = boldTextOf(cells[0]);
    const sectionCode = boldTextOf(cells[2]);

    if (classNbr) {
      // A class number starts a new main section; flush the previous one.
      if (active) {
        collected.push(active);
      }
      active = {
        classNbr,
        course: courseCode,
        section: sectionCode,
        days: asList(textOf(cells[3])),
        times: asList(textOf(cells[4])),
        rooms: asList(textOf(cells[5])),
        enrlCap: textOf(cells[6]),
        enrolled: textOf(cells[7]),
        remarks: textOf(cells[8]),
        professor: '',
        isOpen: $(cells[0]).attr('bgcolor') === OPEN_COLOR
      };
      continue;
    }

    // Continuation row for a multi-day course: append non-empty values whose
    // cell carries the open colour.
    if (active && !sectionCode) {
      const extras = [
        [3, active.days],
        [4, active.times],
        [5, active.rooms]
      ];
      for (const [column, target] of extras) {
        const value = textOf(cells[column]);
        if (value && cells.eq(column).attr('bgcolor') === OPEN_COLOR) {
          target.push(value);
        }
      }
    }
  }

  // Add the last section if it exists
  if (active) {
    collected.push(active);
  }

  // Drop entries that are missing a class number, section code or any day.
  const validSections = collected.filter(
    (entry) => entry.classNbr && entry.section && entry.days.length > 0
  );

  return { courseCode, sections: validSections };
}
/**
 * GET /api/search?course=CODE
 *
 * Fetches live course offerings through performCurlRequest, stores successful
 * results in MongoDB, and falls back to the stored copy when the live fetch
 * fails or yields nothing usable.
 *
 * Responses:
 *  - 400 `{ error }` when `course` is missing, empty, or not a single string.
 *  - 200 `{ courseCode, sections, lastUpdated }` for live or stored data.
 *  - 200 `{ courseCode, noResults: true }` when the site reports no sections,
 *    or the response was unusable and nothing is stored for the course.
 *  - 500 `{ error }` when the live fetch failed and nothing is stored, or
 *    when MongoDB cannot be queried.
 *
 * Cookie freshness is handled inside performCurlRequest, so no separate
 * refresh is done here.
 */
app.get('/api/search', async (req, res) => {
  try {
    // A repeated query parameter arrives as an array; only accept a string.
    const rawCourse = req.query.course;
    const courseCode = typeof rawCourse === 'string' ? rawCourse.toUpperCase().trim() : '';
    if (!courseCode) {
      return res.status(400).json({ error: 'Course code is required' });
    }

    let courseDataFromDLSU = null;
    let fetchError = null;

    // Any failure while fetching or parsing is recorded rather than thrown,
    // so the MongoDB fallback below is still reached.
    try {
      const htmlContent = await performCurlRequest(courseCode);
      if (htmlContent) {
        if (htmlContent.length < 500 || !htmlContent.includes('table')) {
          console.log(`Invalid response received for ${courseCode}, length: ${htmlContent.length}`);
          throw new Error('Invalid response received');
        }

        if (htmlContent.includes('No course sections found')) {
          console.log(`No sections found for ${courseCode} on DLSU site.`);
          courseDataFromDLSU = { courseCode, sections: [], noResults: true };
        } else {
          const parsedData = parseHTML(htmlContent, courseCode);
          if (parsedData && parsedData.sections && parsedData.sections.length > 0) {
            courseDataFromDLSU = parsedData;
          } else {
            console.log(`Potentially empty or invalid response for ${courseCode} from DLSU (curl).`);
          }
        }
      }
    } catch (error) {
      fetchError = error;
      console.error(`Failed to fetch ${courseCode} from DLSU:`, error);
    }

    // The site answered and explicitly reported no sections.
    if (courseDataFromDLSU && courseDataFromDLSU.noResults) {
      return res.json({ courseCode: courseCode, noResults: true });
    }

    // Fresh data: persist it (best effort) and return it either way.
    if (courseDataFromDLSU) {
      const now = new Date();
      try {
        await Course.updateOne(
          { courseCode: courseCode },
          { $set: { sections: courseDataFromDLSU.sections, lastUpdated: now } },
          { upsert: true }
        );
        console.log(`Course ${courseCode} data saved/updated in MongoDB`);
      } catch (dbError) {
        console.error('Error saving to MongoDB:', dbError);
      }
      return res.json({
        courseCode: courseDataFromDLSU.courseCode,
        sections: courseDataFromDLSU.sections,
        lastUpdated: now
      });
    }

    // No usable live data: fall back to the stored copy.
    console.log(`Attempting fallback to MongoDB for ${courseCode}`);
    try {
      const courseFromDB = await Course.findOne({ courseCode: courseCode });
      if (courseFromDB) {
        console.log(`Course ${courseCode} data retrieved from MongoDB`);
        return res.json({
          courseCode: courseFromDB.courseCode,
          sections: courseFromDB.sections,
          lastUpdated: courseFromDB.lastUpdated
        });
      }

      console.log(`Course ${courseCode} not found in MongoDB either.`);
      if (fetchError) {
        // The fetch itself failed, so "no results" would be misleading.
        return res.status(500).json({ error: 'An unexpected error occurred' });
      }
      return res.json({ courseCode: courseCode, noResults: true });
    } catch (dbError) {
      console.error('Error fetching from MongoDB:', dbError);
      return res.status(500).json({ error: 'Failed to fetch course data from MongoDB' });
    }
  } catch (error) {
    console.error('Unexpected error in /api/search:', error);
    return res.status(500).json({ error: 'An unexpected error occurred' });
  }
});
// Root route: respond with the front-end entry page from the public directory.
app.get('/', (req, res) => {
  const indexPage = path.join(__dirname, 'public', 'index.html');
  res.sendFile(indexPage);
});

// Begin listening on the configured port and log the local URL once ready.
function announceListening() {
  console.log(`Server running at http://localhost:${port}`);
}

app.listen(port, announceListening);