diff --git a/sql/Classes/down_just_year.sql b/sql/Classes/down_just_year.sql new file mode 100644 index 0000000..1fd6d08 --- /dev/null +++ b/sql/Classes/down_just_year.sql @@ -0,0 +1,2 @@ +DELETE FROM Classes +WHERE "year" = {year}; \ No newline at end of file diff --git a/sql/Classes/up.sql b/sql/Classes/up.sql index d476f96..0df3159 100644 --- a/sql/Classes/up.sql +++ b/sql/Classes/up.sql @@ -1,11 +1,12 @@ CREATE TYPE status_enum AS ENUM ('Open', 'Closed', 'Full', 'On Hold'); CREATE TABLE Classes ( "class_id" VARCHAR(255) PRIMARY KEY, - "career" VARCHAR(255), - "course_id" VARCHAR(255) NOT NULL, + "career" VARCHAR(255) NOT NULL, + "course_id" VARCHAR(255) NOT NULL, + "class_nr" VARCHAR(255) NOT NULL, "section" VARCHAR(255) NOT NULL, "term" VARCHAR(50) NOT NULL, - "year" VARCHAR(4) NOT NULL, + "year" INT NOT NULL, "activity" VARCHAR(255) NOT NULL, "status" status_enum, "course_enrolment" VARCHAR(255) NOT NULL, diff --git a/sql/Classes/up_just_year.sql b/sql/Classes/up_just_year.sql new file mode 100644 index 0000000..f26a647 --- /dev/null +++ b/sql/Classes/up_just_year.sql @@ -0,0 +1,21 @@ +SELECT 1; +-- CREATE TYPE IF NOT EXISTS status_enum AS ENUM ('Open', 'Closed', 'Full', 'On Hold'); +-- CREATE TABLE IF NOT EXISTS Classes ( +-- "class_id" VARCHAR(255) PRIMARY KEY, +-- "career" VARCHAR(255) NOT NULL, +-- "course_id" VARCHAR(255) NOT NULL, +-- "class_nr" VARCHAR(255) NOT NULL, +-- "section" VARCHAR(255) NOT NULL, +-- "term" VARCHAR(50) NOT NULL, +-- "year" INT NOT NULL, +-- "activity" VARCHAR(255) NOT NULL, +-- "status" status_enum, +-- "course_enrolment" VARCHAR(255) NOT NULL, +-- "offering_period" VARCHAR(255) NOT NULL, +-- "meeting_dates" VARCHAR(255) NOT NULL, +-- "census_date" VARCHAR(255) NOT NULL, +-- "consent" VARCHAR(255) NOT NULL, +-- "mode" VARCHAR(255) NOT NULL, +-- "class_notes" TEXT, +-- FOREIGN KEY ("course_id") REFERENCES Courses("course_id") ON DELETE CASCADE +-- ); diff --git a/sql/Courses/down_just_year.sql b/sql/Courses/down_just_year.sql new file mode 100644 index 0000000..cf6d058 --- /dev/null +++ b/sql/Courses/down_just_year.sql @@ -0,0 +1,2 @@ +DELETE FROM Courses +WHERE "year" = {year}; \ No newline at end of file diff --git a/sql/Courses/up.sql b/sql/Courses/up.sql index f215c4a..56ae534 100644 --- a/sql/Courses/up.sql +++ b/sql/Courses/up.sql @@ -1,12 +1,13 @@ CREATE TABLE Courses ( "course_id" VARCHAR(255) PRIMARY KEY, - "course_code" VARCHAR(8), --id + "course_code" VARCHAR(8) NOT NULL, + "year" INT NOT NULL, "course_name" VARCHAR(255) NOT NULL, - "uoc" INT NOT NULL, + "uoc" INT NOT NULL, "faculty" VARCHAR(255), "school" VARCHAR(255), "campus" VARCHAR(255), - "career" VARCHAR(255), - "terms" TEXT, + "career" VARCHAR(255) NOT NULL, + "terms" VARCHAR(255)[], "modes" VARCHAR(255)[] ); \ No newline at end of file diff --git a/sql/Courses/up_just_year.sql b/sql/Courses/up_just_year.sql new file mode 100644 index 0000000..2d66dda --- /dev/null +++ b/sql/Courses/up_just_year.sql @@ -0,0 +1,14 @@ +SELECT 1; +-- CREATE TABLE IF NOT EXISTS Courses ( +-- "course_id" VARCHAR(255) PRIMARY KEY, +-- "course_code" VARCHAR(8) NOT NULL, +-- "year" INT NOT NULL, +-- "course_name" VARCHAR(255) NOT NULL, +-- "uoc" INT NOT NULL, +-- "faculty" VARCHAR(255), +-- "school" VARCHAR(255), +-- "campus" VARCHAR(255), +-- "career" VARCHAR(255) NOT NULL, +-- "terms" VARCHAR(255)[], +-- "modes" VARCHAR(255)[] +-- ); \ No newline at end of file diff --git a/sql/Times/down_just_year.sql b/sql/Times/down_just_year.sql new file mode 100644 index 0000000..b704212 --- /dev/null +++ b/sql/Times/down_just_year.sql @@ -0,0 +1,2 @@ +DELETE FROM Times +WHERE "year" = {year}; \ No newline at end of file diff --git a/sql/Times/up.sql b/sql/Times/up.sql index edd03e9..8842eee 100644 --- a/sql/Times/up.sql +++ b/sql/Times/up.sql @@ -1,11 +1,12 @@ CREATE TABLE Times ( - "id" VARCHAR(500) PRIMARY KEY, + "time_id" VARCHAR(511) PRIMARY KEY, + "year" INT NOT NULL, "class_id" VARCHAR(255) NOT NULL, "day" VARCHAR(255) NOT NULL, "instructor" VARCHAR(255), "location" VARCHAR(255) NOT NULL, - "time" VARCHAR(100) NOT NULL, - "weeks" VARCHAR(100) NOT NULL, - "career" VARCHAR(255), + "time" VARCHAR(255) NOT NULL, + "weeks" VARCHAR(255) NOT NULL, + "career" VARCHAR(255) NOT NULL, FOREIGN KEY ("class_id") REFERENCES Classes("class_id") ON DELETE CASCADE ); diff --git a/sql/Times/up_just_year.sql b/sql/Times/up_just_year.sql new file mode 100644 index 0000000..d290066 --- /dev/null +++ b/sql/Times/up_just_year.sql @@ -0,0 +1,13 @@ +SELECT 1; +-- CREATE TABLE IF NOT EXISTS Times ( +-- "time_id" VARCHAR(511) PRIMARY KEY, +-- "year" INT NOT NULL, +-- "class_id" VARCHAR(255) NOT NULL, +-- "day" VARCHAR(255) NOT NULL, +-- "instructor" VARCHAR(255), +-- "location" VARCHAR(255) NOT NULL, +-- "time" VARCHAR(255) NOT NULL, +-- "weeks" VARCHAR(255) NOT NULL, +-- "career" VARCHAR(255) NOT NULL, +-- FOREIGN KEY ("class_id") REFERENCES Classes("class_id") ON DELETE CASCADE +-- ); diff --git a/src/config.rs b/src/config.rs index 41047dd..205e30e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -10,6 +10,7 @@ pub struct ScrapingEnv { timetable_api_url: String, } +#[derive(Debug)] pub struct ScrapingConfig { timetable_api_url: String, } @@ -29,6 +30,7 @@ impl ScrapingConfig { /// Regexes that can be used to extract the year and course code from a UNSW /// timetable url. +#[derive(Debug)] pub struct TimetableUrlRegex { year_only_regex: Regex, course_url_regex: Regex, diff --git a/src/course_scraper.rs b/src/course_scraper.rs index a0e2b62..8e13f94 100644 --- a/src/course_scraper.rs +++ b/src/course_scraper.rs @@ -4,18 +4,18 @@ use scraper::Selector; use serde::Serialize; use std::collections::{HashMap, HashSet}; -use crate::{ScrapingContext, text_manipulators::extract_text}; +use crate::{ScrapingContext, Year, text_manipulators::extract_text}; #[derive(Debug, Serialize)] pub struct Course { pub course_id: String, pub course_code: String, + pub year: Year, pub course_name: String, pub uoc: i32, - // TODO: try making non-optional. pub faculty: Option, pub school: Option, - pub career: Option, + pub career: String, // Sorted ascendingly. pub modes: Vec, // For Notangles. pub campus: Option, @@ -28,9 +28,10 @@ pub struct Class { pub course_id: String, pub career: String, pub class_id: String, + pub class_nr: String, pub section: String, pub term: String, - pub year: String, + pub year: Year, pub activity: String, pub status: String, pub course_enrolment: String, @@ -45,10 +46,21 @@ pub struct Class { #[derive(Debug, Serialize)] pub struct Time { + pub time_id: String, + pub year: Year, pub career: String, + pub location: String, pub day: String, pub time: String, + pub weeks: String, + pub instructor: Option, +} + +#[derive(Debug, Serialize)] +pub struct PartialTime { pub location: String, + pub day: String, + pub time: String, pub weeks: String, pub instructor: Option, } @@ -60,6 +72,7 @@ pub struct PartialCourse { pub career: String, pub uoc: i32, pub url: String, + pub year: Year, } impl PartialCourse { @@ -162,14 +175,20 @@ impl PartialCourse { } } - let course_id = format!("{}{}", &self.course_code, career); + // The reason we aren't including the term in the course id is that the handbook only + // contains one page per course per year, which contains data for the course that year. + // We use the same format to reduce duplicated data. + let course_id = format!("{}-{}-{}", self.course_code, career, self.year); + let course_code = self.course_code; let course_name = self.course_name; let uoc = self.uoc; let classes: Vec = class_activity_information .into_par_iter() - .map(|class_data| parse_class_info(class_data, course_id.as_str(), career.as_ref())) + .map(|class_data| { + parse_class_info(class_data, &course_id, &course_code, &career, self.year) + }) .collect::>()?; let unique_modes: HashSet<&String> = classes.iter().map(|class| &class.mode).collect(); @@ -180,12 +199,13 @@ impl PartialCourse { Ok(Course { course_id, course_code, + year: self.year, course_name, uoc, faculty, school, campus, - career: Some(career), + career, modes, terms, classes, @@ -200,11 +220,13 @@ impl PartialCourse { fn parse_class_info( class_data: Vec, course_id: &str, - career: &str, + course_code: &str, + course_career: &str, + course_year: Year, ) -> anyhow::Result { let mut map: HashMap<&str, &str> = HashMap::new(); let mut i = 0; - let mut times_parsed = Vec::