-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapeClassLists.py
More file actions
78 lines (52 loc) · 1.85 KB
/
scrapeClassLists.py
File metadata and controls
78 lines (52 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import sys
import os
import re
def student_code(string):
    """Extract the student code found at the start of a class-list line.

    Example: "dan.jn@eduge.ch,,membre,," -> "dan.jn"

    The code is one or more lowercase letters or hyphens, a dot, then any
    number of lowercase letters or digits, anchored at the start of the line.

    Args:
        string: One raw line of the exported list.

    Returns:
        The leading student code as a string.

    Raises:
        IndexError: If the line does not start with a student code.
    """
    # The digit class must include 0: with the former "1-9" range a code such
    # as "john.sm10" was silently truncated to "john.sm1".
    match = re.match(r"[a-z\-]+\.[a-z0-9]*", string)
    if match is None:
        # IndexError is what the previous implementation raised on a miss;
        # keep the type and add a message that identifies the bad line.
        raise IndexError(
            "no student code at start of line: {!r}".format(string)
        )
    return match.group(0)
def is_member(string):
    """Tell whether a class-list line has exactly one "membre" field.

    Example: "dan.jn@eduge.ch,,membre,," -> True

    Args:
        string: One raw line of the exported list.

    Returns:
        True when the text "membre" appears exactly once with a comma
        directly before and directly after it, False otherwise.
    """
    # A lookbehind is required here. The former "(?!,)" was a lookahead placed
    # in front of the letter "m", so it could never fail and values that merely
    # end in "membre" (e.g. "non-membre,") were accepted as members.
    matches = re.findall(r"(?<=,)membre(?=,)", string)
    return len(matches) == 1
def alphabetic(codes):
    """Return the codes ordered by surname part, then by the whole code.

    The surname part is the text between the first and second dot of a code
    (everything after the first dot when there is only one).

    Args:
        codes: List of student codes such as "dan.jn".

    Returns:
        A new sorted list; the input list is left unchanged.
    """
    def surname_then_code(code):
        # Primary key: the surname part; tie-breaker: the complete code.
        return code.split(".")[1], code

    return sorted(codes, key=surname_then_code)
def student_codes(strings):
    """Collect the student codes of all member lines, in alphabetic order.

    Args:
        strings: Iterable of raw lines of the exported list.

    Returns:
        The codes of the lines accepted by is_member(), ordered by
        alphabetic().
    """
    member_codes = []
    for line in strings:
        if is_member(line):
            member_codes.append(student_code(line))
    return alphabetic(member_codes)
def text(strings):
    """Return the sorted member codes as one newline-separated string.

    Args:
        strings: Iterable of raw lines of the exported list.

    Returns:
        The result of student_codes() joined with "\\n" (no trailing newline).
    """
    codes = student_codes(strings)
    return "\n".join(codes)
def scrape_file(file_name):
    """Read a dump file and return the sorted codes of its member lines.

    Args:
        file_name: Path of the file to read (opened in text mode).

    Returns:
        The list produced by student_codes() for the lines of the file.
    """
    with open(file_name, 'r') as handle:
        # Iterating a text file yields its lines, line endings included.
        lines = list(handle)
    return student_codes(lines)
def outFileName(file_name):
    """Derive the output file name from a dump file name.

    Examples:
        "rousseau-cours-3MA1.csv"  -> "3MA1.txt"
        "rousseau-classe-101.csv"  -> "rg101.txt"

    Args:
        file_name: Name of the dump file.

    Returns:
        The name with the substitutions below applied in order, each one to
        every occurrence of its pattern.
    """
    substitutions = (
        ("rousseau-cours-", ""),
        ("rousseau-classe-", "rg"),
        (".csv", ".txt"),
    )
    renamed = file_name
    for old, new in substitutions:
        renamed = renamed.replace(old, new)
    return renamed
def file_paths(dump_file, in_path, out_path):
    """Build the input and output paths for one dump file.

    Args:
        dump_file: Name of the dump file inside in_path.
        in_path: Directory holding the dump files.
        out_path: Directory receiving the generated files.

    Returns:
        A tuple (input path, output path); the output file name is computed
        by outFileName().
    """
    source = os.path.join(in_path, dump_file)
    target_name = outFileName(dump_file)
    return source, os.path.join(out_path, target_name)
def write_codes(file_name, codes):
    """Write the codes to a file, one per line, without a trailing newline.

    Args:
        file_name: Path of the file to create or overwrite.
        codes: Iterable of code strings.
    """
    with open(file_name, 'w') as out:
        content = '\n'.join(codes)
        out.write(content)
def load_scrape_write(in_file, out_file):
    """Scrape the member codes from in_file and write them to out_file.

    Args:
        in_file: Path of the dump file to read.
        out_file: Path of the text file to write.
    """
    # The input is fully read and processed before the output is opened.
    write_codes(out_file, scrape_file(in_file))
def main():
    """Convert every dump file of an input directory into a code list.

    Command line: <script> IN_PATH OUT_PATH

    Each regular file directly inside IN_PATH is scraped and the resulting
    list is written into OUT_PATH under the name given by outFileName().

    Raises:
        RuntimeError: If the number of command-line arguments is not 2.
    """
    if len(sys.argv) != 3:
        raise RuntimeError("Wrong number of args, expected 2 !!")
    in_path, out_path = sys.argv[1], sys.argv[2]

    # makedirs also creates missing parent directories, where a plain mkdir
    # would fail, and exist_ok makes an existing directory a no-op.
    os.makedirs(out_path, exist_ok=True)

    # sorted() gives a deterministic processing order; listdir's is arbitrary.
    for entry in sorted(os.listdir(in_path)):
        # Skip sub-directories (possibly the output directory itself when it
        # is nested in in_path): opening them as files would abort the run.
        if not os.path.isfile(os.path.join(in_path, entry)):
            continue
        load_scrape_write(*file_paths(entry, in_path, out_path))


if __name__ == "__main__":
    main()