This repository was archived by the owner on Jan 16, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathPearsonDL.py
More file actions
executable file
·104 lines (85 loc) · 3.49 KB
/
PearsonDL.py
File metadata and controls
executable file
·104 lines (85 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python3
from urllib import request
from argparse import ArgumentParser
from time import time
from uuid import UUID
from multiprocessing import Process, Pool, cpu_count
from os import path, makedirs, sep, remove
from sys import exit
from glob import glob
def main():
    """Download every page image of a book and optionally merge them into a PDF.

    Command-line options (read from ``sys.argv``):
        -p / --pages         number of pages in the book (required, must be >= 1)
        -i / --id            the book's product number and ID (required)
        -g / --generate_pdf  build ``book.pdf`` from the downloaded PNG files
        -r / --remove_png    delete the PNG files after the PDF was generated
        -v / --verbose       print one line per downloaded page

    Page images are stored as ``Pearson Books/<id>/<page>.png``. The process
    exits with status 1 when a required option is missing or invalid, when
    PIL cannot be imported for PDF generation, or when there are no page
    images to build a PDF from.
    """
    parser = ArgumentParser()
    parser.add_argument('-p', '--pages', type=int, help='The number of pages in this book.')
    parser.add_argument('-i', '--id', type=str, help='This book\'s product number and ID.')
    parser.add_argument('-g', "--generate_pdf", action='store_true', help='Generate a PDF from the downloaded images.')
    parser.add_argument('-r', "--remove_png", action='store_true', help='Remove the downloaded PNG files after PDF generation.')
    parser.add_argument('-v', "--verbose", action='store_true', help='Show verbose output.')
    args = parser.parse_args()

    if not args.id:
        print("Please provide a value for this book's product number and ID!")
        exit(1)
    elif args.pages is None or args.pages < 1:
        # A zero or negative page count would silently download nothing.
        print("Please provide a value for the number of pages in this book!")
        exit(1)
    # End if/else block

    print("PearsonDL")
    print("Developed by NamesJoeyWheeler, FurryLovingMcNugget, and cdchris12")
    print("---------------------------------------------------")

    _id = args.id
    pages = args.pages

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    book_dir = path.join('Pearson Books', _id)
    makedirs(book_dir, exist_ok=True)

    print("Downloading page images...")
    failed_pages = []
    with Pool(cpu_count()) as pool:
        # Queue one download task per page and keep the handles so that
        # exceptions raised inside the workers can be surfaced afterwards.
        pending = [
            (page, pool.apply_async(get_files, args=(_id, page, args.verbose)))
            for page in range(pages)
        ]
        pool.close()
        for page, result in pending:
            try:
                result.get()
            except Exception as err:
                # Best effort: report the failing page but keep fetching the
                # rest instead of aborting the whole run.
                failed_pages.append(page)
                print(f"Failed to download page {page + 1}: {err}")
            # End try/except block
        # End for
        pool.join()
    # End with
    print("Page downloads complete!")
    if failed_pages:
        print(f"Warning: {len(failed_pages)} page(s) could not be downloaded.")
    # End if

    if not args.generate_pdf:
        return
    # End if

    try:
        from PIL import Image
    except ModuleNotFoundError:
        print("Python Imaging Library not installed, unable to generate PDF!")
        exit(1)
    # End try/except block

    print('Generating a PDF... (This may take a while)')

    # Grab list of created image files and sort them numerically
    image_list = sorted(glob(path.join(book_dir, "*.png")), key=_page_number)
    if not image_list:
        print("No page images were found, unable to generate PDF!")
        exit(1)
    # End if

    # convert() returns an independent in-memory copy, so every source file
    # can be closed as soon as its page has been converted.
    converted_list = []
    for png in image_list:
        with Image.open(png) as page_image:
            converted_list.append(page_image.convert('RGB'))
        # End with
    # End for

    first_page, *other_pages = converted_list
    first_page.save(path.join(book_dir, 'book.pdf'), save_all=True, append_images=other_pages, optimize=True)
    print("Generated a PDF!")

    if args.remove_png:
        print("Cleaning up downloaded PNG files...")
        for png in image_list:
            remove(png)
        # End for
        print("PNG files removed!")
    # End if
# End def


def _page_number(png_path):
    """Return the page number encoded in the file name of a ``<page>.png`` path."""
    # Use the basename rather than a fixed path component index so that the
    # result does not depend on how many directories precede the file.
    return int(path.splitext(path.basename(png_path))[0])
# End def
def get_files(_id, page, verbose=False):
    """Download one page image and save it as ``Pearson Books/<_id>/<page>.png``.

    The target directory is not created here; it must already exist.

    Args:
        _id: The book's product number and ID, inserted verbatim into the
            asset URL and the output path.
        page: Page index to fetch, used verbatim in the URL and file name.
        verbose: When true, print a confirmation using the 1-based page
            number once the file has been written.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` included).
        OSError: If the image cannot be written to disk.
    """
    pb = f'https://plus.pearson.com/eplayer/pdfassets/prod1/{_id}/pages/page{page}?password=&accessToken=null&formMode=true'
    target = path.join('Pearson Books', _id, f'{page}.png')

    # Use a private opener instead of install_opener() so that no
    # process-wide urllib state is modified by every download.
    opener = request.build_opener()
    # Placeholder value: it has to be replaced with a real cookie by hand.
    opener.addheaders = [('Cookie', 'ADD COOKIE VALUE HERE')]

    # Read the whole response before creating the file, so a failed transfer
    # does not leave a truncated PNG behind.
    with opener.open(pb) as response:
        image_data = response.read()
    # End with
    with open(target, 'wb') as png_file:
        png_file.write(image_data)
    # End with

    if verbose:
        print(f"Downloaded page {page+1}!")
    # End if
# End def
if __name__ == "__main__":
    # Measure the wall-clock duration of the complete run and report it.
    began_at = time()
    main()
    elapsed = time() - began_at
    print("--- Generated in %.4f seconds ---" % elapsed)
# End if