-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake_sitemap.py
More file actions
48 lines (38 loc) · 1.31 KB
/
make_sitemap.py
File metadata and controls
48 lines (38 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import html
import os
from urllib.parse import urlparse

from lxml import etree
from tqdm import tqdm
# Directory, relative to the working directory, where output is saved.
output_dir = "docs"
# Fetch and parse the sitemap
def fetch_sitemap(sitemap_path="docs/sitemap.xml"):
    """Read a sitemap XML file and return the URLs it lists.

    Args:
        sitemap_path: Path of the sitemap file to parse. Defaults to
            ``docs/sitemap.xml``.

    Returns:
        A list with the text of every ``<loc>`` element in the
        sitemaps.org 0.9 namespace, in document order, with surrounding
        whitespace removed. Empty ``<loc>`` elements are skipped.

    Raises:
        OSError: If the sitemap file cannot be opened.
        lxml.etree.XMLSyntaxError: If the file is not well-formed XML.
    """
    # Parse from bytes: lxml rejects str input that carries an XML
    # encoding declaration, which sitemap files normally have.
    with open(sitemap_path, "rb") as sitemap_file:
        sitemap = etree.fromstring(sitemap_file.read())

    locs = sitemap.findall(".//{http://www.sitemaps.org/schemas/sitemap/0.9}loc")
    # An empty <loc/> has text None, and pretty-printed sitemaps pad the URL
    # with whitespace; neither should reach the path-building code.
    return [loc.text.strip() for loc in locs if loc.text and loc.text.strip()]
def make_url_path(url):
    """Map a sitemap URL to the site-relative path of its HTML file.

    Only the path component of *url* is used; ``urlparse`` already drops
    the scheme, host, query string and fragment, so no further stripping
    of the site origin is needed.

    Args:
        url: Absolute page URL, e.g. ``https://devinit.org/about/``.

    Returns:
        The URL path with ``index.html`` appended for directory-style URLs
        (trailing slash, or no path at all), or with ``.html`` appended when
        the path does not already end in ``.html``.
    """
    # A bare origin such as "https://devinit.org" parses to an empty path;
    # treat it as the site root instead of producing ".html".
    path = urlparse(url).path or "/"
    if path.endswith("/"):
        return path + "index.html"
    if not path.endswith(".html"):
        path += ".html"
    return path
def make_sitemap():
    """Write ``sitemap.html`` listing every sitemap URL that has a saved page.

    Each URL returned by ``fetch_sitemap()`` is converted with
    ``make_url_path()``. A link is emitted only when the corresponding file
    exists under ``output_dir``. The links are written as an ordered list to
    ``<output_dir>/sitemap.html``, encoded as UTF-8.
    """
    items = []
    for url in tqdm(fetch_sitemap()):
        url_path = make_url_path(url)
        # url_path starts with "/"; strip it so os.path.join does not treat
        # it as absolute and discard output_dir.
        local_file = os.path.join(output_dir, url_path.lstrip("/"))
        if not os.path.exists(local_file):
            continue
        # Escape the path so characters such as ', & or < cannot break out
        # of the single-quoted href attribute or the link text.
        safe_path = html.escape(url_path, quote=True)
        items.append(f"<li><a href='{safe_path}'>{safe_path}</a></li>\n")

    html_content = (
        "<html><head><title>Sitemap</title></head><body><ol>"
        + "".join(items)
        + "</ol></body></html>"
    )
    # Pin the encoding so the output does not depend on the platform locale.
    target = os.path.join(output_dir, "sitemap.html")
    with open(target, "w", encoding="utf-8") as html_file:
        html_file.write(html_content)
# Generate the HTML sitemap only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    make_sitemap()