-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplit_docs.py
More file actions
102 lines (82 loc) · 3.2 KB
/
split_docs.py
File metadata and controls
102 lines (82 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python3
"""
GPU Security Toolkit - Document Splitter
Automatically splits large markdown documents into mdBook chapters
"""
import re
import os
from pathlib import Path
def split_by_h2_headers(input_file, output_dir, prefix=""):
"""Split markdown file by ## headers into separate chapter files"""
print(f"\nProcessing: {input_file}")
print(f"Output to: {output_dir}/")
if not os.path.exists(input_file):
print(f" ⚠ File not found, skipping")
return
with open(input_file, 'r', encoding='utf-8') as f:
content = f.read()
# Split on ## headers (H2 level)
sections = re.split(r'^## (.+)$', content, flags=re.MULTILINE)
# Create output directory
Path(output_dir).mkdir(parents=True, exist_ok=True)
# First section (before any H2) becomes README.md
intro = sections[0].strip()
if intro:
readme_path = Path(output_dir) / "README.md"
with open(readme_path, 'w', encoding='utf-8') as f:
f.write(intro + "\n")
print(f" ✓ {readme_path}")
# Process each H2 section
file_count = 0
for i in range(1, len(sections), 2):
if i+1 >= len(sections):
break
title = sections[i].strip()
body = sections[i+1].strip()
# Create filename from title
filename = title.lower()
# Remove special characters
filename = re.sub(r'[^\w\s-]', '', filename)
# Replace spaces/underscores with hyphens
filename = re.sub(r'[-\s_]+', '-', filename)
# Add prefix if provided
if prefix:
filename = f"{prefix}-{filename}"
filename = f"{filename}.md"
# Write chapter file
filepath = Path(output_dir) / filename
with open(filepath, 'w', encoding='utf-8') as f:
f.write(f"# {title}\n\n{body}\n")
print(f" ✓ {filepath}")
file_count += 1
print(f" Created {file_count} chapter files")
def main():
"""Split all GPU security documentation into mdBook structure"""
print("=" * 60)
print("GPU Security Toolkit - Content Splitter")
print("=" * 60)
# Split each major document
documents = [
('nvidia_gpu_security_controls.md', 'src/controls', ''),
('gpu_threat_model_frameworks.md', 'src/threats', ''),
('gpu_use_case_security_guide.md', 'src/use-cases', ''),
('gpu_forensics_complete_guide.md', 'src/forensics', ''),
('gpu_forensics_incident_response.md', 'src/forensics', 'ir'),
]
total_files = 0
for input_file, output_dir, prefix in documents:
if os.path.exists(input_file):
split_by_h2_headers(input_file, output_dir, prefix)
total_files += len(list(Path(output_dir).glob('*.md')))
print("\n" + "=" * 60)
print(f"✓ Content splitting complete!")
print(f"✓ Total files created: {total_files}")
print("=" * 60)
print("\nNext steps:")
print(" 1. Review files in src/ directories")
print(" 2. Update src/SUMMARY.md with new file paths")
print(" 3. Run: mdbook build")
print(" 4. Run: mdbook serve --open")
print("")
if __name__ == "__main__":
main()