-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkopia-find-files.py
More file actions
462 lines (382 loc) · 16.8 KB
/
kopia-find-files.py
File metadata and controls
462 lines (382 loc) · 16.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
#!/usr/bin/env python
"""
kopia-find-files.py - Search for files across Kopia backup snapshots.
Searches for files by filename pattern across all configured repositories and
snapshots. Uses 'kopia ls -l -r' to get file metadata directly (no mounting).
Features:
- Filename pattern matching using fnmatch (like 'find -name')
- Searches across multiple repositories and snapshots
- Shows file versions with modification times and sizes
- Interactive mount option to browse/restore files
- Configurable snapshot search limit (default: 100 most recent)
Pattern Syntax (fnmatch wildcards, like 'find -name'):
* Matches any characters (zero or more), including / (fnmatch does not treat the path separator specially)
? Matches exactly one character
[abc] Matches one character from the set (a, b, or c)
[a-z] Matches one character in the range
Matching modes:
Default Matches filename only (like find -name)
--path Matches full path (like find -path)
Usage:
python kopia-find-files.py "*.py" # Find all .py files (not .pyc)
python kopia-find-files.py "report*.pdf" # Find PDFs starting with 'report'
python kopia-find-files.py "kopia/*.py" --path # .py files under kopia/ (subdirectories included, since * also matches /)
python kopia-find-files.py "*.pdf" --repo mysrc,docs # Search specific repos
python kopia-find-files.py --mount Z: --repo myrepo # Mount for browsing
Requires:
- kopia-helpers.yaml with repository definitions
- Password configured via yaml, environment variable, or .env file
- kopia CLI installed and in PATH
- WinFsp for mount functionality (Windows)
"""
import json
import subprocess
import os
import sys
import logging
import argparse
import time
import re
import fnmatch
from datetime import datetime
from pathlib import Path
import kopia_utils as utils
from typing import List, Dict, Any, Optional, Tuple
# Logging configured in main() after argument parsing
# Exit codes passed to sys.exit() by main().
EXIT_SUCCESS = 0  # NOTE(review): not referenced elsewhere in this file
EXIT_TOOL_ERROR = 1  # kopia executable failed the pre-flight availability check
EXIT_CONFIG_ERROR = 2  # KopiaRunner initialisation raised SystemExit
EXIT_NO_RESULTS = 3  # NOTE(review): not referenced elsewhere in this file; "no matches" currently returns normally
# One entry of `kopia ls -l --no-human-readable`:
#   <perms> <size> <date> <time> <tz> <object-id> <path>
# The zone and object-id fields are matched as opaque non-space tokens.
# Restricting them to \w+ / lowercase hex dropped otherwise valid lines whose
# zone is printed as a numeric offset (e.g. "+1100") or whose object ID
# carries a letter prefix, which silently hid those files from the search.
# Compiled once because the parser runs for every line of every snapshot.
_LS_LINE_RE = re.compile(
    r"^(?P<perms>[d-][rwx-]{9})\s+"
    r"(?P<size>\d+)\s+"
    r"(?P<date>\d{4}-\d{2}-\d{2})\s+"
    r"(?P<time>\d{2}:\d{2}:\d{2})\s+"
    r"(?P<tz>\S+)\s+"
    r"(?P<oid>\S+)\s+"
    r"(?P<path>.+)$"
)


def parse_ls_line(line: str) -> Optional[Tuple[int, str, str]]:
    """Parse a kopia ls -l --no-human-readable line.

    Format: <perms> <size> <date> <time> <tz> <hash> <path>
    Example: -rw-rw-rw- 24155 2025-11-22 21:39:58 AEDT 749004d41187bf2322037fecbb5022af kopia/file.py

    Args:
        line: One line of listing output; surrounding whitespace is ignored.

    Returns:
        (size, modified_str, path) where modified_str is "<date> <time> <tz>",
        or None if the line does not look like a listing entry or describes
        a directory.
    """
    entry = _LS_LINE_RE.match(line.strip())
    if entry is None:
        return None

    # Directories are never search results; only file entries are returned.
    if entry.group("perms").startswith('d'):
        return None

    modified_str = f"{entry.group('date')} {entry.group('time')} {entry.group('tz')}"
    # The size group is \d+, so int() cannot fail here.
    return (int(entry.group("size")), modified_str, entry.group("path"))
def find_in_repo(runner: utils.KopiaRunner, repo_config: Dict[str, Any], filename_pattern: str, max_snapshots: Optional[int] = None, match_path: bool = False, verbose: bool = False) -> List[Dict[str, Any]]:
    """Search for files using kopia ls -l -r (gets file details directly, no mounting).

    Args:
        runner: Object whose run() executes a kopia command and returns a
            4-tuple (success, stdout, stderr, process).
        repo_config: Repository definition; the 'name' and 'repo_config'
            (kopia config file path) keys are read here.
        filename_pattern: fnmatch-style pattern.
        max_snapshots: Search only this many of the newest snapshots;
            None searches all of them. Values below zero are treated as zero.
        match_path: Match the pattern against the full path instead of the
            base name.
        verbose: Print each matching path and extra diagnostics.

    Returns:
        One dict per matching file version with keys 'repo', 'snapshot_id',
        'snapshot_time', 'path', 'size', 'modified' (naive datetime, or None
        if the timestamp could not be parsed) and 'modified_str'. Returns an
        empty list when the snapshot list cannot be obtained.
    """
    name = repo_config['name']
    config_file = repo_config['repo_config']

    print(f" Searching in {name}...")

    # List snapshots
    listed, stdout, stderr, _ = runner.run(
        ["snapshot", "list", "--json", "--config-file", config_file],
        repo_config=repo_config, readonly=True
    )
    if not listed:
        print(f" ERROR: Could not list snapshots. {stderr.strip()}")
        return []

    try:
        snapshots = json.loads(stdout)
    except json.JSONDecodeError:
        print(f" Failed to parse snapshot list JSON.")
        if verbose:
            print(f" Stdout: {stdout}")
            print(f" Stderr: {stderr}")
        return []

    if not snapshots:
        print(" No snapshots found in this repository.")
        return []

    # Newest first, so the limit below keeps the most recent snapshots.
    snapshots.sort(key=lambda s: s.get('startTime', ''), reverse=True)

    if max_snapshots is not None:
        # Clamp at zero: a negative limit would slice from the end and drop
        # the oldest N snapshots instead of limiting the search.
        snapshots = snapshots[:max(max_snapshots, 0)]
        logging.debug(f"Limiting search to {len(snapshots)} most recent snapshots")

    matches = []
    for snap in snapshots:
        snap_id = snap.get('id')
        if not snap_id:
            continue
        snap_time = snap.get('startTime', '')  # e.g., "2025-11-22T05:08:53Z"

        # Get file listing with details (size, mtime) - no mounting needed!
        listed, ls_out, ls_err, _ = runner.run(
            ["ls", "-l", "--no-human-readable", "-r", snap_id, "--config-file", config_file],
            repo_config=repo_config, readonly=True
        )
        if not listed:
            # Report the failure; skipping silently would make the results
            # look complete when a snapshot was never searched.
            print(f" WARNING: Could not list files in snapshot {snap_id}; skipping. {(ls_err or '').strip()}")
            continue

        for line in ls_out.splitlines():
            # parse_ls_line returns None for blank lines, directories and
            # anything that is not a listing entry.
            parsed = parse_ls_line(line)
            if not parsed:
                continue
            size, modified_str, file_path = parsed

            # Match against filename or full path based on --path flag
            candidate = file_path if match_path else os.path.basename(file_path)
            if not fnmatch.fnmatch(candidate, filename_pattern):
                continue

            if verbose:
                print(f" Match: {file_path}")

            # modified_str looks like "2025-11-22 21:39:58 AEDT"; only the
            # date and time are parsed, the zone token is ignored.
            try:
                modified = datetime.strptime(" ".join(modified_str.split()[:2]), "%Y-%m-%d %H:%M:%S")
            except ValueError:
                modified = None

            matches.append({
                'repo': name,
                'snapshot_id': snap_id,
                'snapshot_time': snap_time,
                'path': file_path,
                'size': size,
                'modified': modified,
                'modified_str': modified_str,
            })

    return matches
def mount_repository(runner: utils.KopiaRunner, repo_config: Dict[str, Any], drive_letter: str, verbose: bool = False) -> None:
    """Mount all snapshots of a repository and block until interrupted.

    Starts `kopia mount all <drive_letter>` without waiting for it, then
    polls once per second until the mount process exits on its own or the
    user presses Ctrl+C, at which point the process is terminated.

    Args:
        runner: Object whose run() executes a kopia command and returns a
            4-tuple (success, stdout, stderr, process).
        repo_config: Repository definition; the 'name' and 'repo_config'
            keys are read here.
        drive_letter: Mount target passed to kopia as-is (e.g. "Z:").
        verbose: Print the mount process output if it exits unexpectedly.
    """
    name = repo_config['name']
    config_file = repo_config['repo_config']

    print(f"\nPreparing to mount '{name}' to {drive_letter}...")

    # Verify repository is accessible
    accessible, _, stderr, _ = runner.run(
        ["snapshot", "list", "--json", "--config-file", config_file],
        repo_config=repo_config, readonly=True
    )
    if not accessible:
        print(f" ERROR: Could not access repository. {stderr.strip()}")
        return

    # Mount 'all' snapshots
    print(f"Mounting all snapshots to {drive_letter}...")
    print("Press Ctrl+C to unmount and exit.")

    started, _, err, process = runner.run(
        ["mount", "all", drive_letter, "--config-file", config_file],
        repo_config=repo_config, wait=False
    )
    if not started or process is None:
        print(f"Failed to start mount process: {err}")
        return

    try:
        # Wait for user to interrupt
        while True:
            time.sleep(1)
            if process.poll() is not None:
                stdout, stderr = process.communicate()
                print(f"Mount process exited unexpectedly.")
                if verbose:
                    print(f"Stdout: {stdout}")
                    print(f"Stderr: {stderr}")
                break
    except KeyboardInterrupt:
        print("\nUnmounting...")
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            # Reap the killed child; without this wait it is never collected.
            process.wait()
        print("Done.")
def find_free_drive_letter() -> Optional[str]:
    """Find an available drive letter for mounting (searches Z: down to E:).

    A letter counts as in use if it appears in `net use` output or if its
    root path exists on the filesystem.

    Returns:
        A drive spec such as "Z:", or None when every letter from E to Z is
        in use.
    """
    import string

    # Get drives in use from net use (includes stale WebDAV mounts)
    mapped_drives = set()
    try:
        # errors='replace': the output may not be valid UTF-8 (console code
        # pages vary). Drive letters are ASCII, so undecodable bytes must
        # not abort the scan.
        result = subprocess.run(
            ['net', 'use'], capture_output=True, text=True,
            encoding='utf-8', errors='replace',
        )
    except (OSError, subprocess.SubprocessError):
        # Best effort: if `net` cannot be run, rely on the filesystem probe.
        pass
    else:
        for token in result.stdout.split():
            if len(token) == 2 and token[1] == ':' and token[0] in string.ascii_letters:
                mapped_drives.add(token[0].upper())

    # Get drives that are actually accessible via filesystem
    accessible_drives = {
        letter for letter in string.ascii_uppercase
        if os.path.exists(f"{letter}:\\")
    }

    # A drive is "in use" if it's EITHER mapped OR accessible
    used_drives = mapped_drives | accessible_drives

    # Walk Z down to E; A-D are never offered.
    for letter in reversed(string.ascii_uppercase[4:]):
        if letter not in used_drives:
            return f"{letter}:"
    return None
def list_repositories(config: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Print a numbered menu of repositories and return them.

    Args:
        config: Mapping with a 'repositories' list; each entry must have a
            'name' and may have a 'repo_destination'.

    Returns:
        The same list object stored under config['repositories'], so the
        printed indices can be used to index the result.
    """
    repositories = config['repositories']
    print("\nAvailable Repositories:")
    for index, repo in enumerate(repositories):
        # The destination is shown only for information; the search and mount
        # functions never read it, so a missing value must not crash the menu.
        destination = repo.get('repo_destination', 'destination not configured')
        print(f"[{index}] {repo['name']} ({destination})")
    return repositories
def _resolve_mount_drive(mount_arg: str) -> Optional[str]:
    """Turn the --mount value into a drive spec such as 'Z:'.

    'auto' picks a free letter via find_free_drive_letter(); if none is free
    an error is printed and None is returned. Any other value is used as
    given, with a trailing ':' appended when missing.
    """
    if mount_arg == 'auto':
        drive = find_free_drive_letter()
        if not drive:
            print("Error: No available drive letters (E-Z all in use)")
            return None
        return drive
    return mount_arg if mount_arg.endswith(":") else mount_arg + ":"


def main():
    """Command-line entry point: search snapshots for files and/or mount a repository.

    Modes:
      * --mount without a pattern: mount one repository (prompting when
        several are candidates) and block until interrupted.
      * pattern given: search every selected repository, print a table of
        matching file versions, and with --mount offer to mount a repository
        that had matches.

    Exits with EXIT_TOOL_ERROR when kopia is unavailable and with
    EXIT_CONFIG_ERROR when KopiaRunner initialisation aborts; every other
    path returns normally.
    """
    parser = argparse.ArgumentParser(description="Find files or Mount Kopia backups")
    parser.add_argument("pattern", nargs='?', help="Filename pattern (fnmatch: *.py, report*, [a-z]*.txt)")
    parser.add_argument("--mount", nargs='?', const='auto', default=None,
                        help="Mount repository (optionally specify drive letter, e.g. --mount Z:)")
    parser.add_argument("--repo", help="Comma-separated list of repo names to search (default: all repos)")
    # NOTE(review): --restore is parsed but never acted on in this function.
    parser.add_argument("--restore", help="Restore selected file to this directory (default: Downloads)", const=str(Path.home() / "Downloads"), nargs='?')
    parser.add_argument("-n", "--last", type=int, default=100,
                        help="Number of recent snapshots to search (default: 100)")
    parser.add_argument("--all", action="store_true", dest="search_all",
                        help="Search all snapshots (no limit)")
    parser.add_argument("--path", action="store_true", help="Match against full path (default: filename only)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Show executed commands")
    parser.add_argument("--log-level", default="WARNING",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        help="Set logging level (default: WARNING)")

    args = parser.parse_args()

    # A zero or negative limit can never mean "the N most recent snapshots".
    if not args.search_all and args.last < 1:
        parser.error("-n/--last must be a positive integer (use --all to search every snapshot)")

    # Configure logging
    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format='%(message)s',
        handlers=[logging.StreamHandler(sys.stdout)]
    )

    # Pre-flight check: ensure kopia is available
    kopia_ok, kopia_result = utils.check_tool_available(utils.KOPIA_EXE)
    if not kopia_ok:
        print(f"ERROR: {kopia_result}", file=sys.stderr)
        print(utils.get_kopia_install_instructions(), file=sys.stderr)
        sys.exit(EXIT_TOOL_ERROR)

    # Initialize KopiaRunner; a SystemExit raised during initialisation is
    # translated into the config-error exit code.
    try:
        runner = utils.KopiaRunner()
        config = runner.config
    except SystemExit:
        sys.exit(EXIT_CONFIG_ERROR)

    repos = config['repositories']

    # Determine snapshot limit
    max_snapshots = None if args.search_all else args.last

    # Filter repos if --repo is specified
    if args.repo:
        repo_filter = [r.strip() for r in args.repo.split(',')]
        repos = [r for r in repos if r['name'] in repo_filter]
        if not repos:
            print(f"No matching repositories found for: {args.repo}")
            return

    if args.mount and not args.pattern:
        # Mount-only Mode (no search)
        if len(repos) == 1:
            selected_repo = repos[0]
        else:
            # Interactive selection for mount
            print("Multiple repositories found. Please select one to mount:")
            # Offer only the repositories that survived the --repo filter so
            # the menu and the selectable set are the same list.
            repos_list = list_repositories({'repositories': repos})
            try:
                choice = int(input("\nSelect repository number: "))
            except ValueError:
                print("Invalid input.")
                return
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or Ctrl+C at the prompt: leave quietly.
                print("\nExiting.")
                return
            if not 0 <= choice < len(repos_list):
                print("Invalid selection.")
                return
            selected_repo = repos_list[choice]

        # Get drive letter (auto-find or user-specified)
        drive = _resolve_mount_drive(args.mount)
        if drive is None:
            return

        mount_repository(runner, selected_repo, drive, verbose=args.verbose)
        return

    # Search Mode
    if not args.pattern:
        parser.print_help()
        print("\nError: You must provide a filename pattern to search, or use --mount.")
        return

    # Step 1: Fast search using kopia ls -r (no mounting)
    print(f"\nSearching for files matching: {args.pattern}")
    all_matches = []
    header_printed = False

    for repo in repos:
        matches = find_in_repo(runner, repo, args.pattern, max_snapshots=max_snapshots, match_path=args.path, verbose=args.verbose)
        if not matches:
            continue

        # Print header on first results
        if not header_printed:
            print(f"\n{'='*115}")
            print(f"{'#':<4} {'Modified':<26} {'Size (KB)':>14} {'Repo':<12} {'Path'}")
            print(f"{'-'*115}")
            header_printed = True

        # Sort this repo's matches by modification time (newest first);
        # entries with an unparsed timestamp sort last.
        matches.sort(key=lambda x: x['modified'] or datetime.min, reverse=True)

        # Print results immediately; the index is global across repositories.
        for m in matches:
            idx = len(all_matches)
            size_kb = m['size'] / 1024
            print(f"{idx:<4} {m['modified_str']:<26} {size_kb:>14.3f} {m['repo']:<12} {m['path']}")
            all_matches.append(m)

    if not all_matches:
        print("\nNo matching files found.")
        return

    print(f"{'='*115}")
    print(f"Found {len(all_matches)} version(s)")

    # Skip mount prompt unless --mount flag provided
    if not args.mount:
        return

    # Repositories that produced matches, in first-match order.
    repos_by_name = {}
    for repo in repos:
        repos_by_name.setdefault(repo['name'], repo)
    unique_repos = {}
    for m in all_matches:
        repo_name = m['repo']
        if repo_name not in unique_repos and repo_name in repos_by_name:
            unique_repos[repo_name] = repos_by_name[repo_name]

    # Get drive letter (auto-find or user-specified)
    drive = _resolve_mount_drive(args.mount)
    if drive is None:
        return

    # Offer to mount for browsing
    try:
        if len(unique_repos) == 1:
            repo_to_mount = list(unique_repos.values())[0]
            confirm = input(f"\nMount '{repo_to_mount['name']}' to {drive}? (y/N): ").strip().lower()
            if confirm == 'y':
                mount_repository(runner, repo_to_mount, drive, verbose=args.verbose)
        else:
            # Multiple repos, let user choose
            print(f"\nRepositories with matches (will mount to {drive}):")
            repo_list = list(unique_repos.items())
            for i, (name, repo) in enumerate(repo_list):
                count = sum(1 for m in all_matches if m['repo'] == name)
                print(f" [{i}] {name} ({count} version(s))")

            choice = input("\nSelect repository number to mount (or Enter to skip): ").strip()
            if choice:
                repo_choice = int(choice)
                if 0 <= repo_choice < len(repo_list):
                    repo_to_mount = repo_list[repo_choice][1]
                    mount_repository(runner, repo_to_mount, drive, verbose=args.verbose)
                else:
                    print("Invalid selection.")
    except (ValueError, EOFError, KeyboardInterrupt):
        # Non-numeric choice, closed stdin, or Ctrl+C at a prompt.
        print("\nExiting.")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()