|
3 | 3 | """ |
4 | 4 | import inspect |
5 | 5 | import os.path |
| 6 | +import urllib.parse |
6 | 7 | import shutil |
7 | 8 | import sys |
8 | 9 | import threading |
9 | 10 | import time |
10 | 11 | import uuid |
11 | 12 | from collections import OrderedDict |
12 | 13 | from io import BytesIO |
| 14 | +from pathlib import Path |
13 | 15 | from typing import Callable |
14 | 16 |
|
15 | 17 | import requests |
@@ -54,66 +56,102 @@ def restart() -> None: |
54 | 56 | sys.exit() |
55 | 57 |
|
56 | 58 |
|
57 | | -def download_file_to_cache(url: str, headers=None, file_name: str = "", |
58 | | - download_path: str = None, stream=False, fake_headers: bool = True) -> str | None: |
| 59 | +def download_file_to_cache(url: str, |
| 60 | + headers=None, |
| 61 | + file_name: str = None, |
| 62 | + max_size: int = None, |
| 63 | + timeout: int = 30, |
| 64 | + download_path: str = paths.CACHE_PATH, |
| 65 | + stream=True, |
| 66 | + fake_headers: bool = True) -> str | None: |
59 | 67 | """ |
60 | 68 | 下载文件到缓存 |
| 69 | + **请自行保证下载链接的安全性** |
61 | 70 | Args: |
62 | 71 | url: 下载的url |
63 | 72 | headers: 下载请求的请求头 |
64 | 73 | file_name: 文件名 |
| 74 | + max_size: 最大大小,单位字节,None则为不限制 |
| 75 | + timeout: 请求超时时间 |
65 | 76 | download_path: 下载路径 |
66 | 77 | stream: 是否使用流式传输 |
67 | 78 | fake_headers: 是否使用自动生成的假请求头 |
68 | 79 | Returns: |
69 | | - 文件路径 |
| 80 | + 文件路径,如果请求失败则返回None |
70 | 81 | """ |
71 | 82 | if headers is None: |
72 | 83 | headers = {} |
73 | 84 |
|
74 | 85 | if fake_headers: |
75 | | - headers["User-Agent"] = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " |
76 | | - "Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42") |
77 | | - headers["Accept-Language"] = "zh-CN,zh;q=0.9,en;q=0.8,da;q=0.7,ko;q=0.6" |
78 | | - headers["Accept-Encoding"] = "gzip, deflate, br" |
79 | | - headers["Accept"] = ("text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8," |
80 | | - "application/signed-exchange;v=b3;q=0.7") |
81 | | - headers["Connection"] = "keep-alive" |
82 | | - headers["Upgrade-Insecure-Requests"] = "1" |
83 | | - headers["Cache-Control"] = "max-age=0" |
84 | | - headers["Sec-Fetch-Dest"] = "document" |
85 | | - headers["Sec-Fetch-Mode"] = "navigate" |
86 | | - headers["Sec-Fetch-Site"] = "none" |
87 | | - headers["Sec-Fetch-User"] = "?1" |
88 | | - headers["Sec-Ch-Ua"] = "\"Chromium\";v=\"113\", \"Not-A.Brand\";v=\"24\", \"Microsoft Edge\";v=\"113\"" |
89 | | - headers["Sec-Ch-Ua-Mobile"] = "?0" |
90 | | - headers["Sec-Ch-Ua-Platform"] = "\"Windows\"" |
91 | | - headers["Host"] = url.split("/")[2] |
| 86 | + headers.update({ |
| 87 | + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " |
| 88 | + "Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42", |
| 89 | + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,da;q=0.7,ko;q=0.6", |
| 90 | + "Accept-Encoding": "gzip, deflate, br", |
| 91 | + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8," |
| 92 | + "application/signed-exchange;v=b3;q=0.7", |
| 93 | + "Connection": "keep-alive", |
| 94 | + "Host": urllib.parse.urlparse(url).hostname |
| 95 | + }) |
92 | 96 |
|
93 | 97 | # 路径拼接 |
94 | | - if file_name == "": |
| 98 | + if file_name is None: |
95 | 99 | file_name = uuid.uuid4().hex + ".cache" |
96 | 100 |
|
97 | | - if download_path is None: |
98 | | - file_path = os.path.join(paths.CACHE_PATH, file_name) |
99 | | - else: |
100 | | - file_path = os.path.join(download_path, file_name) |
101 | | - |
102 | | - # 路径不存在特判 |
103 | | - if not os.path.exists(paths.CACHE_PATH): |
104 | | - os.makedirs(paths.CACHE_PATH) |
| 101 | + file_path = Path(download_path) / file_name |
| 102 | + if paths.CACHE_PATH in file_path.parents: |
| 103 | + try: |
| 104 | + if not file_path.resolve().is_relative_to(paths.CACHE_PATH.resolve()): |
| 105 | + logger.warning("下载文件失败: 文件路径解析后超出缓存目录") |
| 106 | + return None |
| 107 | + except FileNotFoundError: |
| 108 | + pass |
| 109 | + file_path = str(file_path) |
105 | 110 |
|
106 | 111 | try: |
107 | 112 | # 下载 |
108 | 113 | if stream: |
109 | | - with open(file_path, "wb") as f, requests.get(url, stream=True, headers=headers) as res: |
110 | | - for chunk in res.iter_content(chunk_size=64 * 1024): |
111 | | - if not chunk: |
112 | | - break |
113 | | - f.write(chunk) |
| 114 | + # 使用流式下载 |
| 115 | + with requests.get(url, stream=True, timeout=timeout, headers=headers) as res: |
| 116 | + res.raise_for_status() # 请求失败则抛出异常 |
| 117 | + |
| 118 | + # 优先从Content-Length判断 |
| 119 | + content_length = res.headers.get('Content-Length') |
| 120 | + if max_size and content_length and int(content_length) > max_size: |
| 121 | + logger.warning(f"下载中止: 文件大小 ({content_length} B) 超出限制 ({max_size} B)") |
| 122 | + return None |
| 123 | + |
| 124 | + downloaded_size = 0 |
| 125 | + with open(file_path, "wb") as f: |
| 126 | + for chunk in res.iter_content(chunk_size=8192): |
| 127 | + downloaded_size += len(chunk) |
| 128 | + if max_size and downloaded_size > max_size: |
| 129 | + logger.warning(f"下载中止: 文件在传输过程中超出大小限制 ({max_size} B)") |
| 130 | + f.close() |
| 131 | + os.remove(file_path) |
| 132 | + return None |
| 133 | + f.write(chunk) |
114 | 134 | else: |
115 | 135 | # 不使用流式传输 |
| 136 | + if max_size is not None: |
| 137 | + # 获取响应头 |
| 138 | + res = requests.head(url, timeout=timeout, headers=headers) |
| 139 | + if "Content-Length" in res.headers: |
| 140 | + # 获取响应头中的Content-Length |
| 141 | + content_length = int(res.headers["Content-Length"]) |
| 142 | + if content_length > max_size: |
| 143 | + logger.warning(f"下载中止: 文件大小 ({content_length} B) 超出限制 ({max_size} B)") |
| 144 | + return None |
| 145 | + else: |
| 146 | + logger.warning(f"下载文件失败: HEAD请求未获取到文件大小,建议使用流式传输") |
| 147 | + return None |
| 148 | + |
116 | 149 | res = requests.get(url, headers=headers) |
| 150 | + res.raise_for_status() |
| 151 | + |
| 152 | + if len(res.content) > max_size: |
| 153 | + logger.warning(f"下载中止: 文件在传输过程中超出大小限制 ({max_size} B)") |
| 154 | + return None |
117 | 155 |
|
118 | 156 | with open(file_path, "wb") as f: |
119 | 157 | f.write(res.content) |
|
0 commit comments