13 changes: 13 additions & 0 deletions core/storages/__init__.py
@@ -1,6 +1,7 @@
from core.classes import Storage
from core.storages.local import LocalStorage
from core.storages.alist import AListStorage
from core.storages.s3 import S3Storage
from core.config import Config
from typing import List

@@ -20,4 +21,16 @@ def getStorages() -> List[Storage]:
                    path=storage["path"],
                )
            )
        if storage["type"] == "s3":
            storages.append(
                S3Storage(
                    endpoint=storage["endpoint"],
                    access_key_id=storage["access_key_id"],
                    secret_access_key=storage["secret_access_key"],
                    signature_version=storage.get("signature_version", "s3v4"),
                    bucket=storage["bucket"],
                    addressing_style=storage.get("addressing_style", "auto"),
                    session_token=storage.get("session_token", None),
                )
            )
    return storages
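
Note: getStorages() implies a storage config entry shaped roughly like the sketch below. This is an illustration, not part of the PR; every value is a placeholder, and only endpoint, access_key_id, secret_access_key, and bucket are strictly required, since the remaining keys fall back to defaults via .get().

    # Hypothetical config entry as consumed by getStorages(); all values are placeholders.
    storage = {
        "type": "s3",
        "endpoint": "https://s3.example.com",
        "access_key_id": "<access-key-id>",
        "secret_access_key": "<secret-access-key>",
        "bucket": "<bucket-name>",
        "signature_version": "s3v4",   # optional, defaults to "s3v4"
        "addressing_style": "auto",    # optional, defaults to "auto"
        "session_token": None,         # optional, defaults to None
    }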
117 changes: 115 additions & 2 deletions core/storages/s3.py
@@ -1,9 +1,12 @@
from core.classes import Storage, FileInfo, FileList
from core.logger import logger
from core.i18n import locale
from botocore.config import Config
from botocore.exceptions import ClientError
from typing import Literal, Union, Self
from tqdm import tqdm
from aiohttp import web
from pathlib import Path
import boto3
import humanize
import io
@@ -89,7 +92,8 @@
        s3_files = {}
        try:
            paginator = self.client.get_paginator("list_objects_v2")
            # boto3 paginators are synchronous, so iterate them directly here
            for page in paginator.paginate(Bucket=self.bucket):
                for obj in page.get("Contents", []):
                    s3_files[obj["Key"]] = obj["Size"]
        except ClientError as e:
@@ -100,4 +104,113 @@
                file_key = f"{file.hash[:2]}/{file.hash}"
                if file_key not in s3_files or s3_files[file_key] != file.size:
                    missing_files.append(file)
                pbar.update(1)

        return FileList(files=missing_files)

    async def express(self, hash: str, counter: dict) -> Union[web.Response, web.FileResponse]:
        file_key = f"{hash[:2]}/{hash}"
        try:
            # Check that the object exists and get its size
            response = await asyncio.to_thread(
                self.client.head_object,
                Bucket=self.bucket,
                Key=file_key
            )
            file_size = response["ContentLength"]

            # Generate a presigned URL for downloading the file
            url = await asyncio.to_thread(
                self.client.generate_presigned_url,
                "get_object",
                Params={"Bucket": self.bucket, "Key": file_key},
                ExpiresIn=3600  # URL valid for 1 hour
            )

            # Redirect the client to the presigned URL
            response = web.HTTPFound(url)
            response.headers["x-bmclapi-hash"] = hash
            counter["bytes"] += file_size
            counter["hits"] += 1
            return response
        except ClientError as e:
            if e.response["Error"]["Code"] == "404":
                return web.HTTPNotFound()
            logger.terror("storage.error.s3.express", hash=hash, e=e)
            return web.HTTPError(text=str(e))

Check warning (Code scanning / CodeQL)
Information exposure through an exception (Medium): Stack trace information flows to this location and may be exposed to an external user.

Copilot Autofix (AI, 3 months ago):

In general, to fix information exposure via exceptions you should log detailed exception data on the server side and return only generic, non-sensitive error messages to clients. Avoid including str(e), stack traces, or other internal state directly in responses.

For this concrete case in core/storages/s3.py, the best fix is to replace web.HTTPError(text=str(e)) with generic error responses. We can keep the existing logging calls (logger.terror(...)) so that developers still have full diagnostic information, but change what is returned to the client:

  • For ClientError where the code is not "404", we can respond with a 502 or 500-level error with a generic message such as "Failed to access storage backend" or a localized equivalent.
  • For the generic Exception handler, respond with a generic 500 error (e.g., web.HTTPInternalServerError) and a minimal message.

We already import locale from core.i18n; following existing patterns in the project, we can use it to obtain a localized generic error message key like "storage.error.s3.express". The only lines that need modification are 145 and 148 (the two return web.HTTPError(text=str(e)) statements). No new imports or helper methods are required.

Suggested changeset 1: core/storages/s3.py

Autofix patch. Run the following command in your local git repository to apply it:
cat << 'EOF' | git apply
diff --git a/core/storages/s3.py b/core/storages/s3.py
--- a/core/storages/s3.py
+++ b/core/storages/s3.py
@@ -142,10 +142,10 @@
             if e.response["Error"]["Code"] == "404":
                 return web.HTTPNotFound()
             logger.terror("storage.error.s3.express", hash=hash, e=e)
-            return web.HTTPError(text=str(e))
+            return web.HTTPBadGateway(text=locale("storage.error.s3.express"))
         except Exception as e:
             logger.terror("storage.error.s3.express", hash=hash, e=e)
-            return web.HTTPError(text=str(e))
+            return web.HTTPInternalServerError(text=locale("storage.error.s3.express"))
 
     async def recycleFiles(self, files: FileList) -> None:
         """Clean up files in S3 that are not in the valid file list"""
EOF
        except Exception as e:
            logger.terror("storage.error.s3.express", hash=hash, e=e)
            return web.HTTPError(text=str(e))

Check warning (Code scanning / CodeQL)
Information exposure through an exception (Medium): Stack trace information flows to this location and may be exposed to an external user.

Copilot Autofix (AI, 3 months ago):

In general, to fix information exposure via exceptions, you should never send raw exception messages or stack traces back to the client. Instead, log detailed errors server-side and return a generic, user-safe error response (optionally with a stable error code that can be correlated in logs).

For this specific code, the best minimal fix is to keep the detailed logging (logger.terror("storage.error.s3.express", ...)) for diagnostics but replace web.HTTPError(text=str(e)) with a response that contains a generic message. We should do this for both the ClientError and generic Exception branches in express while keeping existing behavior (status codes, logging, counters) as intact as possible. Because we must not assume additional project structure, we can directly use a short, non-sensitive message like "Internal server error" in the HTTP error body and leave the status code to web.HTTPError’s default (500). No new imports are needed, since we are just changing the argument passed to web.HTTPError.

Concretely:

  • In core/storages/s3.py, locate the express method.
  • On line 145, change web.HTTPError(text=str(e)) to web.HTTPError(text="Internal server error").
  • On line 148, do the same: replace str(e) with the same generic message.
  • Keep the logger.terror(...) calls unchanged so that developers still see the real exception details in logs.

This preserves existing functionality (S3 presigned-URL creation, counter updates, logging, 404 behavior) while preventing sensitive information from being exposed to clients.

Suggested changeset 1: core/storages/s3.py

Autofix patch. Run the following command in your local git repository to apply it:
cat << 'EOF' | git apply
diff --git a/core/storages/s3.py b/core/storages/s3.py
--- a/core/storages/s3.py
+++ b/core/storages/s3.py
@@ -142,10 +142,10 @@
             if e.response["Error"]["Code"] == "404":
                 return web.HTTPNotFound()
             logger.terror("storage.error.s3.express", hash=hash, e=e)
-            return web.HTTPError(text=str(e))
+            return web.HTTPError(text="Internal server error")
         except Exception as e:
             logger.terror("storage.error.s3.express", hash=hash, e=e)
-            return web.HTTPError(text=str(e))
+            return web.HTTPError(text="Internal server error")
 
     async def recycleFiles(self, files: FileList) -> None:
         """Clean up files in S3 that are not in the valid file list"""
EOF

    async def recycleFiles(self, files: FileList) -> None:
        """Clean up files in S3 that are not in the valid file list"""
        delete_files = []

        # Get all valid file keys
        valid_keys = {
            f"{file.hash[:2]}/{file.hash}"
            for file in files.files
        }

        # List all files in the S3 bucket
        try:
            paginator = self.client.get_paginator("list_objects_v2")
            all_s3_keys = []
            for page in paginator.paginate(Bucket=self.bucket):
                for obj in page.get("Contents", []):
                    all_s3_keys.append(obj["Key"])
        except ClientError as e:
            logger.terror("storage.error.s3.recycle.list", e=e)
            return

        # Find files to delete
        with tqdm(
            desc=locale.t("storage.tqdm.desc.recycling_check"),
            total=len(all_s3_keys),
            unit_scale=True,
            unit=locale.t("storage.tqdm.unit.files"),
        ) as pbar:
            for key in all_s3_keys:
                pbar.update(1)
                if key not in valid_keys:
                    delete_files.append(key)

        if len(delete_files) == 0:
            logger.tinfo("storage.success.s3.no_need_to_recycle")
            return

        # Delete files in batches (S3 allows max 1000 objects per delete request)
        with tqdm(
            desc=locale.t("storage.tqdm.desc.recycling"),
            total=len(delete_files),
            unit_scale=True,
            unit=locale.t("storage.tqdm.unit.files"),
        ) as pbar:
            total_size = 0
            for i in range(0, len(delete_files), 1000):
                batch = delete_files[i:i + 1000]
                try:
                    # Get sizes before deleting
                    for key in batch:
                        try:
                            response = self.client.head_object(Bucket=self.bucket, Key=key)
                            total_size += response["ContentLength"]
                        except Exception:
                            pass

                    # Delete batch
                    self.client.delete_objects(
                        Bucket=self.bucket,
                        Delete={
                            "Objects": [{"Key": key} for key in batch],
                            "Quiet": True
                        }
                    )
                    pbar.update(len(batch))
                except ClientError as e:
                    logger.terror("storage.error.s3.recycle", e=e)

        logger.tsuccess(
            "storage.success.s3.recycled",
            size=humanize.naturalsize(total_size, binary=True),
        )
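
Note: as the comment in the listing above points out, boto3 paginators are synchronous, so the list_objects_v2 loops in both the missing-file check and recycleFiles block the event loop on large buckets. Below is a minimal sketch of moving the whole scan onto a worker thread with asyncio.to_thread, the same pattern express already uses for head_object and generate_presigned_url. The helper name _list_bucket_sync is hypothetical, not part of the PR.

    import asyncio

    # Hypothetical helper: run the blocking pagination off the event loop.
    def _list_bucket_sync(client, bucket: str) -> dict:
        sizes = {}
        paginator = client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket):
            for obj in page.get("Contents", []):
                sizes[obj["Key"]] = obj["Size"]
        return sizes

    # Inside the async method that builds s3_files:
    #     s3_files = await asyncio.to_thread(_list_bucket_sync, self.client, self.bucket)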
6 changes: 6 additions & 0 deletions i18n/zh_cn.json
@@ -35,6 +35,12 @@
"storage.error.s3.write_file.size_mismatch": "无法校验 S3 储存文件 ${file} 的大小。理论值:${file_size},实际值:${actual_file_size}。",
"storage.error.s3.write_file.retry": "在尝试写入 S3 储存文件 ${file} 时遇到错误:${e},将在 ${retry}s 后重试。",
"storage.error.s3.write_file.failed": "无法写入 S3 储存文件 ${file},已达到最高重试次数。",
"storage.error.s3.get_s3_files": "无法获取 S3 储存文件列表:${e}。",
"storage.error.s3.express": "无法从 S3 储存获取文件 ${hash}:${e}。",
"storage.error.s3.recycle.list": "无法列出 S3 储存文件进行回收:${e}。",
"storage.error.s3.recycle": "无法回收 S3 储存文件:${e}。",
"storage.success.s3.recycled": "成功回收了 ${size} 的 S3 储存文件。",
"storage.success.s3.no_need_to_recycle": "当前 S3 储存无需要回收的文件。",
"i18n.prompt.failed": "(i18n 字符串解析失败)",
"cluster.info.filelist.fetching": "正在获取文件列表……",
"cluster.success.filelist.fetched": "成功获取文件列表!",