Skip to content

Commit 9cade9f

Browse files
committed
refactor: refactored by new version
1 parent 7c0aacc commit 9cade9f

1,635 files changed

Lines changed: 456109 additions & 148200 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.clang-format

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,15 @@ AllowShortIfStatementsOnASingleLine: Never
2222
AllowShortLambdasOnASingleLine: Inline
2323
AllowShortLoopsOnASingleLine: false
2424
AlwaysBreakBeforeMultilineStrings: true
25+
# Treat CUDA keywords/attributes as "attribute macros" and avoid breaking lines inside them
26+
AttributeMacros:
27+
- __host__
28+
- __device__
29+
- __global__
30+
- __forceinline__
31+
- __launch_bounds__
2532
BinPackArguments: true
26-
BinPackParameters: true # OnePerLine
33+
BinPackParameters: false # OnePerLine
2734
BitFieldColonSpacing: Both
2835
BreakBeforeBraces: Custom # Attach
2936
BraceWrapping:
@@ -70,15 +77,18 @@ ExperimentalAutoDetectBinPacking: false
7077
FixNamespaceComments: true
7178
IncludeBlocks: Regroup
7279
IncludeCategories:
73-
- Regex: '^<.*\.h>'
80+
- Regex: '".*"'
7481
Priority: 1
7582
SortPriority: 0
76-
- Regex: '^<.*'
83+
- Regex: '^<.*\.h>'
7784
Priority: 2
7885
SortPriority: 0
79-
- Regex: '.*'
86+
- Regex: '^<.*'
8087
Priority: 3
8188
SortPriority: 0
89+
- Regex: '.*'
90+
Priority: 4
91+
SortPriority: 0
8292
IncludeIsMainRegex: '([-_](test|unittest))?$'
8393
IncludeIsMainSourceRegex: ''
8494
IndentAccessModifiers: false

.clang-tidy

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@ Checks: >
1313
-readability-magic-numbers,
1414
-readability-uppercase-literal-suffix,
1515
-readability-simplify-boolean-expr,
16+
-readability-math-missing-parentheses,
1617
clang-analyzer-*,
1718
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
1819
performance-*,
20+
-performance-enum-size,
1921
portability-*,
2022
-portability-simd-intrinsics,
2123
misc-*,

.devops/cann.Dockerfile

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# ==============================================================================
2+
# ARGUMENTS
3+
# ==============================================================================
4+
5+
# Define the CANN base image for easier version updates later
6+
ARG CHIP_TYPE=910b
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
8+
9+
# ==============================================================================
10+
# BUILD STAGE
11+
# Compile all binary files and libraries
12+
# ==============================================================================
13+
FROM ${CANN_BASE_IMAGE} AS build
14+
15+
# -- Install build dependencies --
16+
RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
17+
yum clean all && \
18+
rm -rf /var/cache/yum
19+
20+
# -- Set the working directory --
21+
WORKDIR /app
22+
23+
# -- Copy project files --
24+
COPY . .
25+
26+
# -- Set CANN environment variables (required for compilation) --
27+
# Using ENV instead of `source` makes the variables persist across all subsequent instructions in this stage (a `source` only lasts for the single RUN that executes it)
28+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
29+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
30+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
31+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
32+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
33+
# ... You can add other environment variables from the original file as needed ...
34+
# For brevity, only core variables are listed here. You can paste the original ENV list here.
35+
36+
# -- Build llama.cpp --
37+
# Re-declare CHIP_TYPE (an ARG defined before FROM is not visible inside a stage until re-declared), then use it together with the general build options
38+
ARG CHIP_TYPE
39+
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
40+
&& \
41+
cmake -B build \
42+
-DGGML_CANN=ON \
43+
-DCMAKE_BUILD_TYPE=Release \
44+
-DSOC_TYPE=ascend${CHIP_TYPE} \
45+
-DUSE_ACL_GRAPH=ON \
46+
. && \
47+
cmake --build build --config Release -j$(nproc)
48+
49+
# -- Organize build artifacts for copying in later stages --
50+
# Create a lib directory to store all .so files
51+
RUN mkdir -p /app/lib && \
52+
find build -name "*.so*" -exec cp -P {} /app/lib \;
53+
54+
# Create a full directory to store all executables and Python scripts
55+
RUN mkdir -p /app/full && \
56+
cp build/bin/* /app/full/ && \
57+
cp *.py /app/full/ && \
58+
cp -r gguf-py /app/full/ && \
59+
cp -r requirements /app/full/ && \
60+
cp requirements.txt /app/full/
61+
# If you have a tools.sh script, make sure it is copied here
62+
# cp .devops/tools.sh /app/full/tools.sh
63+
64+
# ==============================================================================
65+
# BASE STAGE
66+
# Create a minimal base image with CANN runtime and common libraries
67+
# ==============================================================================
68+
FROM ${CANN_BASE_IMAGE} AS base
69+
70+
# -- Install runtime dependencies --
71+
RUN yum install -y libgomp curl && \
72+
yum clean all && \
73+
rm -rf /var/cache/yum
74+
75+
# -- Set CANN environment variables (required for runtime) --
76+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
77+
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
78+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
79+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
80+
# ... You can add other environment variables from the original file as needed ...
81+
82+
WORKDIR /app
83+
84+
# Copy compiled .so files from the build stage
85+
COPY --from=build /app/lib/ /app
86+
87+
# ==============================================================================
88+
# FINAL STAGES (TARGETS)
89+
# ==============================================================================
90+
91+
### Target: full
92+
# Complete image with all tools, Python bindings, and dependencies
93+
# ==============================================================================
94+
FROM base AS full
95+
96+
COPY --from=build /app/full /app
97+
98+
# Install Python dependencies
99+
RUN yum install -y git python3 python3-pip && \
100+
pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
101+
pip3 install --no-cache-dir -r requirements.txt && \
102+
yum clean all && \
103+
rm -rf /var/cache/yum
104+
105+
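# You need to provide a tools.sh script as the entrypoint: the build stage does not copy one into /app/full (the `cp .devops/tools.sh` line there is commented out), so this ENTRYPOINT fails at container start unless tools.sh is supplied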
106+
ENTRYPOINT ["/app/tools.sh"]
107+
# If there is no tools.sh, you can set the default to start the server
108+
# ENTRYPOINT ["/app/llama-server"]
109+
110+
### Target: light
111+
# Lightweight image containing only llama-cli and llama-completion
112+
# ==============================================================================
113+
FROM base AS light
114+
115+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
116+
117+
ENTRYPOINT [ "/app/llama-cli" ]
118+
119+
### Target: server
120+
# Dedicated server image containing only llama-server
121+
# ==============================================================================
122+
FROM base AS server
123+
124+
ENV LLAMA_ARG_HOST=0.0.0.0
125+
126+
COPY --from=build /app/full/llama-server /app
127+
128+
HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]
129+
130+
ENTRYPOINT [ "/app/llama-server" ]

.devops/cpu.Dockerfile

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,25 @@ ARG UBUNTU_VERSION=22.04
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

5+
ARG TARGETARCH
6+
57
RUN apt-get update && \
6-
apt-get install -y build-essential git cmake libcurl4-openssl-dev
8+
apt-get install -y build-essential git cmake libssl-dev
79

810
WORKDIR /app
911

1012
COPY . .
1113

12-
RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
14+
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
15+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
16+
else \
17+
echo "Unsupported architecture"; \
18+
exit 1; \
19+
fi && \
1320
cmake --build build -j $(nproc)
1421

1522
RUN mkdir -p /app/lib && \
16-
find build -name "*.so" -exec cp {} /app/lib \;
23+
find build -name "*.so*" -exec cp -P {} /app/lib \;
1724

1825
RUN mkdir -p /app/full \
1926
&& cp build/bin/* /app/full \
@@ -61,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6168
### Light, CLI only
6269
FROM base AS light
6370

64-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
6572

6673
WORKDIR /app
6774

.devops/cuda-new.Dockerfile

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
ARG UBUNTU_VERSION=24.04
2+
# This needs to generally match the container host's environment.
3+
ARG CUDA_VERSION=13.1.0
4+
# Target the CUDA build image
5+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6+
7+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8+
9+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10+
11+
# CUDA architecture to build for (defaults to all supported archs)
12+
ARG CUDA_DOCKER_ARCH=default
13+
14+
RUN apt-get update && \
15+
apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
16+
17+
WORKDIR /app
18+
19+
COPY . .
20+
21+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
22+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
23+
fi && \
24+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
25+
cmake --build build --config Release -j$(nproc)
26+
27+
RUN mkdir -p /app/lib && \
28+
find build -name "*.so*" -exec cp -P {} /app/lib \;
29+
30+
RUN mkdir -p /app/full \
31+
&& cp build/bin/* /app/full \
32+
&& cp *.py /app/full \
33+
&& cp -r gguf-py /app/full \
34+
&& cp -r requirements /app/full \
35+
&& cp requirements.txt /app/full \
36+
&& cp .devops/tools.sh /app/full/tools.sh
37+
38+
## Base image
39+
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
40+
41+
RUN apt-get update \
42+
&& apt-get install -y libgomp1 curl\
43+
&& apt autoremove -y \
44+
&& apt clean -y \
45+
&& rm -rf /tmp/* /var/tmp/* \
46+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
47+
&& find /var/cache -type f -delete
48+
49+
COPY --from=build /app/lib/ /app
50+
51+
### Full
52+
FROM base AS full
53+
54+
COPY --from=build /app/full /app
55+
56+
WORKDIR /app
57+
58+
RUN apt-get update \
59+
&& apt-get install -y \
60+
git \
61+
python3 \
62+
python3-pip \
63+
python3-wheel \
64+
&& pip install --break-system-packages --upgrade setuptools \
65+
&& pip install --break-system-packages -r requirements.txt \
66+
&& apt autoremove -y \
67+
&& apt clean -y \
68+
&& rm -rf /tmp/* /var/tmp/* \
69+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
70+
&& find /var/cache -type f -delete
71+
72+
73+
ENTRYPOINT ["/app/tools.sh"]
74+
75+
### Light, CLI only
76+
FROM base AS light
77+
78+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
79+
80+
WORKDIR /app
81+
82+
ENTRYPOINT [ "/app/llama-cli" ]
83+
84+
### Server, llama-server only
85+
FROM base AS server
86+
87+
ENV LLAMA_ARG_HOST=0.0.0.0
88+
89+
COPY --from=build /app/full/llama-server /app
90+
91+
WORKDIR /app
92+
93+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
94+
95+
ENTRYPOINT [ "/app/llama-server" ]

.devops/cuda.Dockerfile

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG CUDA_VERSION=12.6.0
3+
ARG CUDA_VERSION=12.4.0
44
# Target the CUDA build image
55
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
66

@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
1212
ARG CUDA_DOCKER_ARCH=default
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
15+
apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
1616

1717
WORKDIR /app
1818

@@ -21,11 +21,11 @@ COPY . .
2121
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
2222
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
2323
fi && \
24-
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
24+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
2525
cmake --build build --config Release -j$(nproc)
2626

2727
RUN mkdir -p /app/lib && \
28-
find build -name "*.so" -exec cp {} /app/lib \;
28+
find build -name "*.so*" -exec cp -P {} /app/lib \;
2929

3030
RUN mkdir -p /app/full \
3131
&& cp build/bin/* /app/full \
@@ -61,7 +61,7 @@ RUN apt-get update \
6161
python3 \
6262
python3-pip \
6363
&& pip install --upgrade pip setuptools wheel \
64-
&& pip install -r requirements.txt \
64+
&& pip install --break-system-packages -r requirements.txt \
6565
&& apt autoremove -y \
6666
&& apt clean -y \
6767
&& rm -rf /tmp/* /var/tmp/* \
@@ -74,7 +74,7 @@ ENTRYPOINT ["/app/tools.sh"]
7474
### Light, CLI only
7575
FROM base AS light
7676

77-
COPY --from=build /app/full/llama-cli /app
77+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7878

7979
WORKDIR /app
8080

0 commit comments

Comments
 (0)