diff --git a/.github/workflows/bb_builder.yaml b/.github/workflows/bb_builder.yaml
index c34feea..65916a3 100644
--- a/.github/workflows/bb_builder.yaml
+++ b/.github/workflows/bb_builder.yaml
@@ -45,7 +45,7 @@ jobs:
         env:
           tool_ver: ${{ matrix.manifest }}
         id: humannversplit
-        run: TMP=${tool_ver##*,} && echo "HUMANNVERSION=${TMP##*--}" >> $GITHUB_OUTPUT
+        run: TMP=${tool_ver##*,} && AFTER_DASH=${TMP#*--} && echo "HUMANNVERSION=${AFTER_DASH%%_*}" >> $GITHUB_OUTPUT
       - name: Get metaphlan version
         env:
           tool_ver: ${{ matrix.manifest }}
diff --git a/biobakery-profiler/Dockerfile b/biobakery-profiler/Dockerfile
index 2d6c946..a971e16 100644
--- a/biobakery-profiler/Dockerfile
+++ b/biobakery-profiler/Dockerfile
@@ -24,16 +24,19 @@ FROM python:3.9.17-slim-bookworm AS pybuild
 ARG HUMANNVERSION=0.0.0
 ARG METAPHLANVERSION=0.0.0
 
+# IMPORTANT: libglpk-dev is needed here at compile time
+# so that the Python GLPK bindings compile correctly
 RUN apt-get update && \
     DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y \
         build-essential \
-        wget \
-        ca-certificates \
+        git \
         patch \
         libjpeg-dev \
         zlib1g-dev \
         libbz2-dev \
-        liblzma-dev && \
+        liblzma-dev \
+        libglpk-dev \
+        glpk-utils && \
     rm -rf /var/lib/apt/lists/*
 
 RUN python3 -m venv /opt/venv
@@ -42,21 +45,53 @@ ENV PATH="/opt/venv/bin:$PATH"
 COPY nucleotide_${HUMANNVERSION}.patch /tmp/nucleotide.patch
 COPY config_${HUMANNVERSION}.patch /tmp/config.patch
 
-RUN pip install --no-cache-dir numpy cython && \
-    pip install --no-cache-dir boto3 cloudpickle awscli && \
-    pip install --no-cache-dir biom-format && \
-    wget -qO- https://github.com/biobakery/humann/archive/refs/tags/${HUMANNVERSION}.tar.gz \
-        | tar xz -C /tmp && \
-    mv /tmp/humann-* /tmp/humann && \
+# IMPORTANT: Installation order matters!
+# 1. numpy and cython MUST be installed before biom-format
+# 2. biom-format must be installed before humann
+# 3. GLPK libs must be present when installing packages that link against them
+# see: https://forum.biobakery.org/t/silent-errors-with-bad-install-of-libglpk/4814
+
+# Step 1: numpy and cython first — biom-format needs these at build time
+RUN pip install --no-cache-dir numpy cython
+
+# Step 2: biom-format — needs numpy already installed to compile
+RUN pip install --no-cache-dir biom-format && \
+    python3 -c "from biom import load_table; print('biom-format OK')"
+
+# Step 3: Other Python dependencies
+RUN pip install --no-cache-dir boto3 cloudpickle awscli
+
+# Step 4: humann — needs biom-format already working
+RUN set -e && \
+    git clone --depth=1 --branch ${HUMANNVERSION} https://github.com/biobakery/humann.git /tmp/humann && \
     cd /tmp/humann && \
     patch humann/search/nucleotide.py < /tmp/nucleotide.patch && \
     patch humann/config.py < /tmp/config.patch && \
-    mv setup.py tmp.py && \
-    sed 's|2\.2\.3|2\.5\.1|g' tmp.py > tmp2.py && \
-    sed 's|bowtie2_folder="bowtie2-2\.5\.1|bowtie2_folder="bowtie2-2\.5\.1-linux-x86_64|g' tmp2.py > setup.py && \
+    cp setup.py setup.py.orig && \
+    sed -i 's|2\.2\.3|2\.5\.1|g' setup.py && \
+    sed -i 's|bowtie2_folder="bowtie2-2\.5\.1|bowtie2_folder="bowtie2-2\.5\.1-linux-x86_64|g' setup.py && \
+    diff setup.py.orig setup.py || true && \
     pip install --no-cache-dir . --no-binary :all: && \
-    pip install --no-cache-dir metaphlan==${METAPHLANVERSION} && \
-    find /opt/venv -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null; \
+    ldconfig /usr/local/lib && \
+    glpsol --version
+
+# Step 5: metaphlan after humann
+RUN pip install --no-cache-dir metaphlan==${METAPHLANVERSION}
+
+# Verify the full dependency chain works
+RUN set -e && \
+    echo "=== Verifying dependency chain ===" && \
+    python3 -c "import numpy; print(f'numpy {numpy.__version__} OK')" && \
+    python3 -c "from biom import load_table; print('biom-format OK')" && \
+    python3 -c "import humann; print('humann OK')" && \
+    python3 -c "import metaphlan; print('metaphlan OK')" && \
+    which humann && \
+    humann --version && \
+    ls /opt/venv/bin/humann* && \
+    echo "=== All pybuild checks passed ==="
+
+# Clean up
+RUN find /opt/venv -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null; \
     find /opt/venv -type d -name "tests" -exec rm -rf {} + 2>/dev/null; \
     find /opt/venv -type d -name "test" -exec rm -rf {} + 2>/dev/null; \
     find /opt/venv -name "*.pyc" -delete 2>/dev/null; \
@@ -68,17 +103,12 @@ RUN pip install --no-cache-dir numpy cython && \
 # ============================================================
 FROM python:3.9.17-slim-bookworm AS runtime-image
 
-ARG HUMANNVERSION=0.0.0
-ARG METAPHLANVERSION=0.0.0
-
-# Copy samtools binary and its needed libs
 COPY --from=dbbuild /usr/local/bin/samtools /usr/local/bin/samtools
-
-# Copy the entire pre-built venv
 COPY --from=pybuild /opt/venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 
-# Install ONLY runtime dependencies — no build-essential, no git
+# IMPORTANT: Runtime versions of the same libraries that were used at compile time
+# libglpk40 + glpk-utils must be here or humann/biom will fail at runtime
 RUN apt-get update && \
     DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y \
         libjpeg62-turbo \
@@ -92,10 +122,52 @@ RUN apt-get update && \
         libglpk40 \
         glpk-utils && \
     rm -rf /var/lib/apt/lists/* && \
-    ldconfig /usr/local/lib && \
-    glpsol --version
+    ldconfig /usr/local/lib
 
 ENV LC_ALL=en_US.UTF-8
 ENV LANG=en_US.UTF-8
-
 WORKDIR /tmp
+
+# Remaining runtime tools plus generation of the en_US.UTF-8 locale named by
+# LC_ALL/LANG. This runs before the smoke tests so they exercise the final
+# package set with that locale already generated. libjpeg62-turbo, libglpk40
+# and glpk-utils are installed by the RUN above and are not repeated here.
+RUN apt-get update && \
+    DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y \
+        libbz2-1.0 \
+        libgomp1 \
+        liblzma5 \
+        locales \
+        mafft \
+        ncbi-blast+ \
+        procps \
+        raxml \
+        zlib1g && \
+    rm -rf /var/lib/apt/lists/* && \
+    ldconfig /usr/local/lib && \
+    sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && \
+    locale-gen en_US.UTF-8
+
+# Thorough smoke tests — verify the full chain works at runtime
+# samtools is invoked without a pipe: unless pipefail is enabled, a pipeline
+# reports only its last command's status, so `| head -1` could hide a
+# samtools failure from this check.
+RUN set -e && \
+    echo "=== Runtime smoke tests ===" && \
+    echo "--- Binary checks ---" && \
+    glpsol --version && \
+    samtools --version && \
+    echo "--- Python import chain (order matters) ---" && \
+    python3 -c "import numpy; print(f'numpy {numpy.__version__} OK')" && \
+    python3 -c "from biom import load_table; print('biom-format OK')" && \
+    python3 -c "import humann; print('humann OK')" && \
+    python3 -c "import metaphlan; print('metaphlan OK')" && \
+    echo "--- HUMAnN tool checks ---" && \
+    humann --version && \
+    humann_config --print && \
+    humann_regroup_table --help > /dev/null && \
+    humann_renorm_table --help > /dev/null && \
+    humann_join_tables --help > /dev/null && \
+    echo "--- MetaPhlAn check ---" && \
+    metaphlan --version && \
+    echo "=== All runtime smoke tests passed ==="
diff --git a/biobakery_build_manifest.csv b/biobakery_build_manifest.csv
index bab1bb7..8bc8841 100644
--- a/biobakery_build_manifest.csv
+++ b/biobakery_build_manifest.csv
@@ -1,3 +1,3 @@
-biobakery-profiler,4.0.6--4.0.0.alpha.1-final-smaller
+biobakery-profiler,4.0.6--4.0.0.alpha.1-final_smaller-pt2
 biobakery-profiler,4.1.0--v3.9
-biobakery-profiler,4.0.5--3.6.1
+biobakery-profiler,4.0.5--3.6.1_smaller-pt2