Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 85 additions & 49 deletions biobakery-profiler/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,65 +1,101 @@
FROM python:3.9.17-bookworm AS dbbuild
# ============================================================
# Stage 1: Build samtools
# ============================================================
FROM python:3.9.17-slim-bookworm AS dbbuild

RUN apt-get update && \
DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y wget ca-certificates build-essential zlib1g-dev libbz2-dev liblzma-dev && \
DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y \
wget ca-certificates build-essential zlib1g-dev libbz2-dev liblzma-dev && \
rm -rf /var/lib/apt/lists/*

# RUN mkdir -p /dbs/util/ && \
# wget -c --read-timeout=5 http://huttenhower.sph.harvard.edu/humann_data/full_mapping_v201901b.tar.gz && \
# cd /dbs/util/ && \
# tar xzf /full_mapping_v201901b.tar.gz && \
# rm /full_mapping_v201901b.tar.gz
RUN wget https://github.com/samtools/samtools/releases/download/1.15.1/samtools-1.15.1.tar.bz2 && \
tar xjf samtools-1.15.1.tar.bz2 && cd samtools-1.15.1 && \
./configure --without-curses && make && make install && which samtools #



#FROM ubuntu:18.04 AS runtime-image
FROM python:3.9.17-bookworm AS runtime-image
# Fetch the samtools 1.15.1 release tarball, build it without curses support,
# install it, and fail the build if the resulting binary is not on PATH.
RUN wget -q https://github.com/samtools/samtools/releases/download/1.15.1/samtools-1.15.1.tar.bz2 \
    && tar xjf samtools-1.15.1.tar.bz2 \
    && cd samtools-1.15.1 \
    && ./configure --without-curses \
    && make -j$(nproc) \
    && make install \
    && which samtools

# ============================================================
# Stage 2: Build Python packages in a venv we can copy
# ============================================================
FROM python:3.9.17-slim-bookworm AS pybuild

ARG HUMANNVERSION=0.0.0
ARG METAPHLANVERSION=0.0.0

COPY --from=dbbuild /usr/local/bin/samtools /usr/local/bin/samtools

WORKDIR /tmp
RUN export LC_ALL=en_US.UTF-8 && export LANG=en_US.UTF-8

RUN apt-get update && \
DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y build-essential libjpeg-dev zlib1g-dev libbz2-dev liblzma-dev ncbi-blast+ mafft raxml && \
DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y \
build-essential \
wget \
ca-certificates \
patch \
libjpeg-dev \
zlib1g-dev \
libbz2-dev \
liblzma-dev && \
rm -rf /var/lib/apt/lists/*

# Create a self-contained virtual environment under /opt/venv so the installed
# Python packages live in one directory tree.
RUN python3 -m venv /opt/venv
# Put the venv first on PATH so the pip/python calls that follow use it.
ENV PATH="/opt/venv/bin:$PATH"

# Version-specific HUMAnN patches, selected by the HUMANNVERSION build arg.
# They are applied to humann/search/nucleotide.py and humann/config.py below.
COPY nucleotide_${HUMANNVERSION}.patch /tmp/nucleotide.patch
COPY config_${HUMANNVERSION}.patch /tmp/config.patch

# Install the Python toolchain into /opt/venv:
#   * numpy and cython first -- biom-format will not build without numpy.
#   * HUMAnN ${HUMANNVERSION} from the GitHub tag tarball, with the local
#     nucleotide.py / config.py patches applied and setup.py rewritten to
#     reference bowtie2 2.5.1 (linux-x86_64 folder name) instead of 2.2.3.
#   * MetaPhlAn ${METAPHLANVERSION} via pip.
# Every install step is joined with && so a failed download, patch or
# pip install aborts the build instead of being hidden by later commands.
# The tarball is saved to a file rather than piped, so a wget failure is not
# masked by the pipeline's exit status.
# Only the cache/test pruning is best-effort: its errors are ignored.
RUN pip install --no-cache-dir numpy cython && \
    pip install --no-cache-dir boto3 cloudpickle awscli && \
    pip install --no-cache-dir biom-format && \
    wget -qO /tmp/humann.tar.gz https://github.com/biobakery/humann/archive/refs/tags/${HUMANNVERSION}.tar.gz && \
    tar xzf /tmp/humann.tar.gz -C /tmp && \
    rm /tmp/humann.tar.gz && \
    mv /tmp/humann-* /tmp/humann && \
    cd /tmp/humann && \
    patch humann/search/nucleotide.py < /tmp/nucleotide.patch && \
    patch humann/config.py < /tmp/config.patch && \
    mv setup.py tmp.py && \
    sed 's|2\.2\.3|2\.5\.1|g' tmp.py > tmp2.py && \
    sed 's|bowtie2_folder="bowtie2-2\.5\.1|bowtie2_folder="bowtie2-2\.5\.1-linux-x86_64|g' tmp2.py > setup.py && \
    pip install --no-cache-dir . --no-binary :all: && \
    pip install --no-cache-dir metaphlan==${METAPHLANVERSION} && \
    (find /opt/venv -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true) && \
    (find /opt/venv -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true) && \
    (find /opt/venv -type d -name "test" -exec rm -rf {} + 2>/dev/null || true) && \
    (find /opt/venv -name "*.pyc" -delete 2>/dev/null || true) && \
    (find /opt/venv -name "*.pyo" -delete 2>/dev/null || true) && \
    rm -rf /tmp/humann /tmp/*.patch

# ============================================================
# Stage 3: Minimal runtime image
# ============================================================
FROM python:3.9.17-slim-bookworm AS runtime-image

# You may be tempted to do these all in one call but unfortunately
# (eg) biom-format wont build without numpy, and it wont install it automatically

# see the glpsol issue here https://forum.biobakery.org/t/silent-errors-with-bad-install-of-libglpk/4814

# this nucleotide.py makes bowtie indexing parallel
ADD nucleotide_${HUMANNVERSION}.patch /tmp/nucleotide.patch
ADD config_${HUMANNVERSION}.patch /tmp/config.patch
ARG HUMANNVERSION=0.0.0
ARG METAPHLANVERSION=0.0.0

# We also have to bump to a more recent version of bowtie that is multithreaded,
# so we clumsily make that with some uninspired sed calls
RUN pip3 install boto3 cloudpickle awscli && \
git clone --depth=1 --branch ${HUMANNVERSION} https://github.com/biobakery/humann.git humann && \
du -h --max-depth=0 humann && \
cd humann && \
patch humann/search/nucleotide.py < /tmp/nucleotide.patch && \
patch humann/config.py < /tmp/config.patch && \
mv setup.py tmp.py && cat tmp.py | sed "s|2\.2\.3|2\.5\.1|g" > tmp2.py && \
sed "s|bowtie2_folder\=\"bowtie2-2\.5\.1|bowtie2_folder\=\"bowtie2-2\.5\.1-linux-x86_64|g" tmp2.py > setup.py && \
pip3 install -vvv . --no-binary :all: && \
ldconfig /usr/local/lib && glpsol --version && \
pip3 install numpy cython && pip3 install --no-cache-dir biom-format && \
pip3 install metaphlan==${METAPHLANVERSION} && du --max-depth 1 -h ~/.cache/pip && \
rm -r ~/.cache/pip
# Copy only the samtools binary from the build stage. No libraries are copied
# here; the runtime zlib, bz2 and lzma packages are installed with apt-get
# further down in this stage.
COPY --from=dbbuild /usr/local/bin/samtools /usr/local/bin/samtools

# The --help is so you can check to ensure that the grouping table are properly read
#RUN humann_config --update database_folders utility_mapping /dbs/util/ && \
# humann_regroup_table --help
# Copy the entire pre-built venv
COPY --from=pybuild /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

#RUN humann_test --run-functional-tests-end-to-end
# Runtime-only system packages: no build-essential and no git in this stage.
# After installing, drop the apt lists, refresh the linker cache, and run
# glpsol once so the build fails if it cannot start.
RUN apt-get update \
    && DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -y \
        glpk-utils \
        libbz2-1.0 \
        libglpk40 \
        libgomp1 \
        libjpeg62-turbo \
        liblzma5 \
        mafft \
        ncbi-blast+ \
        raxml \
        zlib1g \
    && rm -rf /var/lib/apt/lists/* \
    && ldconfig /usr/local/lib \
    && glpsol --version

# Select a UTF-8 locale for the running container.
# NOTE(review): nothing in this stage installs the `locales` package or runs
# locale-gen, so en_US.UTF-8 is presumably not generated in the slim base
# image. C.UTF-8 is used instead because it needs no generation step --
# confirm that no tool depends on the literal en_US value.
ENV LC_ALL=C.UTF-8 \
    LANG=C.UTF-8

#RUN cd humann-3.9 && ls examples && humann --input examples/demo.fasta.gz --output tmpout
WORKDIR /tmp
14 changes: 14 additions & 0 deletions biobakery-profiler/config_4.0.0.alpha.1-final-smaller.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
105c105,107
< user_edit_config_file="humann.cfg"
---
> # User config file
> if not os.environ.get("HUMANN_CONFIG"):
> user_edit_config_file="humann.cfg"
107,108c109,112
< full_path_user_edit_config_file=os.path.join(os.path.dirname(os.path.abspath(__file__)),
< user_edit_config_file)
---
> full_path_user_edit_config_file=os.path.join(os.path.dirname(os.path.abspath(__file__)),
> user_edit_config_file)
> else:
> full_path_user_edit_config_file=os.environ.get("HUMANN_CONFIG")
2 changes: 1 addition & 1 deletion biobakery_build_manifest.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
biobakery-profiler,4.0.6--4.0.0.alpha.1-final
biobakery-profiler,4.0.6--4.0.0.alpha.1-final-smaller
biobakery-profiler,4.1.0--v3.9
biobakery-profiler,4.0.5--3.6.1
1 change: 1 addition & 0 deletions build_manifest.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ rgi,6.0.0
bowtie2,2.5.0
snap-aligner,2.0.1
utility,0b
medi,2.1.0
phanta,1.1.0-dev
portalclient,1.4.5
prokka_and_blast,0.1.1
Expand Down
18 changes: 18 additions & 0 deletions medi/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# NOTE(review): the base image is unpinned (:latest), so rebuilds are not
# reproducible, and the platform is a constant, which the Docker build check
# FromPlatformFlagConstDisallowed warns about. Consider pinning a miniforge3
# release tag and choosing the platform at build time (docker build --platform).
FROM --platform=linux/amd64 docker.io/condaforge/miniforge3:latest

Check warning on line 1 in medi/Dockerfile

View workflow job for this annotation

GitHub Actions / Build (medi,2.1.0)

FROM --platform flag should not use a constant value

FromPlatformFlagConstDisallowed: FROM --platform flag should not use constant value "linux/amd64" More info: https://docs.docker.com/go/dockerfile/rule/from-platform-flag-const-disallowed/

# Scratch build directory; bin/ receives the compiled kraken2-report binary.
# -p creates the parent too and does not fail if a directory already exists.
RUN mkdir -p /tmp/medi/bin

# Several sources are copied at once, so the destination is written as a
# directory path with a trailing slash.
COPY medi.yml Makefile patches/*.patch /tmp/medi/

# Build the medi conda environment, compile kraken2-report with the Makefile's
# `report` target, patch two kraken2 helper scripts, then remove conda caches,
# static libraries, .pyc files and the scratch directory in the same layer.
# NOTE(review): the patched paths hard-code the kraken2-2.1.3-4 directory name;
# a different kraken2 build would make the patch steps fail -- confirm the pin.
RUN mamba env create -n medi -f /tmp/medi/medi.yml && \
    . ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate medi && \
    cd /tmp/medi && make report && mv /tmp/medi/bin/kraken2-report /bin && \
    patch ${CONDA_DIR}/envs/medi/share/kraken2-2.1.3-4/libexec/build_kraken2_db.sh /tmp/medi/build.patch && \
    patch ${CONDA_DIR}/envs/medi/share/kraken2-2.1.3-4/libexec/download_genomic_library.sh /tmp/medi/download_genomic.patch && \
    conda clean --tarballs --index-cache --packages --yes && \
    find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
    find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
    conda clean --force-pkgs-dirs --all --yes && \
    rm -rf /tmp/medi

# Expose the medi environment's executables without needing `conda activate`.
ENV PATH="/opt/conda/envs/medi/bin:$PATH"
15 changes: 15 additions & 0 deletions medi/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Builds the standalone kraken2-report binary from a kraken2 source checkout.
CXX := g++
# Checkout location. No quotes: make would keep them as part of the value.
K2DIR := src/kraken2
SRC := ${K2DIR}/src

# These targets are commands, not files; always run them when requested.
.PHONY: repo report clean

# Replace any previous checkout with a fresh clone of the kraken2 sources.
repo:
	rm -rf ${K2DIR}
	git clone https://github.com/daydream-boost/kraken2 ${K2DIR}

# Compile kraken2-report into ./bin, creating the directory if it is missing.
report: repo
	mkdir -p ./bin
	${CXX} -O3 -std=c++11 \
		${SRC}/mmap_file.cc ${SRC}/reports.cc ${SRC}/taxonomy.cc \
		${SRC}/kraken2-report.cpp -o ./bin/kraken2-report

# Remove the source checkout (the compiled binary in ./bin is left in place).
clean:
	rm -rf ${K2DIR}
28 changes: 28 additions & 0 deletions medi/medi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Conda environment specification for the medi image; the Dockerfile creates it
# with `mamba env create -n medi -f medi.yml`.
name: medi
# Channels are listed in priority order: conda-forge first, then bioconda.
channels:
- conda-forge
- bioconda
dependencies:
# `patch` is used by the Dockerfile to apply the kraken2 script patches.
- patch
- python>=3.8
- nextflow>=23.10.0
- fastp
# NOTE(review): the Dockerfile patches files under share/kraken2-2.1.3-4/, so it
# relies on a specific build of this version; only the version is pinned here.
- kraken2==2.1.3
- bracken==2.6.0
- prodigal
- multiqc>=1.28.0
- typeguard<=4.4.0
- biopython
- sourmash
- pandas
- r-base
- r-data.table
- r-reutils
- r-r.utils
- r-rcurl
- r-magrittr
- r-futile.logger
- bioconductor-biostrings
- taxonkit>=0.20.0
- architeuthis>=0.5.0
# C++ compiler package; presumably supplies the g++ used by the Makefile's
# `report` target -- confirm.
- gxx
28 changes: 28 additions & 0 deletions medi/patches/build.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
--- /home/gpfs/o_diener/code/k2/scripts/build_kraken2_db.sh 2025-11-17 14:42:43.458544663 +0100
+++ /home/gpfs/o_diener/miniforge3/envs/medi/share/kraken2-2.1.3-4/libexec/build_kraken2_db.sh 2025-04-29 09:24:03.422237276 +0200
@@ -32,7 +32,7 @@
}

function list_sequence_files() {
- find library/ '(' -name '*.fna' -o -name '*.faa' ')' -print0
+ find -L library/ '(' -name '*.fna' -o -name '*.faa' ')' -print0
}

start_time=$(get_current_time)
@@ -66,14 +66,14 @@

echo "Creating sequence ID to taxonomy ID map (step 1)..."
if [ -d "library/added" ]; then
- find library/added/ -name 'prelim_map_*.txt' | xargs cat > library/added/prelim_map.txt
+ find -L library/added/ -name 'prelim_map_*.txt' | xargs cat > library/added/prelim_map.txt
fi
seqid2taxid_map_file=seqid2taxid.map
if [ -e "$seqid2taxid_map_file" ]; then
echo "Sequence ID to taxonomy ID map already present, skipping map creation."
else
step_time=$(get_current_time)
- find library/ -maxdepth 2 -name prelim_map.txt | xargs cat > taxonomy/prelim_map.txt
+ find -L library/ -maxdepth 2 -name prelim_map.txt | xargs cat > taxonomy/prelim_map.txt
if [ ! -s "taxonomy/prelim_map.txt" ]; then
echo "No preliminary seqid/taxid mapping files found, aborting."
exit 1
11 changes: 11 additions & 0 deletions medi/patches/download_genomic.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
--- /home/gpfs/o_diener/code/k2/scripts/download_genomic_library.sh 2025-11-17 14:42:43.476888231 +0100
+++ /home/gpfs/o_diener/miniforge3/envs/medi/share/kraken2-2.1.3-4/libexec/download_genomic_library.sh 2025-04-09 12:42:41.740766021 +0200
@@ -68,7 +68,7 @@
else
awk '{ print $NF }' .listing | perl -ple 'tr/\r//d' | grep '\.fna\.gz' > manifest.txt
fi
- cat manifest.txt | xargs -n1 -I{} wget -q $FTP_SERVER/genomes/refseq/plasmid/{}
+ cat manifest.txt | xargs -n1 -I{} wget -q https://$NCBI_SERVER/genomes/refseq/plasmid/{}
cat manifest.txt | xargs -n1 -I{} gunzip -c {} > $library_file
rm -f plasmid.* .listing
scan_fasta_file.pl $library_file > prelim_map.txt
Loading