diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..aba6e3ad --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.git +.github +.coveragerc +.gitignore +docker diff --git a/.gitignore b/.gitignore index ca379a55..609e8dfd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ __pycache__ venv .DS_Store .vscode +*.code-workspace src/plotman.egg-info diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e717de8..21ad220e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [unreleased] +### Fixed +- `plotman kill` doesn't leave any temporary files behind anymore. + ([#801](https://github.com/ericaltendorf/plotman/pull/801)) ### Added +- tmp directory overrides moved to `scheduling:` `tmp_overrides:`. + ([#758](https://github.com/ericaltendorf/plotman/pull/758)) +- Per tmp directory phase limit control added to `scheduling:` `tmp_overrides:`. + ([#758](https://github.com/ericaltendorf/plotman/pull/758)) - `plotman export` command to output summaries from plot logs in `.csv` format. ([#557](https://github.com/ericaltendorf/plotman/pull/557)) - `--json` option for `plotman status`. @@ -20,6 +27,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support the [madMAx plotter](https://github.com/madMAx43v3r/chia-plotter). See the [configuration wiki page](https://github.com/ericaltendorf/plotman/wiki/Configuration#2-v05) for help setting it up. ([#797](https://github.com/ericaltendorf/plotman/pull/797)) +- Added argument `-f`/`--force` to `plotman kill` to skip confirmation before killing the job. + ([#801](https://github.com/ericaltendorf/plotman/pull/801)) +- Docker container support. + See the [docker configuration wiki page](https://github.com/ericaltendorf/plotman/wiki/Docker-Configuration) for help setting it up. 
+ ([#783](https://github.com/ericaltendorf/plotman/pull/783)) +- Plot sizes other than k32 are handled. + ([#803](https://github.com/ericaltendorf/plotman/pull/803)) ## [0.4.1] - 2021-06-11 ### Fixed diff --git a/MANIFEST.in b/MANIFEST.in index 08d64bbe..8e5ab768 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -9,3 +9,6 @@ include .coveragerc recursive-include src *.py recursive-include src/plotman/_tests/resources * recursive-include src/plotman/resources * +recursive-exclude docker * +exclude .dockerignore +exclude build-docker-plotman.sh diff --git a/build-docker-plotman.sh b/build-docker-plotman.sh new file mode 100755 index 00000000..1ae200b0 --- /dev/null +++ b/build-docker-plotman.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +DOCKER_REGISTRY="" +PROJECT="chia-plotman" +TAG="plotter" +BASE_CONTAINER="ubuntu:20.04" +CHIA_GIT_REFERENCE="1.1.7" + +# The UID/GID should match the 'chia' owner of the directories on the host system; bash's own UID variable is read-only, hence the CHIA_ prefix +CHIA_UID=10001 +CHIA_GID=10001 + +docker rmi ${DOCKER_REGISTRY}/${PROJECT}:${TAG} + +docker build . 
\ + --squash \ + --build-arg BASE_CONTAINER=${BASE_CONTAINER} \ + --build-arg CHIA_GIT_REFERENCE=${CHIA_GIT_REFERENCE} \ + --build-arg UID=${CHIA_UID} \ + --build-arg GID=${CHIA_GID} \ + -f docker/Dockerfile \ + -t ${DOCKER_REGISTRY}/${PROJECT}:${TAG} + +docker push ${DOCKER_REGISTRY}/${PROJECT}:${TAG} diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..58908986 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,56 @@ +# Build deployable artifacts +ARG BASE_CONTAINER=ubuntu:20.04 +FROM ${BASE_CONTAINER} as plotman-builder + +ARG CHIA_GIT_REFERENCE + +RUN DEBIAN_FRONTEND=noninteractive apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y curl jq python3 ansible tar bash ca-certificates git openssl unzip wget python3-pip sudo acl build-essential python3-dev python3.8-venv python3.8-distutils apt nfs-common python-is-python3 + +RUN echo "cloning ${CHIA_GIT_REFERENCE}" +RUN git clone --branch "${CHIA_GIT_REFERENCE}" https://github.com/Chia-Network/chia-blockchain.git \ +&& cd chia-blockchain \ +&& git submodule update --init mozilla-ca + +WORKDIR /chia-blockchain +# Placeholder for patches +RUN /bin/bash ./install.sh + +COPY . 
/plotman + +RUN ["/bin/bash", "-c", "source ./activate && pip install /plotman && deactivate"] + +# Build deployment container +FROM ${BASE_CONTAINER} as plotman + +ARG UID=10001 +ARG GID=10001 + +RUN DEBIAN_FRONTEND=noninteractive apt-get update \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -y curl jq python3 python3.8-venv ca-certificates tzdata ssh rsync \ +&& apt-get clean all \ +&& rm -rf /var/lib/apt/lists + +COPY --from=plotman-builder /chia-blockchain /chia-blockchain + +RUN groupadd -g ${GID} chia +RUN useradd -m -u ${UID} -g ${GID} chia + +RUN mkdir -p /data/chia/tmp /data/chia/plots /data/chia/logs + +# Set ownership before declaring the volumes: build steps that change a +# volume path after its VOLUME instruction may be discarded. +RUN chown -R chia:chia /chia-blockchain \ +&& chown -R chia:chia /data/chia + +VOLUME ["/data/chia/tmp","/data/chia/plots","/data/chia/logs"] + +WORKDIR /chia-blockchain +USER chia + +ENV VIRTUAL_ENV="/chia-blockchain/venv" +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +# Kick off plots (assumes the environment is good to go) +CMD ["/bin/bash", "-c", "plotman plot" ] +# Alternative command to simply provide shell environment +# CMD ["/bin/bash", "-c", "trap : TERM INT; sleep infinity & wait" ] diff --git a/docker/sample.docker-compose.yml b/docker/sample.docker-compose.yml new file mode 100644 index 00000000..af6ae14f --- /dev/null +++ b/docker/sample.docker-compose.yml @@ -0,0 +1,19 @@ +version: "3" + +services: + chia_plotman: + restart: always + container_name: chia-plotman + image: ${DOCKER_IMAGE} + volumes: + - ${HOME}/.ssh:/home/chia/.ssh + - ${HOME}/.chia:/home/chia/.chia + - ${HOME}/.config:/home/chia/.config + - ${LOGS_DIR}:/data/chia/logs + - ${PLOTS_DIR}:/data/chia/plots + - ${PLOTS_TMP_DIR}:/data/chia/tmp + - /tmp:/tmp + logging: + options: + max-size: ${DOCKER_LOG_MAX_SIZE} + max-file: ${DOCKER_LOG_MAX_FILE} diff --git a/docker/sample.env b/docker/sample.env new file mode 100644 index 00000000..725fd840 --- /dev/null +++ b/docker/sample.env @@ -0,0 +1,7 @@ +DOCKER_IMAGE=/chia-plotman:plotter +LOGS_DIR=/data/chia/logs 
+PLOTS_DIR=/data/chia/plots +PLOTS_TMP_DIR=/data/chia/tmp +DOCKER_LOG_MAX_SIZE=4m +DOCKER_LOG_MAX_FILE=10 + diff --git a/src/plotman/_tests/manager_test.py b/src/plotman/_tests/manager_test.py index 0c0b752e..d9fbae85 100755 --- a/src/plotman/_tests/manager_test.py +++ b/src/plotman/_tests/manager_test.py @@ -15,15 +15,15 @@ def sched_cfg() -> configuration.Scheduling: polling_time_s=2, tmpdir_stagger_phase_major=3, tmpdir_stagger_phase_minor=0, - tmpdir_max_jobs=3 + tmpdir_max_jobs=3, + tmp_overrides={"/mnt/tmp/04": configuration.TmpOverrides(tmpdir_max_jobs=4)} ) @pytest.fixture def dir_cfg() -> configuration.Directories: return configuration.Directories( tmp=["/var/tmp", "/tmp"], - dst=["/mnt/dst/00", "/mnt/dst/01", "/mnt/dst/03"], - tmp_overrides={"/mnt/tmp/04": configuration.TmpOverrides(tmpdir_max_jobs=4)} + dst=["/mnt/dst/00", "/mnt/dst/01", "/mnt/dst/03"] ) def test_permit_new_job_post_milestone(sched_cfg: configuration.Scheduling, dir_cfg: configuration.Directories) -> None: diff --git a/src/plotman/_tests/plot_util_test.py b/src/plotman/_tests/plot_util_test.py index 5a0f9879..6718cb44 100755 --- a/src/plotman/_tests/plot_util_test.py +++ b/src/plotman/_tests/plot_util_test.py @@ -45,7 +45,7 @@ def test_columns() -> None: [ 1 ], [ 2 ] ] ) -def test_list_k_plots(fs: pyfakefs.fake_filesystem.FakeFilesystem) -> None: +def test_list_plots(fs: pyfakefs.fake_filesystem.FakeFilesystem) -> None: fs.create_file('/t/plot-k32-0.plot', st_size=108 * GB) fs.create_file('/t/plot-k32-1.plot', st_size=108 * GB) fs.create_file('/t/.plot-k32-2.plot', st_size=108 * GB) @@ -53,10 +53,14 @@ def test_list_k_plots(fs: pyfakefs.fake_filesystem.FakeFilesystem) -> None: fs.create_file('/t/plot-k32-4.plot', st_size=100 * GB) fs.create_file('/t/plot-k32-5.plot', st_size=108 * GB) - assert (plot_util.list_k_plots('/t/') == + fs.create_file('/t/plot-k33-6.plot', st_size=108 * GB) + fs.create_file('/t/plot-k33-7.plot', st_size=216 * GB) + + assert (plot_util.list_plots('/t/') == [ 
'/t/plot-k32-0.plot', '/t/plot-k32-1.plot', - '/t/plot-k32-5.plot' ] ) + '/t/plot-k32-5.plot', + '/t/plot-k33-7.plot' ] ) def test_get_plotsize() -> None: diff --git a/src/plotman/archive.py b/src/plotman/archive.py index 0c78ee07..59a9a9e6 100644 --- a/src/plotman/archive.py +++ b/src/plotman/archive.py @@ -96,7 +96,7 @@ def spawn_archive_process(dir_cfg: configuration.Directories, arch_cfg: configur def compute_priority(phase: job.Phase, gb_free: float, n_plots: int) -> int: # All these values are designed around dst buffer dirs of about - # ~2TB size and containing k plots. TODO: Generalize, and + # ~2TB size and containing k32 plots. TODO: Generalize, and # rewrite as a sort function. priority = 50 @@ -210,7 +210,7 @@ def archive(dir_cfg: configuration.Directories, arch_cfg: configuration.Archivin dst_dir = dir_cfg.get_dst_directories() for d in dst_dir: ph = dir2ph.get(d, job.Phase(0, 0)) - dir_plots = plot_util.list_k_plots(d) + dir_plots = plot_util.list_plots(d) gb_free = plot_util.df_b(d) / plot_util.GB n_plots = len(dir_plots) priority = compute_priority(ph, gb_free, n_plots) diff --git a/src/plotman/configuration.py b/src/plotman/configuration.py index 6d59b936..a0316fe9 100644 --- a/src/plotman/configuration.py +++ b/src/plotman/configuration.py @@ -231,6 +231,9 @@ def maybe_create_scripts(self, temp: str) -> None: @attr.frozen class TmpOverrides: + tmpdir_stagger_phase_major: Optional[int] = None + tmpdir_stagger_phase_minor: Optional[int] = None + tmpdir_stagger_phase_limit: Optional[int] = None tmpdir_max_jobs: Optional[int] = None @attr.frozen @@ -267,7 +270,6 @@ class Directories: tmp: List[str] dst: Optional[List[str]] = None tmp2: Optional[str] = None - tmp_overrides: Optional[Dict[str, TmpOverrides]] = None def dst_is_tmp(self) -> bool: return self.dst is None and self.tmp2 is None @@ -295,6 +297,7 @@ class Scheduling: tmpdir_stagger_phase_major: int tmpdir_stagger_phase_minor: int tmpdir_stagger_phase_limit: int = 1 # If not explicit, 
"tmpdir_stagger_phase_limit" will default to 1 + tmp_overrides: Optional[Dict[str, TmpOverrides]] = None @attr.frozen class ChiaPlotterOptions: diff --git a/src/plotman/job.py b/src/plotman/job.py index 3009df6b..ffa0dc71 100644 --- a/src/plotman/job.py +++ b/src/plotman/job.py @@ -7,6 +7,7 @@ import random import re import sys +import glob import time from datetime import datetime from enum import Enum, auto @@ -555,13 +556,11 @@ def resume(self) -> None: def get_temp_files(self) -> typing.Set[str]: # Prevent duplicate file paths by using set. temp_files = set([]) - for f in self.proc.open_files(): - if any( - dir in f.path - for dir in [self.tmpdir, self.tmp2dir, self.dstdir] - if dir is not None - ): - temp_files.add(f.path) + + for dir in [self.tmpdir, self.tmp2dir, self.dstdir]: + if dir is not None: + temp_files.update(glob.glob(os.path.join(dir, f"plot-*-{self.plot_id}.tmp"))) + return temp_files def cancel(self) -> None: diff --git a/src/plotman/manager.py b/src/plotman/manager.py index 40d07b93..1565b900 100644 --- a/src/plotman/manager.py +++ b/src/plotman/manager.py @@ -53,21 +53,38 @@ def phases_permit_new_job(phases: typing.List[job.Phase], d: str, sched_cfg: plo if len(phases) == 0: return True - milestone = job.Phase( - major=sched_cfg.tmpdir_stagger_phase_major, - minor=sched_cfg.tmpdir_stagger_phase_minor, - ) + # Assign variables + major = sched_cfg.tmpdir_stagger_phase_major + minor = sched_cfg.tmpdir_stagger_phase_minor # tmpdir_stagger_phase_limit default is 1, as declared in configuration.py - if len([p for p in phases if p < milestone]) >= sched_cfg.tmpdir_stagger_phase_limit: - return False - - # Limit the total number of jobs per tmp dir. Default to the overall max + stagger_phase_limit = sched_cfg.tmpdir_stagger_phase_limit + + # Limit the total number of jobs per tmp dir. Default to overall max # jobs configuration, but restrict to any configured overrides. 
max_plots = sched_cfg.tmpdir_max_jobs - if dir_cfg.tmp_overrides is not None and d in dir_cfg.tmp_overrides: - curr_overrides = dir_cfg.tmp_overrides[d] + + # Check if any overrides exist for the current tmp directory + if sched_cfg.tmp_overrides is not None and d in sched_cfg.tmp_overrides: + curr_overrides = sched_cfg.tmp_overrides[d] + + # Check for and assign major & minor phase overrides + if curr_overrides.tmpdir_stagger_phase_major is not None: + major = curr_overrides.tmpdir_stagger_phase_major + if curr_overrides.tmpdir_stagger_phase_minor is not None: + minor = curr_overrides.tmpdir_stagger_phase_minor + # Check for and assign stagger phase limit override + if curr_overrides.tmpdir_stagger_phase_limit is not None: + stagger_phase_limit = curr_overrides.tmpdir_stagger_phase_limit + # Check for and assign max jobs override if curr_overrides.tmpdir_max_jobs is not None: max_plots = curr_overrides.tmpdir_max_jobs + + milestone = job.Phase(major,minor) + + # Check if phases pass the criteria + if len([p for p in phases if p < milestone]) >= stagger_phase_limit: + return False + if len(phases) >= max_plots: return False diff --git a/src/plotman/plot_util.py b/src/plotman/plot_util.py index 5bad17bd..75d85eca 100644 --- a/src/plotman/plot_util.py +++ b/src/plotman/plot_util.py @@ -51,11 +51,12 @@ def split_path_prefix(items: typing.List[str]) -> typing.Tuple[str, typing.List[ remainders = [ os.path.relpath(i, prefix) for i in items ] return (prefix, remainders) -def list_k_plots(d: str) -> typing.List[str]: +def list_plots(d: str) -> typing.List[str]: 'List completed plots in a directory (not recursive)' plots = [] for plot in os.listdir(d): - if matches := re.search(r"^plot-k(\d*)-.*plot$", plot): + matches = re.search(r"^plot-k(\d*)-.*plot$", plot) + if matches is not None: grps = matches.groups() plot_k = int(grps[0]) plot = os.path.join(d, plot) diff --git a/src/plotman/plotman.py b/src/plotman/plotman.py index 7aa9536c..5312b78c 100755 --- 
a/src/plotman/plotman.py +++ b/src/plotman/plotman.py @@ -73,6 +73,7 @@ def parse_args(self) -> typing.Any: self.add_idprefix_arg(p_files) p_kill = sp.add_parser('kill', help='kill job (and cleanup temp files)') + p_kill.add_argument('-f', '--force', action='store_true', default=False, help="Don't ask for confirmation before killing the plot job") self.add_idprefix_arg(p_kill) p_suspend = sp.add_parser('suspend', help='suspend job') @@ -309,15 +310,24 @@ def main() -> None: job.suspend() temp_files = job.get_temp_files() + print('Will kill pid %d, plot id %s' % (job.proc.pid, job.plot_id)) print('Will delete %d temp files' % len(temp_files)) - conf = input('Are you sure? ("y" to confirm): ') + + if args.force: + conf = 'y' + else: + conf = input('Are you sure? ("y" to confirm): ') + if (conf != 'y'): - print('canceled. If you wish to resume the job, do so manually.') + print('Canceled. If you wish to resume the job, do so manually.') else: print('killing...') + job.cancel() + print('cleaning up temp files...') + for f in temp_files: os.remove(f) diff --git a/src/plotman/reporting.py b/src/plotman/reporting.py index 83fe5230..75ad5a8c 100644 --- a/src/plotman/reporting.py +++ b/src/plotman/reporting.py @@ -216,7 +216,7 @@ def dst_dir_report(jobs: typing.List[job.Job], dstdirs: typing.List[str], width: eldest_ph = dir2oldphase.get(d, job.Phase(0, 0)) phases = job.job_phases_for_dstdir(d, jobs) - dir_plots = plot_util.list_k_plots(d) + dir_plots = plot_util.list_plots(d) gb_free = int(plot_util.df_b(d) / plot_util.GB) n_plots = len(dir_plots) priority = archive.compute_priority(eldest_ph, gb_free, n_plots) diff --git a/src/plotman/resources/plotman.yaml b/src/plotman/resources/plotman.yaml index 1ccfd9c4..20195abd 100644 --- a/src/plotman/resources/plotman.yaml +++ b/src/plotman/resources/plotman.yaml @@ -44,19 +44,6 @@ directories: - /mnt/tmp/02 - /mnt/tmp/03 - # Optional: Allows overriding some characteristics of certain tmp - # directories. 
This contains a map of tmp directory names to - # attributes. If a tmp directory and attribute is not listed here, - # it uses the default attribute setting from the main configuration. - # - # Currently support override parameters: - # - tmpdir_max_jobs - tmp_overrides: - # In this example, /mnt/tmp/00 is larger than the other tmp - # dirs and it can hold more plots than the default. - "/mnt/tmp/00": - tmpdir_max_jobs: 5 - # Optional: tmp2 directory. If specified, will be passed to # chia plots create as -2. Only one tmp2 directory is supported. # tmp2: /mnt/tmp/a @@ -131,6 +118,33 @@ scheduling: # How often the daemon wakes to consider starting a new plot job, in seconds. polling_time_s: 20 + # Optional: Allows the overriding of some scheduling characteristics of the + # tmp directories specified here. + # This contains a map of tmp directory names to attributes. If a tmp directory + # and attribute is not listed here, the default attribute setting from the main + # configuration will be used. + # + # Currently supported override parameters: + # - tmpdir_stagger_phase_major (requires tmpdir_stagger_phase_minor) + # - tmpdir_stagger_phase_minor (requires tmpdir_stagger_phase_major) + # - tmpdir_stagger_phase_limit + # - tmpdir_max_jobs + tmp_overrides: + # In this example, /mnt/tmp/00 is larger and faster than the + # other tmp dirs and it can hold more plots than the default, + # allowing more simultaneous plots, so they are being started + # earlier than the global setting above. + "/mnt/tmp/00": + tmpdir_stagger_phase_major: 1 + tmpdir_stagger_phase_minor: 5 + tmpdir_max_jobs: 5 + # Here, /mnt/tmp/03 is smaller, so a different config might be + # to space the phase stagger further apart and only allow 2 jobs + # to run concurrently in it. + "/mnt/tmp/03": + tmpdir_stagger_phase_major: 3 + tmpdir_stagger_phase_minor: 1 + tmpdir_max_jobs: 2 # Plotting parameters. These are pass-through parameters to chia plots create. # See documentation at