Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
bde3ab3
Fix SPINDLE_DEBUG + SPINDLE_TEST in SPANK plugin
nchaimov Mar 10, 2026
dcb530e
Cachepath: parse_loc utilities.
rountree-alt Oct 3, 2025
d96b8a4
Cachepath: remove/rename [orig_]location.
rountree-alt Oct 3, 2025
2ff3bfb
Cachepath: Configure-time support.
rountree-alt Oct 3, 2025
bc09adc
Cachepath: Internal messaging for path resolution
rountree-alt Oct 3, 2025
f876d40
Cachepath: Adds cobo_allreduce()
rountree-alt Oct 3, 2025
fdd22cc
Cachepath: Adds parameters to config_mgr
rountree-alt Oct 3, 2025
6a60e27
Cachepath: adds flux parameter support
rountree-alt Oct 3, 2025
f873dcf
Cachepath: Adds logging support.
rountree-alt Oct 3, 2025
846b34b
Cachepath: Removes out-of-root cleanup checks.
rountree-alt Oct 3, 2025
7db2da1
Cachepath: Set of small, miscellaneous patches.
rountree-alt Oct 3, 2025
1cd9c27
Fixes per Matt's comments.
rountree-alt Oct 21, 2025
4ba3a67
Single source of truth for client cachepath.
rountree-alt Oct 22, 2025
b0fb21e
Comments the cachepath variables.
rountree-alt Oct 22, 2025
504e4b2
Removes internal vars from spindle_launch.h
rountree-alt Oct 23, 2025
10a7233
Client cachepath message now uses single response.
rountree-alt Oct 23, 2025
d63aabd
Removes assert(0) in network error paths.
rountree-alt Oct 23, 2025
bc944b9
Renames ldcs_audit_server_md_consensus().
rountree-alt Oct 23, 2025
a8ddaeb
Adds explicit enum values to CmdlineShortOptions.
rountree-alt Oct 23, 2025
5a61296
Return instead of exit on network errors.
rountree-alt Oct 23, 2025
1823f93
Apply rename to configuration and parameters.
rountree-alt Oct 24, 2025
9bef38b
Renaming location variables/fields to commpath.
rountree-alt Oct 24, 2025
0db1331
Renames enums.
rountree-alt Oct 24, 2025
e322f71
Use strdup() for commpath instead of stack var.
rountree-alt Nov 5, 2025
3acca2c
Adds LDCS_COMMPATH
rountree-alt Nov 12, 2025
06876b6
Restores checkLinkForLeak() to test_driver.c
rountree-alt Nov 13, 2025
ce7af37
Replacing "location" with "commpath" as needed.
rountree-alt Nov 13, 2025
73d13fc
Continues location rename.
rountree-alt Nov 14, 2025
b49a922
Fixes -Wsign-compare warning in new code.
rountree-alt Dec 7, 2025
ab21ded
LDCS_CHOSEN_PARSED_CACHEPATH set in bootstrap.
rountree-alt Dec 7, 2025
30362bc
Updates test_driver.c to ignore FIFO files.
rountree-alt Dec 19, 2025
78b45b4
Sets TMPDIR=/tmp in each Dockerfile
rountree-alt Jan 12, 2026
2e0db40
Restores --with-localstorage to generate error.
rountree-alt Feb 12, 2026
cc1ed05
Updates spank plugin to use commpath.
rountree-alt Feb 13, 2026
a9c8809
Updates a configure script to use commpath.
rountree-alt Feb 13, 2026
73b2d69
Additional integration for commpath + spank-plugin.
rountree-alt Feb 13, 2026
46e6e51
Fixes two silly bugs.
rountree-alt Feb 14, 2026
f1094fc
Testing non-overlapping cache/commpath directories.
rountree-alt Feb 20, 2026
b9d29a5
Testing commpath as subdirectory of cachepath.
rountree-alt Feb 20, 2026
2b57b08
Testing cachepath as a subdirectory of commpath.
rountree-alt Feb 20, 2026
2cfb6ce
Generate debug log artifacts as part of github ci
rountree-alt Mar 9, 2026
382036a
Adds slurm-plugin to matrix refactor.
rountree-alt Mar 12, 2026
01af996
Fixup
rountree-alt Mar 12, 2026
9dba19e
Finalizes logging on spindleRunBE error paths
rountree-alt Mar 12, 2026
5d9b280
Several small fixes, add head node to slurm*
rountree-alt Mar 12, 2026
d2bd28c
Restoring non-canonical paths in runner.
rountree-alt Mar 12, 2026
c5a584c
Tarball filenames now allow multiple artifacts
rountree-alt Mar 13, 2026
f9ad3c4
env doesn't take an array.
rountree-alt Mar 13, 2026
d8a981b
printf, awkwardly.
rountree-alt Mar 13, 2026
f2b2223
One bugfix, one upgrade
rountree-alt Mar 13, 2026
0a1e1ad
I bet we can't resolve yaml variables in bash
rountree-alt Mar 13, 2026
66a505c
Remove glob by tarring entire testsuite dir.
rountree-alt Mar 13, 2026
e8702a1
Removes YAML variables from bash tar commands
rountree-alt Mar 13, 2026
df873e5
Adds ./ in front of testsuite dir.
rountree-alt Mar 13, 2026
785e0af
Using full paths, serial only, force-"fail"
rountree-alt Mar 13, 2026
37c508e
Testing hard-coded filename for one DEBUG case
rountree-alt Mar 13, 2026
4d68fc3
Gingerly reintroducing a YAML variable.
rountree-alt Mar 13, 2026
553f8a9
Fix typo
rountree-alt Mar 13, 2026
24d8514
Gingerly restoring environment variables.
rountree-alt Mar 13, 2026
b33298e
Bring flux back online with lessons learned.
rountree-alt Mar 13, 2026
e46cca2
Pass unique name to upload action.
rountree-alt Mar 13, 2026
2c5dc7d
Tweaks.
rountree-alt Mar 13, 2026
a66b780
Unique name fixes.
rountree-alt Mar 13, 2026
0e8f8e2
slurm and plugin back online
rountree-alt Mar 13, 2026
b115678
Removed debugging code.
rountree-alt Mar 13, 2026
f5787ca
Handle absent log in head node cases.
rountree-alt Mar 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 233 additions & 20 deletions .github/workflows/ci.yml

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
/* Whether we are using a broken srun */
#undef BROKEN_SRUN

/* Colon-separated list of potential back-end cache directories */
#undef CACHEPATHS

/* Back-end directory for communication and housekeeping */
#undef COMMPATH

/* Define if we're using biter for client/server communication */
#undef COMM_BITER

Expand Down Expand Up @@ -131,9 +137,6 @@
/* Default mode for slurm launch */
#undef SLURMLAUNCH_ENABLED

/* The default local directory for Spindle */
#undef SPINDLE_LOC

/* The default colon-separated list of directories that Spindle will not cache
files out of */
#undef SPINDLE_LOCAL_PREFIX
Expand Down
44 changes: 39 additions & 5 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,9 @@ enable_maintainer_mode
with_default_port
with_default_num_ports
with_localstorage
with_cachepaths
with_cachepath
with_commpath
with_default_local_prefix
with_testrm
with_rm
Expand Down Expand Up @@ -1590,7 +1593,13 @@ Optional Packages:
--with-default-numports=NUM
Number of TCP/IP ports to scan for Spindle server
communication
--with-localstorage=DIR Directory on back-ends for storing relocated files
--with-localstorage=DIR (obsolete)
Use --with-cachepaths and --with-commpath instead.
--with-cachepaths=DIR Colon-separated list of potential back-end cache
directories
,
--with-commpath=DIR Back-end directory for communication and
housekeeping
--with-default-local-prefix=DIRS
Colon-separated list of directories that Spindle
will not cache files out of
Expand Down Expand Up @@ -16664,17 +16673,37 @@ fi

# Check whether --with-localstorage was given.
if test "${with_localstorage+set}" = set; then :
withval=$with_localstorage; SPINDLE_LOC=${withval}
withval=$with_localstorage; as_fn_error $? "requested obsolete option --with-localstorage. Use --with-cachepaths and --with-commpath instead." "$LINENO" 5
fi


# Check whether --with-cachepaths was given.
if test "${with_cachepaths+set}" = set; then :
withval=$with_cachepaths; CACHEPATHS=${withval}
else
CACHEPATHS=$DEFAULT_LOC
fi


# Check whether --with-cachepath was given.
if test "${with_cachepath+set}" = set; then :
withval=$with_cachepath; as_fn_error $? "use --with-cachepaths=DIRS (plural) instead of --with-cachepath=DIR to specify one or more cache paths" "$LINENO" 5
fi


# Check whether --with-commpath was given.
if test "${with_commpath+set}" = set; then :
withval=$with_commpath; COMMPATH=${withval}
else
SPINDLE_LOC=$DEFAULT_LOC
COMMPATH=$DEFAULT_LOC
fi


# Check whether --with-default-local-prefix was given.
if test "${with_default_local_prefix+set}" = set; then :
withval=$with_default_local_prefix; SPINDLE_LOCAL_PREFIX=${withval}
else
SPINDLE_LOCAL_PREFIX="$DEFAULT_LOCAL_PREFIX:$SPINDLE_LOC"
SPINDLE_LOCAL_PREFIX="$DEFAULT_LOCAL_PREFIX:$COMMPATH"
fi


Expand All @@ -16694,7 +16723,12 @@ _ACEOF


cat >>confdefs.h <<_ACEOF
#define SPINDLE_LOC "$SPINDLE_LOC"
#define COMMPATH "$COMMPATH"
_ACEOF


cat >>confdefs.h <<_ACEOF
#define CACHEPATHS "$CACHEPATHS"
_ACEOF


Expand Down
23 changes: 18 additions & 5 deletions configure.common.ac
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,30 @@ AC_ARG_WITH(default-num-ports,
[NUM_COBO_PORTS=${withval}],
[NUM_COBO_PORTS=$DEFAULT_NUM_COBO_PORTS])
AC_ARG_WITH(localstorage,
[AS_HELP_STRING([--with-localstorage=DIR],[Directory on back-ends for storing relocated files])],
[SPINDLE_LOC=${withval}],
[SPINDLE_LOC=$DEFAULT_LOC])
[AS_HELP_STRING([--with-localstorage=DIR (obsolete)],[Use --with-cachepaths and --with-commpath instead.])],
[AC_MSG_ERROR(requested obsolete option --with-localstorage. Use --with-cachepaths and --with-commpath instead.)],
[])
AC_ARG_WITH(cachepaths,
[AS_HELP_STRING([--with-cachepaths=DIR],[Colon-separated list of potential back-end cache directories])],
[CACHEPATHS=${withval}],
[CACHEPATHS=$DEFAULT_LOC])
AC_ARG_WITH(cachepath,
[[],[]],
[AC_MSG_ERROR(use --with-cachepaths=DIRS (plural) instead of --with-cachepath=DIR to specify one or more cache paths)],
[])
AC_ARG_WITH(commpath,
[AS_HELP_STRING([--with-commpath=DIR],[Back-end directory for communication and housekeeping])],
[COMMPATH=${withval}],
[COMMPATH=$DEFAULT_LOC])
AC_ARG_WITH(default-local-prefix,
[AS_HELP_STRING([--with-default-local-prefix=DIRS],[Colon-separated list of directories that Spindle will not cache files out of])],
[SPINDLE_LOCAL_PREFIX=${withval}],
[SPINDLE_LOCAL_PREFIX="$DEFAULT_LOCAL_PREFIX:$SPINDLE_LOC"])
[SPINDLE_LOCAL_PREFIX="$DEFAULT_LOCAL_PREFIX:$COMMPATH"])
AC_DEFINE_UNQUOTED([SPINDLE_PORT],[$SPINDLE_PORT],[The default port for Spindle])
AC_DEFINE_UNQUOTED([NUM_COBO_PORTS],[$NUM_COBO_PORTS],[Number of ports for COBO to search for an open port])
AC_DEFINE_UNQUOTED([SPINDLE_MAX_PORT],[$(($SPINDLE_PORT + $NUM_COBO_PORTS - 1))],[The maximum port value])
AC_DEFINE_UNQUOTED([SPINDLE_LOC],"[$SPINDLE_LOC]",[The default local directory for Spindle])
AC_DEFINE_UNQUOTED([COMMPATH],"[$COMMPATH]",[Back-end directory for communication and housekeeping])
AC_DEFINE_UNQUOTED([CACHEPATHS],"[$CACHEPATHS]",[Colon-separated list of potential back-end cache directories])
AC_DEFINE_UNQUOTED([SPINDLE_LOCAL_PREFIX],"[$SPINDLE_LOCAL_PREFIX]",[The default colon-separated list of directories that Spindle will not cache files out of])

TESTRM=unknown
Expand Down
2 changes: 2 additions & 0 deletions containers/spindle-flux-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ FROM fluxrm/flux-sched:${flux_sched_version} AS builder
ARG replicas=4
ENV workers=${replicas}
USER root
ENV TMPDIR=/tmp
RUN echo 'TMPDIR="/tmp"' >> /etc/environment

RUN DEBIAN_FRONTEND="noninteractive" apt-get update \
&& apt-get -qq install -y --no-install-recommends \
Expand Down
2 changes: 1 addition & 1 deletion containers/spindle-flux-ubuntu/scripts/build_spindle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -euxo pipefail

mkdir -p /home/${USER}/Spindle-build
cd /home/${USER}/Spindle-build
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=flux --enable-flux-plugin --with-localstorage=/tmp CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=flux --enable-flux-plugin --with-cachepaths=/tmp/commpath/cachepath --with-commpath=/tmp/commpath CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
make -j$(nproc)
make install

2 changes: 2 additions & 0 deletions containers/spindle-serial-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
ARG ubuntu_version=noble
FROM ubuntu:${ubuntu_version}
USER root
ENV TMPDIR=/tmp
RUN echo 'TMPDIR="/tmp"' >> /etc/environment

RUN DEBIAN_FRONTEND="noninteractive" apt-get update \
# install latest pkg utils:
Expand Down
2 changes: 1 addition & 1 deletion containers/spindle-serial-ubuntu/scripts/build_spindle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -euxo pipefail

mkdir -p /home/${USER}/Spindle-build
cd /home/${USER}/Spindle-build
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=serial --with-localstorage=/tmp CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=serial --with-cachepaths=/tmp/commpath/cachepath --with-commpath=/tmp/commpath CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
make -j$(nproc)
make install

2 changes: 2 additions & 0 deletions containers/spindle-slurm-ubuntu/base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
ARG UBUNTU_VERSION=noble
FROM ubuntu:${UBUNTU_VERSION}
USER root
ENV TMPDIR=/tmp
RUN echo 'TMPDIR="/tmp"' >> /etc/environment

RUN apt-get update \
&& DEBIAN_FRONTEND="noninteractive" apt-get -qq install -y --no-install-recommends \
Expand Down
2 changes: 2 additions & 0 deletions containers/spindle-slurm-ubuntu/testing-plugin/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ ARG BASE_VERSION=latest
FROM ghcr.io/llnl/spindle-slurm-base:${BASE_VERSION}
ARG replicas=4
ENV workers=${replicas}
ENV TMPDIR=/tmp
RUN echo 'TMPDIR="/tmp"' >> /etc/environment

ARG BUILD_ROOT=containers/spindle-slurm-ubuntu/testing-plugin

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -euxo pipefail

mkdir -p /home/${USER}/Spindle-build
cd /home/${USER}/Spindle-build
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=slurm-plugin --enable-slurm-plugin --with-localstorage=/tmp CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=slurm-plugin --enable-slurm-plugin --with-cachepaths=/tmp/commpath/cachepath --with-commpath=/tmp/commpath CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
make -j$(nproc)
make install

2 changes: 2 additions & 0 deletions containers/spindle-slurm-ubuntu/testing/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ ARG BASE_VERSION=latest
FROM ghcr.io/llnl/spindle-slurm-base:${BASE_VERSION}
ARG replicas=4
ENV workers=${replicas}
ENV TMPDIR=/tmp
RUN echo 'TMPDIR="/tmp"' >> /etc/environment

ARG BUILD_ROOT=containers/spindle-slurm-ubuntu/testing

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -euxo pipefail

mkdir -p /home/${USER}/Spindle-build
cd /home/${USER}/Spindle-build
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=slurm --with-rsh-launch --with-rsh-cmd=/usr/bin/ssh --with-localstorage=/tmp CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=slurm --with-rsh-launch --with-rsh-cmd=/usr/bin/ssh --with-cachepaths=/tmp/commpath/cachepath --with-commpath=/tmp/commpath CFLAGS="-O2 -g" CXXFLAGS="-O2 -g"
make -j$(nproc)
make install

32 changes: 18 additions & 14 deletions src/client/beboot/spindle_bootstrap.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static int rankinfo[4]={-1,-1,-1,-1};
number_t number;
static int use_cache;
static unsigned int cachesize;
static char *location, *number_s, *orig_location, *symbolic_location;
static char *commpath, *number_s, *symbolic_commpath;
static char **cmdline;
static char *executable;
static char *client_lib;
Expand Down Expand Up @@ -91,7 +91,7 @@ extern char *realize(char *path);
static int establish_connection()
{
debug_printf2("Opening connection to server\n");
ldcsid = client_open_connection(location, number);
ldcsid = client_open_connection(commpath, number);
if (ldcsid == -1)
return -1;

Expand All @@ -107,14 +107,17 @@ static void setup_environment()
{
char rankinfo_str[256];
snprintf(rankinfo_str, 256, "%d %d %d %d %d", ldcsid, rankinfo[0], rankinfo[1], rankinfo[2], rankinfo[3]);

char *connection_str = NULL;
if (opts & OPT_RELOCAOUT)
connection_str = client_get_connection_string(ldcsid);

char *chosen_parsed_cachepath;
send_cachepath_query( ldcsid , NULL, &chosen_parsed_cachepath);

setenv("LD_AUDIT", client_lib, 1);
setenv("LDCS_LOCATION", location, 1);
setenv("LDCS_ORIG_LOCATION", orig_location, 1);
setenv("LDCS_COMMPATH", commpath, 1);
setenv("LDCS_CHOSEN_PARSED_CACHEPATH", chosen_parsed_cachepath, 1);
setenv("LDCS_NUMBER", number_s, 1);
setenv("LDCS_RANKINFO", rankinfo_str, 1);
if (connection_str)
Expand Down Expand Up @@ -160,7 +163,8 @@ static int parse_cmdline(int argc, char *argv[])
daemon_args[i - 3] = NULL;
}

symbolic_location = argv[i++];
symbolic_commpath = argv[i++];
i++; // Skip over candidate_cachepaths.
number_s = argv[i++];
number = (number_t) strtoul(number_s, NULL, 0);
opts_s = argv[i++];
Expand All @@ -173,7 +177,7 @@ static int parse_cmdline(int argc, char *argv[])
return 0;
}

static void launch_daemon(char *location)
static void launch_daemon(char *commpath)
{
/*grand-child fork, then execv daemon. By grand-child forking we ensure that
the app won't get confused by seeing an unknown process as a child. */
Expand All @@ -183,12 +187,12 @@ static void launch_daemon(char *location)
char unique_file[MAX_PATH_LEN+1];
char buffer[32];

result = spindle_mkdir(location);
result = spindle_mkdir(commpath);
if (result == -1) {
debug_printf("Exiting due to spindle_mkdir error\n");
exit(-1);
}
snprintf(unique_file, MAX_PATH_LEN, "%s/spindle_daemon_pid", location);
snprintf(unique_file, MAX_PATH_LEN, "%s/spindle_daemon_pid", commpath);
unique_file[MAX_PATH_LEN] = '\0';
fd = open(unique_file, O_CREAT | O_EXCL | O_WRONLY, 0600);
if (fd == -1) {
Expand Down Expand Up @@ -343,14 +347,14 @@ int main(int argc, char *argv[])
}
}

orig_location = parse_location(symbolic_location, number);
if (!orig_location) {
char *orig_commpath = parse_location(symbolic_commpath, number);
if (!orig_commpath) {
return -1;
}
location = realize(orig_location);
commpath = realize(orig_commpath);

if (daemon_args) {
launch_daemon(location);
launch_daemon(commpath);
}

result = establish_connection();
Expand All @@ -374,7 +378,7 @@ int main(int argc, char *argv[])
#else
shm_cache_limit = cachesize;
#endif
shmcache_init(location, number, cachesize, shm_cache_limit);
shmcache_init(commpath, number, cachesize, shm_cache_limit);
use_cache = 1;
}

Expand Down
Loading