Skip to content

Commit 2906810

Browse files
authored
Define PathwaysJob CRD ; construct PathwaysJob object with different deployment modes. (#2)
* Initial commit * Initial scaffolding from Kubebuilder. * Simplified working Pathways spec. * Simplified Pathways spec with constructed JobSet object * Simplified Pathways spec with constructed LWS and JobSet objects. * Updates to Pathways LWS spec - RBAC, etc. * Pathways JobSet client.go example. * Pathways JobSet Inference using JobSet client. * Pathways JobSet Inference using JobSet client 2 - working after resource limits and removing tolerations. * Adding utils - moved RM, Proxy container specs and affinity to utils. * Added license automatically. Controller code, API structure. * Some extras. * Set Pathways as the owner of JobSet. * Renamed PathwaysAPI to PathwaysJob * Renamed PathwaysAPI to PathwaysJob 2, changed RBAC references. * Introduced WorkerSpec and ColocationPolicy. * Updated Pathways flags. * Added hostNetwork, removed resources, moved userpodspec to YAML. * Reconciliation - List JobSets and Create only if the JobSet does not exist. * Fix reconciliation logic to avoid multiple calls to createJobSet. * Detailing PathwaysJobStatus definitions. * Redefined PathwaysJobStatus. * Simplify and test. * Update port numbers, topology etc. Evaluate colocate mode. * Clean up for release. * Update README, update licenses. * Generic TPU type, topology and parallelisms; add input validations. * Install and deploy JobSet along with PathwaysJob. Add comments. * Add spec maps for concrete validations, fix headless mode, cleanup. * Adding Codeowners file. * Update images, add annotation. Update sample config YAML to use Python image. * Change proxy port back to 29000.
1 parent 5e96455 commit 2906810

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+11785
-5
lines changed

.dockerignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file
2+
# Ignore build and test binaries.
3+
bin/

.github/CODEOWNERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Code owners and required reviewers.
2+
* @RoshaniN

.gitignore

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Binaries for programs and plugins
2+
*.exe
3+
*.exe~
4+
*.dll
5+
*.so
6+
*.dylib
7+
bin/*
8+
Dockerfile.cross
9+
10+
# Test binary, built with `go test -c`
11+
*.test
12+
13+
# Output of the go coverage tool, specifically when used with LiteIDE
14+
*.out
15+
16+
# Go workspace file
17+
go.work
18+
19+
# Kubernetes Generated files - skip generated files, except for vendored files
20+
!vendor/**/zz_generated.*
21+
22+
# editor and IDE paraphernalia
23+
.idea
24+
.vscode
25+
*.swp
26+
*.swo
27+
*~

.golangci.yml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
run:
16+
timeout: 5m
17+
allow-parallel-runners: true
18+
19+
issues:
20+
# don't skip warning about doc comments
21+
# don't exclude the default set of lint
22+
exclude-use-default: false
23+
# restore some of the defaults
24+
# (fill in the rest as needed)
25+
exclude-rules:
26+
- path: "api/*"
27+
linters:
28+
- lll
29+
- path: "internal/*"
30+
linters:
31+
- dupl
32+
- lll
33+
linters:
34+
disable-all: true
35+
enable:
36+
- dupl
37+
- errcheck
38+
- exportloopref
39+
- ginkgolinter
40+
- goconst
41+
- gocyclo
42+
- gofmt
43+
- goimports
44+
- gosimple
45+
- govet
46+
- ineffassign
47+
- lll
48+
- misspell
49+
- nakedret
50+
- prealloc
51+
- revive
52+
- staticcheck
53+
- typecheck
54+
- unconvert
55+
- unparam
56+
- unused
57+
58+
linters-settings:
59+
revive:
60+
rules:
61+
- name: comment-spacings

Dockerfile

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Build the manager binary
16+
FROM golang:1.22 AS builder
17+
ARG TARGETOS
18+
ARG TARGETARCH
19+
20+
WORKDIR /workspace
21+
# Copy the Go Modules manifests
22+
COPY go.mod go.mod
23+
COPY go.sum go.sum
24+
# cache deps before building and copying source so that we don't need to re-download as much
25+
# and so that source changes don't invalidate our downloaded layer
26+
RUN go mod download
27+
28+
# Copy the go source
29+
COPY cmd/main.go cmd/main.go
30+
COPY api/ api/
31+
COPY internal/controller/ internal/controller/
32+
33+
# Build
34+
# GOARCH has no default value, so that the binary is built for the platform of the host where the command
35+
# was called. For example, if we call make docker-build in a local env running on Apple Silicon (M1),
36+
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
37+
# by leaving it empty we can ensure that the container and the binary shipped in it have the same platform.
38+
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go
39+
40+
# Use distroless as minimal base image to package the manager binary
41+
# Refer to https://github.com/GoogleContainerTools/distroless for more details
42+
FROM gcr.io/distroless/static:nonroot
43+
WORKDIR /
44+
COPY --from=builder /workspace/manager .
45+
USER 65532:65532
46+
47+
ENTRYPOINT ["/manager"]

Makefile

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# Image URL to use all building/pushing image targets
2+
IMG ?= controller:latest
3+
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
4+
ENVTEST_K8S_VERSION = 1.30.0
5+
6+
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set).
# Assigned with := (simply expanded) so that `go env` is executed once while the
# Makefile is parsed, rather than again on every expansion of GOBIN.
7+
ifeq (,$(shell go env GOBIN))
8+
GOBIN := $(shell go env GOPATH)/bin
9+
else
10+
GOBIN := $(shell go env GOBIN)
11+
endif
12+
13+
# CONTAINER_TOOL defines the container tool to be used for building images.
14+
# Be aware that the target commands are only tested with Docker which is
15+
# scaffolded by default. However, you might want to replace it with other
16+
# tools (e.g. podman).
17+
CONTAINER_TOOL ?= docker
18+
19+
# Setting SHELL to bash allows bash commands to be executed by recipes.
20+
# Options are set to exit when a recipe line exits non-zero or a piped command fails.
21+
SHELL = /usr/bin/env bash -o pipefail
22+
.SHELLFLAGS = -ec
23+
24+
.PHONY: all
25+
all: build
26+
27+
##@ General
28+
29+
# The help target prints out all targets with their descriptions organized
30+
# beneath their categories. The categories are represented by '##@' and the
31+
# target descriptions by '##'. The awk command is responsible for reading the
32+
# entire set of makefiles included in this invocation, looking for lines of the
33+
# file as xyz: ## something, and then pretty-format the target and help. Then,
34+
# if there's a line with ##@ something, that gets pretty-printed as a category.
35+
# More info on the usage of ANSI control characters for terminal formatting:
36+
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
37+
# More info on the awk command:
38+
# http://linuxcommand.org/lc3_adv_awk.php
39+
40+
.PHONY: help
41+
help: ## Display this help.
42+
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
43+
44+
##@ Development
45+
46+
.PHONY: manifests
47+
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
48+
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
49+
50+
.PHONY: generate
51+
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
52+
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
53+
54+
.PHONY: fmt
55+
fmt: ## Run go fmt against code.
56+
go fmt ./...
57+
58+
.PHONY: vet
59+
vet: ## Run go vet against code.
60+
go vet ./...
61+
62+
.PHONY: test
63+
test: manifests generate fmt vet envtest ## Run tests.
64+
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out
65+
66+
# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors.
# The description sits on the target line after '##' so that `make help` lists this target.
67+
.PHONY: test-e2e
68+
test-e2e: ## Run the e2e tests against a Kind k8s instance that is spun up.
69+
go test ./test/e2e/ -v -ginkgo.v
70+
71+
.PHONY: lint
72+
lint: golangci-lint ## Run golangci-lint linter
73+
$(GOLANGCI_LINT) run
74+
75+
.PHONY: lint-fix
76+
lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
77+
$(GOLANGCI_LINT) run --fix
78+
79+
##@ Build
80+
81+
.PHONY: build
82+
build: manifests generate fmt vet ## Build manager binary.
83+
go build -o bin/manager cmd/main.go
84+
85+
.PHONY: run
86+
run: manifests generate fmt vet ## Run a controller from your host.
87+
go run ./cmd/main.go
88+
89+
# If you wish to build the manager image targeting other platforms you can use the --platform flag.
90+
# (e.g. docker build --platform linux/arm64). However, you must enable Docker BuildKit for it.
91+
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
92+
.PHONY: docker-build
93+
docker-build: ## Build docker image with the manager.
94+
$(CONTAINER_TOOL) build -t ${IMG} .
95+
96+
.PHONY: docker-push
97+
docker-push: ## Push docker image with the manager.
98+
$(CONTAINER_TOOL) push ${IMG}
99+
100+
# PLATFORMS defines the target platforms the manager image is built for, in order to support multiple
101+
# architectures (e.g. make docker-buildx IMG=myregistry/myoperator:0.0.1). To use this option you need to:
102+
# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
103+
# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
104+
# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry/image:<tag>> then the export will fail)
105+
# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
106+
PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le
107+
# docker-buildx builds the image for every entry in PLATFORMS and pushes it as ${IMG}.
# Steps: derive Dockerfile.cross from Dockerfile via sed, create and select the
# "pathways-job-builder" buildx builder, build and push, then remove the builder and
# the temporary Dockerfile.cross.
# NOTE: recipe lines prefixed with `-` have their exit status ignored by make. The
# clean-up lines therefore still run after a failed `buildx build --push`, but a failed
# build/push does not by itself fail this target.
# NOTE(review): the `-` on `buildx create` presumably tolerates a builder that already
# exists from an earlier run - confirm.
.PHONY: docker-buildx
108+
docker-buildx: ## Build and push docker image for the manager for cross-platform support
109+
# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
110+
sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
111+
- $(CONTAINER_TOOL) buildx create --name pathways-job-builder
112+
$(CONTAINER_TOOL) buildx use pathways-job-builder
113+
- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross .
114+
- $(CONTAINER_TOOL) buildx rm pathways-job-builder
115+
rm Dockerfile.cross
116+
117+
.PHONY: build-installer
118+
build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment.
119+
mkdir -p dist
120+
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
121+
$(KUSTOMIZE) build config/default > dist/install.yaml
122+
123+
##@ Deployment
124+
125+
ifndef ignore-not-found
126+
ignore-not-found = false
127+
endif
128+
129+
JOBSET_VERSION ?= v0.8.0
130+
JOBSET_MANIFEST_URL := https://github.com/kubernetes-sigs/jobset/releases/download/${JOBSET_VERSION}/manifests.yaml
131+
132+
.PHONY: install
133+
install: manifests kustomize ## Install PathwaysJob and JobSet CRDs into the K8s cluster specified in ~/.kube/config.
134+
$(KUSTOMIZE) build config/crd | $(KUBECTL) apply --server-side -f -
135+
$(KUBECTL) apply --server-side -f ${JOBSET_MANIFEST_URL}
136+
137+
.PHONY: uninstall
138+
uninstall: manifests kustomize ## Uninstall PathwaysJob and JobSet CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
139+
$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
140+
$(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f ${JOBSET_MANIFEST_URL}
141+
142+
.PHONY: deploy
143+
deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
144+
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
145+
$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side -f -
146+
$(KUBECTL) apply --server-side -f ${JOBSET_MANIFEST_URL}
147+
148+
.PHONY: undeploy
149+
undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
150+
$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
151+
$(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f ${JOBSET_MANIFEST_URL}
152+
153+
##@ Dependencies
154+
155+
## Location to install dependencies to
156+
LOCALBIN ?= $(shell pwd)/bin
157+
$(LOCALBIN):
158+
mkdir -p $(LOCALBIN)
159+
160+
## Tool Binaries
# Each path is assigned with ?= so that it can be overridden from the environment or
# the command line (e.g. to reuse a system-wide kubectl or a pre-installed linter).
161+
KUBECTL ?= kubectl
162+
KUSTOMIZE ?= $(LOCALBIN)/kustomize
163+
CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
164+
ENVTEST ?= $(LOCALBIN)/setup-envtest
165+
GOLANGCI_LINT ?= $(LOCALBIN)/golangci-lint
166+
167+
## Tool Versions
168+
KUSTOMIZE_VERSION ?= v5.4.2
169+
CONTROLLER_TOOLS_VERSION ?= v0.15.0
170+
ENVTEST_VERSION ?= release-0.18
171+
GOLANGCI_LINT_VERSION ?= v1.59.1
172+
173+
.PHONY: kustomize
174+
kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.
175+
$(KUSTOMIZE): $(LOCALBIN)
176+
$(call go-install-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v5,$(KUSTOMIZE_VERSION))
177+
178+
.PHONY: controller-gen
179+
controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary.
180+
$(CONTROLLER_GEN): $(LOCALBIN)
181+
$(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION))
182+
183+
.PHONY: envtest
184+
envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
185+
$(ENVTEST): $(LOCALBIN)
186+
$(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION))
187+
188+
.PHONY: golangci-lint
189+
golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
190+
$(GOLANGCI_LINT): $(LOCALBIN)
191+
$(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION))
192+
193+
# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist.
# The installed binary is stored as "<target>-<version>"; "<target>" itself is (re)created as a
# symlink to that versioned file each time the macro runs. If the versioned file is already
# present, only the symlink step is executed.
# The download step removes any existing "<target>", installs with GOBIN pointed at $(LOCALBIN),
# and renames the result to the versioned name.
194+
# $1 - target path with name of binary
195+
# $2 - package url which can be installed
196+
# $3 - specific version of package
197+
define go-install-tool
198+
@[ -f "$(1)-$(3)" ] || { \
199+
set -e; \
200+
package=$(2)@$(3) ;\
201+
echo "Downloading $${package}" ;\
202+
rm -f $(1) || true ;\
203+
GOBIN=$(LOCALBIN) go install $${package} ;\
204+
mv $(1) $(1)-$(3) ;\
205+
} ;\
206+
ln -sf $(1)-$(3) $(1)
207+
endef

PROJECT

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Code generated by tool. DO NOT EDIT.
2+
# This file is used to track the info used to scaffold your project
3+
# and allow the plugins properly work.
4+
# More info: https://book.kubebuilder.io/reference/project-config.html
5+
domain: pathways.domain
6+
layout:
7+
- go.kubebuilder.io/v4
8+
projectName: pathways-job
9+
repo: pathways-job
10+
resources:
11+
- api:
12+
crdVersion: v1
13+
namespaced: true
14+
controller: true
15+
domain: pathways.domain
16+
group: pathways-job
17+
kind: PathwaysJob
18+
path: pathways-job/api/v1
19+
version: v1
20+
version: "3"

0 commit comments

Comments
 (0)