Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4c9ee08
Merge pull request #689 from kthcloud/main
Phillezi Oct 14, 2025
4dc6bfc
Update dependencies, replace old go-jose package with golang-jwt/jwt/v5
Phillezi Oct 19, 2025
0c581ea
started working on adding dynamic resource allocation support for sha…
Phillezi Oct 19, 2025
6cc65c8
persist gpu config on app
Phillezi Oct 19, 2025
e6b7e04
Add gpu allocation support for deployments
Phillezi Oct 20, 2025
e016a61
add support for referencing resourceClaims so they can be shared
Phillezi Oct 20, 2025
08bdd10
enable cgo for build on dev to prevent dns resolution bug/inconsisten…
Phillezi Oct 24, 2025
d1caa6d
Added resourceClaim types, strategy design pattern parser for differe…
Phillezi Oct 28, 2025
d219eb1
continu on manifest mapping
Phillezi Oct 28, 2025
409ebce
finish k8s layer impl for resourceclaim
Phillezi Oct 28, 2025
d31ac67
start on model structs
Phillezi Oct 28, 2025
8cbc0ba
Model struct and start of admin read dto
Phillezi Oct 28, 2025
7481298
gpu claim impls
Phillezi Oct 28, 2025
4b8bf46
update swagger
Phillezi Oct 28, 2025
aed3154
fix db collection init for gpuClaims
Phillezi Oct 28, 2025
4804571
add new routes
Phillezi Oct 29, 2025
24d2a73
updates
Phillezi Oct 29, 2025
989dc36
observe gpu claim status in k8s, remove resourceClaimTemplate + gener…
Phillezi Oct 30, 2025
1d99695
broken state: TBD fix mapstructure bindings
Phillezi Oct 30, 2025
0c98805
Sync updates
Phillezi Jan 13, 2026
348d85c
add k8s cluster version check to ensure DRA is supported when creatin…
Phillezi Jan 14, 2026
e81df80
start on dra rbac
Phillezi Jan 14, 2026
9dd66d9
put gpuclaims behind capability configuration for dra, so it can be r…
Phillezi Jan 14, 2026
3abcce7
fix dra related endpoints + dra attach on deployments
Phillezi Jan 21, 2026
01849a5
Validate gpu claims when referenced from deployment create / update, …
Phillezi Jan 22, 2026
7955c05
fix db query for claims by role
Phillezi Jan 22, 2026
308adc1
cascade removal of resourceclaims to remove references on deployments
Phillezi Jan 26, 2026
e602d6b
fix context signal handling for graceful termination
Phillezi Jan 27, 2026
3245f64
fix create gpuclaim return type for openapi spec
Phillezi Jan 30, 2026
54630db
add vm usage permission
Phillezi Jan 30, 2026
c6d4da3
Merge pull request #691 from kthcloud/vm-usage-permission
Phillezi Feb 10, 2026
8615d41
add gpu usage for users
Phillezi Feb 18, 2026
faefa74
Merge pull request #692 from kthcloud/usage-limits-for-gpus
Phillezi Feb 18, 2026
67cc2ca
update openapi + types
Phillezi Feb 18, 2026
bf8ddbe
remove old symlink used for dev
Phillezi Feb 18, 2026
9d611a6
fix schema metadata
Phillezi Feb 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 23 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,47 +1,56 @@
############################
# STEP 1 build executable binary
############################
FROM --platform=$BUILDPLATFORM golang:alpine AS builder
FROM --platform=$BUILDPLATFORM golang:alpine AS build-0

# Install git.
RUN apk update && apk add --no-cache git=~2
RUN apk update && \
apk add --no-cache git=~2

# Set up working directory
WORKDIR /app

# Copy go.mod and go.sum separately so we only invalidate the downloading layers if we need to
COPY go.mod go.sum ./

# Fetch dependencies and build the binary
ENV GO111MODULE=on
RUN go mod download
ENV CGO_ENABLED=0

RUN go mod download -x

# Copy the rest of the project separately so that code changes don't trigger a re-download of all deps
COPY . .

RUN CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=$TARGETARCH go build -a -installsuffix cgo -o main .
FROM --platform=$BUILDPLATFORM build-0 AS build-1

ARG TARGETOS
ARG TARGETARCH

############################
# STEP 2 build a small image
############################
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -a -installsuffix cgo -o go-deploy_${TARGETOS}_${TARGETARCH} .


# Runner
FROM alpine:3

# Set up the working directory
WORKDIR /go

ARG TARGETOS
ARG TARGETARCH

# Copy the binary from the builder stage
COPY --from=builder /app/main .
COPY --from=build-1 --chmod=777 /app/go-deploy_${TARGETOS}_${TARGETARCH} /usr/bin/go-deploy

# Copy the "index" folder
COPY --from=builder /app/index index
COPY --from=build-1 /app/index index

# Copy the "docs" folder
COPY --from=builder /app/docs docs
COPY --from=build-1 /app/docs docs

# Set environment variables and expose necessary port
ENV PORT=8080
ENV GIN_MODE=release
EXPOSE 8080

# Run the Go Gin binary
ENTRYPOINT ["./main"]
ENTRYPOINT ["/usr/bin/go-deploy"]

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ all: build
build:
@echo "Building the application..."
@mkdir -p $(BUILD_DIR)
@CGO_ENABLED=0 go build -o $(BUILD_DIR)/$(BINARY_NAME)$(EXT) .
@CGO_ENABLED=1 go build -ldflags="-X github.com/NVIDIA/k8s-dra-driver-gpu/internal/info.version=v1.34.2" -o $(BUILD_DIR)/$(BINARY_NAME)$(EXT) .
@echo "Build complete."

run: build
Expand Down
16 changes: 12 additions & 4 deletions cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
argFlag "flag"
"fmt"
"net"
"net/http"
"os"
"time"
Expand All @@ -19,9 +20,11 @@ import (
"github.com/kthcloud/go-deploy/pkg/log"
"github.com/kthcloud/go-deploy/pkg/metrics"
"github.com/kthcloud/go-deploy/routers"
"github.com/kthcloud/go-deploy/service/utils"
)

type Options struct {
Ctx context.Context
Flags FlagDefinitionList
Mode string
}
Expand Down Expand Up @@ -74,8 +77,7 @@ func Create(opts *Options) *App {
}
log.Printf("%sInitialization completed%s", log.Orange, log.Reset)

ctx, cancel := context.WithCancel(context.Background())

ctx, cancel := context.WithCancel(utils.FirstNonZero(opts.Ctx, context.Background()))
for _, flag := range opts.Flags {
// Handle api worker separately
if flag.Name == "api" {
Expand All @@ -100,6 +102,9 @@ func Create(opts *Options) *App {
httpServer = &http.Server{
Addr: fmt.Sprintf("0.0.0.0:%d", config.Config.Port),
Handler: routers.NewRouter(),
BaseContext: func(_ net.Listener) context.Context {
return ctx
},
}

go func() {
Expand All @@ -126,11 +131,14 @@ func (app *App) Stop() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := app.httpServer.Shutdown(ctx); err != nil {
log.Fatalln(fmt.Errorf("failed to shutdown server. details: %w", err))
log.Errorln(fmt.Errorf("failed to gracefully shutdown server. details: %w", err))
if closeErr := app.httpServer.Close(); closeErr != nil {
log.Fatalln("Force close failed:", closeErr)
}
}

<-ctx.Done()
log.Println("Saiting for http server to shutdown...")
log.Println("Waiting for http server to shutdown...")
}

shutdown()
Expand Down
7,244 changes: 7,240 additions & 4 deletions docs/api/v2/V2_docs.go

Large diffs are not rendered by default.

7,244 changes: 7,240 additions & 4 deletions docs/api/v2/V2_swagger.json

Large diffs are not rendered by default.

Loading