From 4a1441a85220aa1c5d2070a73fd1bba71056a3a6 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Sun, 22 Feb 2026 16:42:29 -0500 Subject: [PATCH 01/19] Added some global flags for the CLI --- cmd/taskharbor/main.go | 1 + taskharbor/driver/driver.go | 3 + taskharbor/internal/app/app.go | 67 +++++++++++++++ taskharbor/internal/app/usage.go | 136 +++++++++++++++++++++++++++++++ 4 files changed, 207 insertions(+) create mode 100644 cmd/taskharbor/main.go create mode 100644 taskharbor/internal/app/app.go create mode 100644 taskharbor/internal/app/usage.go diff --git a/cmd/taskharbor/main.go b/cmd/taskharbor/main.go new file mode 100644 index 0000000..06ab7d0 --- /dev/null +++ b/cmd/taskharbor/main.go @@ -0,0 +1 @@ +package main diff --git a/taskharbor/driver/driver.go b/taskharbor/driver/driver.go index 7c29126..f2096b0 100644 --- a/taskharbor/driver/driver.go +++ b/taskharbor/driver/driver.go @@ -7,6 +7,9 @@ import ( "time" ) +// List of all implemented drivers +var ImplementedDrivers = []string{"memory", "redis", "postgres"} + /* This JobRecord is the driver-level representation of a Job. This is the struct that will be saved in some backend (Redis/SQL/etc...) 
diff --git a/taskharbor/internal/app/app.go b/taskharbor/internal/app/app.go new file mode 100644 index 0000000..32c2336 --- /dev/null +++ b/taskharbor/internal/app/app.go @@ -0,0 +1,67 @@ +package app + +import ( + "flag" + "fmt" + "io" + + th "github.com/ARJ2211/taskharbor/taskharbor" + "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +type GlobalFlags struct { + Driver string + Queue string + JSON bool + Verbose bool +} + +func Run(argv []string, stdout, stderr io.Writer) { + var g GlobalFlags + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + driverList := "" + for _, d := range driver.ImplementedDrivers { + driverList += d + "|" + } + driverList = driverList[:len(driverList)-1] + + fs.StringVar(&g.Driver, "driver", "memory", fmt.Sprintf("drivers: %s", driverList)) + fs.StringVar(&g.Queue, "queue", th.DefaultQueue, "queue name") + fs.BoolVar(&g.JSON, "json", false, "output JSON") + fs.BoolVar(&g.Verbose, "verbose", false, "verbose logs") + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") +} + +func runWorker() { + +} + +func runEnqueue() { + +} + +func runList() { + +} + +func runInspect() { + +} + +func runDlqList() { + +} + +func runDlqRetry() { + +} + +func runJobRetry() { + +} diff --git a/taskharbor/internal/app/usage.go b/taskharbor/internal/app/usage.go new file mode 100644 index 0000000..84fef55 --- /dev/null +++ b/taskharbor/internal/app/usage.go @@ -0,0 +1,136 @@ +package app + +import ( + "fmt" + "io" +) + +func printRootUsage(w io.Writer) { + fmt.Fprintln(w, `TaskHarbor CLI + +Usage: + taskharbor [global flags] [args] + +Global flags: + --driver memory|postgres|redis (default: memory) + --queue queue name (default: default) + --json JSON output + --verbose verbose logs + --help, -h show help + +Commands: + worker run + enqueue + list + inspect + dlq list + dlq requeue + job retry + +Examples: + taskharbor --help + taskharbor worker run 
--help + taskharbor enqueue --help +`) +} + +func printWorkerUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] worker + +Subcommands: + run + +Example: + taskharbor worker run --help +`) +} + +func printWorkerRunUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] worker run [flags] + +Notes: + This will be implemented in issue #113. +`) +} + +func printEnqueueUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] enqueue [flags] + +Notes: + This will be implemented in issue #114. +`) +} + +func printListUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] list [flags] + +Notes: + This will be implemented in issue #119. +`) +} + +func printInspectUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] inspect [flags] + +Notes: + This will be implemented in issue #119. +`) +} + +func printDLQUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] dlq + +Subcommands: + list + requeue + +Examples: + taskharbor dlq list --help + taskharbor dlq requeue +`) +} + +func printDLQListUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] dlq list [flags] + +Notes: + This will be implemented in issue #119. +`) +} + +func printDLQRequeueUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] dlq requeue [flags] + +Notes: + This will be implemented in issue #119. +`) +} + +func printJobUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] job + +Subcommands: + retry + +Example: + taskharbor job retry +`) +} + +func printJobRetryUsage(w io.Writer) { + fmt.Fprintln(w, `Usage: + taskharbor [global flags] job retry [flags] + +Notes: + This will be implemented in issue #119. 
+`) +} From f41a35d1133ea1940da6afd5d25ac861b10ecccd Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Sun, 22 Feb 2026 17:00:32 -0500 Subject: [PATCH 02/19] Timmed the new line error in println --- taskharbor/internal/app/usage.go | 33 +++++++++++--------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/taskharbor/internal/app/usage.go b/taskharbor/internal/app/usage.go index 84fef55..7f7c99d 100644 --- a/taskharbor/internal/app/usage.go +++ b/taskharbor/internal/app/usage.go @@ -30,8 +30,7 @@ Commands: Examples: taskharbor --help taskharbor worker run --help - taskharbor enqueue --help -`) + taskharbor enqueue --help`) } func printWorkerUsage(w io.Writer) { @@ -42,8 +41,7 @@ Subcommands: run Example: - taskharbor worker run --help -`) + taskharbor worker run --help`) } func printWorkerRunUsage(w io.Writer) { @@ -51,8 +49,7 @@ func printWorkerRunUsage(w io.Writer) { taskharbor [global flags] worker run [flags] Notes: - This will be implemented in issue #113. -`) + This will be implemented in issue #113.`) } func printEnqueueUsage(w io.Writer) { @@ -60,8 +57,7 @@ func printEnqueueUsage(w io.Writer) { taskharbor [global flags] enqueue [flags] Notes: - This will be implemented in issue #114. -`) + This will be implemented in issue #114.`) } func printListUsage(w io.Writer) { @@ -69,8 +65,7 @@ func printListUsage(w io.Writer) { taskharbor [global flags] list [flags] Notes: - This will be implemented in issue #119. -`) + This will be implemented in issue #119.`) } func printInspectUsage(w io.Writer) { @@ -78,8 +73,7 @@ func printInspectUsage(w io.Writer) { taskharbor [global flags] inspect [flags] Notes: - This will be implemented in issue #119. 
-`) + This will be implemented in issue #119.`) } func printDLQUsage(w io.Writer) { @@ -92,8 +86,7 @@ Subcommands: Examples: taskharbor dlq list --help - taskharbor dlq requeue -`) + taskharbor dlq requeue `) } func printDLQListUsage(w io.Writer) { @@ -101,8 +94,7 @@ func printDLQListUsage(w io.Writer) { taskharbor [global flags] dlq list [flags] Notes: - This will be implemented in issue #119. -`) + This will be implemented in issue #119.`) } func printDLQRequeueUsage(w io.Writer) { @@ -110,8 +102,7 @@ func printDLQRequeueUsage(w io.Writer) { taskharbor [global flags] dlq requeue [flags] Notes: - This will be implemented in issue #119. -`) + This will be implemented in issue #119.`) } func printJobUsage(w io.Writer) { @@ -122,8 +113,7 @@ Subcommands: retry Example: - taskharbor job retry -`) + taskharbor job retry `) } func printJobRetryUsage(w io.Writer) { @@ -131,6 +121,5 @@ func printJobRetryUsage(w io.Writer) { taskharbor [global flags] job retry [flags] Notes: - This will be implemented in issue #119. 
-`) + This will be implemented in issue #119.`) } From 6304bb14179d6ed74c5daa3e1d425136bca4d4da Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Sun, 22 Feb 2026 17:21:41 -0500 Subject: [PATCH 03/19] Added the main runner script --- cmd/taskharbor/main.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cmd/taskharbor/main.go b/cmd/taskharbor/main.go index 06ab7d0..cdecef8 100644 --- a/cmd/taskharbor/main.go +++ b/cmd/taskharbor/main.go @@ -1 +1,11 @@ package main + +import ( + "os" + + "github.com/ARJ2211/taskharbor/cmd/taskharbor/internal/app" +) + +func main() { + os.Exit(app.Run(os.Args[1:], os.Stdout, os.Stderr)) +} From 78eb252ebd7056fcc1cee5167eedb959c437245f Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Sun, 22 Feb 2026 17:22:31 -0500 Subject: [PATCH 04/19] Relocated the internal package --- cmd/taskharbor/internal/app/app.go | 376 ++++++++++++++++++ cmd/taskharbor/internal/app/app_test.go | 37 ++ .../taskharbor}/internal/app/usage.go | 0 taskharbor/internal/app/app.go | 67 ---- 4 files changed, 413 insertions(+), 67 deletions(-) create mode 100644 cmd/taskharbor/internal/app/app.go create mode 100644 cmd/taskharbor/internal/app/app_test.go rename {taskharbor => cmd/taskharbor}/internal/app/usage.go (100%) delete mode 100644 taskharbor/internal/app/app.go diff --git a/cmd/taskharbor/internal/app/app.go b/cmd/taskharbor/internal/app/app.go new file mode 100644 index 0000000..4a270e1 --- /dev/null +++ b/cmd/taskharbor/internal/app/app.go @@ -0,0 +1,376 @@ +package app + +import ( + "flag" + "fmt" + "io" + "strings" + + th "github.com/ARJ2211/taskharbor/taskharbor" + "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +type GlobalFlags struct { + Driver string + Queue string + JSON bool + Verbose bool +} + +func Run(argv []string, stdout, stderr io.Writer) int { + var g GlobalFlags + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + driverList := "" + for _, d := range 
driver.ImplementedDrivers { + driverList += d + "|" + } + driverList = driverList[:len(driverList)-1] + + fs.StringVar(&g.Driver, "driver", "memory", fmt.Sprintf("drivers: %s", driverList)) + fs.StringVar(&g.Queue, "queue", th.DefaultQueue, "queue name") + fs.BoolVar(&g.JSON, "json", false, "output JSON") + fs.BoolVar(&g.Verbose, "verbose", false, "verbose logs") + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Println(stderr, "error: ", err) + printRootUsage(stderr) + return 2 + } + + args := fs.Args() + if help || h || len(argv) == 0 { + printRootUsage(stdout) + return 0 + } + + cmd := args[0] + cmdArgs := args[1:] + + switch cmd { + case "worker": + return runWorker(g, cmdArgs, stdout, stderr) + case "enqueue": + return runEnqueue(g, cmdArgs, stdout, stderr) + case "list": + return runList(g, cmdArgs, stdout, stderr) + case "inspect": + return runInspect(g, cmdArgs, stdout, stderr) + case "dlq": + return runDLQ(g, cmdArgs, stdout, stderr) + case "job": + return runJob(g, cmdArgs, stdout, stderr) + case "help": + printRootUsage(stdout) + return 0 + default: + fmt.Fprintln(stderr, "error: unknown command:", cmd) + printRootUsage(stderr) + return 2 + } +} + +func runWorker(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor worker", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printWorkerUsage(stderr) + return 2 + } + + args := fs.Args() + if help || h || len(args) == 0 { + printWorkerUsage(stdout) + return 0 + } + + sub := args[0] + subArgs := args[1:] + + switch sub { + case "run": + return runWorkerRun(g, subArgs, stdout, stderr) + default: + fmt.Fprintln(stderr, "error: unknown subcommand: worker", sub) + 
printWorkerUsage(stderr) + return 2 + } +} + +func runWorkerRun(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor worker run", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printWorkerRunUsage(stderr) + return 2 + } + + if help || h { + printWorkerRunUsage(stdout) + return 0 + } + + if len(fs.Args()) != 0 { + fmt.Fprintln(stderr, "error: unexpected args:", strings.Join(fs.Args(), " ")) + printWorkerRunUsage(stderr) + return 2 + } + + fmt.Fprintln(stderr, "not implemented yet (issue #113)") + return 1 +} + +func runEnqueue(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor enqueue", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printEnqueueUsage(stderr) + return 2 + } + + if help || h { + printEnqueueUsage(stdout) + return 0 + } + + fmt.Fprintln(stderr, "not implemented yet (issue #114)") + return 1 +} + +func runList(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor list", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printListUsage(stderr) + return 2 + } + + if help || h { + printListUsage(stdout) + return 0 + } + + fmt.Fprintln(stderr, "not implemented yet (issue #119)") + return 1 +} + +func runInspect(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := 
flag.NewFlagSet("taskharbor inspect", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printInspectUsage(stderr) + return 2 + } + + if help || h { + printInspectUsage(stdout) + return 0 + } + + args := fs.Args() + if len(args) != 1 { + fmt.Fprintln(stderr, "error: inspect requires exactly 1 arg: ") + printInspectUsage(stderr) + return 2 + } + + fmt.Fprintln(stderr, "not implemented yet (issue #119)") + return 1 +} + +func runDLQ(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor dlq", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printDLQUsage(stderr) + return 2 + } + + args := fs.Args() + if help || h || len(args) == 0 { + printDLQUsage(stdout) + return 0 + } + + sub := args[0] + subArgs := args[1:] + + switch sub { + case "list": + return runDLQList(g, subArgs, stdout, stderr) + case "requeue": + return runDLQRequeue(g, subArgs, stdout, stderr) + default: + fmt.Fprintln(stderr, "error: unknown subcommand: dlq", sub) + printDLQUsage(stderr) + return 2 + } +} + +func runDLQList(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor dlq list", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printDLQListUsage(stderr) + return 2 + } + + if help || h { + printDLQListUsage(stdout) + return 0 + } + + fmt.Fprintln(stderr, "not implemented yet (issue #119)") + return 1 +} + +func runDLQRequeue(_ GlobalFlags, 
argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor dlq requeue", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printDLQRequeueUsage(stderr) + return 2 + } + + if help || h { + printDLQRequeueUsage(stdout) + return 0 + } + + args := fs.Args() + if len(args) != 1 { + fmt.Fprintln(stderr, "error: dlq requeue requires exactly 1 arg: ") + printDLQRequeueUsage(stderr) + return 2 + } + + fmt.Fprintln(stderr, "not implemented yet (issue #119)") + return 1 +} + +func runJob(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor job", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printJobUsage(stderr) + return 2 + } + + args := fs.Args() + if help || h || len(args) == 0 { + printJobUsage(stdout) + return 0 + } + + sub := args[0] + subArgs := args[1:] + + switch sub { + case "retry": + return runJobRetry(g, subArgs, stdout, stderr) + default: + fmt.Fprintln(stderr, "error: unknown subcommand: job", sub) + printJobUsage(stderr) + return 2 + } +} + +func runJobRetry(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor job retry", flag.ContinueOnError) + fs.SetOutput(io.Discard) + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printJobRetryUsage(stderr) + return 2 + } + + if help || h { + printJobRetryUsage(stdout) + return 0 + } + + args := fs.Args() + if len(args) != 1 { + fmt.Fprintln(stderr, "error: 
job retry requires exactly 1 arg: ") + printJobRetryUsage(stderr) + return 2 + } + + fmt.Fprintln(stderr, "not implemented yet (issue #119)") + return 1 +} diff --git a/cmd/taskharbor/internal/app/app_test.go b/cmd/taskharbor/internal/app/app_test.go new file mode 100644 index 0000000..60df5ae --- /dev/null +++ b/cmd/taskharbor/internal/app/app_test.go @@ -0,0 +1,37 @@ +package app + +import ( + "bytes" + "strings" + "testing" +) + +func TestRootHelp(t *testing.T) { + var out, err bytes.Buffer + code := Run([]string{"--help"}, &out, &err) + if code != 0 { + t.Fatalf("expected 0, got %d (stderr=%q)", code, err.String()) + } + if !strings.Contains(out.String(), "worker run") { + t.Fatalf("expected help to mention worker run, got: %q", out.String()) + } +} + +func TestWorkerRunHelp(t *testing.T) { + var out, err bytes.Buffer + code := Run([]string{"worker", "run", "--help"}, &out, &err) + if code != 0 { + t.Fatalf("expected 0, got %d (stderr=%q)", code, err.String()) + } + if !strings.Contains(out.String(), "worker run") { + t.Fatalf("expected worker run usage, got: %q", out.String()) + } +} + +func TestUnknownCommand(t *testing.T) { + var out, err bytes.Buffer + code := Run([]string{"nope"}, &out, &err) + if code == 0 { + t.Fatalf("expected non-zero, got %d", code) + } +} diff --git a/taskharbor/internal/app/usage.go b/cmd/taskharbor/internal/app/usage.go similarity index 100% rename from taskharbor/internal/app/usage.go rename to cmd/taskharbor/internal/app/usage.go diff --git a/taskharbor/internal/app/app.go b/taskharbor/internal/app/app.go deleted file mode 100644 index 32c2336..0000000 --- a/taskharbor/internal/app/app.go +++ /dev/null @@ -1,67 +0,0 @@ -package app - -import ( - "flag" - "fmt" - "io" - - th "github.com/ARJ2211/taskharbor/taskharbor" - "github.com/ARJ2211/taskharbor/taskharbor/driver" -) - -type GlobalFlags struct { - Driver string - Queue string - JSON bool - Verbose bool -} - -func Run(argv []string, stdout, stderr io.Writer) { - var g 
GlobalFlags - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor", flag.ContinueOnError) - fs.SetOutput(io.Discard) - - driverList := "" - for _, d := range driver.ImplementedDrivers { - driverList += d + "|" - } - driverList = driverList[:len(driverList)-1] - - fs.StringVar(&g.Driver, "driver", "memory", fmt.Sprintf("drivers: %s", driverList)) - fs.StringVar(&g.Queue, "queue", th.DefaultQueue, "queue name") - fs.BoolVar(&g.JSON, "json", false, "output JSON") - fs.BoolVar(&g.Verbose, "verbose", false, "verbose logs") - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") -} - -func runWorker() { - -} - -func runEnqueue() { - -} - -func runList() { - -} - -func runInspect() { - -} - -func runDlqList() { - -} - -func runDlqRetry() { - -} - -func runJobRetry() { - -} From d2f0e7632841db96e80487511f20a3c6fe634ccd Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Sun, 22 Feb 2026 17:23:23 -0500 Subject: [PATCH 05/19] Updated gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index ea7d938..fafe4e0 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,7 @@ go.work.sum # Stale branches / forks stale +# Taskharbor binaries +th + # .gitignore From a7c53201556fae32aef65630cd1adb5e516219bd Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Sun, 22 Feb 2026 21:41:44 -0500 Subject: [PATCH 06/19] Postgres / Memory / Redis driver are able to open using CLI --- cmd/taskharbor/internal/app/app.go | 14 ++++ cmd/taskharbor/internal/app/usage.go | 2 + cmd/taskharbor/internal/backend/backend.go | 71 +++++++++++++++++++ .../internal/backend/backend_test.go | 65 +++++++++++++++++ cmd/taskharbor/internal/envutil/dotenv.go | 36 ++++++++++ 5 files changed, 188 insertions(+) create mode 100644 cmd/taskharbor/internal/backend/backend.go create mode 100644 cmd/taskharbor/internal/backend/backend_test.go create mode 100644 cmd/taskharbor/internal/envutil/dotenv.go diff --git 
a/cmd/taskharbor/internal/app/app.go b/cmd/taskharbor/internal/app/app.go index 4a270e1..bba0a39 100644 --- a/cmd/taskharbor/internal/app/app.go +++ b/cmd/taskharbor/internal/app/app.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "io" + "os" "strings" th "github.com/ARJ2211/taskharbor/taskharbor" @@ -15,6 +16,16 @@ type GlobalFlags struct { Queue string JSON bool Verbose bool + + PostgresDSN string + RedisAddr string +} + +func envOr(key, fallback string) string { + if v := strings.TrimSpace(os.Getenv(key)); v != "" { + return v + } + return fallback } func Run(argv []string, stdout, stderr io.Writer) int { @@ -38,6 +49,9 @@ func Run(argv []string, stdout, stderr io.Writer) int { fs.BoolVar(&help, "help", false, "show help") fs.BoolVar(&h, "h", false, "show help") + fs.StringVar(&g.PostgresDSN, "dsn", envOr("TH_PG_DSN", envOr("TH_POSTGRES_DSN", "")), "postgres DSN (for --driver postgres)") + fs.StringVar(&g.RedisAddr, "redis-addr", envOr("TH_REDIS_ADDR", ""), "redis addr host:port (for --driver redis)") + if err := fs.Parse(argv); err != nil { fmt.Println(stderr, "error: ", err) printRootUsage(stderr) diff --git a/cmd/taskharbor/internal/app/usage.go b/cmd/taskharbor/internal/app/usage.go index 7f7c99d..31ccfe7 100644 --- a/cmd/taskharbor/internal/app/usage.go +++ b/cmd/taskharbor/internal/app/usage.go @@ -17,6 +17,8 @@ Global flags: --json JSON output --verbose verbose logs --help, -h show help + --dsn postgres DSN (for --driver postgres) + --redis-addr redis addr host:port (for --driver redis) Commands: worker run diff --git a/cmd/taskharbor/internal/backend/backend.go b/cmd/taskharbor/internal/backend/backend.go new file mode 100644 index 0000000..bcbbd9b --- /dev/null +++ b/cmd/taskharbor/internal/backend/backend.go @@ -0,0 +1,71 @@ +package backend + +import ( + "context" + "fmt" + "strings" + + "github.com/ARJ2211/taskharbor/taskharbor/driver" + "github.com/ARJ2211/taskharbor/taskharbor/driver/memory" + "github.com/ARJ2211/taskharbor/taskharbor/driver/postgres" + 
"github.com/ARJ2211/taskharbor/taskharbor/driver/redis" +) + +type Config struct { + Driver string + + PostgresDSN string + RedisAddr string +} + +type Handle struct { + Driver driver.Driver +} + +func (h *Handle) Close() error { + if h == nil || h.Driver == nil { + return nil + } + if err := h.Driver.Close(); err != nil { + return err + } + return nil +} + +/* +This function opens a new driver based on the configs and +arguments provided by the user. Default driver: memory. +*/ +func Open(ctx context.Context, cfg Config) (*Handle, error) { + ds := strings.ToLower(strings.TrimSpace(cfg.Driver)) + switch ds { + case "", "memory": + memDrvHnd := Handle{Driver: memory.New()} + return &memDrvHnd, nil + case "postgres": + if strings.TrimSpace(cfg.PostgresDSN) == "" { + return nil, fmt.Errorf("postgres requires --dsn (or TH_PG_DSN)") + } + d, err := postgres.New(ctx, cfg.PostgresDSN) + if err != nil { + return nil, err + } + psqlDrvHnd := Handle{Driver: d} + return &psqlDrvHnd, nil + case "redis": + if strings.TrimSpace(cfg.RedisAddr) == "" { + return nil, fmt.Errorf("redis requires --redis-addr (or TH_REDIS_ADDR)") + } + d, err := redis.New(ctx, cfg.RedisAddr) + if err != nil { + return nil, err + } + redisDrvHnd := Handle{Driver: d} + return &redisDrvHnd, nil + default: + return nil, fmt.Errorf( + "unknown driver: %s (expected memory|postgres|redis)", + cfg.Driver, + ) + } +} diff --git a/cmd/taskharbor/internal/backend/backend_test.go b/cmd/taskharbor/internal/backend/backend_test.go new file mode 100644 index 0000000..cf850f0 --- /dev/null +++ b/cmd/taskharbor/internal/backend/backend_test.go @@ -0,0 +1,65 @@ +package backend + +import ( + "context" + "os" + "testing" + + "github.com/ARJ2211/taskharbor/cmd/taskharbor/internal/envutil" +) + +func TestOpenMemory(t *testing.T) { + h, err := Open(context.Background(), Config{Driver: "memory"}) + if err != nil { + t.Fatalf("expected nil err, got %v", err) + } + if h == nil || h.Driver == nil { + t.Fatalf("expected non-nil 
handle + driver") + } + if err := h.Close(); err != nil { + t.Fatalf("expected nil close err, got %v", err) + } +} + +func TestOpenRedis(t *testing.T) { + cwd, _ := os.Getwd() + _ = envutil.LoadRepoDotenv(cwd) + + h, err := Open(context.Background(), Config{ + Driver: "redis", RedisAddr: os.Getenv("REDIS_ADDR"), + }) + if err != nil { + t.Fatalf("expected nil err, got %v", err) + } + if h == nil || h.Driver == nil { + t.Fatalf("expected non-nil handle + driver") + } + if err := h.Close(); err != nil { + t.Fatalf("expected nil close err, got %v", err) + } +} + +func TestOpenPostgres(t *testing.T) { + cwd, _ := os.Getwd() + _ = envutil.LoadRepoDotenv(cwd) + + h, err := Open(context.Background(), Config{ + Driver: "postgres", PostgresDSN: os.Getenv("TASKHARBOR_TEST_DSN"), + }) + if err != nil { + t.Fatalf("expected nil err, got %v", err) + } + if h == nil || h.Driver == nil { + t.Fatalf("expected non-nil handle + driver") + } + if err := h.Close(); err != nil { + t.Fatalf("expected nil close err, got %v", err) + } +} + +func TestOpenUnknownDriver(t *testing.T) { + _, err := Open(context.Background(), Config{Driver: "nope"}) + if err == nil { + t.Fatalf("expected error") + } +} diff --git a/cmd/taskharbor/internal/envutil/dotenv.go b/cmd/taskharbor/internal/envutil/dotenv.go new file mode 100644 index 0000000..25fa7e7 --- /dev/null +++ b/cmd/taskharbor/internal/envutil/dotenv.go @@ -0,0 +1,36 @@ +package envutil + +import ( + "os" + "path/filepath" + + "github.com/joho/godotenv" +) + +/* +LoadRepoDotenv walks up from startDir until it finds go.mod, then loads .env from that directory. + +If not found, it does nothing and returns nil. 
+*/ +func LoadRepoDotenv(startDir string) error { + dir, err := filepath.Abs(startDir) + if err != nil { + return err + } + + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + envPath := filepath.Join(dir, ".env") + if _, err := os.Stat(envPath); err == nil { + return godotenv.Load(envPath) + } + return nil + } + + parent := filepath.Dir(dir) + if parent == dir { + return nil + } + dir = parent + } +} From dba0378ca41c7d657c254706e9e7cd52c2f67090 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Mon, 23 Feb 2026 15:20:11 -0500 Subject: [PATCH 07/19] Updated usage --- cmd/taskharbor/internal/app/usage.go | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/cmd/taskharbor/internal/app/usage.go b/cmd/taskharbor/internal/app/usage.go index 31ccfe7..c96df89 100644 --- a/cmd/taskharbor/internal/app/usage.go +++ b/cmd/taskharbor/internal/app/usage.go @@ -50,16 +50,38 @@ func printWorkerRunUsage(w io.Writer) { fmt.Fprintln(w, `Usage: taskharbor [global flags] worker run [flags] -Notes: - This will be implemented in issue #113.`) +Flags: + --concurrency max concurrent jobs + --poll-interval e.g. 200ms + --lease-duration e.g. 30s + --heartbeat-interval e.g. 10s + --register repeatable mapping: jobType=builtin (builtins: echo, fail, sleep) + +Examples: + taskharbor worker run + taskharbor worker run --concurrency 8 + taskharbor worker run --register email=echo --register slow=sleep`) } func printEnqueueUsage(w io.Writer) { fmt.Fprintln(w, `Usage: taskharbor [global flags] enqueue [flags] -Notes: - This will be implemented in issue #114.`) +Flags: + --type required job type + --queue queue name (default: global --queue or "default") + --run-at RFC3339/RFC3339Nano or unix seconds (or unix ms) + --timeout e.g. 
30s + --max-attempts retries before DLQ (0 means fail immediately on handler error) + --idempotency-key idempotency key + --payload string payload (JSON-encoded) + --payload-json raw JSON payload (not double-encoded) + +Examples: + taskharbor enqueue --type echo --payload hello + taskharbor enqueue --type echo --payload-json {"msg":"hi"} + taskharbor enqueue --type echo --run-at 1772000000 --payload hi + taskharbor --driver postgres --dsn $TH_PG_DSN enqueue --type echo --idempotency-key user:123 --payload hi`) } func printListUsage(w io.Writer) { From 11696c9debc1c1f7f6d3c181ede305693d132a25 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Mon, 23 Feb 2026 15:20:42 -0500 Subject: [PATCH 08/19] Updated app and added flags for parsing --- cmd/taskharbor/internal/app/app.go | 54 ---------------------------- cmd/taskharbor/internal/app/flags.go | 11 ++++++ 2 files changed, 11 insertions(+), 54 deletions(-) create mode 100644 cmd/taskharbor/internal/app/flags.go diff --git a/cmd/taskharbor/internal/app/app.go b/cmd/taskharbor/internal/app/app.go index bba0a39..c706a2f 100644 --- a/cmd/taskharbor/internal/app/app.go +++ b/cmd/taskharbor/internal/app/app.go @@ -124,60 +124,6 @@ func runWorker(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { } } -func runWorkerRun(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor worker run", flag.ContinueOnError) - fs.SetOutput(io.Discard) - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") - - if err := fs.Parse(argv); err != nil { - fmt.Fprintln(stderr, "error:", err) - printWorkerRunUsage(stderr) - return 2 - } - - if help || h { - printWorkerRunUsage(stdout) - return 0 - } - - if len(fs.Args()) != 0 { - fmt.Fprintln(stderr, "error: unexpected args:", strings.Join(fs.Args(), " ")) - printWorkerRunUsage(stderr) - return 2 - } - - fmt.Fprintln(stderr, "not implemented yet (issue #113)") - return 1 -} - -func 
runEnqueue(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor enqueue", flag.ContinueOnError) - fs.SetOutput(io.Discard) - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") - - if err := fs.Parse(argv); err != nil { - fmt.Fprintln(stderr, "error:", err) - printEnqueueUsage(stderr) - return 2 - } - - if help || h { - printEnqueueUsage(stdout) - return 0 - } - - fmt.Fprintln(stderr, "not implemented yet (issue #114)") - return 1 -} - func runList(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { var help bool var h bool diff --git a/cmd/taskharbor/internal/app/flags.go b/cmd/taskharbor/internal/app/flags.go new file mode 100644 index 0000000..43c9280 --- /dev/null +++ b/cmd/taskharbor/internal/app/flags.go @@ -0,0 +1,11 @@ +package app + +import "strings" + +type multiString []string + +func (m *multiString) String() string { return strings.Join(*m, ",") } +func (m *multiString) Set(v string) error { + *m = append(*m, v) + return nil +} From 7b3de1bb8c9b5dede69576ba0da8c2dd242cdb5d Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Mon, 23 Feb 2026 15:49:08 -0500 Subject: [PATCH 09/19] Memory admin added for CLI --- cmd/taskharbor/internal/app/enqueue.go | 204 +++++++++++ taskharbor/driver/admin.go | 127 +++++++ taskharbor/driver/memory/admin.go | 328 ++++++++++++++++++ taskharbor/driver/memory/memory_admin_test.go | 151 ++++++++ 4 files changed, 810 insertions(+) create mode 100644 cmd/taskharbor/internal/app/enqueue.go create mode 100644 taskharbor/driver/admin.go create mode 100644 taskharbor/driver/memory/admin.go create mode 100644 taskharbor/driver/memory/memory_admin_test.go diff --git a/cmd/taskharbor/internal/app/enqueue.go b/cmd/taskharbor/internal/app/enqueue.go new file mode 100644 index 0000000..68d16e9 --- /dev/null +++ b/cmd/taskharbor/internal/app/enqueue.go @@ -0,0 +1,204 @@ +package app + +import ( + "context" + "encoding/json" + 
"flag" + "fmt" + "io" + "strconv" + "strings" + "time" + + "github.com/ARJ2211/taskharbor/cmd/taskharbor/internal/backend" + th "github.com/ARJ2211/taskharbor/taskharbor" +) + +func runEnqueue(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var ( + help bool + h bool + + jobType string + queue string + runAt string + + timeout time.Duration + maxAttempts int + idKey string + + payloadStr string + payloadJSON string + ) + + fs := flag.NewFlagSet("taskharbor enqueue", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + fs.StringVar(&jobType, "type", "", "job type (required)") + fs.StringVar(&queue, "queue", "", "queue name (default: global --queue or DefaultQueue)") + fs.StringVar(&runAt, "run-at", "", "schedule time (RFC3339/RFC3339Nano) or unix seconds (or unix ms)") + fs.DurationVar(&timeout, "timeout", 0, "job timeout (e.g. 30s)") + fs.IntVar(&maxAttempts, "max-attempts", 0, "max attempts before DLQ (0 means fail immediately on handler error)") + fs.StringVar(&idKey, "idempotency-key", "", "idempotency key") + + fs.StringVar(&payloadStr, "payload", "", "payload as string (will be JSON-encoded)") + fs.StringVar(&payloadJSON, "payload-json", "", "payload as raw JSON (not double-encoded)") + + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printEnqueueUsage(stderr) + return 2 + } + if help || h { + printEnqueueUsage(stdout) + return 0 + } + if len(fs.Args()) != 0 { + fmt.Fprintln(stderr, "error: unexpected args:", strings.Join(fs.Args(), " ")) + printEnqueueUsage(stderr) + return 2 + } + + if strings.TrimSpace(jobType) == "" { + fmt.Fprintln(stderr, "error: --type is required") + printEnqueueUsage(stderr) + return 2 + } + if payloadStr != "" && payloadJSON != "" { + fmt.Fprintln(stderr, "error: use only one of --payload or --payload-json") + return 2 + } + + effectiveQueue := strings.TrimSpace(queue) + if effectiveQueue == "" { 
+ effectiveQueue = strings.TrimSpace(g.Queue) + } + if effectiveQueue == "" { + effectiveQueue = th.DefaultQueue + } + + runAtTime, err := parseRunAt(runAt) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 2 + } + + var payload any = nil + if payloadJSON != "" { + b := []byte(payloadJSON) + if !json.Valid(b) { + fmt.Fprintln(stderr, "error: --payload-json must be valid JSON") + return 2 + } + payload = json.RawMessage(b) + } else if payloadStr != "" { + payload = payloadStr + } + + ctx := context.Background() + hnd, err := backend.Open(ctx, backend.Config{ + Driver: g.Driver, + PostgresDSN: g.PostgresDSN, + RedisAddr: g.RedisAddr, + }) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + defer func() { _ = hnd.Close() }() + + client := th.NewClient(hnd.Driver) + + req := th.JobRequest{ + Type: jobType, + Payload: payload, + Queue: effectiveQueue, + RunAt: runAtTime, + Timeout: timeout, + IdempotencyKey: idKey, + MaxAttempts: maxAttempts, + } + + id, err := client.Enqueue(ctx, req) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + + if g.JSON { + out := map[string]any{ + "id": id, + "type": jobType, + "queue": effectiveQueue, + "max_attempts": maxAttempts, + "idempotency": idKey, + "timeout_millis": func() any { + if timeout <= 0 { + return nil + } + return timeout.Milliseconds() + }(), + "run_at": func() any { + if runAtTime.IsZero() { + return nil + } + return runAtTime.Format(time.RFC3339Nano) + }(), + } + enc := json.NewEncoder(stdout) + enc.SetEscapeHTML(false) + _ = enc.Encode(out) + return 0 + } + + fmt.Fprintln(stdout, id) + return 0 +} + +func parseRunAt(s string) (time.Time, error) { + s = strings.TrimSpace(s) + if s == "" { + return time.Time{}, nil + } + + // unix seconds or millis + if isDigits(s) { + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return time.Time{}, fmt.Errorf("invalid --run-at: %w", err) + } + if n < 0 { + return time.Time{}, fmt.Errorf("invalid --run-at: must be >= 
0") + } + // heuristic: 13+ digits -> ms + if n >= 1_000_000_000_000 { + return time.Unix(0, n*int64(time.Millisecond)).UTC(), nil + } + return time.Unix(n, 0).UTC(), nil + } + + // RFC3339 / RFC3339Nano + if t, err := time.Parse(time.RFC3339Nano, s); err == nil { + return t.UTC(), nil + } + if t, err := time.Parse(time.RFC3339, s); err == nil { + return t.UTC(), nil + } + + return time.Time{}, fmt.Errorf("invalid --run-at: use RFC3339 (with timezone) or unix seconds") +} + +func isDigits(s string) bool { + if s == "" { + return false + } + for i := 0; i < len(s); i++ { + if s[i] < '0' || s[i] > '9' { + return false + } + } + return true +} diff --git a/taskharbor/driver/admin.go b/taskharbor/driver/admin.go new file mode 100644 index 0000000..a0d851f --- /dev/null +++ b/taskharbor/driver/admin.go @@ -0,0 +1,127 @@ +package driver + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "strings" + "time" +) + +type JobState string + +const ( + StateReady JobState = "ready" // runnable now (due) + StateScheduled JobState = "scheduled" // not due yet (run_at > now) + StateInflight JobState = "inflight" // leased + StateDone JobState = "done" // terminal success + StateDLQ JobState = "dlq" // terminal failure +) + +var ( + ErrAdminUnsupported = errors.New("admin interface not supported by driver") + ErrInvalidCursor = errors.New("invalid cursor") + ErrJobNotDLQ = errors.New("job is not in dlq") +) + +type LeaseInfo struct { + Token LeaseToken + ExpiresAt time.Time +} + +type DLQInfo struct { + Reason string + FailedAt time.Time +} + +type JobInfo struct { + Record JobRecord + State JobState + + Lease *LeaseInfo + DLQ *DLQInfo +} + +type JobSummary struct { + ID string + Type string + Queue string + State JobState + RunAt time.Time + CreatedAt time.Time + Timeout time.Duration + Attempts int + MaxAttempts int + LastError string + FailedAt time.Time + + LeaseExpiresAt time.Time + DLQReason string + DLQFailedAt time.Time +} + +type ListRequest 
struct { + Queue string // required + State JobState // optional; empty means "all" + Now time.Time + Limit int + Cursor string +} + +type ListPage struct { + Jobs []JobSummary + NextCursor string +} + +type RequeueOptions struct { + Queue string // optional guard; if set and mismatch, driver should error + RunAt time.Time // zero => immediate + ResetAttempts bool // if true: set attempts=0 and clear last_error/failed_at +} + +// Admin is optional and used by CLI/dev tooling. Worker + Client must not depend on this. +type Admin interface { + Inspect(ctx context.Context, id string, now time.Time) (JobInfo, error) + List(ctx context.Context, req ListRequest) (ListPage, error) + RequeueDLQ(ctx context.Context, id string, now time.Time, opt RequeueOptions) error +} + +// Cursor is an opaque pagination token with a standard encoding. +// A/B/ID meaning depends on State and the driver’s chosen ordering. +type Cursor struct { + V int `json:"v"` + State JobState `json:"s"` + A int64 `json:"a"` + B int64 `json:"b,omitempty"` + ID string `json:"id"` +} + +func EncodeCursor(c Cursor) string { + c.V = 1 + b, _ := json.Marshal(c) + return base64.RawURLEncoding.EncodeToString(b) +} + +func DecodeCursor(s string) (Cursor, error) { + s = strings.TrimSpace(s) + if s == "" { + return Cursor{}, nil + } + b, err := base64.RawURLEncoding.DecodeString(s) + if err != nil { + return Cursor{}, fmt.Errorf("%w: %v", ErrInvalidCursor, err) + } + var c Cursor + if err := json.Unmarshal(b, &c); err != nil { + return Cursor{}, fmt.Errorf("%w: %v", ErrInvalidCursor, err) + } + if c.V != 1 { + return Cursor{}, fmt.Errorf("%w: unsupported cursor version", ErrInvalidCursor) + } + if strings.TrimSpace(c.ID) == "" { + return Cursor{}, fmt.Errorf("%w: missing id", ErrInvalidCursor) + } + return c, nil +} diff --git a/taskharbor/driver/memory/admin.go b/taskharbor/driver/memory/admin.go new file mode 100644 index 0000000..7b349ac --- /dev/null +++ b/taskharbor/driver/memory/admin.go @@ -0,0 +1,328 @@ 
+package memory + +import ( + "container/heap" + "context" + "fmt" + "sort" + "strings" + "time" + + "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +var _ driver.Admin = (*Driver)(nil) + +func (d *Driver) Inspect(ctx context.Context, id string, now time.Time) (driver.JobInfo, error) { + if err := ctx.Err(); err != nil { + return driver.JobInfo{}, err + } + id = strings.TrimSpace(id) + if id == "" { + return driver.JobInfo{}, driver.ErrJobNotFound + } + if now.IsZero() { + now = time.Now().UTC() + } + + d.mu.Lock() + defer d.mu.Unlock() + + if d.closed { + return driver.JobInfo{}, ErrDriverClosed + } + + // inflight (fast path) + if q, ok := d.inflightIndex[id]; ok { + qs := d.queues[q] + if qs != nil { + if it, ok := qs.inflight[id]; ok { + return driver.JobInfo{ + Record: it.rec, + State: driver.StateInflight, + Lease: &driver.LeaseInfo{ + Token: it.lease.Token, + ExpiresAt: it.lease.ExpiresAt, + }, + }, nil + } + } + } + + // dlq (fast path) + if q, ok := d.dlqIndex[id]; ok { + qs := d.queues[q] + if qs != nil { + if item, ok := qs.dlq[id]; ok { + return driver.JobInfo{ + Record: item.Record, + State: driver.StateDLQ, + DLQ: &driver.DLQInfo{ + Reason: item.Reason, + FailedAt: item.FailedAt, + }, + }, nil + } + } + } + + // done (fast path) + if _, ok := d.doneIndex[id]; ok { + if rec, ok2 := d.doneRecords[id]; ok2 { + return driver.JobInfo{Record: rec, State: driver.StateDone}, nil + } + // Shouldn't happen after we store doneRecords, but keep a sane error. 
+ return driver.JobInfo{}, driver.ErrJobNotFound + } + + // ready/scheduled scan (no index) + for _, qs := range d.queues { + if qs == nil { + continue + } + for _, rec := range qs.runnable { + if rec.ID == id { + return driver.JobInfo{Record: rec, State: stateFromRunAt(now, rec.RunAt)}, nil + } + } + for _, rec := range qs.scheduled { + if rec.ID == id { + return driver.JobInfo{Record: rec, State: stateFromRunAt(now, rec.RunAt)}, nil + } + } + } + + return driver.JobInfo{}, driver.ErrJobNotFound +} + +func (d *Driver) List(ctx context.Context, req driver.ListRequest) (driver.ListPage, error) { + if err := ctx.Err(); err != nil { + return driver.ListPage{}, err + } + req.Queue = strings.TrimSpace(req.Queue) + if req.Queue == "" { + return driver.ListPage{}, fmt.Errorf("queue is required") + } + if req.Now.IsZero() { + req.Now = time.Now().UTC() + } + if req.Limit <= 0 { + req.Limit = 50 + } + + var cur driver.Cursor + if strings.TrimSpace(req.Cursor) != "" { + c, err := driver.DecodeCursor(req.Cursor) + if err != nil { + return driver.ListPage{}, err + } + cur = c + } + + d.mu.Lock() + defer d.mu.Unlock() + + if d.closed { + return driver.ListPage{}, ErrDriverClosed + } + + qs := d.queues[req.Queue] + if qs == nil { + return driver.ListPage{Jobs: nil, NextCursor: ""}, nil + } + + jobs := make([]driver.JobSummary, 0) + + addIf := func(s driver.JobSummary) { + if req.State == "" || s.State == req.State { + jobs = append(jobs, s) + } + } + + // runnable + for _, rec := range qs.runnable { + addIf(summaryFromRecord(req.Now, rec, nil, nil)) + } + // scheduled heap contents + for _, rec := range qs.scheduled { + addIf(summaryFromRecord(req.Now, rec, nil, nil)) + } + // inflight + for _, it := range qs.inflight { + lease := &driver.LeaseInfo{Token: it.lease.Token, ExpiresAt: it.lease.ExpiresAt} + addIf(summaryFromRecord(req.Now, it.rec, lease, nil)) + } + // dlq + for _, item := range qs.dlq { + dlq := &driver.DLQInfo{Reason: item.Reason, FailedAt: item.FailedAt} + 
addIf(summaryFromRecord(req.Now, item.Record, nil, dlq)) + } + // done (global map, filter by queue) + for _, rec := range d.doneRecords { + if rec.Queue != req.Queue { + continue + } + addIf(summaryFromRecord(req.Now, rec, nil, nil)) + } + + // stable ordering: (created_at asc, id asc) + sort.Slice(jobs, func(i, j int) bool { + ai := jobs[i].CreatedAt.UnixNano() + aj := jobs[j].CreatedAt.UnixNano() + if ai != aj { + return ai < aj + } + return jobs[i].ID < jobs[j].ID + }) + + // apply cursor filter + if strings.TrimSpace(req.Cursor) != "" { + out := jobs[:0] + for _, s := range jobs { + a := s.CreatedAt.UnixNano() + if a > cur.A || (a == cur.A && s.ID > cur.ID) { + out = append(out, s) + } + } + jobs = out + } + + if len(jobs) == 0 { + return driver.ListPage{Jobs: nil, NextCursor: ""}, nil + } + + if len(jobs) <= req.Limit { + return driver.ListPage{Jobs: jobs, NextCursor: ""}, nil + } + + page := jobs[:req.Limit] + last := page[len(page)-1] + next := driver.EncodeCursor(driver.Cursor{ + State: req.State, + A: last.CreatedAt.UnixNano(), + ID: last.ID, + }) + + return driver.ListPage{Jobs: page, NextCursor: next}, nil +} + +func (d *Driver) RequeueDLQ(ctx context.Context, id string, now time.Time, opt driver.RequeueOptions) error { + if err := ctx.Err(); err != nil { + return err + } + id = strings.TrimSpace(id) + if id == "" { + return driver.ErrJobNotFound + } + if now.IsZero() { + now = time.Now().UTC() + } + + d.mu.Lock() + defer d.mu.Unlock() + + if d.closed { + return ErrDriverClosed + } + + q, ok := d.dlqIndex[id] + if !ok { + // If job exists elsewhere, be explicit. 
+ if _, ok := d.inflightIndex[id]; ok { + return driver.ErrJobNotDLQ + } + if _, ok := d.doneIndex[id]; ok { + return driver.ErrJobNotDLQ + } + for _, qs := range d.queues { + if qs == nil { + continue + } + for _, rec := range qs.runnable { + if rec.ID == id { + return driver.ErrJobNotDLQ + } + } + for _, rec := range qs.scheduled { + if rec.ID == id { + return driver.ErrJobNotDLQ + } + } + } + return driver.ErrJobNotFound + } + + if opt.Queue != "" && opt.Queue != q { + return fmt.Errorf("queue mismatch: job is in %q, not %q", q, opt.Queue) + } + + qs := d.queues[q] + if qs == nil { + return driver.ErrJobNotFound + } + + item, ok := qs.dlq[id] + if !ok { + return driver.ErrJobNotFound + } + + delete(qs.dlq, id) + delete(d.dlqIndex, id) + + rec := item.Record + rec.Queue = q + rec.RunAt = opt.RunAt // zero => immediate + + if opt.ResetAttempts { + rec.Attempts = 0 + rec.LastError = "" + rec.FailedAt = time.Time{} + } + + if rec.RunAt.IsZero() || !rec.RunAt.After(now) { + rec.RunAt = time.Time{} + qs.runnable = append(qs.runnable, rec) + return nil + } + + heap.Push(&qs.scheduled, rec) + return nil +} + +func stateFromRunAt(now, runAt time.Time) driver.JobState { + if runAt.IsZero() || !runAt.After(now) { + return driver.StateReady + } + return driver.StateScheduled +} + +func summaryFromRecord(now time.Time, rec driver.JobRecord, lease *driver.LeaseInfo, dlq *driver.DLQInfo) driver.JobSummary { + s := driver.JobSummary{ + ID: rec.ID, + Type: rec.Type, + Queue: rec.Queue, + RunAt: rec.RunAt, + CreatedAt: rec.CreatedAt, + Timeout: rec.Timeout, + Attempts: rec.Attempts, + MaxAttempts: rec.MaxAttempts, + LastError: rec.LastError, + FailedAt: rec.FailedAt, + State: stateFromRunAt(now, rec.RunAt), + } + + if lease != nil { + s.State = driver.StateInflight + s.LeaseExpiresAt = lease.ExpiresAt + } + if dlq != nil { + s.State = driver.StateDLQ + s.DLQReason = dlq.Reason + s.DLQFailedAt = dlq.FailedAt + } + + // done is inferred later via state override in List when needed, 
+ // but for memory we don’t have a separate “done container” per queue. + // Inspect sets done explicitly. + return s +} diff --git a/taskharbor/driver/memory/memory_admin_test.go b/taskharbor/driver/memory/memory_admin_test.go new file mode 100644 index 0000000..43c93be --- /dev/null +++ b/taskharbor/driver/memory/memory_admin_test.go @@ -0,0 +1,151 @@ +package memory + +import ( + "context" + "testing" + "time" + + "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +func TestAdmin_Inspect_List_RequeueDLQ(t *testing.T) { + d := New() + ctx := context.Background() + now := time.Now().UTC() + q := "q0" + + recReady := driver.JobRecord{ + ID: "job_ready", + Type: "echo", + Queue: q, + CreatedAt: now.Add(-3 * time.Second), + } + if _, _, err := d.Enqueue(ctx, recReady); err != nil { + t.Fatal(err) + } + + recSched := driver.JobRecord{ + ID: "job_sched", + Type: "echo", + Queue: q, + RunAt: now.Add(10 * time.Second), + CreatedAt: now.Add(-2 * time.Second), + } + if _, _, err := d.Enqueue(ctx, recSched); err != nil { + t.Fatal(err) + } + + // Inspect ready + ji, err := d.Inspect(ctx, "job_ready", now) + if err != nil { + t.Fatal(err) + } + if ji.State != driver.StateReady { + t.Fatalf("expected ready, got %s", ji.State) + } + + // Reserve -> inflight + r, lease, ok, err := d.Reserve(ctx, q, now, 5*time.Second) + if err != nil || !ok { + t.Fatalf("reserve err=%v ok=%v", err, ok) + } + if r.ID != "job_ready" { + t.Fatalf("expected job_ready reserved, got %s", r.ID) + } + + ji, err = d.Inspect(ctx, "job_ready", now) + if err != nil { + t.Fatal(err) + } + if ji.State != driver.StateInflight || ji.Lease == nil { + t.Fatalf("expected inflight w/ lease, got state=%s lease=%v", ji.State, ji.Lease) + } + + // Fail -> DLQ + if err := d.Fail(ctx, "job_ready", lease.Token, now, "boom"); err != nil { + t.Fatal(err) + } + ji, err = d.Inspect(ctx, "job_ready", now) + if err != nil { + t.Fatal(err) + } + if ji.State != driver.StateDLQ || ji.DLQ == nil { + t.Fatalf("expected dlq w/ 
info, got state=%s dlq=%v", ji.State, ji.DLQ) + } + + // Requeue DLQ -> ready + if err := d.RequeueDLQ(ctx, "job_ready", now, driver.RequeueOptions{Queue: q, ResetAttempts: true}); err != nil { + t.Fatal(err) + } + ji, err = d.Inspect(ctx, "job_ready", now) + if err != nil { + t.Fatal(err) + } + if ji.State != driver.StateReady { + t.Fatalf("expected ready after requeue, got %s", ji.State) + } + + // Reserve + Ack -> done + r2, lease2, ok, err := d.Reserve(ctx, q, now, 5*time.Second) + if err != nil || !ok { + t.Fatalf("reserve2 err=%v ok=%v", err, ok) + } + if r2.ID != "job_ready" { + t.Fatalf("expected job_ready reserved again, got %s", r2.ID) + } + if err := d.Ack(ctx, "job_ready", lease2.Token, now); err != nil { + t.Fatal(err) + } + + ji, err = d.Inspect(ctx, "job_ready", now) + if err != nil { + t.Fatal(err) + } + if ji.State != driver.StateDone { + t.Fatalf("expected done, got %s", ji.State) + } + + // List all states in queue + page, err := d.List(ctx, driver.ListRequest{Queue: q, Now: now, Limit: 10}) + if err != nil { + t.Fatal(err) + } + if len(page.Jobs) == 0 { + t.Fatalf("expected some jobs in list") + } +} + +func TestAdmin_List_Pagination(t *testing.T) { + d := New() + ctx := context.Background() + now := time.Now().UTC() + q := "q0" + + for i := 0; i < 3; i++ { + rec := driver.JobRecord{ + ID: "job_" + string(rune('a'+i)), + Type: "echo", + Queue: q, + CreatedAt: now.Add(time.Duration(i) * time.Second), + } + if _, _, err := d.Enqueue(ctx, rec); err != nil { + t.Fatal(err) + } + } + + p1, err := d.List(ctx, driver.ListRequest{Queue: q, Now: now, Limit: 2}) + if err != nil { + t.Fatal(err) + } + if len(p1.Jobs) != 2 || p1.NextCursor == "" { + t.Fatalf("expected 2 jobs + cursor, got %d cursor=%q", len(p1.Jobs), p1.NextCursor) + } + + p2, err := d.List(ctx, driver.ListRequest{Queue: q, Now: now, Limit: 2, Cursor: p1.NextCursor}) + if err != nil { + t.Fatal(err) + } + if len(p2.Jobs) != 1 { + t.Fatalf("expected 1 job on second page, got %d", 
len(p2.Jobs)) + } +} From 7cd94a96fa01aa2d0ceeff90c05d5d66dd16a9af Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Mon, 23 Feb 2026 15:49:30 -0500 Subject: [PATCH 10/19] Updated memory driver to track done records --- taskharbor/driver/memory/memory.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/taskharbor/driver/memory/memory.go b/taskharbor/driver/memory/memory.go index dfb910d..fcf3983 100644 --- a/taskharbor/driver/memory/memory.go +++ b/taskharbor/driver/memory/memory.go @@ -29,8 +29,9 @@ type Driver struct { inflightIndex map[string]string idemIndex map[string]string - doneIndex map[string]struct{} - dlqIndex map[string]string + doneIndex map[string]struct{} + dlqIndex map[string]string + doneRecords map[string]driver.JobRecord closed bool } @@ -78,6 +79,11 @@ func (d *Driver) Reset() error { d.queues = make(map[string]*queueState) d.inflightIndex = make(map[string]string) d.idemIndex = make(map[string]string) + + d.doneIndex = make(map[string]struct{}) + d.doneRecords = make(map[string]driver.JobRecord) + d.dlqIndex = make(map[string]string) + d.closed = false return nil } @@ -123,6 +129,7 @@ func New() *Driver { idemIndex: make(map[string]string), doneIndex: make(map[string]struct{}), dlqIndex: make(map[string]string), + doneRecords: make(map[string]driver.JobRecord), } return &driver } @@ -415,6 +422,9 @@ func (d *Driver) Ack( delete(qs.inflight, id) delete(d.inflightIndex, id) + d.doneIndex[id] = struct{}{} + d.doneRecords[id] = it.rec + d.doneIndex[id] = struct{}{} return nil From fecf2772a1ca1dd252d80f17ff373edbf6263cb3 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Mon, 23 Feb 2026 16:24:53 -0500 Subject: [PATCH 11/19] Updated the tests for app cli and added worker_run.go --- cmd/taskharbor/internal/app/app_test.go | 11 ++ cmd/taskharbor/internal/app/worker_run.go | 216 ++++++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 cmd/taskharbor/internal/app/worker_run.go diff --git 
a/cmd/taskharbor/internal/app/app_test.go b/cmd/taskharbor/internal/app/app_test.go index 60df5ae..2510d71 100644 --- a/cmd/taskharbor/internal/app/app_test.go +++ b/cmd/taskharbor/internal/app/app_test.go @@ -35,3 +35,14 @@ func TestUnknownCommand(t *testing.T) { t.Fatalf("expected non-zero, got %d", code) } } + +func TestEnqueueMinimal(t *testing.T) { + var out, err bytes.Buffer + code := Run([]string{"enqueue", "--type", "echo", "--payload", "hi"}, &out, &err) + if code != 0 { + t.Fatalf("expected 0, got %d (stderr=%q)", code, err.String()) + } + if strings.TrimSpace(out.String()) == "" { + t.Fatalf("expected a job id, got empty output") + } +} diff --git a/cmd/taskharbor/internal/app/worker_run.go b/cmd/taskharbor/internal/app/worker_run.go new file mode 100644 index 0000000..159dd3f --- /dev/null +++ b/cmd/taskharbor/internal/app/worker_run.go @@ -0,0 +1,216 @@ +package app + +import ( + "context" + "encoding/json" + "errors" + "flag" + "fmt" + "io" + "os" + "os/signal" + "strconv" + "strings" + "syscall" + "time" + + "github.com/ARJ2211/taskharbor/cmd/taskharbor/internal/backend" + th "github.com/ARJ2211/taskharbor/taskharbor" +) + +func runWorkerRun(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var ( + help bool + h bool + + concurrency int + poll time.Duration + lease time.Duration + heartbeat time.Duration + + reg multiString + ) + fs := flag.NewFlagSet("taskharbor worker run", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + fs.IntVar(&concurrency, "concurrency", 0, "max concurrent jobs (default: taskharbor default)") + fs.DurationVar(&poll, "poll-interval", 0, "poll interval when no jobs (e.g. 200ms)") + fs.DurationVar(&lease, "lease-duration", 0, "lease duration (e.g. 30s)") + fs.DurationVar(&heartbeat, "heartbeat-interval", 0, "lease heartbeat interval (e.g. 10s)") + fs.Var(&reg, "register", "map jobType to builtin handler (repeatable). format: jobType=builtin. 
builtins: echo,fail,sleep") + + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printWorkerRunUsage(stderr) + return 2 + } + if help || h { + printWorkerRunUsage(stdout) + return 0 + } + if len(fs.Args()) != 0 { + fmt.Fprintln(stderr, "error: unexpected args:", strings.Join(fs.Args(), " ")) + printWorkerRunUsage(stderr) + return 2 + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Ctrl+C / SIGTERM -> graceful shutdown + sigc := make(chan os.Signal, 2) + signal.Notify(sigc, os.Interrupt, syscall.SIGTERM) + defer signal.Stop(sigc) + go func() { + <-sigc + cancel() + }() + + bh, err := backend.Open(ctx, backend.Config{ + Driver: g.Driver, + PostgresDSN: g.PostgresDSN, + RedisAddr: g.RedisAddr, + }) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + defer func() { _ = bh.Close() }() + + opts := make([]th.Option, 0, 6) + opts = append(opts, th.WithDefaultQueue(g.Queue)) + if concurrency > 0 { + opts = append(opts, th.WithConcurrency(concurrency)) + } + if poll > 0 { + opts = append(opts, th.WithPollInterval(poll)) + } + if lease > 0 { + opts = append(opts, th.WithLeaseDuration(lease)) + } + if heartbeat > 0 { + opts = append(opts, th.WithHeartbeatInterval(heartbeat)) + } + + worker := th.NewWorker(bh.Driver, opts...) + + // Example Builtins + builtins := map[string]th.Handler{ + "echo": echoHandler(stdout), + "fail": failHandler(), + "sleep": sleepHandler(stdout), + } + + // Always register the default names. 
+ for name, fn := range builtins { + _ = worker.Register(name, fn) + } + + // Apply --register mappings (jobType=builtin) + for _, m := range reg { + jobType, builtin, ok := strings.Cut(m, "=") + jobType = strings.TrimSpace(jobType) + if !ok { + // allow shorthand + builtin = jobType + } + builtin = strings.TrimSpace(builtin) + + if jobType == "" || builtin == "" { + fmt.Fprintln(stderr, "error: invalid --register:", m) + return 2 + } + fn, ok := builtins[builtin] + if !ok { + fmt.Fprintln(stderr, "error: unknown builtin for --register:", builtin) + return 2 + } + if err := worker.Register(jobType, fn); err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + } + + fmt.Fprintf(stdout, "worker started id=%s driver=%s queue=%s\n", worker.ID(), strings.ToLower(g.Driver), g.Queue) + + if err := worker.Run(ctx); err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + + fmt.Fprintln(stdout, "worker stopped") + return 0 +} + +func echoHandler(w io.Writer) th.Handler { + return func(ctx context.Context, job th.Job) error { + _ = ctx + fmt.Fprintf(w, "echo id=%s type=%s queue=%s payload=%s\n", job.ID, job.Type, job.Queue, string(job.Payload)) + return nil + } +} + +func failHandler() th.Handler { + return func(ctx context.Context, job th.Job) error { + _ = ctx + _ = job + return errors.New("fail handler: requested failure") + } +} + +func sleepHandler(w io.Writer) th.Handler { + return func(ctx context.Context, job th.Job) error { + d := parseSleepDuration(job.Payload) + if d <= 0 { + d = 250 * time.Millisecond + } + fmt.Fprintf(w, "sleep id=%s duration=%s\n", job.ID, d) + t := time.NewTimer(d) + defer t.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.C: + return nil + } + } +} + +func parseSleepDuration(payload []byte) time.Duration { + s := strings.TrimSpace(string(payload)) + if s == "" { + return 0 + } + + // Try plain int millis: "1500" or `"1500"` + s = strings.Trim(s, "\"") + if n, err := strconv.Atoi(s); err == nil { + 
return time.Duration(n) * time.Millisecond + } + + // Try JSON number or JSON object: {"ms": 1500} or {"duration":"1.5s"} + var num int + if err := json.Unmarshal(payload, &num); err == nil { + return time.Duration(num) * time.Millisecond + } + + var obj struct { + MS int `json:"ms"` + Duration string `json:"duration"` + } + if err := json.Unmarshal(payload, &obj); err == nil { + if obj.Duration != "" { + if d, err := time.ParseDuration(obj.Duration); err == nil { + return d + } + } + if obj.MS > 0 { + return time.Duration(obj.MS) * time.Millisecond + } + } + + return 0 +} From add7e0ad2d5612dc7f676b288f4c62e0c800acd2 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Mon, 23 Feb 2026 21:37:50 -0500 Subject: [PATCH 12/19] Redis admin added --- taskharbor/driver/redis/admin.go | 694 ++++++++++++++++++++ taskharbor/driver/redis/redis_admin_test.go | 184 ++++++ taskharbor/driver/redis/scripts.go | 106 +++ 3 files changed, 984 insertions(+) create mode 100644 taskharbor/driver/redis/admin.go create mode 100644 taskharbor/driver/redis/redis_admin_test.go diff --git a/taskharbor/driver/redis/admin.go b/taskharbor/driver/redis/admin.go new file mode 100644 index 0000000..f479e7d --- /dev/null +++ b/taskharbor/driver/redis/admin.go @@ -0,0 +1,694 @@ +package redis + +import ( + "context" + "fmt" + "sort" + "strconv" + "strings" + "time" + + "github.com/ARJ2211/taskharbor/taskharbor/driver" + "github.com/redis/go-redis/v9" +) + +type hgetAllCmd interface { + Result() (map[string]string, error) +} + +var _ driver.Admin = (*Driver)(nil) + +func (d *Driver) keyReady(queue string) string { return d.opts.prefix + ":queue:" + queue + ":ready" } +func (d *Driver) keyScheduled(queue string) string { + return d.opts.prefix + ":queue:" + queue + ":scheduled" +} +func (d *Driver) keyInflight(queue string) string { + return d.opts.prefix + ":queue:" + queue + ":inflight" +} +func (d *Driver) keyDLQ(queue string) string { return d.opts.prefix + ":queue:" + queue + ":dlq" } +func (d *Driver) 
keyDone(queue string) string { return d.opts.prefix + ":queue:" + queue + ":done" } + +type jobMeta struct { + rec driver.JobRecord + status string + leaseTok string + leaseExp int64 + dlqReason string + dlqFail int64 +} + +func (d *Driver) Inspect(ctx context.Context, id string, now time.Time) (driver.JobInfo, error) { + if err := ctx.Err(); err != nil { + return driver.JobInfo{}, err + } + if err := d.ensureOpen(); err != nil { + return driver.JobInfo{}, err + } + + id = strings.TrimSpace(id) + if id == "" { + return driver.JobInfo{}, driver.ErrJobNotFound + } + if now.IsZero() { + now = time.Now().UTC() + } else { + now = now.UTC() + } + + m, err := d.loadJobMeta(ctx, id) + if err != nil { + return driver.JobInfo{}, err + } + + state := stateFromRedis(now, m.status, m.rec.RunAt) + + var lease *driver.LeaseInfo + if state == driver.StateInflight && m.leaseTok != "" && m.leaseExp != 0 { + lease = &driver.LeaseInfo{ + Token: driver.LeaseToken(m.leaseTok), + ExpiresAt: time.Unix(0, m.leaseExp).UTC(), + } + } + + // Important: always return DLQ info when state is DLQ (even if dlq_failed_at_nano isn't stored). 
+ var dlq *driver.DLQInfo + if state == driver.StateDLQ { + failedAt := time.Time{} + if m.dlqFail != 0 { + failedAt = time.Unix(0, m.dlqFail).UTC() + } else if !m.rec.FailedAt.IsZero() { + failedAt = m.rec.FailedAt.UTC() + } + dlq = &driver.DLQInfo{ + Reason: m.dlqReason, + FailedAt: failedAt, + } + } + + return driver.JobInfo{ + Record: m.rec, + State: state, + Lease: lease, + DLQ: dlq, + }, nil +} + +func (d *Driver) List(ctx context.Context, req driver.ListRequest) (driver.ListPage, error) { + if err := ctx.Err(); err != nil { + return driver.ListPage{}, err + } + if err := d.ensureOpen(); err != nil { + return driver.ListPage{}, err + } + + req.Queue = strings.TrimSpace(req.Queue) + if req.Queue == "" { + return driver.ListPage{}, fmt.Errorf("queue is required") + } + if req.Now.IsZero() { + req.Now = time.Now().UTC() + } else { + req.Now = req.Now.UTC() + } + if req.Limit <= 0 { + req.Limit = 50 + } + + var cur driver.Cursor + if strings.TrimSpace(req.Cursor) != "" { + c, err := driver.DecodeCursor(req.Cursor) + if err != nil { + return driver.ListPage{}, err + } + cur = c + } + + cap := req.Limit * 10 + if cap < 200 { + cap = 200 + } + if cap > 2000 { + cap = 2000 + } + + ids, err := d.collectIDsForList(ctx, req, cap) + if err != nil { + return driver.ListPage{}, err + } + if len(ids) == 0 { + return driver.ListPage{Jobs: nil, NextCursor: ""}, nil + } + + metas, err := d.loadManyJobMeta(ctx, ids) + if err != nil { + return driver.ListPage{}, err + } + + jobs := make([]driver.JobSummary, 0, len(ids)) + for _, id := range ids { + m, ok := metas[id] + if !ok { + continue + } + st := stateFromRedis(req.Now, m.status, m.rec.RunAt) + if req.State != "" && st != req.State { + continue + } + jobs = append(jobs, summaryFromMeta(req.Now, st, m)) + } + + sort.Slice(jobs, func(i, j int) bool { + ai := jobs[i].CreatedAt.UnixNano() + aj := jobs[j].CreatedAt.UnixNano() + if ai != aj { + return ai < aj + } + return jobs[i].ID < jobs[j].ID + }) + + if 
strings.TrimSpace(req.Cursor) != "" { + out := jobs[:0] + for _, s := range jobs { + a := s.CreatedAt.UnixNano() + if a > cur.A || (a == cur.A && s.ID > cur.ID) { + out = append(out, s) + } + } + jobs = out + } + + if len(jobs) == 0 { + return driver.ListPage{Jobs: nil, NextCursor: ""}, nil + } + if len(jobs) <= req.Limit { + return driver.ListPage{Jobs: jobs, NextCursor: ""}, nil + } + + page := jobs[:req.Limit] + last := page[len(page)-1] + next := driver.EncodeCursor(driver.Cursor{ + State: req.State, + A: last.CreatedAt.UnixNano(), + ID: last.ID, + }) + + return driver.ListPage{Jobs: page, NextCursor: next}, nil +} + +func (d *Driver) RequeueDLQ(ctx context.Context, id string, now time.Time, opt driver.RequeueOptions) error { + if err := ctx.Err(); err != nil { + return err + } + if err := d.ensureOpen(); err != nil { + return err + } + + id = strings.TrimSpace(id) + if id == "" { + return driver.ErrJobNotFound + } + if now.IsZero() { + now = time.Now().UTC() + } else { + now = now.UTC() + } + + runAtNano := int64(0) + runAtSec := int64(0) + runAtMember := "" + + if !opt.RunAt.IsZero() && opt.RunAt.UTC().After(now) { + ru := opt.RunAt.UTC() + runAtNano = ru.UnixNano() + runAtSec = ru.Unix() + runAtMember = schedMember(int64(ru.Nanosecond()), id) + } + + queueGuard := strings.TrimSpace(opt.Queue) + + code, err := d.runRequeueDLQScript( + ctx, + id, + now.UnixNano(), + queueGuard, + runAtNano, + runAtSec, + runAtMember, + opt.ResetAttempts, + ) + if err != nil { + return err + } + + switch code { + case 1: + return nil + case 0: + return driver.ErrJobNotFound + case -1: + return driver.ErrJobNotDLQ + case -2: + return fmt.Errorf("queue mismatch for job %s", id) + default: + return driver.ErrJobNotDLQ + } +} + +func stateFromRedis(now time.Time, status string, runAt time.Time) driver.JobState { + switch status { + case "inflight": + return driver.StateInflight + case "dlq": + return driver.StateDLQ + case "done": + return driver.StateDone + default: + // 
ready/scheduled stored status is not authoritative; run_at decides.
+		if runAt.IsZero() || !runAt.After(now) {
+			return driver.StateReady
+		}
+		return driver.StateScheduled
+	}
+}
+
+// summaryFromMeta converts a decoded jobMeta and its already-derived state
+// into the JobSummary returned by List. The now parameter is currently unused.
+func summaryFromMeta(now time.Time, st driver.JobState, m jobMeta) driver.JobSummary {
+	s := driver.JobSummary{
+		ID:          m.rec.ID,
+		Type:        m.rec.Type,
+		Queue:       m.rec.Queue,
+		State:       st,
+		RunAt:       m.rec.RunAt,
+		CreatedAt:   m.rec.CreatedAt,
+		Timeout:     m.rec.Timeout,
+		Attempts:    m.rec.Attempts,
+		MaxAttempts: m.rec.MaxAttempts,
+		LastError:   m.rec.LastError,
+		FailedAt:    m.rec.FailedAt,
+		DLQReason:   m.dlqReason,
+	}
+
+	// Timestamps are stored as Unix nanoseconds; 0 means "not set".
+	if m.leaseExp != 0 {
+		s.LeaseExpiresAt = time.Unix(0, m.leaseExp).UTC()
+	}
+	if m.dlqFail != 0 {
+		s.DLQFailedAt = time.Unix(0, m.dlqFail).UTC()
+	}
+
+	return s
+}
+
+// jobMetaFromHash decodes the fields of one job hash (as returned by HGETALL)
+// into a jobMeta. Parse errors on numeric fields are ignored. It is shared by
+// loadJobMeta and loadManyJobMeta so the single and bulk read paths cannot
+// drift apart.
+func jobMetaFromHash(id string, m map[string]string) jobMeta {
+	rec := driver.JobRecord{ID: id}
+	rec.Type = m["type"]
+	rec.Queue = m["queue"]
+	if v, ok := m["payload"]; ok {
+		rec.Payload = []byte(v)
+	}
+	rec.IdempotencyKey = m["idempotency_key"]
+	rec.LastError = m["last_error"]
+
+	if v := m["run_at_nano"]; v != "" {
+		if n, err := strconv.ParseInt(v, 10, 64); err == nil && n != 0 {
+			rec.RunAt = time.Unix(0, n).UTC()
+		}
+	}
+	if v := m["timeout_nano"]; v != "" {
+		if n, err := strconv.ParseInt(v, 10, 64); err == nil {
+			rec.Timeout = time.Duration(n)
+		}
+	}
+	if v := m["created_at_nano"]; v != "" {
+		if n, err := strconv.ParseInt(v, 10, 64); err == nil {
+			rec.CreatedAt = time.Unix(0, n).UTC()
+		}
+	}
+	if v := m["failed_at_nano"]; v != "" {
+		if n, err := strconv.ParseInt(v, 10, 64); err == nil && n != 0 {
+			rec.FailedAt = time.Unix(0, n).UTC()
+		}
+	}
+	if v := m["attempts"]; v != "" {
+		rec.Attempts, _ = strconv.Atoi(v)
+	}
+	if v := m["max_attempts"]; v != "" {
+		rec.MaxAttempts, _ = strconv.Atoi(v)
+	}
+
+	meta := jobMeta{
+		rec:       rec,
+		status:    m["status"],
+		leaseTok:  m["lease_token"],
+		dlqReason: m["dlq_reason"],
+	}
+
+	if v := m["lease_expires_at_nano"]; v != "" {
+		meta.leaseExp, _ = strconv.ParseInt(v, 10, 64)
+	}
+	if v := m["dlq_failed_at_nano"]; v != "" {
+		meta.dlqFail, _ = strconv.ParseInt(v, 10, 64)
+	}
+
+	// Fallback for older/legacy Redis schema: DLQ time/reason might live in failed_at_nano/last_error.
+	if meta.status == "dlq" {
+		if meta.dlqReason == "" {
+			meta.dlqReason = rec.LastError
+		}
+		if meta.dlqFail == 0 && !rec.FailedAt.IsZero() {
+			meta.dlqFail = rec.FailedAt.UnixNano()
+		}
+	}
+
+	return meta
+}
+
+// loadJobMeta reads and decodes the hash of a single job.
+// It returns driver.ErrJobNotFound when the hash has no fields.
+func (d *Driver) loadJobMeta(ctx context.Context, id string) (jobMeta, error) {
+	m, err := d.client.HGetAll(ctx, d.keyJob(id)).Result()
+	if err != nil {
+		return jobMeta{}, err
+	}
+	if len(m) == 0 {
+		return jobMeta{}, driver.ErrJobNotFound
+	}
+	return jobMetaFromHash(id, m), nil
+}
+
+// loadManyJobMeta reads the hashes of all ids in one pipeline and returns the
+// decoded metadata keyed by job ID. IDs whose hash is empty, or whose
+// individual command failed, are omitted from the result rather than reported.
+func (d *Driver) loadManyJobMeta(ctx context.Context, ids []string) (map[string]jobMeta, error) {
+	pipe := d.client.Pipeline()
+	cmds := make(map[string]hgetAllCmd, len(ids))
+
+	for _, id := range ids {
+		cmds[id] = pipe.HGetAll(ctx, d.keyJob(id))
+	}
+
+	_, err := pipe.Exec(ctx)
+	if err != nil && err != redis.Nil {
+		return nil, err
+	}
+
+	out := make(map[string]jobMeta, len(ids))
+	for id, cmd := range cmds {
+		m, err := cmd.Result()
+		if err != nil || len(m) == 0 {
+			continue
+		}
+		out[id] = jobMetaFromHash(id, m)
+	}
+
+	return out, nil
+}
+
+func (d *Driver) collectIDsForList(ctx context.Context, req driver.ListRequest, cap int) ([]string, error) {
+	queue := req.Queue
+
+	add := func(dst []string, ids ...string) []string {
+		for _, id := range ids {
+			if id != "" {
+				dst = append(dst, id)
+			}
+		}
+		return dst
+	}
+
+	uniq := make(map[string]struct{})
+	out := make([]string, 0, cap)
+
+	pushUnique := func(id string) {
+		if id == "" {
+			return
+		}
+		if _, ok := uniq[id]; ok {
+			return
+		}
+		uniq[id] = struct{}{}
+		out = append(out, id)
+	}
+
+	pushMany := func(ids []string) {
+		for _, id := range ids {
+			if len(out) >= cap {
+				return
+			}
+			pushUnique(id)
+		}
+	}
+
+	switch req.State {
+	case driver.StateReady:
+		ids, err := d.client.LRange(ctx, d.keyReady(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		pushMany(ids)
+
+		due, err := d.dueScheduledIDs(ctx, queue, req.Now, cap-len(out))
+		if err != nil {
+			return nil, err
+		}
+		pushMany(due)
+
+	case driver.StateScheduled:
+		fut, err := d.futureScheduledIDs(ctx, queue, req.Now, cap)
+		if err != nil {
+			return nil, err
+		}
+		pushMany(fut)
+
+	case driver.StateInflight:
+		ids, err := d.client.ZRange(ctx, d.keyInflight(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != 
redis.Nil {
+			return nil, err
+		}
+		pushMany(ids)
+
+	case driver.StateDLQ:
+		ids, err := d.client.LRange(ctx, d.keyDLQ(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		pushMany(ids)
+
+	case driver.StateDone:
+		ids, err := d.client.LRange(ctx, d.keyDone(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		pushMany(ids)
+
+	case "":
+		// No state filter: gather candidates from every index of the queue,
+		// stopping once cap IDs have been collected overall.
+		ids, err := d.client.LRange(ctx, d.keyReady(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		pushMany(ids)
+
+		sched, err := d.client.ZRange(ctx, d.keyScheduled(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		for _, m := range sched {
+			if len(out) >= cap {
+				break
+			}
+			// The job ID is whatever follows the first ':' of a scheduled member.
+			if _, id, ok := strings.Cut(m, ":"); ok {
+				pushUnique(id)
+			}
+		}
+
+		inflight, err := d.client.ZRange(ctx, d.keyInflight(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		pushMany(inflight)
+
+		dlq, err := d.client.LRange(ctx, d.keyDLQ(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		pushMany(dlq)
+
+		done, err := d.client.LRange(ctx, d.keyDone(queue), 0, int64(cap-1)).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		pushMany(done)
+
+	default:
+		// Unknown state filter: collect nothing. add is called with no IDs, so
+		// out is returned unchanged; this is the only use of the add helper.
+		out = add(out)
+	}
+
+	return out, nil
+}
+
+// dueScheduledIDs returns up to cap job IDs from the scheduled sorted set of
+// queue whose run time is at or before now.
+//
+// Members are queried by a whole-second score; for the second equal to
+// now.Unix() the first 9 characters of the member are parsed as the
+// sub-second nanoseconds and compared against now.Nanosecond(). The job ID is
+// the part of the member after the first ':'.
+func (d *Driver) dueScheduledIDs(ctx context.Context, queue string, now time.Time, cap int) ([]string, error) {
+	if cap <= 0 {
+		return nil, nil
+	}
+	nowSec := now.Unix()
+	nowSub := int64(now.Nanosecond())
+	skey := d.keyScheduled(queue)
+
+	out := make([]string, 0, cap)
+
+	// < nowSec
+	if nowSec-1 >= 0 {
+		members, err := d.client.ZRangeByScore(ctx, skey, &redis.ZRangeBy{
+			Min:   "0",
+			Max:   strconv.FormatInt(nowSec-1, 10),
+			Count: int64(cap),
+		}).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		for _, m := range members {
+			if len(out) >= cap {
+				break
+			}
+			if _, id, ok := strings.Cut(m, ":"); ok {
+				out = append(out, id)
+			}
+		}
+	}
+
+	// == nowSec (filter by sub)
+	if len(out) < cap {
+		members, err := d.client.ZRangeByScore(ctx, skey, &redis.ZRangeBy{
+			Min: strconv.FormatInt(nowSec, 10),
+			Max: strconv.FormatInt(nowSec, 10),
+		}).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		for _, m := range members {
+			if len(out) >= cap {
+				break
+			}
+			// Too short to hold a 9-digit prefix plus a separator.
+			if len(m) < 10 {
+				continue
+			}
+			// A prefix that fails to parse leaves sub at 0, so such a member
+			// is treated as due here.
+			sub, _ := strconv.ParseInt(m[:9], 10, 64)
+			if sub <= nowSub {
+				if _, id, ok := strings.Cut(m, ":"); ok {
+					out = append(out, id)
+				}
+			}
+		}
+	}
+
+	return out, nil
+}
+
+// futureScheduledIDs returns up to cap job IDs from the scheduled sorted set
+// of queue whose run time is strictly after now. It mirrors dueScheduledIDs:
+// later seconds are taken by score, and members in the current second are kept
+// only when their 9-character sub-second prefix is greater than now.Nanosecond().
+// Later seconds are appended before same-second members, so the result is not
+// in run-time order.
+func (d *Driver) futureScheduledIDs(ctx context.Context, queue string, now time.Time, cap int) ([]string, error) {
+	if cap <= 0 {
+		return nil, nil
+	}
+	nowSec := now.Unix()
+	nowSub := int64(now.Nanosecond())
+	skey := d.keyScheduled(queue)
+
+	out := make([]string, 0, cap)
+
+	// > nowSec
+	members, err := d.client.ZRangeByScore(ctx, skey, &redis.ZRangeBy{
+		Min:   strconv.FormatInt(nowSec+1, 10),
+		Max:   "+inf",
+		Count: int64(cap),
+	}).Result()
+	if err != nil && err != redis.Nil {
+		return nil, err
+	}
+	for _, m := range members {
+		if len(out) >= cap {
+			break
+		}
+		if _, id, ok := strings.Cut(m, ":"); ok {
+			out = append(out, id)
+		}
+	}
+
+	// == nowSec (filter by sub > nowSub)
+	if len(out) < cap {
+		members, err := d.client.ZRangeByScore(ctx, skey, &redis.ZRangeBy{
+			Min: strconv.FormatInt(nowSec, 10),
+			Max: strconv.FormatInt(nowSec, 10),
+		}).Result()
+		if err != nil && err != redis.Nil {
+			return nil, err
+		}
+		for _, m := range members {
+			if len(out) >= cap {
+				break
+			}
+			if len(m) < 10 {
+				continue
+			}
+			// A prefix that fails to parse leaves sub at 0, so such a member
+			// is never reported as future.
+			sub, _ := strconv.ParseInt(m[:9], 10, 64)
+			if sub > nowSub {
+				if _, id, ok := strings.Cut(m, ":"); ok {
+					out = append(out, id)
+				}
+			}
+		}
+	}
+
+	return out, nil
+}
diff --git a/taskharbor/driver/redis/redis_admin_test.go b/taskharbor/driver/redis/redis_admin_test.go
new file mode 100644
index 0000000..049d7ee
--- /dev/null
+++ 
b/taskharbor/driver/redis/redis_admin_test.go
@@ -0,0 +1,184 @@
+package redis
+
+import (
+	"context"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/ARJ2211/taskharbor/taskharbor/driver"
+	"github.com/ARJ2211/taskharbor/taskharbor/internal/envutil"
+)
+
+// TestRedisAdmin_Inspect_List_RequeueDLQ walks one job through
+// DLQ -> requeue -> done against a live Redis and checks Inspect and List at
+// each stage. It is skipped unless REDIS_ADDR is set.
+func TestRedisAdmin_Inspect_List_RequeueDLQ(t *testing.T) {
+	cwd, _ := os.Getwd()
+	_ = envutil.LoadRepoDotenv(cwd)
+
+	addr := os.Getenv("REDIS_ADDR")
+	if addr == "" {
+		t.Skip("REDIS_ADDR not set")
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	d, err := New(ctx, addr)
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	defer func() { _ = d.Close() }()
+
+	// Isolate keys: a timestamped prefix keeps this run apart from other data.
+	d.opts.prefix = "taskharbor_admin_test:" + time.Now().UTC().Format("20060102150405.000000000")
+
+	now := time.Now().UTC()
+	q := "q0"
+
+	recSched := driver.JobRecord{
+		ID:          "admin_sched_1",
+		Type:        "t",
+		Queue:       q,
+		Payload:     []byte(`{"x":1}`),
+		RunAt:       now.Add(10 * time.Second),
+		CreatedAt:   now.Add(-3 * time.Second),
+		MaxAttempts: 3,
+	}
+	if _, _, err := d.Enqueue(ctx, recSched); err != nil {
+		t.Fatalf("Enqueue sched: %v", err)
+	}
+
+	recDLQ := driver.JobRecord{
+		ID:          "admin_dlq_1",
+		Type:        "t",
+		Queue:       q,
+		Payload:     []byte(`{}`),
+		CreatedAt:   now.Add(-2 * time.Second),
+		MaxAttempts: 3,
+	}
+	if _, _, err := d.Enqueue(ctx, recDLQ); err != nil {
+		t.Fatalf("Enqueue dlq: %v", err)
+	}
+
+	// Reserve + Fail -> DLQ
+	_, lease, ok, err := d.Reserve(ctx, q, now, 5*time.Second)
+	if err != nil || !ok {
+		t.Fatalf("Reserve: ok=%v err=%v", ok, err)
+	}
+	if err := d.Fail(ctx, recDLQ.ID, lease.Token, now.Add(1*time.Second), "boom"); err != nil {
+		t.Fatalf("Fail: %v", err)
+	}
+
+	jiSched, err := d.Inspect(ctx, recSched.ID, now)
+	if err != nil {
+		t.Fatalf("Inspect sched: %v", err)
+	}
+	if jiSched.State != driver.StateScheduled {
+		t.Fatalf("expected scheduled, got %s", jiSched.State)
+	}
+
+	jiDLQ, err := d.Inspect(ctx, recDLQ.ID, now)
+	if err != nil {
+		t.Fatalf("Inspect dlq: %v", err)
+	}
+	if jiDLQ.State != driver.StateDLQ || jiDLQ.DLQ == nil {
+		t.Fatalf("expected dlq with info, got state=%s dlq=%v", jiDLQ.State, jiDLQ.DLQ)
+	}
+
+	pageDLQ, err := d.List(ctx, driver.ListRequest{Queue: q, State: driver.StateDLQ, Now: now, Limit: 10})
+	if err != nil {
+		t.Fatalf("List dlq: %v", err)
+	}
+	if len(pageDLQ.Jobs) != 1 || pageDLQ.Jobs[0].ID != recDLQ.ID {
+		t.Fatalf("expected dlq job %s, got %#v", recDLQ.ID, pageDLQ.Jobs)
+	}
+
+	// Requeue DLQ -> ready
+	if err := d.RequeueDLQ(ctx, recDLQ.ID, now, driver.RequeueOptions{Queue: q, ResetAttempts: true}); err != nil {
+		t.Fatalf("RequeueDLQ: %v", err)
+	}
+	jiReady, err := d.Inspect(ctx, recDLQ.ID, now)
+	if err != nil {
+		t.Fatalf("Inspect after requeue: %v", err)
+	}
+	if jiReady.State != driver.StateReady {
+		t.Fatalf("expected ready, got %s", jiReady.State)
+	}
+
+	// Reserve + Ack -> done
+	_, lease2, ok, err := d.Reserve(ctx, q, now.Add(2*time.Second), 5*time.Second)
+	if err != nil || !ok {
+		t.Fatalf("Reserve2: ok=%v err=%v", ok, err)
+	}
+	if err := d.Ack(ctx, recDLQ.ID, lease2.Token, now.Add(3*time.Second)); err != nil {
+		t.Fatalf("Ack: %v", err)
+	}
+
+	jiDone, err := d.Inspect(ctx, recDLQ.ID, now.Add(3*time.Second))
+	if err != nil {
+		t.Fatalf("Inspect done: %v", err)
+	}
+	if jiDone.State != driver.StateDone {
+		t.Fatalf("expected done, got %s", jiDone.State)
+	}
+
+	pageDone, err := d.List(ctx, driver.ListRequest{Queue: q, State: driver.StateDone, Now: now, Limit: 10})
+	if err != nil {
+		t.Fatalf("List done: %v", err)
+	}
+	if len(pageDone.Jobs) != 1 || pageDone.Jobs[0].ID != recDLQ.ID {
+		t.Fatalf("expected done job %s, got %#v", recDLQ.ID, pageDone.Jobs)
+	}
+}
+
+// TestRedisAdmin_List_Pagination enqueues three ready jobs and checks that a
+// page size of 2 yields a full first page with a cursor and a one-job second
+// page. It is skipped unless REDIS_ADDR is set.
+func TestRedisAdmin_List_Pagination(t *testing.T) {
+	cwd, _ := os.Getwd()
+	_ = envutil.LoadRepoDotenv(cwd)
+
+	addr := os.Getenv("REDIS_ADDR")
+	if addr == "" {
+		t.Skip("REDIS_ADDR not set")
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	d, err := New(ctx, addr)
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	defer func() { _ = d.Close() }()
+
+	d.opts.prefix = "taskharbor_admin_page_test:" + time.Now().UTC().Format("20060102150405.000000000")
+
+	now := time.Now().UTC()
+	q := "q0"
+
+	for i := 0; i < 3; i++ {
+		rec := driver.JobRecord{
+			ID:        "admin_page_" + string(rune('a'+i)),
+			Type:      "t",
+			Queue:     q,
+			Payload:   []byte(`{}`),
+			CreatedAt: now.Add(time.Duration(i) * time.Second),
+		}
+		if _, _, err := d.Enqueue(ctx, rec); err != nil {
+			t.Fatalf("Enqueue %d: %v", i, err)
+		}
+	}
+
+	p1, err := d.List(ctx, driver.ListRequest{Queue: q, State: driver.StateReady, Now: now, Limit: 2})
+	if err != nil {
+		t.Fatalf("List p1: %v", err)
+	}
+	if len(p1.Jobs) != 2 || p1.NextCursor == "" {
+		t.Fatalf("expected 2 jobs + cursor, got %d cursor=%q", len(p1.Jobs), p1.NextCursor)
+	}
+
+	p2, err := d.List(ctx, driver.ListRequest{Queue: q, State: driver.StateReady, Now: now, Limit: 2, Cursor: p1.NextCursor})
+	if err != nil {
+		t.Fatalf("List p2: %v", err)
+	}
+	if len(p2.Jobs) != 1 {
+		t.Fatalf("expected 1 job on second page, got %d", len(p2.Jobs))
+	}
+}
diff --git a/taskharbor/driver/redis/scripts.go b/taskharbor/driver/redis/scripts.go
index 56ea7b0..0a3e2c7 100644
--- a/taskharbor/driver/redis/scripts.go
+++ b/taskharbor/driver/redis/scripts.go
@@ -386,6 +386,7 @@ if tonumber(db_exp) <= now then
 end
 local queue = redis.call('HGET', job_key, 'queue')
 redis.call('HSET', job_key, 'status', 'done', 'lease_token', '', 'lease_expires_at_nano', '0')
+redis.call('RPUSH', prefix .. ":queue:" .. queue .. ":done", id)
 local inflight_key = prefix .. ":queue:" .. queue .. ":inflight"
 redis.call('ZREM', inflight_key, id)
 return 1
@@ -548,5 +549,110 @@ func (d *Driver) runFailScript(ctx context.Context, id, token string, nowNano in
 	return n == 1, nil
 }
 
+// scriptRequeueDLQ moves a job out of the DLQ of its queue and back to either
+// the ready list or the scheduled set.
+//
+// Return codes: 1 requeued, 0 job hash does not exist, -1 job status is not
+// 'dlq', -2 the queue guard is non-empty and differs from the job's queue.
+const scriptRequeueDLQ = `
+local prefix = KEYS[1]
+local id = ARGV[1]
+local now_nano = tonumber(ARGV[2])
+
+local queue_guard = ARGV[3] -- '' means ignore
+local run_at_nano = ARGV[4] -- string
+local run_at_sec = tonumber(ARGV[5])
+local run_at_member = ARGV[6]
+local reset = tonumber(ARGV[7]) -- 0/1
+
+local job_key = prefix .. ":job:" .. id
+if redis.call('EXISTS', job_key) == 0 then
+ return 0
+end
+
+local status = redis.call('HGET', job_key, 'status')
+if status ~= 'dlq' then
+ return -1
+end
+
+local queue = redis.call('HGET', job_key, 'queue')
+if queue_guard ~= nil and queue_guard ~= '' and queue_guard ~= queue then
+ return -2
+end
+
+local dlq_key = prefix .. ":queue:" .. queue .. ":dlq"
+redis.call('LREM', dlq_key, 0, id)
+
+local new_status = 'ready'
+if run_at_nano ~= nil and run_at_nano ~= '' and run_at_nano ~= '0' then
+ if tonumber(run_at_nano) > now_nano then
+ new_status = 'scheduled'
+ else
+ run_at_nano = '0'
+ end
+else
+ run_at_nano = '0'
+end
+
+if reset == 1 then
+ redis.call('HSET', job_key,
+ 'status', new_status,
+ 'run_at_nano', run_at_nano,
+ 'attempts', '0',
+ 'last_error', '',
+ 'failed_at_nano', '0',
+ 'dlq_reason', '',
+ 'dlq_failed_at_nano', '0',
+ 'lease_token', '',
+ 'lease_expires_at_nano', '0'
+ )
+else
+ redis.call('HSET', job_key,
+ 'status', new_status,
+ 'run_at_nano', run_at_nano,
+ 'dlq_reason', '',
+ 'dlq_failed_at_nano', '0',
+ 'lease_token', '',
+ 'lease_expires_at_nano', '0'
+ )
+end
+
+if new_status == 'ready' then
+ redis.call('RPUSH', prefix .. ":queue:" .. queue .. ":ready", id)
+else
+ redis.call('ZADD', prefix .. ":queue:" .. queue .. ":scheduled", run_at_sec, run_at_member)
+end
+
+return 1
+`
+
+// runRequeueDLQScript evaluates scriptRequeueDLQ with the key prefix as KEYS[1]
+// and all other values passed as strings, and returns the script's integer
+// result code.
+//
+// NOTE(review): the second result of toInt64 is discarded, so a reply that
+// cannot be converted presumably comes back as 0, which RequeueDLQ reports as
+// "job not found" - confirm against toInt64.
+func (d *Driver) runRequeueDLQScript(
+	ctx context.Context,
+	id string,
+	nowNano int64,
+	queueGuard string,
+	runAtNano int64,
+	runAtSec int64,
+	runAtMember string,
+	resetAttempts bool,
+) (int64, error) {
+	keys := []string{d.opts.prefix}
+	reset := int64(0)
+	if resetAttempts {
+		reset = 1
+	}
+	args := []any{
+		id,
+		strconv.FormatInt(nowNano, 10),
+		queueGuard,
+		strconv.FormatInt(runAtNano, 10),
+		strconv.FormatInt(runAtSec, 10),
+		runAtMember,
+		strconv.FormatInt(reset, 10),
+	}
+
+	v, err := d.client.Eval(ctx, scriptRequeueDLQ, keys, args...).Result()
+	if err != nil {
+		return 0, err
+	}
+	n, _ := toInt64(v)
+	return n, nil
+}
+
 // Silence unused import warnings if you temporarily comment-out scripts during refactors.
 var _ = errors.New
From 0aaae3023f26ebeab1a9a30eb1f77a4d78b3915c Mon Sep 17 00:00:00 2001
From: ARJ2211
Date: Mon, 23 Feb 2026 22:30:56 -0500
Subject: [PATCH 13/19] Added the postgres admin page

---
 taskharbor/driver/postgres/admin.go | 468 ++++++++++++++++++
 .../driver/postgres/postgres_admin_test.go | 220 ++++++++
 2 files changed, 688 insertions(+)
 create mode 100644 taskharbor/driver/postgres/admin.go
 create mode 100644 taskharbor/driver/postgres/postgres_admin_test.go

diff --git a/taskharbor/driver/postgres/admin.go b/taskharbor/driver/postgres/admin.go
new file mode 100644
index 0000000..c7a219e
--- /dev/null
+++ b/taskharbor/driver/postgres/admin.go
@@ -0,0 +1,468 @@
+package postgres
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/ARJ2211/taskharbor/taskharbor/driver"
+	"github.com/jackc/pgx/v5"
+)
+
+// Compile-time check that *Driver implements driver.Admin.
+var _ driver.Admin = (*Driver)(nil)
+
+// qAdminInspect selects every column Inspect needs for a single job by id.
+const qAdminInspect = `
+SELECT
+ id,
+ type,
+ queue,
+ payload,
+ run_at,
+ timeout_nanos,
+ idempotency_key,
+ created_at,
+ attempts,
+ max_attempts,
+ last_error,
+ failed_at,
+ status,
+ lease_token,
+ lease_expires_at,
+ dlq_reason,
+ dlq_failed_at
+FROM th_jobs
+WHERE id = $1
+`
+
+const 
qAdminGetStatus = `
+SELECT queue, status
+FROM th_jobs
+WHERE id = $1
+`
+
+// qAdminRequeueDLQ resets a dead-lettered job to 'ready'.
+// $1 = job id, $2 = new run_at (NULL for "run now"), $3 = reset attempts and
+// error fields, $4 = optional queue guard (NULL disables the check).
+// It returns the id only when a row in status 'dlq' was updated.
+const qAdminRequeueDLQ = `
+UPDATE th_jobs
+SET
+ status = 'ready',
+ run_at = $2,
+ dlq_reason = NULL,
+ dlq_failed_at = NULL,
+ lease_token = NULL,
+ lease_expires_at = NULL,
+ attempts = CASE WHEN $3 THEN 0 ELSE attempts END,
+ last_error = CASE WHEN $3 THEN '' ELSE last_error END,
+ failed_at = CASE WHEN $3 THEN NULL ELSE failed_at END
+WHERE id = $1
+ AND status = 'dlq'
+ AND ($4::text IS NULL OR queue = $4)
+RETURNING id
+`
+
+// Inspect returns the stored record of job id together with its derived state
+// as of now, plus lease details for inflight jobs and DLQ details for
+// dead-lettered jobs.
+//
+// A zero now defaults to the current time. It returns driver.ErrJobNotFound
+// when id is empty or no such row exists.
+func (d *Driver) Inspect(ctx context.Context, id string, now time.Time) (driver.JobInfo, error) {
+	if err := ctx.Err(); err != nil {
+		return driver.JobInfo{}, err
+	}
+	if err := d.ensureOpen(); err != nil {
+		return driver.JobInfo{}, err
+	}
+
+	id = strings.TrimSpace(id)
+	if id == "" {
+		return driver.JobInfo{}, driver.ErrJobNotFound
+	}
+	if now.IsZero() {
+		now = time.Now().UTC()
+	} else {
+		now = now.UTC()
+	}
+
+	// Pointer targets are used for the values that are checked for nil below.
+	var (
+		dbID           string
+		typ            string
+		queue          string
+		payload        []byte
+		runAtPtr       *time.Time
+		timeoutNanos   int64
+		idemPtr        *string
+		createdAt      time.Time
+		attempts       int
+		maxAttempts    int
+		lastError      string
+		failedAtPtr    *time.Time
+		status         string
+		leaseTokPtr    *string
+		leaseExpPtr    *time.Time
+		dlqReasonPtr   *string
+		dlqFailedAtPtr *time.Time
+	)
+
+	err := d.pool.QueryRow(ctx, qAdminInspect, id).Scan(
+		&dbID,
+		&typ,
+		&queue,
+		&payload,
+		&runAtPtr,
+		&timeoutNanos,
+		&idemPtr,
+		&createdAt,
+		&attempts,
+		&maxAttempts,
+		&lastError,
+		&failedAtPtr,
+		&status,
+		&leaseTokPtr,
+		&leaseExpPtr,
+		&dlqReasonPtr,
+		&dlqFailedAtPtr,
+	)
+	if err != nil {
+		if errors.Is(err, pgx.ErrNoRows) {
+			return driver.JobInfo{}, driver.ErrJobNotFound
+		}
+		return driver.JobInfo{}, err
+	}
+
+	rec := driver.JobRecord{
+		ID:          dbID,
+		Type:        typ,
+		Queue:       queue,
+		Payload:     payload,
+		Timeout:     time.Duration(timeoutNanos),
+		CreatedAt:   createdAt.UTC(),
+		Attempts:    attempts,
+		MaxAttempts: maxAttempts,
+		LastError:   lastError,
+	}
+
+	if runAtPtr != nil {
+		rec.RunAt = runAtPtr.UTC()
+	}
+	if failedAtPtr != nil {
+		rec.FailedAt = failedAtPtr.UTC()
+	}
+	if idemPtr != nil {
+		rec.IdempotencyKey = *idemPtr
+	}
+
+	st := stateFromDB(status, runAtPtr, now)
+
+	var lease *driver.LeaseInfo
+	if status == "inflight" && leaseTokPtr != nil && leaseExpPtr != nil {
+		lease = &driver.LeaseInfo{
+			Token:     driver.LeaseToken(*leaseTokPtr),
+			ExpiresAt: leaseExpPtr.UTC(),
+		}
+	}
+
+	// Always report DLQ details for a dead-lettered job, as the Redis driver
+	// does: when dlq_reason / dlq_failed_at are not populated, fall back to
+	// last_error / failed_at instead of returning a nil DLQ for a DLQ state.
+	var dlq *driver.DLQInfo
+	if status == "dlq" {
+		info := driver.DLQInfo{Reason: lastError, FailedAt: rec.FailedAt}
+		if dlqReasonPtr != nil && *dlqReasonPtr != "" {
+			info.Reason = *dlqReasonPtr
+		}
+		if dlqFailedAtPtr != nil {
+			info.FailedAt = dlqFailedAtPtr.UTC()
+		}
+		dlq = &info
+	}
+
+	return driver.JobInfo{
+		Record: rec,
+		State:  st,
+		Lease:  lease,
+		DLQ:    dlq,
+	}, nil
+}
+
+func (d *Driver) List(ctx context.Context, req driver.ListRequest) (driver.ListPage, error) {
+	if err := ctx.Err(); err != nil {
+		return driver.ListPage{}, err
+	}
+	if err := d.ensureOpen(); err != nil {
+		return driver.ListPage{}, err
+	}
+
+	req.Queue = strings.TrimSpace(req.Queue)
+	if req.Queue == "" {
+		return driver.ListPage{}, fmt.Errorf("queue is required")
+	}
+	if req.Now.IsZero() {
+		req.Now = time.Now().UTC()
+	} else {
+		req.Now = req.Now.UTC()
+	}
+	if req.Limit <= 0 {
+		req.Limit = 50
+	}
+
+	var cur driver.Cursor
+	if strings.TrimSpace(req.Cursor) != "" {
+		c, err := driver.DecodeCursor(req.Cursor)
+		if err != nil {
+			return driver.ListPage{}, err
+		}
+		cur = c
+	}
+
+	sql, args := buildAdminListQuery(req, cur)
+
+	rows, err := d.pool.Query(ctx, sql, args...)
+	if err != nil {
+		return driver.ListPage{}, err
+	}
+	defer rows.Close()
+
+	out := make([]driver.JobSummary, 0, req.Limit+1)
+	for rows.Next() {
+		var (
+			id           string
+			typ          string
+			queue        string
+			runAtPtr     *time.Time
+			timeoutNanos int64
+			createdAt    time.Time
+			attempts     int
+			maxAttempts  int
+			lastError    string
+			failedAtPtr  *time.Time
+			status       string
+			leaseExpPtr  *time.Time
+			dlqReasonPtr *string
+			dlqFailedPtr *time.Time
+		)
+
+		if err := rows.Scan(
+			&id,
+			&typ,
+			&queue,
+			&runAtPtr,
+			&timeoutNanos,
+			&createdAt,
+			&attempts,
+			&maxAttempts,
+			&lastError,
+			&failedAtPtr,
+			&status,
+			&leaseExpPtr,
+			&dlqReasonPtr,
+			&dlqFailedPtr,
+		); err != nil {
+			return driver.ListPage{}, err
+		}
+
+		s := driver.JobSummary{
+			ID:          id,
+			Type:        typ,
+			Queue:       queue,
+			RunAt:       time.Time{},
+			CreatedAt:   createdAt.UTC(),
+			Timeout:     time.Duration(timeoutNanos),
+			Attempts:    attempts,
+			MaxAttempts: maxAttempts,
+			LastError:   lastError,
+			FailedAt:    time.Time{},
+			State:       stateFromDB(status, runAtPtr, req.Now),
+		}
+
+		if runAtPtr != nil {
+			s.RunAt = runAtPtr.UTC()
+		}
+		if failedAtPtr != nil {
+			s.FailedAt = failedAtPtr.UTC()
+		}
+		if leaseExpPtr != nil {
+			s.LeaseExpiresAt = leaseExpPtr.UTC()
+		}
+		if dlqReasonPtr != nil {
+			s.DLQReason = *dlqReasonPtr
+		}
+		if dlqFailedPtr != nil {
+			s.DLQFailedAt = dlqFailedPtr.UTC()
+		}
+
+		out = append(out, s)
+	}
+	if err := rows.Err(); err != nil {
+		return driver.ListPage{}, err
+	}
+
+	if len(out) == 0 {
+		return driver.ListPage{Jobs: nil, NextCursor: ""}, nil
+	}
+
+	if len(out) <= req.Limit {
+		return driver.ListPage{Jobs: out, NextCursor: ""}, nil
+	}
+
+	page := out[:req.Limit]
+	last := page[len(page)-1]
+	next := driver.EncodeCursor(driver.Cursor{
+		State: req.State,
+		A:     last.CreatedAt.UnixNano(),
+		ID:    last.ID,
+	})
+
+	return driver.ListPage{Jobs: page, NextCursor: next}, nil
+}
+
+// RequeueDLQ moves the dead-lettered job id back to the 'ready' status.
+//
+// A zero now defaults to the current time. If opt.RunAt is set and lies after
+// now it becomes the job's run_at (the job then reads as scheduled), otherwise
+// run_at is cleared. A non-empty opt.Queue (after trimming) must match the
+// job's queue. opt.ResetAttempts also clears attempts, last_error and failed_at.
+//
+// It returns driver.ErrJobNotFound for an empty or unknown id, a queue
+// mismatch error when the guard does not match, and driver.ErrJobNotDLQ when
+// the job is not in the DLQ.
+func (d *Driver) RequeueDLQ(ctx context.Context, id string, now time.Time, opt driver.RequeueOptions) error {
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	if err := d.ensureOpen(); err != nil {
+		return err
+	}
+
+	id = strings.TrimSpace(id)
+	if id == "" {
+		return driver.ErrJobNotFound
+	}
+	if now.IsZero() {
+		now = time.Now().UTC()
+	} else {
+		now = now.UTC()
+	}
+
+	// A nil interface is sent as SQL NULL, i.e. "no run time".
+	var runAt any
+	if !opt.RunAt.IsZero() && opt.RunAt.After(now) {
+		runAt = opt.RunAt.UTC()
+	}
+
+	// Trim once and use the same value for the SQL guard and for the mismatch
+	// check below, so a padded queue name behaves like the Redis driver's.
+	queue := strings.TrimSpace(opt.Queue)
+	var queueGuard any
+	if queue != "" {
+		queueGuard = queue
+	}
+
+	var ignored string
+	err := d.pool.QueryRow(ctx, qAdminRequeueDLQ, id, runAt, opt.ResetAttempts, queueGuard).Scan(&ignored)
+	if err == nil {
+		return nil
+	}
+	if !errors.Is(err, pgx.ErrNoRows) {
+		return err
+	}
+
+	// Nothing was updated: look the job up to report why.
+	var q string
+	var status string
+	err2 := d.pool.QueryRow(ctx, qAdminGetStatus, id).Scan(&q, &status)
+	if err2 != nil {
+		if errors.Is(err2, pgx.ErrNoRows) {
+			return driver.ErrJobNotFound
+		}
+		return err2
+	}
+
+	if queue != "" && q != queue {
+		return fmt.Errorf("queue mismatch: job is in %q, not %q", q, queue)
+	}
+
+	// Either the job is not dead-lettered, or its row changed between the
+	// UPDATE and this lookup; both are reported as "not in DLQ".
+	return driver.ErrJobNotDLQ
+}
+
+// stateFromDB maps a stored status plus run_at to the externally visible job
+// state. 'ready' rows, and rows with any unrecognised status, are Scheduled
+// when run_at lies after now and Ready otherwise.
+func stateFromDB(status string, runAt *time.Time, now time.Time) driver.JobState {
+	switch status {
+	case "inflight":
+		return driver.StateInflight
+	case "dlq":
+		return driver.StateDLQ
+	case "done":
+		return driver.StateDone
+	}
+
+	if runAt == nil || !runAt.UTC().After(now) {
+		return driver.StateReady
+	}
+	return driver.StateScheduled
+}
+
+// buildAdminListQuery builds the SELECT used by List and its positional
+// arguments. Rows are ordered by created_at ASC, id ASC; when req.Cursor is
+// non-blank only rows with (created_at, id) after the cursor are selected.
+// The LIMIT is req.Limit+1 so the caller can detect whether another page exists.
+func buildAdminListQuery(req driver.ListRequest, cur driver.Cursor) (string, []any) {
+	args := make([]any, 0, 6)
+
+	where := "WHERE queue = $1"
+	args = append(args, req.Queue)
+	p := 2 // next free placeholder number
+
+	switch req.State {
+	case "":
+		// no filter
+	case driver.StateReady:
+		where += fmt.Sprintf(" AND status = 'ready' AND (run_at IS NULL OR run_at <= $%d)", p)
+		args = append(args, req.Now)
+		p++
+	case driver.StateScheduled:
+		where += fmt.Sprintf(" AND status = 'ready' AND run_at > $%d", p)
+		args = append(args, req.Now)
+		p++
+	case driver.StateInflight:
+		where += " AND status = 'inflight'"
+	case driver.StateDLQ:
+		where += " AND status = 'dlq'"
+	case driver.StateDone:
+		where += " AND status = 'done'"
+	default:
+		// unknown state filter: return empty page rather than surprise
+		where += " AND 1=0"
+	}
+
+	if strings.TrimSpace(req.Cursor) != "" {
+		cursorTime := time.Unix(0, cur.A).UTC()
+		where += fmt.Sprintf(" AND (created_at > $%d OR (created_at = $%d AND id > $%d))", p, p, p+1)
+		args = append(args, cursorTime, cur.ID)
+		p += 2
+	}
+
+	args = append(args, req.Limit+1)
+
+	// select fields needed for JobSummary
+	query := `
+SELECT
+ id,
+ type,
+ queue,
+ run_at,
+ timeout_nanos,
+ created_at,
+ attempts,
+ max_attempts,
+ last_error,
+ failed_at,
+ status,
+ lease_expires_at,
+ dlq_reason,
+ dlq_failed_at
+FROM th_jobs
+` + where + `
+ORDER BY created_at ASC, id ASC
+LIMIT $` + fmt.Sprintf("%d", p)
+
+	return query, args
+}
diff --git a/taskharbor/driver/postgres/postgres_admin_test.go b/taskharbor/driver/postgres/postgres_admin_test.go
new file mode 100644
index 0000000..10d0d4e
--- /dev/null
+++ b/taskharbor/driver/postgres/postgres_admin_test.go
@@ -0,0 +1,220 @@
+package postgres
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/ARJ2211/taskharbor/taskharbor/driver"
+	"github.com/ARJ2211/taskharbor/taskharbor/internal/envutil"
+	"github.com/jackc/pgx/v5/pgxpool"
+)
+
+func newAdminPoolAndDriver(t *testing.T) (context.Context, *pgxpool.Pool, *Driver) {
+	t.Helper()
+
+	wd, _ := os.Getwd()
+	_ = envutil.LoadRepoDotenv(wd)
+
+	dsn := 
os.Getenv("TASKHARBOR_TEST_DSN")
+	if dsn == "" {
+		t.Skip("TASKHARBOR_TEST_DSN not set")
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	t.Cleanup(cancel)
+
+	pool, err := pgxpool.New(ctx, dsn)
+	if err != nil {
+		t.Fatalf("pgxpool.New: %v", err)
+	}
+	t.Cleanup(pool.Close)
+
+	if err := ApplyMigrations(ctx, pool); err != nil {
+		t.Fatalf("ApplyMigrations: %v", err)
+	}
+	// Start from an empty table. Note that this deletes every row of th_jobs
+	// in the database the DSN points at.
+	if _, err := pool.Exec(ctx, `DELETE FROM th_jobs`); err != nil {
+		t.Fatalf("cleanup: %v", err)
+	}
+
+	d, err := NewWithPool(pool)
+	if err != nil {
+		t.Fatalf("NewWithPool: %v", err)
+	}
+	return ctx, pool, d
+}
+
+// TestPostgresAdmin_Inspect_List_RequeueDLQ drives one job through
+// retry -> DLQ -> requeue (with attempts reset) -> done and checks Inspect and
+// List along the way. It is skipped unless TASKHARBOR_TEST_DSN is set.
+func TestPostgresAdmin_Inspect_List_RequeueDLQ(t *testing.T) {
+	ctx, pool, d := newAdminPoolAndDriver(t)
+	_ = pool
+
+	// A fixed clock keeps all relative timestamps below deterministic.
+	now := time.Date(2026, 2, 23, 12, 0, 0, 0, time.UTC)
+
+	// scheduled job
+	recSched := driver.JobRecord{
+		ID:          "admin_sched_1",
+		Type:        "t",
+		Queue:       "default",
+		Payload:     []byte(`{"x":1}`),
+		RunAt:       now.Add(30 * time.Second),
+		CreatedAt:   now.Add(-3 * time.Second),
+		MaxAttempts: 3,
+	}
+	if _, _, err := d.Enqueue(ctx, recSched); err != nil {
+		t.Fatalf("Enqueue sched: %v", err)
+	}
+
+	// job we'll push to DLQ with attempts>0, then requeue with reset
+	recDLQ := driver.JobRecord{
+		ID:          "admin_dlq_1",
+		Type:        "t",
+		Queue:       "default",
+		Payload:     []byte(`{}`),
+		CreatedAt:   now.Add(-2 * time.Second),
+		MaxAttempts: 3,
+	}
+	if _, _, err := d.Enqueue(ctx, recDLQ); err != nil {
+		t.Fatalf("Enqueue dlq: %v", err)
+	}
+
+	// reserve + retry to bump attempts
+	_, lease1, ok, err := d.Reserve(ctx, "default", now, 5*time.Second)
+	if err != nil || !ok {
+		t.Fatalf("Reserve#1: ok=%v err=%v", ok, err)
+	}
+	if err := d.Retry(ctx, recDLQ.ID, lease1.Token, now.Add(1*time.Second), driver.RetryUpdate{
+		Attempts:  1,
+		LastError: "boom",
+		FailedAt:  now.Add(1 * time.Second),
+		RunAt:     time.Time{},
+	}); err != nil {
+		t.Fatalf("Retry: %v", err)
+	}
+
+	// reserve again + fail to DLQ
+	_, lease2, ok, err := d.Reserve(ctx, "default", now.Add(2*time.Second), 5*time.Second)
+	if err != nil || !ok {
+		t.Fatalf("Reserve#2: ok=%v err=%v", ok, err)
+	}
+	if err := d.Fail(ctx, recDLQ.ID, lease2.Token, now.Add(3*time.Second), "max attempts reached"); err != nil {
+		t.Fatalf("Fail: %v", err)
+	}
+
+	// Inspect scheduled
+	ji, err := d.Inspect(ctx, recSched.ID, now)
+	if err != nil {
+		t.Fatalf("Inspect sched: %v", err)
+	}
+	if ji.State != driver.StateScheduled {
+		t.Fatalf("expected scheduled, got %s", ji.State)
+	}
+
+	// Inspect dlq
+	ji2, err := d.Inspect(ctx, recDLQ.ID, now.Add(4*time.Second))
+	if err != nil {
+		t.Fatalf("Inspect dlq: %v", err)
+	}
+	if ji2.State != driver.StateDLQ || ji2.DLQ == nil {
+		t.Fatalf("expected dlq with info, got state=%s dlq=%v", ji2.State, ji2.DLQ)
+	}
+
+	// List scheduled only
+	pageSched, err := d.List(ctx, driver.ListRequest{
+		Queue: "default",
+		State: driver.StateScheduled,
+		Now:   now,
+		Limit: 10,
+	})
+	if err != nil {
+		t.Fatalf("List scheduled: %v", err)
+	}
+	if len(pageSched.Jobs) != 1 || pageSched.Jobs[0].ID != recSched.ID {
+		t.Fatalf("expected 1 scheduled job %s, got %#v", recSched.ID, pageSched.Jobs)
+	}
+
+	// List dlq only
+	pageDLQ, err := d.List(ctx, driver.ListRequest{
+		Queue: "default",
+		State: driver.StateDLQ,
+		Now:   now,
+		Limit: 10,
+	})
+	if err != nil {
+		t.Fatalf("List dlq: %v", err)
+	}
+	if len(pageDLQ.Jobs) != 1 || pageDLQ.Jobs[0].ID != recDLQ.ID {
+		t.Fatalf("expected 1 dlq job %s, got %#v", recDLQ.ID, pageDLQ.Jobs)
+	}
+
+	// Requeue from DLQ with reset attempts
+	if err := d.RequeueDLQ(ctx, recDLQ.ID, now.Add(5*time.Second), driver.RequeueOptions{
+		Queue:         "default",
+		ResetAttempts: true,
+	}); err != nil {
+		t.Fatalf("RequeueDLQ: %v", err)
+	}
+
+	ji3, err := d.Inspect(ctx, recDLQ.ID, now.Add(5*time.Second))
+	if err != nil {
+		t.Fatalf("Inspect after requeue: %v", err)
+	}
+	if ji3.State != driver.StateReady {
+		t.Fatalf("expected ready after requeue, got %s", ji3.State)
+	}
+	if ji3.Record.Attempts != 0 || ji3.Record.LastError != "" {
+		t.Fatalf("expected attempts reset, got attempts=%d last_error=%q", ji3.Record.Attempts, ji3.Record.LastError)
+	}
+
+	// Reservable again
+	_, lease3, ok, err := d.Reserve(ctx, "default", now.Add(6*time.Second), 5*time.Second)
+	if err != nil || !ok {
+		t.Fatalf("Reserve after requeue: ok=%v err=%v", ok, err)
+	}
+	if err := d.Ack(ctx, recDLQ.ID, lease3.Token, now.Add(7*time.Second)); err != nil {
+		t.Fatalf("Ack: %v", err)
+	}
+
+	ji4, err := d.Inspect(ctx, recDLQ.ID, now.Add(8*time.Second))
+	if err != nil {
+		t.Fatalf("Inspect done: %v", err)
+	}
+	if ji4.State != driver.StateDone {
+		t.Fatalf("expected done, got %s", ji4.State)
+	}
+}
+
+// TestPostgresAdmin_List_Pagination enqueues three jobs created one second
+// apart and checks that a page size of 2 yields a full first page with a
+// cursor and a one-job second page. It is skipped unless TASKHARBOR_TEST_DSN
+// is set.
+func TestPostgresAdmin_List_Pagination(t *testing.T) {
+	ctx, _, d := newAdminPoolAndDriver(t)
+	now := time.Date(2026, 2, 23, 12, 0, 0, 0, time.UTC)
+
+	for i := 0; i < 3; i++ {
+		rec := driver.JobRecord{
+			ID:        fmt.Sprintf("admin_page_%d", i),
+			Type:      "t",
+			Queue:     "default",
+			Payload:   []byte(`{}`),
+			CreatedAt: now.Add(time.Duration(i) * time.Second),
+		}
+		if _, _, err := d.Enqueue(ctx, rec); err != nil {
+			t.Fatalf("Enqueue %d: %v", i, err)
+		}
+	}
+
+	p1, err := d.List(ctx, driver.ListRequest{Queue: "default", Now: now, Limit: 2})
+	if err != nil {
+		t.Fatalf("List p1: %v", err)
+	}
+	if len(p1.Jobs) != 2 || p1.NextCursor == "" {
+		t.Fatalf("expected 2 jobs + cursor, got %d cursor=%q", len(p1.Jobs), p1.NextCursor)
+	}
+
+	p2, err := d.List(ctx, driver.ListRequest{Queue: "default", Now: now, Limit: 2, Cursor: p1.NextCursor})
+	if err != nil {
+		t.Fatalf("List p2: %v", err)
+	}
+	if len(p2.Jobs) != 1 {
+		t.Fatalf("expected 1 job on second page, got %d", len(p2.Jobs))
+	}
+}
From df4b38d74dc3e621f0d04f35ddf53c3995ed14cd Mon Sep 17 00:00:00 2001
From: ARJ2211
Date: Tue, 24 Feb 2026 16:28:24 -0500
Subject: [PATCH 14/19] Added list and dlq

---
 cmd/taskharbor/internal/app/admin_util.go | 36 +++++
 cmd/taskharbor/internal/app/dlq.go | 177 ++++++++++++++++++++++
 cmd/taskharbor/internal/app/list.go | 150 ++++++++++++++++++
3 files changed, 363 insertions(+) create mode 100644 cmd/taskharbor/internal/app/admin_util.go create mode 100644 cmd/taskharbor/internal/app/dlq.go create mode 100644 cmd/taskharbor/internal/app/list.go diff --git a/cmd/taskharbor/internal/app/admin_util.go b/cmd/taskharbor/internal/app/admin_util.go new file mode 100644 index 0000000..8881c60 --- /dev/null +++ b/cmd/taskharbor/internal/app/admin_util.go @@ -0,0 +1,36 @@ +package app + +import ( + "context" + "fmt" + + "github.com/ARJ2211/taskharbor/cmd/taskharbor/internal/backend" + drv "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +type adminHandle struct { + Admin drv.Admin + Close func() error +} + +func openAdmin(ctx context.Context, g GlobalFlags) (*adminHandle, error) { + h, err := backend.Open(ctx, backend.Config{ + Driver: g.Driver, + PostgresDSN: g.PostgresDSN, + RedisAddr: g.RedisAddr, + }) + if err != nil { + return nil, err + } + + a, ok := h.Driver.(drv.Admin) + if !ok { + _ = h.Close() + return nil, fmt.Errorf("driver %q does not support admin operations", g.Driver) + } + + return &adminHandle{ + Admin: a, + Close: h.Close, + }, nil +} diff --git a/cmd/taskharbor/internal/app/dlq.go b/cmd/taskharbor/internal/app/dlq.go new file mode 100644 index 0000000..6170829 --- /dev/null +++ b/cmd/taskharbor/internal/app/dlq.go @@ -0,0 +1,177 @@ +package app + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "strings" + "time" + + drv "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +func runDLQList(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var ( + help bool + h bool + queue string + limit int + cursor string + ) + + fs := flag.NewFlagSet("taskharbor dlq list", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + fs.StringVar(&queue, "queue", "", "queue name (defaults to global --queue)") + fs.IntVar(&limit, "limit", 20, "page size") + fs.StringVar(&cursor, "cursor", "", "pagination cursor") + + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, 
"h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printDLQListUsage(stderr) + return 2 + } + if help || h { + printDLQListUsage(stdout) + return 0 + } + if len(fs.Args()) != 0 { + fmt.Fprintln(stderr, "error: unexpected args:", strings.Join(fs.Args(), " ")) + printDLQListUsage(stderr) + return 2 + } + + if strings.TrimSpace(queue) == "" { + queue = g.Queue + } + queue = strings.TrimSpace(queue) + if queue == "" { + fmt.Fprintln(stderr, "error: queue is required (use --queue or global --queue)") + return 2 + } + + ctx := context.Background() + ah, err := openAdmin(ctx, g) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + defer func() { _ = ah.Close() }() + + page, err := ah.Admin.List(ctx, drv.ListRequest{ + Queue: queue, + State: drv.StateDLQ, + Now: time.Now().UTC(), + Limit: limit, + Cursor: cursor, + }) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + + if g.JSON { + enc := json.NewEncoder(stdout) + enc.SetEscapeHTML(false) + _ = enc.Encode(map[string]any{ + "queue": queue, + "status": "dlq", + "jobs": page.Jobs, + "next_cursor": page.NextCursor, + }) + return 0 + } + + printJobSummaryTable(stdout, page.Jobs) + if page.NextCursor != "" { + fmt.Fprintln(stdout) + fmt.Fprintln(stdout, "next_cursor:", page.NextCursor) + } + return 0 +} + +func runDLQRequeue(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var ( + help bool + h bool + queue string + runAt string + reset bool + ) + + fs := flag.NewFlagSet("taskharbor dlq requeue", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + fs.StringVar(&queue, "queue", "", "queue guard (defaults to global --queue)") + fs.StringVar(&runAt, "run-at", "", "schedule time (RFC3339/RFC3339Nano) or unix seconds (or unix ms)") + fs.BoolVar(&reset, "reset-attempts", false, "reset attempts/last_error/failed_at") + + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := 
fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printDLQRequeueUsage(stderr) + return 2 + } + if help || h { + printDLQRequeueUsage(stdout) + return 0 + } + + args := fs.Args() + if len(args) != 1 { + fmt.Fprintln(stderr, "error: dlq requeue requires exactly 1 arg: ") + printDLQRequeueUsage(stderr) + return 2 + } + id := strings.TrimSpace(args[0]) + if id == "" { + fmt.Fprintln(stderr, "error: job_id cannot be empty") + return 2 + } + + if strings.TrimSpace(queue) == "" { + queue = g.Queue + } + queue = strings.TrimSpace(queue) + + runAtTime, err := parseRunAt(runAt) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 2 + } + + ctx := context.Background() + ah, err := openAdmin(ctx, g) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + defer func() { _ = ah.Close() }() + + err = ah.Admin.RequeueDLQ(ctx, id, time.Now().UTC(), drv.RequeueOptions{ + Queue: queue, + RunAt: runAtTime, + ResetAttempts: reset, + }) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + + if g.JSON { + enc := json.NewEncoder(stdout) + enc.SetEscapeHTML(false) + _ = enc.Encode(map[string]any{"ok": true, "id": id}) + return 0 + } + + fmt.Fprintln(stdout, "ok:", id) + return 0 +} diff --git a/cmd/taskharbor/internal/app/list.go b/cmd/taskharbor/internal/app/list.go new file mode 100644 index 0000000..7439509 --- /dev/null +++ b/cmd/taskharbor/internal/app/list.go @@ -0,0 +1,150 @@ +package app + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "strings" + "text/tabwriter" + "time" + + drv "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +func runList(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var ( + help bool + h bool + queue string + state string + limit int + cursor string + ) + + fs := flag.NewFlagSet("taskharbor list", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + fs.StringVar(&queue, "queue", "", "queue name (defaults to global --queue)") + fs.StringVar(&state, 
"status", "", "ready|scheduled|inflight|dlq|done|all") + fs.IntVar(&limit, "limit", 20, "page size") + fs.StringVar(&cursor, "cursor", "", "pagination cursor") + + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printListUsage(stderr) + return 2 + } + if help || h { + printListUsage(stdout) + return 0 + } + if len(fs.Args()) != 0 { + fmt.Fprintln(stderr, "error: unexpected args:", strings.Join(fs.Args(), " ")) + printListUsage(stderr) + return 2 + } + + if strings.TrimSpace(queue) == "" { + queue = g.Queue + } + queue = strings.TrimSpace(queue) + if queue == "" { + fmt.Fprintln(stderr, "error: queue is required (use --queue or global --queue)") + return 2 + } + + st, err := parseState(state) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 2 + } + + ctx := context.Background() + ah, err := openAdmin(ctx, g) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + defer func() { _ = ah.Close() }() + + page, err := ah.Admin.List(ctx, drv.ListRequest{ + Queue: queue, + State: st, + Now: time.Now().UTC(), + Limit: limit, + Cursor: cursor, + }) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + + if g.JSON { + enc := json.NewEncoder(stdout) + enc.SetEscapeHTML(false) + _ = enc.Encode(map[string]any{ + "queue": queue, + "status": string(st), + "jobs": page.Jobs, + "next_cursor": page.NextCursor, + }) + return 0 + } + + printJobSummaryTable(stdout, page.Jobs) + if page.NextCursor != "" { + fmt.Fprintln(stdout) + fmt.Fprintln(stdout, "next_cursor:", page.NextCursor) + } + return 0 +} + +func parseState(s string) (drv.JobState, error) { + s = strings.TrimSpace(strings.ToLower(s)) + if s == "" || s == "all" { + return "", nil + } + switch s { + case "ready": + return drv.StateReady, nil + case "scheduled": + return drv.StateScheduled, nil + case "inflight": + return drv.StateInflight, nil + case 
"dlq": + return drv.StateDLQ, nil + case "done": + return drv.StateDone, nil + default: + return "", fmt.Errorf("invalid --status %q (expected ready|scheduled|inflight|dlq|done|all)", s) + } +} + +func printJobSummaryTable(w io.Writer, jobs []drv.JobSummary) { + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + fmt.Fprintln(tw, "ID\tTYPE\tSTATE\tATTEMPTS\tRUN_AT\tLEASE_EXPIRES\tDLQ_REASON") + for _, j := range jobs { + runAt := "-" + if !j.RunAt.IsZero() { + runAt = j.RunAt.UTC().Format(time.RFC3339) + } + lease := "-" + if !j.LeaseExpiresAt.IsZero() { + lease = j.LeaseExpiresAt.UTC().Format(time.RFC3339) + } + at := fmt.Sprintf("%d/%d", j.Attempts, j.MaxAttempts) + reason := j.DLQReason + if reason == "" { + reason = "-" + } + fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", + j.ID, j.Type, j.State, at, runAt, lease, reason, + ) + } + _ = tw.Flush() +} From 367540df0d126149cf6098ab121e71e2ad2a3819 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Tue, 24 Feb 2026 16:28:46 -0500 Subject: [PATCH 15/19] added inspect and job_retry cli --- cmd/taskharbor/internal/app/inspect.go | 118 +++++++++++++++++++++ cmd/taskharbor/internal/app/job_retry.go | 66 ++++++++++++ cmd/taskharbor/internal/backend/backend.go | 55 ++++++---- 3 files changed, 218 insertions(+), 21 deletions(-) create mode 100644 cmd/taskharbor/internal/app/inspect.go create mode 100644 cmd/taskharbor/internal/app/job_retry.go diff --git a/cmd/taskharbor/internal/app/inspect.go b/cmd/taskharbor/internal/app/inspect.go new file mode 100644 index 0000000..3a2496a --- /dev/null +++ b/cmd/taskharbor/internal/app/inspect.go @@ -0,0 +1,118 @@ +package app + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "strings" + "time" + + drv "github.com/ARJ2211/taskharbor/taskharbor/driver" +) + +func runInspect(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + var help bool + var h bool + + fs := flag.NewFlagSet("taskharbor inspect", flag.ContinueOnError) + fs.SetOutput(io.Discard) + 
fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printInspectUsage(stderr) + return 2 + } + if help || h { + printInspectUsage(stdout) + return 0 + } + + args := fs.Args() + if len(args) != 1 { + fmt.Fprintln(stderr, "error: inspect requires exactly 1 arg: ") + printInspectUsage(stderr) + return 2 + } + id := strings.TrimSpace(args[0]) + if id == "" { + fmt.Fprintln(stderr, "error: job_id cannot be empty") + return 2 + } + + ctx := context.Background() + ah, err := openAdmin(ctx, g) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + defer func() { _ = ah.Close() }() + + info, err := ah.Admin.Inspect(ctx, id, time.Now().UTC()) + if err != nil { + fmt.Fprintln(stderr, "error:", err) + return 1 + } + + if g.JSON { + enc := json.NewEncoder(stdout) + enc.SetEscapeHTML(false) + _ = enc.Encode(info) + return 0 + } + + printJobInfo(stdout, info) + return 0 +} + +func printJobInfo(w io.Writer, info drv.JobInfo) { + r := info.Record + + fmt.Fprintln(w, "id:", r.ID) + fmt.Fprintln(w, "type:", r.Type) + fmt.Fprintln(w, "queue:", r.Queue) + fmt.Fprintln(w, "state:", info.State) + + if !r.CreatedAt.IsZero() { + fmt.Fprintln(w, "created_at:", r.CreatedAt.UTC().Format(time.RFC3339Nano)) + } + if !r.RunAt.IsZero() { + fmt.Fprintln(w, "run_at:", r.RunAt.UTC().Format(time.RFC3339Nano)) + } + if r.Timeout > 0 { + fmt.Fprintln(w, "timeout:", r.Timeout.String()) + } + fmt.Fprintln(w, "attempts:", r.Attempts) + fmt.Fprintln(w, "max_attempts:", r.MaxAttempts) + + if r.LastError != "" { + fmt.Fprintln(w, "last_error:", r.LastError) + } + if !r.FailedAt.IsZero() { + fmt.Fprintln(w, "failed_at:", r.FailedAt.UTC().Format(time.RFC3339Nano)) + } + + if info.Lease != nil { + fmt.Fprintln(w, "lease_token:", string(info.Lease.Token)) + if !info.Lease.ExpiresAt.IsZero() { + fmt.Fprintln(w, "lease_expires_at:", 
info.Lease.ExpiresAt.UTC().Format(time.RFC3339Nano)) + } + } + + if info.DLQ != nil { + if info.DLQ.Reason != "" { + fmt.Fprintln(w, "dlq_reason:", info.DLQ.Reason) + } + if !info.DLQ.FailedAt.IsZero() { + fmt.Fprintln(w, "dlq_failed_at:", info.DLQ.FailedAt.UTC().Format(time.RFC3339Nano)) + } + } + + if len(r.Payload) > 0 { + fmt.Fprintln(w, "payload_bytes:", len(r.Payload)) + fmt.Fprintln(w, "payload_preview:", string(r.Payload)) + } +} diff --git a/cmd/taskharbor/internal/app/job_retry.go b/cmd/taskharbor/internal/app/job_retry.go new file mode 100644 index 0000000..27ea627 --- /dev/null +++ b/cmd/taskharbor/internal/app/job_retry.go @@ -0,0 +1,66 @@ +package app + +import ( + "flag" + "fmt" + "io" + "strings" +) + +func runJobRetry(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { + // Alias to: dlq requeue + // Supports: --queue, --run-at, --reset-attempts + var ( + help bool + h bool + queue string + runAt string + reset bool + ) + + fs := flag.NewFlagSet("taskharbor job retry", flag.ContinueOnError) + fs.SetOutput(io.Discard) + + fs.StringVar(&queue, "queue", "", "queue guard (defaults to global --queue)") + fs.StringVar(&runAt, "run-at", "", "schedule time (RFC3339/RFC3339Nano) or unix seconds (or unix ms)") + fs.BoolVar(&reset, "reset-attempts", false, "reset attempts/last_error/failed_at") + + fs.BoolVar(&help, "help", false, "show help") + fs.BoolVar(&h, "h", false, "show help") + + if err := fs.Parse(argv); err != nil { + fmt.Fprintln(stderr, "error:", err) + printJobRetryUsage(stderr) + return 2 + } + if help || h { + printJobRetryUsage(stdout) + return 0 + } + + args := fs.Args() + if len(args) != 1 { + fmt.Fprintln(stderr, "error: job retry requires exactly 1 arg: ") + printJobRetryUsage(stderr) + return 2 + } + id := strings.TrimSpace(args[0]) + if id == "" { + fmt.Fprintln(stderr, "error: job_id cannot be empty") + return 2 + } + + // Reuse dlq requeue implementation + requeueArgs := []string{id} + if queue != "" { + requeueArgs = 
append([]string{"--queue", queue}, requeueArgs...) + } + if runAt != "" { + requeueArgs = append([]string{"--run-at", runAt}, requeueArgs...) + } + if reset { + requeueArgs = append([]string{"--reset-attempts"}, requeueArgs...) + } + + return runDLQRequeue(g, requeueArgs, stdout, stderr) +} diff --git a/cmd/taskharbor/internal/backend/backend.go b/cmd/taskharbor/internal/backend/backend.go index bcbbd9b..c638006 100644 --- a/cmd/taskharbor/internal/backend/backend.go +++ b/cmd/taskharbor/internal/backend/backend.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "sync" "github.com/ARJ2211/taskharbor/taskharbor/driver" "github.com/ARJ2211/taskharbor/taskharbor/driver/memory" @@ -19,29 +20,44 @@ type Config struct { } type Handle struct { - Driver driver.Driver + Driver driver.Driver + closeFn func() error } func (h *Handle) Close() error { - if h == nil || h.Driver == nil { + if h == nil { return nil } - if err := h.Driver.Close(); err != nil { - return err + if h.closeFn != nil { + return h.closeFn() } - return nil + if h.Driver == nil { + return nil + } + return h.Driver.Close() } -/* -This function opens a new driver based on the configs and -arguments provided by the user. Default driver: memory. 
-*/ +var ( + memMu sync.Mutex + sharedMem *memory.Driver +) + func Open(ctx context.Context, cfg Config) (*Handle, error) { - ds := strings.ToLower(strings.TrimSpace(cfg.Driver)) - switch ds { + switch strings.ToLower(strings.TrimSpace(cfg.Driver)) { case "", "memory": - memDrvHnd := Handle{Driver: memory.New()} - return &memDrvHnd, nil + memMu.Lock() + if sharedMem == nil { + sharedMem = memory.New() + } + d := sharedMem + memMu.Unlock() + + // memory is in-process only; do not close it between commands + return &Handle{ + Driver: d, + closeFn: func() error { return nil }, + }, nil + case "postgres": if strings.TrimSpace(cfg.PostgresDSN) == "" { return nil, fmt.Errorf("postgres requires --dsn (or TH_PG_DSN)") @@ -50,8 +66,8 @@ func Open(ctx context.Context, cfg Config) (*Handle, error) { if err != nil { return nil, err } - psqlDrvHnd := Handle{Driver: d} - return &psqlDrvHnd, nil + return &Handle{Driver: d}, nil + case "redis": if strings.TrimSpace(cfg.RedisAddr) == "" { return nil, fmt.Errorf("redis requires --redis-addr (or TH_REDIS_ADDR)") @@ -60,12 +76,9 @@ func Open(ctx context.Context, cfg Config) (*Handle, error) { if err != nil { return nil, err } - redisDrvHnd := Handle{Driver: d} - return &redisDrvHnd, nil + return &Handle{Driver: d}, nil + default: - return nil, fmt.Errorf( - "unknown driver: %s (expected memory|postgres|redis)", - cfg.Driver, - ) + return nil, fmt.Errorf("unknown driver: %s (expected memory|postgres|redis)", cfg.Driver) } } From 7bfed1a463342e8cea4bd938cfa50685c9a0d95b Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Tue, 24 Feb 2026 16:29:01 -0500 Subject: [PATCH 16/19] Updated the usage of CLI --- cmd/taskharbor/internal/app/app.go | 141 ------------------------ cmd/taskharbor/internal/app/app_test.go | 30 +++++ cmd/taskharbor/internal/app/usage.go | 47 ++++++-- 3 files changed, 68 insertions(+), 150 deletions(-) diff --git a/cmd/taskharbor/internal/app/app.go b/cmd/taskharbor/internal/app/app.go index c706a2f..8237a92 100644 --- 
a/cmd/taskharbor/internal/app/app.go +++ b/cmd/taskharbor/internal/app/app.go @@ -124,61 +124,6 @@ func runWorker(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { } } -func runList(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor list", flag.ContinueOnError) - fs.SetOutput(io.Discard) - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") - - if err := fs.Parse(argv); err != nil { - fmt.Fprintln(stderr, "error:", err) - printListUsage(stderr) - return 2 - } - - if help || h { - printListUsage(stdout) - return 0 - } - - fmt.Fprintln(stderr, "not implemented yet (issue #119)") - return 1 -} - -func runInspect(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor inspect", flag.ContinueOnError) - fs.SetOutput(io.Discard) - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") - - if err := fs.Parse(argv); err != nil { - fmt.Fprintln(stderr, "error:", err) - printInspectUsage(stderr) - return 2 - } - - if help || h { - printInspectUsage(stdout) - return 0 - } - - args := fs.Args() - if len(args) != 1 { - fmt.Fprintln(stderr, "error: inspect requires exactly 1 arg: ") - printInspectUsage(stderr) - return 2 - } - - fmt.Fprintln(stderr, "not implemented yet (issue #119)") - return 1 -} - func runDLQ(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { var help bool var h bool @@ -215,61 +160,6 @@ func runDLQ(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { } } -func runDLQList(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor dlq list", flag.ContinueOnError) - fs.SetOutput(io.Discard) - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") - - if err := fs.Parse(argv); err != nil { - fmt.Fprintln(stderr, 
"error:", err) - printDLQListUsage(stderr) - return 2 - } - - if help || h { - printDLQListUsage(stdout) - return 0 - } - - fmt.Fprintln(stderr, "not implemented yet (issue #119)") - return 1 -} - -func runDLQRequeue(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor dlq requeue", flag.ContinueOnError) - fs.SetOutput(io.Discard) - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") - - if err := fs.Parse(argv); err != nil { - fmt.Fprintln(stderr, "error:", err) - printDLQRequeueUsage(stderr) - return 2 - } - - if help || h { - printDLQRequeueUsage(stdout) - return 0 - } - - args := fs.Args() - if len(args) != 1 { - fmt.Fprintln(stderr, "error: dlq requeue requires exactly 1 arg: ") - printDLQRequeueUsage(stderr) - return 2 - } - - fmt.Fprintln(stderr, "not implemented yet (issue #119)") - return 1 -} - func runJob(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { var help bool var h bool @@ -303,34 +193,3 @@ func runJob(g GlobalFlags, argv []string, stdout, stderr io.Writer) int { return 2 } } - -func runJobRetry(_ GlobalFlags, argv []string, stdout, stderr io.Writer) int { - var help bool - var h bool - - fs := flag.NewFlagSet("taskharbor job retry", flag.ContinueOnError) - fs.SetOutput(io.Discard) - fs.BoolVar(&help, "help", false, "show help") - fs.BoolVar(&h, "h", false, "show help") - - if err := fs.Parse(argv); err != nil { - fmt.Fprintln(stderr, "error:", err) - printJobRetryUsage(stderr) - return 2 - } - - if help || h { - printJobRetryUsage(stdout) - return 0 - } - - args := fs.Args() - if len(args) != 1 { - fmt.Fprintln(stderr, "error: job retry requires exactly 1 arg: ") - printJobRetryUsage(stderr) - return 2 - } - - fmt.Fprintln(stderr, "not implemented yet (issue #119)") - return 1 -} diff --git a/cmd/taskharbor/internal/app/app_test.go b/cmd/taskharbor/internal/app/app_test.go index 2510d71..480637e 100644 --- 
a/cmd/taskharbor/internal/app/app_test.go +++ b/cmd/taskharbor/internal/app/app_test.go @@ -46,3 +46,33 @@ func TestEnqueueMinimal(t *testing.T) { t.Fatalf("expected a job id, got empty output") } } + +func TestListAndInspectAfterEnqueue(t *testing.T) { + var out1, err1 bytes.Buffer + code := Run([]string{"enqueue", "--type", "echo", "--payload", "hi"}, &out1, &err1) + if code != 0 { + t.Fatalf("enqueue expected 0, got %d (stderr=%q)", code, err1.String()) + } + id := strings.TrimSpace(out1.String()) + if id == "" { + t.Fatalf("expected job id") + } + + var out2, err2 bytes.Buffer + code = Run([]string{"list"}, &out2, &err2) + if code != 0 { + t.Fatalf("list expected 0, got %d (stderr=%q)", code, err2.String()) + } + if !strings.Contains(out2.String(), id) { + t.Fatalf("expected list to contain id %q, got: %q", id, out2.String()) + } + + var out3, err3 bytes.Buffer + code = Run([]string{"inspect", id}, &out3, &err3) + if code != 0 { + t.Fatalf("inspect expected 0, got %d (stderr=%q)", code, err3.String()) + } + if !strings.Contains(out3.String(), "id: "+id) { + t.Fatalf("expected inspect output to include id, got: %q", out3.String()) + } +} diff --git a/cmd/taskharbor/internal/app/usage.go b/cmd/taskharbor/internal/app/usage.go index c96df89..c9950c0 100644 --- a/cmd/taskharbor/internal/app/usage.go +++ b/cmd/taskharbor/internal/app/usage.go @@ -88,16 +88,24 @@ func printListUsage(w io.Writer) { fmt.Fprintln(w, `Usage: taskharbor [global flags] list [flags] -Notes: - This will be implemented in issue #119.`) +Flags: + --queue queue name (defaults to global --queue) + --status ready|scheduled|inflight|dlq|done|all + --limit page size (default: 20) + --cursor pagination cursor + +Examples: + taskharbor list + taskharbor list --status inflight + taskharbor list --status scheduled --limit 50`) } func printInspectUsage(w io.Writer) { fmt.Fprintln(w, `Usage: taskharbor [global flags] inspect [flags] -Notes: - This will be implemented in issue #119.`) +Examples: + 
taskharbor inspect 01J0ABCDEF1234567890`) } func printDLQUsage(w io.Writer) { @@ -117,16 +125,28 @@ func printDLQListUsage(w io.Writer) { fmt.Fprintln(w, `Usage: taskharbor [global flags] dlq list [flags] -Notes: - This will be implemented in issue #119.`) +Flags: + --queue queue name (defaults to global --queue) + --limit page size (default: 20) + --cursor pagination cursor + +Examples: + taskharbor dlq list + taskharbor dlq list --limit 50`) } func printDLQRequeueUsage(w io.Writer) { fmt.Fprintln(w, `Usage: taskharbor [global flags] dlq requeue [flags] -Notes: - This will be implemented in issue #119.`) +Flags: + --queue queue guard (defaults to global --queue) + --run-at RFC3339/RFC3339Nano or unix seconds (or unix ms) + --reset-attempts reset attempts/last_error/failed_at + +Examples: + taskharbor dlq requeue + taskharbor dlq requeue --reset-attempts`) } func printJobUsage(w io.Writer) { @@ -145,5 +165,14 @@ func printJobRetryUsage(w io.Writer) { taskharbor [global flags] job retry [flags] Notes: - This will be implemented in issue #119.`) + Alias of: taskharbor dlq requeue + +Flags: + --queue queue guard (defaults to global --queue) + --run-at RFC3339/RFC3339Nano or unix seconds (or unix ms) + --reset-attempts reset attempts/last_error/failed_at + +Examples: + taskharbor job retry + taskharbor job retry --reset-attempts`) } From a7422bf89ca28a8ce1e48c69c081cc654d08b0de Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Tue, 24 Feb 2026 16:43:39 -0500 Subject: [PATCH 17/19] added CLI.md and small change to test context window --- docs/cli.md | 506 ++++++++++++++++++ .../driver/postgres/postgres_ack_test.go | 2 +- 2 files changed, 507 insertions(+), 1 deletion(-) create mode 100644 docs/cli.md diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 0000000..8481413 --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,506 @@ +# TaskHarbor CLI (v0) + +This document explains the TaskHarbor CLI added in **Milestone 8**. 
It is meant for local development and quick sanity checks across drivers. The CLI is intentionally simple: + +- It uses the same core **Client/Worker APIs** your application would use. +- It connects to a driver backend (`memory`, `postgres`, `redis`). +- It relies on the optional `driver.Admin` interface for list/inspect/DLQ operations. + +--- + +## How to Run It + +From the repo root: + +**1. Run directly with `go run`:** + +```bash +go run ./cmd/taskharbor --help +``` + +**2. Or install the binary locally:** + +```bash +go install ./cmd/taskharbor +taskharbor --help +``` + +--- + +## Global Flags and Environment Variables + +Global flags apply **before** the command name. + +| Flag | Values / Example | Default | Description | +| -------------- | ----------------------------- | --------- | ----------------------------------------------- | +| `--driver` | `memory`, `postgres`, `redis` | `memory` | Backend driver to connect to | +| `--queue` | any string | `default` | Queue name | +| `--json` | — | — | Print JSON output (useful for scripting) | +| `--verbose` | — | — | Extra logs (currently minimal) | +| `--dsn` | `postgres://...` | — | Postgres DSN (required for `--driver postgres`) | +| `--redis-addr` | `host:port` | — | Redis address (required for `--driver redis`) | + +**Environment variables (used as defaults):** + +| Variable | Purpose | +| -------------------------------- | -------------- | +| `TH_DRIVER` | Default driver | +| `TH_PG_DSN` or `TH_POSTGRES_DSN` | Postgres DSN | +| `TH_REDIS_ADDR` | Redis address | + +**Examples:** + +```bash +TH_DRIVER=postgres TH_PG_DSN='postgres://...' go run ./cmd/taskharbor list +TH_DRIVER=redis TH_REDIS_ADDR='localhost:6379' go run ./cmd/taskharbor list +``` + +--- + +## Important Note About the Memory Driver + +The memory driver lives only inside a **single process**. That means: + +- If you run `worker run` in terminal 1 and `enqueue` in terminal 2 using `--driver memory`, they will **not** see each other. 
+- For multi-terminal testing, use **Postgres** or **Redis**. + +The memory driver is still useful for: + +- Unit tests +- Quick single-process experiments +- Debugging worker behavior without external services + +--- + +## Command Overview + +| Command | Description | +| ------------------ | ---------------------------------------------- | +| `worker run` | Start a long-running worker process | +| `enqueue` | Add a job to the queue | +| `list` | List jobs for a queue | +| `inspect <id>` | Show a single job record and its state | +| `dlq list` | List dead-letter queue jobs | +| `dlq requeue <id>` | Move a DLQ job back into the runnable pipeline | +| `job retry <id>` | Alias for `dlq requeue` | + +Most commands open a connection to the driver, do one thing, print output, and exit. `worker run` is the only long-running command. + +--- + +## `worker run` + +Starts a worker process and registers a small set of built-in handlers so you can test the pipeline without writing app code. + +**Built-in handlers:** + +| Handler | Behavior | +| ------- | -------------------------------------------------------------------------------- | +| `echo` | Prints basic job info + payload preview, returns success | +| `fail` | Always returns an error — useful to force retries and DLQ | +| `sleep` | Sleeps for a duration derived from the payload, returns success unless cancelled | + +**Basic usage:** + +```bash +taskharbor worker run +``` + +**Common flags:** + +| Flag | Example | Description | +| ---------------------- | ------- | -------------------------------------------- | +| `--concurrency` | `5` | Max concurrent jobs | +| `--poll-interval` | `200ms` | How often to poll when no jobs are available | +| `--lease-duration` | `30s` | Visibility timeout / lease duration | +| `--heartbeat-interval` | `10s` | How often to extend the lease | + +**Handler mapping:** + +`--register` is repeatable and maps `jobType=builtin`: + +```bash +taskharbor worker run --register email=echo --register slow=sleep
+``` + +This lets you enqueue jobs with type `email` or `slow` and have them routed to the built-ins. + +**Sleep payload formats:** + +```bash +# Plain integer milliseconds +--payload 1500 + +# JSON number +--payload-json 1500 + +# JSON object with ms field +--payload-json '{"ms":1500}' + +# JSON object with duration string +--payload-json '{"duration":"1.5s"}' +``` + +> If no valid duration is found, `sleep` defaults to **250ms**. + +**Shutdown:** + +`Ctrl+C` triggers graceful shutdown by cancelling the worker context. + +--- + +## `enqueue` + +Adds a job to the chosen backend. + +**Usage:** + +```bash +taskharbor enqueue --type <type> [flags] +``` + +**Required:** + +| Flag | Description | +| -------- | ------------------- | +| `--type` | Job type identifier | + +**Optional:** + +| Flag | Example | Description | +| ------------------- | ---------------------- | --------------------------------------------------------------- | +| `--queue` | `my-queue` | Overrides the global `--queue` | +| `--run-at` | `2025-01-01T00:00:00Z` | Schedule a job (RFC3339, RFC3339Nano, unix seconds, or unix ms) | +| `--timeout` | `30s` | Per-job timeout | +| `--max-attempts` | `3` | Attempts before DLQ (`0` = fail immediately on handler error) | +| `--idempotency-key` | `my-key` | Deduplication key (driver-enforced) | + +**Payload options:** + +| Flag | Description | +| ----------------------- | --------------------------------------------------- | +| `--payload <string>` | String payload — JSON-encoded internally | +| `--payload-json <json>` | Raw JSON payload — stored as-is, not double-encoded | + +> Use one or the other, not both. +> +> - `--payload "hello"` → stored as JSON string `"hello"` +> - `--payload-json '{"x":1}'` → stored as the raw JSON object bytes + +**Output:** + +- Default: prints the job `id` on stdout +- With `--json`: prints a JSON object including the `id` + +--- + +## `list` + +Lists jobs for a queue.
Requires the `driver.Admin` interface, which `memory`, `postgres`, and `redis` all implement. + +**Usage:** + +```bash +taskharbor list [flags] +``` + +**Flags:** + +| Flag | Description | Default | +| ---------- | --------------------------------------------------------- | ---------------- | +| `--queue` | Queue name | global `--queue` | +| `--status` | `ready`, `scheduled`, `inflight`, `dlq`, `done`, or `all` | — | +| `--limit` | Number of jobs to print | `20` | +| `--cursor` | Pagination cursor from a previous call | — | + +**Output:** + +- Default: a small tabular view +- With `--json`: `jobs` array + `next_cursor` + +**Pagination:** + +If there are more results than `--limit`, `list` returns a `next_cursor`. Pass it back with `--cursor` to fetch the next page: + +```bash +taskharbor list --limit 5 +taskharbor list --limit 5 --cursor <next_cursor> +``` + +--- + +## `inspect` + +Shows a single job record and its current state. + +**Usage:** + +```bash +taskharbor inspect <id> +``` + +**Printed fields:** + +| Field | Notes | +| --------------------------------- | -------------------------------------------------- | +| `job id`, `type`, `queue` | Identity | +| `state` | `ready`, `scheduled`, `inflight`, `dlq`, or `done` | +| `created_at`, `run_at`, `timeout` | Timing | +| `attempts`, `max_attempts` | Retry tracking | +| `last_error`, `failed_at` | When applicable | +| `lease token` + `expiry` | Only when inflight | +| `dlq reason` + `dlq failed time` | Only when in DLQ | +| `payload preview` | Truncated payload | + +> **If you see `job not found`:** +> +> - Confirm you used a **job id** (long hex from `enqueue`), not a worker id (short, printed by `worker run`). +> - Confirm you are using the **same backend** (`postgres`/`redis`) as the process that enqueued the job. + +--- + +## `dlq list` + +Lists DLQ jobs for a queue. This is essentially `list` filtered to `--status dlq`. 
+ +**Usage:** + +```bash +taskharbor dlq list [flags] +``` + +**Flags:** + +| Flag | Description | Default | +| ---------- | ----------------- | ---------------- | +| `--queue` | Queue name | global `--queue` | +| `--limit` | Page size | `20` | +| `--cursor` | Pagination cursor | — | + +--- + +## `dlq requeue` + +Moves a job from the DLQ back into the runnable pipeline. + +**Usage:** + +```bash +taskharbor dlq requeue <id> [flags] +``` + +**Flags:** + +| Flag | Description | +| ------------------ | --------------------------------------------------------- | +| `--queue` | Queue guard — errors if the job is in a different queue | +| `--run-at` | Optionally reschedule the job (same formats as `enqueue`) | +| `--reset-attempts` | Resets `attempts`, `last_error`, and `failed_at` | + +**Typical usage:** + +```bash +taskharbor dlq requeue <id> --reset-attempts +``` + +--- + +## `job retry` + +An alias for `dlq requeue`. It exists because users often think in terms of retrying a failed job rather than requeuing from DLQ. + +**Usage:** + +```bash +taskharbor job retry <id> [same flags as dlq requeue] +``` + +--- + +## Practical Test Recipes + +These are copy-paste friendly flows you can run locally. Everything below assumes you are in the repo root. 
+ +--- + +### Postgres: Quick Start + +**Terminal 1 — start the worker:** + +```bash +export TH_PG_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" worker run +``` + +**Terminal 2 — enqueue and inspect:** + +```bash +export TH_PG_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' + +JOB_ID=$(go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" enqueue --type echo --payload hello) +echo "job_id=$JOB_ID" + +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" list +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" inspect "$JOB_ID" +``` + +--- + +### Redis: Quick Start + +**Terminal 1 — start the worker:** + +```bash +export REDIS_ADDR='localhost:6379' +go run ./cmd/taskharbor --driver redis --redis-addr "$REDIS_ADDR" worker run +``` + +**Terminal 2 — enqueue and inspect:** + +```bash +export REDIS_ADDR='localhost:6379' + +JOB_ID=$(go run ./cmd/taskharbor --driver redis --redis-addr "$REDIS_ADDR" enqueue --type echo --payload hello) +echo "job_id=$JOB_ID" + +go run ./cmd/taskharbor --driver redis --redis-addr "$REDIS_ADDR" list +go run ./cmd/taskharbor --driver redis --redis-addr "$REDIS_ADDR" inspect "$JOB_ID" +``` + +--- + +### Scheduling Test (`--run-at`) + +```bash +export TH_PG_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' + +# Schedule a job 10 seconds in the future (BSD/macOS date syntax; on GNU/Linux use: date -u -d '+10 seconds' +%Y-%m-%dT%H:%M:%SZ) +RUN_AT=$(date -u -v+10S +%Y-%m-%dT%H:%M:%SZ) +JOB_ID=$(go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" \ + enqueue --type echo --run-at "$RUN_AT" --payload "scheduled hello") + +echo "scheduled job_id=$JOB_ID run_at=$RUN_AT" + +# Confirm it is scheduled +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" list --status scheduled +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" inspect "$JOB_ID" +``` + +After 10+ seconds: + +```bash +# Confirm it moved to ready / done 
+go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" list --status ready +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" list --status done +``` + +--- + +### Fail → DLQ → Requeue + +> Requires the `fail` handler, which is registered by `worker run` by default. + +```bash +export TH_PG_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' + +# Enqueue a job that will always fail, with max 1 attempt +JOB_ID=$(go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" \ + enqueue --type fail --max-attempts 1 --payload boom) +echo "dlq candidate job_id=$JOB_ID" + +# Inspect the DLQ +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" dlq list +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" inspect "$JOB_ID" +``` + +Requeue and reset attempts: + +```bash +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" dlq requeue "$JOB_ID" --reset-attempts +``` + +Confirm it is runnable again: + +```bash +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" list +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" inspect "$JOB_ID" +``` + +--- + +### Sleep Handler Test + +```bash +export TH_PG_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' + +JOB_ID=$(go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" \ + enqueue --type sleep --payload 1500) +echo "sleep job_id=$JOB_ID" + +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" inspect "$JOB_ID" +``` + +--- + +### JSON Output for Scripting + +```bash +export TH_PG_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' + +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" --json list +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" --json inspect <id> +``` + +--- + +### Pagination Demo + +Enqueue 12 jobs: + +```bash +export TH_PG_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' + +for i in 
$(seq 1 12); do + go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" \ + enqueue --type echo --payload "msg $i" >/dev/null +done +``` + +Fetch the first page: + +```bash +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" list --limit 5 +``` + +Copy the `next_cursor` from the output and fetch the next page: + +```bash +go run ./cmd/taskharbor --driver postgres --dsn "$TH_PG_DSN" list --limit 5 --cursor <next_cursor> +``` + +--- + +## Common Mistakes and Troubleshooting + +**1. `inspect` says `job not found`** + +- You may have passed a **worker id**. `worker run` prints a short worker id; `enqueue` prints the long hex **job id**. +- You may be using the `memory` driver across different terminal processes. Switch to `postgres` or `redis`. + +**2. `list` is empty but `enqueue` returned an id** + +- With the `memory` driver, the id exists only inside that command's process. +- With `postgres`/`redis`, verify the DSN/address is **identical** across all commands. + +**3. `driver does not support admin operations`** + +- `list`, `inspect`, and `dlq` require the `driver.Admin` interface. +- `memory`, `postgres`, and `redis` all implement it. A future driver might not. + +**4. Postgres/Redis connection errors** + +- Confirm the service is running. +- Verify your DSN or `host:port`. +- Check firewall rules or Docker port mappings. 
+ +--- diff --git a/taskharbor/driver/postgres/postgres_ack_test.go b/taskharbor/driver/postgres/postgres_ack_test.go index ed33f94..011a3c1 100644 --- a/taskharbor/driver/postgres/postgres_ack_test.go +++ b/taskharbor/driver/postgres/postgres_ack_test.go @@ -23,7 +23,7 @@ func newTestPool(t *testing.T) (*pgxpool.Pool, context.Context, context.CancelFu t.Skip("TASKHARBOR_TEST_DSN not set") } - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) pool, err := pgxpool.New(ctx, dsn) if err != nil { From cad89bbcd194d263f4bc9897d50b5f9f0b4b3a06 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Tue, 24 Feb 2026 16:46:42 -0500 Subject: [PATCH 18/19] Updated context timeout window --- taskharbor/driver/postgres/postgres_ack_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskharbor/driver/postgres/postgres_ack_test.go b/taskharbor/driver/postgres/postgres_ack_test.go index 011a3c1..ce14042 100644 --- a/taskharbor/driver/postgres/postgres_ack_test.go +++ b/taskharbor/driver/postgres/postgres_ack_test.go @@ -23,7 +23,7 @@ func newTestPool(t *testing.T) (*pgxpool.Pool, context.Context, context.CancelFu t.Skip("TASKHARBOR_TEST_DSN not set") } - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second) pool, err := pgxpool.New(ctx, dsn) if err != nil { From 988030c06bf150aa88a944a321d8b0f5e7f0b8c1 Mon Sep 17 00:00:00 2001 From: ARJ2211 Date: Tue, 24 Feb 2026 17:10:11 -0500 Subject: [PATCH 19/19] Added driver-creation.md --- docs/driver-creation.md | 631 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 631 insertions(+) create mode 100644 docs/driver-creation.md diff --git a/docs/driver-creation.md b/docs/driver-creation.md new file mode 100644 index 0000000..a0b9d9f --- /dev/null +++ b/docs/driver-creation.md @@ -0,0 +1,631 @@ +# Driver Creation Guide (Deep 
Walkthrough) + +This guide is a deep, practical walkthrough for adding a new TaskHarbor driver so it: + +- Matches the runtime contract and semantics +- Passes the conformance suite +- Plugs into the CLI (list/inspect/DLQ) +- Can be benchmarked using the stress runner + +**Repo structure assumptions** (matches the current TaskHarbor layout): + +``` +taskharbor/driver/* # drivers +cmd/taskharbor/* # CLI +conformance/* # conformance suite +stress/* # stress runner +``` + +_Last updated: 2026-02-24 UTC_ + +--- + +## Quick Mental Model + +There are three layers you'll touch: + +**1. Runtime contract (required)** + +`Client` and `Worker` talk only to `taskharbor/driver.Driver`. If your driver implements `Driver` correctly, TaskHarbor can enqueue and process jobs on it. + +**2. Tooling contract (optional but expected)** + +CLI `list`/`inspect`/`dlq` commands talk to `taskharbor/driver.Admin`. If you don't implement `Admin`, enqueue/worker run can still work, but CLI admin commands will fail with a clear message. + +**3. Benchmark harness (optional but recommended)** + +The stress runner uses `driver.Driver` for actual work, plus a small wrapper that can `Reset` the backend and report `Progress` counts. 
+ +--- + +## The Two Contracts You Must Implement + +### `Driver` (required) — `taskharbor/driver/driver.go` + +| Method | Signature | +| ------------- | -------------------------------------------------------------------- | +| `Enqueue` | `(ctx, rec) (storedID string, existed bool, err error)` | +| `Reserve` | `(ctx, queue, now, leaseFor) (JobRecord, Lease, ok bool, err error)` | +| `ExtendLease` | `(ctx, id, token, now, leaseFor) (Lease, error)` | +| `Ack` | `(ctx, id, token, now) error` | +| `Retry` | `(ctx, id, token, now, upd RetryUpdate) error` | +| `Fail` | `(ctx, id, token, now, reason string) error` | +| `Close` | `() error` | + +### `Admin` (optional) — `taskharbor/driver/admin.go` + +| Method | Signature | +| ------------ | -------------------------------------- | +| `Inspect` | `(ctx, id, now) (JobInfo, error)` | +| `List` | `(ctx, ListRequest) (ListPage, error)` | +| `RequeueDLQ` | `(ctx, id, now, options) error` | + +> If you want your driver to feel first-class, implement both. 
+ +--- + +## File Map + +**Core contract and semantics:** + +``` +docs/driver-contract.md +docs/semantics.md +taskharbor/driver/driver.go +taskharbor/driver/admin.go +conformance/* +``` + +**Drivers (copy patterns from these):** + +``` +taskharbor/driver/memory/* +taskharbor/driver/postgres/* +taskharbor/driver/redis/* +``` + +**CLI wiring:** + +``` +cmd/taskharbor/internal/backend/backend.go +cmd/taskharbor/internal/app/* +``` + +**Stress runner wiring:** + +``` +stress/backend-factory.go +stress/backend-<name>.go +stress/config.go +stress/main.go +stress/handler.go +stress/progress.go +stress/dashboard.go +``` + +--- + +## Part 1: Implementing a New Driver (Runtime) + +### Step 1: Create the Driver Package + +``` +taskharbor/driver/<name>/ +├── <name>.go # driver implementation +├── options.go # connection / options / config +├── admin.go # Admin interface implementation +├── conformance_test.go # runs the shared conformance suite +└── <name>_test.go # unit / integration tests +``` + +- The package name should match the folder name. +- Exported types generally follow `Driver`, `Options`, etc. + +--- + +### Step 2: Understand What a `JobRecord` Represents + +`JobRecord` is the driver-level representation of a job. 
Key fields: + +| Field | Type | Notes | +| ---------------- | --------------- | ------------------------------------------------------------------------ | +| `ID` | `string` | Unique job ID, generated by `Client` | +| `Type` | `string` | Routes to handler name | +| `Payload` | `[]byte` | Already encoded by codec in `Client` — drivers store bytes, never decode | +| `Queue` | `string` | Queue name | +| `RunAt` | `time.Time` | Scheduled time; zero means runnable now | +| `Timeout` | `time.Duration` | Per-job timeout; zero means no timeout | +| `IdempotencyKey` | `string` | For enqueue deduplication (optional) | +| `CreatedAt` | `time.Time` | — | +| `Attempts` | `int` | — | +| `MaxAttempts` | `int` | — | +| `LastError` | `string` | — | +| `FailedAt` | `time.Time` | — | + +> Drivers do **not** decode `Payload` and do **not** decide retry delays or policies — `Worker` decides and passes `RetryUpdate.RunAt`. + +--- + +### Step 3: Implement `Enqueue` + +``` +Enqueue(ctx, rec) (storedID string, existed bool, err error) +``` + +**Behavior requirements:** + +- Validate `rec` (call `rec.Validate()` or apply the same constraints) +- Store the record including payload bytes and all metadata +- Treat `RunAt=zero` as runnable now — `Reserve` handles the due/future distinction +- Implement idempotency when `IdempotencyKey` is non-empty + +**Idempotency expectations:** + +| Scenario | Return | +| ------------------ | ---------------------------- | +| Key already exists | `(existingJobID, true, nil)` | +| Key does not exist | `(newJobID, false, nil)` | + +**Practical implementations:** + +- **SQL:** unique constraint on `(queue, idempotency_key)` with `ON CONFLICT` returning existing ID +- **Redis:** `SETNX` / `HSETNX` on an idempotency index key, then reference job ID + +**Common pitfalls:** + +- Returning `existed=true` but a new ID (breaks deduplication) +- Deduping across all queues when the contract is queue-scoped (decide and document your choice) + +--- + +### Step 4: 
Implement `Reserve` _(most important method)_ + +``` +Reserve(ctx, queue, now, leaseFor) (JobRecord, Lease, ok bool, err error) +``` + +`Reserve` is the heart of correctness. The goal: return exactly one eligible job, atomically move it to inflight, assign a lease, and never double-lease. + +**Eligibility rules:** + +- `queue` matches +- Status is `ready`, or `scheduled` but due (`run_at <= now`) +- Not inflight, OR inflight but `lease_expires_at <= now` (expired lease can be reclaimed) + +**Lease requirements:** + +- Generate a random lease token +- Store it with the job +- Set `lease_expires_at = now + leaseFor` +- Return `Lease{Token, ExpiresAt}` + +**Input validation:** + +- `leaseFor` must be `> 0` → `ErrInvalidLeaseDuration` +- `queue` must not be empty → `ErrQueueRequired` + +**Atomicity:** + +| Backend | Approach | +| ------- | ------------------------------------------------------------------ | +| SQL | Single transaction: `SELECT ... FOR UPDATE SKIP LOCKED` + `UPDATE` | +| Redis | Lua script: atomically pop from ready and write lease fields | + +**Return values:** + +- `ok=false` — no eligible job right now +- `ok=true` — job was reserved +- `err != nil` — actual failure (DB connectivity, script failure, etc.) + +**Common pitfalls:** + +- Selecting a job then updating it without locking → race condition, double lease +- Not reclaiming expired inflight jobs → stuck jobs +- Reclaiming too aggressively → releasing non-expired inflight jobs + +--- + +### Step 5: Implement `ExtendLease` + +``` +ExtendLease(ctx, id, token, now, leaseFor) (Lease, error) +``` + +**Behavior:** Validate the job is inflight and the token matches, then push `lease_expires_at = now + leaseFor`. 
+ +**Errors:** + +| Condition | Error | +| ------------------ | ------------------------------- | +| Job not found | `ErrJobNotFound` | +| Not inflight | `ErrJobNotInflight` | +| Token mismatch | `ErrLeaseMismatch` | +| Lease expired | `ErrLeaseExpired` (if enforced) | +| Invalid `leaseFor` | `ErrInvalidLeaseDuration` | + +--- + +### Step 6: Implement `Ack` + +``` +Ack(ctx, id, token, now) error +``` + +**Behavior:** + +- Validate job exists, is inflight, and token matches +- Mark terminal success (`status = done`) +- Clear lease fields (token, expiry) +- Job must **never** become reservable again + +**Idempotency:** If the job is already `done`, return `nil` — do not re-run side effects. + +--- + +### Step 7: Implement `Retry` + +``` +Retry(ctx, id, token, now, upd RetryUpdate) error +``` + +**`RetryUpdate` fields:** `RunAt`, `Attempts`, `LastError`, `FailedAt` + +**Behavior:** + +- Validate inflight + token match +- Update `attempts`, `last_error`, `failed_at` +- Set `run_at = upd.RunAt` (zero means runnable now) +- Clear inflight lease fields +- Move job back to `scheduled` if `run_at` is future, else `ready` + +> **Separation of responsibilities:** `Worker` computes backoff policy and sets `upd.RunAt`. The driver must not invent backoff or jitter. + +--- + +### Step 8: Implement `Fail` _(DLQ path)_ + +``` +Fail(ctx, id, token, now, reason string) error +``` + +**Behavior:** + +- Validate inflight + token match +- Mark terminal failure (`status = dlq`) +- Record DLQ reason and failed timestamp +- Clear lease fields +- Job must **never** become reservable again unless explicitly requeued + +**Idempotency:** `Fail` called twice must not duplicate DLQ entries or mutate counters incorrectly. 
+ +--- + +### Step 9: Implement `Close` + +- Release connections +- Make further operations return a clear "driver closed" error +- Must be safe to call multiple times + +--- + +## Part 2: Implementing `driver.Admin` (CLI Support) + +If you skip `Admin`, `enqueue` and `worker run` still work — but `list`/`inspect`/`dlq` commands will error for your driver. + +### What the CLI Expects + +| CLI command | Uses | +| ------------------ | ------------------------ | +| `list` | `List` filtered by state | +| `inspect <id>` | `Inspect` | +| `dlq list` | `List` filtered to `dlq` | +| `dlq requeue <id>` | `RequeueDLQ` | +| `job retry <id>` | `RequeueDLQ` (alias) | + +> **Admin must not lie:** if `List` says `inflight`, `Reserve` must not lease it again (unless expired). If `List` says `ready`, `Reserve` must be able to reserve it. + +--- + +### Implement `Inspect` + +``` +Inspect(ctx, id, now) -> JobInfo +``` + +`JobInfo` must include: + +- `Record`: the full `JobRecord` +- `State`: `ready` / `scheduled` / `inflight` / `dlq` / `done` +- Lease info when `inflight` (token, expiry) +- DLQ info when `dlq` (reason, failed time) + +--- + +### Implement `List` (Stable Order + Cursor Pagination) + +``` +List(ctx, req) -> ListPage +``` + +**Requirements:** + +- Stable ordering across calls — recommended: `created_at asc`, `id asc` +- Cursor token represents the last returned row for consistent paging +- Use `EncodeCursor`/`DecodeCursor` helpers from `taskharbor/driver/admin.go` + +--- + +### Implement `RequeueDLQ` + +``` +RequeueDLQ(ctx, id, now, opt) error +``` + +**Behavior:** + +- Only valid for `dlq` jobs +- Clear DLQ markers and lease fields +- Make job `ready` or `scheduled` based on `opt.RunAt` +- If `opt.ResetAttempts` is `true`, reset `attempts` and clear `last_error`/`failed_at` + +--- + +## Part 3: Wiring the Driver into the CLI + +Backend selection lives in `cmd/taskharbor/internal/backend/backend.go`. + +**To add your driver:** + +1. Add a `case` in `Open()` for your driver name +2. 
Parse required config (DSN, URL, credentials) +3. Construct the driver instance and return it + +If you need new global flags, update: + +``` +cmd/taskharbor/internal/app/app.go # declare flags +cmd/taskharbor/internal/app/usage.go # document flags +``` + +**Example switch case:** + +```go +switch cfg.DriverType { +case "postgres": + // ... +case "redis": + // ... +case "memory": + // ... +case "yourdriver": + d, err := yourdriver.New(ctx, ...) + return backend{Driver: d}, err +default: + return backend{}, fmt.Errorf("unsupported driver %q", cfg.DriverType) +} +``` + +**CLI support matrix:** + +| Implements | Supported commands | +| ------------------ | --------------------------------------------------------- | +| `Driver` only | `worker run`, `enqueue` | +| `Driver` + `Admin` | Everything — `list`, `inspect`, `dlq`, `requeue`, `retry` | + +--- + +## Part 4: Adding Stress Runner Support + +The stress runner compares drivers under load using the same workload. It expects a `driver.Driver` implementation plus a backend wrapper that can `Reset` state and report `Progress` counts. + +### Step 1: Update `stress/config.go` + +Add fields to `Config` and parse them from flags/env. Follow existing patterns: + +| Driver | Env var | Flags | +| -------- | --------------------- | --------------------------------------------------------------------- | +| Postgres | `TASKHARBOR_TEST_DSN` | `-pg-max-conns` | +| Redis | `REDIS_ADDR` | `-redis-db`, `-redis-prefix`, `-redis-pool-size`, `-redis-reset-mode` | + +Define env vars, add flags, and validate config under the `cfg.DriverType` switch. 
+ +--- + +### Step 2: Add `stress/backend-<name>.go` + +Return the backend wrapper used by `stress/main.go`: + +```go +type backend struct { + Driver driver.Driver + CloseFn func() error + ResetFn func(context.Context) error + ProgFn func(context.Context, []string, time.Time) (Progress, error) + DriverID string +} +``` + +**`ResetFn` responsibilities:** + +- Clear all jobs and indexes for a clean run +- Use namespace/table/prefix isolation — don't wipe unrelated data + +**`ProgFn` responsibilities — return counts for:** + +`Done` · `DLQ` · `Ready` · `Scheduled` · `Inflight` + +> If your backend can't query `Done` cheaply, set `DoneApprox=true` and use `okCount` from `stress/handler.go` as an approximation (same approach as memory/redis). + +--- + +### Step 3: Register in `stress/backend-factory.go` + +```go +switch cfg.DriverType { +case "postgres": + return newPostgresBackend(ctx, cfg) +case "redis": + return newRedisBackend(ctx, cfg) +case "memory": + return newMemoryBackend(ctx, cfg) +case "yourdriver": + return newYourDriverBackend(ctx, cfg) +default: + return backend{}, ErrInvalidDriver +} +``` + +--- + +### Step 4: Run the Stress Runner + +**Postgres example:** + +```bash +export TASKHARBOR_TEST_DSN='postgres://taskharbor:taskharbor@localhost:5432/taskharbor_test?sslmode=disable' + +go run ./stress \ + -driver postgres \ + -jobs 50000 \ + -queues 10 \ + -workers-per-queue 2 \ + -concurrency 5 \ + -poll-ms 10 \ + -heartbeat-ms 50 \ + -max-attempts 1 \ + -retry-pol=false \ + -flaky-pct 0 \ + -fail-pct 10 \ + -spawn-every 0 \ + -schedule-every 0 \ + -work-min-ms 1 \ + -work-max-ms 3 \ + -body-bytes 256 \ + -reset=true \ + -print-ms 100 \ + -timeout-secs 600 +``` + +**Redis example:** + +```bash +export REDIS_ADDR='localhost:6379' + +go run ./stress \ + -driver redis \ + -jobs 50000 \ + -queues 10 \ + -workers-per-queue 2 \ + -concurrency 5 \ + -poll-ms 10 \ + -heartbeat-ms 50 \ + -max-attempts 1 \ + -retry-pol=false \ + -flaky-pct 0 \ + -fail-pct 10 \ + -spawn-every 0 \ + 
-schedule-every 0 \ + -work-min-ms 1 \ + -work-max-ms 3 \ + -body-bytes 256 \ + -reset=true \ + -redis-db 0 \ + -redis-prefix taskharbor_stress \ + -redis-pool-size 128 \ + -redis-reset-mode scan \ + -print-ms 100 \ + -timeout-secs 600 +``` + +--- + +## Part 5: Conformance + +Conformance keeps semantics consistent across all drivers. + +**What to do:** + +1. Copy an existing `conformance_test.go` from `memory`, `postgres`, or `redis` +2. Change the driver factory to build your driver +3. Ensure `go test ./...` picks it up + +**If your driver needs external infrastructure:** + +- Add a `docker-compose` service +- Set env vars in CI (same pattern as Postgres) +- Skip tests locally when the env isn't set + +--- + +## PR Checklist + +Copy this into your pull request description: + +``` +Driver implementation +- [ ] Enqueue: supports idempotency (storedID, existed) +- [ ] Reserve: atomic claim + lease token/expiry + reclaim expired inflight +- [ ] ExtendLease: strict token match +- [ ] Ack: terminal done, idempotent +- [ ] Retry: updates attempts/last_error/failed_at/run_at, makes runnable/scheduled +- [ ] Fail: terminal dlq, records dlq reason/time, idempotent +- [ ] Close: safe cleanup + +Admin implementation +- [ ] Inspect: returns JobInfo with lease/dlq metadata +- [ ] List: stable ordering + cursor pagination +- [ ] RequeueDLQ: dlq -> ready/scheduled with optional reset + +Everything else +- [ ] Add conformance test for <name> +- [ ] Wire CLI backend selection for --driver <name> +- [ ] (Optional) Add stress backend wrapper + config +- [ ] Add docs/<name>.md and update README.md + +Tests +- [ ] go test ./... +- [ ] go test -race ./... (if concurrency was touched) +- [ ] (Optional) go run ./stress -driver ... +``` + +--- + +## Common Pitfalls + +**1. Memory vs persistent backends** + +The memory driver is per-process. Never use it to validate multi-terminal CLI flows. + +**2. `Reserve` atomicity** + +If `Reserve` is not atomic, you will see double leases under concurrency. + +**3. 
Lease reclaim corner cases** + +Inflight jobs whose lease has expired must become reservable again — but only when `lease_expires_at <= now`. + +**4. DLQ duplication** + +`Fail` called twice must not create two DLQ entries or corrupt counters. + +**5. `Admin` vs runtime mismatch** + +- If `Admin List` reports `ready`, `Reserve` must be able to reserve it. +- If `Admin List` reports `inflight`, `Reserve` must not return it unless the lease is expired. + +**6. Cursor pagination stability** + +If ordering changes between pages, results will be skipped or duplicated. Pick a stable ordering and never deviate from it. + +--- + +## Where to Look When Something Fails + +| Symptom | Where to look | +| -------------------- | ------------------------------------------------------------------------------ | +| Conformance failures | `conformance/cases_*.go` — exact expected behavior per operation | +| Reserve / lease bugs | Your `Reserve`/`Ack`/`ExtendLease` + any atomic backend queries or Lua scripts | +| CLI failures | `cmd/taskharbor/internal/backend/backend.go`, `cmd/taskharbor/internal/app/*` | +| State bugs | Compare your transitions against `docs/semantics.md` | + +---