Skip to content

Commit a3b7a9c

Browse files
committed
feat: cache downloaded packages
1 parent fd8e8b3 commit a3b7a9c

4 files changed

Lines changed: 115 additions & 68 deletions

File tree

download/download.go

Lines changed: 69 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ package download
33

44
import (
55
"archive/zip"
6-
"errors"
76
"fmt"
87
"io"
98
"net/http"
109
"os"
10+
"path/filepath"
1111
"runtime"
1212
"strings"
1313

@@ -18,8 +18,8 @@ import (
1818
)
1919

2020
const (
21-
LatestVersion = "latest"
22-
duckDbReleasesRoot = "https://github.com/duckdb/duckdb/releases"
21+
LatestVersion = "latest"
22+
PreviewVersion = "preview"
2323
)
2424

2525
type BinType int
@@ -29,38 +29,67 @@ const (
2929
BinTypeCli
3030
)
3131

32+
// Prefix is found in the beginning of some archive and file names in DuckDB packages
33+
func (typ BinType) Prefix() string {
34+
var prefix string
35+
switch typ {
36+
case BinTypeCli:
37+
prefix = "duckdb_cli"
38+
case BinTypeDynLib:
39+
prefix = "libduckdb"
40+
default:
41+
panic("unhandled spec type")
42+
}
43+
return prefix
44+
}
45+
46+
// Spec defines the desired DuckDB binary and download options
47+
// Use DefaultSpec() to get a recommended configuration. The zero value is also valid.
3248
type Spec struct {
3349
// Type of binary to download (enum)
3450
Type BinType
3551

3652
// DuckDB version, defaults to latest
53+
// Supported values are either plain semantic version with optional 'v' prefix - e.g. 1.2.2, v1.3.2,
54+
// or "latest" - latest release version
55+
// or "preview" - latest preview version from https://duckdb.org/docs/installation/?version=main
3756
Version string
3857

39-
// Target OS defaults to runtime.GOOS
58+
// Target OS, defaults to runtime.GOOS
4059
OS string
4160

42-
// Target arch defaults to runtime.GOARCH
61+
// Target arch, defaults to runtime.GOARCH
4362
Arch string
4463

45-
// Overwrite forces downloading a file even if there is an existing appropriate in the working directory
46-
// The definition of "appropriate" will evolve over time - for now, all existing files are accepted
64+
// CacheDownload enables caching the bundle downloaded from the Internet in the temp directory,
65+
// if the server supports it by exposing ETag and Content-Length headers.
66+
// CacheDownload is independent of the Overwrite setting.
67+
CacheDownload bool
68+
69+
// Overwrite forces overwriting the final file even if there is an existing appropriate file in the working directory
70+
// The definition of "appropriate" will evolve over time - for now, all existing files are accepted.
4771
Overwrite bool
4872
}
4973

74+
// DefaultSpec creates a recommended spec for downloading releases
75+
// The zero-value of Spec is also a valid configuration.
76+
// NB: Changes to the default spec are not considered breaking changes and may happen in a
77+
// minor release. They won't happen in patch releases.
5078
func DefaultSpec() Spec {
5179
return Spec{
52-
Type: BinTypeDynLib,
53-
Version: LatestVersion,
54-
OS: runtime.GOOS,
55-
Arch: runtime.GOARCH,
80+
Type: BinTypeDynLib,
81+
Version: LatestVersion,
82+
CacheDownload: true,
83+
OS: runtime.GOOS,
84+
Arch: runtime.GOARCH,
5685
}
5786
}
5887

5988
type Result struct {
6089
OutputFile string
61-
// Download may be false if there was an existing appropriate file and Spec.Overwrite was false
90+
// OutputWritten may be false if there was an existing appropriate file and Spec.Overwrite was false
6291
// See Spec.Overwrite for details.
63-
Downloaded bool
92+
OutputWritten bool
6493
}
6594

6695
// Do downloads a DuckDB release
@@ -75,15 +104,17 @@ func Do(spec Spec) (Result, error) {
75104
if !spec.Overwrite && existsAppropriate(entryName) {
76105
return res, nil
77106
}
78-
res.Downloaded = true
107+
res.OutputWritten = true
79108
path := getZipDownloadUrl(spec)
80-
tmpFile, err := fetchZip(path)
109+
tmpFile, err := fetchZip(path, spec.CacheDownload)
81110
if err != nil {
82111
return res, err
83112
}
84-
defer func() {
85-
_ = os.Remove(tmpFile)
86-
}()
113+
if !spec.CacheDownload {
114+
defer func() {
115+
_ = os.Remove(tmpFile)
116+
}()
117+
}
87118
return res, processZip(spec, entryName, tmpFile)
88119
}
89120

@@ -108,24 +139,6 @@ func existsAppropriate(fileName string) bool {
108139
return err == nil && fi.Mode().IsRegular()
109140
}
110141

111-
func getGithubURL(spec Spec) string {
112-
archivePrefix := getPrefixByType(spec.Type)
113-
return fmt.Sprintf("%s/download/%s/%s-%s-%s.zip", duckDbReleasesRoot, spec.Version, archivePrefix, spec.OS, spec.Arch)
114-
}
115-
116-
func getPrefixByType(typ BinType) string {
117-
var prefix string
118-
switch typ {
119-
case BinTypeCli:
120-
prefix = "duckdb_cli"
121-
case BinTypeDynLib:
122-
prefix = "libduckdb"
123-
default:
124-
panic("unhandled spec type")
125-
}
126-
return prefix
127-
}
128-
129142
func normalizeSpec(spec Spec) (Spec, error) {
130143
spec.Arch = strings.ToLower(spec.Arch)
131144
spec.OS = strings.ToLower(spec.OS)
@@ -157,30 +170,6 @@ func normalizeSpec(spec Spec) (Spec, error) {
157170
return spec, err
158171
}
159172

160-
func getLatestVersionPath() (string, error) {
161-
redirectErr := errors.New("redirect")
162-
client := http.Client{
163-
CheckRedirect: func(req *http.Request, via []*http.Request) error {
164-
return redirectErr
165-
},
166-
}
167-
const latestUrl = duckDbReleasesRoot + "/latest"
168-
resp, err := client.Head(latestUrl)
169-
if errors.Is(err, redirectErr) {
170-
location := resp.Header.Get("Location")
171-
prefix := duckDbReleasesRoot + "/tag/"
172-
if !strings.HasPrefix(location, prefix) {
173-
return "", fmt.Errorf("unexpected release redirect location: %s", location)
174-
}
175-
return location[len(prefix):], nil
176-
}
177-
if err != nil {
178-
return "", fmt.Errorf("HEAD failed for %s: %w", latestUrl, err)
179-
}
180-
_ = resp.Body.Close()
181-
return "", fmt.Errorf("redirect expected for %s but got code %d", latestUrl, resp.StatusCode)
182-
}
183-
184173
func extractOne(zipFile string, name string) error {
185174
zipReader, err := zip.OpenReader(zipFile)
186175
if err != nil {
@@ -254,16 +243,34 @@ func getCliName(targetOS string) string {
254243
return name
255244
}
256245

257-
func fetchZip(url string) (string, error) {
246+
func fetchZip(url string, useEtag bool) (string, error) {
247+
// It *may* be more efficient (for whom?) to issue a HEAD request first for the ETag and Content-Length.
248+
// We can't use If-None-Match because we don't know in advance which cached file is for which spec.
249+
// We could encode the entire spec in the cached file name but the complexity would not be worth it.
258250
resp, err := http.Get(url)
259251
if err != nil {
260252
return "", genericDownloadErr(url, err)
261253
}
262254
if resp.StatusCode != http.StatusOK {
263255
return "", fmt.Errorf("HTTP error when trying to download %s: %d", url, resp.StatusCode)
264256
}
257+
etagHeader := resp.Header.Get("ETag")
258+
contentLength := resp.ContentLength
265259
defer helperr.CloseQuietly(resp.Body)
266-
tmpZip, err := os.CreateTemp("", "getaduck")
260+
var tmpZip *os.File
261+
if !useEtag && etagHeader != "" {
262+
tmpZip, err = os.CreateTemp("", "getaduck")
263+
} else {
264+
fileName := fmt.Sprintf("getaduck.zip.etag_%s", etagHeader)
265+
fileName = filepath.Join(os.TempDir(), fileName)
266+
if info, statErr := os.Stat(fileName); statErr == nil {
267+
if info.Size() == contentLength {
268+
return fileName, nil
269+
}
270+
}
271+
272+
tmpZip, err = os.Create(fileName)
273+
}
267274
if err != nil {
268275
return "", fmt.Errorf("failed to create temp file: %w", err)
269276
}

download/download_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import (
99

1010
func TestDo(t *testing.T) {
1111
if testing.Short() {
12-
t.Skip("skipping test that downloads from Github in short mode.")
12+
t.Skip("skipping test that downloads from the Internet in short mode.")
1313
}
1414
t.Run("default lib", func(t *testing.T) {
1515
for _, version := range []string{
@@ -29,6 +29,7 @@ func TestDo(t *testing.T) {
2929
spec.Version = version
3030
spec.Arch = arch
3131
spec.Overwrite = true
32+
spec.CacheDownload = true
3233
res, err := download.Do(spec)
3334
require.NoError(t, err)
3435
require.FileExists(t, res.OutputFile)

download/github.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package download
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"net/http"
7+
"strings"
8+
)
9+
10+
// Code to download from GitHub - applies to semver and latest releases
11+
12+
const (
13+
duckDbReleasesRoot = "https://github.com/duckdb/duckdb/releases"
14+
)
15+
16+
func getLatestVersionPath() (string, error) {
17+
redirectErr := errors.New("redirect")
18+
client := http.Client{
19+
CheckRedirect: func(req *http.Request, via []*http.Request) error {
20+
return redirectErr
21+
},
22+
}
23+
const latestUrl = duckDbReleasesRoot + "/latest"
24+
resp, err := client.Head(latestUrl)
25+
if errors.Is(err, redirectErr) {
26+
location := resp.Header.Get("Location")
27+
prefix := duckDbReleasesRoot + "/tag/"
28+
if !strings.HasPrefix(location, prefix) {
29+
return "", fmt.Errorf("unexpected release redirect location: %s", location)
30+
}
31+
return location[len(prefix):], nil
32+
}
33+
if err != nil {
34+
return "", fmt.Errorf("HEAD failed for %s: %w", latestUrl, err)
35+
}
36+
_ = resp.Body.Close()
37+
return "", fmt.Errorf("redirect expected for %s but got code %d", latestUrl, resp.StatusCode)
38+
}
39+
40+
func getGithubURL(spec Spec) string {
41+
return fmt.Sprintf("%s/download/%s/%s-%s-%s.zip", duckDbReleasesRoot, spec.Version, spec.Type.Prefix(), spec.OS, spec.Arch)
42+
}

download/preview.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@ import (
77
"github.com/ansel1/merry/v2"
88
)
99

10-
const (
11-
PreviewVersion = "preview"
12-
)
10+
// Downloading preview releases
1311

1412
func getPreviewZipUrl(spec Spec) string {
1513
// https://artifacts.duckdb.org/latest/duckdb-binaries-osx.zip
@@ -45,7 +43,6 @@ func getInnerZipName(spec Spec) string {
4543
// libduckdb-windows-amd64.zip
4644
// duckdb_cli-linux-amd64.zip
4745
// libduckdb-linux-amd64.zip
48-
prefix := getPrefixByType(spec.Type)
4946
// For osx, spec.Arch has been normalized to universal in normalizeSpec
50-
return fmt.Sprintf("%s-%s-%s.zip", prefix, spec.OS, spec.Arch)
47+
return fmt.Sprintf("%s-%s-%s.zip", spec.Type.Prefix(), spec.OS, spec.Arch)
5148
}

0 commit comments

Comments
 (0)