-
Notifications
You must be signed in to change notification settings - Fork 158
Adding checkpointing improvement #1062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,12 @@ import ( | |
| "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" | ||
| ) | ||
|
|
||
| const CheckpointVersion = "v2" | ||
|
|
||
| type Checkpoint struct { | ||
| // Version records the latest checkpoint version written, allowing readers | ||
| // to quickly determine the format without inspecting nested fields. | ||
| Version string `json:"version,omitempty"` | ||
| // Note: The Checksum below is only associated with the V1 checkpoint | ||
| // (because it doesn't have an embedded one). All future versions have | ||
| // their checksum directly embedded in them to better support | ||
|
|
@@ -32,6 +37,94 @@ type Checkpoint struct { | |
| Checksum checksum.Checksum `json:"checksum"` | ||
| V1 *CheckpointV1 `json:"v1,omitempty"` | ||
| V2 *CheckpointV2 `json:"v2,omitempty"` | ||
| // other holds unknown fields from a newer checkpoint format | ||
| other map[string]json.RawMessage | ||
| } | ||
|
|
||
| // MarshalJSON implements json.Marshaler, merging known fields with any | ||
| // unknown fields captured from a newer checkpoint format. | ||
| func (cp *Checkpoint) MarshalJSON() ([]byte, error) { | ||
| type Alias struct { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be |
||
| Version string `json:"version,omitempty"` | ||
| Checksum checksum.Checksum `json:"checksum"` | ||
| V1 *CheckpointV1 `json:"v1,omitempty"` | ||
| V2 *CheckpointV2 `json:"v2,omitempty"` | ||
| } | ||
| known, err := json.Marshal(&Alias{ | ||
| Version: cp.Version, | ||
| Checksum: cp.Checksum, | ||
| V1: cp.V1, | ||
| V2: cp.V2, | ||
| }) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| if len(cp.other) == 0 { | ||
| return known, nil | ||
| } | ||
| var merged map[string]json.RawMessage | ||
| if err := json.Unmarshal(known, &merged); err != nil { | ||
| return nil, err | ||
| } | ||
| for k, v := range cp.other { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should do a quick unit test around this maybe. |
||
| merged[k] = v | ||
| } | ||
| return json.Marshal(merged) | ||
| } | ||
|
|
||
| // UnmarshalJSON implements json.Unmarshaler, populating known fields and | ||
| // preserving any unrecognised fields (future versions) in cp.other. | ||
| func (cp *Checkpoint) UnmarshalJSON(data []byte) error { | ||
| type Alias struct { | ||
| Version string `json:"version,omitempty"` | ||
| Checksum checksum.Checksum `json:"checksum"` | ||
| V1 *CheckpointV1 `json:"v1,omitempty"` | ||
| V2 *CheckpointV2 `json:"v2,omitempty"` | ||
| } | ||
| var alias Alias | ||
| if err := json.Unmarshal(data, &alias); err != nil { | ||
| return err | ||
| } | ||
| cp.Version = alias.Version | ||
| cp.Checksum = alias.Checksum | ||
| cp.V1 = alias.V1 | ||
| cp.V2 = alias.V2 | ||
|
|
||
| var all map[string]json.RawMessage | ||
| if err := json.Unmarshal(data, &all); err != nil { | ||
| return err | ||
| } | ||
| delete(all, "version") | ||
| delete(all, "checksum") | ||
| delete(all, "v1") | ||
| delete(all, "v2") | ||
| if len(all) > 0 { | ||
| cp.other = all | ||
| } else { | ||
| cp.other = nil | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func (cp *Checkpoint) DeepCopy() *Checkpoint { | ||
| if cp == nil { | ||
| return nil | ||
| } | ||
| out := &Checkpoint{ | ||
| Version: cp.Version, | ||
| Checksum: cp.Checksum, | ||
| V1: cp.V1.DeepCopy(), | ||
| V2: cp.V2.DeepCopy(), | ||
| } | ||
| if len(cp.other) > 0 { | ||
| out.other = make(map[string]json.RawMessage, len(cp.other)) | ||
| for k, v := range cp.other { | ||
| raw := make(json.RawMessage, len(v)) | ||
| copy(raw, v) | ||
| out.other[k] = raw | ||
| } | ||
| } | ||
| return out | ||
| } | ||
|
|
||
| func (cp *Checkpoint) ToLatestVersion() *Checkpoint { | ||
|
|
@@ -52,25 +145,27 @@ func (cp *Checkpoint) ToLatestVersion() *Checkpoint { | |
|
|
||
| func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) { | ||
| cp = cp.ToLatestVersion() | ||
| cp.Version = CheckpointVersion | ||
| cp.V1 = cp.V2.ToV1() | ||
| if err := cp.SetChecksumV1(); err != nil { | ||
| return nil, fmt.Errorf("error setting v1 checksum: %v", err) | ||
| } | ||
| if err := cp.SetChecksumV2(); err != nil { | ||
| return nil, fmt.Errorf("error setting v2 checksum: %v", err) | ||
| } | ||
| return json.Marshal(*cp) | ||
| return json.Marshal(cp) | ||
| } | ||
|
|
||
| // SetChecksumV1 computes and sets the V1 checksum, which covers only the | ||
| // V1 view of the checkpoint (Version, V2, and other are excluded so that | ||
| // older drivers computing the same checksum get identical JSON). | ||
| func (cp *Checkpoint) SetChecksumV1() error { | ||
| v2 := cp.V2 | ||
| cp.V2 = nil | ||
| defer func() { | ||
| cp.V2 = v2 | ||
| }() | ||
|
|
||
| cp.Checksum = 0 | ||
| out, err := json.Marshal(*cp) | ||
| type v1View struct { | ||
| Checksum checksum.Checksum `json:"checksum"` | ||
| V1 *CheckpointV1 `json:"v1,omitempty"` | ||
| } | ||
| view := v1View{V1: cp.V1} | ||
| out, err := json.Marshal(view) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -126,22 +221,19 @@ func (cp *Checkpoint) VerifyChecksum() error { | |
| return nil | ||
| } | ||
|
|
||
| // VerifyChecksumV1 verifies the V1 checksum using the same V1-only view that | ||
| // SetChecksumV1 used, ensuring older drivers can also verify successfully. | ||
| func (cp *Checkpoint) VerifyChecksumV1() error { | ||
| ck := cp.Checksum | ||
| v2 := cp.V2 | ||
| cp.V2 = nil | ||
| defer func() { | ||
| cp.Checksum = ck | ||
| cp.V2 = v2 | ||
| }() | ||
|
|
||
| cp.Checksum = 0 | ||
| out, err := json.Marshal(*cp) | ||
| type v1View struct { | ||
| Checksum checksum.Checksum `json:"checksum"` | ||
| V1 *CheckpointV1 `json:"v1,omitempty"` | ||
| } | ||
| view := v1View{V1: cp.V1} | ||
| out, err := json.Marshal(view) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| return ck.Verify(out) | ||
| return cp.Checksum.Verify(out) | ||
| } | ||
|
|
||
| func (cp *Checkpoint) VerifyChecksumV2() error { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| /* | ||
| Copyright The Kubernetes Authors | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| https://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package main | ||
|
|
||
| import ( | ||
| "k8s.io/dynamic-resource-allocation/kubeletplugin" | ||
| ) | ||
|
|
||
| func (d PreparedDevices) DeepCopy() PreparedDevices { | ||
| if d == nil { | ||
| return nil | ||
| } | ||
| out := make(PreparedDevices, len(d)) | ||
| for i, group := range d { | ||
| out[i] = group.DeepCopy() | ||
| } | ||
| return out | ||
| } | ||
|
|
||
| func (g *PreparedDeviceGroup) DeepCopy() *PreparedDeviceGroup { | ||
| if g == nil { | ||
| return nil | ||
| } | ||
| return &PreparedDeviceGroup{ | ||
| Devices: g.Devices.DeepCopy(), | ||
| ConfigState: g.ConfigState.DeepCopy(), | ||
| } | ||
| } | ||
|
|
||
| func (l PreparedDeviceList) DeepCopy() PreparedDeviceList { | ||
| if l == nil { | ||
| return nil | ||
| } | ||
| out := make(PreparedDeviceList, len(l)) | ||
| for i, d := range l { | ||
| out[i] = d.DeepCopy() | ||
| } | ||
| return out | ||
| } | ||
|
|
||
| func (d PreparedDevice) DeepCopy() PreparedDevice { | ||
| return PreparedDevice{ | ||
| Channel: d.Channel.DeepCopy(), | ||
| Daemon: d.Daemon.DeepCopy(), | ||
| } | ||
| } | ||
|
|
||
| func (c *PreparedComputeDomainChannel) DeepCopy() *PreparedComputeDomainChannel { | ||
| if c == nil { | ||
| return nil | ||
| } | ||
| return &PreparedComputeDomainChannel{ | ||
| Info: c.Info.DeepCopy(), | ||
| Device: deepCopyDevice(c.Device), | ||
| } | ||
| } | ||
|
|
||
| func (d *PreparedComputeDomainDaemon) DeepCopy() *PreparedComputeDomainDaemon { | ||
| if d == nil { | ||
| return nil | ||
| } | ||
| return &PreparedComputeDomainDaemon{ | ||
| Info: d.Info.DeepCopy(), | ||
| Device: deepCopyDevice(d.Device), | ||
| } | ||
| } | ||
|
|
||
| func (d *ComputeDomainChannelInfo) DeepCopy() *ComputeDomainChannelInfo { | ||
| if d == nil { | ||
| return nil | ||
| } | ||
| return &ComputeDomainChannelInfo{ID: d.ID} | ||
| } | ||
|
|
||
| func (d *ComputeDomainDaemonInfo) DeepCopy() *ComputeDomainDaemonInfo { | ||
| if d == nil { | ||
| return nil | ||
| } | ||
| return &ComputeDomainDaemonInfo{ID: d.ID} | ||
| } | ||
|
|
||
| func (d DeviceConfigState) DeepCopy() DeviceConfigState { | ||
| return DeviceConfigState{ | ||
| Type: d.Type, | ||
| ComputeDomain: d.ComputeDomain, | ||
| } | ||
| } | ||
|
|
||
| func deepCopyDevice(d *kubeletplugin.Device) *kubeletplugin.Device { | ||
| if d == nil { | ||
| return nil | ||
| } | ||
| cp := &kubeletplugin.Device{ | ||
| PoolName: d.PoolName, | ||
| DeviceName: d.DeviceName, | ||
| } | ||
| if len(d.Requests) > 0 { | ||
| cp.Requests = make([]string, len(d.Requests)) | ||
| copy(cp.Requests, d.Requests) | ||
| } | ||
| if len(d.CDIDeviceIDs) > 0 { | ||
| cp.CDIDeviceIDs = make([]string, len(d.CDIDeviceIDs)) | ||
| copy(cp.CDIDeviceIDs, d.CDIDeviceIDs) | ||
| } | ||
| if d.ShareID != nil { | ||
| uid := *d.ShareID | ||
| cp.ShareID = &uid | ||
| } | ||
| return cp | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
map[string]anymaybe? Why put the encoding nuance in the type.