diff --git a/cmd/runner/runner.go b/cmd/runner/runner.go index df46f69d..9ebe3e96 100644 --- a/cmd/runner/runner.go +++ b/cmd/runner/runner.go @@ -47,6 +47,7 @@ import ( "github.com/llm-d/llm-d-inference-payload-processor/pkg/framework/interface/datalayer/datasource" "github.com/llm-d/llm-d-inference-payload-processor/pkg/framework/interface/plugin" "github.com/llm-d/llm-d-inference-payload-processor/pkg/framework/interface/requesthandling" + modelconfigcollector "github.com/llm-d/llm-d-inference-payload-processor/pkg/framework/plugins/datalayer/modelconfigcollector" requestmetadata "github.com/llm-d/llm-d-inference-payload-processor/pkg/framework/plugins/datalayer/requestmetadata" "github.com/llm-d/llm-d-inference-payload-processor/pkg/framework/plugins/modelselector/picker/maxscore" "github.com/llm-d/llm-d-inference-payload-processor/pkg/framework/plugins/modelselector/picker/random" @@ -277,6 +278,7 @@ func (r *Runner) registerInTreePlugins() { plugin.Register(bodyfieldtoheader.BodyFieldToHeaderPluginType, bodyfieldtoheader.BodyFieldToHeaderPluginFactory) plugin.Register(basemodelextractor.BaseModelToHeaderPluginType, basemodelextractor.BaseModelToHeaderPluginFactory) plugin.Register(requestmetadata.PluginType, requestmetadata.ExtractorFactory) + plugin.Register(modelconfigcollector.PluginType, modelconfigcollector.DatasourceFactory) // register model selector plugins plugin.Register(random.RandomPickerType, random.RandomPickerFactory) plugin.Register(maxscore.MaxScorePickerType, maxscore.MaxScorePickerFactory) diff --git a/config/charts/payload-processor/README.md b/config/charts/payload-processor/README.md index 080f0d55..d9bd2b49 100644 --- a/config/charts/payload-processor/README.md +++ b/config/charts/payload-processor/README.md @@ -86,6 +86,17 @@ provider: > **Tip:** Only enable events your plugins need. Each extra event adds a network hop between the proxy and the payload processor. 
+### Configure Model List (model-config-datasource) + +`payloadProcessor.listModels` is a list of model names that the IPP should be aware of. When set, the chart renders a `models.json` file into the ConfigMap, and that file is mounted at `/config/models.json` inside the pod. The `model-config-datasource` plugin reads this file to populate the IPP datastore and watches it so that updates are picked up live when the ConfigMap is remounted. + +```yaml +payloadProcessor: + listModels: + - meta-llama/Llama-3.1-8B-Instruct + - meta-llama/Llama-3.1-70B-Instruct +``` + ### Uninstall Run the following command to uninstall the chart: @@ -111,6 +122,7 @@ The following table lists the configurable parameters of the chart. | `payloadProcessor.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. | | `payloadProcessor.flags` | map of flags which are passed through to the payload processor. Refer to [runner.go](https://github.com/llm-d/llm-d-inference-payload-processor/blob/main/cmd/payload-processor/runner/runner.go) for complete list. | | `payloadProcessor.plugins` | Custom ordered plugins array to set for the payload processor. Each plugin has fields: type, name and optionally json (which represents parameters of the plugin). If not specified, the payload processor will use by default the `body-field-to-header` to extract the `model` field, and `base-model-to-header` (in that order). | +| `payloadProcessor.listModels` | Optional list of model names. When set, a `models.json` file is rendered into the ConfigMap and mounted at `/config/models.json` in the pod. Used by the `model-config-datasource` plugin to seed the IPP datastore with known models. | | `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `istio`, `gke`. Defaults to `none`. | | `provider.supportedEvents.requestHeaders` | Enable Request Headers event. Defaults to `true`. 
| | `provider.supportedEvents.requestBody` | Enable Request Body event. Defaults to `true`. | diff --git a/config/charts/payload-processor/templates/config.yaml b/config/charts/payload-processor/templates/config.yaml index 3ce225af..4785d58d 100644 --- a/config/charts/payload-processor/templates/config.yaml +++ b/config/charts/payload-processor/templates/config.yaml @@ -26,4 +26,11 @@ data: kind: PayloadProcessorConfig {{- .Values.payloadProcessor.customConfig | toYaml | nindent 4 }} {{- end }} + {{- if .Values.payloadProcessor.listModels }} + {{- $models := list }} + {{- range .Values.payloadProcessor.listModels }} + {{- $models = append $models (dict "name" .) }} + {{- end }} + models.json: {{ dict "models" $models | toJson | quote }} + {{- end }} ---