# TFServingCache/config.yaml (erma07/TFServingCache, master)
# Port settings. NOTE(review): the key names suggest separate REST and gRPC
# ports for the proxy and for the cache — confirm against the consumer.
proxyRestPort: 8093
proxyGrpcPort: 8100
cacheRestPort: 8094
cacheGrpcPort: 8095

metrics:
  # Path on which the proxy endpoint publishes its metrics. The same path
  # is also used to obtain metrics from serving.
  path: '/monitoring/prometheus/metrics'
  # Timeout, in seconds.
  timeout: 3
  # When true, the model name and version are added as Prometheus labels.
  modelLabels: false

modelProvider:
  # Active provider type; its settings live in the `diskProvider` block below.
  type: diskProvider
  diskProvider:
    # Relative path. NOTE(review): presumably resolved against the process
    # working directory — confirm.
    baseDir: './model_repo'
# Disabled alternative: S3-backed provider. To switch, comment out the
# `modelProvider` block above before uncommenting this one; otherwise the
# top-level key `modelProvider` would be defined twice.
#modelProvider:
#  type: s3Provider
#  s3:
#    bucket: foo
#    basePath: models/foo/bar

modelCache:
  # Relative path. NOTE(review): presumably the host-side directory in which
  # cached models are kept — confirm against the consumer.
  hostModelPath: './models'
  # NOTE(review): the unit of `size` is not stated in this file — confirm.
  size: 30000

serving:
  servingModelPath: '/models'
  # gRPC address is given as host:port; the REST address is a full URL.
  grpcHost: 'localhost:8500'
  restHost: 'http://localhost:8501'
  maxConcurrentModels: 2
  # Timeout in seconds.
  grpcConfigTimeout: 10
  # NOTE(review): presumably also in seconds, like grpcConfigTimeout — confirm.
  grpcPredictTimeout: 60
  # Optional TFServing Prometheus metrics path. When it is not specified,
  # metrics.path is used instead.
  # metricsPath: '/monitoring/prometheus/metrics'

proxy:
  # NOTE(review): presumably the number of cache nodes each model is
  # assigned to — confirm against the consumer.
  replicasPerModel: 3
  # NOTE(review): unit not stated here; serving.grpcConfigTimeout in this
  # file is documented in seconds — confirm this one matches.
  grpcTimeout: 10

serviceDiscovery:
  # The discovery backend is chosen by `type`; the block named after the
  # backend holds its settings. The commented-out sections below are
  # alternative backends. When enabling one, comment out the active
  # `type: k8s` and `k8s:` entries so that `type` is not defined twice.
  #### CONSUL ####
  #type: consul
  #heartbeatTTL: 5
  #consul:
  #  serviceName: tfservingcache
  #  serviceId: foo1
  #### ETCD ####
  #type: etcd
  #heartbeatTTL: 5
  #etcd:
  #  serviceName: tfservingcache
  #  endpoints: ["localhost:2379"]
  #  allowLocalhost: true
  # Example credentials only; do not commit real secrets to this file.
  #  authorization:
  #    username: root
  #    password: foobar
  #### K8S (active) ####
  type: k8s
  k8s:
    # Field selector for the k8s TF serving cache pods.
    # NOTE(review): Kubernetes field selectors match exact values, so a
    # selector on metadata.name only matches an object with exactly this
    # name — confirm this fits how the pods are named.
    fieldSelector:
      metadata.name: tf-serving-cache
    # NOTE(review): presumably the names of the container ports exposing
    # the cache's gRPC and HTTP endpoints — confirm against the pod spec.
    portNames:
      grpcCache: grpccache
      httpCache: httpcache