nameOverride: ""
fullnameOverride: h2ogpt

h2ogpt:
  enabled: true
  replicaCount: 1
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent

  storage:
    size: 128Gi
    class: ebs-csi

  overrideConfig:
    base_model: h2oai/h2ogpt-4096-llama2-7b-chat
    use_safetensors: True
    prompt_type: llama2
    save_dir: /workspace/save/
    use_gpu_id: False
    score_model: None
    max_max_new_tokens: 2048
    max_new_tokens: 1024

  service:
    type: NodePort
    webPort: 80
    gptPort: 8888

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault
  
  resources:
  nodeSelector:
  tolerations:

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

tgi:
  enabled: false
  replicaCount: 1

  image:
    repository: ghcr.io/huggingface/text-generation-inference
    tag: 0.9.3
    pullPolicy: IfNotPresent

  storage:
    size: 512Gi
    class: ebs-csi
  
  overrideConfig:
  hfSecret: 
  containerArgs:

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
  securityContext:

  resources:
  nodeSelector:
  tolerations:

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

vllm:
  enabled: false
  replicaCount: 1

  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent

  imagePullSecrets:

  storage:
    size: 512Gi
    class: ebs-csi
  
  overrideConfig:

  containerArgs:
    - "--model"
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - "--tokenizer"
    - hf-internal-testing/llama-tokenizer
    - "--tensor-parallel-size"
    - 2
    - "--seed"
    - 1234
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:

  resources:

  nodeSelector:

  tolerations:

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}