nameOverride: "" fullnameOverride: h2ogpt h2ogpt: enabled: true replicaCount: 1 image: repository: gcr.io/vorvan/h2oai/h2ogpt-runtime pullPolicy: IfNotPresent storage: size: 128Gi class: ebs-csi overrideConfig: base_model: h2oai/h2ogpt-4096-llama2-7b-chat use_safetensors: True prompt_type: llama2 save_dir: /workspace/save/ use_gpu_id: False score_model: None max_max_new_tokens: 2048 max_new_tokens: 1024 service: type: NodePort webPort: 80 gptPort: 8888 updateStrategy: type: RollingUpdate podSecurityContext: runAsNonRoot: true runAsUser: 1000 runAsGroup: 1000 fsGroup: 1000 securityContext: runAsNonRoot: true allowPrivilegeEscalation: false capabilities: drop: - ALL seccompProfile: type: RuntimeDefault resources: nodeSelector: tolerations: podAnnotations: {} podLabels: {} autoscaling: {} tgi: enabled: false replicaCount: 1 image: repository: ghcr.io/huggingface/text-generation-inference tag: 0.9.3 pullPolicy: IfNotPresent storage: size: 512Gi class: ebs-csi overrideConfig: hfSecret: containerArgs: service: type: ClusterIP port: 8080 updateStrategy: type: RollingUpdate podSecurityContext: securityContext: resources: nodeSelector: tolerations: podAnnotations: {} podLabels: {} autoscaling: {} vllm: enabled: false replicaCount: 1 image: repository: gcr.io/vorvan/h2oai/h2ogpt-runtime pullPolicy: IfNotPresent imagePullSecrets: storage: size: 512Gi class: ebs-csi overrideConfig: containerArgs: - "--model" - h2oai/h2ogpt-4096-llama2-7b-chat - "--tokenizer" - hf-internal-testing/llama-tokenizer - "--tensor-parallel-size" - 2 - "--seed" - 1234 - "--trust-remote-code" service: type: ClusterIP port: 5000 updateStrategy: type: RollingUpdate podSecurityContext: runAsNonRoot: true runAsUser: 1000 runAsGroup: 1000 fsGroup: 1000 securityContext: runAsNonRoot: true allowPrivilegeEscalation: false capabilities: drop: - ALL seccompProfile: resources: nodeSelector: tolerations: podAnnotations: {} podLabels: {} autoscaling: {}