diff --git a/manifests/caikit/caikit-tgis-isvc-grpc.yaml b/manifests/caikit/caikit-tgis-isvc-grpc.yaml
new file mode 100644
index 00000000..d7a31832
--- /dev/null
+++ b/manifests/caikit/caikit-tgis-isvc-grpc.yaml
@@ -0,0 +1,20 @@
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
+metadata:
+  annotations:
+    serving.knative.openshift.io/enablePassthrough: "true"
+    sidecar.istio.io/inject: "true"
+    sidecar.istio.io/rewriteAppHTTPProbers: "true"
+  name: caikit-tgis-isvc-grpc
+spec:
+  predictor:
+    serviceAccountName: sa
+    model:
+      modelFormat:
+        name: caikit
+      runtime: caikit-tgis-runtime-grpc
+      storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit # single model here
+      # storageUri: proto://path/to/model # single model here
+      # Example, using a pvc:
+      # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/
+      # Target directory must contain a config.yml
\ No newline at end of file
diff --git a/manifests/caikit/caikit-tgis-servingruntime-grpc.yaml b/manifests/caikit/caikit-tgis-servingruntime-grpc.yaml
new file mode 100644
index 00000000..bec0cfc1
--- /dev/null
+++ b/manifests/caikit/caikit-tgis-servingruntime-grpc.yaml
@@ -0,0 +1,59 @@
+apiVersion: serving.kserve.io/v1alpha1
+kind: ServingRuntime
+metadata:
+  name: caikit-tgis-runtime-grpc
+spec:
+  multiModel: false
+  supportedModelFormats:
+    # Note: this currently *only* supports caikit format models
+    - autoSelect: true
+      name: caikit
+  containers:
+    - name: kserve-container
+      image: quay.io/opendatahub/text-generation-inference:stable
+      command: ["text-generation-launcher"]
+      args: ["--model-name=/mnt/models/artifacts/"]
+      env:
+        - name: TRANSFORMERS_CACHE
+          value: /tmp/transformers_cache
+      # resources: # configure as required
+      #   requests:
+      #     cpu: 8
+      #     memory: 16Gi
+      ## Note: cannot add readiness/liveness probes to this container because knative will refuse them.
+      # multi-container probing will be available after https://github.com/knative/serving/pull/14853 is merged
+    - name: transformer-container
+      image: quay.io/opendatahub/caikit-tgis-serving:stable
+      env:
+        - name: RUNTIME_LOCAL_MODELS_DIR
+          value: /mnt/models
+        - name: TRANSFORMERS_CACHE
+          value: /tmp/transformers_cache
+        - name: RUNTIME_GRPC_ENABLED
+          value: "true"
+        - name: RUNTIME_HTTP_ENABLED
+          value: "false"
+      ports:
+        - containerPort: 8085
+          name: h2c
+          protocol: TCP
+      readinessProbe:
+        exec:
+          command:
+            - python
+            - -m
+            - caikit_health_probe
+            - readiness
+        initialDelaySeconds: 5 # might require larger values for large models
+      livenessProbe:
+        exec:
+          command:
+            - python
+            - -m
+            - caikit_health_probe
+            - liveness
+        initialDelaySeconds: 5
+      # resources: # configure as required
+      #   requests:
+      #     cpu: 8
+      #     memory: 16Gi
\ No newline at end of file
diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml
new file mode 100644
index 00000000..3a7569b8
--- /dev/null
+++ b/manifests/configmap.yaml
@@ -0,0 +1,28 @@
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: fms-orchestr8-config-nlp
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
+data:
+  config.yaml: |
+    generation:
+      provider: tgis
+      service:
+        hostname: caikit-tgis-isvc-grpc-predictor-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com
+        port: 443
+        # tls: caikit
+    detectors:
+      regex:
+        service:
+          hostname: https://regex-detector-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com/api/v1/text/contents
+          port: 443
+        chunker_id: whole_doc_chunker
+        default_threshold: 0.5
+    # tls:
+    #   caikit:
+    #     cert_path: /tls/server/tls.crt
+    #     key_path: /tls/server/tls.key
+    #     client_ca_cert_path: /tls/server/ca.crt
\ No newline at end of file
diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml
new file mode 100644
index 00000000..ec63fc54
--- /dev/null
+++ b/manifests/deployment.yaml
@@ -0,0 +1,98 @@
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: fms-orchestr8-nlp
+  annotations:
+    configmap.reloader.stakater.com/reload: 'fms-orchestr8-config-nlp' # must match the mounted ConfigMap's name or no rollout is triggered
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: fmstack-nlp
+      component: fms-orchestr8-nlp
+      deploy-name: fms-orchestr8-nlp
+  template:
+    metadata:
+      labels:
+        app: fmstack-nlp
+        component: fms-orchestr8-nlp
+        deploy-name: fms-orchestr8-nlp
+    spec:
+      volumes:
+        - name: fms-orchestr8-config-nlp
+          configMap:
+            name: fms-orchestr8-config-nlp
+            defaultMode: 420
+        - name: server-tls
+          secret:
+            secretName: caikitstack-caikit-inf-tls
+            defaultMode: 256
+      containers:
+        - resources:
+            limits:
+              cpu: '1'
+              memory: 2Gi
+            requests:
+              cpu: '1'
+              memory: 2Gi
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 8034 # NOTE(review): differs from HTTP_PORT (8033); presumably a separate health listener - confirm
+              scheme: HTTP
+            initialDelaySeconds: 5
+            timeoutSeconds: 1
+            periodSeconds: 20
+            successThreshold: 1
+            failureThreshold: 3
+          terminationMessagePath: /dev/termination-log
+          name: fms-orchestr8-nlp
+          command:
+            - /app/bin/fms-guardrails-orchestr8
+          env:
+            - name: ORCHESTRATOR_CONFIG
+              value: /config/config.yaml
+            - name: HTTP_PORT
+              value: '8033'
+            - name: START_UP_HEALTH_CHECK
+              value: 'false'
+            ## Mount certs to /tls/orch and uncomment to enable (m)TLS
+            # - name: TLS_KEY_PATH
+            #   value: /tls/orch/server.key
+            # - name: TLS_CERT_PATH
+            #   value: /tls/orch/server.crt
+            # - name: TLS_CLIENT_CA_CERT_PATH
+            #   value: /tls/orch/ca.crt
+            - name: RUST_BACKTRACE
+              value: full
+            - name: RUST_LOG
+              value: 'fms_guardrails_orchestr8=debug'
+          securityContext:
+            capabilities:
+              drop:
+                - ALL
+            privileged: false
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+            allowPrivilegeEscalation: false
+            seccompProfile:
+              type: RuntimeDefault
+          ports:
+            - name: http
+              containerPort: 8033
+              protocol: TCP
+          imagePullPolicy: Always
+          volumeMounts:
+            - name: fms-orchestr8-config-nlp # Volume declared under `volumes` above, backed by the fms-orchestr8-config-nlp ConfigMap
+              readOnly: true
+              mountPath: /config/config.yaml
+              subPath: config.yaml
+            # - name: server-tls # This is for the caikit server for generation [may want to name this better]
+            #   readOnly: true
+            #   mountPath: /tls/server
+          terminationMessagePolicy: File
+          image: quay.io/csantiago/guardrails-orchestrator:latest
\ No newline at end of file
diff --git a/manifests/guardrails-orchestrator.yaml b/manifests/guardrails-orchestrator.yaml
new file mode 100644
index 00000000..cac79958
--- /dev/null
+++ b/manifests/guardrails-orchestrator.yaml
@@ -0,0 +1,170 @@
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: fms-orchestr8-config-nlp
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
+data:
+  config.yaml: |
+    generation:
+      provider: tgis
+      service:
+        hostname: caikit-tgis-isvc-grpc-predictor-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com
+        port: 443
+        # tls: caikit
+    detectors:
+      regex:
+        service:
+          hostname: https://regex-detector-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com/api/v1/text/contents
+          port: 443
+        chunker_id: whole_doc_chunker
+        default_threshold: 0.5
+    # tls:
+    #   caikit:
+    #     cert_path: /tls/server/tls.crt
+    #     key_path: /tls/server/tls.key
+    #     client_ca_cert_path: /tls/server/ca.crt
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: fms-orchestr8-nlp
+  annotations:
+    configmap.reloader.stakater.com/reload: 'fms-orchestr8-config-nlp' # must match the mounted ConfigMap's name or no rollout is triggered
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: fmstack-nlp
+      component: fms-orchestr8-nlp
+      deploy-name: fms-orchestr8-nlp
+  template:
+    metadata:
+      labels:
+        app: fmstack-nlp
+        component: fms-orchestr8-nlp
+        deploy-name: fms-orchestr8-nlp
+    spec:
+      volumes:
+        - name: fms-orchestr8-config-nlp
+          configMap:
+            name: fms-orchestr8-config-nlp
+            defaultMode: 420
+        - name: server-tls
+          secret:
+            secretName: caikitstack-caikit-inf-tls
+            defaultMode: 256
+      containers:
+        - resources:
+            limits:
+              cpu: '1'
+              memory: 2Gi
+            requests:
+              cpu: '1'
+              memory: 2Gi
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 8034 # NOTE(review): differs from HTTP_PORT (8033); presumably a separate health listener - confirm
+              scheme: HTTP
+            initialDelaySeconds: 5
+            timeoutSeconds: 1
+            periodSeconds: 20
+            successThreshold: 1
+            failureThreshold: 3
+          terminationMessagePath: /dev/termination-log
+          name: fms-orchestr8-nlp
+          command:
+            - /app/bin/fms-guardrails-orchestr8
+          env:
+            - name: ORCHESTRATOR_CONFIG
+              value: /config/config.yaml
+            - name: HTTP_PORT
+              value: '8033'
+            - name: START_UP_HEALTH_CHECK
+              value: 'false'
+            ## Mount certs to /tls/orch and uncomment to enable (m)TLS
+            # - name: TLS_KEY_PATH
+            #   value: /tls/orch/server.key
+            # - name: TLS_CERT_PATH
+            #   value: /tls/orch/server.crt
+            # - name: TLS_CLIENT_CA_CERT_PATH
+            #   value: /tls/orch/ca.crt
+            - name: RUST_BACKTRACE
+              value: full
+            - name: RUST_LOG
+              value: 'fms_guardrails_orchestr8=debug'
+          securityContext:
+            capabilities:
+              drop:
+                - ALL
+            privileged: false
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+            allowPrivilegeEscalation: false
+            seccompProfile:
+              type: RuntimeDefault
+          ports:
+            - name: http
+              containerPort: 8033
+              protocol: TCP
+          imagePullPolicy: Always
+          volumeMounts:
+            - name: fms-orchestr8-config-nlp # Volume declared under `volumes` above, backed by the ConfigMap at the top of this file
+              readOnly: true
+              mountPath: /config/config.yaml
+              subPath: config.yaml
+            # - name: server-tls # This is for the caikit server for generation [may want to name this better]
+            #   readOnly: true
+            #   mountPath: /tls/server
+          terminationMessagePolicy: File
+          image: quay.io/csantiago/guardrails-orchestrator:latest
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: fms-orchestr8-nlp
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+spec:
+  ipFamilies:
+    - IPv4
+  ports:
+    - name: http
+      protocol: TCP
+      port: 8033
+      targetPort: 8033
+  internalTrafficPolicy: Cluster
+  type: ClusterIP
+  ipFamilyPolicy: SingleStack
+  sessionAffinity: None
+  selector:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
+---
+kind: Route
+apiVersion: route.openshift.io/v1
+metadata:
+  name: gr2-nlp
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+spec:
+  to:
+    kind: Service
+    name: fms-orchestr8-nlp
+    weight: 100
+  port:
+    targetPort: http
+  tls:
+    termination: edge
+    insecureEdgeTerminationPolicy: Redirect
+  wildcardPolicy: None
diff --git a/manifests/minio/minio-secret.yaml b/manifests/minio/minio-secret.yaml
new file mode 100644
index 00000000..104b5760
--- /dev/null
+++ b/manifests/minio/minio-secret.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  annotations:
+    serving.kserve.io/s3-endpoint: minio:9000 # in-namespace `minio` Service; replace with your s3 endpoint e.g. minio-service.kubeflow:9000 (or minio.<namespace>.svc:9000 when MinIO lives in another namespace)
+    serving.kserve.io/s3-usehttps: "0" # by default 1, if testing with minio you can set to 0
+    serving.kserve.io/s3-region: "us-east-2"
+    serving.kserve.io/s3-useanoncredential: "false" # omitting this is the same as false, if true will ignore provided credential and use anonymous credentials
+  name: storage-config
+stringData:
+  "AWS_ACCESS_KEY_ID": "admin" # demo-only credential; matches MINIO_ACCESS_KEY of the example MinIO pod
+  "AWS_SECRET_ACCESS_KEY": "password" # demo-only credential; matches MINIO_SECRET_KEY of the example MinIO pod - never reuse outside a sandbox
\ No newline at end of file
diff --git a/manifests/minio/minio-serviceaccount.yaml b/manifests/minio/minio-serviceaccount.yaml
new file mode 100644
index 00000000..6ccfd13b
--- /dev/null
+++ b/manifests/minio/minio-serviceaccount.yaml
@@ -0,0 +1,6 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: sa
+secrets:
+- name: storage-config
\ No newline at end of file
diff --git a/manifests/minio/minio.yaml b/manifests/minio/minio.yaml
new file mode 100644
index 00000000..85765017
--- /dev/null
+++ b/manifests/minio/minio.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: minio
+spec:
+  ports:
+    - name: minio-client-port
+      port: 9000
+      protocol: TCP
+      targetPort: 9000
+  selector:
+    app: minio
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  labels:
+    app: minio
+  name: minio
+spec:
+  containers:
+    - args:
+        - server
+        - /data1
+      env:
+        - name: MINIO_ACCESS_KEY
+          value: admin # demo-only credential; keep in sync with AWS_ACCESS_KEY_ID in the storage-config Secret
+        - name: MINIO_SECRET_KEY
+          value: password # demo-only credential; keep in sync with AWS_SECRET_ACCESS_KEY in the storage-config Secret
+      image: quay.io/opendatahub/modelmesh-minio-examples:caikit-flan-t5
+      imagePullPolicy: Always
+      name: minio
\ No newline at end of file
diff --git a/manifests/route.yaml b/manifests/route.yaml
new file mode 100644
index 00000000..855cc8ed
--- /dev/null
+++ b/manifests/route.yaml
@@ -0,0 +1,18 @@
+kind: Route
+apiVersion: route.openshift.io/v1
+metadata:
+  name: gr2-nlp
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+spec:
+  to:
+    kind: Service
+    name: fms-orchestr8-nlp
+    weight: 100
+  port:
+    targetPort: http
+  tls:
+    termination: edge
+    insecureEdgeTerminationPolicy: Redirect
+  wildcardPolicy: None
\ No newline at end of file
diff --git a/manifests/service.yaml b/manifests/service.yaml
new file mode 100644
index 00000000..56f81928
--- /dev/null
+++ b/manifests/service.yaml
@@ -0,0 +1,23 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: fms-orchestr8-nlp
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+spec:
+  ipFamilies:
+    - IPv4
+  ports:
+    - name: http
+      protocol: TCP
+      port: 8033
+      targetPort: 8033
+  internalTrafficPolicy: Cluster
+  type: ClusterIP
+  ipFamilyPolicy: SingleStack
+  sessionAffinity: None
+  selector:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
\ No newline at end of file