Kaynağa Gözat

NAS-121481 / 23.10 / Set nvidia caps to void when no gpu is passed, also adds `render` group when a gpu is selected and other small fixes (#1124)

* Set nvidia caps to void when no gpu is passed

* add tests to init containers too

* Additionally add `render` group when gpu is added

* Correctly handle "0" gpu

* handle fsGroup 0 properly

* fix gh highlight

* Correct nvidia variable and add additional check for runtime

* cast both sides of the comparison

* fix externalinterfaces nesting

* Add dnsConfig missing docs
Stavros Kois 2 yıl önce
ebeveyn
işleme
63326effb3

+ 10 - 0
library/common-test/tests/container/envFixed_test.yaml

@@ -45,6 +45,8 @@ tests:
                 value: "002"
               - name: UMASK_SET
                 value: "002"
+              - name: NVIDIA_VISIBLE_DEVICES
+                value: "void"
               - name: S6_READ_ONLY_ROOT
                 value: "1"
 
@@ -87,6 +89,8 @@ tests:
                 value: "002"
               - name: UMASK_SET
                 value: "002"
+              - name: NVIDIA_VISIBLE_DEVICES
+                value: "void"
               - name: PUID
                 value: "568"
               - name: USER_ID
@@ -143,6 +147,8 @@ tests:
                 value: "002"
               - name: UMASK_SET
                 value: "002"
+              - name: NVIDIA_VISIBLE_DEVICES
+                value: "void"
               - name: PUID
                 value: "568"
               - name: USER_ID
@@ -198,6 +204,8 @@ tests:
                 value: "002"
               - name: UMASK_SET
                 value: "002"
+              - name: NVIDIA_VISIBLE_DEVICES
+                value: "void"
               - name: PUID
                 value: "568"
               - name: USER_ID
@@ -351,6 +359,8 @@ tests:
                 value: "002"
               - name: UMASK_SET
                 value: "002"
+              - name: NVIDIA_VISIBLE_DEVICES
+                value: "void"
               - name: PUID
                 value: "0"
               - name: USER_ID

+ 166 - 0
library/common-test/tests/container/resources_test.yaml

@@ -628,6 +628,147 @@ tests:
                 cpu: 10m
                 memory: 50Mi
 
+  - it: should assign GPU on the selected pod/container with multiple GPUs
+    set:
+      image: *image
+      global:
+        ixChartContext:
+          addNvidiaRuntimeClass: true
+          nvidiaRuntimeClassName: nvidia
+      scaleGPU:
+        - gpu:
+            nvidia.com/gpu: 1
+            amd.com/gpu: 0
+          targetSelector:
+            workload-name1:
+              - container-name1
+              - container-name2
+      workload:
+        workload-name1:
+          enabled: true
+          primary: true
+          type: Deployment
+          podSpec:
+            containers:
+              container-name1:
+                enabled: true
+                primary: true
+                imageSelector: image
+                probes: *probes
+              container-name2:
+                enabled: true
+                primary: false
+                imageSelector: image
+                probes: *probes
+    asserts:
+      - documentIndex: &deploymentDoc 0
+        isKind:
+          of: Deployment
+      - documentIndex: *deploymentDoc
+        isAPIVersion:
+          of: apps/v1
+      - documentIndex: *deploymentDoc
+        equal:
+          path: spec.template.spec.runtimeClassName
+          value: nvidia
+      - documentIndex: *deploymentDoc
+        isSubset:
+          path: spec.template.spec.containers[0]
+          content:
+            resources:
+              limits:
+                cpu: 4000m
+                memory: 8Gi
+                nvidia.com/gpu: "1"
+              requests:
+                cpu: 10m
+                memory: 50Mi
+      - documentIndex: *deploymentDoc
+        isSubset:
+          path: spec.template.spec.containers[1]
+          content:
+            resources:
+              limits:
+                cpu: 4000m
+                memory: 8Gi
+                nvidia.com/gpu: "1"
+              requests:
+                cpu: 10m
+                memory: 50Mi
+
+  - it: should assign multiple GPU on the selected pod/container with multiple selected GPUs
+    set:
+      image: *image
+      global:
+        ixChartContext:
+          addNvidiaRuntimeClass: true
+          nvidiaRuntimeClassName: nvidia
+      scaleGPU:
+        - gpu:
+            nvidia.com/gpu: 1
+            amd.com/gpu: 0
+          targetSelector:
+            workload-name1:
+              - container-name1
+        - gpu:
+            nvidia.com/gpu: 0
+            amd.com/gpu: 1
+          targetSelector:
+            workload-name1:
+              - container-name2
+      workload:
+        workload-name1:
+          enabled: true
+          primary: true
+          type: Deployment
+          podSpec:
+            containers:
+              container-name1:
+                enabled: true
+                primary: true
+                imageSelector: image
+                probes: *probes
+              container-name2:
+                enabled: true
+                primary: false
+                imageSelector: image
+                probes: *probes
+    asserts:
+      - documentIndex: &deploymentDoc 0
+        isKind:
+          of: Deployment
+      - documentIndex: *deploymentDoc
+        isAPIVersion:
+          of: apps/v1
+      - documentIndex: *deploymentDoc
+        equal:
+          path: spec.template.spec.runtimeClassName
+          value: nvidia
+      - documentIndex: *deploymentDoc
+        isSubset:
+          path: spec.template.spec.containers[0]
+          content:
+            resources:
+              limits:
+                cpu: 4000m
+                memory: 8Gi
+                nvidia.com/gpu: "1"
+              requests:
+                cpu: 10m
+                memory: 50Mi
+      - documentIndex: *deploymentDoc
+        isSubset:
+          path: spec.template.spec.containers[1]
+          content:
+            resources:
+              limits:
+                cpu: 4000m
+                memory: 8Gi
+                amd.com/gpu: "1"
+              requests:
+                cpu: 10m
+                memory: 50Mi
+
   # Failures
   - it: should fail with empty requests
     set:
@@ -868,3 +1009,28 @@ tests:
     asserts:
       - failedTemplate:
           errorMessage: Container - Expected non-empty <scaleGPU> <value>
+
+  - it: should fail with no value in gpu
+    set:
+      image: *image
+      scaleGPU:
+        - gpu:
+            key:
+          targetSelector:
+            workload-name1:
+              - container-name1
+      workload:
+        workload-name1:
+          enabled: true
+          primary: true
+          type: Deployment
+          podSpec:
+            containers:
+              container-name1:
+                enabled: true
+                primary: true
+                imageSelector: image
+                probes: *probes
+    asserts:
+      - failedTemplate:
+          errorMessage: Container - Expected non-empty <scaleGPU> <value>

+ 21 - 21
library/common-test/tests/externalInterface/validation_test.yaml

@@ -55,11 +55,11 @@ tests:
         - hostInterface: enp0s3
           ipam:
             type: dhcp
-          staticIPConfigurations:
-            - ipAddress: 1.2.3.4
+            staticIPConfigurations:
+              - ipAddress: 1.2.3.4
     asserts:
       - failedTemplate:
-          errorMessage: External Interface - Expected empty <staticIPConfigurations> and <staticRoutes> when <ipam.type> is not [static]
+          errorMessage: External Interface - Expected empty <ipam.staticIPConfigurations> and <ipam.staticRoutes> when <ipam.type> is not [static]
 
   - it: should fail with non-empty staticRoutes on dhcp
     set:
@@ -67,12 +67,12 @@ tests:
         - hostInterface: enp0s3
           ipam:
             type: dhcp
-          staticRoutes:
-            - gateway: 1.2.3.4
-              destination: 1.2.3.4
+            staticRoutes:
+              - gateway: 1.2.3.4
+                destination: 1.2.3.4
     asserts:
       - failedTemplate:
-          errorMessage: External Interface - Expected empty <staticIPConfigurations> and <staticRoutes> when <ipam.type> is not [static]
+          errorMessage: External Interface - Expected empty <ipam.staticIPConfigurations> and <ipam.staticRoutes> when <ipam.type> is not [static]
 
   - it: should fail with empty staticIPConfigurations on static
     set:
@@ -80,10 +80,10 @@ tests:
         - hostInterface: enp0s3
           ipam:
             type: static
-          staticIPConfigurations: []
+            staticIPConfigurations: []
     asserts:
       - failedTemplate:
-          errorMessage: External Interface - Expected non-empty <staticIPConfigurations> when <ipam.type> is [static]
+          errorMessage: External Interface - Expected non-empty <ipam.staticIPConfigurations> when <ipam.type> is [static]
 
   - it: should fail with empty gateway on staticRoutes on static
     set:
@@ -91,14 +91,14 @@ tests:
         - hostInterface: enp0s3
           ipam:
             type: static
-          staticIPConfigurations:
-            - ipAddress: 1.2.3.4
-          staticRoutes:
-            - gateway: ""
-              destination: 1.2.3.4
+            staticIPConfigurations:
+              - ipAddress: 1.2.3.4
+            staticRoutes:
+              - gateway: ""
+                destination: 1.2.3.4
     asserts:
       - failedTemplate:
-          errorMessage: External Interface - Expected non-empty <gateway> in <staticRoutes>
+          errorMessage: External Interface - Expected non-empty <gateway> in <ipam.staticRoutes>
 
   - it: should fail with empty destination on staticRoutes on static
     set:
@@ -106,14 +106,14 @@ tests:
         - hostInterface: enp0s3
           ipam:
             type: static
-          staticIPConfigurations:
-            - ipAddress: 1.2.3.4
-          staticRoutes:
-            - gateway: 1.2.3.4
-              destination: ""
+            staticIPConfigurations:
+              - ipAddress: 1.2.3.4
+            staticRoutes:
+              - gateway: 1.2.3.4
+                destination: ""
     asserts:
       - failedTemplate:
-          errorMessage: External Interface - Expected non-empty <destination> in <staticRoutes>
+          errorMessage: External Interface - Expected non-empty <destination> in <ipam.staticRoutes>
 
   - it: should fail with empty ixExternalInterfaceConfigurationNames when interface is defined
     set:

+ 32 - 0
library/common-test/tests/initContainer/data_test.yaml

@@ -45,6 +45,14 @@ tests:
                 enabled: "{{ .Values.render }}"
                 type: init
                 imageSelector: initImage
+                env:
+                  key: value
+                  key2: "{{ .Values.initImage.repository }}"
+                  key3:
+                    secretKeyRef:
+                      expandObjectName: false
+                      name: '{{ printf "secret-name" }}'
+                      key: secret-key
               container-name3:
                 enabled: true
                 type: upgrade
@@ -101,6 +109,30 @@ tests:
           path: spec.template.spec.initContainers[1]
           content:
             name: release-name-common-test-init-container-name1
+      - documentIndex: *deploymentDoc
+        isSubset:
+          path: spec.template.spec.initContainers[1]
+          content:
+            env:
+              - name: "TZ"
+                value: "UTC"
+              - name: "UMASK"
+                value: "002"
+              - name: "UMASK_SET"
+                value: "002"
+              - name: NVIDIA_VISIBLE_DEVICES
+                value: "void"
+              - name: "S6_READ_ONLY_ROOT"
+                value: "1"
+              - name: "key"
+                value: "value"
+              - name: "key2"
+                value: "bash"
+              - name: "key3"
+                valueFrom:
+                  secretKeyRef:
+                    key: "secret-key"
+                    name: "secret-name"
       - documentIndex: *deploymentDoc
         isNull:
           path: spec.template.spec.initContainers[1].command

+ 32 - 0
library/common-test/tests/initContainer/data_upgrade_test.yaml

@@ -47,6 +47,14 @@ tests:
                 enabled: true
                 type: init
                 imageSelector: initImage
+                env:
+                  key: value
+                  key2: "{{ .Values.initImage.repository }}"
+                  key3:
+                    secretKeyRef:
+                      expandObjectName: false
+                      name: '{{ printf "secret-name" }}'
+                      key: secret-key
               container-name3:
                 enabled: true
                 type: install
@@ -109,6 +117,30 @@ tests:
       - documentIndex: *deploymentDoc
         isNull:
           path: spec.template.spec.initContainers[1].volumeMounts
+      - documentIndex: *deploymentDoc
+        isSubset:
+          path: spec.template.spec.initContainers[1]
+          content:
+            env:
+              - name: "TZ"
+                value: "UTC"
+              - name: "UMASK"
+                value: "002"
+              - name: "UMASK_SET"
+                value: "002"
+              - name: NVIDIA_VISIBLE_DEVICES
+                value: "void"
+              - name: "S6_READ_ONLY_ROOT"
+                value: "1"
+              - name: "key"
+                value: "value"
+              - name: "key2"
+                value: "bash"
+              - name: "key3"
+                valueFrom:
+                  secretKeyRef:
+                    key: "secret-key"
+                    name: "secret-name"
       - documentIndex: *deploymentDoc
         isNull:
           path: spec.template.spec.initContainers[2]

+ 20 - 0
library/common-test/tests/pod/runtime_class_name_test.yaml

@@ -192,3 +192,23 @@ tests:
         equal:
           path: spec.template.spec.runtimeClassName
           value: some-other-class
+
+  - it: should not add runtimeClassName with gpu value 0
+    set:
+      scaleGPU:
+        - gpu:
+            key: 0
+      global:
+        ixChartContext:
+          addNvidiaRuntimeClass: true
+          nvidiaRuntimeClassName: ix-runtime
+      workload:
+        workload-name1:
+          enabled: true
+          primary: true
+          type: Deployment
+          podSpec: {}
+    asserts:
+      - documentIndex: &deploymentDoc 0
+        isNull:
+          path: spec.template.spec.runtimeClassName

+ 28 - 0
library/common-test/tests/pod/securityContext.yaml

@@ -220,6 +220,30 @@ tests:
               - name: net.ipv4.ip_unprivileged_port_start
                 value: "443"
 
+  - it: should pass with fsGroup 0
+    set:
+      securityContext:
+        pod:
+          fsGroup: 0
+      workload:
+        workload-name1:
+          enabled: true
+          primary: true
+          type: Deployment
+          podSpec: {}
+    asserts:
+      - documentIndex: &deploymentDoc 0
+        isKind:
+          of: Deployment
+      - documentIndex: *deploymentDoc
+        equal:
+          path: spec.template.spec.securityContext
+          value:
+            fsGroup: 0
+            fsGroupChangePolicy: OnRootMismatch
+            supplementalGroups: []
+            sysctls: []
+
   - it: should pass with no sysctls port_start automatically appended based on services when port is higher than 1024
     set:
       workload:
@@ -284,6 +308,7 @@ tests:
             supplementalGroups:
               - 1000
               - 44
+              - 107
             sysctls: []
       - documentIndex: &otherDeploymentDoc 1
         isKind:
@@ -332,6 +357,7 @@ tests:
             supplementalGroups:
               - 1000
               - 44
+              - 107
             sysctls: []
       - documentIndex: &otherDeploymentDoc 1
         isKind:
@@ -382,6 +408,7 @@ tests:
             supplementalGroups:
               - 1000
               - 44
+              - 107
             sysctls: []
       - documentIndex: &otherDeploymentDoc 1
         isKind:
@@ -394,6 +421,7 @@ tests:
             fsGroupChangePolicy: OnRootMismatch
             supplementalGroups:
               - 44
+              - 107
             sysctls: []
 
   # Failures

+ 1 - 1
library/common/Chart.yaml

@@ -2,7 +2,7 @@ apiVersion: v2
 name: common
 description: A library chart for iX Official Catalog
 type: library
-version: 1.0.5
+version: 1.0.6
 appVersion: v1
 annotations:
   title: Common Library Chart

+ 15 - 15
library/common/docs/scaleExternalInterface.md

@@ -1,18 +1,18 @@
 # Scale External Interface
 
-| Key                                               |   Type    |            Required             | Helm Template | Default | Description                                                                     |
-| :------------------------------------------------ | :-------: | :-----------------------------: | :-----------: | :-----: | :------------------------------------------------------------------------------ |
-| scaleExternalInterface                            |  `list`   |               ❌                |      ❌       |  `[]`   | Define the external interfaces as list                                          |
-| scaleExternalInterface.targetSelectAll            | `boolean` |               ❌                |      ❌       | `false` | Whether to add the annotation for this external interface to all workloads      |
-| scaleExternalInterface.targetSelector             |  `list`   |               ❌                |      ❌       |  `[]`   | Which workloads to add the annotations                                          |
-| scaleExternalInterface.hostInterface              | `string`  |               ✅                |      ❌       |  `""`   | Define the hostInterface, (options in GUI populated from Middleware references) |
-| scaleExternalInterface.ipam                       |  `dict`   |               ✅                |      ❌       |  `{}`   | Define the ipam                                                                 |
-| scaleExternalInterface.ipam.type                  | `string`  |               ✅                |      ❌       |  `""`   | Define the ipam type (dchp, static)                                             |
-| scaleExternalInterface.staticIPConfiguration      |  `list`   | ✅ (Only when static ipam type) |      ❌       |  `[]`   | Define static IP Configuration (Only with static ipam type)                     |
-| scaleExternalInterface.staticIPConfiguration.[IP] | `string`  |               ✅                |      ❌       |  `""`   | Define the static IP (Only with static ipam type)                               |
-| scaleExternalInterface.staticRoutes               |  `list`   |               ❌                |      ❌       |  `[]`   | Define static routes (Only with static ipam type)                               |
-| scaleExternalInterface.staticRoutes.destination   | `string`  |               ✅                |      ❌       |  `""`   | Define the static destination (Only with static ipam type)                      |
-| scaleExternalInterface.staticRoutes.gateway       | `string`  |               ✅                |      ❌       |  `""`   | Define the static gateway (Only with static ipam type)                          |
+| Key                                                    |   Type    |            Required             | Helm Template | Default | Description                                                                     |
+| :----------------------------------------------------- | :-------: | :-----------------------------: | :-----------: | :-----: | :------------------------------------------------------------------------------ |
+| scaleExternalInterface                                 |  `list`   |               ❌                |      ❌       |  `[]`   | Define the external interfaces as list                                          |
+| scaleExternalInterface.targetSelectAll                 | `boolean` |               ❌                |      ❌       | `false` | Whether to add the annotation for this external interface to all workloads      |
+| scaleExternalInterface.targetSelector                  |  `list`   |               ❌                |      ❌       |  `[]`   | Which workloads to add the annotations                                          |
+| scaleExternalInterface.hostInterface                   | `string`  |               ✅                |      ❌       |  `""`   | Define the hostInterface, (options in GUI populated from Middleware references) |
+| scaleExternalInterface.ipam                            |  `dict`   |               ✅                |      ❌       |  `{}`   | Define the ipam                                                                 |
+| scaleExternalInterface.ipam.type                       | `string`  |               ✅                |      ❌       |  `""`   | Define the ipam type (dhcp, static)                                             |
+| scaleExternalInterface.ipam.staticIPConfigurations     |  `list`   | ✅ (Only when static ipam type) |      ❌       |  `[]`   | Define static IP Configuration (Only with static ipam type)                     |
+| scaleExternalInterface.ipam.staticIPConfigurations.[IP] | `string`  |               ✅                |      ❌       |  `""`   | Define the static IP (Only with static ipam type)                               |
+| scaleExternalInterface.ipam.staticRoutes               |  `list`   |               ❌                |      ❌       |  `[]`   | Define static routes (Only with static ipam type)                               |
+| scaleExternalInterface.ipam.staticRoutes.destination   | `string`  |               ✅                |      ❌       |  `""`   | Define the static destination (Only with static ipam type)                      |
+| scaleExternalInterface.ipam.staticRoutes.gateway       | `string`  |               ✅                |      ❌       |  `""`   | Define the static gateway (Only with static ipam type)                          |
 
 > When `targetSelectAll` is `true`, it will add the annotations to all pods (`targetSelector` is ignored in this case)
 > When `targetSelector` is a list, each entry is a string, with the pod name that will add the annotations. Can have multiple entries.
@@ -39,8 +39,8 @@ scaleExternalInterface:
   - hostInterface: ""
     ipam:
       type: ""
-    staticRoutes: []
-    staticIPConfigurations: []
+      staticRoutes: []
+      staticIPConfigurations: []
     # targetSelectAll: false
     targetSelector:
       - workload-name

+ 1 - 1
library/common/docs/scaleGPU.md

@@ -10,7 +10,7 @@
 
 > When `targetSelector` is a dict, each entry is a list, containing the name(s) of the container(s) to assign the GPU
 > When `targetSelector` is empty, it will assign the GPU to the primary pod/container
-> Selected pod's will get appended the group `44` in `supplementalGroups`. This is to allow rootless containers to access the GPU
+> Selected pods will have the groups `44` and `107` appended to `supplementalGroups`. This is to allow rootless containers to access the GPU
 
 ---
 

+ 47 - 39
library/common/docs/workload/README.md

@@ -1,44 +1,52 @@
 # workload
 
-| Key                                                                  |   Type    | Required |   Helm Template    |                             Default                             | Description                                                                        |
-| :------------------------------------------------------------------- | :-------: | :------: | :----------------: | :-------------------------------------------------------------: | :--------------------------------------------------------------------------------- |
-| workload                                                             |  `dict`   |    ❌    |         ❌         |                              `{}`                               | Define the workload as dicts                                                       |
-| workload.[workload-name]                                             |  `dict`   |    ✅    |         ❌         |                              `{}`                               | Holds workload definition                                                          |
-| workload.[workload-name].enabled                                     | `boolean` |    ✅    |         ❌         |                             `false`                             | Enables or Disables the workload                                                   |
-| workload.[workload-name].primary                                     | `boolean` |    ✅    |         ❌         |                             `false`                             | Sets the workload as primary                                                       |
-| workload.[workload-name].labels                                      |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Additional labels for workload                                                     |
-| workload.[workload-name].annotations                                 |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Additional annotations for workload                                                |
-| workload.[workload-name].type                                        | `string`  |    ✅    |         ❌         |                              `""`                               | Define the kind of the workload (Deployment, CronJob, Job) |
-| workload.[workload-name].podSpec                                     |  `dict`   |    ✅    |         ❌         |                              `{}`                               | Holds the pod definition                                                           |
-| workload.[workload-name].podSpec.labels                              |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Additional Pod Labels                                                              |
-| workload.[workload-name].podSpec.annotations                         |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Pod Annotations                                                                    |
-| workload.[workload-name].podSpec.automountServiceAccountToken        | `boolean` |    ❌    |         ❌         | `{{ .Values.podOptions.automountServiceAccoutnToken }}` (false) | Pod's automountServiceAccountToken                                                 |
-| workload.[workload-name].podSpec.hostNetwork                         | `boolean` |    ❌    |         ❌         |         `{{ .Values.podOptions.hostNetwork }}` (false)          | Pod's hostNetwork                                                                  |
-| workload.[workload-name].podSpec.enableServiceLinks                  | `boolean` |    ❌    |         ❌         |      `{{ .Values.podOptions.enableServiceLinks }}` (false)      | Pod's enableServiceLinks                                                           |
-| workload.[workload-name].podSpec.restartPolicy                       | `string`  |    ❌    |         ✅         |        `{{ .Values.podOptions.restartPolicy }}` (Always)        | Pod's restartPolicy. (Always, Never, OnFailure)                                    |
-| workload.[workload-name].podSpec.hostname                            | `string`  |    ❌    |         ✅         |                              `""`                               | Pod's hostname                                                                     |
-| workload.[workload-name].podSpec.terminationGracePeriodSeconds       |   `int`   |    ❌    |         ✅         | `{{ .Values.podOptions.terminationGracePeriodSeconds }}` (120)  | Pod's terminationGracePeriodSeconds                                                |
-| workload.[workload-name].podSpec.hostAliases                         |  `list`   |    ❌    |         ❌         |                                                                 | Pod's host aliases                                                                 |
-| workload.[workload-name].podSpec.hostAliases.ip                      | `string`  |    ❌    |         ✅         |                                                                 | Value for `ip` in hosts aliases                                                    |
-| workload.[workload-name].podSpec.hostAliases.hostnames               |  `list`   |    ❌    |         ❌         |                                                                 | Hostnames for the `ip` in hosts aliases                                            |
-| workload.[workload-name].podSpec.hostAliases.hostnames.[host-name]   | `string`  |    ❌    |         ✅         |                                                                 | [Value] for `hostnames` for the `ip` in hosts aliases                              |
-| workload.[workload-name].podSpec.dnsPolicy                           | `string`  |    ❌    |         ✅         |       `{{ .Values.podOptions.dnsPolicy }}` (ClusterFirst)       | Pod's DNS Policy (ClusterFirst, ClusterFirstWithHostNet, Default, None).           |
-| workload.[workload-name].podSpec.tolerations                         |  `list`   |    ❌    |         ❌         |           `{{ .Values.podOptions.tolerations }}` ([])           | Pod's Tolerations                                                                  |
-| workload.[workload-name].podSpec.tolerations.operator                | `string`  |    ✅    |         ✅         |                                                                 | Toleration's `operator` (Equal, Exists)                                            |
-| workload.[workload-name].podSpec.tolerations.key                     | `string`  |  ❌/✅   |         ✅         |                                                                 | Toleration's `key`. Required only when `operator` = `Equal`                        |
-| workload.[workload-name].podSpec.tolerations.value                   | `string`  |  ❌/✅   |         ✅         |                                                                 | Toleration's `value`. Required only when `operator` = `Equal`                      |
-| workload.[workload-name].podSpec.tolerations.effect                  | `string`  |    ❌    |         ✅         |                                                                 | Toleration's `effect`.(NoExecute, NoSchedule, PreferNoSchedule)                    |
-| workload.[workload-name].podSpec.tolerations.tolerationSeconds       |   `int`   |    ❌    |         ❌         |                                                                 | Toleration's `tolerationSeconds`.                                                  |
-| workload.[workload-name].podSpec.runtimeClassName                    | `string`  |    ❌    |         ✅         |        `{{ .Values.podOptions.runtimeClassName }}` ("")         | Pod's runtimeClassName                                                             |
-| workload.[workload-name].podSpec.securityContext                     |  `dict`   |    ❌    |         ❌         |               `{{ .Values.securityContext.pod }}`               | Pod's securityContext                                                              |
-| workload.[workload-name].podSpec.securityContext.fsGroup             |   `int`   |    ❌    |         ❌         |                              `568`                              | Pod's fsGroup                                                                      |
-| workload.[workload-name].podSpec.securityContext.fsGroupChangePolicy | `string`  |    ❌    |         ❌         |                        `OnRootMismatch`                         | Pod's fsGroupChangePolicy (Always, OnRootMismatch)                                 |
-| workload.[workload-name].podSpec.securityContext.supplementalGroups  |  `list`   |    ❌    |         ❌         |                              `[]`                               | Pod's supplementalGroups (list of `int`)                                           |
-| workload.[workload-name].podSpec.securityContext.sysctls             |  `list`   |    ❌    |         ❌         |                              `[]`                               | Pod's sysctls                                                                      |
-| workload.[workload-name].podSpec.securityContext.sysctls.name        | `string`  |    ✅    |         ✅         |                              `""`                               | `name` of the sysctl                                                               |
-| workload.[workload-name].podSpec.securityContext.sysctls.value       | `string`  |    ✅    |         ✅         |                              `""`                               | `value` of the sysctl                                                              |
-| workload.[workload-name].podSpec.containers                          |  `dict`   |    ❌    |         ❌         |                              `{}`                               | Define container(s)                                                                |
-| workload.[workload-name].podSpec.initContainers                      |  `dict`   |    ❌    |         ❌         |                              `{}`                               | Define initContainer(s)                                                            |
+| Key                                                                  |   Type    | Required |   Helm Template    |                             Default                             | Description                                                              |
+| :------------------------------------------------------------------- | :-------: | :------: | :----------------: | :-------------------------------------------------------------: | :----------------------------------------------------------------------- |
+| workload                                                             |  `dict`   |    ❌    |         ❌         |                              `{}`                               | Define the workload as dicts                                             |
+| workload.[workload-name]                                             |  `dict`   |    ✅    |         ❌         |                              `{}`                               | Holds workload definition                                                |
+| workload.[workload-name].enabled                                     | `boolean` |    ✅    |         ❌         |                             `false`                             | Enables or Disables the workload                                         |
+| workload.[workload-name].primary                                     | `boolean` |    ✅    |         ❌         |                             `false`                             | Sets the workload as primary                                             |
+| workload.[workload-name].labels                                      |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Additional labels for workload                                           |
+| workload.[workload-name].annotations                                 |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Additional annotations for workload                                      |
+| workload.[workload-name].type                                        | `string`  |    ✅    |         ❌         |                              `""`                               | Define the kind of the workload (Deployment, CronJob, Job)               |
+| workload.[workload-name].podSpec                                     |  `dict`   |    ✅    |         ❌         |                              `{}`                               | Holds the pod definition                                                 |
+| workload.[workload-name].podSpec.labels                              |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Additional Pod Labels                                                    |
+| workload.[workload-name].podSpec.annotations                         |  `dict`   |    ❌    | ✅ (On value only) |                              `{}`                               | Pod Annotations                                                          |
+| workload.[workload-name].podSpec.automountServiceAccountToken        | `boolean` |    ❌    |         ❌         | `{{ .Values.podOptions.automountServiceAccountToken }}` (false) | Pod's automountServiceAccountToken                                       |
+| workload.[workload-name].podSpec.hostNetwork                         | `boolean` |    ❌    |         ❌         |         `{{ .Values.podOptions.hostNetwork }}` (false)          | Pod's hostNetwork                                                        |
+| workload.[workload-name].podSpec.enableServiceLinks                  | `boolean` |    ❌    |         ❌         |      `{{ .Values.podOptions.enableServiceLinks }}` (false)      | Pod's enableServiceLinks                                                 |
+| workload.[workload-name].podSpec.restartPolicy                       | `string`  |    ❌    |         ✅         |        `{{ .Values.podOptions.restartPolicy }}` (Always)        | Pod's restartPolicy. (Always, Never, OnFailure)                          |
+| workload.[workload-name].podSpec.hostname                            | `string`  |    ❌    |         ✅         |                              `""`                               | Pod's hostname                                                           |
+| workload.[workload-name].podSpec.terminationGracePeriodSeconds       |   `int`   |    ❌    |         ✅         | `{{ .Values.podOptions.terminationGracePeriodSeconds }}` (120)  | Pod's terminationGracePeriodSeconds                                      |
+| workload.[workload-name].podSpec.hostAliases                         |  `list`   |    ❌    |         ❌         |                                                                 | Pod's host aliases                                                       |
+| workload.[workload-name].podSpec.hostAliases.ip                      | `string`  |    ❌    |         ✅         |                                                                 | Value for `ip` in hosts aliases                                          |
+| workload.[workload-name].podSpec.hostAliases.hostnames               |  `list`   |    ❌    |         ❌         |                                                                 | Hostnames for the `ip` in hosts aliases                                  |
+| workload.[workload-name].podSpec.hostAliases.hostnames.[host-name]   | `string`  |    ❌    |         ✅         |                                                                 | [Value] for `hostnames` for the `ip` in hosts aliases                    |
+| workload.[workload-name].podSpec.dnsPolicy                           | `string`  |    ❌    |         ✅         |       `{{ .Values.podOptions.dnsPolicy }}` (ClusterFirst)       | Pod's DNS Policy (ClusterFirst, ClusterFirstWithHostNet, Default, None). |
+| workload.[workload-name].podSpec.dnsConfig                           |  `dict`   |    ❌    |         ❌         |              `{{ .Values.podOptions.dnsConfig }}`               | Pod's DNS Config                                                         |
+| workload.[workload-name].podSpec.dnsConfig.nameservers               |  `list`   |    ❌    |         ✅         |                              `[]`                               | Pod's DNS Config - Nameservers (Max 3)                                   |
+| workload.[workload-name].podSpec.dnsConfig.nameservers.[nameserver]  | `string`  |    ✅    |         ✅         |                              `""`                               | Pod's DNS Config - Nameserver                                            |
+| workload.[workload-name].podSpec.dnsConfig.searches                  |  `list`   |    ❌    |         ✅         |                              `[]`                               | Pod's DNS Config - Searches (Max 6)                                      |
+| workload.[workload-name].podSpec.dnsConfig.searches.[search]         | `string`  |    ✅    |         ✅         |                              `""`                               | Pod's DNS Config - Search                                                |
+| workload.[workload-name].podSpec.dnsConfig.options                   |  `dict`   |    ❌    |         ❌         |                              `{}`                               | Pod's DNS Config - Options                                               |
+| workload.[workload-name].podSpec.dnsConfig.options.name              | `string`  |    ✅    |         ✅         |                              `""`                               | Pod's DNS Config - Option name                                           |
+| workload.[workload-name].podSpec.dnsConfig.options.value             | `string`  |    ❌    |         ✅         |                              `""`                               | Pod's DNS Config - Option value                                          |
+| workload.[workload-name].podSpec.tolerations                         |  `list`   |    ❌    |         ❌         |           `{{ .Values.podOptions.tolerations }}` ([])           | Pod's Tolerations                                                        |
+| workload.[workload-name].podSpec.tolerations.operator                | `string`  |    ✅    |         ✅         |                                                                 | Toleration's `operator` (Equal, Exists)                                  |
+| workload.[workload-name].podSpec.tolerations.key                     | `string`  |  ❌/✅   |         ✅         |                                                                 | Toleration's `key`. Required only when `operator` = `Equal`              |
+| workload.[workload-name].podSpec.tolerations.value                   | `string`  |  ❌/✅   |         ✅         |                                                                 | Toleration's `value`. Required only when `operator` = `Equal`            |
+| workload.[workload-name].podSpec.tolerations.effect                  | `string`  |    ❌    |         ✅         |                                                                 | Toleration's `effect`.(NoExecute, NoSchedule, PreferNoSchedule)          |
+| workload.[workload-name].podSpec.tolerations.tolerationSeconds       |   `int`   |    ❌    |         ❌         |                                                                 | Toleration's `tolerationSeconds`.                                        |
+| workload.[workload-name].podSpec.runtimeClassName                    | `string`  |    ❌    |         ✅         |        `{{ .Values.podOptions.runtimeClassName }}` ("")         | Pod's runtimeClassName                                                   |
+| workload.[workload-name].podSpec.securityContext                     |  `dict`   |    ❌    |         ❌         |               `{{ .Values.securityContext.pod }}`               | Pod's securityContext                                                    |
+| workload.[workload-name].podSpec.securityContext.fsGroup             |   `int`   |    ❌    |         ❌         |                              `568`                              | Pod's fsGroup                                                            |
+| workload.[workload-name].podSpec.securityContext.fsGroupChangePolicy | `string`  |    ❌    |         ❌         |                        `OnRootMismatch`                         | Pod's fsGroupChangePolicy (Always, OnRootMismatch)                       |
+| workload.[workload-name].podSpec.securityContext.supplementalGroups  |  `list`   |    ❌    |         ❌         |                              `[]`                               | Pod's supplementalGroups (list of `int`)                                 |
+| workload.[workload-name].podSpec.securityContext.sysctls             |  `list`   |    ❌    |         ❌         |                              `[]`                               | Pod's sysctls                                                            |
+| workload.[workload-name].podSpec.securityContext.sysctls.name        | `string`  |    ✅    |         ✅         |                              `""`                               | `name` of the sysctl                                                     |
+| workload.[workload-name].podSpec.securityContext.sysctls.value       | `string`  |    ✅    |         ✅         |                              `""`                               | `value` of the sysctl                                                    |
+| workload.[workload-name].podSpec.containers                          |  `dict`   |    ❌    |         ❌         |                              `{}`                               | Define container(s)                                                      |
+| workload.[workload-name].podSpec.initContainers                      |  `dict`   |    ❌    |         ❌         |                              `{}`                               | Define initContainer(s)                                                  |
 
 ---
 

+ 3 - 4
library/common/templates/helpers/_getPortRange.tpl

@@ -38,15 +38,14 @@ objectData: The object data to be used to render the Pod.
 
           {{- $portToCheck := ($portValues.targetPort | default $portValues.port) -}}
           {{- if kindIs "string" $portToCheck -}}
-            {{/* Helm stores ints as floats, so convert string to float before comparing */}}
-            {{- $portToCheck = (tpl $portToCheck $rootCtx) | float64 -}}
+            {{- $portToCheck = (tpl $portToCheck $rootCtx) | int -}}
           {{- end -}}
 
-          {{- if or (not $portRange.low) (lt $portToCheck ($portRange.low | float64)) -}}
+          {{- if or (not $portRange.low) (lt ($portToCheck | int) ($portRange.low | int)) -}}
             {{- $_ := set $portRange "low" $portToCheck -}}
           {{- end -}}
 
-          {{- if or (not $portRange.high) (gt $portToCheck ($portRange.high | float64)) -}}
+          {{- if or (not $portRange.high) (gt ($portToCheck | int) ($portRange.high | int)) -}}
             {{- $_ := set $portRange "high" $portToCheck -}}
           {{- end -}}
 

+ 2 - 0
library/common/templates/lib/container/_fixedEnv.tpl

@@ -49,6 +49,8 @@ objectData: The object data to be used to render the container.
   {{- $fixed = mustAppend $fixed (dict "k" "UMASK_SET" "v" $UMASK) -}}
   {{- if eq (include "ix.v1.common.lib.container.resources.gpu" (dict "rootCtx" $rootCtx "objectData" $objectData "returnBool" true)) "true" -}}
     {{- $fixed = mustAppend $fixed (dict "k" "NVIDIA_DRIVER_CAPABILITIES" "v" (join "," $nvidiaCaps)) -}}
+  {{- else -}}
+    {{- $fixed = mustAppend $fixed (dict "k" "NVIDIA_VISIBLE_DEVICES" "v" "void") -}}
   {{- end -}}
   {{/* If running as root and PUID is set (0 or greater), set related envs */}}
   {{- if and (or (eq (int $secContext.runAsUser) 0) (eq (int $secContext.runAsGroup) 0)) (ge (int $PUID) 0) -}}

+ 4 - 2
library/common/templates/lib/container/_resources.tpl

@@ -75,10 +75,12 @@ objectData: The object data to be used to render the container.
   {{- if not $returnBool -}}
     {{- range $gpu := $gpuResource -}}
       {{- range $k, $v := $gpu -}}
-        {{- if not $v -}}
+        {{- if or (kindIs "invalid" $v) (eq (toString $v) "") -}}
           {{- fail "Container - Expected non-empty <scaleGPU> <value>" -}}
-        {{- end }}
+        {{- end -}} {{/* Dont try to schedule 0 GPUs */}}
+        {{- if gt (int $v) 0 }}
 {{ $k }}: {{ $v | quote }}
+        {{- end -}}
       {{- end -}}
     {{- end -}}
   {{- else -}}

+ 7 - 7
library/common/templates/lib/externalInterface/_validation.tpl

@@ -28,23 +28,23 @@ objectData: The object data to validate that contains the external interface con
     {{- fail (printf "External Interface - Expected <ipam.type> to be one of [%s], but got [%s]" (join ", " $types) $objectData.ipam.type) -}}
   {{- end -}}
 
-  {{- if and (or $objectData.staticIPConfigurations $objectData.staticRoutes) (ne $objectData.ipam.type "static") -}}
-    {{- fail "External Interface - Expected empty <staticIPConfigurations> and <staticRoutes> when <ipam.type> is not [static]" -}}
+  {{- if and (or $objectData.ipam.staticIPConfigurations $objectData.ipam.staticRoutes) (ne $objectData.ipam.type "static") -}}
+    {{- fail "External Interface - Expected empty <ipam.staticIPConfigurations> and <ipam.staticRoutes> when <ipam.type> is not [static]" -}}
   {{- end -}}
 
   {{- if eq $objectData.ipam.type "static" -}}
-    {{- if not $objectData.staticIPConfigurations -}}
-      {{- fail "External Interface - Expected non-empty <staticIPConfigurations> when <ipam.type> is [static]" -}}
+    {{- if not $objectData.ipam.staticIPConfigurations -}}
+      {{- fail "External Interface - Expected non-empty <ipam.staticIPConfigurations> when <ipam.type> is [static]" -}}
     {{- end -}}
 
-    {{- with $objectData.staticRoutes -}}
+    {{- with $objectData.ipam.staticRoutes -}}
       {{- range . -}}
         {{- if not .destination -}}
-          {{- fail "External Interface - Expected non-empty <destination> in <staticRoutes>" -}}
+          {{- fail "External Interface - Expected non-empty <destination> in <ipam.staticRoutes>" -}}
         {{- end -}}
 
         {{- if not .gateway -}}
-          {{- fail "External Interface - Expected non-empty <gateway> in <staticRoutes>" -}}
+          {{- fail "External Interface - Expected non-empty <gateway> in <ipam.staticRoutes>" -}}
         {{- end -}}
       {{- end -}}
     {{- end -}}

+ 2 - 2
library/common/templates/lib/pod/_podSecurityContext.tpl

@@ -39,7 +39,7 @@ objectData: The object data to be used to render the Pod.
   {{- end -}}
 
   {{- if $gpuAdded -}}
-    {{- $_ := set $secContext "supplementalGroups" (concat $secContext.supplementalGroups (list 44)) -}}
+    {{- $_ := set $secContext "supplementalGroups" (concat $secContext.supplementalGroups (list 44 107)) -}}
   {{- end -}}
 
   {{- $portRange := fromJson (include "ix.v1.common.lib.helpers.securityContext.getPortRange" (dict "rootCtx" $rootCtx "objectData" $objectData)) -}}
@@ -47,7 +47,7 @@ objectData: The object data to be used to render the Pod.
     {{- $_ := set $secContext "sysctls" (mustAppend $secContext.sysctls (dict "name" "net.ipv4.ip_unprivileged_port_start" "value" (printf "%v" $portRange.low))) -}}
   {{- end -}}
 
-  {{- if not $secContext.fsGroup -}}
+  {{- if or (kindIs "invalid" $secContext.fsGroup) (eq (toString $secContext.fsGroup) "") -}}
     {{- fail "Pod - Expected non-empty <fsGroup>" -}}
   {{- end -}}
 

+ 17 - 8
library/common/templates/lib/pod/_runtimeClassName.tpl

@@ -25,19 +25,28 @@ objectData: The object data to be used to render the Pod.
 
       {{- range $rootCtx.Values.scaleGPU -}}
         {{- if .gpu -}} {{/* Make sure it has a value... */}}
+          {{- $gpuAssigned := false -}}
 
-          {{- if (kindIs "map" .targetSelector) -}}
-            {{- range $podName, $containers := .targetSelector -}}
-              {{- if eq $objectData.shortName $podName -}} {{/* If the pod is selected */}}
-                {{- $runtime = $rootCtx.Values.global.ixChartContext.nvidiaRuntimeClassName -}}
-              {{- end -}}
+          {{- range $k, $v := .gpu -}}
+            {{- if $v -}} {{/* Consider assigned only if value is not "0" or "" */}}
+              {{- $gpuAssigned = true -}}
             {{- end -}}
+          {{- end -}}
 
-          {{- else if $objectData.primary -}}
+          {{- if $gpuAssigned -}} {{/* If GPU is actually assigned */}}
+            {{- if (kindIs "map" .targetSelector) -}}
+              {{- range $podName, $containers := .targetSelector -}}
+                {{- if eq $objectData.shortName $podName -}} {{/* If the pod is selected */}}
+                  {{- $runtime = $rootCtx.Values.global.ixChartContext.nvidiaRuntimeClassName -}}
+                {{- end -}}
+              {{- end -}}
 
-            {{/* If the pod is primary and no targetSelector is given, assign to primary */}}
-            {{- $runtime = $rootCtx.Values.global.ixChartContext.nvidiaRuntimeClassName -}}
+            {{- else if $objectData.primary -}}
 
+              {{/* If the pod is primary and no targetSelector is given, assign to primary */}}
+              {{- $runtime = $rootCtx.Values.global.ixChartContext.nvidiaRuntimeClassName -}}
+
+            {{- end -}}
           {{- end -}}
         {{- end -}}
       {{- end -}}