|
@@ -0,0 +1,265 @@
|
|
|
+groups:
|
|
|
+ - name: FSCrawler Configuration
|
|
|
+ description: Configure FSCrawler
|
|
|
+ - name: Network Configuration
|
|
|
+ description: Configure Network for FSCrawler
|
|
|
+ - name: Storage Configuration
|
|
|
+ description: Configure Storage for FSCrawler
|
|
|
+ - name: Resources Configuration
|
|
|
+ description: Configure Resources for FSCrawler
|
|
|
+
|
|
|
+questions:
|
|
|
+ - variable: TZ
|
|
|
+ group: FSCrawler Configuration
|
|
|
+ label: Timezone
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ default: Etc/UTC
|
|
|
+ required: true
|
|
|
+ $ref:
|
|
|
+ - definitions/timezone
|
|
|
+
|
|
|
+ - variable: fscrawlerConfig
|
|
|
+ label: ""
|
|
|
+ group: FSCrawler Configuration
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: imageSelector
|
|
|
+ label: Image
|
|
|
+ description: |
|
|
|
+ The image to use for FSCrawler.</br>
|
|
|
+ Images with OCR support are a lot larger than images without OCR support.</br>
|
|
|
+ Approximate image sizes:</br>
|
|
|
+ - With OCR Support: 1.2GB</br>
|
|
|
+ - Without OCR Support: 0.5GB
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ default: "ocrImage"
|
|
|
+ required: true
|
|
|
+ enum:
|
|
|
+ - value: "ocrImage"
|
|
|
+ description: With OCR Support - Elasticsearch 7 and 8
|
|
|
+ - value: "noocrImage"
|
|
|
+ description: Without OCR Support - Elasticsearch 7 and 8
|
|
|
+ - variable: jobName
|
|
|
+ label: Job Name
|
|
|
+ description: |
|
|
|
+ The name of the FSCrawler job to run. </br>
|
|
|
+ A _settings.yaml file in the directory named after the job name will have to be manually created.
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ default: ""
|
|
|
+ required: true
|
|
|
+ - variable: loop
|
|
|
+ label: Loop
|
|
|
+ description: |
|
|
|
+ The number of times to run the job.</br>
|
|
|
+ https://fscrawler.readthedocs.io/en/latest/admin/cli-options.html#loop </br>
|
|
|
+ -1 means run forever. </br>
|
|
|
+ 0 means never run. </br>
|
|
|
+ schema:
|
|
|
+ type: int
|
|
|
+ default: -1
|
|
|
+ required: true
|
|
|
+ min: -1
|
|
|
+ - variable: restart
|
|
|
+ label: Restart
|
|
|
+ description: |
|
|
|
+ Restart the job from the beginning.</br>
|
|
|
+ https://fscrawler.readthedocs.io/en/latest/admin/cli-options.html#restart
|
|
|
+ schema:
|
|
|
+ type: boolean
|
|
|
+ default: false
|
|
|
+ - variable: additionalEnvs
|
|
|
+ label: Additional Environment Variables
|
|
|
+ description: Configure additional environment variables for FSCrawler.
|
|
|
+ schema:
|
|
|
+ type: list
|
|
|
+ default: []
|
|
|
+ items:
|
|
|
+ - variable: env
|
|
|
+ label: Environment Variable
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: name
|
|
|
+ label: Name
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ required: true
|
|
|
+ - variable: value
|
|
|
+ label: Value
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ required: true
|
|
|
+
|
|
|
+ - variable: fscrawlerNetwork
|
|
|
+ label: ""
|
|
|
+ group: Network Configuration
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: enableRestApiService
|
|
|
+ label: Enable Rest API Service
|
|
|
+ description: |
|
|
|
+ Enable Rest API Service for FSCrawler.</br>
|
|
|
+ https://fscrawler.readthedocs.io/en/latest/admin/fs/rest.html</br>
|
|
|
+ Additional configuration is needed in the job file. Check the Notes card
|
|
|
+ after installation for more information.
|
|
|
+ schema:
|
|
|
+ type: boolean
|
|
|
+ default: false
|
|
|
+ - variable: restPort
|
|
|
+ label: Rest Port
|
|
|
+ description: The port to use for the Rest API Service.
|
|
|
+ schema:
|
|
|
+ type: int
|
|
|
+ show_if: [["enableRestApiService", "=", true]]
|
|
|
+ default: 30084
|
|
|
+ min: 9000
|
|
|
+ max: 65535
|
|
|
+ required: true
|
|
|
+ - variable: hostNetwork
|
|
|
+ label: Host Network
|
|
|
+ description: |
|
|
|
+ Bind to the host network. It's recommended to keep this disabled.
|
|
|
+ schema:
|
|
|
+ type: boolean
|
|
|
+ default: false
|
|
|
+
|
|
|
+ - variable: fscrawlerStorage
|
|
|
+ label: ""
|
|
|
+ group: Storage Configuration
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: jobs
|
|
|
+ label: FSCrawler Jobs Storage
|
|
|
+ description: The path to store FSCrawler Jobs.
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: type
|
|
|
+ label: Type
|
|
|
+ description: |
|
|
|
+ ixVolume: Is dataset created automatically by the system.</br>
|
|
|
+ Host Path: Is a path that already exists on the system.
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ required: true
|
|
|
+ default: "ixVolume"
|
|
|
+ enum:
|
|
|
+ - value: "hostPath"
|
|
|
+ description: Host Path (Path that already exists on the system)
|
|
|
+ - value: "ixVolume"
|
|
|
+ description: ixVolume (Dataset created automatically by the system)
|
|
|
+ - variable: datasetName
|
|
|
+ label: Dataset Name
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ show_if: [["type", "=", "ixVolume"]]
|
|
|
+ required: true
|
|
|
+ hidden: true
|
|
|
+ immutable: true
|
|
|
+ default: "jobs"
|
|
|
+ $ref:
|
|
|
+ - "normalize/ixVolume"
|
|
|
+ - variable: hostPath
|
|
|
+ label: Host Path
|
|
|
+ schema:
|
|
|
+ type: hostpath
|
|
|
+ show_if: [["type", "=", "hostPath"]]
|
|
|
+ immutable: true
|
|
|
+ required: true
|
|
|
+ - variable: additionalStorages
|
|
|
+ label: Additional Storage
|
|
|
+ description: Additional storage for FSCrawler.
|
|
|
+ schema:
|
|
|
+ type: list
|
|
|
+ default: []
|
|
|
+ items:
|
|
|
+ - variable: storageEntry
|
|
|
+ label: Storage Entry
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: type
|
|
|
+ label: Type
|
|
|
+ description: |
|
|
|
+ ixVolume: Is dataset created automatically by the system.</br>
|
|
|
+ Host Path: Is a path that already exists on the system.
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ required: true
|
|
|
+ default: "ixVolume"
|
|
|
+ enum:
|
|
|
+ - value: "hostPath"
|
|
|
+ description: Host Path (Path that already exists on the system)
|
|
|
+ - value: "ixVolume"
|
|
|
+ description: ixVolume (Dataset created automatically by the system)
|
|
|
+ - variable: mountPath
|
|
|
+ label: Mount Path
|
|
|
+ description: The path inside the container to mount the storage.
|
|
|
+ schema:
|
|
|
+ type: path
|
|
|
+ required: true
|
|
|
+ - variable: hostPath
|
|
|
+ label: Host Path
|
|
|
+ description: The host path to use for storage.
|
|
|
+ schema:
|
|
|
+ type: hostpath
|
|
|
+ show_if: [["type", "=", "hostPath"]]
|
|
|
+ required: true
|
|
|
+ - variable: datasetName
|
|
|
+ label: Dataset Name
|
|
|
+ description: The name of the dataset to use for storage.
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ show_if: [["type", "=", "ixVolume"]]
|
|
|
+ required: true
|
|
|
+ immutable: true
|
|
|
+ default: "storage_entry"
|
|
|
+ $ref:
|
|
|
+ - "normalize/ixVolume"
|
|
|
+
|
|
|
+ - variable: resources
|
|
|
+ group: Resources Configuration
|
|
|
+ label: ""
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: limits
|
|
|
+ label: Limits
|
|
|
+ schema:
|
|
|
+ type: dict
|
|
|
+ attrs:
|
|
|
+ - variable: cpu
|
|
|
+ label: CPU
|
|
|
+ description: CPU limit for FSCrawler.
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ max_length: 6
|
|
|
+ valid_chars: '^(0\.[1-9]|[1-9][0-9]*)(\.[0-9]|m?)$'
|
|
|
+ valid_chars_error: |
|
|
|
+ Valid CPU limit formats are</br>
|
|
|
+ - Plain Integer - eg. 1</br>
|
|
|
+ - Float - eg. 0.5</br>
|
|
|
+ - Milicpu - eg. 500m
|
|
|
+ default: "4000m"
|
|
|
+ required: true
|
|
|
+ - variable: memory
|
|
|
+ label: Memory
|
|
|
+ description: Memory limit for FSCrawler.
|
|
|
+ schema:
|
|
|
+ type: string
|
|
|
+ max_length: 12
|
|
|
+ valid_chars: '^[1-9][0-9]*([EPTGMK]i?|e[0-9]+)?$'
|
|
|
+ valid_chars_error: |
|
|
|
+ Valid Memory limit formats are</br>
|
|
|
+ - Suffixed with E/P/T/G/M/K - eg. 1G</br>
|
|
|
+ - Suffixed with Ei/Pi/Ti/Gi/Mi/Ki - eg. 1Gi</br>
|
|
|
+ - Plain Integer in bytes - eg. 1024</br>
|
|
|
+ - Exponent - eg. 134e6
|
|
|
+ default: "8Gi"
|
|
|
+ required: true
|