# initial_scripts.yaml
  1. apiVersion: v1
  2. kind: ConfigMap
  3. metadata:
  4. name: "diskover-initial-scripts"
  5. annotations:
  6. rollme: {{ randAlphaNum 5 | quote }}
  7. data:
  wait_for_elastic_search.py: |-
    # Block until Elasticsearch answers HTTP on $ES_HOST:$ES_PORT, polling
    # every 3 seconds.  After ~100 attempts a ConnectTimeout is raised and
    # caught below, so the script prints the error and stops waiting.
    # NOTE(review): the timeout path still exits with status 0, so a dependent
    # container would proceed even though ES never came up -- confirm that is
    # intended before tightening it.
    import requests
    import os
    import time

    timeout = 100
    while True:
        try:
            if timeout < 0:
                print("timeout")
                raise requests.exceptions.ConnectTimeout("Elasticsearch is not responding")
            timeout -= 1
            response = requests.get(f"http://{os.environ['ES_HOST']}:{os.environ['ES_PORT']}")
            if response.status_code == 200:
                # ES is up and serving its root endpoint -- done waiting.
                break
        except requests.exceptions.ConnectTimeout as e:
            # Our own give-up signal above (or a per-request timeout).
            print(e)
            break
        except requests.exceptions.ConnectionError:
            print("Trying to connect to elastic search")
        # Pause between attempts (also reached on non-200 responses).
        time.sleep(3)
  .default_crawler.sh: |-
    #!/bin/sh
    # $1: path of the diskover crawler script (copied into the container
    #     asynchronously); $2: directory tree to index.
    # Poll until the crawler script exists, ensure the target directory is
    # non-empty (empty dirs generate no index), then run the crawl once.
    while :
    do
        # this condition waits for the script to be copied into the container
        if test -f "$1"; then
            # Empty folders don't generate indices; if the folder is empty a
            # default file is generated.  Expansions are quoted so paths with
            # spaces survive word splitting.
            if ! [ "$(ls -A "$2")" ]; then
                echo "Dummy file created as empty dirs are rejected" > "$2"/diskover_test.txt;
            fi
            python3 "$1" "$2"/;
            break;
        fi
        sleep 5
    done
  init_config.py: |-
    # Render diskover-web's Constants.php from environment variables (TZ,
    # ES_HOST, ES_PORT, ES_USER, ES_PASS, DS_USER, DS_PASS) and write it to
    # $DEST/$FILE.
    #
    # Templating note: on the "class Constants" line, a doubled-brace template
    # action containing a backquoted raw string opens and runs down to the
    # matching markers after the last const, so Helm emits the whole PHP body
    # verbatim.  The Python f-string then collapses the doubled braces into
    # literal braces and substitutes the {os.environ[...]} placeholders.
    # Do not reformat those marker lines.
    import os

    Config = f"""<?php
    namespace diskover;
    class Constants {{`{{
    const TIMEZONE = '{os.environ['TZ']}';
    const ES_HOST = '{os.environ['ES_HOST']}';
    const ES_PORT = {os.environ['ES_PORT']};
    const ES_USER = '{os.environ['ES_USER']}';
    const ES_PASS = '{os.environ['ES_PASS']}';
    // if your Elasticsearch cluster uses HTTP TLS/SSL, set ES_HTTPS to TRUE
    // override with env var ES_HTTPS
    const ES_HTTPS = FALSE;
    // login auth for diskover-web
    const LOGIN_REQUIRED = TRUE;
    // default username and password to login
    // the password is no longer used after first login, a hashed password gets stored in separate sqlite db
    const USER = '{os.environ['DS_USER']}';
    const PASS = '{os.environ['DS_PASS']}';
    // default results per search page
    const SEARCH_RESULTS = 50;
    // default size field (size, size_du) to use for sizes on file tree and charts
    const SIZE_FIELD = 'size';
    // default file types, used by quick search (file type) and dashboard file type usage chart
    // additional extensions can be added/removed from each file types list
    const FILE_TYPES = [
    'docs' => ['doc', 'docx', 'odt', 'pdf', 'tex', 'wpd', 'wks', 'txt', 'rtf', 'key', 'odp', 'pps', 'ppt', 'pptx', 'ods', 'xls', 'xlsm', 'xlsx'],
    'images' => ['ai', 'bmp', 'gif', 'ico', 'jpeg', 'jpg', 'png', 'ps', 'psd', 'psp', 'svg', 'tif', 'tiff', 'exr', 'tga'],
    'video' => ['3g2', '3gp', 'avi', 'flv', 'h264', 'm4v', 'mkv', 'qt', 'mov', 'mp4', 'mpg', 'mpeg', 'rm', 'swf', 'vob', 'wmv', 'ogg', 'ogv', 'webm'],
    'audio' => ['au', 'aif', 'aiff', 'cda', 'mid', 'midi', 'mp3', 'm4a', 'mpa', 'ogg', 'wav', 'wma', 'wpl'],
    'apps' => ['apk', 'exe', 'bat', 'bin', 'cgi', 'pl', 'gadget', 'com', 'jar', 'msi', 'py', 'wsf'],
    'programming' => ['c', 'cgi', 'pl', 'class', 'cpp', 'cs', 'h', 'java', 'php', 'py', 'sh', 'swift', 'vb'],
    'internet' => ['asp', 'aspx', 'cer', 'cfm', 'cgi', 'pl', 'css', 'htm', 'html', 'js', 'jsp', 'part', 'php', 'py', 'rss', 'xhtml'],
    'system' => ['bak', 'cab', 'cfg', 'cpl', 'cur', 'dll', 'dmp', 'drv', 'icns', 'ico', 'ini', 'lnk', 'msi', 'sys', 'tmp', 'vdi', 'raw'],
    'data' => ['csv', 'dat', 'db', 'dbf', 'log', 'mdb', 'sav', 'sql', 'tar', 'xml'],
    'disc' => ['bin', 'dmg', 'iso', 'toast', 'vcd', 'img'],
    'compressed' => ['7z', 'arj', 'deb', 'pkg', 'rar', 'rpm', 'tar', 'gz', 'z', 'zip'],
    'trash' => ['old', 'trash', 'tmp', 'temp', 'junk', 'recycle', 'delete', 'deleteme', 'clean', 'remove']
    ];
    // extra fields for search results and view file/dir info pages
    // key is description for field and value is ES field name
    // Example:
    //const EXTRA_FIELDS = [
    // 'Date Changed' => 'ctime'
    //];
    const EXTRA_FIELDS = [];
    // Maximum number of indices to load by default, indices are loaded in order by creation date
    // setting this too high can cause slow logins and other timeout issues
    // This setting can bo overridden on indices page per user and stored in maxindex cookie
    // If MAX_INDEX is set higher than maxindex browser cookie, the cookie will be set to this value
    const MAX_INDEX = 250;
    // time in seconds for index info to be cached, clicking reload indices forces update
    const INDEXINFO_CACHETIME = 600;
    // time in seconds to check Elasticsearch for new index info
    const NEWINDEX_CHECKTIME = 10;
    // sqlite database file path
    const DATABASE = '../diskoverdb.sqlite3';
    }}`}}
    """

    # $DEST is the config directory, $FILE the file name to write within it.
    os.makedirs(os.environ['DEST'], exist_ok=True)
    path = os.path.join(os.environ['DEST'], os.environ['FILE'])
    with open(path, 'w') as w:
        w.write(Config)
  initial_es_config.py: |-
    # Render the diskover crawler config.yaml from environment variables
    # (ES_HOST, ES_PORT, ES_USER, ES_PASS) and write it to $DEST/$FILE.
    #
    # NOTE(review): the nesting below follows diskover's stock config layout
    # (excludes/includes/ownersgroups/replacepaths/plugins/other under
    # "diskover:", elasticsearch under "databases:") -- confirm against the
    # diskover version in use.
    # Literal backslashes inside the f-string are written as \\ : a lone
    # trailing backslash before a newline would otherwise act as a line
    # continuation and corrupt the emitted YAML (e.g. on domainsep).
    import os

    Config = f"""# diskover default/sample config file
    #
    # default search paths for config
    # macOS: ~/.config/diskover and ~/Library/Application Support/diskover
    # Other Unix: ~/.config/diskover and /etc/diskover
    # Windows: %APPDATA%\\diskover where the APPDATA environment variable falls back to %HOME%\\AppData\\Roaming if undefined
    #
    appName: diskover
    #logLevel: WARN
    #logLevel: DEBUG
    logLevel: INFO
    logToFile: False
    #logToFile: True
    logDirectory: /tmp/
    diskover:
        # max number of crawl threads
        # a thread is created up to maxthreads for each directory at level 1 of tree dir arg
        # set to a number or leave blank to auto set based on number of cpus
        #maxthreads: 20
        maxthreads:
        # block size used for du size
        blocksize: 512
        excludes:
            # directory names and absolute paths you want to exclude from crawl
            # directory excludes uses python re.search for string search (regex)
            # directory excludes are case-sensitive
            # Examples: .* or .backup or .backup* or /dir/dirname
            # to exclude none use empty list []
            dirs: [".*", ".snapshot", ".Snapshot", "~snapshot", "~Snapshot", ".zfs"]
            #dirs: []
            # files you want to exclude from crawl
            # can include wildcards (.*, *.doc or NULLEXT for files with no extension)
            # file names are case-sensitive, extensions are not
            files: [".*", "Thumbs.db", ".DS_Store", "._.DS_Store", ".localized", "desktop.ini"]
            #files: []
            # exclude empty 0 byte files, set to True to exclude empty files or False to not exclude
            emptyfiles: True
            # exclude empty dirs, set to True to exclude empty dirs or False to not exclude
            emptydirs: True
            # exclude files smaller than min size in bytes
            minfilesize: 1
            #minfilesize: 512
            # exclude files modified less than x days ago
            minmtime: 0
            #minmtime: 30
            # exclude files modified more than x days ago
            maxmtime: 36500
            # exclude files changed less than x days ago
            minctime: 0
            # exclude files changed more than x days ago
            maxctime: 36500
            # exclude files accessed less than x days ago
            minatime: 0
            # exclude files accessed more than x days ago
            maxatime: 36500
        includes:
            # directory names and absolute paths you want to include (whitelist), case-sensitive,
            # to include none use empty list []
            #dirs: [".recycle"]
            dirs: []
            # files you want to include (whitelist), case-sensitive
            files: []
        ownersgroups:
            # control how owner (username) and group fields are stored for file and directory docs
            # store uid and gid's instead of trying to get owner and group names
            uidgidonly: False
            # owner/group names contain domain name set to True
            domain: False
            # character separator used on cifs/nfs mounts to separte user/group and domain name, usually \\ or @
            domainsep: \\
            # if domain name comes first before character separator, set this to True, otherwise False
            domainfirst: True
            # when indexing owner and group fields, keep the domain name
            keepdomain: False
        replacepaths:
            # translate path names set to True to enable or False to disable.
            # Set to True if crawling in Windows to replace drive letters and \\ with /
            replace: False
            #from: /mnt/
            #to: /vols/
            from:
            to:
        plugins:
            # set to True to enable all plugins or False to disable all plugins
            enable: False
            # list of plugins (by name) to use for directories
            dirs: ['unixperms']
            # list of plugins (by name) to use for files
            files: ['unixperms']
        other:
            # restore atime/mtime for files and dirs during crawl
            # set to True or False, default False (useful for cifs which does not work with noatime mount option)
            # for nfs, it's preferable to use mount options ro,noatime,nodiratime
            restoretimes: False
    databases:
        elasticsearch:
            host: '{os.environ['ES_HOST']}'
            port: {os.environ['ES_PORT']}
            #user: elastic
            #password: changeme
            user: '{os.environ['ES_USER']}'
            password: '{os.environ['ES_PASS']}'
            # set https to True if using HTTP TLS/SSL or False if using http
            # for AWS ES, you will most likely want to set this to True
            # override with env var ES_HTTPS
            https: False
            # compress http data
            # for AWS ES, you will most likely want to set this to True
            httpcompress: False
            # timeout for connection to ES (default is 10)
            timeout: 30
            # number of connections kept open to ES when crawling (default is 10)
            maxsize: 20
            # max retries for ES operations (default is 0)
            maxretries: 10
            # wait for at least yellow status before bulk uploading (default is False), set to True if you want to wait
            wait: False
            # chunk size for ES bulk operations (default is 500)
            chunksize: 1000
            # the below settings are to optimize ES for crawling
            # index refresh interval (default is 1s), set to -1 to disable refresh during crawl (fastest performance but no index searches), after crawl is set back to 1s
            indexrefresh: 30s
            # transaction log flush threshold size (default 512mb)
            translogsize: 1gb
            # transaction log sync interval time (default 5s)
            translogsyncint: 30s
            # search scroll size (default 100 docs)
            scrollsize: 1000
    """

    # $DEST is the config directory, $FILE the file name to write within it.
    os.makedirs(os.environ['DEST'], exist_ok=True)
    path = os.path.join(os.environ['DEST'], os.environ['FILE'])
    with open(path, 'w') as w:
        w.write(Config)