Compare commits

..

2 Commits

Author SHA1 Message Date
il 1096981ef2 feat(paperless): change paperless OCR engine model from tesseract_fast to tesseract_best 2026-05-12 08:00:37 +09:00
il e1936b494d fix(crowdsec): update whitelist.yaml to prevent false positive
false positive:
- nextcloud chunk problem (crowdsecurity/http-crawl-non_statics)
- change expression 'chunks.mjs' to 'chunk.mjs'
2026-05-11 19:40:50 +09:00
6 changed files with 43 additions and 5 deletions
@@ -57,8 +57,16 @@
- "data/containers/paperless/consume"
- "containers/paperless"
- "containers/paperless/ssl"
- "containers/paperless/build"
become: true
# Render the paperless containerfile Jinja2 template and write it as
# build/Containerfile under this node's home path.
# The template source is resolved through the 'console' host's hostvars
# (its node.config_path); the result is owned by ansible_user, group
# "svadmins", with mode 0640.
- name: Deploy containerfile for build
ansible.builtin.template:
src: "{{ hostvars['console']['node']['config_path'] }}/services/containers/app/paperless/build/paperless.containerfile.j2"
dest: "{{ node['home_path'] }}/containers/paperless/build/Containerfile"
owner: "{{ ansible_user }}"
group: "svadmins"
mode: "0640"
- name: Deploy root certificate
ansible.builtin.copy:
@@ -72,6 +80,18 @@
notify: "notification_restart_paperless"
no_log: true
# Build the custom paperless-ngx image from the directory that holds the
# deployed Containerfile. The resulting image is named
# <domain.internal>/<node.name>/paperless-ngx.
- name: Build paperless container image
containers.podman.podman_image:
name: "{{ domain['internal'] }}/{{ node['name'] }}/paperless-ngx"
# The tag uses the same version variable as the base image tag in the
# containerfile template, so the custom image tracks the upstream version.
tag: "{{ version['containers']['paperless'] }}"
state: "build"
# Build context: the directory the Containerfile was deployed into.
path: "{{ node['home_path'] }}/containers/paperless/build"
# Run podman prune with image pruning enabled. Per the task name this is
# meant to clean up dangling images, presumably those left untagged after the
# image build task.
# NOTE(review): no filter is set, so the prune is not limited to paperless
# images - confirm that is intended.
- name: Prune paperless dangling images
containers.podman.podman_prune:
image: true
- name: Register secret value to podman secret
containers.podman.podman_secret:
name: "{{ item.name }}"
@@ -129,8 +149,8 @@
loop:
- image: "docker.io/library/redis:{{ version['containers']['redis'] }}"
file: "docker.io_library_redis_{{ version['containers']['redis'] }}"
- image: "ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}"
file: "ghcr.io_paperless-ngx_paperless-ngx_{{ version['containers']['paperless'] }}"
# Locally built paperless image. The reference must match the name produced by
# the "Build paperless container image" task, so it is derived from
# domain['internal'] instead of a hard-coded domain.
- image: "{{ domain['internal'] }}/{{ node['name'] }}/paperless-ngx:{{ version['containers']['paperless'] }}"
  # Archive file name mirrors the image reference with '/' and ':' replaced by '_'.
  file: "{{ domain['internal'] }}_{{ node['name'] }}_paperless-ngx_{{ version['containers']['paperless'] }}"
loop_control:
label: "{{ item.file }}"
register: container_archive_images
@@ -0,0 +1,13 @@
# Custom paperless-ngx image: downloads the Korean and English models from
# tesseract-ocr/tessdata_best into the image's tessdata directory, overwriting
# any files of the same name.
FROM ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}

# Git ref (branch, tag or commit) of tesseract-ocr/tessdata_best to download
# from. Defaults to "main", which is the original behavior; pass
# --build-arg TESSDATA_REF=<ref> to pin the models for a reproducible build.
ARG TESSDATA_REF=main
# Directory the model files are written to.
# NOTE(review): the path embeds Tesseract major version 5 - confirm it still
# exists whenever the base image version is bumped.
ARG TESSDATA_DIR=/usr/share/tesseract-ocr/5/tessdata

# Package installation and writing under /usr/share are done as root.
USER root
# "|| exit 1" makes a failed download abort the build; without it only the
# exit status of the last loop iteration would be checked.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && for lang in kor eng; do \
         curl -fsSL "https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/${TESSDATA_REF}/${lang}.traineddata" \
              -o "${TESSDATA_DIR}/${lang}.traineddata" || exit 1; \
       done \
    && rm -rf /var/lib/apt/lists/*

# Switch back to the paperless user for everything that runs after the build step.
USER paperless
@@ -8,7 +8,7 @@ After=redis_paperless.service
Wants=redis_paperless.service
[Container]
Image=ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}
# Must match the image name and tag produced by the "Build paperless container
# image" Ansible task, so it is derived from the same variables instead of a
# hard-coded domain and node name.
Image={{ domain['internal'] }}/{{ node['name'] }}/paperless-ngx:{{ version['containers']['paperless'] }}
ContainerName=paperless
HostName=paperless
PublishPort={{ services['paperless']['ports']['http'] }}:8000/tcp
@@ -17,5 +17,5 @@ whitelist:
# nextcloud thumbnail/preview request error false positive
- "evt.Meta.target_fqdn == '{{ services['nextcloud']['domain']['public'] }}.{{ domain['public'] }}' && evt.Meta.http_status == '404' && evt.Meta.http_verb == 'GET' && evt.Meta.http_path startsWith '/index.php/core/preview?'"
# nextcloud 'chunk.mjs' request false positive (crowdsecurity/http-crawl-non_statics)
- "evt.Meta.target_fqdn == '{{ services['nextcloud']['domain']['public'] }}.{{ domain['public'] }}' && evt.Meta.http_status in ['200', '304'] && evt.Meta.http_verb == 'GET' && evt.Meta.http_path contains 'chunks.mjs'"
- "evt.Meta.target_fqdn == '{{ services['nextcloud']['domain']['public'] }}.{{ domain['public'] }}' && evt.Meta.http_status in ['200', '304'] && evt.Meta.http_verb == 'GET' && evt.Meta.http_path contains 'chunk.mjs'"
{% endif %}
+1 -1
View File
@@ -30,7 +30,7 @@
- Install crowdsecurity/nextcloud-whitelist on auth node
- Add the following expressions to the whitelist
- evt.Meta.target_fqdn == '{{ services['nextcloud']['domain']['public'] }}.{{ domain['public'] }}' && evt.Meta.http_status == '404' && evt.Meta.http_verb == 'GET' && evt.Meta.http_path startsWith '/index.php/core/preview?'
- evt.Meta.target_fqdn == '{{ services['nextcloud']['domain']['public'] }}.{{ domain['public'] }}' && evt.Meta.http_status in ['200', '304'] && evt.Meta.http_verb == 'GET' && evt.Meta.http_path contains 'chunks.mjs'
- evt.Meta.target_fqdn == '{{ services['nextcloud']['domain']['public'] }}.{{ domain['public'] }}' && evt.Meta.http_status in ['200', '304'] && evt.Meta.http_verb == 'GET' && evt.Meta.http_path contains 'chunk.mjs'
### Deprecated solution
- Access to fw
+5
View File
@@ -45,6 +45,11 @@ ALTER DATABASE paperless_db OWNER TO paperless;
- "paperless"
```
### Paperless custom build
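- The upstream paperless-ngx image uses the 'tesseract_fast' model.
- A custom container image is built on top of it to use the 'tesseract_best' model (kor and eng, downloaded from the tesseract-ocr/tessdata_best repository), which improves OCR accuracy.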
## Configuration
### Access to paperless