feat(paperless): change paperless OCR engine model from tesseract_fast to tesseract_best
This commit is contained in:
@@ -57,8 +57,16 @@
|
|||||||
- "data/containers/paperless/consume"
|
- "data/containers/paperless/consume"
|
||||||
- "containers/paperless"
|
- "containers/paperless"
|
||||||
- "containers/paperless/ssl"
|
- "containers/paperless/ssl"
|
||||||
|
- "containers/paperless/build"
|
||||||
become: true
|
become: true
|
||||||
|
|
||||||
|
- name: Deploy containerfile for build
|
||||||
|
ansible.builtin.template:
|
||||||
|
src: "{{ hostvars['console']['node']['config_path'] }}/services/containers/app/paperless/build/paperless.containerfile.j2"
|
||||||
|
dest: "{{ node['home_path'] }}/containers/paperless/build/Containerfile"
|
||||||
|
owner: "{{ ansible_user }}"
|
||||||
|
group: "svadmins"
|
||||||
|
mode: "0640"
|
||||||
|
|
||||||
- name: Deploy root certificate
|
- name: Deploy root certificate
|
||||||
ansible.builtin.copy:
|
ansible.builtin.copy:
|
||||||
@@ -72,6 +80,18 @@
|
|||||||
notify: "notification_restart_paperless"
|
notify: "notification_restart_paperless"
|
||||||
no_log: true
|
no_log: true
|
||||||
|
|
||||||
|
- name: Build paperless container image
|
||||||
|
containers.podman.podman_image:
|
||||||
|
name: "{{ domain['internal'] }}/{{ node['name'] }}/paperless-ngx"
|
||||||
|
# check tags from container file
|
||||||
|
tag: "{{ version['containers']['paperless'] }}"
|
||||||
|
state: "build"
|
||||||
|
path: "{{ node['home_path'] }}/containers/paperless/build"
|
||||||
|
|
||||||
|
- name: Prune paperless dangling images
|
||||||
|
containers.podman.podman_prune:
|
||||||
|
image: true
|
||||||
|
|
||||||
- name: Register secret value to podman secret
|
- name: Register secret value to podman secret
|
||||||
containers.podman.podman_secret:
|
containers.podman.podman_secret:
|
||||||
name: "{{ item.name }}"
|
name: "{{ item.name }}"
|
||||||
@@ -129,8 +149,8 @@
|
|||||||
loop:
|
loop:
|
||||||
- image: "docker.io/library/redis:{{ version['containers']['redis'] }}"
|
- image: "docker.io/library/redis:{{ version['containers']['redis'] }}"
|
||||||
file: "docker.io_library_redis_{{ version['containers']['redis'] }}"
|
file: "docker.io_library_redis_{{ version['containers']['redis'] }}"
|
||||||
- image: "ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}"
|
- image: "ilnmors.internal/{{ node['name'] }}/paperless-ngx:{{ version['containers']['paperless'] }}"
|
||||||
file: "ghcr.io_paperless-ngx_paperless-ngx_{{ version['containers']['paperless'] }}"
|
file: "ilnmors.internal_{{ node['name'] }}_paperless-ngx_{{ version['containers']['paperless'] }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
label: "{{ item.file }}"
|
label: "{{ item.file }}"
|
||||||
register: container_archive_images
|
register: container_archive_images
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
FROM ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}
|
||||||
|
|
||||||
|
USER root
|
||||||
|
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends curl ca-certificates \
|
||||||
|
&& curl -fsSL https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/kor.traineddata \
|
||||||
|
-o /usr/share/tesseract-ocr/5/tessdata/kor.traineddata \
|
||||||
|
&& curl -fsSL https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/eng.traineddata \
|
||||||
|
-o /usr/share/tesseract-ocr/5/tessdata/eng.traineddata \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
USER paperless
|
||||||
@@ -8,7 +8,7 @@ After=redis_paperless.service
|
|||||||
Wants=redis_paperless.service
|
Wants=redis_paperless.service
|
||||||
|
|
||||||
[Container]
|
[Container]
|
||||||
Image=ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}
|
Image=ilnmors.internal/app/paperless-ngx:{{ version['containers']['paperless'] }}
|
||||||
ContainerName=paperless
|
ContainerName=paperless
|
||||||
HostName=paperless
|
HostName=paperless
|
||||||
PublishPort={{ services['paperless']['ports']['http'] }}:8000/tcp
|
PublishPort={{ services['paperless']['ports']['http'] }}:8000/tcp
|
||||||
|
|||||||
@@ -45,6 +45,11 @@ ALTER DATABASE paperless_db OWNER TO paperless;
|
|||||||
- "paperless"
|
- "paperless"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Paperless custom build
|
||||||
|
|
||||||
|
- By default, paperless-ngx uses the Tesseract 'fast' models (tessdata_fast).
|
||||||
|
- A custom container image is built that replaces the Korean and English models with the 'best' models (tessdata_best) to improve OCR accuracy.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
### Access to paperless
|
### Access to paperless
|
||||||
|
|||||||
Reference in New Issue
Block a user