feat(paperless): change paperless OCR engine model from tesseract_fast to tesseract_best
This commit is contained in:
@@ -57,8 +57,16 @@
|
||||
- "data/containers/paperless/consume"
|
||||
- "containers/paperless"
|
||||
- "containers/paperless/ssl"
|
||||
- "containers/paperless/build"
|
||||
become: true
|
||||
|
||||
# Render the paperless Containerfile template (taken from the config path of
# the inventory host "console") into this node's paperless build directory.
- name: Deploy containerfile for build
  ansible.builtin.template:
    dest: "{{ node['home_path'] }}/containers/paperless/build/Containerfile"
    src: "{{ hostvars['console']['node']['config_path'] }}/services/containers/app/paperless/build/paperless.containerfile.j2"
    # Read/write for the owner, read-only for the group, nothing for others.
    mode: "0640"
    owner: "{{ ansible_user }}"
    group: "svadmins"
|
||||
|
||||
- name: Deploy root certificate
|
||||
ansible.builtin.copy:
|
||||
@@ -72,6 +80,18 @@
|
||||
notify: "notification_restart_paperless"
|
||||
no_log: true
|
||||
|
||||
# Build the customised paperless-ngx image from the build directory and tag
# it under the internal domain / node name namespace.
# NOTE(review): if the Containerfile changes without a version bump, confirm
# whether `state: build` rebuilds an already-existing tag or whether
# `force: true` is required.
- name: Build paperless container image
  containers.podman.podman_image:
    path: "{{ node['home_path'] }}/containers/paperless/build"
    name: "{{ domain['internal'] }}/{{ node['name'] }}/paperless-ngx"
    # Keep this tag in sync with the tag used in the Containerfile.
    tag: "{{ version['containers']['paperless'] }}"
    state: "build"
|
||||
|
||||
# Clean up images through podman's prune command after the build.
# NOTE(review): no filter is given, so this is presumably not limited to
# paperless images - confirm that pruning every dangling image is intended.
- name: Prune paperless dangling images
  containers.podman.podman_prune:
    image: true
|
||||
|
||||
- name: Register secret value to podman secret
|
||||
containers.podman.podman_secret:
|
||||
name: "{{ item.name }}"
|
||||
@@ -129,8 +149,8 @@
|
||||
loop:
|
||||
- image: "docker.io/library/redis:{{ version['containers']['redis'] }}"
|
||||
file: "docker.io_library_redis_{{ version['containers']['redis'] }}"
|
||||
- image: "ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}"
|
||||
file: "ghcr.io_paperless-ngx_paperless-ngx_{{ version['containers']['paperless'] }}"
|
||||
- image: "ilnmors.internal/{{ node['name'] }}/paperless-ngx:{{ version['containers']['paperless'] }}"
|
||||
file: "ilnmors.internal_{{ node['name'] }}_paperless-ngx_{{ version['containers']['paperless'] }}"
|
||||
loop_control:
|
||||
label: "{{ item.file }}"
|
||||
register: container_archive_images
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
# Custom paperless-ngx image: the upstream release with the Korean and English
# Tesseract models replaced by the tessdata_best variants.
FROM ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}

# Git ref (branch, tag or commit) of tesseract-ocr/tessdata_best to download
# from. Defaults to the moving "main" branch; pass
# --build-arg TESSDATA_REF=<ref> to pin the models for reproducible builds.
ARG TESSDATA_REF=main
# Directory the downloaded traineddata files are written to.
ARG TESSDATA_DIR=/usr/share/tesseract-ocr/5/tessdata

# Installing packages and writing under /usr/share requires root.
USER root

# Download every model in one layer. "|| exit 1" makes a failed download abort
# the build immediately, and --retry covers transient network errors.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && for lang in kor eng; do \
        curl -fsSL --retry 3 \
            "https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/${TESSDATA_REF}/${lang}.traineddata" \
            -o "${TESSDATA_DIR}/${lang}.traineddata" \
        || exit 1; \
    done \
    && rm -rf /var/lib/apt/lists/*

# Switch back to the paperless user for the final image.
USER paperless
|
||||
@@ -8,7 +8,7 @@ After=redis_paperless.service
|
||||
Wants=redis_paperless.service
|
||||
|
||||
[Container]
|
||||
Image=ghcr.io/paperless-ngx/paperless-ngx:{{ version['containers']['paperless'] }}
|
||||
Image=ilnmors.internal/app/paperless-ngx:{{ version['containers']['paperless'] }}
|
||||
ContainerName=paperless
|
||||
HostName=paperless
|
||||
PublishPort={{ services['paperless']['ports']['http'] }}:8000/tcp
|
||||
|
||||
@@ -45,6 +45,11 @@ ALTER DATABASE paperless_db OWNER TO paperless;
|
||||
- "paperless"
|
||||
```
|
||||
|
||||
### Paperless custom build
|
||||
|
||||
- By default, paperless-ngx uses the 'tesseract_fast' (`tessdata_fast`) OCR model.
|
||||
- A custom container image is built to use the 'tesseract_best' (`tessdata_best`) model instead, which improves OCR accuracy.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Access to paperless
|
||||
|
||||
Reference in New Issue
Block a user