docs: update paperless-ngx.md to add how to deal with non-standard format pdf such as korean government documents
This commit is contained in:
@@ -59,3 +59,48 @@ ALTER DATABASE paperless_db OWNER TO paperless;
|
|||||||
- My Profiles: Connect new social account: Authelia
|
- My Profiles: Connect new social account: Authelia
|
||||||
- Continue
|
- Continue
|
||||||
- Login with Authelia
|
- Login with Authelia
|
||||||
|
|
||||||
|
## Non-standard PDF files
|
||||||
|
|
||||||
|
- Some PDF files don't follow the standard, for example Korean court or government PDF files.
|
||||||
|
- Before uploading this kind of non-standard PDF file, convert it first.
|
||||||
|
- This process uses Ghostscript and PowerShell (run from the Windows console).
|
||||||
|
|
||||||
|
```PowerShell
|
||||||
|
# Batch-convert every PDF in the current directory with Ghostscript.
# Converted copies are written to a "converted_pdfs" subfolder of the
# current directory; the original files are left untouched.

# 1. The engine: full path to the Ghostscript console executable.
#    Adjust the version folder to match the installed Ghostscript.
$gsPath = "C:\Program Files\gs\gs10.07.0\bin\gswin64c.exe"
if (!(Test-Path -LiteralPath $gsPath -PathType Leaf)) {
    # Fail once up front instead of once per file inside the loop.
    throw "Ghostscript executable not found: $gsPath"
}

# 2. Folder in which the converted files will be stored.
$outputDirName = "converted_pdfs"
$outputDir = Join-Path (Get-Location) $outputDirName
if (!(Test-Path -LiteralPath $outputDir)) {
    # Out-Null keeps the new directory object from being echoed to the console.
    New-Item -ItemType Directory -Path $outputDir | Out-Null
}

# 3. Find all PDF files in the current directory (not recursive).
#    @(...) guarantees an array even for zero or one match.
$files = @(Get-ChildItem -Filter *.pdf -File)

# Number of files for which Ghostscript reported a failure.
$failedCount = 0

foreach ($file in $files) {
    # Skip files that already live in the output folder. Compare the
    # directory itself rather than substring-matching the full path, so a
    # PDF whose name or parent path merely contains "converted_pdfs" is
    # still converted.
    if ($file.DirectoryName -eq $outputDir) { continue }

    $inputPath = $file.FullName
    $outputPath = Join-Path $outputDir $file.Name

    Write-Host "convert: $($file.Name)" -ForegroundColor Cyan

    $gsArgs = @(
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        "-dPDFSETTINGS=/default",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        # Emit no fonts: text is written as vector outlines
        # (or bitmaps for bitmapped fonts) instead of font-based text.
        "-dNoOutputFonts",
        "-sOutputFile=$outputPath",
        "$inputPath"
    )

    # Run Ghostscript; splatting passes each array element as one argument.
    & $gsPath @gsArgs

    # Ghostscript signals failure through a non-zero exit code.
    if ($LASTEXITCODE -ne 0) {
        $failedCount++
        Write-Warning "Ghostscript failed (exit code $LASTEXITCODE): $($file.Name)"
    }
}

if ($failedCount -gt 0) {
    Write-Warning "$failedCount file(s) could not be converted."
}

Write-Host "`n[Complete] All file is stored in '$outputDirName'." -ForegroundColor Green
|
||||||
|
```
|
||||||
|
|||||||
Reference in New Issue
Block a user