pantry/projects/tesseract-ocr.github.io/package.yml
Marc Seitz 1086e9737f
+tesseract (#454)
* add tesseract
* download eng and osd trained data in build process
* cleanup
* set tessdata prefix
* add runtime env var
* remove fixture; download it instead
2023-03-16 16:00:48 -04:00

54 lines
1.4 KiB
YAML

# Where the source archive is fetched from. {{version}} is a template
# placeholder inside the tag-archive URL.
distributable:
  url: https://github.com/tesseract-ocr/tesseract/archive/refs/tags/{{version}}.tar.gz
  # NOTE(review): presumably drops the archive's single top-level directory
  # on extraction (like tar's --strip-components) — confirm.
  strip-components: 1
# NOTE(review): presumably the available versions are discovered from the
# release tags of this GitHub repository — confirm against the pantry schema.
versions:
  github: tesseract-ocr/tesseract/releases/tags
# Packages this package depends on, keyed by project name; each value is a
# version constraint.
# '*' must stay quoted: a bare * would be read by YAML as the start of an alias.
# NOTE(review): bare numbers (1, 71) presumably pin a major version — confirm.
dependencies:
  cairographics.org: 1
  unicode.org: 71
  leptonica.org: '*'
  libarchive.org: '*'
  gnome.org/pango: 1
# Environment variables set for users of the installed package.
runtime:
  env:
    # Points at share/tessdata under the install prefix, which is the
    # directory the build script places eng/osd .traineddata files in.
    TESSDATA_PREFIX: "{{prefix}}/share/tessdata/"
# Build recipe: build-only dependencies, the build script itself, and the
# environment that script runs with.
build:
  dependencies:
    tea.xyz/gx/cc: c99
    tea.xyz/gx/make: '*'
    gnu.org/autoconf: '*'
    gnu.org/automake: '*'
    gnu.org/libtool: '*'
    freedesktop.org/pkg-config: '*'
    # Needed by the script below to download the trained data files.
    gnu.org/wget: '*'
  script: |
    ./autogen.sh
    # NOTE(review): $ARGS is unquoted, so the shell word-splits it into
    # separate configure options — presumably intentional; confirm.
    ./configure $ARGS
    make --jobs {{ hw.concurrency }}
    make install

    # Install the English (eng) and orientation/script-detection (osd)
    # trained data files into share/tessdata.
    # mkdir -p is a no-op when the directory already exists.
    mkdir -p "{{prefix}}"/share/tessdata
    for model in eng osd; do
      # The URL is quoted so the `?` in `?raw=true` can never be treated as a
      # filename-expansion pattern by the shell.
      wget "https://github.com/tesseract-ocr/tessdata/blob/main/${model}.traineddata?raw=true" -O "${model}.traineddata"
      mv "${model}.traineddata" "{{prefix}}"/share/tessdata/
    done
  env:
    # Options passed to ./configure via $ARGS in the script above.
    ARGS:
      - --prefix="{{prefix}}"
      - --disable-dependency-tracking
      - --datarootdir="{{prefix}}"/share
# Executables exposed by this package.
# NOTE(review): paths are presumably relative to the install prefix — confirm.
provides:
  - bin/tesseract
# Smoke test: download a sample image and run OCR on it.
test:
  dependencies:
    # Needed to download the sample image in the script below.
    gnu.org/wget: '*'
  # Fetches eurotext.tif from the tesseract-ocr/test repository at a fixed
  # commit, then runs tesseract on it with the `eng` language and `stdout`
  # as the output target.
  script: |
    wget https://raw.githubusercontent.com/tesseract-ocr/test/6dd816cdaf3e76153271daf773e562e24c928bf5/testing/eurotext.tif
    tesseract eurotext.tif stdout -l eng