2016-02-06 13:57:02 +01:00
|
|
|
# Package metadata for the Tesseract OCR build recipe.
TERMUX_PKG_HOMEPAGE=https://github.com/tesseract-ocr/tesseract
TERMUX_PKG_DESCRIPTION="Tesseract is probably the most accurate open source OCR engine available"
TERMUX_PKG_VERSION=3.05.02
TERMUX_PKG_REVISION=1
# Expected SHA-256 digest (presumably of the archive at TERMUX_PKG_SRCURL - confirm when bumping the version).
TERMUX_PKG_SHA256=494d64ffa7069498a97b909a0e65a35a213989e0184f1ea15332933a90d43445
# The version is interpolated into the URL, so a version bump only touches TERMUX_PKG_VERSION.
TERMUX_PKG_SRCURL=https://github.com/tesseract-ocr/tesseract/archive/${TERMUX_PKG_VERSION}.tar.gz
# Comma-separated list of packages this one depends on.
TERMUX_PKG_DEPENDS="libtool, libuuid, leptonica"
|
2016-02-06 13:57:02 +01:00
|
|
|
|
|
|
|
termux_step_pre_configure() {
|
2016-03-27 13:21:42 +02:00
|
|
|
# http://blog.matt-swain.com/post/26419042500/installing-tesseract-ocr-on-mac-os-x-lion
|
2016-02-06 13:57:02 +01:00
|
|
|
export LIBLEPT_HEADERSDIR=${TERMUX_PREFIX}/include/leptonica
|
|
|
|
|
2017-03-27 05:27:36 +02:00
|
|
|
perl -p -i -e 's|ADD_RT], true|ADD_RT], false|g' configure.ac
|
2016-02-06 13:57:02 +01:00
|
|
|
./autogen.sh
|
|
|
|
}
|
|
|
|
|
|
|
|
termux_step_post_make_install() {
	# Download the English trained data into the installed tessdata directory.
	#
	# Reads TERMUX_PREFIX. Changes the working directory to
	# ${TERMUX_PREFIX}/share/tessdata, removes any existing eng.* files there and
	# fetches each English data file through termux_download.
	# Returns 1 if the tessdata directory cannot be entered.
	local component datafile

	# Bail out if the directory is missing so the rm below never runs elsewhere.
	cd "${TERMUX_PREFIX}/share/tessdata" || return 1
	rm -f eng.*

	# From the tessdata README: "These language data files only work with
	# Tesseract 4. They are based on the sources in tesseract-ocr/langdata on GitHub.
	# Get language data files for Tesseract 3.04 or 3.05 from the 3.04 tree."
	for component in cube.{bigrams,fold,lm,nn,params,size,word-freq} tesseract_cube.nn traineddata; do
		datafile="eng.${component}"
		# NOTE(review): only URL and destination are passed here; confirm that
		# termux_download does not also expect a checksum argument.
		termux_download \
			"https://raw.githubusercontent.com/tesseract-ocr/tessdata/3.04.00/${datafile}" \
			"${datafile}"
	done
}
|