From 11a829a47ef725367469241bc7feed272f61c602 Mon Sep 17 00:00:00 2001 From: Robby Workman Date: Thu, 1 Jul 2010 00:07:58 -0500 Subject: graphics/tesseract: Added (OCR engine) This was dropped from the 13.1 repo due to build failure, but it's needed by tucan, so let's fix it and add it back. Signed-off-by: Robby Workman --- graphics/tesseract/README | 21 +++ graphics/tesseract/slack-desc | 19 +++ graphics/tesseract/svutil.cpp-include_stdio_h.diff | 11 ++ graphics/tesseract/tesseract.SlackBuild | 144 +++++++++++++++++++++ graphics/tesseract/tesseract.info | 12 ++ 5 files changed, 207 insertions(+) create mode 100644 graphics/tesseract/README create mode 100644 graphics/tesseract/slack-desc create mode 100644 graphics/tesseract/svutil.cpp-include_stdio_h.diff create mode 100644 graphics/tesseract/tesseract.SlackBuild create mode 100644 graphics/tesseract/tesseract.info diff --git a/graphics/tesseract/README b/graphics/tesseract/README new file mode 100644 index 0000000000..f9be7a1c4c --- /dev/null +++ b/graphics/tesseract/README @@ -0,0 +1,21 @@ +Tesseract is a commercial quality OCR engine originally developed at HP +between 1985 and 1995. In 1995, this engine was among the top 3 evaluated +by UNLV. It was open-sourced by HP and UNLV in 2005. + +You will need to get one of the language packs in order to do anything +useful with tesseract, and that language pack tarball should be present +in the same directory as the SlackBuild script when the package is created. +See http://code.google.com/p/tesseract-ocr/downloads/list for a list of +all available language packs. Note that you can install more than one +(or even all) of the language packs, as they do not conflict with each +other. The build script defaults to use English, but this is easily +changed by passing an alternate value on the command line. 
+ +Here is the relevant code from the build script: + # Language pack(s) to use + # We'll install English by default, but you can pass another one (or all) + # of them on the command line (space delimited). If you pass more than one + # (again, space delimited), you must enclose the string in quotes. Examples: + # TESSLANG=fra ./tesseract.SlackBuild + # TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild + TESSLANG=${TESSLANG:-eng} # Default to English diff --git a/graphics/tesseract/slack-desc b/graphics/tesseract/slack-desc new file mode 100644 index 0000000000..2136326be8 --- /dev/null +++ b/graphics/tesseract/slack-desc @@ -0,0 +1,19 @@ +# HOW TO EDIT THIS FILE: +# The "handy ruler" below makes it easier to edit a package description. Line +# up the first '|' above the ':' following the base package name, and the '|' on +# the right side marks the last column you can put a character in. You must make +# exactly 11 lines for the formatting to be correct. It's also customary to +# leave one space after the ':'. + + |-----handy-ruler--------------------------------------------------| +tesseract: Tesseract (OCR Engine) +tesseract: +tesseract: Tesseract is a commercial quality OCR engine originally developed +tesseract: at HP between 1985 and 1995. In 1995, this engine was among the +tesseract: top 3 evaluated by UNLV. It was open-sourced by HP and UNLV in +tesseract: 2005. 
+tesseract: +tesseract: http://code.google.com/p/tesseract-ocr/ +tesseract: +tesseract: +tesseract: diff --git a/graphics/tesseract/svutil.cpp-include_stdio_h.diff b/graphics/tesseract/svutil.cpp-include_stdio_h.diff new file mode 100644 index 0000000000..b2a55a9514 --- /dev/null +++ b/graphics/tesseract/svutil.cpp-include_stdio_h.diff @@ -0,0 +1,11 @@ +diff -Nur tesseract-2.04.orig//viewer/svutil.cpp tesseract-2.04/viewer/svutil.cpp +--- tesseract-2.04.orig//viewer/svutil.cpp 2009-06-03 11:29:38.000000000 -0500 ++++ tesseract-2.04/viewer/svutil.cpp 2010-07-01 00:03:45.253070024 -0500 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #ifdef __linux__ diff --git a/graphics/tesseract/tesseract.SlackBuild b/graphics/tesseract/tesseract.SlackBuild new file mode 100644 index 0000000000..95f010a6cf --- /dev/null +++ b/graphics/tesseract/tesseract.SlackBuild @@ -0,0 +1,144 @@ +#!/bin/sh + +# Pierre Cazenave 10/11/2007. +# Updated 25/01/2009. +# Updated 08/04/2009. +# Updated 28/05/2009 for Slackware64. +# Modified by Robby Workman for better +# consistency with our other scripts +# Thanks to S+*n_Pe*rm*n for a bug report from OCRopus. + +# Copyright 2009 Pierre Cazenave +# All rights reserved. +# +# Redistribution and use of this script, with or without modification, is +# permitted provided that the following conditions are met: +# +# 1. Redistributions of this script must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
# IN NO
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Package identity. BUILD and TAG keep any value supplied by the caller.
PRGNAM=tesseract
VERSION=2.04
: "${BUILD:=1}"
: "${TAG:=_SBo}"

# Derive ARCH from the running machine only when the caller did not set it:
# every i?86 collapses to i486, every arm* to arm, anything else is kept as
# reported by uname.
if [ -z "$ARCH" ]; then
  ARCH=$( uname -m )
  case "$ARCH" in
    i?86) ARCH=i486 ;;
    arm*) ARCH=arm ;;
  esac
fi

# Working locations. TMP and OUTPUT keep any value supplied by the caller.
CWD=$(pwd)
: "${TMP:=/tmp/SBo}"
PKG=$TMP/package-$PRGNAM
: "${OUTPUT:=/tmp}"

# Language pack(s) to use
# English is installed unless TESSLANG says otherwise. Several packs may be
# requested at once as a space-delimited list, which must then be quoted:
#   TESSLANG=fra ./tesseract.SlackBuild
#   TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild
: "${TESSLANG:=eng}"       # Default to English

# Per-architecture compiler flags and library directory suffix.
case "$ARCH" in
  i486)
    SLKCFLAGS="-O2 -march=i486 -mtune=i686"
    LIBDIRSUFFIX=""
    ;;
  i686)
    SLKCFLAGS="-O2 -march=i686 -mtune=i686"
    LIBDIRSUFFIX=""
    ;;
  x86_64)
    SLKCFLAGS="-O2 -fPIC"
    LIBDIRSUFFIX="64"
    ;;
  *)
    SLKCFLAGS="-O2"
    LIBDIRSUFFIX=""
    ;;
esac

# From here on, any failing command aborts the build.
set -e

# Start from a clean slate, then unpack the source and normalise its
# ownership and permissions.
rm -rf $TMP/$PRGNAM-$VERSION
rm -rf $PKG
mkdir -p $TMP $PKG $OUTPUT
cd $TMP
tar xvf $CWD/$PRGNAM-$VERSION.tar.gz
cd $PRGNAM-$VERSION
chown -R root:root .
chmod -R u+w,go+r-w,a-s .

# Fix missing snprintf() (the patch adds a stdio.h include to
# viewer/svutil.cpp).
patch -p1 < "$CWD/svutil.cpp-include_stdio_h.diff"

# The patched source above is C++, so the optimisation flags must reach the
# C++ compiler as well as the C compiler.
CFLAGS="$SLKCFLAGS" \
CXXFLAGS="$SLKCFLAGS" \
./configure \
  --prefix=/usr \
  --libdir="/usr/lib${LIBDIRSUFFIX}" \
  --sysconfdir=/etc \
  --localstatedir=/var \
  --mandir=/usr/man \
  --disable-static \
  --build="$ARCH-slackware-linux"

make

# Extract the requested language tarballs into the current directory (the
# source tree entered earlier in this script) before "make install" runs.
# The packs are not all published under the same version number: deu-f, por
# and vie are 2.01, everything else is 2.00. Wildcards are deliberately
# avoided when locating the tarballs.
#
# SUCCESS is reset here so that a value inherited from the caller's
# environment cannot defeat the "no language packs" check below.
SUCCESS=no
# $TESSLANG is intentionally unquoted: it is a space-delimited list.
for _language in $TESSLANG ; do
  case "$_language" in
    deu-f|por|vie) _langver=2.01 ;;
    *)             _langver=2.00 ;;
  esac
  _langpack="$CWD/tesseract-$_langver.$_language.tar.gz"

  if [ -r "$_langpack" ]; then
    tar xf "$_langpack"
    SUCCESS=yes
  else
    # A missing pack is only a warning; the pause gives the user a chance
    # to notice it in the build output.
    echo "$_langpack not found."
    sleep 5
  fi
done

# Refuse to build a package that ships no language data at all.
if [ "$SUCCESS" != "yes" ]; then
  echo "No language packs were found, so this package will not work as is."
  echo "See $CWD/README for more information."
  exit 1
fi

make install DESTDIR="$PKG"

# Strip ELF binaries and shared objects. Stripping is best-effort, hence the
# discarded stderr and the "|| true"; NUL-delimited names keep paths with
# whitespace intact on their way to file(1).
find "$PKG" -print0 | xargs -0 file | grep -e "executable" -e "shared object" \
  | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true

# Install upstream documentation plus a copy of this script and its README.
mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION"
cp -a AUTHORS COPYING ChangeLog INSTALL NEWS README ReleaseNotes \
  "$PKG/usr/doc/$PRGNAM-$VERSION"
cat "$CWD/$PRGNAM.SlackBuild" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"
cat "$CWD/README" > "$PKG/usr/doc/$PRGNAM-$VERSION/README.$TAG"
find "$PKG/usr/doc/$PRGNAM-$VERSION" -type f -exec chmod 644 {} \;

# remove zero size files (from extra potentially not installed language packs).
+find $PKG/usr/share/tessdata -type f -size 0 -exec rm {} + + +mkdir -p $PKG/install +cat $CWD/slack-desc > $PKG/install/slack-desc + +cd $PKG +/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz} diff --git a/graphics/tesseract/tesseract.info b/graphics/tesseract/tesseract.info new file mode 100644 index 0000000000..115d24f619 --- /dev/null +++ b/graphics/tesseract/tesseract.info @@ -0,0 +1,12 @@ +PRGNAM="tesseract" +VERSION="2.04" +HOMEPAGE="http://code.google.com/p/tesseract-ocr/" +DOWNLOAD="http://tesseract-ocr.googlecode.com/files/tesseract-2.04.tar.gz \ + http://tesseract-ocr.googlecode.com/files/tesseract-2.00.eng.tar.gz" +MD5SUM="b44eba1a9f4892ac62e484c807fe0533 \ + b8291d6b3a63ce7879d688e845e341a9" +DOWNLOAD_x86_64="" +MD5SUM_x86_64="" +MAINTAINER="Pierre Cazenave" +EMAIL="pwcazenave gmail {dot} com" +APPROVED="rworkman" -- cgit v1.2.3