#!/bin/bash # Copyright (C) 2008-2018 Richard Kimberly Heck # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Updates etc should be available from: # https://gitlab.com/rikiheck/text # Change log # Version 0.3: Add ability to embed text so djvu's are searchable. # Version 0.2: Add ability to merge two pages into one. Mostly, this is # due to my starting to use the fantastic ScanTailor program, which can # be found at http://scantailor.sourceforge.net/. # Version 0.1: First public release # TODO # Implement losslevel # convert input files to bitonal PROG=`basename $0`; function printUsage { cat </dev/null`; if [ -z "$CVT" ]; then echo "This script uses the convert utility from the ImageMagick package. It does not appear to be in your path."; exit 1; fi # Get filename we're looking for if [ -z "$1" ]; then echo "No input files given."; printUsage; exit 1; fi FIRSTFILE=""; LASTFILE=""; FILES=""; echo "Copying original files"; while [ -n "$1" ]; do fil="$1"; shift; echo -n "."; NEWFILE="tiff2djvu-$$-$fil.$EXT"; $DEBUG cp $fil $NEWFILE; if [ -z "$FILES" ]; then FILES="$NEWFILE"; else FILES="$FILES $NEWFILE"; fi if [ -z "$FIRSTFILE" ]; then FIRSTFILE="$NEWFILE"; fi; LASTFILE="$NEWFILE"; done echo "done."; GENFILES="$FILES"; # Crop pages if requested if [ -n "$CROPDIMS" ]; then echo "Cropping to $CROPDIMS"; for fil in $FILES; do echo -n "."; CFIL="precrop-$fil"; if [ -z "$DEBUG" ]; then mv $fil $CFIL; fi $DEBUG convert $CFIL -crop $CROPDIMS +repage $fil; $DEBUG $KEEP rm $CFIL; done echo "done."; fi # Rotate pages if requested if [ -n "$ROTATE" ]; then echo "Rotating pages"; for fil in $FILES; do echo -n "."; RFIL="prerot-$fil"; if [ -z "$DEBUG" ]; then mv $fil $RFIL; fi $DEBUG convert -rotate $ROTATE $RFIL $fil; $DEBUG $KEEP rm $RFIL; done echo "done."; fi # Crop first and last pages if requested if [ -n "$CROPFIRST$CROPLAST" ]; then echo -n "Cropping"; if [ -n "$CROPFIRST" ]; then echo -n " first page"; crop $FIRSTFILE "W"; fi if [ -n "$CROPLAST" ]; then if [ -n "$CROPFIRST" ]; then echo -n " and"; fi if [ -z "$QUIET" ]; then echo " last page."; fi crop $LASTFILE "E"; else echo "."; fi fi # Are we doing 2 in 1? if [ -n "$TWOINONE" ]; then echo "Merging pages"; PAGE=""; NUM=0; NEWPAGES=""; for FIL in $FILES; do if [ -z "$VERBOSE" ]; then echo -n "."; fi # If we don't yet have a first page, this one is it. if [ "$PAGE" = "" ]; then PAGE="$FIL"; # Otherwise we have the first page, so... else NUM=$(($NUM + 1)); #just a counter to form filenames # normalize it if (($NUM < 10)); then P="0$NUM"; else P=$NUM; fi; NEWPAGE="tiff2djvu-$$-$OUTFILE-$P.$EXT"; if [ -n "$VERBOSE" ]; then echo "Creating page $NEWPAGE from $PAGE and $FIL"; fi $DEBUG montage -geometry +0+0 $PAGE $FIL $NEWPAGE; #No first page any more. PAGE=""; NEWPAGES="$NEWPAGES $NEWPAGE"; GENFILES="$GENFILES $NEWPAGE"; fi; done #If there were an odd number of pages, then one page will be left over, so... if [ -n "$PAGE" ]; then if [ -z "$VERBOSE" ]; then echo -n "."; fi #...we use ImageMagick to create an empty page... EMPTYPAGE="emptyPage-$$.$EXT"; #...of the same size as the other pages... SIZE=`identify -format '%wx%h' $PAGE`; NUM=$(($NUM + 1)); if (($NUM < 10)); then P="0$NUM"; else P=$NUM; fi; if [ -n "$VERBOSE" ]; then echo "Creating empty page $EMPTYPAGE at $SIZE"; fi; $DEBUG convert -size $SIZE xc:white $EMPTYPAGE; GENFILES="$GENFILES $EMPTYPAGE"; NEWPAGE="tiff2djvu-$$-$OUTFILE-$P.$EXT"; if [ -n "$VERBOSE" ]; then echo "Creating page $NEWPAGE from $PAGE and $EMPTYPAGE"; fi; #...and collate the last page with the empty page. $DEBUG montage -geometry +0+0 $PAGE $EMPTYPAGE $NEWPAGE; NEWPAGES="$NEWPAGES $NEWPAGE"; GENFILES="$GENFILES $NEWPAGE"; fi echo "done."; # so now we're working with these... FILES="$NEWPAGES"; fi DJVUFILES=""; PDFFILES=""; PDFSPLIT=""; # not yet implemented RMFILS=""; echo "Converting...."; WORKING=1; for i in $FILES; do echo Page $WORKING; WORKING=$(($WORKING + 1)); if [ -n "$MKDJVU" ]; then DJVUFIL="djvu-$$-$i"; GENFILES="$GENFILES $DJVUFIL"; if identify $i | grep -q Bilevel; then ln -s $i "$DJVUFIL"; else $DEBUG convert $i -depth 1 "$DJVUFIL"; fi NEWFIL="${i%$EXT}djvu" # create djvu page $DEBUG cjb2 "$DJVUFIL" $NEWFIL; GENFILES="$GENFILES $NEWFIL"; if [ -n "$DOOCR" ]; then $DEBUG tesseract "$DJVUFIL" "$DJVUFIL" makebox >/dev/null 2>&1; $DEBUG tesseract "$DJVUFIL" "$DJVUFIL" >/dev/null 2>&1; $DEBUG mv "$DJVUFIL.box" temp.box; $DEBUG box2box.pl temp.box "$DJVUFIL.box"; $DEBUG box2sed.pl "$DJVUFIL.box" "$DJVUFIL.txt" > "$DJVUFIL.djvutxt"; GENFILES="$GENFILES $DJVUFIL.box $DJVUFIL.txt $DJVUFIL.djvutxt"; # the page may not have any text, which would upset djvused # generally that seems to mean a single line file TMP=$(cat $DJVUFIL.djvutxt | wc -l); if [ $TMP -ge 2 ]; then $DEBUG djvused -e "select 1; set-txt \"$DJVUFIL.djvutxt\"; save" $NEWFIL; # GENFILES="$GENFILES $DJVUFIL.djvutxt"; fi fi DJVUFILES="$DJVUFILES $NEWFIL"; fi if [ -n "$PDF" ]; then if [ -n "$DOOCR" ]; then echo "Not implemented." exit 1; fi PNGFIL="${i%$EXT}png"; PDFFIL="${i%$EXT}pdf"; $DEBUG convert $i "$PNGFIL"; if [ -n "$BATCH" ]; then $DEBUG convert "$PNGFIL" "$PDFFIL"; else PDFFILES="$PDFFILES $PNGFIL"; fi GENFILES="$GENFILES $PNGFIL"; fi if [ -n "$PDFSPLIT" ]; then echo "Need to check if we need to collect filenames here, as well."; exit 1; DATA=`identify $i | cut -d' ' -f 3`; WIDTH=${DATA%x*}; HEIGHT=${DATA#*x}; NEWDT=$(($WIDTH / 2)); NEWHT=$(($HEIGHT / 2)); if [ "$i" != "$FIRSTFILE" -o -z "$CROPFIRST" ]; then #$DEBUG convert $i -resize ${NEWDT}x$NEWHT +repage -type Grayscale +compress $i; #NEWDT=$(($NEWDT / 2)); $DEBUG convert $i -crop ${NEWDT}x$HEIGHT+0+0 +repage -type Grayscale -depth 4 +compress SPLIT-$$-$i.1.png; GENFILES="$GENFILES SPLIT-$$-$i.1.png"; fi if [ "$i" != "$LASTFILE" -o -z "$CROPLAST" ]; then #$DEBUG convert $i -resize ${NEWDT}x$NEWHT +repage -type Grayscale +compress $i; #NEWDT=$(($NEWDT / 2)); $DEBUG convert $i -crop ${NEWDT}x$HEIGHT+${NEWDT}+0 +repage -type Grayscale -depth 4 +compress SPLIT-$$-$i.2.png; GENFILES="$GENFILES SPLIT-$$-$i.2.png"; fi fi done echo "done."; if [ -z "$BATCH" ]; then if [ -n "$MKDJVU" ]; then echo -n "Creating DJVU..."; $DEBUG djvm -c x$OUTFILE.djvu $DJVUFILES; $DEBUG mv x$OUTFILE.djvu $OUTFILE.djvu echo "done."; fi if [ -n "$PDF" ]; then echo -n "Creating PDF..."; $DEBUG convert tiff2djvu-$$*png $OUTFILE.pdf echo "done."; fi if [ -n "$PDFSPLIT" ]; then echo -n "Creating single-page PDF..."; if [ -z "$PDF" ]; then PDFNAME=$OUTFILE.pdf; else PDFNAME=$OUTFILE-split.pdf; fi $DEBUG convert SPLIT-$$-*.png $PDFNAME; echo "done."; fi fi $DEBUG $KEEP rm $GENFILES;