#!/bin/bash
#
# searchin - search a directory tree for a term, both in file names and in
# file contents.  /rb 2018
#
# Inspected content types (detected with file(1), not by extension):
#   images (OCR), PDF (OCR of the first page only), Microsoft Office
#   documents (converted to HTML), ASCII text, DWG drawings (converted to
#   DXF) and the members of ZIP, GZIP and RAR archives.
#
# Requires: file, ghostscript (gs), libreoffice (lowriter), tesseract with
#           the deu+eng language data (tesseract-ocr-all), imagemagick
#           (convert), dwg2dxf, unzip, gzip, unrar
#
# Usage:    searchin searchpath "my search" [d]
#           Any non-empty third argument enables debug output.
#
# Every searchinXXX function shares the same positional interface:
#   $1  path of the file to inspect
#   $2  search term (case-insensitive; a grep pattern for content searches,
#       a literal substring for searchinNAME)
#   $3  lower-cased file extension            (informational)
#   $4  file name without its extension       (informational)
#   $5  extraction directory, only set when $1 was unpacked from an archive
#   $6  path of that archive; reported instead of $1 when $5 is set
# Globals read: found (content searches are skipped unless it is 0),
#               d (debug flag), cores (ghostscript rendering threads).

cores=4
found=0
d=''

# Internal state: scratch root created by main, and a one-entry cache for
# the file(1) description so each file is only probed once.
_searchin_tmproot=''
_searchin_desc=''
_searchin_desc_path=''

# Print a trace line for $1 when debug mode is enabled.
_searchin_debug() {
  if [[ -n "$d" ]]; then
    printf 'search in: %s\n' "$1"
  fi
}

# Print a hit. $1 term, $2 inspected file, $3 extraction dir, $4 archive.
# For archive members the archive path is shown, not the temporary copy.
_searchin_report() {
  local shown=$2
  if [[ -n "$3" && -n "$4" ]]; then
    shown=$4
  fi
  # printf instead of echo -e: backslashes in the term are printed verbatim.
  printf '%s found in: %s\n' "$1" "$shown"
}

# Store the brief file(1) description of $1 in _searchin_desc.
# -b drops the "name:" prefix so the file name can never influence detection.
_searchin_describe() {
  if [[ -z "$1" || "$1" != "$_searchin_desc_path" ]]; then
    _searchin_desc=$(file -b -- "$1" 2>/dev/null)
    _searchin_desc_path=$1
  fi
}

# Create a private scratch directory and print its path.
_searchin_mktempdir() {
  mktemp -d "${_searchin_tmproot:-${TMPDIR:-/tmp}}/searchin.XXXXXX"
}

##### search image files (OCR) #####
searchinPict() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  [[ "$_searchin_desc" == *image* ]] || return 0
  _searchin_debug "$1"

  local work
  work=$(_searchin_mktempdir) || return 1
  # Grayscale and upscale first; this helps tesseract on small pictures.
  if convert -strip -colorspace Gray "$1" -resize 200% "$work/ocr.tif" \
    && tesseract "$work/ocr.tif" stdout -l deu+eng 2>/dev/null \
      | grep -qi -- "$2"; then
    _searchin_report "$2" "$1" "$5" "$6"
  fi
  rm -rf -- "$work"
  return 0
}

##### search PDF files (OCR of the first page) #####
searchinPDF() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  [[ "$_searchin_desc" == *PDF* ]] || return 0
  _searchin_debug "$1"

  local work
  work=$(_searchin_mktempdir) || return 1
  # Only page 1 is rendered (300 dpi) to keep the OCR step affordable.
  gs -q -dSAFER -sDEVICE=png16m -dINTERPOLATE \
    "-dNumRenderingThreads=${cores:-4}" \
    -dFirstPage=1 -dLastPage=1 -r300 -o "$work/page1.png" -c 3000000 \
    setvmthreshold -f "$1" -dQUIET -dBATCH -dNOPAUSE \
    -sProcessColorModel=DeviceGray \
    -sColorConversionStrategy=Gray \
    -dOverrideICC
  if [[ -s "$work/page1.png" ]] \
    && tesseract "$work/page1.png" stdout -l deu+eng 2>/dev/null \
      | grep -qi -- "$2"; then
    _searchin_report "$2" "$1" "$5" "$6"
  fi
  rm -rf -- "$work"
  return 0
}

##### search Office files (via HTML conversion) #####
searchinOfficefiles() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  [[ "$_searchin_desc" == *Microsoft* ]] || return 0
  _searchin_debug "$1"

  # Always convert into a private directory: the caller's extraction
  # directory ($5) must survive, other archive members still live there.
  local work
  work=$(_searchin_mktempdir) || return 1
  lowriter --nodefault --nofirststartwizard --nologo --norestore \
    --accept="socket,host=127.0.0.1,port=2002;urp;" --invisible --headless \
    --convert-to html "$1" --outdir "$work" &>/dev/null
  # Search whatever HTML was produced instead of guessing its file name.
  if grep -qirs --include='*.html' -- "$2" "$work"; then
    _searchin_report "$2" "$1" "$5" "$6"
  fi
  rm -rf -- "$work"
  return 0
}

##### search DWG files (via DXF conversion) #####
searchinDWG() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  [[ "$_searchin_desc" == *DWG* ]] || return 0
  _searchin_debug "$1"

  # Private directory for the same reason as in searchinOfficefiles.
  local work
  work=$(_searchin_mktempdir) || return 1
  dwg2dxf "$1" "$work/drawing.dxf" >/dev/null
  if [[ -f "$work/drawing.dxf" ]] \
    && grep -qi -- "$2" "$work/drawing.dxf"; then
    _searchin_report "$2" "$1" "$5" "$6"
  fi
  rm -rf -- "$work"
  return 0
}

##### search file names #####
# Literal, case-insensitive substring match on the path. Sets found=1 on a
# hit so that the content searches are skipped for this file.
searchinNAME() {
  local subject=$1 lower_subject lower_term
  if [[ -n "$5" ]]; then
    # Archive member: ignore the temporary extraction prefix, otherwise the
    # random temp directory name would take part in the match.
    subject=${1#"$5"/}
  fi
  lower_subject=$(tr '[:upper:]' '[:lower:]' <<<"$subject")
  lower_term=$(tr '[:upper:]' '[:lower:]' <<<"$2")
  _searchin_debug "$1"
  if [[ "$lower_subject" == *"$lower_term"* ]]; then
    _searchin_report "$2" "$1" "$5" "$6"
    found=1
  fi
  return 0
}

##### search ASCII files #####
searchinASCII() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  # Prefix match also accepts e.g. "ASCII text, with CRLF line terminators".
  [[ "$_searchin_desc" == 'ASCII text'* ]] || return 0
  _searchin_debug "$1"
  if grep -qi -- "$2" "$1"; then
    _searchin_report "$2" "$1" "$5" "$6"
  fi
  return 0
}

# Split the base name of $1 into _searchin_stem and the lower-cased
# _searchin_ext (empty when the name has no dot).
_searchin_split_name() {
  local base=${1##*/}
  _searchin_stem=$base
  _searchin_ext=''
  if [[ "$base" == *.* ]]; then
    _searchin_stem=${base%.*}
    _searchin_ext=$(tr '[:upper:]' '[:lower:]' <<<"${base##*.}")
  fi
}

# Run the per-file searches on one extracted archive member.
# $1 member path, $2 term, $3 extraction dir, $4 archive path.
_searchin_member() {
  local outer_found=$found
  _searchin_split_name "$1"
  # Each member gets its own found flag: a name hit on one member must not
  # suppress the content search of the following ones.
  found=0
  searchinNAME "$1" "$2" "$_searchin_ext" "$_searchin_stem" "$3" "$4"
  searchinPict "$1" "$2" "$_searchin_ext" "$_searchin_stem" "$3" "$4"
  searchinPDF "$1" "$2" "$_searchin_ext" "$_searchin_stem" "$3" "$4"
  searchinOfficefiles "$1" "$2" "$_searchin_ext" "$_searchin_stem" "$3" "$4"
  searchinASCII "$1" "$2" "$_searchin_ext" "$_searchin_stem" "$3" "$4"
  searchinDWG "$1" "$2" "$_searchin_ext" "$_searchin_stem" "$3" "$4"
  found=$outer_found
}

##### search GZIP files #####
searchinGZIP() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  [[ "$_searchin_desc" == *gzip* ]] || return 0
  _searchin_debug "$1"
  # Skip damaged archives; the exit status is what counts, not the output.
  gzip -t -- "$1" 2>/dev/null || return 0

  local tdir base name
  tdir=$(_searchin_mktempdir) || return 1
  base=${1##*/}
  name=${base%.*}           # "report.pdf.gz" -> "report.pdf"
  [[ -n "$name" ]] || name=$base
  if gunzip -c -- "$1" >"$tdir/$name" 2>/dev/null; then
    _searchin_member "$tdir/$name" "$2" "$tdir" "$1"
  fi
  rm -rf -- "$tdir"
  return 0
}

##### search ZIP files #####
searchinZIP() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  [[ "$_searchin_desc" == *Zip* ]] || return 0
  _searchin_debug "$1"
  # unzip exit status 1 means "warnings only"; anything above is an error.
  unzip -tqq "$1" >/dev/null 2>&1
  (($? <= 1)) || return 0

  local tdir member
  tdir=$(_searchin_mktempdir) || return 1
  # Keep the directory structure so equally named members cannot
  # overwrite each other.
  unzip -qo "$1" -d "$tdir" >/dev/null 2>&1
  # fd 3 keeps the member list away from the stdin of the tools called
  # inside the loop.
  while IFS= read -r -d '' member <&3; do
    _searchin_member "$member" "$2" "$tdir" "$1"
  done 3< <(find "$tdir" -type f -print0)
  rm -rf -- "$tdir"
  return 0
}

##### search RAR files #####
searchinRAR() {
  [[ "${found:-0}" == 0 ]] || return 0
  _searchin_describe "$1"
  [[ "$_searchin_desc" == *RAR* ]] || return 0
  _searchin_debug "$1"
  # "t" is the test command; -p- prevents an interactive password prompt.
  unrar t -inul -p- -- "$1" >/dev/null 2>&1 || return 0

  local tdir member
  tdir=$(_searchin_mktempdir) || return 1
  unrar x -inul -p- -o+ -- "$1" "$tdir/" >/dev/null 2>&1
  while IFS= read -r -d '' member <&3; do
    _searchin_member "$member" "$2" "$tdir" "$1"
  done 3< <(find "$tdir" -type f -print0)
  rm -rf -- "$tdir"
  return 0
}

# Entry point: validate the arguments, then run every search on every
# regular file below the search path.
main() {
  if [[ -z "$1" || -z "$2" ]]; then
    # Status 0 on the usage path is kept for backward compatibility.
    printf 'please use:\n'
    printf '%s searchpath "my search"\n' "$0"
    printf 'or for debug:\n'
    printf '%s searchpath "my search" d\n' "$0"
    return 0
  fi

  local root=$1 search=$2 path
  d=$3
  found=0

  # All scratch directories live below one root that is removed on exit,
  # including an exit caused by Ctrl-C.
  _searchin_tmproot=$(mktemp -d) || {
    printf 'searchin: cannot create temporary directory\n' >&2
    return 1
  }
  trap 'rm -rf -- "$_searchin_tmproot"' EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  printf 'search for: "%s"\n' "$search"

  # NUL-delimited names survive any character in a path; reading from fd 3
  # keeps the list away from the stdin of the tools called in the loop.
  while IFS= read -r -d '' path <&3; do
    _searchin_split_name "$path"
    found=0
    searchinNAME "$path" "$search"
    searchinPict "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    searchinPDF "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    searchinOfficefiles "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    searchinASCII "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    searchinZIP "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    searchinGZIP "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    searchinRAR "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    searchinDWG "$path" "$search" "$_searchin_ext" "$_searchin_stem"
    found=0
  done 3< <(find "$root" -type f -print0)

  rm -rf -- "$_searchin_tmproot"
  _searchin_tmproot=''
  trap - EXIT INT TERM
  return 0
}

main "$@"