diff options
Diffstat (limited to 'html/div/search.sh')
-rw-r--r-- | html/div/search.sh | 60 |
1 files changed, 60 insertions, 0 deletions
#!/bin/sh
#
# search.sh (html/div/search.sh) - simple indexing test using HTTrack.
#
# If no index.txt exists in the current directory, the site $TESTSITE is
# mirrored with HTTrack (option -%I) and the resulting index.txt is moved
# here.  The script then prompts for keywords in a loop and prints the
# entries stored under each one.  An empty line or end of input quits.
#
# A "real" script/program would use an advanced search, e.g. a dichotomic
# (binary) search to find the word in the index.txt file.
# This script is really basic and NOT optimized, and
# should not be used for professional purposes :)

TESTSITE="http://localhost/"
INDEX="index.txt"
MIRROR_DIR="test"

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Mirror $TESTSITE into $MIRROR_DIR and move the generated index into the
# current directory.  Aborts if no index could be produced.
build_index() {
  echo "Building the index .."
  rm -rf -- "$MIRROR_DIR"
  httrack --display "$TESTSITE" -%I -O "$MIRROR_DIR"
  # Success is judged by the presence of the index file rather than by
  # HTTrack's exit status, so a partially failed mirror is still usable.
  test -f "$MIRROR_DIR/$INDEX" ||
    die "Could not build the index (is httrack installed and $TESTSITE reachable?)"
  mv -- "$MIRROR_DIR/$INDEX" ./ || die "Could not move $MIRROR_DIR/$INDEX here"
}

# Convert the index from CR/LF to LF line endings when its first line
# contains a carriage return.  The original file is kept as $INDEX.old.
normalize_line_endings() {
  cr=$(printf '\r')
  if head -n 1 "$INDEX" | grep -q "$cr"; then
    echo "Converting index to Unix LF style (not CR/LF) .."
    mv -f -- "$INDEX" "$INDEX.old" || die "Could not rename $INDEX"
    tr -d '\r' < "$INDEX.old" > "$INDEX" || die "Could not rewrite $INDEX"
  fi
}

# Look up $1 in the index and print the result.
# $1 is used as an extended regular expression anchored at the start of a
# line, so "word" matches every keyword beginning with "word" and "word$"
# matches only that exact keyword.
lookup_keyword() {
  hits=$(grep -niE "^$1" "$INDEX")
  if [ -z "$hits" ]; then
    echo "not found"
    return
  fi

  if [ "$(printf '%s\n' "$hits" | grep -c '^')" -ne 1 ]; then
    # Multiple matches: list the matching keywords on one line.
    printf "Found multiple keywords: "
    printf '%s\n' "$hits" | cut -d ':' -f 2- | tr '\n' ' '
    echo ""
    echo 'Use keyword$ to find only one'
    return
  fi

  # One match: "hits" is "<line number>:<keyword line>".
  first=${hits%%:*}
  # The keyword's entry runs up to the first following line containing "(".
  # NOTE(review): presumably that line holds either a hit ratio or the word
  # "ignored" - confirm against HTTrack's index.txt format.
  closing=$(tail -n "+$first" "$INDEX" | grep -nF '(' | head -n 1)
  case $closing in
    '')
      # Truncated or malformed index: no terminating line for this entry.
      echo "no end marker found for this keyword in $INDEX" >&2
      ;;
    *ignored*)
      echo "keyword ignored (too many hits)"
      ;;
    *)
      echo "Found in:"
      # Keep only the lines of the entry that contain a space and print
      # their second space-separated field.
      tail -n "+$first" "$INDEX" |
        head -n "${closing%%:*}" |
        grep -E "[0-9]* " |
        cut -d ' ' -f 2
      ;;
  esac
}

# Create an index if necessary
test -f "$INDEX" || build_index

# Convert crlf to lf
normalize_line_endings

while :; do
  printf "Enter a keyword: "
  keyword=
  # -r keeps backslashes intact so they reach grep as typed.  At end of
  # input read fails and leaves the keyword empty, which ends the loop.
  read -r keyword
  [ -n "$keyword" ] || break
  lookup_keyword "$keyword"
done