diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:36:11 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:36:11 +0000 |
commit | ad5b7acc19290ff91e0f42a0de448a26760fcf99 (patch) | |
tree | 2d1867758835fd0c4e443ff3cc7e5c774af85874 /HelpHtml/div |
Imported httrack 3.20.2
Diffstat (limited to 'HelpHtml/div')
-rw-r--r-- | HelpHtml/div/search.sh | 59 |
1 file changed, 59 insertions, 0 deletions
#!/bin/sh
#
# search.sh - Simple indexing test using HTTrack
#
# Usage: search.sh [site-url]
#
# If there is no index.txt in the current directory, the site (default:
# http://localhost/) is mirrored with HTTrack into ./test with the -%I
# option, and the resulting test/index.txt is moved here. The script then
# prompts for keywords in a loop and prints what index.txt lists for each
# one. An empty line or end of input quits.
#
# A "real" script/program would use advanced search, and use dichotomy
# to find the word in the index.txt file. This script is really basic
# and NOT optimized, and should not be used for professional purposes :)

set -u

TESTSITE="${1:-http://localhost/}"
INDEX="index.txt"

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Create the index with HTTrack if it does not exist yet.
build_index() {
  if test -f "$INDEX"; then
    return 0
  fi

  echo "Building the index .."
  # Check for the tool before wiping the previous mirror directory.
  command -v httrack >/dev/null 2>&1 || die "httrack not found in PATH"
  rm -rf test

  # HTTrack's own exit status is not checked on purpose: what matters is
  # whether it produced the index file, which is verified just below.
  httrack --display "$TESTSITE" -%I -O test
  test -f "test/$INDEX" || die "httrack did not produce test/$INDEX"
  mv "test/$INDEX" ./ || die "cannot move test/$INDEX to the current directory"
}

# Convert the index from CR/LF to LF line endings when its first line
# contains a carriage return. The original file is kept as index.txt.old.
normalize_index() {
  cr=$(printf '\r')
  if head -n 1 "$INDEX" | grep -q "$cr"; then
    echo "Converting index to Unix LF style (not CR/LF) .."
    mv -f "$INDEX" "$INDEX.old" || die "cannot rename $INDEX to $INDEX.old"
    tr -d '\r' <"$INDEX.old" >"$INDEX" || die "cannot rewrite $INDEX"
  fi
}

# Look up one keyword and print the result.
#   $1 - keyword; used as a case-insensitive extended regex anchored at the
#        start of a line (so "keyword$" selects an exact match).
# Variables set here are global: POSIX sh has no 'local'.
lookup() {
  matches=$(grep -niE "^$1" "$INDEX")

  if test -z "$matches"; then
    echo "not found"
    return 0
  fi

  # grep -n prefixes every hit with "LINENO:"; count the hits.
  if test "$(printf '%s\n' "$matches" | grep -c '')" -ne 1; then
    # Multiple matches
    printf "Found multiple keywords: "
    # -f2- keeps the whole matched line even if it contains ':' itself.
    printf '%s\n' "$matches" | cut -d: -f2- | tr '\n' ' '
    echo ""
    echo 'Use keyword$ to find only one'
    return 0
  fi

  # One match: the entry runs from the keyword line up to the first
  # following line that contains "(".
  first=${matches%%:*}
  marker=$(tail -n "+$first" "$INDEX" | grep -nF '(' | head -n 1)

  case $marker in
    '')
      echo "malformed index: no '(' line found after this keyword" >&2
      ;;
    *ignored*)
      echo "keyword ignored (too many hits)"
      ;;
    *)
      # Number of lines in the entry, counted from the keyword line.
      count=${marker%%:*}
      echo "Found in:"
      # "[0-9]* " also matches with zero digits, so this keeps every line
      # of the entry that contains a space, then prints its second
      # space-separated field.
      tail -n "+$first" "$INDEX" | head -n "$count" \
        | grep -E '[0-9]* ' | cut -d' ' -f2
      ;;
  esac
}

build_index
normalize_index

while :; do
  printf "Enter a keyword: "
  keyword=
  # At end of input read fails; keyword is then empty, or holds a final
  # line that had no trailing newline (which is still looked up).
  read -r keyword || :
  test -n "$keyword" || break
  lookup "$keyword"
done