html/div/search.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60


#!/bin/sh

# Simple indexing test using HTTrack
# A "real" script/program would use a more advanced search, and
# use a binary search (dichotomy) to find the word in the index.txt file.
# This script is really basic and NOT optimized, and
# should not be used for professional purposes :)

# Site that is mirrored to produce the index.
TESTSITE="http://localhost/"

# Create an index if necessary
#
# When ./index.txt is missing, httrack mirrors $TESTSITE into ./test and the
# index.txt it leaves there is moved into the current directory.  Any stale
# ./test directory is removed first.
# NOTE(review): -%I is presumably httrack's "build a search index" switch and
# -O the output directory -- confirm against the httrack manual.
if ! test -f "index.txt"; then
	echo "Building the index .."
	rm -rf test
	httrack --display "$TESTSITE"  -%I -O test
	# Every later step reads index.txt, so stop here instead of letting each
	# search fail with "No such file" errors.  The check is on the produced
	# file rather than on httrack's exit status.
	if ! mv test/index.txt ./; then
		echo "Could not build the index: test/index.txt was not produced" >&2
		exit 1
	fi
fi

# Convert crlf to lf
#
# normalize_index_eol [FILE]
#   Rewrites FILE (default: index.txt) without carriage returns when its
#   first line contains one.  The unconverted file is kept as FILE.old.
#   Does nothing when FILE does not exist.
#   Returns non-zero only if the file could not be moved or rewritten.
normalize_index_eol() {
	# POSIX sh has no "local"; the _eol_ prefix keeps these names private.
	_eol_file=${1:-index.txt}
	# Command substitution only strips trailing newlines, so the CR survives.
	_eol_cr=$(printf '\r')

	test -f "$_eol_file" || return 0

	# Match an actual CR byte in the first line.  (Mapping CR to '#' and
	# grepping for '#' would also fire on a first line that merely contains
	# a '#'.)
	case "$(head -n 1 "$_eol_file")" in
	*"$_eol_cr"*)
		echo "Converting index to Unix LF style (not CR/LF) .."
		mv -f "$_eol_file" "$_eol_file.old" || return 1
		tr -d '\r' < "$_eol_file.old" > "$_eol_file"
		;;
	esac
}

normalize_index_eol index.txt

# lookup_keyword PATTERN [INDEX]
#   Looks PATTERN up in INDEX (default: index.txt) and prints the outcome.
#
#   PATTERN is an extended regular expression, anchored at the start of a
#   line and matched case-insensitively.
#     - no matching line   -> "not found"
#     - several matches    -> the matching keywords, plus a hint to append
#                             "$" to the pattern
#     - exactly one match  -> the locations recorded for that keyword, or
#                             "keyword ignored (too many hits)"
#
#   Index layout as parsed here: the keyword sits at the start of a line; the
#   entry ends at the next line containing "("; if that line contains
#   "ignored" the keyword has no usable locations; inside the entry, every
#   line containing a space carries its location in the second
#   space-separated field.
#   NOTE(review): presumably this is HTTrack's index.txt format -- confirm.
lookup_keyword() {
	# POSIX sh has no "local"; the _lk_ prefix keeps these names private.
	_lk_pattern=$1
	_lk_index=${2:-index.txt}

	# One "lineno:keyword" line per matching entry.
	_lk_hits=$(grep -niE "^$_lk_pattern" "$_lk_index")

	if test -z "$_lk_hits"; then
		echo "not found"
		return 0
	fi

	if test "$(printf '%s\n' "$_lk_hits" | grep -c '')" -ne 1; then
		# Multiple matches
		printf "Found multiple keywords: "
		# -f2- keeps a keyword intact even if it contains ':' itself.
		printf '%s\n' "$_lk_hits" | cut -f2- -d':' | tr '\n' ' '
		echo ""
		echo 'Use keyword$ to find only one'
		return 0
	fi

	# One match
	_lk_first=${_lk_hits%%:*}
	# First line at or after the keyword that contains "(", reported as
	# "offset:text" with offset counted from the keyword line (1-based).
	# "tail -n +N" is the POSIX spelling; the bare "tail +N" form is obsolete.
	_lk_marker=$(tail -n "+$_lk_first" "$_lk_index" | grep -n '(' | head -n 1)

	case "$_lk_marker" in
	*ignored*)
		echo "keyword ignored (too many hits)"
		return 0
		;;
	"")
		# No closing marker (truncated index): the entry runs to the last line.
		_lk_last='$'
		;;
	*)
		_lk_last=$((_lk_first + ${_lk_marker%%:*} - 1))
		;;
	esac

	echo "Found in:"
	# "[0-9]* " also matches with zero digits, i.e. it selects every line
	# that contains a space; kept as-is so the same lines are reported.
	sed -n "${_lk_first},${_lk_last}p" "$_lk_index" | grep -E "[0-9]* " | cut -f2 -d' '
}

# search_prompt
#   Reads keywords from standard input, one per line, and looks each one up
#   until an empty line or end of input is reached.
search_prompt() {
	keyword=-
	while test -n "$keyword"; do
		printf "Enter a keyword: "
		# Clear first so a failed read can never leave the previous keyword
		# behind and spin forever.
		keyword=
		# -r: keep backslashes, they are meaningful in the regex (e.g. "a\.b").
		read -r keyword

		if test -n "$keyword"; then
			lookup_keyword "$keyword"
		fi
	done
}

search_prompt