blob: 8cfa3255140d7017244083eaa0da6c4371ec7753 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
#!/bin/sh
# Simple indexing test using HTTrack.
#
# Builds a keyword index of TESTSITE with HTTrack (only when ./index.txt does
# not exist yet), then repeatedly prompts for a keyword and prints what the
# index records for it. An empty line (or end of input) quits.
#
# A "real" script/program would use an advanced search, e.g. a binary search
# for the word in index.txt. This script is really basic and NOT optimized,
# and should not be used for professional purposes :)

TESTSITE="http://localhost/"
INDEX_FILE="index.txt"

# Create the index if necessary: mirror TESTSITE into ./test with indexing
# enabled, then move the generated index next to this script's working dir.
# Returns non-zero when no test/index.txt came out of the HTTrack run.
ensure_index() {
  if test -f "$INDEX_FILE"; then
    return 0
  fi
  echo "Building the index .."
  rm -rf test
  httrack --display "$TESTSITE" -%I -O test
  # Judge success by the file we need rather than by httrack's exit status.
  if ! test -f "test/$INDEX_FILE"; then
    echo "error: httrack did not produce test/$INDEX_FILE" >&2
    return 1
  fi
  mv "test/$INDEX_FILE" ./
}

# Convert the index from CR/LF to LF line endings when its first line ends
# with a carriage return. The untouched original is kept as index.txt.old.
normalize_line_endings() {
  cr=$(printf '\r')
  if head -n 1 "$INDEX_FILE" | grep -q "$cr\$"; then
    echo "Converting index to Unix LF style (not CR/LF) .."
    mv -f "$INDEX_FILE" "$INDEX_FILE.old" || return 1
    tr -d '\r' < "$INDEX_FILE.old" > "$INDEX_FILE"
  fi
}

# lookup_keyword KEYWORD
# Search the index for lines starting with KEYWORD (case-insensitive).
# KEYWORD is deliberately used as an extended regular expression, so that
# "word$" can be typed to select exactly one entry.
# Prints one of:
#   - "not found"
#   - the list of matching keywords, when several lines match
#   - "keyword ignored (too many hits)", when the entry's "(" line says so
#   - "Found in:" followed by the second space-separated field of every
#     line of the entry that contains a space
# The body runs in a subshell so its variables do not leak to the caller.
lookup_keyword() (
  keyword=$1
  newline='
'
  # Each match comes back as "<line number>:<line text>".
  matches=$(grep -niE "^$keyword" "$INDEX_FILE")

  case $matches in
    '')
      echo "not found"
      ;;
    *"$newline"*)
      # Multiple matches
      printf "Found multiple keywords: "
      printf '%s\n' "$matches" | cut -d ':' -f 2- | tr '\n' ' '
      echo ""
      echo 'Use keyword$ to find only one'
      ;;
    *)
      # One match
      start=${matches%%:*}
      # The entry is taken to end at the first following line that contains
      # "("; grep -n numbers it relative to the keyword line.
      marker=$(tail -n "+$start" "$INDEX_FILE" | grep -nF '(' | head -n 1)
      case $marker in
        *ignored*)
          echo "keyword ignored (too many hits)"
          ;;
        *)
          echo "Found in:"
          if test -n "$marker"; then
            tail -n "+$start" "$INDEX_FILE" | head -n "${marker%%:*}"
          else
            # No "(" line after the keyword: use everything up to end of file.
            tail -n "+$start" "$INDEX_FILE"
          fi | grep -E '[0-9]* ' | cut -d ' ' -f 2
          ;;
      esac
      ;;
  esac
)

# Ask for keywords until an empty line or end of input is read.
prompt_loop() {
  while :; do
    printf "Enter a keyword: "
    keyword=
    # -r keeps backslashes so regex escapes reach grep unchanged. A failing
    # read (end of input) leaves keyword empty, which ends the loop below.
    read -r keyword || :
    test -n "$keyword" || break
    lookup_keyword "$keyword"
  done
}

main() {
  ensure_index || return 1
  normalize_line_endings || return 1
  prompt_loop
}

main "$@"
|