From 25adbdabb47499fe641c7bd9595024ff82667058 Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Mon, 19 Mar 2012 12:51:31 +0000 Subject: httrack 3.30.1 --- html/scripting.html | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100755 html/scripting.html (limited to 'html/scripting.html') diff --git a/html/scripting.html b/html/scripting.html new file mode 100755 index 0000000..2752a0d --- /dev/null +++ b/html/scripting.html @@ -0,0 +1,261 @@ + + + + + + + HTTrack Website Copier - Offline Browser + + + + + + + + + +
HTTrack Website Copier
+ + + + +
Open Source offline browser
+ + + + +
+ + + + +
+ + + + +
+ + +

HTTrack Programming page - scripting

+ +
+ +This page presents several examples, written either as batch scripts (which can be adapted to almost any scripting language) or in C. + +

+ +

+ + + +
+How to get one single file

+ +httrack --get http://localhost/ + +
+ +

+ + + +
+How to get one single file and pipe it to stdout

+ +httrack --quiet --get http://localhost/ -O tmpget -V "cat \$0" | grep -iE "TITLE" +rm -rf tmpget + +
+ +

+ + + +
+How to search in all HTML files on a website

+ +httrack --skeleton http://localhost/ -O tmpget -V "if grep -iE \"TITLE\" \"\$0\">/dev/null; then echo \"Match found at \$0\"; fi"
+rm -rf tmpget
+
+ +
Same thing, but stopping as soon as the first matching file is found:
+ + +httrack --skeleton http://localhost/ -O tmpget -V "if grep -iE \"TITLE\" \"\$0\">/dev/null; then echo \"Match found at \$0\"; kill -9 \$PPID; fi"
+rm -rf tmpget
+
+ +
+ +

+ + + + +
+Indexing a website, and using the index as a search engine

+ +httrack localhost -%I
+ +This will generate an index.txt file, which contains all detected keywords, sorted and indexed using this format:
+ +
+
+keyword
+<tab>   number_of_hits_in_current_page_for_this_keyword   page_location
+<tab>   number_of_hits_in_current_page_for_this_keyword   page_location
+<tab>   number_of_hits_in_current_page_for_this_keyword   page_location
+...
+<tab>   =total_number_of_hits_for_this_keyword
+<tab>   ((total_number_of_hits_for_this_keyword*1000)/total_number_of_keywords)
+
+
+ +Example: + +
+
+
+abilities
+	1 localhost/manual/mod/index-2.html
+	1 localhost/manual/mod/index.html
+	1 localhost/manual/mod/mod_negotiation.html
+	=3
+	(0)
+ability
+	2 localhost/manual/misc/FAQ.html
+	2 localhost/manual/suexec.html
+	1 localhost/manual/handler.html
+	1 localhost/manual/misc/security_tips.html
+	1 localhost/manual/mod/mod_rewrite.html
+	1 localhost/manual/mod/mod_setenvif.html
+	1 localhost/manual/multilogs.html
+	1 localhost/manual/netware.html
+	1 localhost/manual/new_features_1_3.html
+	1 localhost/manual/windows.html
+	=12
+	(0)
+able
+	4 localhost/manual/dso.html
+	4 localhost/manual/mod/core.html
+	3 localhost/manual/dns-caveats.html
+	3 localhost/manual/mod/mod_auth.html
+	3 localhost/manual/mod/mod_rewrite.html
+	3 localhost/manual/upgrading_to_1_3.html
+	2 localhost/manual/misc/API.html
+	2 localhost/manual/misc/FAQ.html
+	2 localhost/manual/misc/windoz_keepalive.html
+	2 localhost/manual/mod/mod_auth_db.html
+	2 localhost/manual/mod/mod_auth_dbm.html
+	1 localhost/manual/misc/descriptors.html
+	1 localhost/manual/misc/fin_wait_2.html
+	1 localhost/manual/misc/security_tips.html
+	1 localhost/manual/mod/mod_auth_digest.html
+	1 localhost/manual/mod/mod_cern_meta.html
+	1 localhost/manual/mod/mod_env.html
+	1 localhost/manual/mod/mod_example.html
+	1 localhost/manual/mod/mod_unique_id.html
+	1 localhost/manual/mod/mod_usertrack.html
+	1 localhost/manual/stopping.html
+	1 localhost/manual/suexec.html
+	1 localhost/manual/vhosts/ip-based.html
+	1 localhost/manual/windows.html
+	=43
+	(0)
+
+... +
+
+ +Script example: +search.sh + +
+ +

+ + +
+
+
+ + + + + +
+ + + + + + -- cgit v1.2.3