Diffstat
-rw-r--r--  src/htsname.c                      14
-rwxr-xr-x  tests/11_crawl-idna.test            7
-rwxr-xr-x  tests/11_crawl-international.test  48
-rwxr-xr-x  tests/11_crawl-parsing.test        11
-rwxr-xr-x  tests/crawl-test.sh                34
-rwxr-xr-x  tests/run-all-tests.sh             20
6 files changed, 122 insertions, 12 deletions
diff --git a/src/htsname.c b/src/htsname.c
index 28e3edc..61bc66f 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -41,6 +41,7 @@ Please visit our Website: http://www.httrack.com
 #include "htsmd5.h"
 #include "htstools.h"
 #include "htscharset.h"
+#include "htsencoding.h"
 #include <ctype.h>
 
 #define ADD_STANDARD_PATH \
@@ -290,11 +291,14 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
     }
     fil = newfil;
   }
-  // Decode remaining %
-  strcpybuff(fil, unescape_http(catbuff, fil));
-  // , BUT do not decode high chars
-  //strcpybuff(fil,unescape_http_unharm(fil, 1));
-  // YES (not server side, but fs/client side)
+
+  // decode remaining % (normally not necessary; already done in htsparse.c)
+  if (hts_unescapeUrl(fil, catbuff, sizeof(catbuff)) == 0) {
+    strcpybuff(fil, catbuff);
+  } else {
+    hts_log_print(opt, LOG_WARNING,
+                  "could not URL-decode string '%s'", fil);
+  }
 
 #if HTS_USEMMS
   /* .asx hack */
diff --git a/tests/11_crawl-idna.test b/tests/11_crawl-idna.test
index 5a541ca..6866d80 100755
--- a/tests/11_crawl-idna.test
+++ b/tests/11_crawl-idna.test
@@ -2,7 +2,12 @@
 #
 # unicode tests
 
-bash crawl-test.sh --errors 1 --files 5 \
+bash crawl-test.sh \
+    --errors 1 --files 5 \
+    --found 'café.ut.httrack.com/unicode-links/café3860.html' \
+    --found 'café.ut.httrack.com/unicode-links/café30f4.html' \
+    --found 'café.ut.httrack.com/unicode-links/café5e1f.html' \
+    --found 'café.ut.httrack.com/unicode-links/café7b30.html' \
     httrack 'http://ut.httrack.com/unicode-links/idna.html' \
     '+*.ut.httrack.com/*' --robots=0
 
diff --git a/tests/11_crawl-international.test b/tests/11_crawl-international.test
index b661963..cc5f627 100755
--- a/tests/11_crawl-international.test
+++ b/tests/11_crawl-international.test
@@ -2,6 +2,48 @@
 #
 # unicode tests
 
-bash crawl-test.sh --errors 1 --files 10 httrack http://ut.httrack.com/unicode-links/utf8.html
-bash crawl-test.sh --errors 4 --files 7 httrack http://ut.httrack.com/unicode-links/default.html
-bash crawl-test.sh --errors 2 --files 9 httrack http://ut.httrack.com/unicode-links/iso88591.html
+bash crawl-test.sh \
+    --errors 1 --files 10 \
+    --found ut.httrack.com/unicode-links/caf%a91bce.html \
+    --found ut.httrack.com/unicode-links/café30f4.html \
+    --found ut.httrack.com/unicode-links/café3860.html \
+    --found ut.httrack.com/unicode-links/café463e.html \
+    --found ut.httrack.com/unicode-links/café5e1f.html \
+    --found ut.httrack.com/unicode-links/café7b30.html \
+    --found ut.httrack.com/unicode-links/café8007.html \
+    --found ut.httrack.com/unicode-links/café9fa8.html \
+    --found ut.httrack.com/unicode-links/caféae52.html \
+    --found ut.httrack.com/unicode-links/caféc009.html \
+    --found ut.httrack.com/unicode-links/utf8.html \
+    httrack http://ut.httrack.com/unicode-links/utf8.html
+
+bash crawl-test.sh \
+    --errors 4 --files 7 \
+    --found ut.httrack.com/unicode-links/café3860.html \
+    --found ut.httrack.com/unicode-links/café9fa8.html \
+    --found ut.httrack.com/unicode-links/café30f4.html \
+    --found ut.httrack.com/unicode-links/café5e1f.html \
+    --found ut.httrack.com/unicode-links/café7b30.html \
+    --found ut.httrack.com/unicode-links/café8007.html \
+    --found ut.httrack.com/unicode-links/caf%e939bd.html \
+    --found ut.httrack.com/unicode-links/caf%e9ae52.html \
+    --found ut.httrack.com/unicode-links/caféaec2.html \
+    --found ut.httrack.com/unicode-links/caféfad6.html \
+    --found ut.httrack.com/unicode-links/default.html \
+    httrack http://ut.httrack.com/unicode-links/default.html
+
+bash crawl-test.sh \
+    --errors 2 --files 9 \
+    --found ut.httrack.com/unicode-links/caf%a9ae52.html \
+    --found ut.httrack.com/unicode-links/caf%a9bf59.html \
+    --found ut.httrack.com/unicode-links/café30f4.html \
+    --found ut.httrack.com/unicode-links/café3860.html \
+    --found ut.httrack.com/unicode-links/café5e1f.html \
+    --found ut.httrack.com/unicode-links/café647f.html \
+    --found ut.httrack.com/unicode-links/café7b30.html \
+    --found ut.httrack.com/unicode-links/café8007.html \
+    --found ut.httrack.com/unicode-links/caféaec2.html \
+    --found ut.httrack.com/unicode-links/caféfad6.html \
+    --found ut.httrack.com/unicode-links/iso88591.html \
+    httrack http://ut.httrack.com/unicode-links/iso88591.html
+
diff --git a/tests/11_crawl-parsing.test b/tests/11_crawl-parsing.test
new file mode 100755
index 0000000..f64cca0
--- /dev/null
+++ b/tests/11_crawl-parsing.test
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+
+# http://code.google.com/p/httrack/issues/detail?id=4&can=1
+bash crawl-test.sh --errors 0 --files 4 \
+    httrack http://ut.httrack.com/parsing/events.html
+
+# http://code.google.com/p/httrack/issues/detail?id=2&can=1
+bash crawl-test.sh --errors 0 --files 3 \
+    httrack http://ut.httrack.com/parsing/background-image.html
+
diff --git a/tests/crawl-test.sh b/tests/crawl-test.sh
index 8713325..370fb57 100755
--- a/tests/crawl-test.sh
+++ b/tests/crawl-test.sh
@@ -68,7 +68,7 @@ function start-crawl {
         ;;
       --no-purge|--summary)
         ;;
-      --errors|--files)
+      --errors|--files|--found|--not-found|--directory)
         pos=$[${pos}+1]
         test "$#" -ge "$pos" || warning "missing argument" || return 1
         ;;
@@ -127,12 +127,40 @@ function start-crawl {
         ;;
       --errors)
         shift
-        test "$#" -gt 0 || warning "missing argument" || return 1
         assert_equals "checking errors" "$1" "$(grep -iEc "^[0-9\:]*[[:space:]]Error:" "${tmp}/hts-log.txt")"
         ;;
+      --found)
+        shift
+        info "checking for $1"
+        if test -f "${tmp}/$1" ; then
+          result "OK"
+        else
+          result "not found"
+          exit 1
+        fi
+        ;;
+      --not-found)
+        shift
+        info "checking for $1"
+        if test -f "${tmp}/$1" ; then  # presence is the failure case for --not-found
+          result "found"
+          exit 1
+        else
+          result "OK"
+        fi
+        ;;
+      --directory)
+        shift
+        info "checking for $1"
+        if test -d "${tmp}/$1" ; then
+          result "OK"
+        else
+          result "not found"
+          exit 1
+        fi
+        ;;
       --files)
         shift
-        test "$#" -gt 0 || warning "missing argument" || return 1
         nFiles=$(grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${tmp}/hts-log.txt" \
           | sed -e 's/.*[[:space:]]\([^ ]*\)[[:space:]]files written.*/\1/g')
         assert_equals "checking files" "$1" "$nFiles"
diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh
new file mode 100755
index 0000000..8e68e8d
--- /dev/null
+++ b/tests/run-all-tests.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+
+error=0
+for i in *.test ; do
+    if bash $i ; then
+        echo "$i: passed" >&2
+    else
+        echo "$i: ERROR" >&2
+        error=1
+    fi
+done
+
+if test "$error" -eq 0; then
+    echo "all tests passed" >&2
+else
+    echo "one or more tests failed" >&2
+fi
+
+exit $error