diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-31 11:38:53 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-31 11:38:53 +0000 |
commit | bc31ec0da9573d482de24f27241482f50e46e60c (patch) | |
tree | e5e80dd055b2e4790802728d4e3b4b5b8c361277 /src/htsencoding.h | |
parent | 8767fd0e750b70a121d95e3ecf7e59bcec499d95 (diff) |
Fixed issue 14 (http://code.google.com/p/httrack/issues/detail?id=14)
Rationale:
* hostname is ASCII, non-ascii characters shall be encoded with IDNA
* URI filenames may embed non-ascii characters, which MUST be UTF-8 encoded
* query string may embed non-ascii characters, which are encoded with the page charset into %xx codes
Diffstat (limited to 'src/htsencoding.h')
-rw-r--r-- | src/htsencoding.h | 19 |
1 files changed, 15 insertions, 4 deletions
diff --git a/src/htsencoding.h b/src/htsencoding.h index 4dfd367..cd35a00 100644 --- a/src/htsencoding.h +++ b/src/htsencoding.h @@ -31,8 +31,8 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ -#ifndef HTS_CHARSET_DEFH -#define HTS_CHARSET_DEFH +#ifndef HTS_ENCODING_DEFH +#define HTS_ENCODING_DEFH /** Standard includes. **/ #include <stdlib.h> @@ -48,8 +48,19 @@ Please visit our Website: http://www.httrack.com * needs to hold as space as the source. * Returns 0 upon success. **/ -extern int hts_unescape_entities(const char *src, - char *dest, const size_t max); +extern int hts_unescapeEntities(const char *src, + char *dest, const size_t max); + +/** + * Unescape HTML entities (as per HTML 4.0 Specification) + * and replace them in-place by their charset equivalents. + * Note: source and destination may be the same, and the destination only + * needs to hold as space as the source. + * Returns 0 upon success. + **/ +extern int hts_unescapeEntitiesWithCharset(const char *src, + char *dest, const size_t max, + const char *charset); #endif |