summaryrefslogtreecommitdiff
path: root/src/htsencoding.h
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2013-05-31 11:38:53 +0000
committerXavier Roche <xroche@users.noreply.github.com>2013-05-31 11:38:53 +0000
commitbc31ec0da9573d482de24f27241482f50e46e60c (patch)
treee5e80dd055b2e4790802728d4e3b4b5b8c361277 /src/htsencoding.h
parent8767fd0e750b70a121d95e3ecf7e59bcec499d95 (diff)
Fixed issue 14 (http://code.google.com/p/httrack/issues/detail?id=14)
Rationale: * hostname is ASCII, non-ascii characters shall be encoded with IDNA * URI filenames may embed non-ascii characters, which MUST be UTF-8 encoded * query string may embed non-ascii characters, which are encoded with the page charset into %xx codes
Diffstat (limited to 'src/htsencoding.h')
-rw-r--r--src/htsencoding.h19
1 files changed, 15 insertions, 4 deletions
diff --git a/src/htsencoding.h b/src/htsencoding.h
index 4dfd367..cd35a00 100644
--- a/src/htsencoding.h
+++ b/src/htsencoding.h
@@ -31,8 +31,8 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
-#ifndef HTS_CHARSET_DEFH
-#define HTS_CHARSET_DEFH
+#ifndef HTS_ENCODING_DEFH
+#define HTS_ENCODING_DEFH
/** Standard includes. **/
#include <stdlib.h>
@@ -48,8 +48,19 @@ Please visit our Website: http://www.httrack.com
* needs to hold as much space as the source.
* Returns 0 upon success.
**/
-extern int hts_unescape_entities(const char *src,
- char *dest, const size_t max);
+extern int hts_unescapeEntities(const char *src,
+ char *dest, const size_t max);
+
+/**
+ * Unescape HTML entities (as per HTML 4.0 Specification)
+ * and replace them in-place by their charset equivalents.
+ * Note: source and destination may be the same, and the destination only
+ * needs to hold as much space as the source.
+ * Returns 0 upon success.
+ **/
+extern int hts_unescapeEntitiesWithCharset(const char *src,
+ char *dest, const size_t max,
+ const char *charset);
#endif