summaryrefslogtreecommitdiff
path: root/src/htsencoding.h
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2013-05-30 19:04:51 +0000
committerXavier Roche <xroche@users.noreply.github.com>2013-05-30 19:04:51 +0000
commit850f165f4ac90a6e6687c392ddfdd0c6a05b3fe5 (patch)
tree7cac2f01468639c4ab63fe523c17d7638e8cd2ac /src/htsencoding.h
parent01af2a5e73f53ebf8a092e4bda77cd1326c1da11 (diff)
Added hts_unescape_entities(), a rewrite of the HTML entities decoder.
Fixed HTML entities decoding which was done before charset decoding.
Diffstat (limited to 'src/htsencoding.h')
-rw-r--r--src/htsencoding.h55
1 files changed, 55 insertions, 0 deletions
diff --git a/src/htsencoding.h b/src/htsencoding.h
new file mode 100644
index 0000000..4dfd367
--- /dev/null
+++ b/src/htsencoding.h
@@ -0,0 +1,55 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 3
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: Encoding conversion functions */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_CHARSET_DEFH
+#define HTS_CHARSET_DEFH
+
+/** Standard includes. **/
+#include <stdlib.h>
+#include <string.h>
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+/**
+ * Unescape HTML entities (as per HTML 4.0 Specification) from 'src' into
+ * 'dest', replacing each of them by its UTF-8 equivalent.
+ * Note: source and destination may be the same (in-place decoding), and the
+ * destination only needs to hold as much space as the source.
+ * Returns 0 upon success ('max': presumably the size of 'dest'; TODO confirm).
+ **/
+extern int hts_unescape_entities(const char *src,
+ char *dest, const size_t max);
+
+#endif
+