summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Xavier Roche <xroche@users.noreply.github.com> 2014-06-06 18:43:50 +0000
committer: Xavier Roche <xroche@users.noreply.github.com> 2014-06-06 18:43:50 +0000
commit: 073a55ab1cfed2bd8dd7eec9a8aede82dcf91f19 (patch)
tree: 0d425d697b89d993f258434cb102062b85599480 /src
parent: 9ff2518be647832a7d3fe21b56bd268b9be3d4e3 (diff)
Split typed arrays into htsarrays.h
Cleaned-up page generation
Diffstat (limited to 'src')
-rw-r--r-- src/htsarrays.h 149
-rw-r--r-- src/htscore.c 52
-rw-r--r-- src/htsparse.c 145
3 files changed, 219 insertions, 127 deletions
diff --git a/src/htsarrays.h b/src/htsarrays.h
new file mode 100644
index 0000000..5c83a07
--- /dev/null
+++ b/src/htsarrays.h
@@ -0,0 +1,149 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) 1998-2014 Xavier Roche and other contributors
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: Arrays */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_ARRAYS_DEFSTATIC
+#define HTS_ARRAYS_DEFSTATIC
+
+/* System definitions. */
+#include <stdlib.h>
+#include <string.h>
+
+#include "htssafe.h"
+
+/**
+ * Report a failed memory allocation.
+ * Prints the requested size 'size' (in bytes) on stderr, then raises an
+ * assertion failure through assertf() with a constant false expression.
+ **/
+static void hts_record_assert_memory_failed(const size_t size) {
+  /* %lu expects an unsigned long: cast accordingly (not to long int). */
+  fprintf(stderr, "memory allocation failed (%lu bytes)",
+          (unsigned long) size);
+  assertf(! "memory allocation failed");
+}
+
+/**
+ * Dynamic array of T elements.
+ * Expands to an anonymous struct type holding the element storage (a union
+ * giving a typed 'elts' view and an opaque 'ptr' view of the same pointer),
+ * the element count 'size' and the allocated capacity 'capa'.
+ * EMPTY_TYPED_ARRAY is the matching initializer: NULL storage, zero size
+ * and zero capacity.
+ **/
+#define TypedArray(T) \
+ struct { \
+ /** Elements (two views of the same storage pointer). **/ \
+ union { \
+ /** Typed view, used to access elements. **/ \
+ T* elts; \
+ /** Opaque view, used for (re)allocation and release. **/ \
+ void* ptr; \
+ } data; \
+ /** Count: number of elements currently stored. **/ \
+ size_t size; \
+ /** Capacity: number of elements the storage can hold. **/ \
+ size_t capa; \
+ }
+#define EMPTY_TYPED_ARRAY { NULL, 0, 0 }
+
+/** Array size, in elements. Expands to an lvalue. **/
+#define TypedArraySize(A) ((A).size)
+
+/** Array capacity, in elements. Expands to an lvalue. **/
+#define TypedArrayCapa(A) ((A).capa)
+
+/**
+ * Remaining free space, in elements (capacity minus size).
+ * Macro, first argument evaluated multiple times.
+ **/
+#define TypedArrayRoom(A) ( TypedArrayCapa(A) - TypedArraySize(A) )
+
+/** Array elements, of type T*. **/
+#define TypedArrayElts(A) ((A).data.elts)
+
+/** Array pointer, of type void* (same storage as TypedArrayElts(A)). **/
+#define TypedArrayPtr(A) ((A).data.ptr)
+
+/** Size of T, in bytes (sizeof of one element). **/
+#define TypedArrayWidth(A) (sizeof(*TypedArrayElts(A)))
+
+/** Nth element of the array, as an lvalue. N is not bounds-checked. **/
+#define TypedArrayNth(A, N) (TypedArrayElts(A)[N])
+
+/**
+ * Tail of the array (outside the array).
+ * Expands to the element slot located just past the last stored element,
+ * as an lvalue; its address, &TypedArrayTail(A), points to the beginning
+ * of TypedArrayRoom(A) free elements.
+ * Macro, first argument evaluated multiple times.
+ **/
+#define TypedArrayTail(A) (TypedArrayNth(A, TypedArraySize(A)))
+
+/**
+ * Ensure at least 'ROOM' elements can be put in the remaining space.
+ * After a call to this macro, TypedArrayRoom(A) is guaranteed to be at
+ * least equal to 'ROOM'.
+ * The storage is reallocated only when the current room is insufficient
+ * (so a 'ROOM' of zero never allocates); the capacity then grows
+ * geometrically (16 elements at least, then doubled until large enough).
+ * Pointers previously obtained from the array may be invalidated.
+ * hts_record_assert_memory_failed() is called if the allocation fails, or
+ * if the requested size can not be represented in a size_t.
+ * Macro, first argument evaluated multiple times; 'ROOM' evaluated once.
+ **/
+#define TypedArrayEnsureRoom(A, ROOM) do { \
+  const size_t room_ = (ROOM); \
+  if (TypedArrayRoom(A) < room_) { \
+    /* Largest element count whose byte size fits in a size_t. */ \
+    const size_t max_ = ((size_t) -1) / TypedArrayWidth(A); \
+    const size_t needed_ = TypedArraySize(A) + room_; \
+    size_t newcapa_ = TypedArrayCapa(A); \
+    void *newptr_; \
+    /* Reject wrapped-around or unrepresentable requests. */ \
+    if (needed_ < room_ || needed_ > max_) { \
+      hts_record_assert_memory_failed((size_t) -1); \
+    } \
+    /* Geometric growth; never double past the representable range. */ \
+    while (newcapa_ < needed_) { \
+      if (newcapa_ < 16) { \
+        newcapa_ = 16; \
+      } else if (newcapa_ > max_ / 2) { \
+        newcapa_ = needed_; \
+      } else { \
+        newcapa_ *= 2; \
+      } \
+    } \
+    /* Keep the previous pointer untouched until realloc() succeeded. */ \
+    newptr_ = realloc(TypedArrayPtr(A), newcapa_*TypedArrayWidth(A)); \
+    if (newptr_ == NULL) { \
+      hts_record_assert_memory_failed(newcapa_*TypedArrayWidth(A)); \
+    } \
+    TypedArrayPtr(A) = newptr_; \
+    TypedArrayCapa(A) = newcapa_; \
+  } \
+} while(0)
+
+/**
+ * Add the element 'E' at the end of the array, growing the storage first
+ * through TypedArrayEnsureRoom() when there is no room left.
+ * 'E' is evaluated once, after the storage has possibly been reallocated.
+ * Macro, first argument evaluated multiple times.
+ **/
+#define TypedArrayAdd(A, E) do { \
+ TypedArrayEnsureRoom(A, 1); \
+ assertf(TypedArraySize(A) < TypedArrayCapa(A)); \
+ TypedArrayTail(A) = (E); \
+ TypedArraySize(A)++; \
+} while(0)
+
+/**
+ * Add 'COUNT' elements from 'PTR'.
+ * A 'COUNT' of zero is a no-op: nothing is (re)allocated or copied.
+ * 'PTR' must not point inside the array itself, as the storage may be
+ * reallocated before the elements are read.
+ * Macro, first argument evaluated multiple times; 'COUNT' evaluated once.
+ **/
+#define TypedArrayAppend(A, PTR, COUNT) do { \
+  const size_t count_ = (COUNT); \
+  /* This 1-case is to benefit from type safety. */ \
+  if (count_ == 1) { \
+    TypedArrayAdd(A, *(PTR)); \
+  } else { \
+    const void *const source_ = (PTR); \
+    /* Empty source: skip, to avoid a zero-sized realloc() (which may */ \
+    /* legitimately return NULL) and a memcpy() from a NULL pointer. */ \
+    if (count_ != 0) { \
+      TypedArrayEnsureRoom(A, count_); \
+      assertf(count_ <= TypedArrayRoom(A)); \
+      memcpy(&TypedArrayTail(A), source_, count_ * TypedArrayWidth(A)); \
+      TypedArraySize(A) += count_; \
+    } \
+  } \
+} while(0)
+
+/**
+ * Release the storage of an array and reset it to the empty state
+ * (NULL storage, zero size, zero capacity).
+ * An array without storage is left untouched.
+ * Macro, first argument evaluated multiple times.
+ **/
+#define TypedArrayFree(A) do { \
+  if (TypedArrayPtr(A) != NULL) { \
+    free(TypedArrayPtr(A)); \
+    TypedArrayPtr(A) = NULL; \
+    TypedArraySize(A) = 0; \
+    TypedArrayCapa(A) = 0; \
+  } \
+} while(0)
+
+#endif
diff --git a/src/htscore.c b/src/htscore.c
index f6b8804..f315d3b 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -61,6 +61,9 @@ Please visit our Website: http://www.httrack.com
/* Charset handling */
#include "htscharset.h"
+/* Dynamic typed arrays */
+#include "htsarrays.h"
+
/* END specific definitions */
/* external modules */
@@ -157,47 +160,6 @@ RUN_CALLBACK0(opt, end); \
} while(0)
#define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0)
-// memory allocation assertion failure
-static void hts_record_assert_memory_failed(const size_t size) {
- fprintf(stderr, "memory allocation failed (%lu bytes)", \
- (long int) size); \
- assertf(! "memory allocation failed"); \
-}
-
-// Typed array
-#define TypedArray(T) \
- struct { \
- T* elts; \
- size_t size; \
- size_t capa; \
- }
-#define EMPTY_TYPED_ARRAY { NULL, 0, 0 }
-
-#define TypedArrayAdd(A, E) do { \
- if ((A).capa == (A).size) { \
- (A).capa = (A).capa < 16 ? 16 : (A).capa * 2; \
- (A).elts = realloct((A).elts, (A).capa*sizeof(*(A).elts)); \
- if ((A).elts == NULL) { \
- hts_record_assert_memory_failed((A).capa*sizeof(*(A).elts)); \
- } \
- } \
- assertf((A).size < (A).capa); \
- (A).elts[(A).size++] = (E); \
-} while(0)
-
-#define TypedArrayFree(A) do { \
- if ((A).elts != NULL) { \
- freet((A).elts); \
- (A).elts = NULL; \
- (A).capa = (A).size = 0; \
- } \
-} while(0)
-
-#define TypedArraySize(A) ((A).size)
-#define TypedArrayCapa(A) ((A).capa)
-#define TypedArrayElts(A) ((A).elts)
-#define TypedArrayNth(A, N) (TypedArrayElts(A)[N])
-
struct lien_buffers {
/* Main array of pointers.
This is the real "lien_url **liens" pointer base. */
@@ -282,9 +244,11 @@ static size_t hts_record_link_alloc(httrackp *opt) {
// Create a new chunk of lien_url[]
// There are references to item pointers, so we can not just realloc()
if (liensbuf->lien_buffer_size == liensbuf->lien_buffer_capa) {
- TypedArrayAdd(liensbuf->lien_buffers, liensbuf->lien_buffer);
- liensbuf->lien_buffer_size = 0;
- liensbuf->lien_buffer_capa = 0;
+ if (liensbuf->lien_buffer != NULL) {
+ TypedArrayAdd(liensbuf->lien_buffers, liensbuf->lien_buffer);
+ liensbuf->lien_buffer_size = 0;
+ liensbuf->lien_buffer_capa = 0;
+ }
liensbuf->lien_buffer = (lien_url*) malloct(block_capa*sizeof(*liensbuf->lien_buffer));
if (liensbuf->lien_buffer == NULL) {
diff --git a/src/htsparse.c b/src/htsparse.c
index 084ec8e..9174173 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -60,87 +60,62 @@ Please visit our Website: http://www.httrack.com
#include "htsparse.h"
#include "htsback.h"
-// does nothing
-#define XH_uninit do {} while(0)
+// arrays
+#include "htsarrays.h"
-// version optimisée, qui permet de ne pas toucher aux html non modifiés (update)
-#define REALLOC_SIZE 8192
-#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { \
- char message[256]; \
- ht_size=(A)+ht_len+REALLOC_SIZE; \
- ht_buff=(char*) realloct(ht_buff,ht_size); \
- if (ht_buff==NULL) { \
- printf("PANIC! : Not enough memory [%d]\n", __LINE__); \
- XH_uninit; \
- snprintf(message, sizeof(message), "not enough memory for current html document in HT_ADD_CHK : realloct("LLintP") failed", (LLint) ht_size); \
- abortLog(message); \
- abort(); \
+static void AppendString(TypedArray(char) *const a, const char *s, size_t size) { /* Append 'size' bytes read from 's' to the char array '*a'.
+   NOTE(review): TypedArray(char) expands to a new anonymous struct type at
+   each use, so the parameter type presumably does not match the type of
+   arrays declared elsewhere with TypedArray(char) - confirm with callers. */
+ TypedArrayAppend(*a, s, size);
+}
+
+/** Append bytes to the output buffer up to the pointer 'html'. **/
+#define HT_add_adr do { \
+ if ( (opt->getmode & 1) != 0 && ptr > 0 ) { \
+ const size_t sz_ = html - lastsaved; \
+ if (sz_ != 0) { \
+ TypedArrayAppend(output_buffer, lastsaved, sz_); \
+ lastsaved = html; \
+ } \
} \
-} \
- ht_len+=A;
-#define HT_add_adr \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i = ((html - lastsaved)),j=ht_len; HT_ADD_CHK(i) \
- memcpy(ht_buff+j, lastsaved, i); \
- ht_buff[j+i]='\0'; \
- lastsaved=html; \
- }
-#define HT_ADD(A) \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i_ = strlen(A), j_ = ht_len; \
- if (i_) { \
- HT_ADD_CHK(i_) \
- memcpy(ht_buff+j_, A, i_); \
- ht_buff[j_+i_]='\0'; \
- } }
-#define HT_ADD_HTMLESCAPED(A) \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i_, j_; \
- char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
- escape_for_html_print(A, tempo_, sizeof(tempo_)); \
- i_=strlen(tempo_); \
- j_=ht_len; \
- if (i_) { \
- HT_ADD_CHK(i_) \
- memcpy(ht_buff+j_, tempo_, i_); \
- ht_buff[j_+i_]='\0'; \
- } }
-#define HT_ADD_HTMLESCAPED_FULL(A) \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i_, j_; \
- char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
- escape_for_html_print_full(A, tempo_, sizeof(tempo_)); \
- i_=strlen(tempo_); \
- j_=ht_len; \
- if (i_) { \
- HT_ADD_CHK(i_) \
- memcpy(ht_buff+j_, tempo_, i_); \
- ht_buff[j_+i_]='\0'; \
- } }
-#define HT_ADD_START \
- char message[256]; \
- size_t ht_size=(size_t)(r->size*5)/4+REALLOC_SIZE; \
- size_t ht_len=0; \
- char* ht_buff=NULL; \
- if ((opt->getmode & 1) && (ptr>0)) { \
- ht_buff=(char*) malloct(ht_size); \
- if (ht_buff==NULL) { \
- printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
- XH_uninit; \
- snprintf(message, sizeof(message), "not enough memory for current html document in HT_ADD_START : malloct("LLintP") failed", (LLint) ht_size); \
- abortLog(message); \
- abort(); \
+} while(0)
+
+/** Append to the output buffer the string 'A'. **/
+#define HT_ADD(A) TypedArrayAppend(output_buffer, A, strlen(A))
+
+/** Append to the output buffer the string 'A', html-escaped. **/
+#define HT_ADD_HTMLESCAPED_ANY(A, FUNCTION) do { \
+ if ((opt->getmode & 1) != 0 && ptr>0) { \
+ const char *const str_ = (A); \
+ size_t size_; \
+ /* &amp; is the maximum expansion */ \
+ TypedArrayEnsureRoom(output_buffer, strlen(str_) * 5 + 1024); \
+ size_ = FUNCTION(str_, &TypedArrayTail(output_buffer), \
+ TypedArrayRoom(output_buffer)); \
+ TypedArraySize(output_buffer) += size_; \
} \
- ht_buff[0]='\0'; \
- }
+} while(0)
+
+/**
+ * Append to the output buffer the string 'A', html-escaped for &.
+ * Shorthand for HT_ADD_HTMLESCAPED_ANY() using escape_for_html_print().
+ **/
+#define HT_ADD_HTMLESCAPED(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print)
+
+/**
+ * Append to the output buffer the string 'A', html-escaped for & and
+ * high chars.
+ * Shorthand for HT_ADD_HTMLESCAPED_ANY() using
+ * escape_for_html_print_full().
+ **/
+#define HT_ADD_HTMLESCAPED_FULL(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print_full)
+
+// XH_uninit expands to an empty statement in this file (does nothing)
+#define XH_uninit do {} while(0)
+
#define HT_ADD_END { \
int ok=0;\
- if (ht_buff) { \
+ if (TypedArraySize(output_buffer) != 0) { \
+ const size_t ht_len = TypedArraySize(output_buffer); \
+ const char *const ht_buff = TypedArrayElts(output_buffer); \
char digest[32+2];\
off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),OPT_GET_BUFF_SIZE(opt),savename()));\
- digest[0]='\0';\
- domd5mem(ht_buff,ht_len,digest,1);\
- if (fsize_old==ht_len) { \
+ digest[0] = '\0';\
+ domd5mem(TypedArrayElts(output_buffer), ht_len, digest, 1);\
+ if (fsize_old == (off_t) ht_len) { \
int mlen = 0;\
char* mbuff;\
cache_readdata(cache,"//[HTML-MD5]//",savename(),&mbuff,&mlen);\
@@ -192,7 +167,7 @@ Please visit our Website: http://www.httrack.com
if (cache->ndx)\
cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename(),digest,(int)strlen(digest));\
} \
- freet(ht_buff); ht_buff=NULL; \
+ TypedArrayFree(output_buffer); \
}
#define HT_ADD_FOP
@@ -362,11 +337,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
if (!error) {
+ // output HTML
+ TypedArray(char) output_buffer = EMPTY_TYPED_ARRAY;
+
time_t user_interact_timestamp = 0;
int detect_title = 0; // détection du title
int back_add_stats = opt->state.back_add_stats;
- //
const char *in_media = NULL; // in other media type (real media and so..)
int intag = 0; // on est dans un tag
int incomment = 0; // dans un <!--
@@ -417,7 +394,6 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//
int parent_relative = 0; // the parent is the base path (.js, .css..)
- HT_ADD_START; // débuter
lastsaved = html;
/* Initialize script automate for comments, quotes.. */
@@ -3368,15 +3344,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if ((opt->getmode & 1) && (ptr > 0)) {
{
- char *cAddr = ht_buff;
- int cSize = (int) ht_len;
+ char *cAddr = TypedArrayElts(output_buffer);
+ int cSize = (int) TypedArraySize(output_buffer);
hts_log_print(opt, LOG_DEBUG, "engine: postprocess-html: %s%s",
urladr(), urlfil());
- if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) ==
- 1) {
- ht_buff = cAddr;
- ht_len = cSize;
+ if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) == 1) {
+ if (cAddr != TypedArrayElts(output_buffer)) {
+ hts_log_print(opt, LOG_DEBUG,
+ "engine: postprocess-html: callback modified data, applying %d bytes", cSize);
+ TypedArraySize(output_buffer) = 0;
+ TypedArrayAppend(output_buffer, cAddr, cSize);
+ }
}
}