diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2014-06-06 18:43:50 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2014-06-06 18:43:50 +0000 |
commit | 073a55ab1cfed2bd8dd7eec9a8aede82dcf91f19 (patch) | |
tree | 0d425d697b89d993f258434cb102062b85599480 /src | |
parent | 9ff2518be647832a7d3fe21b56bd268b9be3d4e3 (diff) |
Split typed arrays into htsarrays.h
Cleaned up page generation
Diffstat (limited to 'src')
-rw-r--r-- | src/htsarrays.h | 149 | ||||
-rw-r--r-- | src/htscore.c | 52 | ||||
-rw-r--r-- | src/htsparse.c | 145 |
3 files changed, 219 insertions, 127 deletions
diff --git a/src/htsarrays.h b/src/htsarrays.h new file mode 100644 index 0000000..5c83a07 --- /dev/null +++ b/src/htsarrays.h @@ -0,0 +1,149 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) 1998-2014 Xavier Roche and other contributors + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. + +Please visit our Website: http://www.httrack.com +*/ + +/* ------------------------------------------------------------ */ +/* File: Arrays */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +#ifndef HTS_ARRAYS_DEFSTATIC +#define HTS_ARRAYS_DEFSTATIC + +/* System definitions. */ +#include <stdlib.h> +#include <string.h> + +#include "htssafe.h" + +/* Memory allocation assertion failure */ +static void hts_record_assert_memory_failed(const size_t size) { + fprintf(stderr, "memory allocation failed (%lu bytes)", \ + (long int) size); \ + assertf(! "memory allocation failed"); \ +} + +/** Dynamic array of T elements. **/ +#define TypedArray(T) \ + struct { \ + /** Elements. **/ \ + union { \ + /** Typed. 
**/ \ + T* elts; \ + /** Opaque. **/ \ + void* ptr; \ + } data; \ + /** Count. **/ \ + size_t size; \ + /** Capacity. **/ \ + size_t capa; \ + } +#define EMPTY_TYPED_ARRAY { NULL, 0, 0 } + +/** Array size, in elements. **/ +#define TypedArraySize(A) ((A).size) + +/** Array capacity, in elements. **/ +#define TypedArrayCapa(A) ((A).capa) + +/** + * Remaining free space, in elements. + * Macro, first element evaluated multiple times. + **/ +#define TypedArrayRoom(A) ( TypedArrayCapa(A) - TypedArraySize(A) ) + +/** Array elements, of type T*. **/ +#define TypedArrayElts(A) ((A).data.elts) + +/** Array pointer, of type void*. **/ +#define TypedArrayPtr(A) ((A).data.ptr) + +/** Size of T. **/ +#define TypedArrayWidth(A) (sizeof(*TypedArrayElts(A))) + +/** Nth element of the array. **/ +#define TypedArrayNth(A, N) (TypedArrayElts(A)[N]) + +/** + * Tail of the array (outside the array). + * The returned pointer points to the beginning of TypedArrayRoom(A) + * free elements. + **/ +#define TypedArrayTail(A) (TypedArrayNth(A, TypedArraySize(A))) + +/** + * Ensure at least 'ROOM' elements can be put in the remaining space. + * After a call to this macro, TypedArrayRoom(A) is guaranteed to be at + * least equal to 'ROOM'. + **/ +#define TypedArrayEnsureRoom(A, ROOM) do { \ + const size_t room_ = (ROOM); \ + while (TypedArrayRoom(A) < room_) { \ + TypedArrayCapa(A) = TypedArrayCapa(A) < 16 ? 16 : TypedArrayCapa(A) * 2; \ + } \ + TypedArrayPtr(A) = realloc(TypedArrayPtr(A), \
+ TypedArrayCapa(A)*TypedArrayWidth(A)); \
+ if (TypedArrayPtr(A) == NULL) { \
+ hts_record_assert_memory_failed(TypedArrayCapa(A)*TypedArrayWidth(A)); \
+ } \
+} while(0) + +/** Add an element. Macro, first element evaluated multiple times. **/ +#define TypedArrayAdd(A, E) do { \ + TypedArrayEnsureRoom(A, 1); \ + assertf(TypedArraySize(A) < TypedArrayCapa(A)); \ + TypedArrayTail(A) = (E); \ + TypedArraySize(A)++; \ +} while(0) + +/** + * Add 'COUNT' elements from 'PTR'. + * Macro, first element evaluated multiple times. + **/ +#define TypedArrayAppend(A, PTR, COUNT) do { \ + const size_t count_ = (COUNT); \ + /* This 1-case is to benefit from type safety. */ \ + if (count_ == 1) { \ + TypedArrayAdd(A, *(PTR)); \ + } else { \ + const void *const source_ = (PTR); \ + TypedArrayEnsureRoom(A, count_); \ + assertf(count_ <= TypedArrayRoom(A)); \ + memcpy(&TypedArrayTail(A), source_, count_ * TypedArrayWidth(A)); \ + TypedArraySize(A) += count_; \ + } \ +} while(0) + +/** Clear an array, freeing memory and clearing size and capacity. **/ +#define TypedArrayFree(A) do { \ + if (TypedArrayPtr(A) != NULL) { \ + TypedArrayCapa(A) = TypedArraySize(A) = 0; \ + free(TypedArrayPtr(A)); \ + TypedArrayPtr(A) = NULL; \ + } \ +} while(0) + +#endif diff --git a/src/htscore.c b/src/htscore.c index f6b8804..f315d3b 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -61,6 +61,9 @@ Please visit our Website: http://www.httrack.com /* Charset handling */ #include "htscharset.h" +/* Dynamic typed arrays */ +#include "htsarrays.h" + /* END specific definitions */ /* external modules */ @@ -157,47 +160,6 @@ RUN_CALLBACK0(opt, end); \ } while(0) #define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0) -// memory allocation assertion failure -static void hts_record_assert_memory_failed(const size_t size) { - fprintf(stderr, "memory allocation failed (%lu bytes)", \ - (long int) size); \ - assertf(! 
"memory allocation failed"); \ -} - -// Typed array -#define TypedArray(T) \ - struct { \ - T* elts; \ - size_t size; \ - size_t capa; \ - } -#define EMPTY_TYPED_ARRAY { NULL, 0, 0 } - -#define TypedArrayAdd(A, E) do { \ - if ((A).capa == (A).size) { \ - (A).capa = (A).capa < 16 ? 16 : (A).capa * 2; \ - (A).elts = realloct((A).elts, (A).capa*sizeof(*(A).elts)); \ - if ((A).elts == NULL) { \ - hts_record_assert_memory_failed((A).capa*sizeof(*(A).elts)); \ - } \ - } \ - assertf((A).size < (A).capa); \ - (A).elts[(A).size++] = (E); \ -} while(0) - -#define TypedArrayFree(A) do { \ - if ((A).elts != NULL) { \ - freet((A).elts); \ - (A).elts = NULL; \ - (A).capa = (A).size = 0; \ - } \ -} while(0) - -#define TypedArraySize(A) ((A).size) -#define TypedArrayCapa(A) ((A).capa) -#define TypedArrayElts(A) ((A).elts) -#define TypedArrayNth(A, N) (TypedArrayElts(A)[N]) - struct lien_buffers { /* Main array of pointers. This is the real "lien_url **liens" pointer base. */ @@ -282,9 +244,11 @@ static size_t hts_record_link_alloc(httrackp *opt) { // Create a new chunk of lien_url[] // There are references to item pointers, so we can not just realloc() if (liensbuf->lien_buffer_size == liensbuf->lien_buffer_capa) { - TypedArrayAdd(liensbuf->lien_buffers, liensbuf->lien_buffer); - liensbuf->lien_buffer_size = 0; - liensbuf->lien_buffer_capa = 0; + if (liensbuf->lien_buffer != NULL) { + TypedArrayAdd(liensbuf->lien_buffers, liensbuf->lien_buffer); + liensbuf->lien_buffer_size = 0; + liensbuf->lien_buffer_capa = 0; + } liensbuf->lien_buffer = (lien_url*) malloct(block_capa*sizeof(*liensbuf->lien_buffer)); if (liensbuf->lien_buffer == NULL) { diff --git a/src/htsparse.c b/src/htsparse.c index 084ec8e..9174173 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -60,87 +60,62 @@ Please visit our Website: http://www.httrack.com #include "htsparse.h" #include "htsback.h" -// does nothing -#define XH_uninit do {} while(0) +// arrays +#include "htsarrays.h" -// version optimisée, qui permet 
de ne pas toucher aux html non modifiés (update) -#define REALLOC_SIZE 8192 -#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { \ - char message[256]; \ - ht_size=(A)+ht_len+REALLOC_SIZE; \ - ht_buff=(char*) realloct(ht_buff,ht_size); \ - if (ht_buff==NULL) { \ - printf("PANIC! : Not enough memory [%d]\n", __LINE__); \ - XH_uninit; \ - snprintf(message, sizeof(message), "not enough memory for current html document in HT_ADD_CHK : realloct("LLintP") failed", (LLint) ht_size); \ - abortLog(message); \ - abort(); \ +static void AppendString(TypedArray(char) *const a, const char *s, size_t size) { + TypedArrayAppend(*a, s, size); +} + +/** Append bytes to the output buffer up to the pointer 'html'. **/ +#define HT_add_adr do { \ + if ( (opt->getmode & 1) != 0 && ptr > 0 ) { \ + const size_t sz_ = html - lastsaved; \ + if (sz_ != 0) { \ + TypedArrayAppend(output_buffer, lastsaved, sz_); \ + lastsaved = html; \ + } \ } \ -} \ - ht_len+=A; -#define HT_add_adr \ - if ((opt->getmode & 1) && (ptr>0)) { \ - size_t i = ((html - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ - memcpy(ht_buff+j, lastsaved, i); \ - ht_buff[j+i]='\0'; \ - lastsaved=html; \ - } -#define HT_ADD(A) \ - if ((opt->getmode & 1) && (ptr>0)) { \ - size_t i_ = strlen(A), j_ = ht_len; \ - if (i_) { \ - HT_ADD_CHK(i_) \ - memcpy(ht_buff+j_, A, i_); \ - ht_buff[j_+i_]='\0'; \ - } } -#define HT_ADD_HTMLESCAPED(A) \ - if ((opt->getmode & 1) && (ptr>0)) { \ - size_t i_, j_; \ - char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ - escape_for_html_print(A, tempo_, sizeof(tempo_)); \ - i_=strlen(tempo_); \ - j_=ht_len; \ - if (i_) { \ - HT_ADD_CHK(i_) \ - memcpy(ht_buff+j_, tempo_, i_); \ - ht_buff[j_+i_]='\0'; \ - } } -#define HT_ADD_HTMLESCAPED_FULL(A) \ - if ((opt->getmode & 1) && (ptr>0)) { \ - size_t i_, j_; \ - char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ - escape_for_html_print_full(A, tempo_, sizeof(tempo_)); \ - i_=strlen(tempo_); \ - j_=ht_len; \ - if (i_) { \ - HT_ADD_CHK(i_) \ - memcpy(ht_buff+j_, tempo_, i_); \ - 
ht_buff[j_+i_]='\0'; \ - } } -#define HT_ADD_START \ - char message[256]; \ - size_t ht_size=(size_t)(r->size*5)/4+REALLOC_SIZE; \ - size_t ht_len=0; \ - char* ht_buff=NULL; \ - if ((opt->getmode & 1) && (ptr>0)) { \ - ht_buff=(char*) malloct(ht_size); \ - if (ht_buff==NULL) { \ - printf("PANIC! : Not enough memory [%d]\n",__LINE__); \ - XH_uninit; \ - snprintf(message, sizeof(message), "not enough memory for current html document in HT_ADD_START : malloct("LLintP") failed", (LLint) ht_size); \ - abortLog(message); \ - abort(); \ +} while(0) + +/** Append to the output buffer the string 'A'. **/ +#define HT_ADD(A) TypedArrayAppend(output_buffer, A, strlen(A)) + +/** Append to the output buffer the string 'A', html-escaped. **/ +#define HT_ADD_HTMLESCAPED_ANY(A, FUNCTION) do { \ + if ((opt->getmode & 1) != 0 && ptr>0) { \ + const char *const str_ = (A); \ + size_t size_; \ + /* & is the maximum expansion */ \ + TypedArrayEnsureRoom(output_buffer, strlen(str_) * 5 + 1024); \ + size_ = FUNCTION(str_, &TypedArrayTail(output_buffer), \ + TypedArrayRoom(output_buffer)); \ + TypedArraySize(output_buffer) += size_; \ } \ - ht_buff[0]='\0'; \ - } +} while(0) + +/** Append to the output buffer the string 'A', html-escaped for &. **/ +#define HT_ADD_HTMLESCAPED(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print) + +/** + * Append to the output buffer the string 'A', html-escaped for & and + * high chars. 
+ **/ +#define HT_ADD_HTMLESCAPED_FULL(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print_full) + +// does nothing +#define XH_uninit do {} while(0) + #define HT_ADD_END { \ int ok=0;\ - if (ht_buff) { \ + if (TypedArraySize(output_buffer) != 0) { \ + const size_t ht_len = TypedArraySize(output_buffer); \ + const char *const ht_buff = TypedArrayElts(output_buffer); \ char digest[32+2];\ off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),OPT_GET_BUFF_SIZE(opt),savename()));\ - digest[0]='\0';\ - domd5mem(ht_buff,ht_len,digest,1);\ - if (fsize_old==ht_len) { \ + digest[0] = '\0';\ + domd5mem(TypedArrayElts(output_buffer), ht_len, digest, 1);\ + if (fsize_old == (off_t) ht_len) { \ int mlen = 0;\ char* mbuff;\ cache_readdata(cache,"//[HTML-MD5]//",savename(),&mbuff,&mlen);\ @@ -192,7 +167,7 @@ Please visit our Website: http://www.httrack.com if (cache->ndx)\ cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename(),digest,(int)strlen(digest));\ } \ - freet(ht_buff); ht_buff=NULL; \ + TypedArrayFree(output_buffer); \ } #define HT_ADD_FOP @@ -362,11 +337,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } if (!error) { + // output HTML + TypedArray(char) output_buffer = EMPTY_TYPED_ARRAY; + time_t user_interact_timestamp = 0; int detect_title = 0; // détection du title int back_add_stats = opt->state.back_add_stats; - // const char *in_media = NULL; // in other media type (real media and so..) int intag = 0; // on est dans un tag int incomment = 0; // dans un <!-- @@ -417,7 +394,6 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // int parent_relative = 0; // the parent is the base path (.js, .css..) - HT_ADD_START; // débuter lastsaved = html; /* Initialize script automate for comments, quotes.. 
*/ @@ -3368,15 +3344,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if ((opt->getmode & 1) && (ptr > 0)) { { - char *cAddr = ht_buff; - int cSize = (int) ht_len; + char *cAddr = TypedArrayElts(output_buffer); + int cSize = (int) TypedArraySize(output_buffer); hts_log_print(opt, LOG_DEBUG, "engine: postprocess-html: %s%s", urladr(), urlfil()); - if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) == - 1) { - ht_buff = cAddr; - ht_len = cSize; + if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) == 1) { + if (cAddr != TypedArrayElts(output_buffer)) { + hts_log_print(opt, LOG_DEBUG, + "engine: postprocess-html: callback modified data, applying %d bytes", cSize); + TypedArraySize(output_buffer) = 0; + TypedArrayAppend(output_buffer, cAddr, cSize); + } } } |