summaryrefslogtreecommitdiff
path: root/src/htsparse.c
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2014-06-06 18:43:50 +0000
committerXavier Roche <xroche@users.noreply.github.com>2014-06-06 18:43:50 +0000
commit073a55ab1cfed2bd8dd7eec9a8aede82dcf91f19 (patch)
tree0d425d697b89d993f258434cb102062b85599480 /src/htsparse.c
parent9ff2518be647832a7d3fe21b56bd268b9be3d4e3 (diff)
Splitted typed arrays in htsarrays.h
Cleaned-up page generation
Diffstat (limited to 'src/htsparse.c')
-rw-r--r--src/htsparse.c145
1 files changed, 62 insertions, 83 deletions
diff --git a/src/htsparse.c b/src/htsparse.c
index 084ec8e..9174173 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -60,87 +60,62 @@ Please visit our Website: http://www.httrack.com
#include "htsparse.h"
#include "htsback.h"
-// does nothing
-#define XH_uninit do {} while(0)
+// arrays
+#include "htsarrays.h"
-// version optimisée, qui permet de ne pas toucher aux html non modifiés (update)
-#define REALLOC_SIZE 8192
-#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { \
- char message[256]; \
- ht_size=(A)+ht_len+REALLOC_SIZE; \
- ht_buff=(char*) realloct(ht_buff,ht_size); \
- if (ht_buff==NULL) { \
- printf("PANIC! : Not enough memory [%d]\n", __LINE__); \
- XH_uninit; \
- snprintf(message, sizeof(message), "not enough memory for current html document in HT_ADD_CHK : realloct("LLintP") failed", (LLint) ht_size); \
- abortLog(message); \
- abort(); \
+static void AppendString(TypedArray(char) *const a, const char *s, size_t size) {
+ TypedArrayAppend(*a, s, size);
+}
+
+/** Append bytes to the output buffer up to the pointer 'html'. **/
+#define HT_add_adr do { \
+ if ( (opt->getmode & 1) != 0 && ptr > 0 ) { \
+ const size_t sz_ = html - lastsaved; \
+ if (sz_ != 0) { \
+ TypedArrayAppend(output_buffer, lastsaved, sz_); \
+ lastsaved = html; \
+ } \
} \
-} \
- ht_len+=A;
-#define HT_add_adr \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i = ((html - lastsaved)),j=ht_len; HT_ADD_CHK(i) \
- memcpy(ht_buff+j, lastsaved, i); \
- ht_buff[j+i]='\0'; \
- lastsaved=html; \
- }
-#define HT_ADD(A) \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i_ = strlen(A), j_ = ht_len; \
- if (i_) { \
- HT_ADD_CHK(i_) \
- memcpy(ht_buff+j_, A, i_); \
- ht_buff[j_+i_]='\0'; \
- } }
-#define HT_ADD_HTMLESCAPED(A) \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i_, j_; \
- char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
- escape_for_html_print(A, tempo_, sizeof(tempo_)); \
- i_=strlen(tempo_); \
- j_=ht_len; \
- if (i_) { \
- HT_ADD_CHK(i_) \
- memcpy(ht_buff+j_, tempo_, i_); \
- ht_buff[j_+i_]='\0'; \
- } }
-#define HT_ADD_HTMLESCAPED_FULL(A) \
- if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i_, j_; \
- char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
- escape_for_html_print_full(A, tempo_, sizeof(tempo_)); \
- i_=strlen(tempo_); \
- j_=ht_len; \
- if (i_) { \
- HT_ADD_CHK(i_) \
- memcpy(ht_buff+j_, tempo_, i_); \
- ht_buff[j_+i_]='\0'; \
- } }
-#define HT_ADD_START \
- char message[256]; \
- size_t ht_size=(size_t)(r->size*5)/4+REALLOC_SIZE; \
- size_t ht_len=0; \
- char* ht_buff=NULL; \
- if ((opt->getmode & 1) && (ptr>0)) { \
- ht_buff=(char*) malloct(ht_size); \
- if (ht_buff==NULL) { \
- printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
- XH_uninit; \
- snprintf(message, sizeof(message), "not enough memory for current html document in HT_ADD_START : malloct("LLintP") failed", (LLint) ht_size); \
- abortLog(message); \
- abort(); \
+} while(0)
+
+/** Append to the output buffer the string 'A'. **/
+#define HT_ADD(A) TypedArrayAppend(output_buffer, A, strlen(A))
+
+/** Append to the output buffer the string 'A', html-escaped. **/
+#define HT_ADD_HTMLESCAPED_ANY(A, FUNCTION) do { \
+ if ((opt->getmode & 1) != 0 && ptr>0) { \
+ const char *const str_ = (A); \
+ size_t size_; \
+ /* &amp; is the maximum expansion */ \
+ TypedArrayEnsureRoom(output_buffer, strlen(str_) * 5 + 1024); \
+ size_ = FUNCTION(str_, &TypedArrayTail(output_buffer), \
+ TypedArrayRoom(output_buffer)); \
+ TypedArraySize(output_buffer) += size_; \
} \
- ht_buff[0]='\0'; \
- }
+} while(0)
+
+/** Append to the output buffer the string 'A', html-escaped for &. **/
+#define HT_ADD_HTMLESCAPED(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print)
+
+/**
+ * Append to the output buffer the string 'A', html-escaped for & and
+ * high chars.
+ **/
+#define HT_ADD_HTMLESCAPED_FULL(A) HT_ADD_HTMLESCAPED_ANY(A, escape_for_html_print_full)
+
+// does nothing
+#define XH_uninit do {} while(0)
+
#define HT_ADD_END { \
int ok=0;\
- if (ht_buff) { \
+ if (TypedArraySize(output_buffer) != 0) { \
+ const size_t ht_len = TypedArraySize(output_buffer); \
+ const char *const ht_buff = TypedArrayElts(output_buffer); \
char digest[32+2];\
off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),OPT_GET_BUFF_SIZE(opt),savename()));\
- digest[0]='\0';\
- domd5mem(ht_buff,ht_len,digest,1);\
- if (fsize_old==ht_len) { \
+ digest[0] = '\0';\
+ domd5mem(TypedArrayElts(output_buffer), ht_len, digest, 1);\
+ if (fsize_old == (off_t) ht_len) { \
int mlen = 0;\
char* mbuff;\
cache_readdata(cache,"//[HTML-MD5]//",savename(),&mbuff,&mlen);\
@@ -192,7 +167,7 @@ Please visit our Website: http://www.httrack.com
if (cache->ndx)\
cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename(),digest,(int)strlen(digest));\
} \
- freet(ht_buff); ht_buff=NULL; \
+ TypedArrayFree(output_buffer); \
}
#define HT_ADD_FOP
@@ -362,11 +337,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
if (!error) {
+ // output HTML
+ TypedArray(char) output_buffer = EMPTY_TYPED_ARRAY;
+
time_t user_interact_timestamp = 0;
int detect_title = 0; // détection du title
int back_add_stats = opt->state.back_add_stats;
- //
const char *in_media = NULL; // in other media type (real media and so..)
int intag = 0; // on est dans un tag
int incomment = 0; // dans un <!--
@@ -417,7 +394,6 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//
int parent_relative = 0; // the parent is the base path (.js, .css..)
- HT_ADD_START; // débuter
lastsaved = html;
/* Initialize script automate for comments, quotes.. */
@@ -3368,15 +3344,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if ((opt->getmode & 1) && (ptr > 0)) {
{
- char *cAddr = ht_buff;
- int cSize = (int) ht_len;
+ char *cAddr = TypedArrayElts(output_buffer);
+ int cSize = (int) TypedArraySize(output_buffer);
hts_log_print(opt, LOG_DEBUG, "engine: postprocess-html: %s%s",
urladr(), urlfil());
- if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) ==
- 1) {
- ht_buff = cAddr;
- ht_len = cSize;
+ if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) == 1) {
+ if (cAddr != TypedArrayElts(output_buffer)) {
+ hts_log_print(opt, LOG_DEBUG,
+ "engine: postprocess-html: callback modified data, applying %d bytes", cSize);
+ TypedArraySize(output_buffer) = 0;
+ TypedArrayAppend(output_buffer, cAddr, cSize);
+ }
}
}