From 660b569b0980fc8f71b03ed666dd02eec8388b4c Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Mon, 19 Mar 2012 12:59:03 +0000 Subject: httrack 3.41.2 --- src/htsparse.c | 985 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 495 insertions(+), 490 deletions(-) (limited to 'src/htsparse.c') diff --git a/src/htsparse.c b/src/htsparse.c index 4aa1b7e..b39b41f 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -76,7 +76,7 @@ Please visit our Website: http://www.httrack.com #define relativeurlfil ((!parent_relative)?urlfil:parenturlfil) #define relativesavename ((!parent_relative)?savename:parentsavename) -#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } } +#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->log) { fflush(opt->log); } } // does nothing #define XH_uninit do {} while(0) @@ -96,14 +96,14 @@ Please visit our Website: http://www.httrack.com ht_len+=A; #define HT_ADD_ADR \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ + size_t i = ((size_t) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ memcpy(ht_buff+j, lastsaved, i); \ ht_buff[j+i]='\0'; \ lastsaved=adr; \ } #define HT_ADD(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_=strlen(A),j_=ht_len; \ + size_t i_ = strlen(A), j_ = ht_len; \ if (i_) { \ HT_ADD_CHK(i_) \ memcpy(ht_buff+j_, A, i_); \ @@ -111,7 +111,7 @@ Please visit our Website: http://www.httrack.com } } #define HT_ADD_HTMLESCAPED(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_, j_; \ + size_t i_, j_; \ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ escape_for_html_print(A, tempo_); \ i_=strlen(tempo_); \ @@ -123,7 +123,7 @@ Please visit our Website: http://www.httrack.com } } #define HT_ADD_HTMLESCAPED_FULL(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_, j_; \ + size_t i_, j_; \ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ escape_for_html_print_full(A, tempo_); \ i_=strlen(tempo_); \ @@ -134,15 +134,15 @@ Please visit our Website: http://www.httrack.com ht_buff[j_+i_]='\0'; \ } } #define HT_ADD_START \ - int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; \ - int ht_len=0; \ + size_t ht_size=(size_t)(r->size*5)/4+REALLOC_SIZE; \ + size_t ht_len=0; \ char* ht_buff=NULL; \ if ((opt->getmode & 1) && (ptr>0)) { \ ht_buff=(char*) malloct(ht_size); \ if (ht_buff==NULL) { \ printf("PANIC! : Not enough memory [%d]\n",__LINE__); \ XH_uninit; \ - abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \ + abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ (int) ht_size); \ exit(1); \ } \ ht_buff[0]='\0'; \ @@ -151,7 +151,7 @@ Please visit our Website: http://www.httrack.com int ok=0;\ if (ht_buff) { \ char digest[32+2];\ - INTsys fsize_old=fsize(fconv(savename));\ + off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),savename));\ digest[0]='\0';\ domd5mem(ht_buff,ht_len,digest,1);\ if (fsize_old==ht_len) { \ @@ -163,7 +163,7 @@ Please visit our Website: http://www.httrack.com if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\ ok=1;\ if ( (opt->debug>1) && (opt->log!=NULL) ) {\ - fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\ + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\ test_flush;\ }\ } else {\ @@ -171,20 +171,21 @@ Please visit our Website: http://www.httrack.com } \ }\ if (!ok) { \ - file_notify(urladr, urlfil, savename, 1, 1, r->notmodified); \ - fp=filecreate(savename); \ + file_notify(opt,urladr, urlfil, savename, 1, 1, r->notmodified); \ + fp=filecreate(&opt->state.strc, savename); \ if (fp) { \ if (ht_len>0) {\ - if ((INTsys)fwrite(ht_buff,1,ht_len,fp) != ht_len) { \ + if (fwrite(ht_buff,1,ht_len,fp) != ht_len) { \ int fcheck;\ if ((fcheck=check_fatal_io_errno())) {\ opt->state.exit_xh=-1;\ }\ - if (opt->errlog) { \ - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s: %s"LF, savename, strerror(errno));\ + if (opt->log) { \ + int last_errno = errno; \ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unable to write HTML file %s: %s"LF, savename, strerror(last_errno));\ if (fcheck) {\ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"* * Fatal write error, giving up"LF);\ }\ test_flush;\ }\ @@ -196,23 +197,24 @@ Please visit our Website: http://www.httrack.com } else {\ int fcheck;\ if ((fcheck=check_fatal_io_errno())) {\ - fspc(opt->log,"error"); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); \ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); \ test_flush; \ opt->state.exit_xh=-1;\ }\ - if (opt->errlog) { \ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));\ + if (opt->log) { \ + int last_errno = errno; \ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"Unable to save file %s : %s"LF, savename, strerror(last_errno));\ if (fcheck) {\ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"* * Fatal write error, giving up"LF);\ }\ test_flush;\ }\ }\ } else {\ - file_notify(urladr, urlfil, savename, 0, 0, r->notmodified); \ - filenote(savename,NULL); \ + file_notify(opt,urladr, urlfil, savename, 0, 0, r->notmodified); \ + filenote(&opt->state.strc, savename,NULL); \ }\ if (cache->ndx)\ cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\ @@ -237,7 +239,7 @@ Please visit our Website: http://www.httrack.com fflush(makeindex_fp); \ fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \ makeindex_fp=NULL; \ - usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary"); \ + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"),"primary","primary"); \ } \ } \ makeindex_done=1; /* ok c'est fait */ \ @@ -255,7 +257,7 @@ Please visit our Website: http://www.httrack.com #define liens_record(A,F,S,FA,FF) { \ int notecode=0; \ - int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ + size_t lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ adr_len=strlen(A),\ fil_len=strlen(F),\ sav_len=strlen(S),\ @@ -314,27 +316,27 @@ Please visit our Website: http://www.httrack.com #define ENGINE_LOAD_CONTEXT() \ ENGINE_LOAD_CONTEXT_BASE(); \ /* */ \ - htsblk* r = stre->r_; \ - hash_struct* hash = stre->hash_; \ - int lien_max = *stre->lien_max_; \ + htsblk* r HTS_UNUSED = stre->r_; \ + hash_struct* hash HTS_UNUSED = stre->hash_; \ + int lien_max HTS_UNUSED = *stre->lien_max_; \ /* */ \ - int error = * stre->error_; \ - int store_errpage = * stre->store_errpage_; \ - char* codebase = stre->codebase; \ - char* base = stre->base; \ + int error HTS_UNUSED = * stre->error_; \ + int store_errpage HTS_UNUSED = * stre->store_errpage_; \ + char* codebase HTS_UNUSED = stre->codebase; \ + char* base HTS_UNUSED = stre->base; \ /* */ \ - int makeindex_done = *stre->makeindex_done_; \ - FILE* makeindex_fp = *stre->makeindex_fp_; \ - int makeindex_links = *stre->makeindex_links_; \ - char* makeindex_firstlink = stre->makeindex_firstlink_; \ + int makeindex_done HTS_UNUSED = *stre->makeindex_done_; \ + FILE* makeindex_fp HTS_UNUSED = *stre->makeindex_fp_; \ + int makeindex_links HTS_UNUSED = *stre->makeindex_links_; \ + char* makeindex_firstlink HTS_UNUSED = stre->makeindex_firstlink_; \ /* */ \ - char *template_header = stre->template_header_; \ - char *template_body = stre->template_body_; \ - char *template_footer = stre->template_footer_; \ + char *template_header HTS_UNUSED = stre->template_header_; \ + char *template_body HTS_UNUSED = stre->template_body_; \ + char *template_footer HTS_UNUSED = stre->template_footer_; \ /* */ \ - LLint stat_fragment = *stre->stat_fragment_; \ - TStamp makestat_time = stre->makestat_time; \ - FILE* makestat_fp = stre->makestat_fp + LLint stat_fragment HTS_UNUSED = *stre->stat_fragment_; \ + TStamp makestat_time HTS_UNUSED = stre->makestat_time; \ + FILE* makestat_fp HTS_UNUSED = stre->makestat_fp #define ENGINE_SAVE_CONTEXT() \ ENGINE_SAVE_CONTEXT_BASE(); \ @@ -369,7 +371,7 @@ Please visit our Website: http://www.httrack.com /* Increment current pointer to 'steps' characters, modifying automate if necessary */ #define INCREMENT_CURRENT_ADR(steps) do { \ - int steps__ = (steps); \ + int steps__ = (int) ( steps ); \ while(steps__ > 0) { \ adr++; \ AUTOMATE_LOOKUP_CURRENT_ADR(); \ @@ -382,39 +384,38 @@ Please visit our Website: http://www.httrack.com int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* Load engine variables */ ENGINE_LOAD_CONTEXT(); + char catbuff[CATBUFF_SIZE]; -#if HTS_ANALYSTE { char* cAddr = r->adr; int cSize = (int) r->size; if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil); } - if (hts_htmlcheck_preprocess(&cAddr, &cSize, urladr, urlfil) == 1) { + if (RUN_CALLBACK4(opt, preprocess, &cAddr, &cSize, urladr, urlfil) == 1) { r->adr = cAddr; r->size = cSize; } } - if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) { -#endif + if (RUN_CALLBACK4(opt, check_html, r->adr,(int)r->size,urladr,urlfil)) { FILE* fp=NULL; // fichier écrit localement char* adr=r->adr; // pointeur (on parcourt) char* lastsaved; // adresse du dernier octet sauvé + 1 if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"scanning file %s%s (%s).."LF, urladr, urlfil, savename); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"scanning file %s%s (%s).."LF, urladr, urlfil, savename); test_flush; } // Indexing! #if HTS_MAKE_KEYWORD_INDEX if (opt->kindex) { - if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) { + if (index_keyword(r->adr,r->size,r->contenttype,savename,StringBuff(opt->path_html))) { if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"indexing file..done"LF); test_flush; } } else { if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"indexing file..error!"LF); test_flush; } } } @@ -470,6 +471,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // int parent_relative=0; // the parent is the base path (.js, .css..) HT_ADD_START; // débuter + lastsaved=adr; /* Initialize script automate for comments, quotes.. */ memset(inscript_state, 0xff, sizeof(inscript_state)); @@ -498,15 +500,6 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE; /* #8: escape in "" */ inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2; /* #9: escape in '' */ - - /* statistics */ - if ((opt->getmode & 1) && (ptr>0)) { - /* - HTS_STAT.stat_files++; - HTS_STAT.stat_bytes+=r->size; - */ - } - /* Primary list or URLs */ if (ptr == 0) { intag=1; @@ -515,8 +508,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } /* Check is the file is a .js file */ else if ( - (compare_mime(r->contenttype, str->url_file, "application/x-javascript")!=0) - || (compare_mime(r->contenttype, str->url_file, "text/css")!=0) + (compare_mime(opt,r->contenttype, str->url_file, "application/x-javascript")!=0) + || (compare_mime(opt,r->contenttype, str->url_file, "text/css")!=0) ) { /* JavaScript js file */ inscript=1; if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); } @@ -524,10 +517,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { intag=1; // because après