summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/htsalias.c 2
-rw-r--r-- src/htsback.c 69
-rw-r--r-- src/htscache.c 35
-rw-r--r-- src/htscore.c 179
-rw-r--r-- src/htscore.h 1
-rw-r--r-- src/htscoremain.c 34
-rw-r--r-- src/htsglobal.h 4
-rw-r--r-- src/htshelp.c 3
-rw-r--r-- src/htsindex.c 6
-rw-r--r-- src/htslib.c 146
-rw-r--r-- src/htslib.h 25
-rw-r--r-- src/htsmodules.h 1
-rw-r--r-- src/htsname.c 42
-rw-r--r-- src/htsname.h 11
-rw-r--r-- src/htsopt.h 2
-rw-r--r-- src/htsparse.c 47
-rw-r--r-- src/htstools.c 8
17 files changed, 486 insertions, 129 deletions
diff --git a/src/htsalias.c b/src/htsalias.c
index d06936f..1a413fe 100644
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -176,6 +176,8 @@ const char* hts_optalias[][4] = {
{"disable-module","-%w","param1",""},
{"no-background-on-suspend","-y0","single",""},
{"background-on-suspend","-y","single",""},
+ {"utf8-conversion","-%T","single",""},
+ {"no-utf8-conversion","-%T0","single",""},
/* */
/* DEPRECATED */
diff --git a/src/htsback.c b/src/htsback.c
index a6b19ab..a7fe76c 100644
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -128,7 +128,7 @@ void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback) {
#ifndef HTS_NO_BACK_ON_DISK
char *filename = (char*) item->value.ptr;
if (filename != NULL) {
- (void) unlink(filename);
+ (void) UNLINK(filename);
}
#else
/* clear entry content (but not yet the entry) */
@@ -196,7 +196,7 @@ static int back_index_ready(httrackp* opt, struct_back* sback, char* adr, char*
FILE *fp;
char* fileback = (char*) ptr;
char catbuff[CATBUFF_SIZE];
- if (( fp = fopen(fconv(catbuff, fileback), "rb") ) != NULL ) {
+ if (( fp = FOPEN(fconv(catbuff, fileback), "rb") ) != NULL ) {
if (back_unserialize(fp, &itemback) != 0) {
if (itemback != NULL) {
back_clear_entry(itemback);
@@ -217,7 +217,7 @@ static int back_index_ready(httrackp* opt, struct_back* sback, char* adr, char*
test_flush;
}
}
- (void) unlink(fileback);
+ (void) UNLINK(fileback);
#else
itemback = (lien_back*) ptr;
#endif
@@ -293,10 +293,10 @@ int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback)
if (opt->getmode != 0) {
sprintf(filename, "%s.tmp", back[i].url_sav);
} else {
- sprintf(filename, "%stmpfile%d.tmp", StringBuff(opt->path_html), opt->state.tmpnameid++);
+ sprintf(filename, "%stmpfile%d.tmp", StringBuff(opt->path_html_utf8), opt->state.tmpnameid++);
}
/* Security check */
- if (fexist(filename)) {
+ if (fexist_utf8(filename)) {
if (opt->log != NULL) {
HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: temporary file %s already exists"LF, filename);
test_flush;
@@ -323,7 +323,7 @@ int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback)
} else {
if (opt->log != NULL) {
int last_errno = errno;
- HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: open error: %s (%s, %s)"LF, back[i].url_adr, back[i].url_fil, filename, strerror(last_errno), dir_exists(filename) ? "directory exists" : "directory does NOT exist!", fexist(filename) ? "file already exists!" : "file does not exist");
+ HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: open error: %s (%s, %s)"LF, back[i].url_adr, back[i].url_fil, filename, strerror(last_errno), dir_exists(filename) ? "directory exists" : "directory does NOT exist!", fexist_utf8(filename) ? "file already exists!" : "file does not exist");
test_flush;
}
}
@@ -501,7 +501,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
#endif
if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
- back[p].r.out=fopen(back[p].tmpfile,"wb");
+ back[p].r.out=FOPEN(back[p].tmpfile,"wb");
if (back[p].r.out) {
if ((back[p].r.adr) && (back[p].r.size>0)) {
if (fwrite(back[p].r.adr,1,(size_t)back[p].r.size,back[p].r.out) != back[p].r.size) {
@@ -541,12 +541,12 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
back[p].r.statuscode=STATUSCODE_INVALID;
strcpybuff(back[p].r.msg,"Read error when decompressing");
}
- unlink(back[p].url_sav);
+ UNLINK(back[p].url_sav);
}
}
}
/* encore that no remaining temporary file exists */
- unlink(back[p].tmpfile);
+ UNLINK(back[p].tmpfile);
back[p].tmpfile = NULL;
}
// stats
@@ -578,10 +578,10 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
if (back[p].r.is_write) { // Written file
if (may_be_hypertext_mime(opt,back[p].r.contenttype, back[p].url_fil)) { // to parse!
off_t sz;
- sz=fsize(back[p].url_sav);
+ sz=fsize_utf8(back[p].url_sav);
if (sz>0) { // ok, exists!
if (sz < 8192) { // ok, small file --> to parse!
- FILE* fp=fopen(back[p].url_sav,"rb");
+ FILE* fp=FOPEN(back[p].url_sav,"rb");
if (fp) {
back[p].r.adr=malloct((int)sz + 2);
if (back[p].r.adr) {
@@ -599,7 +599,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
fclose(fp);
fp=NULL;
// remove (temporary) file!
- unlink(fconv(catbuff,back[p].url_sav));
+ UNLINK(fconv(catbuff,back[p].url_sav));
}
if (fp)
fclose(fp);
@@ -909,6 +909,7 @@ int back_unserialize(FILE *fp, lien_back** dst) {
}
/* serialize a reference ; used to store references of files being downloaded in case of broken download */
+/* Note: NOT utf-8 */
int back_serialize_ref(httrackp* opt, const lien_back* src) {
char *filename = url_savename_refname_fullpath(opt, src->url_adr, src->url_fil);
FILE *fp = fopen(filename, "wb");
@@ -934,7 +935,7 @@ int back_serialize_ref(httrackp* opt, const lien_back* src) {
/* unserialize a reference ; used to store references of files being downloaded in case of broken download */
int back_unserialize_ref(httrackp* opt, const char *adr, const char *fil, lien_back** dst) {
char *filename = url_savename_refname_fullpath(opt, adr, fil);
- FILE *fp = fopen(filename, "rb");
+ FILE *fp = FOPEN(filename, "rb");
if (fp != NULL) {
int ser = back_unserialize(fp, dst);
fclose(fp);
@@ -1174,7 +1175,7 @@ int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int
/* écrire date "remote" */
if (strnotempty(back[p].url_sav)
&& strnotempty(back[p].r.lastmodified)
- && fexist(back[p].url_sav)) // normalement existe si on a un fichier de sortie
+ && fexist_utf8(back[p].url_sav)) // normalement existe si on a un fichier de sortie
{
set_filetime_rfc822(back[p].url_sav,back[p].r.lastmodified);
}
@@ -1268,7 +1269,7 @@ int back_clear_entry(lien_back* back) {
// only for security
if (back->tmpfile && back->tmpfile[0] != '\0') {
- (void) unlink(back->tmpfile);
+ (void) UNLINK(back->tmpfile);
back->tmpfile = NULL;
}
@@ -1471,7 +1472,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
if (pos<0) { // pas de mise en cache data, vérifier existence
#endif
/* note: no check with IS_DELAYED_EXT() enabled - postcheck by client please! */
- if (save[0] != '\0' && !IS_DELAYED_EXT(save) && fsize(fconv(catbuff,save)) <= 0) { // fichier final n'existe pas ou est vide!
+ if (save[0] != '\0' && !IS_DELAYED_EXT(save) && fsize_utf8(fconv(catbuff,save)) <= 0) { // fichier final n'existe pas ou est vide!
int found=0;
/* It is possible that the file has been moved due to changes in build structure */
@@ -1483,9 +1484,9 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
/* Is supposed to be on disk only */
if (r.is_write && previous_save[0] != '\0') {
/* Exists, but with another (old) filename: rename (almost) silently */
- if (strcmp(previous_save, save) != 0 && fexist(fconv(catbuff, previous_save))) {
+ if (strcmp(previous_save, save) != 0 && fexist_utf8(fconv(catbuff, previous_save))) {
rename(fconv(catbuff, previous_save), fconv(catbuff2,save));
- if (fexist(fconv(catbuff,save))) {
+ if (fexist_utf8(fconv(catbuff,save))) {
found = 1;
if ((opt->debug>1) && (opt->log!=NULL)) {
HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush;
@@ -1511,8 +1512,8 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
// sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour
// en être sûr
if (opt->norecatch) { // tester norecatch
- if (!fexist(fconv(catbuff,save))) { // fichier existe pas mais déclaré: on l'a effacé
- FILE* fp=fopen(fconv(catbuff,save),"wb");
+ if (!fexist_utf8(fconv(catbuff,save))) { // fichier existe pas mais déclaré: on l'a effacé
+ FILE* fp=FOPEN(fconv(catbuff,save),"wb");
if (fp) fclose(fp);
if (opt->log!=NULL) {
HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Previous file '%s' not found (erased by user ?), ignoring: %s%s"LF,save,back[p].url_adr,back[p].url_fil); test_flush;
@@ -1655,7 +1656,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
}
/* Not in cache ; maybe in temporary cache ? Warning: non-movable "url_sav" */
else if (back_unserialize_ref(opt, adr, fil, &itemback) == 0) {
- const long file_size = fsize(itemback->url_sav);
+ const long file_size = fsize_utf8(itemback->url_sav);
/* Found file on disk */
if (file_size > 0) {
char *send_too = back[p].send_too;
@@ -1686,8 +1687,8 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
itemback = NULL;
}
/* Not in cache or temporary cache ; found on disk ? (hack) */
- else if (fexist(save)) {
- off_t sz=fsize(save);
+ else if (fexist_utf8(save)) {
+ off_t sz=fsize_utf8(save);
// Bon, là il est possible que le fichier ait été partiellement transféré
// (s'il l'avait été en totalité il aurait été inscrit dans le cache ET existerait sur disque)
// PAS de If-Modified-Since, on a pas connaissance des données à la date du cache
@@ -2668,7 +2669,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
else if (back[i].status==STATUS_FTP_TRANSFER) { // en réception ftp
if (!fexist(back[i].location_buffer)) { // terminé
FILE* fp;
- fp=fopen(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"),"rb");
+ fp=FOPEN(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"),"rb");
if (fp) {
int j=0;
fscanf(fp,"%d ",&(back[i].r.statuscode));
@@ -2679,7 +2680,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
}
back[i].r.msg[j++]='\0';
fclose(fp);
- unlink(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"));
+ UNLINK(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"));
strcpybuff(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"),"");
} else {
strcpybuff(back[i].r.msg,"Unknown ftp result, check if file is ok");
@@ -2772,7 +2773,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
back[i].tmpfile=tmpnam(back[p].tmpfile_buffer);
#endif
if (back[i].tmpfile != NULL && back[i].tmpfile[0]) {
- if ((back[i].r.out=fopen(back[i].tmpfile,"wb")) == NULL) {
+ if ((back[i].r.out=FOPEN(back[i].tmpfile,"wb")) == NULL) {
last_errno = errno;
}
}
@@ -3292,7 +3293,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
// with an error : consider a 304 error
if (!opt->delete_old) {
if (HTTP_IS_ERROR(back[i].r.statuscode) && back[i].is_update && !back[i].testmode) {
- if (back[i].url_sav[0] && fexist(back[i].url_sav)) {
+ if (back[i].url_sav[0] && fexist_utf8(back[i].url_sav)) {
if ((opt->debug>1) && (opt->log!=NULL)) {
HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Error ignored %d (%s) because of 'no purge' option for %s%s"LF,back[i].r.statuscode,back[i].r.msg,back[i].url_adr,back[i].url_fil); test_flush;
}
@@ -3350,7 +3351,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
if (back[i].r.statuscode==HTTP_OK && !back[i].testmode) { // 'OK'
if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil)) { // not HTML
if (strnotempty(back[i].url_sav)) { // target found
- int size = fsize(back[i].url_sav); // target size
+ int size = fsize_utf8(back[i].url_sav); // target size
if (size >= 0) {
if (back[i].r.totalsize == size) { // same size!
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
@@ -3473,11 +3474,11 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
// In case of 'if-unmodified-since' hack, a 304 status can be sent
// then, force 'ok' status
if (back[i].r.statuscode == STATUSCODE_INVALID) {
- if (fexist(back[i].url_sav)) {
+ if (fexist_utf8(back[i].url_sav)) {
back[i].r.statuscode=HTTP_OK; // OK
strcpybuff(back[i].r.msg, "OK (cached)");
back[i].r.is_file=1;
- back[i].r.totalsize = back[i].r.size = fsize(back[i].url_sav);
+ back[i].r.totalsize = back[i].r.size = fsize_utf8(back[i].url_sav);
get_httptype(opt,back[i].r.contenttype, back[i].url_sav, 1);
if ((opt->debug>0) && (opt->log!=NULL)) {
HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -3544,7 +3545,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
// traiter 206 (partial content)
// xxc SI CHUNK VERIFIER QUE CA MARCHE??
if (back[i].r.statuscode==206) { // on nous envoie un morceau (la fin) coz une partie sur disque!
- off_t sz=fsize(back[i].url_sav);
+ off_t sz=fsize_utf8(back[i].url_sav);
#if HDEBUG
printf("partial content: "LLintP" on disk..\n",(LLint)sz);
#endif
@@ -3553,7 +3554,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir**
filenote(&opt->state.strc,back[i].url_sav,NULL); // noter fichier comme connu
file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 1, back[i].r.notmodified);
- back[i].r.out=fopen(fconv(catbuff,back[i].url_sav),"ab"); // append
+ back[i].r.out=FOPEN(fconv(catbuff,back[i].url_sav),"ab"); // append
if (back[i].r.out) {
back[i].r.is_write=1; // écrire
back[i].r.size=sz; // déja écrit
@@ -3577,7 +3578,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
}
}
} else { // mémoire
- FILE* fp=fopen(fconv(catbuff,back[i].url_sav),"rb");
+ FILE* fp=FOPEN(fconv(catbuff,back[i].url_sav),"rb");
if (fp) {
LLint alloc_mem=sz + 1;
if (back[i].r.totalsize>0)
@@ -3703,7 +3704,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
#if HTS_REMOVE_BAD_FILES
if (back[i].status<0) {
if (!back[i].testmode) { // pas en test
- unlink(back[i].url_sav); // éliminer fichier (endommagé)
+ UNLINK(back[i].url_sav); // éliminer fichier (endommagé)
//printf("&& %s\n",back[i].url_sav);
}
}
diff --git a/src/htscache.c b/src/htscache.c
index 9ff8055..2679111 100644
--- a/src/htscache.c
+++ b/src/htscache.c
@@ -147,7 +147,7 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,const char* url_adr,
// stocker fichiers (et robots.txt)
if ( url_save == NULL || (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) {
// ajouter le fichier au cache
- cache_add(opt,cache,r,url_adr,url_fil,url_save,opt->all_in_cache,StringBuff(opt->path_html));
+ cache_add(opt,cache,r,url_adr,url_fil,url_save,opt->all_in_cache,StringBuff(opt->path_html_utf8));
//
// store a reference NOT to redo the same test zillions of times!
// (problem reported by Lars Clausen)
@@ -367,9 +367,9 @@ void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,const char* url_a
} else {
FILE* fp;
// On recopie le fichier->.
- off_t file_size=fsize(fconv(catbuff, url_save));
+ off_t file_size=fsize_utf8(fconv(catbuff, url_save));
if (file_size>=0) {
- fp=fopen(fconv(catbuff, url_save),"rb");
+ fp=FOPEN(fconv(catbuff, url_save),"rb");
if (fp!=NULL) {
char BIGSTK buff[32768];
size_t nl;
@@ -498,10 +498,10 @@ void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,char* url_adr,cha
} else { // recopier fichier dans cache
FILE* fp;
// On recopie le fichier->.
- off_t file_size=fsize(fconv(catbuff, url_save));
+ off_t file_size=fsize_utf8(fconv(catbuff, url_save));
if (file_size>=0) {
if (cache_wLLint(cache_dat,file_size)!=-1) {
- fp=fopen(fconv(catbuff, url_save),"rb");
+ fp=FOPEN(fconv(catbuff, url_save),"rb");
if (fp!=NULL) {
char BIGSTK buff[32768];
ssize_t nl;
@@ -678,9 +678,9 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,const char* adr,c
/* Previous entry */
if (previous_save_[0] != '\0') {
- int pathLen = (int) strlen(StringBuff(opt->path_html));
- if (pathLen != 0 && strncmp(previous_save_, StringBuff(opt->path_html), pathLen) != 0) { // old (<3.40) buggy format
- sprintf(previous_save, "%s%s", StringBuff(opt->path_html), previous_save_);
+ int pathLen = (int) strlen(StringBuff(opt->path_html_utf8));
+ if (pathLen != 0 && strncmp(previous_save_, StringBuff(opt->path_html_utf8), pathLen) != 0) { // old (<3.40) buggy format
+ sprintf(previous_save, "%s%s", StringBuff(opt->path_html_utf8), previous_save_);
} else {
strcpy(previous_save, previous_save_);
}
@@ -710,8 +710,8 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,const char* adr,c
r.is_write=1; // écrire
if (!dataincache) {
- if (fexist(fconv(catbuff, save))) { // un fichier existe déja
- //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree)
+ if (fexist_utf8(fconv(catbuff, save))) { // un fichier existe déja
+ //if (fsize_utf8(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree)
ok=1; // plus rien à faire
filenote(&opt->state.strc,save,NULL); // noter comme connu
file_notify(opt,adr, fil, save, 0, 0, 1); // data in cache
@@ -797,8 +797,8 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,const char* adr,c
strcpybuff(r.msg,"Previous cache file not found (2)");
}
} else { /* Read in memory from cache */
- if (strnotempty(previous_save) && fexist(previous_save)) {
- FILE* fp = fopen(fconv(catbuff, previous_save), "rb");
+ if (strnotempty(previous_save) && fexist_utf8(previous_save)) {
+ FILE* fp = FOPEN(fconv(catbuff, previous_save), "rb");
if (fp != NULL) {
r.adr = (char*) malloct((int) r.size + 4);
if (r.adr != NULL) {
@@ -1016,8 +1016,8 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,const char* adr,c
int ok=0;
r.is_write=1; // écrire
- if (fexist(fconv(catbuff, save))) { // un fichier existe déja
- //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree)
+ if (fexist_utf8(fconv(catbuff, save))) { // un fichier existe déja
+ //if (fsize_utf8(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree)
ok=1; // plus rien à faire
filenote(&opt->state.strc,save,NULL); // noter comme connu
file_notify(opt,adr, fil, save, 0, 0, 0);
@@ -1082,8 +1082,8 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,const char* adr,c
r.statuscode=STATUSCODE_INVALID;
strcpybuff(r.msg,"Previous cache file not found (2)");
} else { /* Read in memory from cache */
- if (strnotempty(return_save) && fexist(return_save)) {
- FILE* fp = fopen(fconv(catbuff, return_save), "rb");
+ if (strnotempty(return_save) && fexist_utf8(return_save)) {
+ FILE* fp = FOPEN(fconv(catbuff, return_save), "rb");
if (fp != NULL) {
r.adr = (char*) malloct((size_t)r.size + 4);
if (r.adr != NULL) {
@@ -1685,10 +1685,12 @@ void cache_init(cache_back* cache,httrackp* opt) {
// lire un fichier.. (compatible \0)
+/* Note: NOT utf-8 */
char* readfile(char* fil) {
return readfile2(fil, NULL);
}
+/* Note: NOT utf-8 */
char* readfile2(char* fil, LLint* size) {
char* adr=NULL;
char catbuff[CATBUFF_SIZE];
@@ -1714,6 +1716,7 @@ char* readfile2(char* fil, LLint* size) {
return adr;
}
+/* Note: NOT utf-8 */
char* readfile_or(char* fil,char* defaultdata) {
char* realfile=fil;
char* ret;
diff --git a/src/htscore.c b/src/htscore.c
index fc352f4..8d62df7 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -64,6 +64,9 @@ Please visit our Website: http://www.httrack.com
/* Cache */
#include "htszlib.h"
+/* Charset handling */
+#include "htscharset.h"
+
/* END specific definitions */
@@ -256,7 +259,7 @@ if (makeindex_fp) { \
fflush(makeindex_fp); \
fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \
makeindex_fp=NULL; \
- usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_html),"index.html"),"",""); \
+ usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_html_utf8),"index.html"),"",""); \
} \
} \
makeindex_done=1; /* ok c'est fait */ \
@@ -601,7 +604,7 @@ int httpmirror(char* url1, httrackp* opt) {
// lien primaire
- liens_record("primary","/primary",fslash(OPT_GET_BUFF(opt),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html")),"","",opt->urlhack);
+ liens_record("primary","/primary",fslash(OPT_GET_BUFF(opt),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html_utf8),"index.html")),"","",opt->urlhack);
if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
printf("PANIC! : Not enough memory [%d]\n",__LINE__);
if (opt->log) {
@@ -890,6 +893,8 @@ int httpmirror(char* url1, httrackp* opt) {
str.lien_size_ = &lien_size;
str.lien_buffer_ = &lien_buffer;
/* */
+ str.page_charset_ = NULL;
+ /* */
/* */
stre.r_ = &r;
/* */
@@ -1048,6 +1053,33 @@ int httpmirror(char* url1, httrackp* opt) {
(is_hypertext_mime(opt,r.contenttype, urlfil) /* Is HTML or Js, .. */
|| may_be_hypertext_mime(opt,r.contenttype, urlfil)) /* Is real media, .. */
) {
+
+ /* Convert charset to UTF-8 - NOT! (what about links ? remote server side will have troubles with converted names) */
+ //if (r.adr != NULL && r.size != 0 && opt->convert_utf8) {
+ // char *charset;
+ // char *pos;
+ // if (r.charset[0] != '\0') {
+ // charset = strdup(r.charset);
+ // } else {
+ // charset = hts_getCharsetFromMeta(r.adr, r.size);
+ // }
+ // if (charset != NULL) {
+ // char *const utf8 = hts_convertStringToUTF8(r.adr, r.size, charset);
+ // /* Use new buffer */
+ // if (utf8 != NULL) {
+ // freet(r.adr);
+ // r.size = strlen(utf8);
+ // r.adr = utf8;
+ // /* New UTF-8 charset */
+ // r.charset[0] = '\0';
+ // strcpy(r.charset, "utf-8");
+ // }
+ // /* Free charset */
+ // free(charset);
+ // }
+ //}
+
+ /* Check bogus chars */
if ((r.adr) && (r.size)) {
unsigned int map[256];
int i;
@@ -1199,10 +1231,10 @@ int httpmirror(char* url1, httrackp* opt) {
// if (r.adr==NULL) { // Written file
// if (may_be_hypertext_mime(r.contenttype, urlfil)) { // to parse!
// LLint sz;
- // sz=fsize(savename);
+ // sz=fsize_utf8(savename);
// if (sz>0) { // ok, exists!
// if (sz < 8192) { // ok, small file --> to parse!
- // FILE* fp=fopen(savename,"rb");
+ // FILE* fp=FOPEN(savename,"rb");
// if (fp) {
// r.adr=malloct((int)sz + 2);
// if (r.adr) {
@@ -1285,6 +1317,8 @@ int httpmirror(char* url1, httrackp* opt) {
str.lien_size_ = &lien_size;
str.lien_buffer_ = &lien_buffer;
/* */
+ str.page_charset_ = NULL;
+ /* */
/* */
stre.r_ = &r;
/* */
@@ -1401,6 +1435,7 @@ int httpmirror(char* url1, httrackp* opt) {
// -- -- -- --
// Parsing HTML
if (!error) {
+ char page_charset[32];
/* Remove file if being processed */
if (is_loaded_from_file) {
@@ -1408,6 +1443,23 @@ int httpmirror(char* url1, httrackp* opt) {
is_loaded_from_file = 0;
}
+ /* Detect charset to convert links into proper UTF8 filenames */
+ page_charset[0] = '\0';
+ if (opt->convert_utf8) {
+ if (r.charset[0] != '\0') {
+ if (strlen(r.charset) < sizeof(page_charset)) {
+ strcpy(page_charset, r.charset);
+ }
+ } else if (is_html_mime_type(r.contenttype)) {
+ char *const charset = hts_getCharsetFromMeta(r.adr, r.size);
+ if (charset != NULL && strlen(charset) < sizeof(page_charset)) {
+ strcpy(page_charset, charset);
+ }
+ if (charset != NULL)
+ free(charset);
+ }
+ }
+
/* Info for wrappers */
if ( (opt->debug>0) && (opt->log!=NULL) ) {
HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: check-html: %s%s"LF,urladr,urlfil);
@@ -1442,6 +1494,8 @@ int httpmirror(char* url1, httrackp* opt) {
str.lien_size_ = &lien_size;
str.lien_buffer_ = &lien_buffer;
/* */
+ str.page_charset_ = page_charset[0] != '\0' ? page_charset : NULL;
+ /* */
/* */
stre.r_ = &r;
/* */
@@ -1750,7 +1804,7 @@ int httpmirror(char* url1, httrackp* opt) {
HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Real Media): parsing %s"LF,savename); test_flush;
}
if (fexist(savename)) { // ok, existe bien!
- FILE* fp=fopen(savename,"r+b");
+ FILE* fp=FOPEN(savename,"r+b");
if (fp) {
if (!fseek(fp,0,SEEK_SET)) {
char BIGSTK line[HTS_URLMAXSIZE*2];
@@ -2328,7 +2382,7 @@ static int mkdir_compat(const char *pathname) {
/* path must end with "/" or with the finename (/tmp/bar/ or /tmp/bar/foo.zip) */
HTSEXT_API int dir_exists(const char* path) {
- struct stat st;
+ STRUCT_STAT st;
char BIGSTK file[HTS_URLMAXSIZE*2];
int i = 0;
if (strnotempty(path) == 0) {
@@ -2356,7 +2410,7 @@ HTSEXT_API int dir_exists(const char* path) {
file[i + 1] = '\0';
/* Check the final dir */
- if (stat(file, &st) == 0 && S_ISDIR(st.st_mode)) {
+ if (STAT(file, &st) == 0 && S_ISDIR(st.st_mode)) {
errno = 0;
return 1; /* EXISTS */
}
@@ -2365,6 +2419,7 @@ HTSEXT_API int dir_exists(const char* path) {
}
/* path must end with "/" or with the finename (/tmp/bar/ or /tmp/bar/foo.zip) */
+/* Note: *not* UTF-8 */
HTSEXT_API int structcheck(const char* path) {
struct stat st;
char BIGSTK tmpbuf[HTS_URLMAXSIZE*2];
@@ -2459,6 +2514,102 @@ HTSEXT_API int structcheck(const char* path) {
return 0;
}
+/* Ensure the directory chain leading to 'path' exists, creating missing directories one component at a time; path must end with "/" or with the filename (/tmp/bar/ or /tmp/bar/foo.zip). Returns 0 on success (or when path is empty), -1 on error. */
+/* Note: UTF-8 (goes through the STAT/MKDIR/RENAME macros instead of the plain libc calls; presumably these are the UTF-8-aware wrappers -- confirm in htslib.h) */
+HTSEXT_API int structcheck_utf8(const char* path) {
+ STRUCT_STAT st; /* result buffer for STAT() */
+ char BIGSTK tmpbuf[HTS_URLMAXSIZE*2]; /* holds "<file>.txt" when a regular file is moved out of the way */
+ char BIGSTK file[HTS_URLMAXSIZE*2]; /* working copy of path, truncated/restored in place */
+ int i = 0;
+ int npaths; /* loop counter only; never read in this function */
+ if (strnotempty(path) == 0) /* empty path: nothing to check */
+ return 0;
+ if (strlen(path) > HTS_URLMAXSIZE) { /* reject over-long paths so the copies below fit in file/tmpbuf */
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* Get a copy */
+ strcpybuff(file, path);
+#ifdef _WIN32
+ /* To system name */
+ for(i = 0 ; file[i] != 0 ; i++) {
+ if (file[i] == '/') {
+ file[i] = PATH_SEPARATOR;
+ }
+ }
+#endif
+ /* Get prefix (note: file can not be empty here) */
+ for(i = (int) strlen(file) - 1 ; i > 0 && file[i] != PATH_SEPARATOR ; i--); /* back up to the last separator (drops the filename part) */
+ for( ; i > 0 && file[i] == PATH_SEPARATOR ; i--); /* then back up over the separator run itself */
+ file[i + 1] = '\0'; /* NOTE(review): with no separator in path, i ends at 0 and file is cut to its first character -- confirm callers always pass a directory component */
+
+ /* First check the final dir */
+ if (STAT(file, &st) == 0 && S_ISDIR(st.st_mode)) {
+ return 0; /* OK */
+ }
+
+ /* Start from the beginning */
+ i = 0;
+
+ /* Skip irrelevant part (the root slash, or the drive path) */
+#ifdef _WIN32
+ if (file[0] != 0 && file[1] == ':') { /* f:\ */
+ i+= 2;
+ if (file[i] == PATH_SEPARATOR) { /* f:\ */
+ i++;
+ }
+ } else if (file[0] == PATH_SEPARATOR && file[1] == PATH_SEPARATOR) { /* \\mch */
+ i+= 2;
+ }
+#endif
+
+ /* Check paths */
+ for(npaths = 1 ; ; npaths++) { /* one iteration per path component; exits via break or return */
+ char end_char; /* character overwritten by the temporary terminator */
+
+ /* Go to next path */
+
+ /* Skip separator(s) */
+ for( ; file[i] == PATH_SEPARATOR ; i++);
+ /* Next separator */
+ for( ; file[i] != 0 && file[i] != PATH_SEPARATOR ; i++);
+
+ /* Check */
+ end_char = file[i];
+ if (end_char != 0) {
+ file[i] = '\0'; /* temporarily terminate so 'file' names the prefix up to this component */
+ }
+ if (STAT(file, &st) == 0) { /* Something exists */
+ if (!S_ISDIR(st.st_mode)) {
+#if HTS_REMOVE_ANNOYING_INDEX
+ if (S_ISREG(st.st_mode)) { /* Regular file in place ; move it and create directory */
+ sprintf(tmpbuf, "%s.txt", file); /* new name: same path with ".txt" appended */
+ if (RENAME(file, tmpbuf) != 0) { /* Can't rename regular file */
+ return -1;
+ }
+ if (MKDIR(file) != 0) { /* Can't create directory */
+ return -1;
+ }
+ } /* other non-directory entries are left untouched and the walk continues */
+#else
+#error Not implemented
+#endif
+ }
+ } else { /* Nothing exists ; create directory */
+ if (MKDIR(file) != 0) { /* Can't create directory */
+ return -1;
+ }
+ }
+ if (end_char == 0) { /* End */
+ break;
+ } else {
+ file[i] = end_char; /* Restore / */
+ }
+ }
+ return 0; /* every component exists or was created */
+}
+
// sauver un fichier
int filesave(httrackp* opt,const char* adr,int len,const char* s,const char* url_adr,const char* url_fil) {
FILE* fp;
@@ -2497,6 +2648,7 @@ int check_fatal_io_errno(void) {
// ouvrir un fichier (avec chemin Un*x)
+/* Note: utf-8 */
FILE* filecreate(filenote_strc *strc, const char* s) {
char BIGSTK fname[HTS_URLMAXSIZE*2];
FILE* fp;
@@ -2523,17 +2675,17 @@ FILE* filecreate(filenote_strc *strc, const char* s) {
#endif
/* Try to open the file */
- fp = fopen(fname, "wb");
+ fp = FOPEN(fname, "wb");
/* Error ? Check the directory structure and retry. */
if (fp == NULL) {
last_errno = errno;
- if (structcheck(s) != 0) {
+ if (structcheck_utf8(s) != 0) {
last_errno = errno;
} else {
last_errno = 0;
}
- fp = fopen(fname, "wb");
+ fp = FOPEN(fname, "wb");
}
if (fp == NULL && last_errno != 0) {
errno = last_errno;
@@ -2571,7 +2723,7 @@ FILE* fileappend(filenote_strc *strc,const char* s) {
#endif
// ouvrir
- fp=fopen(fname,"ab");
+ fp=FOPEN(fname,"ab");
#ifndef _WIN32
if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE);
@@ -2616,6 +2768,7 @@ int filenote(filenote_strc *strc, const char* s, filecreate_params* params) {
return 1;
}
+/* Note: utf-8 */
void file_notify(httrackp* opt,const char* adr,const char* fil,const char* save,int create,int modify,int not_updated) {
RUN_CALLBACK6(opt, filesave2, adr, fil, save, create, modify, not_updated);
}
@@ -2681,7 +2834,7 @@ static void postprocess_file(httrackp* opt,const char* save, const char* adr, co
int n;
if (rsc_fil == NULL)
rsc_fil = fil;
- if (strncmp(fslash(OPT_GET_BUFF(opt),save), fslash(OPT_GET_BUFF(opt),StringBuff(opt->path_html)), (n = (int)strlen(StringBuff(opt->path_html)))) == 0) {
+ if (strncmp(fslash(OPT_GET_BUFF(opt),save), fslash(OPT_GET_BUFF(opt),StringBuff(opt->path_html_utf8)), (n = (int)strlen(StringBuff(opt->path_html_utf8)))) == 0) {
rsc_save += n;
}
@@ -2716,7 +2869,7 @@ static void postprocess_file(httrackp* opt,const char* save, const char* adr, co
}
}
if (opt->state.mimehtml_created == 1 && opt->state.mimefp != NULL) {
- FILE* fp = fopen(save, "rb");
+ FILE* fp = FOPEN(save, "rb");
if (fp != NULL) {
char buff[60*100 + 2];
char mimebuff[256];
diff --git a/src/htscore.h b/src/htscore.h
index a3467f4..21161a9 100644
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -340,6 +340,7 @@ void usercommand_exe(const char* cmd,const char* file);
int filters_init(char*** ptrfilters, int maxfilter, int filterinc);
#ifndef HTTRACK_DEFLIB
HTSEXT_API int structcheck(const char* path);
+HTSEXT_API int structcheck_utf8(const char* path);
HTSEXT_API int dir_exists(const char* path);
#endif
HTS_INLINE int fspc(httrackp *opt,FILE* fp,const char* type);
diff --git a/src/htscoremain.c b/src/htscoremain.c
index e7d7ad0..3654c7d 100644
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -47,6 +47,7 @@ Please visit our Website: http://www.httrack.com
#include "htswrap.h"
#include "htsmodules.h"
#include "htszlib.h"
+#include "htscharset.h"
#include <ctype.h>
#if USE_BEGINTHREAD
@@ -394,6 +395,22 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt) {
} // for
+ // Convert path to UTF-8
+#ifdef _WIN32
+ {
+ char *const path = hts_convertStringSystemToUTF8(StringBuff(opt->path_html), (int) StringLength(opt->path_html));
+ if (path != NULL) {
+ StringCopy(opt->path_html_utf8, path);
+ free(path);
+ } else {
+ StringCopyN(opt->path_html_utf8, StringBuff(opt->path_html), StringLength(opt->path_html));
+ }
+ }
+#else
+ // Assume UTF-8 filesystem.
+ StringCopyN(opt->path_html_utf8, StringBuff(opt->path_html), StringLength(opt->path_html));
+#endif
+
/* if doit.log exists, or if new URL(s) defined,
then DO NOT load standard config files */
/* (config files are added in doit.log) */
@@ -1058,6 +1075,7 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt) {
case 'i': opt->dir_topindex = 1; if (*(com+1)=='0') { opt->dir_topindex=0; com++; } break;
case 'N': opt->savename_delayed = 2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->savename_delayed); while(isdigit((unsigned char)*(com+1))) com++; } break;
case 'D': opt->delayed_cached=1; if (*(com+1)=='0') { opt->delayed_cached=0; com++; } break; // url hack
+ case 'T': opt->convert_utf8=1; if (*(com+1)=='0') { opt->convert_utf8=0; com++; } break; // convert to utf-8
case '!': opt->bypass_limits = 1; if (*(com+1)=='0') { opt->bypass_limits=0; com++; } break;
#if HTS_USEMMS
case 'm': sscanf(com+1,"%d",&opt->mms_maxtime); while(isdigit((unsigned char)*(com+1))) com++; break;
@@ -2114,10 +2132,20 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt) {
t, url);
fprintf(opt->log,"(");
for(i=0;i<argc;i++) {
- if (strchr(argv[i],' ') == NULL || strchr(argv[i],'\"') != NULL)
- fprintf(opt->log,"%s ",argv[i]);
+#ifdef _WIN32
+ char *carg = hts_convertStringSystemToUTF8(argv[i], (int) strlen(argv[i]));
+ char *arg = carg != NULL ? carg : argv[i];
+#else
+ const char *arg = argv[i];
+#endif
+ if (strchr(arg, ' ') == NULL || strchr(arg, '\"') != NULL)
+ fprintf(opt->log,"%s ", arg);
else // entre "" (si espace(s) et pas déja de ")
- fprintf(opt->log,"\"%s\" ",argv[i]);
+ fprintf(opt->log,"\"%s\" ", arg);
+#ifdef _WIN32
+ if (carg != NULL)
+ free(carg);
+#endif
}
fprintf(opt->log,")"LF);
fprintf(opt->log,LF);
diff --git a/src/htsglobal.h b/src/htsglobal.h
index 2fc430f..2b10a8a 100644
--- a/src/htsglobal.h
+++ b/src/htsglobal.h
@@ -40,8 +40,8 @@ Please visit our Website: http://www.httrack.com
#define HTTRACK_GLOBAL_DEFH
// Version (also check external version information)
-#define HTTRACK_VERSION "3.45-4"
-#define HTTRACK_VERSIONID "3.45.4"
+#define HTTRACK_VERSION "3.46-1"
+#define HTTRACK_VERSIONID "3.46.1"
#define HTTRACK_AFF_VERSION "3.x"
#define HTTRACK_LIB_VERSION "2.0"
diff --git a/src/htshelp.c b/src/htshelp.c
index 676ed46..6ba34c6 100644
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -262,7 +262,7 @@ void help_wizard(httrackp* opt) {
linput(stdin,str,250);
if (strnotempty(str)) {
if (!((str[0]=='y') || (str[0]=='Y')))
- return 0;
+ return ;
}
printf("\n");
@@ -468,6 +468,7 @@ void help(char* app,int more) {
infomsg(" o *generate output html file in case of error (404..) (o0 don't generate)");
infomsg(" X *purge old files after update (X0 keep delete)");
infomsg(" %p preserve html files 'as is' (identical to '-K4 -%F \"\"')");
+ infomsg(" %T links conversion to UTF-8");
infomsg("");
infomsg("Spider options:");
infomsg(" bN accept cookies in cookies.txt (0=do not accept,* 1=accept)");
diff --git a/src/htsindex.c b/src/htsindex.c
index f4a984b..eea1c47 100644
--- a/src/htsindex.c
+++ b/src/htsindex.c
@@ -142,6 +142,7 @@ void index_init(const char* indexpath) {
But should be okay on most cases
Tags and javascript handled (ignored)
*/
+/* Note: utf-8 */
int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) {
#if HTS_MAKE_KEYWORD_INDEX
char catbuff[CATBUFF_SIZE];
@@ -166,8 +167,8 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char*
// Init ?
if (hts_index_init) {
- remove(concat(catbuff,indexpath,"index.txt"));
- remove(concat(catbuff,indexpath,"sindex.html"));
+ UNLINK(concat(catbuff,indexpath,"index.txt"));
+ UNLINK(concat(catbuff,indexpath,"sindex.html"));
hts_index_init=0;
}
@@ -338,6 +339,7 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char*
/*
Sort index!
*/
+/* Note: NOT utf-8 */
void index_finish(const char* indexpath,int mode) {
#if HTS_MAKE_KEYWORD_INDEX
char catbuff[CATBUFF_SIZE];
diff --git a/src/htslib.c b/src/htslib.c
index c2fcc7d..1c1e54e 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -56,6 +56,7 @@ Please visit our Website: http://www.httrack.com
#include "htswrap.h"
#include "htsmd5.h"
#include "htsmodules.h"
+#include "htscharset.h"
#ifdef _WIN32
#ifndef _WIN32_WCE
@@ -745,7 +746,7 @@ int http_xfopen(httrackp *opt,int mode,int treat,int waitconnect,char* xsend,cha
else {
// Note: On passe par un FILE* (plus propre)
//soc=open(fil,O_RDONLY,0); // en lecture seule!
- retour->fp=fopen(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil)),"rb"); // ouvrir
+ retour->fp=FOPEN(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil)),"rb"); // ouvrir
if (retour->fp==NULL)
soc=INVALID_SOCKET;
else
@@ -861,7 +862,7 @@ int http_sendhead(httrackp *opt,t_cookie* cookie,int mode,char* xsend,char* adr,
search_tag=strstr(fil,POSTTOK"file:");
if (search_tag) { // postfile
if (mode==0) { // GET!
- FILE* fp=fopen(unescape_http(OPT_GET_BUFF(opt),search_tag+strlen(POSTTOK)+5),"rb");
+ FILE* fp=FOPEN(unescape_http(OPT_GET_BUFF(opt),search_tag+strlen(POSTTOK)+5),"rb");
if (fp) {
char BIGSTK line[1100];
char BIGSTK protocol[256],url[HTS_URLMAXSIZE*2],method[256];
@@ -2710,9 +2711,9 @@ int set_filetime_rfc822(const char* file, const char* date) {
}
int get_filetime_rfc822(const char* file, char* date) {
- struct stat buf;
+ STRUCT_STAT buf;
date[0] = '\0';
- if (stat(file, &buf) == 0) {
+ if (STAT(file, &buf) == 0) {
struct tm* A;
time_t tt = buf.st_mtime;
A=gmtime(&tt);
@@ -4306,6 +4307,7 @@ void fprintfio(FILE* fp,char* buff,char* prefix) {
}
/* Le fichier existe-t-il? (ou est-il accessible?) */
+/* Note: NOT utf-8 */
int fexist(const char* s) {
char catbuff[CATBUFF_SIZE];
struct stat st;
@@ -4318,27 +4320,44 @@ int fexist(const char* s) {
return 0;
}
+/* Le fichier existe-t-il? (ou est-il accessible?) */
+/* Note: utf-8 */
+int fexist_utf8(const char* s) {
+ char catbuff[CATBUFF_SIZE];
+ STRUCT_STAT st;
+ memset(&st, 0, sizeof(st));
+ if (STAT(fconv(catbuff,s), &st) == 0) {
+ if (S_ISREG(st.st_mode)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
/* Taille d'un fichier, -1 si n'existe pas */
-/* fp->_cnt ne fonctionne pas sur toute les plate-formes :-(( */
-/* Note: NOT YET READY FOR 64-bit */
+/* Note: NOT utf-8 */
off_t fsize(const char* s) {
- char catbuff[CATBUFF_SIZE];
- FILE* fp;
- if (strnotempty(s)==0) // nom vide: erreur
+ struct stat st;
+ if (!strnotempty(s)) // nom vide: erreur
return -1;
- fp=fopen(fconv(catbuff,s),"rb");
- if (fp!=NULL) {
- off_t i;
- fseek(fp,0,SEEK_END);
-#ifdef HTS_FSEEKO
- i=ftello(fp);
-#else
- i=ftell(fp);
-#endif
- fclose(fp);
- return i;
- } else
+ if (stat(s, &st) == 0) {
+ return st.st_size;
+ } else {
return -1;
+ }
+}
+
+/* Taille d'un fichier, -1 si n'existe pas */
+/* Note: utf-8 */
+off_t fsize_utf8(const char* s) {
+ STRUCT_STAT st;
+ if (!strnotempty(s)) // nom vide: erreur
+ return -1;
+ if (STAT(s, &st) == 0) {
+ return st.st_size;
+ } else {
+ return -1;
+ }
}
off_t fpsize(FILE* fp) {
@@ -5029,7 +5048,7 @@ FILE *hts_dgb_(void) {
#ifdef _WIN32_WCE
hts_dgb_init_fp = fopen("\\Temp\\hts-debug.txt", "wb");
#else
- hts_dgb_init_fp = fopen("hts-debug.txt", "wb");
+ hts_dgb_init_fp = FOPEN("hts-debug.txt", "wb");
#endif
if (hts_dgb_init_fp != NULL) {
fprintf(hts_dgb_init_fp, "* Creating file\r\n");
@@ -5296,6 +5315,7 @@ HTSEXT_API httrackp *hts_create_opt(void) {
opt->urlhack=1; // url hack (normalizer)
StringCopy(opt->footer,HTS_DEFAULT_FOOTER);
opt->ftp_proxy=1; // proxy http pour ftp
+ opt->convert_utf8 = 1; // convert html to UTF-8
StringCopy(opt->filelist,"");
StringCopy(opt->lang_iso,"en, *");
StringCopy(opt->mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT)
@@ -5308,6 +5328,7 @@ HTSEXT_API httrackp *hts_create_opt(void) {
opt->keyboard=0;
//
StringCopy(opt->path_html,"");
+ StringCopy(opt->path_html_utf8,"");
StringCopy(opt->path_log,"");
StringCopy(opt->path_bin,"");
//
@@ -5420,6 +5441,7 @@ HTSEXT_API void hts_free_opt(httrackp *opt) {
StringFree(opt->mod_blacklist);
StringFree(opt->path_html);
+ StringFree(opt->path_html_utf8);
StringFree(opt->path_log);
StringFree(opt->path_bin);
@@ -5690,6 +5712,86 @@ int closedir(DIR *dir) {
errno = EBADF;
return -1;
}
+
+// UTF-8 aware FILE API
+
+static void copyWchar(LPWSTR dest, const char *src) {
+ int i;
+ for(i = 0 ; src[i] ; i++) {
+ dest[i] = src[i];
+ }
+ dest[i] = '\0';
+}
+
+FILE* hts_fopen_utf8(const char *path, const char *mode) {
+ WCHAR wmode[32];
+ LPWSTR wpath = hts_convertUTF8StringToUCS2(path, strlen(path), NULL);
+ assertf(strlen(mode) < sizeof(wmode) / sizeof(WCHAR));
+ copyWchar(wmode, mode);
+ if (wpath != NULL) {
+ FILE *const fp = _wfopen(wpath, wmode);
+ free(wpath);
+ return fp;
+ } else {
+ // Fallback on conversion error.
+ return fopen(path, mode);
+ }
+}
+
+int hts_stat_utf8(const char *path, STRUCT_STAT *buf) {
+ LPWSTR wpath = hts_convertUTF8StringToUCS2(path, strlen(path), NULL);
+ if (wpath != NULL) {
+ const int result = _wstat(wpath, buf);
+ free(wpath);
+ return result;
+ } else {
+ // Fallback on conversion error.
+ return stat(path, buf);
+ }
+}
+
+int hts_unlink_utf8(const char *path) {
+ LPWSTR wpath = hts_convertUTF8StringToUCS2(path, strlen(path), NULL);
+ if (wpath != NULL) {
+ const int result = _wunlink(wpath);
+ free(wpath);
+ return result;
+ } else {
+ // Fallback on conversion error.
+ return unlink(path);
+ }
+}
+
+int hts_rename_utf8(const char *oldpath, const char *newpath) {
+ LPWSTR woldpath = hts_convertUTF8StringToUCS2(oldpath, strlen(oldpath), NULL);
+ LPWSTR wnewpath = hts_convertUTF8StringToUCS2(newpath, strlen(newpath), NULL);
+ if (woldpath != NULL && wnewpath != NULL) {
+ const int result = _wrename(woldpath, wnewpath);
+ free(woldpath);
+ free(wnewpath);
+ return result;
+ } else {
+ if (woldpath != NULL)
+ free(woldpath);
+ if (wnewpath != NULL)
+ free(wnewpath);
+ // Fallback on conversion error.
+ return rename(oldpath, newpath);
+ }
+}
+
+int hts_mkdir_utf8(const char *path) {
+ LPWSTR wpath = hts_convertUTF8StringToUCS2(path, strlen(path), NULL);
+ if (wpath != NULL) {
+ const int result = _wmkdir(wpath);
+ free(wpath);
+ return result;
+ } else {
+ // Fallback on conversion error.
+ return mkdir(path);
+ }
+}
+
#endif
// Fin
diff --git a/src/htslib.h b/src/htslib.h
index d9b6a42..521fd3c 100644
--- a/src/htslib.h
+++ b/src/htslib.h
@@ -411,9 +411,11 @@ int sig_ignore_flag( int setflag ); // flag ignore
void cut_path(char* fullpath,char* path,char* pname);
int fexist(const char* s);
+int fexist_utf8(const char* s);
/*LLint fsize(const char* s); */
off_t fpsize(FILE* fp);
off_t fsize(const char* s);
+off_t fsize_utf8(const char* s);
/* root dir */
#ifndef HTTRACK_DEFLIB
HTSEXT_API char* hts_rootdir(char* file);
@@ -488,6 +490,29 @@ void *hts_get_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name);
)
*/
+/* UTF-8 aware FILE operations */
+#ifdef _WIN32
+#define FOPEN hts_fopen_utf8
+extern FILE* hts_fopen_utf8(const char *path, const char *mode);
+#define STAT hts_stat_utf8
+typedef struct _stat STRUCT_STAT;
+extern int hts_stat_utf8(const char *path, STRUCT_STAT *buf);
+#define UNLINK hts_unlink_utf8
+extern int hts_unlink_utf8(const char *pathname);
+#define RENAME hts_rename_utf8
+extern int hts_rename_utf8(const char *oldpath, const char *newpath);
+#define MKDIR(F) hts_mkdir_utf8(F)
+extern int hts_mkdir_utf8(const char *pathname);
+#else
+/* The underlying filesystem charset is supposed to be UTF-8 */
+#define FOPEN fopen
+#define STAT stat
+typedef struct stat STRUCT_STAT;
+#define UNLINK unlink
+#define RENAME rename
+#define MKDIR(F) mkdir(F, HTS_ACCESS_FOLDER)
+#endif
+
#endif // internals
#undef PATH_SEPARATOR
diff --git a/src/htsmodules.h b/src/htsmodules.h
index 2712b8f..e03354e 100644
--- a/src/htsmodules.h
+++ b/src/htsmodules.h
@@ -119,6 +119,7 @@ struct htsmoduleStruct {
int* ptr_;
size_t* lien_size_;
char** lien_buffer_;
+ const char *page_charset_;
/* Internal use - please don't touch */
};
diff --git a/src/htsname.c b/src/htsname.c
index e5f0cb5..e5b0715 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -43,6 +43,7 @@ Please visit our Website: http://www.httrack.com
#include "md5.h"
#include "htsmd5.h"
#include "htstools.h"
+#include "htscharset.h"
#include <ctype.h>
#undef test_flush
@@ -119,16 +120,28 @@ static void cleanDoubleSlash(char *s) {
}
}
+// legacy version, without page charset
+int url_savename(char* adr_complete, char* fil_complete, char* save,
+ char* former_adr, char* former_fil,
+ char* referer_adr, char* referer_fil,
+ httrackp* opt,
+ lien_url** liens, int lien_tot,
+ struct_back* sback, cache_back* cache, hash_struct* hash,
+ int ptr, int numero_passe, const lien_back* headers) {
+ return url_savename2(adr_complete, fil_complete, save, former_adr, former_fil,
+ referer_adr, referer_fil, opt,
+ liens, lien_tot, sback, cache, hash, ptr, numero_passe, headers, /* unknown */ NULL);
+}
// forme le nom du fichier à sauver (save) à partir de fil et adr
// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
-int url_savename(char* adr_complete, char* fil_complete, char* save,
- char* former_adr, char* former_fil,
- char* referer_adr, char* referer_fil,
- httrackp* opt,
- lien_url** liens, int lien_tot,
- struct_back* sback, cache_back* cache, hash_struct* hash,
- int ptr, int numero_passe, const lien_back* headers) {
+int url_savename2(char* adr_complete, char* fil_complete, char* save,
+ char* former_adr, char* former_fil,
+ char* referer_adr, char* referer_fil,
+ httrackp* opt,
+ lien_url** liens, int lien_tot,
+ struct_back* sback, cache_back* cache, hash_struct* hash,
+ int ptr, int numero_passe, const lien_back* headers, const char *charset) {
char catbuff[CATBUFF_SIZE];
const char* mime_type = ( headers && !HTTP_IS_REDIRECT(headers->r.statuscode) ) ? headers->r.contenttype : NULL;
/*const char* mime_type = ( headers && HTTP_IS_OK(headers->r.statuscode) ) ? headers->r.contenttype : NULL;*/
@@ -1306,6 +1319,15 @@ int url_savename(char* adr_complete, char* fil_complete, char* save,
/* ensure that there is no ../ (potential vulnerability) */
fil_simplifie(save);
+ /* convert name to UTF-8 ? */
+ if (charset != NULL && charset[0] != '\0') {
+ char *const s = hts_convertStringToUTF8(save, (int) strlen(save), charset);
+ if (s != NULL) {
+ strcpy(save, s);
+ free(s);
+ }
+ }
+
/* callback */
RUN_CALLBACK5(opt, savename, adr_complete,fil_complete,referer_adr,referer_fil,save);
@@ -1333,9 +1355,9 @@ int url_savename(char* adr_complete, char* fil_complete, char* save,
}
// chemin primaire éventuel A METTRE AVANT
- if (strnotempty(StringBuff(opt->path_html))) {
+ if (strnotempty(StringBuff(opt->path_html_utf8))) {
char BIGSTK tempo[HTS_URLMAXSIZE*2];
- strcpybuff(tempo,StringBuff(opt->path_html));
+ strcpybuff(tempo,StringBuff(opt->path_html_utf8));
strcatbuff(tempo,save);
strcpybuff(save,tempo);
}
@@ -1531,7 +1553,7 @@ char *url_savename_refname_fullpath(httrackp* opt, const char *adr, const char *
/* remove refname if any */
void url_savename_refname_remove(httrackp* opt, const char *adr, const char *fil) {
char *filename = url_savename_refname_fullpath(opt, adr, fil);
- (void) unlink(filename);
+ (void) UNLINK(filename);
}
#undef test_flush
diff --git a/src/htsname.h b/src/htsname.h
index 225fa92..7cb7dda 100644
--- a/src/htsname.h
+++ b/src/htsname.h
@@ -96,6 +96,17 @@ int url_savename(char* adr_complete, char* fil_complete, char* save,
hash_struct* hash,
int ptr, int numero_passe,
const lien_back* headers);
+int url_savename2(char* adr_complete, char* fil_complete, char* save,
+ char* former_adr, char* former_fil,
+ char* referer_adr, char* referer_fil,
+ httrackp* opt,
+ lien_url** liens, int lien_tot,
+ struct_back* sback,
+ cache_back* cache,
+ hash_struct* hash,
+ int ptr, int numero_passe,
+ const lien_back* headers,
+ const char *charset);
void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver);
void url_savename_addstr(char* d,char* s);
char* url_md5(char* digest_buffer, char* fil_complete);
diff --git a/src/htsopt.h b/src/htsopt.h
index b5e0212..2f586bb 100644
--- a/src/htsopt.h
+++ b/src/htsopt.h
@@ -309,6 +309,7 @@ struct httrackp {
String from; // from
String path_log; // chemin pour cache et log
String path_html; // chemin pour miroir
+ String path_html_utf8; // chemin pour miroir, UTF-8
String path_bin; // chemin pour templates
int retry; // nombre d'essais supplémentaires en cas d'échec
int makestat; // mettre à jour un fichier log de statistiques de transfert
@@ -349,6 +350,7 @@ struct httrackp {
String lang_iso; // en, fr ..
String mimedefs; // ext1=mimetype1\next2=mimetype2..
String mod_blacklist; // (3.41)
+ int convert_utf8; // UTF-8 conversion ; 3.46
//
int maxlink; // nombre max de liens
int maxfilter; // nombre max de filtres
diff --git a/src/htsparse.c b/src/htsparse.c
index 7e6bbc4..f127f0d 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -239,7 +239,7 @@ Please visit our Website: http://www.httrack.com
fflush(makeindex_fp); \
fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \
makeindex_fp=NULL; \
- usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"),"primary","primary"); \
+ usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html_utf8),"index.html"),"primary","primary"); \
} \
} \
makeindex_done=1; /* ok c'est fait */ \
@@ -429,7 +429,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
// Indexing!
#if HTS_MAKE_KEYWORD_INDEX
if (opt->kindex) {
- if (index_keyword(r->adr,r->size,r->contenttype,savename,StringBuff(opt->path_html))) {
+ if (index_keyword(r->adr,r->size,r->contenttype,savename,StringBuff(opt->path_html_utf8))) {
if ( (opt->debug>1) && (opt->log!=NULL) ) {
HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"indexing file..done"LF); test_flush;
}
@@ -656,9 +656,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
if (p) { // ok center
if (makeindex_fp==NULL) {
- file_notify(opt,"", "", fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"), 1, 1, 0);
- verif_backblue(opt,StringBuff(opt->path_html)); // générer gif
- makeindex_fp=filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"));
+ file_notify(opt,"", "", fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html_utf8),"index.html"), 1, 1, 0);
+ verif_backblue(opt,StringBuff(opt->path_html_utf8)); // générer gif
+ makeindex_fp=filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html_utf8),"index.html"));
if (makeindex_fp!=NULL) {
// Header
@@ -683,7 +683,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
b=strchr(a,'<'); // prochain tag
}
}
- if (lienrelatif(tempo,liens[ptr]->sav,concat(OPT_GET_BUFF(opt),StringBuff(opt->path_html),"index.html"))==0) {
+ if (lienrelatif(tempo,liens[ptr]->sav,concat(OPT_GET_BUFF(opt),StringBuff(opt->path_html_utf8),"index.html"))==0) {
detect_title=1; // ok détecté pour cette page!
makeindex_links++; // un de plus
strcpybuff(makeindex_firstlink,tempo);
@@ -753,6 +753,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
if (len > 0) {
if (strfield(token, "content-type")) {
intag_ctype=1;
+ //NOPE-we do not convert the whole page actually
+ //intag_start[1] = 'X';
}
else if (strfield(token, "refresh")) {
intag_ctype=2;
@@ -1104,7 +1106,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
) {
chpos++;
while(is_space(*chpos)) chpod++;
- chpos
+ //chpos
}
}
#endif
@@ -2381,7 +2383,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
//char last_fil[HTS_URLMAXSIZE*2]="";
strcpybuff(last_adr,adr); // ancienne adresse
//strcpybuff(last_fil,fil); // ancien chemin
- r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,NULL);
+ r_sv=url_savename2(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,NULL,str->page_charset_);
if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé
// 2e test si moved
@@ -2578,7 +2580,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
if (patch_it) {
char BIGSTK save[HTS_URLMAXSIZE*2];
char BIGSTK tempo[HTS_URLMAXSIZE*2];
- strcpybuff(save,StringBuff(opt->path_html));
+ strcpybuff(save,StringBuff(opt->path_html_utf8));
strcatbuff(save,cat_name);
if (lienrelatif(tempo,save, relativesavename)==0) {
/* Never escape high-chars (we don't know the encoding!!) */
@@ -2626,17 +2628,16 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
// écrire fichier?
if (verif_external(opt,cat_nb,1)) {
- //if (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),cat_name))) {
- FILE* fp = filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),cat_name));
+ FILE* fp = filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html_utf8),cat_name));
if (fp) {
if (cat_data_len==0) { // texte
- verif_backblue(opt,StringBuff(opt->path_html));
+ verif_backblue(opt,StringBuff(opt->path_html_utf8));
fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
} else { // data
fwrite(cat_data,cat_data_len,1,fp);
}
fclose(fp);
- usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),cat_name),"","");
+ usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html_utf8),cat_name),"","");
}
}
} else { // écrire normalement le nom de fichier
@@ -2769,8 +2770,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
uri = save;
// .. after stripping the path prefix (ex: "www.example.com\foo4242.html)
- if (strnotempty(StringBuff(opt->path_html))) {
- uri += StringLength(opt->path_html);
+ if (strnotempty(StringBuff(opt->path_html_utf8))) {
+ uri += StringLength(opt->path_html_utf8);
for( ; uri[0] == '/' || uri[0] == '\\' ; uri++) ;
}
@@ -3383,7 +3384,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
{
char BIGSTK mov_sav[HTS_URLMAXSIZE*2];
// calculer lien et éventuellement modifier addresse/fichier
- if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,NULL)!=-1) {
+ if (url_savename2(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,NULL,str->page_charset_)!=-1) {
if (hash_read(hash,mov_sav,"",0,0)<0) { // n'existe pas déja
// enregistrer lien (MACRO) avec SAV IDENTIQUE
liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
@@ -3480,9 +3481,9 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
(r->statuscode==HTTP_PRECONDITION_FAILED)
|| (r->statuscode==HTTP_REQUESTED_RANGE_NOT_SATISFIABLE)
) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
- if (fexist(liens[ptr]->sav)) {
+ if (fexist_utf8(liens[ptr]->sav)) {
remove(liens[ptr]->sav); // Eliminer
- if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..)
+ if (!fexist_utf8(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..)
#if HDEBUG
printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
#endif
@@ -3800,7 +3801,7 @@ void hts_mirror_process_user_interaction(htsmoduleStruct* str, htsmoduleStructEx
// noter NOUVEAU lien
char BIGSTK add_sav[HTS_URLMAXSIZE*2];
// calculer lien et éventuellement modifier addresse/fichier
- if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,NULL)!=-1) {
+ if (url_savename2(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,NULL,str->page_charset_)!=-1) {
if (hash_read(hash,add_sav,"",0,0)<0) { // n'existe pas déja
// enregistrer lien (MACRO)
liens_record(add_adr,add_fil,add_sav,"","");
@@ -4296,7 +4297,7 @@ int hts_wait_delayed(htsmoduleStruct* str,
/* Recompute filename with MIME type */
save[0] = '\0';
- r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&back);
+ r_sv=url_savename2(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&back,str->page_charset_);
/* Recompute authorization with MIME type */
{
@@ -4364,7 +4365,7 @@ int hts_wait_delayed(htsmoduleStruct* str,
/* Recompute filename with MIME type */
save[0] = '\0';
- r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&delayed_back);
+ r_sv=url_savename2(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&delayed_back,str->page_charset_);
/* Recompute authorization with MIME type */
{
@@ -4482,7 +4483,7 @@ int hts_wait_delayed(htsmoduleStruct* str,
strcpybuff(mov_url, back[b].r.location); // copier URL
/* Remove (temporarily created) file if it was created */
- unlink(fconv(OPT_GET_BUFF(opt),back[b].url_sav));
+ UNLINK(fconv(OPT_GET_BUFF(opt),back[b].url_sav));
/* Remove slot! */
if (back[b].status == STATUS_READY) {
@@ -4553,7 +4554,7 @@ int hts_wait_delayed(htsmoduleStruct* str,
/* Recompute filename for hash lookup */
save[0] = '\0';
- r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&delayed_back);
+ r_sv=url_savename2(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&delayed_back,str->page_charset_);
} else {
if ( opt->log!=NULL ) {
HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unable to test %s%s (loop to same filename)"LF,adr,fil);
diff --git a/src/htstools.c b/src/htstools.c
index d615f23..ebb5e01 100644
--- a/src/htstools.c
+++ b/src/htstools.c
@@ -67,7 +67,7 @@ struct find_handle_struct {
struct find_handle_struct {
DIR * hdir;
struct dirent* dirp;
- struct stat filestat;
+ STRUCT_STAT filestat;
char path[2048];
};
#endif
@@ -481,6 +481,7 @@ void longfile_to_83(int mode,char* n83,char* save) {
}
// écrire backblue.gif
+/* Note: utf-8 */
int verif_backblue(httrackp* opt, const char* base) {
int* done = &opt->state.verif_backblue_done;
int ret=0;
@@ -490,7 +491,7 @@ int verif_backblue(httrackp* opt, const char* base) {
return 0;
}
if ( (!*done)
- || (fsize(fconcat(OPT_GET_BUFF(opt), base,"backblue.gif")) != HTS_DATA_BACK_GIF_LEN)) {
+ || (fsize_utf8(fconcat(OPT_GET_BUFF(opt), base,"backblue.gif")) != HTS_DATA_BACK_GIF_LEN)) {
FILE* fp = filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), base,"backblue.gif"));
*done=1;
if (fp) {
@@ -683,6 +684,7 @@ static int sortTopIndexFnc(const void * a_, const void * b_) {
HTSEXT_API char* hts_getcategory(const char* filename);
+/* Note: NOT utf-8 */
HTSEXT_API int hts_buildtopindex(httrackp* opt,const char* path,const char* binpath) {
FILE* fpo;
int retval=0;
@@ -1006,7 +1008,7 @@ HTSEXT_API int hts_findnext(find_handle find) {
memset(&(find->filestat), 0, sizeof(find->filestat));
if ((find->dirp=readdir(find->hdir)))
if (find->dirp->d_name)
- if (!stat(concat(catbuff, find->path,find->dirp->d_name),&find->filestat))
+ if (!STAT(concat(catbuff, find->path,find->dirp->d_name),&find->filestat))
return 1;
#endif
}