summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/htsalias.c 1
-rw-r--r-- src/htsback.c 19
-rw-r--r-- src/htscache.c 23
-rw-r--r-- src/htscore.c 13
-rw-r--r-- src/htscore.h 1
-rw-r--r-- src/htsjava.c 17
-rw-r--r-- src/htslib.c 8
-rw-r--r-- src/htslib.h 17
-rw-r--r-- src/htsname.c 1
-rw-r--r-- src/htsparse.c 5
-rw-r--r-- src/htstools.c 2
-rw-r--r-- src/htszlib.c 11
-rw-r--r-- src/httrack-library.h 27
13 files changed, 109 insertions, 36 deletions
diff --git a/src/htsalias.c b/src/htsalias.c
index 1a413fe..908bc13 100644
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -451,6 +451,7 @@ const char* optalias_help(const char* token) {
allow *.gif
deny ad.*
*/
+/* Note: NOT utf-8 */
int optinclude_file(const char* name,
int* argc,char** argv,char* x_argvblk,int* x_ptr) {
FILE* fp;
diff --git a/src/htsback.c b/src/htsback.c
index a7fe76c..16c54d0 100644
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -501,7 +501,8 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
#endif
if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
- back[p].r.out=FOPEN(back[p].tmpfile,"wb");
+ /* note: tmpfile is a local system filename */
+ back[p].r.out=fopen(back[p].tmpfile, "wb");
if (back[p].r.out) {
if ((back[p].r.adr) && (back[p].r.size>0)) {
if (fwrite(back[p].r.adr,1,(size_t)back[p].r.size,back[p].r.out) != back[p].r.size) {
@@ -531,22 +532,25 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
LLint size;
file_notify(opt,back[p].url_adr, back[p].url_fil, back[p].url_sav, 1, 1, back[p].r.notmodified);
filecreateempty(&opt->state.strc, back[p].url_sav); // filenote & co
- if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
+ if ((size = hts_zunpack(back[p].tmpfile, back[p].url_sav))>=0) {
back[p].r.size=back[p].r.totalsize=size;
// fichier -> mémoire
if (!back[p].r.is_write) {
deleteaddr(&back[p].r);
- back[p].r.adr=readfile(back[p].url_sav);
+ back[p].r.adr = readfile_utf8(back[p].url_sav);
if (!back[p].r.adr) {
back[p].r.statuscode=STATUSCODE_INVALID;
strcpybuff(back[p].r.msg,"Read error when decompressing");
}
UNLINK(back[p].url_sav);
}
- }
+ } else {
+ back[p].r.statuscode = STATUSCODE_INVALID;
+ strcpybuff(back[p].r.msg, "Error when decompressing");
+ }
}
- /* encore that no remaining temporary file exists */
- UNLINK(back[p].tmpfile);
+ /* ensure that no remaining temporary file exists */
+ unlink(back[p].tmpfile);
back[p].tmpfile = NULL;
}
// stats
@@ -920,6 +924,7 @@ int back_serialize_ref(httrackp* opt, const lien_back* src) {
if (mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log), CACHE_REFNAME), S_IRWXU | S_IRWXG | S_IRWXO) == 0)
#endif
{
+ /* note: local filename */
filename = url_savename_refname_fullpath(opt, src->url_adr, src->url_fil);
fp = fopen(filename, "wb");
}
@@ -1269,7 +1274,7 @@ int back_clear_entry(lien_back* back) {
// only for security
if (back->tmpfile && back->tmpfile[0] != '\0') {
- (void) UNLINK(back->tmpfile);
+ (void) unlink(back->tmpfile);
back->tmpfile = NULL;
}
diff --git a/src/htscache.c b/src/htscache.c
index 2679111..e8e7701 100644
--- a/src/htscache.c
+++ b/src/htscache.c
@@ -1716,6 +1716,29 @@ char* readfile2(char* fil, LLint* size) {
return adr;
}
+/* Note: utf-8
+ * Read a whole file into a freshly allocated buffer and append a
+ * terminating '\0' (one extra byte is allocated for it).
+ * The size is obtained from fsize_utf8() and the file is opened through
+ * FOPEN, so 'fil' is presumably a UTF-8 encoded path (confirm with callers).
+ * Returns the buffer allocated with malloct(), or NULL when the size query,
+ * the open, the allocation or the read fails. */
+char* readfile_utf8(char* fil) {
+ char* adr=NULL;
+ char catbuff[CATBUFF_SIZE];
+ const off_t len = fsize_utf8(fil);
+ if (len >= 0) { // size known: the file exists
+ FILE*const fp = FOPEN(fconv(catbuff, fil),"rb");
+ if (fp!=NULL) { // file opened successfully
+ adr = (char*) malloct(len+1);
+ if (adr!=NULL) {
+ if (len > 0 && fread(adr,1,len,fp) != len) { // short read: damaged file?
+ freet(adr);
+ adr=NULL;
+ } else {
+ adr[len] = '\0';
+ }
+ }
+ fclose(fp);
+ }
+ }
+ return adr;
+}
+
/* Note: NOT utf-8 */
char* readfile_or(char* fil,char* defaultdata) {
char* realfile=fil;
diff --git a/src/htscore.c b/src/htscore.c
index de202db..6e2b198 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -249,7 +249,10 @@ if (!makeindex_done) { \
if (makeindex_fp) { \
char BIGSTK tempo[1024]; \
if (makeindex_links == 1) { \
- sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
+ char BIGSTK link_escaped[HTS_URLMAXSIZE*2]; \
+ strcpybuff(link_escaped, makeindex_firstlink); \
+ escape_check_url(link_escaped); \
+ sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF, link_escaped); \
} else \
tempo[0]='\0'; \
fprintf(makeindex_fp,template_footer, \
@@ -1461,6 +1464,12 @@ int httpmirror(char* url1, httrackp* opt) {
if (charset != NULL)
free(charset);
}
+ /* Could not detect charset: could it be UTF-8 ? */
+ if (page_charset[0] == '\0') {
+ if (is_unicode_utf8(r.adr, r.size)) {
+ strcpy(page_charset, "utf-8");
+ }
+ }
/* Could not detect charset */
if (page_charset[0] == '\0') {
if ( (opt->debug>0) && (opt->log!=NULL) ) {
@@ -1741,7 +1750,7 @@ int httpmirror(char* url1, httrackp* opt) {
// a partir d'ici le slash devient antislash
#endif
- if ((fp=fopen(tempo,"wb"))!=NULL) {
+ if ((fp=FOPEN(tempo,"wb"))!=NULL) {
fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION"%s"CRLF""CRLF, hts_get_version_info(opt));
fprintf(fp,"The file %s has not been scanned by HTS"CRLF,savename);
fprintf(fp,"Some links contained in it may be unreachable locally."CRLF);
diff --git a/src/htscore.h b/src/htscore.h
index 21161a9..2272327 100644
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -348,6 +348,7 @@ char* next_token(char* p,int flag);
//
char* readfile(char* fil);
char* readfile2(char* fil, LLint* size);
+char* readfile_utf8(char* fil);
char* readfile_or(char* fil,char* defaultdata);
#if 0
void check_rate(TStamp stat_timestart,int maxrate);
diff --git a/src/htsjava.c b/src/htsjava.c
index 335d378..e3af5ea 100644
--- a/src/htsjava.c
+++ b/src/htsjava.c
@@ -46,6 +46,7 @@ Please visit our Website: http://www.httrack.com
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/stat.h>
#if ( defined(_WIN32) ||defined(HAVE_SYS_TYPES_H) )
#include <sys/types.h>
#endif
@@ -136,16 +137,12 @@ static int hts_detect_java(t_hts_callbackarg *carg, httrackp *opt,
}
static off_t fsize(const char* s) {
- FILE* fp;
- fp=fopen(s,"rb");
- if (fp!=NULL) {
- off_t i;
- fseek(fp,0,SEEK_END);
- i = ftell(fp);
- fclose(fp);
- return i;
- } else
+ STRUCT_STAT st;
+ if (STAT(s, &st) == 0 && S_ISREG(st.st_mode)) {
+ return st.st_size;
+ } else {
return -1;
+ }
}
static int hts_parse_java(t_hts_callbackarg *carg, httrackp *opt,
@@ -174,7 +171,7 @@ static int hts_parse_java(t_hts_callbackarg *carg, httrackp *opt,
#if JAVADEBUG
printf("fopen\n");
#endif
- if ((fpout = fopen(fconv(catbuff, file), "r+b")) == NULL)
+ if ((fpout = FOPEN(fconv(catbuff, file), "r+b")) == NULL)
{
//fprintf(stderr, "Cannot open input file.\n");
sprintf(str->err_msg,"Unable to open file %s",file);
diff --git a/src/htslib.c b/src/htslib.c
index 1c1e54e..feba33a 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -3032,13 +3032,13 @@ typedef struct {
// 0 : no
// 1 : yes
// -1: don't know
-int is_unicode_utf8(unsigned char* buffer, unsigned int size) {
+int is_unicode_utf8(const unsigned char* buffer, size_t size) {
t_auto_seq seq;
- unsigned int i;
- int is_utf=-1;
+ size_t i;
+ int is_utf = -1;
seq.pos=0;
- for(i=0 ; i < size ; i++) {
+ for(i = 0 ; i < size ; i++) {
unsigned int ok=0;
unsigned int inseq=0;
unsigned int err=0;
diff --git a/src/htslib.h b/src/htslib.h
index 521fd3c..9290c78 100644
--- a/src/htslib.h
+++ b/src/htslib.h
@@ -337,7 +337,7 @@ void rawlinput(FILE* fp,char* s,int max);
char* strstrcase(char *s,char *o);
int ident_url_absolute(const char* url,char* adr,char* fil);
void fil_simplifie(char* f);
-int is_unicode_utf8(unsigned char* buffer, unsigned int size);
+int is_unicode_utf8(const unsigned char* buffer, size_t size);
void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map);
int ishtml(httrackp *opt,const char* urlfil);
int ishtml_ext(const char* a);
@@ -490,19 +490,20 @@ void *hts_get_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name);
)
*/
-/* UTF-8 aware FILE operations */
+/* UTF-8 aware FILE API */
+#ifndef HTS_DEF_FILEAPI
#ifdef _WIN32
#define FOPEN hts_fopen_utf8
-extern FILE* hts_fopen_utf8(const char *path, const char *mode);
+HTSEXT_API FILE* hts_fopen_utf8(const char *path, const char *mode);
#define STAT hts_stat_utf8
typedef struct _stat STRUCT_STAT;
-extern int hts_stat_utf8(const char *path, STRUCT_STAT *buf);
+HTSEXT_API int hts_stat_utf8(const char *path, STRUCT_STAT *buf);
#define UNLINK hts_unlink_utf8
-extern int hts_unlink_utf8(const char *pathname);
+HTSEXT_API int hts_unlink_utf8(const char *pathname);
#define RENAME hts_rename_utf8
-extern int hts_rename_utf8(const char *oldpath, const char *newpath);
+HTSEXT_API int hts_rename_utf8(const char *oldpath, const char *newpath);
#define MKDIR(F) hts_mkdir_utf8(F)
-extern int hts_mkdir_utf8(const char *pathname);
+HTSEXT_API int hts_mkdir_utf8(const char *pathname);
#else
/* The underlying filesystem charset is supposed to be UTF-8 */
#define FOPEN fopen
@@ -512,6 +513,8 @@ typedef struct stat STRUCT_STAT;
#define RENAME rename
#define MKDIR(F) mkdir(F, HTS_ACCESS_FOLDER)
#endif
+#define HTS_DEF_FILEAPI
+#endif
#endif // internals
diff --git a/src/htsname.c b/src/htsname.c
index e5b0715..74172e2 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -1544,6 +1544,7 @@ void url_savename_refname(const char *adr, const char *fil, char *filename) {
bindigest[12], bindigest[13], bindigest[14], bindigest[15]);
}
+/* note: return a local filename */
char *url_savename_refname_fullpath(httrackp* opt, const char *adr, const char *fil) {
char digest_filename[64];
url_savename_refname(adr, fil, digest_filename);
diff --git a/src/htsparse.c b/src/htsparse.c
index 18059f5..35d1bb9 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -230,7 +230,10 @@ Please visit our Website: http://www.httrack.com
if (makeindex_fp) { \
char BIGSTK tempo[1024]; \
if (makeindex_links == 1) { \
- sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
+ char BIGSTK link_escaped[HTS_URLMAXSIZE*2]; \
+ strcpybuff(link_escaped, makeindex_firstlink); \
+ escape_check_url(link_escaped); \
+ sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,link_escaped); \
} else \
tempo[0]='\0'; \
fprintf(makeindex_fp,template_footer, \
diff --git a/src/htstools.c b/src/htstools.c
index ebb5e01..049fc3c 100644
--- a/src/htstools.c
+++ b/src/htstools.c
@@ -845,6 +845,7 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,const char* path,const char* binp
return retval;
}
+/* Note: NOT utf-8 */
HTSEXT_API char* hts_getcategory(const char* filename) {
String categ = STRING_EMPTY;
if (fexist(filename)) {
@@ -867,6 +868,7 @@ HTSEXT_API char* hts_getcategory(const char* filename) {
return StringBuffRW(categ);
}
+/* Note: NOT utf-8 */
HTSEXT_API char* hts_getcategories(char* path, int type) {
String categ = STRING_EMPTY;
String profiles = STRING_EMPTY;
diff --git a/src/htszlib.c b/src/htszlib.c
index 4fd33da..d313d89 100644
--- a/src/htszlib.c
+++ b/src/htszlib.c
@@ -54,13 +54,16 @@ Please visit our Website: http://www.httrack.com
Unpack file into a new file
Return value: size of the new file, or -1 if an error occured
*/
+/* Note: utf-8 */
int hts_zunpack(char* filename,char* newfile) {
+ int ret = -1;
char catbuff[CATBUFF_SIZE];
if (gz_is_available && filename && newfile) {
if (filename[0] && newfile[0]) {
- gzFile gz = gzopen (filename, "rb");
+ // note: NOT a UTF-8 filename
+ gzFile gz = gzopen(filename, "rb");
if (gz) {
- FILE* fpout=fopen(fconv(catbuff, newfile),"wb");
+ FILE*const fpout = FOPEN(fconv(catbuff, newfile), "wb");
int size=0;
if (fpout) {
int nr;
@@ -77,11 +80,11 @@ int hts_zunpack(char* filename,char* newfile) {
} else
size=-1;
gzclose(gz);
- return (int) size;
+ ret = (int) size;
}
}
}
- return -1;
+ return ret;
}
int hts_extract_meta(const char* path) {
diff --git a/src/httrack-library.h b/src/httrack-library.h
index 352e2c2..39b00c0 100644
--- a/src/httrack-library.h
+++ b/src/httrack-library.h
@@ -5,7 +5,7 @@ Copyright (C) Xavier Roche and other contributors
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 3
+as published by the Free Software Foundation; either version 2
of the License, or any later version.
This program is distributed in the hope that it will be useful,
@@ -214,4 +214,29 @@ HTSEXT_API int hts_findisdir(find_handle find);
HTSEXT_API int hts_findisfile(find_handle find);
HTSEXT_API int hts_findissystem(find_handle find);
+/* UTF-8 aware FILE API */
+#ifndef HTS_DEF_FILEAPI
+#ifdef _WIN32
+#define FOPEN hts_fopen_utf8
+HTSEXT_API FILE* hts_fopen_utf8(const char *path, const char *mode);
+#define STAT hts_stat_utf8
+typedef struct _stat STRUCT_STAT;
+HTSEXT_API int hts_stat_utf8(const char *path, STRUCT_STAT *buf);
+#define UNLINK hts_unlink_utf8
+HTSEXT_API int hts_unlink_utf8(const char *pathname);
+#define RENAME hts_rename_utf8
+HTSEXT_API int hts_rename_utf8(const char *oldpath, const char *newpath);
+#define MKDIR(F) hts_mkdir_utf8(F)
+HTSEXT_API int hts_mkdir_utf8(const char *pathname);
+#else
+#define FOPEN fopen
+#define STAT stat
+typedef struct stat STRUCT_STAT;
+#define UNLINK unlink
+#define RENAME rename
+#define MKDIR(F) mkdir(F, HTS_ACCESS_FOLDER)
+#endif
+#define HTS_DEF_FILEAPI
+#endif
+
#endif