summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Xavier Roche <xroche@users.noreply.github.com> 2014-10-15 19:17:29 +0000
committer Xavier Roche <xroche@users.noreply.github.com> 2014-10-15 19:17:29 +0000
commit 9b5c6cf86ed8dbf749bc2e401d4f87d340b6413d (patch)
tree 38e035f0b797edeb5bdbc708681eacedeef53887 /src
parent cce112d40a6e36b2c437418e84c57490818603cb (diff)
Fixed webhttrack incompatibility with Chrome
* closes:#53. Also fixed HTML-escaping issues inside webhttrack. Rationale: The webhttrack script wrongly assumed that once the "browse" command returned, the user had closed the navigation window, and that the script therefore had to kill the server itself. However, modern browsers tend to "attach" to an existing session (creating a new tab, for example, within an existing window), causing the browsing command to return immediately, and thus causing the server to be killed immediately by the webhttrack script. I have rewritten the underlying logic, and now the server is able to shut itself down if the parent script dies AND the browsing client has shown no activity for two minutes. The "activity" can be any browsed/refreshed page, or the internal "ping" iframe (which pings the server every 30 seconds). With this model, we *should* be compatible with both old browsers and modern ones.
Diffstat (limited to 'src')
-rw-r--r-- src/htsserver.c 70
-rw-r--r-- src/htsserver.h 1
-rw-r--r-- src/htsweb.c 87
-rwxr-xr-x src/webhttrack 79
4 files changed, 148 insertions, 89 deletions
diff --git a/src/htsserver.c b/src/htsserver.c
index 0ee5907..e358d1e 100644
--- a/src/htsserver.c
+++ b/src/htsserver.c
@@ -92,13 +92,17 @@ int commandReturnSet = 0;
httrackp *global_opt = NULL;
+static void (*pingFun)(void*) = NULL;
+static void* pingFunArg = NULL;
+
/* Extern */
extern void webhttrack_main(char *cmd);
extern void webhttrack_lock(void);
extern void webhttrack_release(void);
static int is_image(const char *file) {
- return ((strstr(file, ".gif") != NULL));
+ return strstr(file, ".gif") != NULL
+ || strstr(file, ".png") != NULL;
}
static int is_text(const char *file) {
return ((strstr(file, ".txt") != NULL));
@@ -106,6 +110,12 @@ static int is_text(const char *file) {
static int is_html(const char *file) {
return ((strstr(file, ".htm") != NULL));
}
+static int is_css(const char *file) {
+ return ((strstr(file, ".css") != NULL));
+}
+static int is_js(const char *file) {
+ return ((strstr(file, ".js") != NULL));
+}
static void sig_brpipe(int code) {
/* ignore */
@@ -409,6 +419,11 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
/* Accept */
while((soc_c = (T_SOC) accept(soc, NULL, NULL)) == INVALID_SOCKET) ;
+ /* Ping */
+ if (pingFun != NULL) {
+ pingFun(pingFunArg);
+ }
+
/* Lock */
webhttrack_lock();
@@ -811,12 +826,13 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
virtualpath = 1;
}
+ /* override */
if (commandRunning) {
- if (!is_image(file)) {
+ if (is_html(file)) {
file = "/server/refresh.html";
}
} else if (commandEnd && !virtualpath && !willexit) {
- if (!is_image(file)) {
+ if (is_html(file)) {
file = "/server/finished.html";
}
}
@@ -843,9 +859,18 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
char ok_img[] =
"HTTP/1.0 200 OK\r\n" "Connection: close\r\n"
"Server: httrack small server\r\n" "Content-type: image/gif\r\n";
+ char ok_js[] =
+ "HTTP/1.0 200 OK\r\n" "Connection: close\r\n"
+ "Server: httrack small server\r\n" "Content-type: text/javascript\r\n";
+ char ok_css[] =
+ "HTTP/1.0 200 OK\r\n" "Connection: close\r\n"
+ "Server: httrack small server\r\n" "Content-type: text/css\r\n";
char ok_text[] =
"HTTP/1.0 200 OK\r\n" "Connection: close\r\n"
"Server: httrack small server\r\n" "Content-type: text/plain\r\n";
+ char ok_unknown[] =
+ "HTTP/1.0 200 OK\r\n" "Connection: close\r\n"
+ "Server: httrack small server\r\n" "Content-type: application/octet-stream\r\n";
/* register current page */
coucal_write(NewLangList, "thisfile", (intptr_t) strdup(file));
@@ -911,6 +936,7 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
name[0] = '\0';
strncatbuff(name, str, n);
+
if (strncmp(name, "/*", 2) == 0) {
/* comments */
} else if ((p = strfield(name, "html:"))) {
@@ -1179,6 +1205,8 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
StringCat(output, "&gt;");
} else if (outputmode && a[0] == '&') {
StringCat(output, "&amp;");
+ } else if (outputmode && a[0] == '\'') {
+ StringCat(output, "&#39;");
} else if (outputmode == 3 && a[0] == ' ') {
StringCat(output, "%20");
} else if (outputmode >= 2
@@ -1252,6 +1280,9 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
case '&':
StringCat(tmpbuff, "&amp;");
break;
+ case '\'':
+ StringCat(tmpbuff, "&#39;");
+ break;
default:
StringMemcat(tmpbuff, fstr, 1);
break;
@@ -1292,17 +1323,18 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
assert(len == (int) StringLength(output));
}
#endif
- } else if (is_text(file)) {
- StringMemcat(headers, ok_text, sizeof(ok_text) - 1);
- while(!feof(fp)) {
- int n = (int) fread(line, 1, sizeof(line) - 2, fp);
-
- if (n > 0) {
- StringMemcat(output, line, n);
- }
- }
} else {
- StringMemcat(headers, ok_img, sizeof(ok_img) - 1);
+ if (is_text(file)) {
+ StringMemcat(headers, ok_text, sizeof(ok_text) - 1);
+ } else if (is_js(file)) {
+ StringMemcat(headers, ok_js, sizeof(ok_js) - 1);
+ } else if (is_css(file)) {
+ StringMemcat(headers, ok_css, sizeof(ok_css) - 1);
+ } else if (is_image(file)) {
+ StringMemcat(headers, ok_img, sizeof(ok_img) - 1);
+ } else {
+ StringMemcat(headers, ok_unknown, sizeof(ok_unknown) - 1);
+ }
while(!feof(fp)) {
int n = (int) fread(line, 1, sizeof(line) - 2, fp);
@@ -1312,6 +1344,13 @@ int smallserver(T_SOC soc, char *url, char *method, char *data, char *path) {
}
}
fclose(fp);
+ } else if (strcmp(file, "/ping") == 0
+ || strncmp(file, "/ping?", 6) == 0) {
+ char error_hdr[] =
+ "HTTP/1.0 200 Pong\r\n" "Server: httrack small server\r\n"
+ "Content-type: text/html\r\n";
+
+ StringCat(headers, error_hdr);
} else {
char error_hdr[] =
"HTTP/1.0 404 Not Found\r\n" "Server: httrack small server\r\n"
@@ -1429,6 +1468,11 @@ int htslang_uninit(void) {
return 1;
}
+void smallserver_setpinghandler(void (*fun)(void*), void*arg) {
+ pingFun = fun;
+ pingFunArg = arg;
+}
+
int smallserver_setkey(const char *key, const char *value) {
return coucal_write(NewLangList, key, (intptr_t) strdup(value));
}
diff --git a/src/htsserver.h b/src/htsserver.h
index 9b3b125..b5decfd 100644
--- a/src/htsserver.h
+++ b/src/htsserver.h
@@ -91,6 +91,7 @@ extern httrackp *global_opt;
#define min(a,b) ((a)>(b)?(b):(a))
#define max(a,b) ((a)>(b)?(a):(b))
+extern void smallserver_setpinghandler(void (*fun)(void*), void*arg);
extern int smallserver_setkey(const char *key, const char *value);
extern int smallserver_setkeyint(const char *key, LLint value);
extern int smallserver_setkeyarr(const char *key, int id, const char *key2, const char *value);
diff --git a/src/htsweb.c b/src/htsweb.c
index c695f3a..c189c0a 100644
--- a/src/htsweb.c
+++ b/src/htsweb.c
@@ -77,6 +77,13 @@ Please visit our Website: http://www.httrack.com
#else
#endif
+#undef DEBUG
+#if 0
+#define DEBUG(A) do { A; } while(0)
+#else
+#define DEBUG(A) do {} while(0)
+#endif
+
static htsmutex refreshMutex = HTSMUTEX_INIT;
static int help_server(char *dest_path, int defaultPort);
@@ -91,10 +98,59 @@ static void htsweb_sig_brpipe(int code) {
/* ignore */
}
+/* Number of background threads */
+static int background_threads = 0;
+
+/* Server/client ping handling */
+static htsmutex pingMutex = HTSMUTEX_INIT;
+static unsigned int pingId = 0;
+static unsigned int getPingId(void) {
+ unsigned int id;
+ hts_mutexlock(&pingMutex);
+ id = pingId;
+ hts_mutexrelease(&pingMutex);
+ return id;
+}
+static void ping(void) {
+ hts_mutexlock(&pingMutex);
+ pingId++;
+ hts_mutexrelease(&pingMutex);
+}
+
+static void client_ping(void *pP) {
+#ifndef _WIN32
+ /* Timeout to 120s ; normally client pings every 30 second */
+ static int timeout = 120;
+ /* Wait for parent to die (legacy browser mode). */
+ const pid_t ppid = (pid_t) (uintptr_t) pP;
+ while (!kill(ppid, 0)) {
+ sleep(1);
+ }
+ /* Parent (webhttrack script) is dead: is client pinging ? */
+ for(;;) {
+ unsigned int id = getPingId();
+ sleep(timeout);
+ if (getPingId() == id) {
+ break;
+ }
+ }
+ /* Die! */
+ fprintf(stderr,
+ "Parent process %d died, and client did not ping for %ds: exiting!\n",
+ (int) ppid, timeout);
+ exit(EXIT_FAILURE);
+#endif
+}
+
+static void pingHandler(void*arg) {
+ ping();
+}
+
int main(int argc, char *argv[]) {
int i;
int ret = 0;
int defaultPort = 0;
+ int parentPid = 0;
printf("Initialzing the server..\n");
@@ -120,7 +176,7 @@ int main(int argc, char *argv[]) {
if (argc < 2 || (argc % 2) != 0) {
fprintf(stderr, "** Warning: use the webhttrack frontend if available\n");
fprintf(stderr,
- "usage: %s [--port <port>] <path-to-html-root-dir> [key value [key value]..]\n",
+ "usage: %s [--port <port>] [--ppid parent-pid] <path-to-html-root-dir> [key value [key value]..]\n",
argv[0]);
fprintf(stderr, "example: %s /usr/share/httrack/\n", argv[0]);
return 1;
@@ -200,14 +256,22 @@ int main(int argc, char *argv[]) {
/* set commandline keys */
for(i = 2; i < argc; i += 2) {
- if (strcmp(argv[i], "--port") == 0) {
+ if (strcmp(argv[i], "--port") == 0 && i + 1 < argc) {
if (sscanf(argv[i + 1], "%d", &defaultPort) != 1 || defaultPort < 0
|| defaultPort >= 65535) {
fprintf(stderr, "couldn't set the port number to %s\n", argv[i + 1]);
return -1;
}
- } else {
+ } else if (strcmp(argv[i], "--ppid") == 0 && i + 1 < argc) {
+ if (sscanf(argv[i + 1], "%u", &parentPid) != 1) {
+ fprintf(stderr, "couldn't set the parent PID to %s\n", argv[i + 1]);
+ return -1;
+ }
+ } else if (i + 1 < argc) {
smallserver_setkey(argv[i], argv[i + 1]);
+ } else {
+ fprintf(stderr, "Error in commandline!\n");
+ return -1;
}
}
@@ -216,6 +280,13 @@ int main(int argc, char *argv[]) {
signal(SIGPIPE, htsweb_sig_brpipe); // broken pipe (write into non-opened socket)
#endif
+ /* pinger */
+ if (parentPid > 0) {
+ hts_newthread(client_ping, (void *) (uintptr_t) parentPid);
+ background_threads++; /* Do not wait for this thread! */
+ smallserver_setpinghandler(pingHandler, NULL);
+ }
+
/* launch */
ret = help_server(argv[1], defaultPort);
@@ -292,6 +363,7 @@ static void back_launch_cmd(void *pP) {
/* finished */
commandEnd = 1;
+ DEBUG(fprintf(stderr, "commandEnd=1\n"));
/* free */
free(cmd);
@@ -301,7 +373,9 @@ static void back_launch_cmd(void *pP) {
void webhttrack_main(char *cmd) {
commandRunning = 1;
+ DEBUG(fprintf(stderr, "commandRunning=1\n"));
hts_newthread(back_launch_cmd, (void *) strdup(cmd));
+ background_threads++; /* Do not wait for this thread! */
}
void webhttrack_lock(void) {
@@ -339,8 +413,11 @@ static int webhttrack_runmain(httrackp * opt, int argc, char **argv) {
CHAIN_FUNCTION(opt, sendhead, htsshow_sendheader, NULL);
CHAIN_FUNCTION(opt, receivehead, htsshow_receiveheader, NULL);
+ /* Rock'in! */
ret = hts_main2(argc, argv, opt);
- htsthread_wait_n(1);
+
+ /* Wait for pending threads to finish */
+ htsthread_wait_n(background_threads);
return ret;
}
@@ -404,12 +481,14 @@ void __cdecl htsshow_init(t_hts_callbackarg * carg) {
void __cdecl htsshow_uninit(t_hts_callbackarg * carg) {
}
int __cdecl htsshow_start(t_hts_callbackarg * carg, httrackp * opt) {
+ DEBUG(fprintf(stderr, "htsshow_start()\n"));
return 1;
}
int __cdecl htsshow_chopt(t_hts_callbackarg * carg, httrackp * opt) {
return htsshow_start(carg, opt);
}
int __cdecl htsshow_end(t_hts_callbackarg * carg, httrackp * opt) {
+ DEBUG(fprintf(stderr, "htsshow_end()\n"));
return 1;
}
int __cdecl htsshow_preprocesshtml(t_hts_callbackarg * carg, httrackp * opt,
diff --git a/src/webhttrack b/src/webhttrack
index e41991a..879a5d8 100755
--- a/src/webhttrack
+++ b/src/webhttrack
@@ -29,81 +29,15 @@ echo "$0($$): $@" >&2
return 0
}
-function mozillabrowser {
-# returns 0, if the browser is mozilla type
-echo "$1" | grep -q "iceape"
-[ $? -eq 0 ] && return 0
-echo "$1" | grep -q "mozilla"
-[ $? -eq 0 ] && return 0
-echo "$1" | grep -q "netscape"
-[ $? -eq 0 ] && return 0
-echo "$1" | grep -q "firebird"
-[ $? -eq 0 ] && return 0
-echo "$1" | grep -q "firefox"
-[ $? -eq 0 ] && return 0
-echo "$1" | grep -q "iceweasel"
-[ $? -eq 0 ] && return 0
-echo "$1" | grep -q "abrowser"
-[ $? -eq 0 ] && return 0
-echo "$1" | grep -q "icecat"
-[ $? -eq 0 ] && return 0
-return 1;
-}
-function mozillaloaded {
-user_name=`logname 2>/dev/null`
-if ! test -n "${user_name}"; then
-user_name=`id -un`
-fi
-if test -n "${user_name}"; then
-ps -e -U "$user_name" | grep -E "(iceape|mozilla|netscape|firebird|firefox)" | grep -qv "grep -E"
-else
-false
-fi
-}
-
function launch_browser {
log "launching $1"
-start_t=`date +%s`
browser=$1
url=$2
-moz=
-if mozillaloaded; then
-moz=1
-fi
-# launch any browser
-# if it is a mozilla like browser, check if the browser is running and use
-# -remote if needed. Change the URL into openURL($url) too.
-# (thanks to Torsten Werner for the patch)
-# see http://www.mozilla.org/unix/remote.html
-# 04/2006: openurl() fix from Samuel Suther
-if mozillabrowser ${browser}; then
- if ! ${browser} -remote "openurl(${url})"; then
- log "spawning browser.."
- ${browser} "${url}"
- fi
-else
- log "spawning regular browser.."
- ${browser} "${url}"
-fi
-# this is a real pain in the neck: browser can hiddenly use the -remote feature of
+log "spawning browser.."
+${browser} "${url}"
+# note: browser can hiddenly use the -remote feature of
# mozilla and therefore return immediately
-# this loop is the only reliable solution AFAIK
-end_t=`date +%s`
-if test -n "$start_t" -a -n "$end_t"; then
- int_t=$[$end_t-$start_t]
-else
- int_t=0
-fi
-if test -n "${int_t}" -a "${int_t}" -lt 60; then
- if test -n "$moz"; then
- log "waiting for browser to terminate.."
- while mozillaloaded; do
- sleep 3
- done
- log "browser seems to have been closed.."
- fi
-fi
-log "browser exited"
+log "browser (or helper) exited"
}
# First ensure that we can launch the server
@@ -164,7 +98,7 @@ fi
# Create a temporary filename
TMPSRVFILE="$(mktemp ${TMPDIR:-/tmp}/.webhttrack.XXXXXXXX)" || ! log "cound not create the temporary file ${TMPSRVFILE}" || exit 1
# Launch htsserver binary and setup the server
-(${BINPATH}/htsserver "${DISTPATH}/" path "${HOME}/websites" lang "${LANGN}" $@; echo SRVURL=error) > ${TMPSRVFILE}&
+(${BINPATH}/htsserver "${DISTPATH}/" --ppid "$$" path "${HOME}/websites" lang "${LANGN}" $@; echo SRVURL=error) > ${TMPSRVFILE}&
# Find the generated SRVURL
SRVURL=
MAXCOUNT=60
@@ -180,7 +114,8 @@ done
# Cleanup function
function cleanup {
test -n "$1" && log "nasty signal caught, cleaning up.."
-test -f ${TMPSRVFILE} && SRVPID=`grep -E PID= ${TMPSRVFILE} | cut -f2- -d=`
+# Do not kill if browser exited (chrome bug issue) ; server will die itself
+test -n "$1" && test -f ${TMPSRVFILE} && SRVPID=`grep -E PID= ${TMPSRVFILE} | cut -f2- -d=`
test -n "${SRVPID}" && kill -9 ${SRVPID}
test -f ${TMPSRVFILE} && rm ${TMPSRVFILE}
test -n "$1" && log "..done"