summary refs log tree commit diff
path: root/src/proxy
diff options
context:
space:
mode:
author Xavier Roche <xroche@users.noreply.github.com> 2012-03-19 12:57:43 +0000
committer Xavier Roche <xroche@users.noreply.github.com> 2012-03-19 12:57:43 +0000
commit 64cc4a88da8887ef1f7f4d90be0158d2cc76222d (patch)
tree e72af709fbce8bc495f51e7f0518de9a9a2c3b7f /src/proxy
parent 844ecc37072d515513177c65a8c9dc35c9cdfc1a (diff)
httrack 3.40.4
Diffstat (limited to 'src/proxy')
-rw-r--r-- src/proxy/AUTHORS 1
-rw-r--r-- src/proxy/COPYING 340
-rw-r--r-- src/proxy/changelog.txt 20
-rw-r--r-- src/proxy/main.c 164
-rwxr-xr-x src/proxy/proxystrings.h 153
-rw-r--r-- src/proxy/proxytrack.c 1621
-rw-r--r-- src/proxy/proxytrack.h 288
-rw-r--r-- src/proxy/store.c 1505
-rw-r--r-- src/proxy/store.h 105
9 files changed, 4197 insertions, 0 deletions
diff --git a/src/proxy/AUTHORS b/src/proxy/AUTHORS
new file mode 100644
index 0000000..66da09f
--- /dev/null
+++ b/src/proxy/AUTHORS
@@ -0,0 +1 @@
+Xavier Roche <roche@httrack.com>
diff --git a/src/proxy/COPYING b/src/proxy/COPYING
new file mode 100644
index 0000000..d60c31a
--- /dev/null
+++ b/src/proxy/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/src/proxy/changelog.txt b/src/proxy/changelog.txt
new file mode 100644
index 0000000..f5ae48b
--- /dev/null
+++ b/src/proxy/changelog.txt
@@ -0,0 +1,20 @@
+0.4 - Sept 18 2005
+- implemented very limited WebDAV (RFC2518) primitives
+- index enumeration fixes
+- limited access to the proxy server through HTTP in non-proxy mode
+
+0.3 - Sept 10 2005
+- implemented ICPv2 server (tested with Squid Web Proxy Cache) implementing ICP_OP_QUERY and ICP_OP_SECHO
+- redirects for URLs with missing ending '/'
+- fixed htsnet.h macro errors (bogus port during address copy)
+- keep-alive fixes
+
+0.2 - Sept 4 2005
+- hack to fix the "external files stored as absolute references" bug
+- proper locking for indexes (unlocked zFile)
+- added previous httrack .dat/.ndx cache format
+- added catalog as index fallback
+- started to write ICPv2 server (RFC2186), but not yet ready
+
+0.1 - Aug 27 2005
+- initial release: HTTP (RFC2616) proxy and aggregation ready
diff --git a/src/proxy/main.c b/src/proxy/main.c
new file mode 100644
index 0000000..e48b51d
--- /dev/null
+++ b/src/proxy/main.c
@@ -0,0 +1,164 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: ProxyTrack, httrack cache-based proxy */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+/* Standard includes */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htslib.h"
+#include "store.h"
+#include "proxytrack.h"
+
+#ifndef _WIN32
+#include <signal.h>
+/* Handler installed for SIGPIPE: it deliberately does nothing, so the
+   signal raised by a write on a broken pipe is simply swallowed. */
+static void sig_brpipe(int code) {
+  (void) code;
+}
+#endif
+
+/*
+ * Split "<host>:<port>" at its last ':'.
+ *
+ * str:  NUL-terminated input string.
+ * host: receives the part before the last ':' (may be empty); the caller
+ *       must provide room for at least 256 bytes including the NUL.
+ * port: receives the decimal port number, which must lie in 0..65535 and
+ *       be followed by nothing else.
+ *
+ * Returns 1 on success. Returns 0, leaving *host and *port untouched, when
+ * there is no ':', the host part is 256 characters or longer, or the port
+ * is missing, malformed or out of range.
+ */
+static int scanHostPort(const char* str, char *host, int *port) {
+  const char* sep = strrchr(str, ':');
+  char* end = NULL;
+  long value;
+  size_t hostLen;
+
+  if (sep == NULL)
+    return 0;
+
+  hostLen = (size_t) (sep - str);
+  if (hostLen >= 256)
+    return 0;
+
+  /* strtol() reports where parsing stopped, so trailing garbage such as
+     "80x" is rejected; an overflowing value saturates and fails the range
+     check below. */
+  value = strtol(sep + 1, &end, 10);
+  if (end == sep + 1 || *end != '\0' || value < 0 || value > 65535)
+    return 0;
+
+  memcpy(host, str, hostLen);
+  host[hostLen] = '\0';
+  *port = (int) value;
+  return 1;
+}
+
+/* Add the archive or index file `path` to `index` through PT_AddIndex()
+   and print the outcome on stderr. */
+static void addIndexAndReport(PT_Indexes index, char* path) {
+  const int itemsAdded = PT_AddIndex(index, path);
+
+  if (itemsAdded > 0) {
+    fprintf(stderr, "processed: %s (%d items added)\n", path, itemsAdded);
+  } else if (itemsAdded == 0) {
+    fprintf(stderr, "processed: %s (no items added)\n", path);
+  } else {
+    fprintf(stderr, "error: could not process %s\n", path);
+  }
+}
+
+/*
+ * ProxyTrack command-line entry point.
+ *
+ * argv[1] is "<proxy-addr>:<proxy-port>" and argv[2] is
+ * "<ICP-addr>:<ICP-port>". Every further argument is either a path handed
+ * to PT_AddIndex(), or "--list <file>", in which case each line read from
+ * <file> with linput() is handed to PT_AddIndex().
+ *
+ * Returns -1 when Winsock cannot be initialised (Windows builds only),
+ * 1 on a usage error, and otherwise the value of proxytrack_main().
+ * The process exits with status 1 on an unknown option, on "--list"
+ * without a file name, or when the list file cannot be opened.
+ */
+int main(int argc, char* argv[])
+{
+  int i;
+  int ret = 0;
+  int proxyPort, icpPort;
+  char proxyAddr[256 + 1], icpAddr[256 + 1];
+  PT_Indexes index;
+
+#ifdef _WIN32
+  {
+    WORD wVersionRequested; // requested version WinSock API
+    WSADATA wsadata; // Windows Sockets API data
+    int stat;
+    wVersionRequested = 0x0101;
+    stat = WSAStartup( wVersionRequested, &wsadata );
+    if (stat != 0) {
+      fprintf(stderr, "Winsock not found!\n");
+      return -1;
+    } else if (LOBYTE(wsadata.wVersion) != 1 || HIBYTE(wsadata.wVersion) != 1) {
+      /* Both bytes must match the requested 1.1: with '&&' a mismatch in
+         only one of them went unnoticed. */
+      fprintf(stderr, "WINSOCK.DLL does not support version 1.1\n");
+      WSACleanup();
+      return -1;
+    }
+  }
+#endif
+
+  /* Args */
+  printf("ProxyTrack %s, build proxies upon HTTrack Website Copier Archives\n", PROXYTRACK_VERSION);
+  printf("Copyright (C) Xavier Roche and other contributors\n");
+  printf("\n");
+  printf("This program is free software; you can redistribute it and/or\n");
+  printf("modify it under the terms of the GNU General Public License\n");
+  printf("as published by the Free Software Foundation; either version 2\n");
+  printf("of the License, or any later version.\n");
+  printf("\n");
+  printf("*** This version is a development release ***\n");
+  printf("\n");
+  if (argc < 3
+    || !scanHostPort(argv[1], proxyAddr, &proxyPort)
+    || !scanHostPort(argv[2], icpAddr, &icpPort))
+  {
+    fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | --list <file-list> ) ..]\n", argv[0]);
+    fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]);
+    return 1;
+  }
+  index = PT_New();
+  for(i = 3 ; i < argc ; i++) {
+    if (argv[i][0] == '-') {
+      if (strcmp(argv[i], "--list") == 0) {
+        char line[256 + 1];
+        FILE *fp;
+
+        /* "--list" as the very last argument used to be ignored silently */
+        if (i + 1 >= argc) {
+          fprintf(stderr, "error: --list requires a file name\n");
+          exit(1);
+        }
+        fp = fopen(argv[++i], "rb");
+        if (fp == NULL) {
+          fprintf(stderr, "error: could not process list %s\n", argv[i]);
+          exit(1);
+        }
+        while(linput(fp, line, 256)) {
+          addIndexAndReport(index, line);
+        }
+        fclose(fp);
+      } else {
+        fprintf(stderr, "* bad arg %s\n", argv[i]);
+        exit(1);
+      }
+    } else {
+      addIndexAndReport(index, argv[i]);
+    }
+  }
+
+  /* sigpipe */
+#ifndef _WIN32
+  signal( SIGPIPE , sig_brpipe ); // broken pipe (write into non-opened socket)
+#endif
+
+  /* Go */
+  ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index);
+
+  /* Wipe */
+  PT_Delete(index);
+
+#ifdef _WIN32
+  WSACleanup();
+#endif
+
+  return ret;
+}
+
diff --git a/src/proxy/proxystrings.h b/src/proxy/proxystrings.h
new file mode 100755
index 0000000..87bcf34
--- /dev/null
+++ b/src/proxy/proxystrings.h
@@ -0,0 +1,153 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Strings */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Strings a bit safer than static buffers
+
+#ifndef HTS_STRINGS_DEFSTATIC
+#define HTS_STRINGS_DEFSTATIC
+
+typedef struct String { /* growable heap string; StringMemcat/StringClear keep it NUL-terminated */
+ char* buff; /* buffer grown with realloc(); NULL after STRING_EMPTY or StringFree */
+ int len; /* bytes stored, not counting the terminating NUL */
+ int capa; /* capacity in bytes as recorded by StringRoom/StringAttach */
+} String;
+
+#define STRING_EMPTY {NULL, 0, 0} /* initializer: no buffer, zero length, zero capacity */
+#define STRING_BLK_SIZE 256 /* not referenced anywhere else in this header */
+#define StringBuff(blk) ((blk).buff) /* raw buffer pointer (may be NULL) */
+#define StringLength(blk) ((blk).len) /* current length */
+#define StringCapacity(blk) ((blk).capa) /* recorded capacity */
+#define StringRoom(blk, size) do { /* make room for `size` more bytes plus the NUL; may evaluate `size` twice */ \
+ if ((blk).len + (int)(size) + 1 > (blk).capa) { \
+ (blk).capa = ((blk).len + (size) + 1) * 2; /* grow to twice what is needed */ \
+ (blk).buff = (char*) realloc((blk).buff, (blk).capa); /* NOTE(review): result unchecked; buff becomes NULL on failure */ \
+ } \
+} while(0)
+/* StringBuffN(blk, size): grow `blk` through StringRoom so that (len + size)
+   further bytes fit after the current content, then return its buffer. */
+#define StringBuffN(blk, size) StringBuffN_(&(blk), size)
+static char* StringBuffN_(String* blk, int size) {
+  const int extra = blk->len + size;
+
+  StringRoom(*blk, extra);
+  return blk->buff;
+}
+#define StringClear(blk) do { /* reset to the empty string "" */ \
+ StringRoom(blk, 0); /* allocates a buffer if there is none yet */ \
+ (blk).buff[0] = '\0'; \
+ (blk).len = 0; \
+} while(0)
+#define StringFree(blk) do { /* release the buffer and return to the STRING_EMPTY state */ \
+ if ((blk).buff != NULL) { \
+ free((blk).buff); \
+ (blk).buff = NULL; \
+ } \
+ (blk).capa = 0; \
+ (blk).len = 0; \
+} while(0)
+#define StringMemcat(blk, str, size) do { /* append `size` bytes from `str`; `size` is evaluated several times */ \
+ StringRoom(blk, size); \
+ if ((int)(size) > 0) { \
+ memcpy((blk).buff + (blk).len, (str), (size)); \
+ (blk).len += (size); \
+ } \
+ *((blk).buff + (blk).len) = '\0'; /* always NUL-terminate, even when nothing was appended */ \
+} while(0)
+#define StringAddchar(blk, c) do { /* append the single character `c` */ \
+ char __c = (c); /* local copy so that its address can be passed on */ \
+ StringMemcat(blk, &__c, 1); \
+} while(0)
+/* Detach and return the buffer owned by `blk` (NULL if it has none).
+   `blk` is left with no buffer, zero length and zero capacity. */
+static void* StringAcquire(String* blk) {
+  void* const taken = blk->buff;
+
+  blk->len = 0;
+  blk->capa = 0;
+  blk->buff = NULL;
+  return taken;
+}
+/*
+ * Make `blk` adopt the NUL-terminated buffer `*str`.
+ *
+ * Whatever `blk` held before is released first. When `str` and `*str` are
+ * both non-NULL the buffer is taken over and `*str` is reset to NULL;
+ * otherwise `blk` is simply left empty. The adopted buffer is later passed
+ * to realloc()/free() by the String macros.
+ *
+ * The return type was missing (implicit int, invalid since C99) although
+ * nothing is returned; it is now void.
+ */
+static void StringAttach(String* blk, char** str) {
+  StringFree(*blk);
+  if (str != NULL && *str != NULL) {
+    blk->buff = *str;
+    /* The recorded capacity excludes the NUL, so the next StringRoom()
+       reallocates before anything is appended. */
+    blk->capa = (int)strlen(blk->buff);
+    blk->len = blk->capa;
+    *str = NULL;
+  }
+}
+#define StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? (int)strlen(str) : 0) /* append a C string; NULL appends nothing; `str` is expanded several times */
+#define StringStrcpy(blk, str) do { /* replace the contents of `blk` with a copy of `str` */ \
+ StringClear(blk); \
+ StringStrcat(blk, str); \
+} while(0)
+
+/* Tools */
+
+/* Value (0..15) of the hexadecimal digit `c`, upper or lower case.
+   Any other character yields 0. */
+static int ehexh(char c) {
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+  return 0;
+}
+
+/* Value of the two hexadecimal digits s[0] (high nibble) and s[1] (low
+   nibble); both characters are read unconditionally. */
+static int ehex(const char* s) {
+  const int high = ehexh(s[0]);
+  const int low = ehexh(s[1]);
+
+  return high * 16 + low;
+}
+
+/*
+ * Append the URL-decoded form of `s` to `*tempo`.
+ *
+ *   "%%"  -> '%'
+ *   "%XY" -> the byte whose two hex digits are X and Y (see ehex())
+ *   '+'   -> ' '
+ *   any other character is copied unchanged.
+ *
+ * A '%' followed by fewer than two characters is copied verbatim. It used
+ * to be decoded anyway, which read past the terminating NUL and moved the
+ * loop index beyond the end of the string.
+ */
+static void unescapehttp(const char* s, String* tempo) {
+  int i;
+  for (i = 0; s[i] != '\0'; i++) {
+    if (s[i] == '%' && s[i + 1] == '%') {
+      i++;
+      StringAddchar(*tempo, '%');
+    } else if (s[i] == '%' && s[i + 1] != '\0' && s[i + 2] != '\0') {
+      StringAddchar(*tempo, (char) ehex(s + i + 1));
+      i += 2; /* skip the two characters consumed by the escape */
+    } else if (s[i] == '+') {
+      StringAddchar(*tempo, ' ');
+    } else {
+      /* ordinary character, or a truncated escape kept as is */
+      StringAddchar(*tempo, s[i]);
+    }
+  }
+}
+
+/* Append `s` to `*tempo`, writing the characters & < > " as the entities
+   &amp; &lt; &gt; &quot; and copying every other character unchanged. */
+static void escapexml(const char* s, String* tempo) {
+  const char* p;
+
+  for (p = s; *p != '\0'; p++) {
+    switch (*p) {
+    case '&':
+      StringStrcat(*tempo, "&amp;");
+      break;
+    case '<':
+      StringStrcat(*tempo, "&lt;");
+      break;
+    case '>':
+      StringStrcat(*tempo, "&gt;");
+      break;
+    case '\"':
+      StringStrcat(*tempo, "&quot;");
+      break;
+    default:
+      StringAddchar(*tempo, *p);
+      break;
+    }
+  }
+}
+
+#endif
diff --git a/src/proxy/proxytrack.c b/src/proxy/proxytrack.c
new file mode 100644
index 0000000..7604804
--- /dev/null
+++ b/src/proxy/proxytrack.c
@@ -0,0 +1,1621 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: ProxyTrack, httrack cache-based proxy */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+/*
+
+/\/\/\/\/\/\/\/\/\/\/\/\/\ PENDING WORK /\/\/\/\/\/\/\/\/\/\/\/\/\
+- Etag update handling
+- Other cache archive handling (.arc)
+- Live plug/unplug of archives
+- Listing
+\/\/\/\/\/\/\/\/\/\/\/\/\/ PENDING WORK \/\/\/\/\/\/\/\/\/\/\/\/\/
+
+*/
+
+/*
+Architecture rough draft
+Xavier Roche 2005
+
+Aim: Building a sub-proxy to be linked with other top level proxies (such as Squid)
+Basic design: Classical HTTP/1.0 proxy server, with ICP server support
+Internal data design: HTTrack cache indexing in fast hashtables, with 'pluggable' design (add/removal of caches on-the-fly)
+
+
+Index structure organization:
+-----------------------------
+
+foo/hts-cache/new.zip -----> Index[0] \
+bar/hts-cache/new.zip -----> Index[1] > Central Index Lookup (CIL)
+baz/hts-cache/new.zip -----> Index[2] /
+.. -----> ..
+
+Indexes are hashtables with URL (STRING) -> INTEGER lookup.
+
+URL -----> CIL Ask for index ID
+URL -----> Index[ID] Ask for index properties (ZIP cache index)
+
+
+Lookup of an entry:
+-------------------
+
+ID = CIL[URL]
+If ID is valid Then
+ return SUCCESS
+Else
+ return FAILURE
+EndIf
+
+
+Fetching of an entry:
+---------------------
+
+RESOURCE = null
+ID = CIL[URL]
+If ID is valid Then
+ OFFSET = Index[ID][URL]
+ If OFFSET is valid Then
+ RESOURCE = Fetch(ID, OFFSET)
+ EndIf
+EndIf
+
+
+Removal of index N:
+-------------------
+
+For all entries in Index[N]
+ URL(key) -----> Lookup all other caches
+ Found: Replace in CIL
+ Not Found: Delete entry in CIL
+Done
+Delete Index[N]
+
+
+Adding of index N:
+------------------
+
+Build Index[N]
+For all entries in Index[N]
+ URL(key) -----> Lookup in CIL
+ Found: Do nothing if corresponding Cache is newer than this one
+ Not Found: Add/Replace entry in CIL
+Done
+
+Remark: If no cache newer than the added one is found, all entries can be added without any lookup (optim)
+
+*/
+
+/* HTTrack definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htslib.h"
+#include "htsglobal.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#if HTS_WIN
+#else
+#include <arpa/inet.h>
+#endif
+#ifndef _WIN32
+#include <signal.h>
+#endif
+/* END specific definitions */
+
+/* String */
+#include "proxystrings.h"
+
+/* Network base */
+#include "htsbasenet.h"
+
+/* définitions globales */
+#include "htsglobal.h"
+
+/* htslib */
+/*#include "htslib.h"*/
+
+/* HTTrack Website Copier Library */
+#include "httrack-library.h"
+
+/* htsweb */
+#include "htsinthash.h"
+
+/* ProxyTrack */
+#include "proxytrack.h"
+
+/* Store manager */
+#include "../minizip/mztools.h"
+#include "store.h"
+
+/* threads */
+#ifdef _WIN32
+#include <process.h> /* _beginthread, _endthread */
+#else
+#include <pthread.h>
+#endif
+
+/* External references */
+// htsErrorCallback htsCallbackErr = NULL;
+int htsMemoryFastXfr = 1; /* fast xfr by default */
+void abortLog__fnc(char* msg, char* file, int line);
+/* Write a small crash report (version, source location, reason).
+   The report goes to the first of the candidate paths below that can be
+   opened for writing; when none can be opened nothing is written. */
+void abortLog__fnc(char* msg, char* file, int line) {
+  static const char* const candidates[] = {
+    "CRASH.TXT",
+    "/tmp/CRASH.TXT",
+    "C:\\CRASH.TXT",
+    "CRASH.TXT"
+  };
+  FILE* fp = NULL;
+  unsigned int k;
+
+  for (k = 0; fp == NULL && k < sizeof(candidates) / sizeof(candidates[0]); k++) {
+    fp = fopen(candidates[k], "wb");
+  }
+  if (fp == NULL)
+    return;
+
+  fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '%s', line %d\r\n", file, line);
+  fprintf(fp, "Reason:\r\n%s\r\n", msg);
+  fflush(fp);
+  fclose(fp);
+}
+// HTSEXT_API t_abortLog abortLog__ = abortLog__fnc; /* avoid VC++ inlining */
+#define webhttrack_lock(A) do{}while(0)
+
+/* Static definitions */
+
+/* Read one line from socket soc into s, one byte per recv() call.
+   Reading stops at a line feed, when recv() does not deliver a byte, or
+   once max-1 characters have been stored. CR, TAB and form feed are
+   discarded; the line feed itself is not stored. s is always
+   NUL-terminated. Returns the number of characters stored. */
+static int linputsoc(T_SOC soc, char* s, int max) {
+  int stored = 0;
+  int finished = 0;
+  do {
+    unsigned char byte;
+    if (recv(soc, &byte, 1, 0) != 1) {
+      finished = 1;                 /* end of stream or error */
+    } else if (byte == 10) {
+      finished = 1;                 /* LF: end of line */
+    } else if (byte != 13 && byte != 9 && byte != 12) {
+      s[stored++] = (char) byte;    /* CR, TAB and FF are dropped */
+    }
+  } while (!finished && stored < (max - 1));
+  s[stored] = '\0';
+  return stored;
+}
+
+/* Wait up to timeout seconds for socket soc to become readable.
+   Returns 1 when select() reports soc as readable, 0 on timeout, on a
+   select() failure, or when soc is INVALID_SOCKET.
+   The result of select() is checked explicitly: when select() fails the
+   descriptor set is left as it was passed in, so testing FD_ISSET() alone
+   would wrongly report the socket as readable. */
+static int check_readinput_t(T_SOC soc, int timeout) {
+  fd_set fds;           /* poll structures */
+  struct timeval tv;    /* structure for select */
+
+  if (soc == INVALID_SOCKET)
+    return 0;
+
+  FD_ZERO(&fds);
+  FD_SET(soc, &fds);
+  tv.tv_sec = timeout;
+  tv.tv_usec = 0;
+  if (select((int)(soc + 1), &fds, NULL, NULL, &tv) <= 0)
+    return 0;           /* timeout or error */
+  return FD_ISSET(soc, &fds) ? 1 : 0;
+}
+
+/* Same as linputsoc(), but first waits up to timeout seconds for data.
+   Returns -1 when nothing became readable in time, otherwise the value
+   returned by linputsoc(). */
+static int linputsoc_t(T_SOC soc, char* s, int max, int timeout) {
+  return check_readinput_t(soc, timeout) ? linputsoc(soc, s, max) : -1;
+}
+
+/* Decode the escaped string s and append the result to *tempo.
+     "%%"  -> a single '%'
+     "%XY" -> the byte whose hexadecimal value is XY; it is dropped when
+              both it and the previously emitted character satisfy
+              is_retorsep()
+   Every other character is copied unchanged.
+   An escape cut short by the end of the string is copied through
+   literally; the previous code read one byte past the terminating NUL for
+   a lone trailing '%'. The string length is computed once instead of on
+   every loop iteration. */
+static void unescapeini(char* s, String* tempo) {
+  const int len = (int) strlen(s);
+  char lastc = 0;
+  int i;
+  for (i = 0; i < len; i++) {
+    if (s[i] == '%' && s[i + 1] == '%') {
+      i++;
+      StringAddchar(*tempo, lastc = '%');
+    } else if (s[i] == '%' && i + 2 < len) {
+      /* both hex digits are present: decode them and skip over them */
+      const char hc = (char) ehex(s + i + 1);
+      if (!is_retorsep(hc) || !is_retorsep(lastc)) {
+        StringAddchar(*tempo, lastc = (char) hc);
+      }
+      i += 2;
+    } else {
+      StringAddchar(*tempo, lastc = s[i]);
+    }
+  }
+}
+
+/* Resolve hostname and store the first address found into *server.
+   Returns 1 on success, 0 when hostname is NULL or empty or cannot be
+   resolved.
+   server_size is the size of the destination; it is handed to
+   SOCaddr_copyaddr() as a plain local, so any update that macro makes to it
+   is not visible to the caller.
+   The IPv4-only branch previously referred to an undeclared "buffer" and
+   passed the pointer itself to SOCaddr_copyaddr(); it now checks the
+   address length against server_size and passes *server, as the IPv6
+   branch and the other SOCaddr_* users in this file do. */
+static int gethost(const char* hostname, SOCaddr *server, size_t server_size) {
+  if (hostname != NULL && *hostname != '\0') {
+#if HTS_INET6==0
+    /*
+      ipV4 resolver
+    */
+    t_hostent* hp = gethostbyname(hostname);
+    if (hp != NULL) {
+      if ( (hp->h_length > 0) && ( ((size_t) hp->h_length) <= server_size) ) {
+        SOCaddr_copyaddr(*server, server_size, hp->h_addr_list[0], hp->h_length);
+        return 1;
+      }
+    }
+#else
+    /*
+      ipV6 resolver
+    */
+    struct addrinfo* res = NULL;
+    struct addrinfo hints;
+    int found = 0;
+    memset(&hints, 0, sizeof(hints));
+    hints.ai_family = PF_UNSPEC;
+    hints.ai_socktype = SOCK_STREAM;
+    hints.ai_protocol = IPPROTO_TCP;
+    if (getaddrinfo(hostname, NULL, &hints, &res) == 0 && res != NULL) {
+      if ( (res->ai_addr) && (res->ai_addrlen) ) {
+        SOCaddr_copyaddr(*server, server_size, res->ai_addr, res->ai_addrlen);
+        found = 1;
+      }
+    }
+    /* release the result list on every path, found or not */
+    if (res) {
+      freeaddrinfo(res);
+    }
+    if (found)
+      return 1;
+#endif
+  }
+  return 0;
+}
+
+/* Build the textual form "address:port" of *server.
+   Returns a String owning a freshly allocated buffer, or an empty String
+   when memory is exhausted.
+   The buffer is sized for the longest textual address (an IPv4-mapped IPv6
+   address, 45 characters) plus ":65535" and the terminating NUL. The
+   former IPv6 size literal only had seven groups, so appending the port to
+   a full-length address wrote past the allocation. */
+static String getip(SOCaddr *server, int serverLen) {
+  String s = STRING_EMPTY;
+  /* address part, including its NUL */
+  unsigned int addrMax = sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255");
+  /* ":65535", without NUL */
+  unsigned int portMax = sizeof(":65535") - 1;
+  char * dotted = malloc(addrMax + portMax);
+  unsigned short port;
+  if (dotted == NULL) {
+    CRITICAL("memory exhausted");
+    return s;
+  }
+  /* stay well-defined even if the conversion below writes nothing */
+  dotted[0] = '\0';
+  port = ntohs(SOCaddr_sinport(*server));
+  SOCaddr_inetntoa(dotted, addrMax, *server, serverLen);
+  /* guarantee termination inside the address part, so that the port
+     (at most 6 characters + NUL) always fits behind it */
+  dotted[addrMax - 1] = '\0';
+  sprintf(dotted + strlen(dotted), ":%d", port);
+  StringAttach(&s, &dotted);
+  return s;
+}
+
+
+/* Create a socket bound to adr:port.
+   "family" is passed to socket() as the socket type (SOCK_STREAM or
+   SOCK_DGRAM); stream sockets are additionally put into listening state
+   with a backlog of 10.
+   Returns the socket, or INVALID_SOCKET when the address cannot be
+   resolved or any of socket()/bind()/listen() fails (the socket is closed
+   in that case). */
+static T_SOC smallserver_init(const char* adr, int port, int family) {
+  SOCaddr server;
+  size_t server_size = sizeof(server);
+  T_SOC soc;
+
+  memset(&server, 0, sizeof(server));
+  SOCaddr_initany(server, server_size);
+
+  /* host name */
+  if (!gethost(adr, &server, server_size))
+    return INVALID_SOCKET;
+
+  soc = socket(SOCaddr_sinfamily(server), family, 0);
+  if (soc == INVALID_SOCKET)
+    return INVALID_SOCKET;
+
+  SOCaddr_initport(server, port);
+  if ( bind(soc,(struct sockaddr*) &server, (int)server_size) == 0
+       && (family != SOCK_STREAM || listen(soc, 10) >=0) ) {
+    return soc;
+  }
+
+  /* bind or listen failed: release the socket */
+#ifdef _WIN32
+  closesocket(soc);
+#else
+  close(soc);
+#endif
+  return INVALID_SOCKET;
+}
+
+static int proxytrack_start(PT_Indexes indexes, T_SOC soc, T_SOC socICP);
+/* Entry point of the proxy: open the HTTP (TCP) socket on
+   proxyAddr:proxyPort and the ICP (UDP) socket on icpAddr:icpPort, then
+   hand both to proxytrack_start().
+   Returns 0 on success, 1 when a socket could not be set up or the server
+   could not be started.
+   Changes from the previous version:
+   - the ICP socket is bound to icpAddr (it was bound to proxyAddr, while
+     the banner already announced icpAddr);
+   - a socket that was opened is closed again on every failure path;
+   - three unused local buffers were removed.
+   NOTE(review): socICP is now also closed when proxytrack_start() fails,
+   mirroring what was already done for soc - confirm proxytrack_start() does
+   not close it itself on failure. */
+int proxytrack_main(char* proxyAddr, int proxyPort,
+                    char* icpAddr, int icpPort,
+                    PT_Indexes index) {
+  int returncode = 0;
+  T_SOC soc = smallserver_init(proxyAddr, proxyPort, SOCK_STREAM);
+  T_SOC socICP = smallserver_init(icpAddr, icpPort, SOCK_DGRAM);
+  if (soc != INVALID_SOCKET
+    && socICP != INVALID_SOCKET)
+  {
+    printf("HTTP Proxy installed on %s:%d/\n", proxyAddr, proxyPort);
+    printf("ICP Proxy installed on %s:%d/\n", icpAddr, icpPort);
+#ifndef _WIN32
+    {
+      pid_t pid = getpid();
+      printf("PID=%d\n", (int)pid);
+    }
+#endif
+    fflush(stdout);
+    fflush(stderr);
+    //
+    if (!proxytrack_start(index, soc, socICP)) {
+      /* report first: closing the sockets may overwrite errno */
+      fprintf(stderr, "Unable to create the server: %s\n", strerror(errno));
+#ifdef _WIN32
+      closesocket(soc);
+      closesocket(socICP);
+#else
+      close(soc);
+      close(socICP);
+#endif
+      printf("Done\n");
+      returncode = 1;
+    } else {
+      returncode = 0;
+    }
+  } else {
+    fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(errno));
+    /* at most one of the two sockets was opened: do not leak it */
+    if (soc != INVALID_SOCKET) {
+#ifdef _WIN32
+      closesocket(soc);
+#else
+      close(soc);
+#endif
+    }
+    if (socICP != INVALID_SOCKET) {
+#ifdef _WIN32
+      closesocket(socICP);
+#else
+      close(socICP);
+#endif
+    }
+    returncode = 1;
+  }
+  printf("EXITED\n");
+  fflush(stdout);
+  fflush(stderr);
+  return returncode;
+}
+
+/* Reason phrase for an HTTP status code.
+   Returns a pointer to a constant string; codes that are not in the table
+   yield "Unknown HTTP Error". */
+static const char* GetHttpMessage(int statuscode) {
+  static const struct {
+    int code;
+    const char* text;
+  } reasons[] = {
+    { 100, "Continue" },
+    { 101, "Switching Protocols" },
+    { 200, "OK" },
+    { 201, "Created" },
+    { 202, "Accepted" },
+    { 203, "Non-Authoritative Information" },
+    { 204, "No Content" },
+    { 205, "Reset Content" },
+    { 206, "Partial Content" },
+    { 207, "Multi-Status" },
+    { 300, "Multiple Choices" },
+    { 301, "Moved Permanently" },
+    { 302, "Moved Temporarily" },
+    { 303, "See Other" },
+    { 304, "Not Modified" },
+    { 305, "Use Proxy" },
+    { 306, "Undefined 306 error" },
+    { 307, "Temporary Redirect" },
+    { 400, "Bad Request" },
+    { 401, "Unauthorized" },
+    { 402, "Payment Required" },
+    { 403, "Forbidden" },
+    { 404, "Not Found" },
+    { 405, "Method Not Allowed" },
+    { 406, "Not Acceptable" },
+    { 407, "Proxy Authentication Required" },
+    { 408, "Request Time-out" },
+    { 409, "Conflict" },
+    { 410, "Gone" },
+    { 411, "Length Required" },
+    { 412, "Precondition Failed" },
+    { 413, "Request Entity Too Large" },
+    { 414, "Request-URI Too Large" },
+    { 415, "Unsupported Media Type" },
+    { 416, "Requested Range Not Satisfiable" },
+    { 417, "Expectation Failed" },
+    { 500, "Internal Server Error" },
+    { 501, "Not Implemented" },
+    { 502, "Bad Gateway" },
+    { 503, "Service Unavailable" },
+    { 504, "Gateway Time-out" },
+    { 505, "HTTP Version Not Supported" }
+  };
+  unsigned int k;
+
+  for (k = 0; k < sizeof(reasons) / sizeof(reasons[0]); k++) {
+    if (reasons[k].code == statuscode)
+      return reasons[k].text;
+  }
+  return "Unknown HTTP Error";
+}
+
+#ifndef NO_WEBDAV
+/* Format one WebDAV <response> element describing a resource into *item.
+     item      receives the XML fragment (content overwritten, length set)
+     buff      scratch String; receives the XML-escaped filename
+     filename  resource path, emitted below "/webdav"
+     size      content length reported for the resource
+     timestamp modification time; 0 or -1 means "now"
+     mime      content type; NULL or "" means application/octet-stream
+     isDir     non-zero for a collection
+     isRoot    non-zero for the root of the listing
+     isDefault currently unused
+   Nothing is written when gmtime() fails.
+   Changes from the previous version:
+   - the room reserved in *item now grows with the escaped filename, the
+     MIME type and the date string; a fixed 1024 bytes was reserved before,
+     which a long URL could overflow;
+   - the scratch buffer is really cleared before use, so an empty filename
+     can no longer pick up the content of a previous call;
+   - the content length is printed as unsigned long instead of being
+     truncated through int. */
+static void proxytrack_add_DAV_Item(String *item, String *buff,
+                                    const char* filename,
+                                    unsigned long int size,
+                                    time_t timestamp,
+                                    const char* mime,
+                                    int isDir,
+                                    int isRoot,
+                                    int isDefault)
+{
+  struct tm * timetm;
+  if (timestamp == (time_t) 0 || timestamp == (time_t) -1) {
+    timestamp = time(NULL);
+  }
+  if ((timetm = gmtime(&timestamp)) != NULL) {
+    char tms[256 + 1];
+    const char * name;
+    tms[0] = '\0';
+    strftime(tms, 256, "%a, %d %b %Y %H:%M:%S GMT", timetm); /* Sun, 18 Sep 2005 11:45:45 GMT */
+
+    if (mime == NULL || *mime == 0)
+      mime = "application/octet-stream";
+
+    StringClear(*buff);
+    escapexml(filename, buff);
+
+    /* display name: last path component, or a placeholder when empty */
+    name = strrchr(StringBuff(*buff), '/');
+    if (name != NULL)
+      name++;
+    if (name == NULL || *name == 0) {
+      if (strcmp(mime, "text/html") == 0)
+        name = "Default Document for the Folder.html";
+      else
+        name = "Default Document for the Folder";
+    }
+
+    /* 1024 covers the fixed template and the numeric fields; the escaped
+       path can appear twice (href and display name) */
+    StringRoom(*item, 1024 + 2 * StringLength(*buff)
+                      + (int) strlen(mime) + (int) strlen(tms));
+    sprintf(StringBuff(*item),
+      "<response xmlns=\"DAV:\">\r\n"
+      "<href>/webdav%s%s</href>\r\n"
+      "<propstat>\r\n"
+      "<prop>\r\n"
+      "<displayname>%s</displayname>\r\n"
+      "<iscollection>%d</iscollection>\r\n"
+      "<haschildren>%d</haschildren>\r\n"
+      "<isfolder>%d</isfolder>\r\n"
+      "<resourcetype>%s</resourcetype>\r\n"
+      "<creationdate>%d-%02d-%02dT%02d:%02d:%02dZ</creationdate>\r\n"
+      "<getlastmodified>%s</getlastmodified>\r\n"
+      "<supportedlock></supportedlock>\r\n"
+      "<lockdiscovery/>\r\n"
+      "<getcontenttype>%s</getcontenttype>\r\n"
+      "<getcontentlength>%lu</getcontentlength>\r\n"
+      "<isroot>%d</isroot>\r\n"
+      "</prop>\r\n"
+      "<status>HTTP/1.1 200 OK</status>\r\n"
+      "</propstat>\r\n"
+      "</response>\r\n",
+      /* */
+      ( StringBuff(*buff)[0] == '/' ) ? "" : "/", StringBuff(*buff),
+      name,
+      isDir ? 1 : 0,
+      isDir ? 1 : 0,
+      isDir ? 1 : 0,
+      isDir ? "<collection/>" : "",
+      timetm->tm_year + 1900, timetm->tm_mon + 1, timetm->tm_mday, timetm->tm_hour, timetm->tm_min, timetm->tm_sec,
+      tms,
+      isDir ? "httpd/unix-directory" : mime,
+      size,
+      isRoot ? 1 : 0
+      );
+    StringLength(*item) = (int) strlen(StringBuff(*item));
+  }
+}
+
+/* Convert a RFC822 time to time_t.
+   The input is lower-cased, the characters '-', ':' and ',' are turned into
+   spaces, and the result is split into space-separated words. A word found
+   inside the month table gives the month; the first numeric word is the
+   day, the next four fill n1..n4. Two layouts are told apart by n4:
+     n4 >= 1000 : "Sun Nov  6 08:49:37 1994"        (n1..n3 = h m s, n4 = year)
+     otherwise  : "Sun, 06 Nov 1994 08:49:37 GMT"   (n1 = year, n2..n4 = h m s)
+   Returns (time_t) 0 when s is longer than 200 characters or when the
+   month, the day or any of the four numbers is missing.
+   NOTE(review): the result comes from mktime(), which interprets the
+   fields as local time although the comment below assumes GMT - confirm
+   whether callers depend on that. */
+time_t get_time_rfc822(const char* s) {
+ struct tm result;
+ /* */
+ char months[]="jan feb mar apr may jun jul aug sep oct nov dec";
+ char str[256];
+ char* a;
+ int i;
+ /* */
+ int result_mm=-1;
+ int result_dd=-1;
+ int result_n1=-1;
+ int result_n2=-1;
+ int result_n3=-1;
+ int result_n4=-1;
+ /* */
+
+ /* str[] and tok[] hold 256 bytes: refuse anything that might not fit */
+ if ((int) strlen(s) > 200)
+ return (time_t)0;
+ /* lower-case copy of the input (ASCII letters only) */
+ for(i = 0 ; s[i] != 0 ; i++) {
+ if (s[i] >= 'A' && s[i] <= 'Z')
+ str[i] = s[i] + ('a' - 'A');
+ else
+ str[i] = s[i];
+ }
+ str[i] = 0;
+ /* strip :,- */
+ while( (a=strchr(str,'-')) ) *a=' ';
+ while( (a=strchr(str,':')) ) *a=' ';
+ while( (a=strchr(str,',')) ) *a=' ';
+ /* tokenize */
+ a=str;
+ while(*a) {
+ char *first,*last;
+ char tok[256];
+ /* split off one word */
+ while(*a==' ') a++; /* skip spaces */
+ first=a;
+ while((*a) && (*a!=' ')) a++;
+ last=a;
+ tok[0]='\0';
+ if (first!=last) {
+ char* pos;
+ strncat(tok,first,(int) (last - first));
+ /* parse */
+ /* month names sit 4 bytes apart in months[], hence the division by 4 */
+ if ( (pos=strstr(months,tok)) ) { /* month always in letters */
+ result_mm=((int) (pos - months))/4;
+ } else {
+ int number;
+ if (sscanf(tok,"%d",&number) == 1) { /* number token */
+ if (result_dd<0) /* day always first number */
+ result_dd=number;
+ else if (result_n1<0)
+ result_n1=number;
+ else if (result_n2<0)
+ result_n2=number;
+ else if (result_n3<0)
+ result_n3=number;
+ else if (result_n4<0)
+ result_n4=number;
+ } /* otherwise, noise (+1GMT for example) */
+ }
+ }
+ }
+ if ((result_n1>=0) && (result_mm>=0) && (result_dd>=0) && (result_n2>=0) && (result_n3>=0) && (result_n4>=0)) {
+ if (result_n4>=1000) { /* Sun Nov 6 08:49:37 1994 */
+ result.tm_year=result_n4-1900;
+ result.tm_hour=result_n1;
+ result.tm_min=result_n2;
+ result.tm_sec=max(result_n3,0);
+ } else { /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
+ result.tm_hour=result_n2;
+ result.tm_min=result_n3;
+ result.tm_sec=max(result_n4,0);
+ if (result_n1<=50) /* 00 means 2000 */
+ result.tm_year=result_n1+100;
+ else if (result_n1<1000) /* 99 means 1999 */
+ result.tm_year=result_n1;
+ else /* 2000 */
+ result.tm_year=result_n1-1900;
+ }
+ result.tm_isdst=0; /* assume GMT */
+ result.tm_yday=-1; /* don't know */
+ result.tm_wday=-1; /* don't know */
+ result.tm_mon=result_mm;
+ result.tm_mday=result_dd;
+ return mktime(&result);
+ }
+ return (time_t) 0;
+}
+
+/* Build the answer to a WebDAV PROPFIND request.
+     indexes  cache indexes to enumerate and read
+     urlFull  requested URL; its path has to start with "/webdav"
+     depth    value of the Depth header; children are listed when > 0
+   Returns NULL when urlFull has no path, a 405 element when the path is
+   not below /webdav, and otherwise a 207 (Multi-Status) element whose body
+   is the XML listing: the requested resource first, then (depth > 0) the
+   sub-directories followed by the files returned by PT_Enumerate().
+   An empty entry name stands for the folder's default document. It is now
+   treated as a file; the previous code looked at the character before the
+   start of the name in that case (out-of-bounds read). */
+static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char * urlFull, int depth) {
+  const char * file = jump_protocol_and_auth(urlFull);
+  if ( (file = strchr(file, '/')) == NULL)
+    return NULL;
+
+  if (strncmp(file, "/webdav", 7) != 0) {
+    PT_Element elt = PT_ElementNew();
+    elt->statuscode = 405;
+    strcpy(elt->msg, "Method Not Allowed");
+    return elt;
+  }
+
+  /* Skip /webdav */
+  file += 7;
+
+  /* */
+  {
+    PT_Element elt = PT_ElementNew();
+    int i, isDir;
+    String url = STRING_EMPTY;
+    String response = STRING_EMPTY;
+    String item = STRING_EMPTY;
+    String itemUrl = STRING_EMPTY;
+    String buff = STRING_EMPTY;
+    StringClear(response);
+    StringClear(item);
+    StringClear(itemUrl);
+    StringClear(buff);
+
+    /* Canonize URL: no leading and no trailing slash */
+    StringStrcpy(url, file + ((file[0] == '/') ? 1 : 0));
+    if (StringLength(url) > 0) {
+      if (StringBuff(url)[StringLength(url) - 1] == '/') {
+        StringBuff(url)[StringLength(url) - 1] = '\0';
+        StringLength(url)--;
+      }
+    }
+
+    /* Form response */
+    StringRoom(response, 1024);
+    sprintf(StringBuff(response),
+      "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n"
+      "<multistatus xmlns=\"DAV:\">\r\n");
+    StringLength(response) = (int) strlen(StringBuff(response));
+    /* */
+
+    /* Root */
+    StringLength(item) = 0;
+    proxytrack_add_DAV_Item(&item, &buff,
+      StringBuff(url), /*size*/0, /*timestamp*/(time_t) 0, /*mime*/NULL, /*isDir*/1, /*isRoot*/1, /*isDefault*/0);
+    StringMemcat(response, StringBuff(item), StringLength(item));
+
+    /* Children (Depth > 0) */
+    if (depth > 0) {
+      time_t timestampRep = (time_t) -1;   /* index timestamp, fetched lazily */
+      const char * prefix = StringBuff(url);
+      unsigned int prefixLen = (unsigned int) strlen(prefix);
+      char ** list = PT_Enumerate(indexes, prefix, 0);
+      if (list != NULL) {
+        /* two passes: directories first (isDir == 1), then files */
+        for(isDir = 1 ; isDir >= 0 ; isDir--) {
+          for(i = 0 ; list[i] != NULL ; i++) {
+            const char * thisUrl = list[i];
+            const char * mimeType = "application/octet-stream";
+            unsigned int thisUrlLen = (unsigned int) strlen(thisUrl);
+            /* an empty name (default document) is a file, not a directory */
+            int thisIsDir = (thisUrlLen > 0 && thisUrl[thisUrlLen - 1] == '/') ? 1 : 0;
+
+            /* Item URL */
+            StringRoom(itemUrl, thisUrlLen + prefixLen + sizeof("/webdav/") + 1);
+            StringClear(itemUrl);
+            sprintf(StringBuff(itemUrl), "/%s/%s", prefix, thisUrl);
+            if (!thisIsDir)
+              StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl));
+            else  /* directories: drop the trailing slash */
+              StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)) - 1;
+            StringBuff(itemUrl)[StringLength(itemUrl)] = '\0';
+
+            if (thisIsDir == isDir) {
+              unsigned long size = 0;
+              time_t timestamp = (time_t) 0;
+              PT_Element entry = NULL;
+
+              /* Item stats */
+              if (!isDir) {
+                entry = PT_ReadIndex(indexes, StringBuff(itemUrl) + 1, FETCH_HEADERS);
+                if (entry != NULL && entry->statuscode == 200 ) {
+                  size = entry->size;
+                  if (entry->lastmodified) {
+                    timestamp = get_time_rfc822(entry->lastmodified);
+                  }
+                  /* no usable Last-Modified: fall back to the index timestamp */
+                  if (timestamp == (time_t) 0) {
+                    if (timestampRep == (time_t) -1) {
+                      timestampRep = 0;
+                      if (entry->indexId != -1) {
+                        timestampRep = PT_Index_Timestamp(PT_GetIndex(indexes, entry->indexId));
+                      }
+                    }
+                    timestamp = timestampRep;
+                  }
+                  if (entry->contenttype) {
+                    mimeType = entry->contenttype;
+                  }
+                }
+              }
+
+              /* Add item */
+              StringLength(item) = 0;
+              proxytrack_add_DAV_Item(&item, &buff,
+                StringBuff(itemUrl), size, timestamp, mimeType, isDir, /*isRoot*/0, /*isDefault*/(thisUrlLen == 0));
+              StringMemcat(response, StringBuff(item), StringLength(item));
+
+              /* Wipe element */
+              if (entry != NULL)
+                PT_Element_Delete(&entry);
+            }
+          }
+        }
+        PT_Enumerate_Delete(&list);
+      } /* items != NULL */
+    } /* Depth > 0 */
+
+    /* End of responses */
+    StringStrcat(response,
+      "</multistatus>\r\n"
+      );
+
+    StringFree(item);
+    StringFree(itemUrl);
+    StringFree(url);
+    StringFree(buff);
+
+    /* the element takes over the response buffer */
+    elt->size = StringLength(response);
+    elt->adr = StringAcquire(&response);
+    elt->statuscode = 207; /* Multi-Status */
+    strcpy(elt->charset, "utf-8");
+    strcpy(elt->contenttype, "text/xml");
+    strcpy(elt->msg, "Multi-Status");
+    StringFree(response);
+
+    fprintf(stderr, "RESPONSE:\n%s\n", elt->adr);
+
+    return elt;
+  }
+}
+#endif
+
+/* Build an HTML directory listing for url.
+   Returns NULL when PT_Enumerate() yields nothing, otherwise a 200
+   text/html element that owns the generated page: sub-directories first,
+   then files, each as a link.
+   Changes from the previous version:
+   - an empty entry name is treated as a file; the previous code looked at
+     the character before the start of the name (out-of-bounds read);
+   - url and the entry names are escaped with escapexml() before being
+     inserted into the page, so names containing & < > or " can no longer
+     break out of the markup. */
+static PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char * url) {
+  char ** list = PT_Enumerate(indexes, url, 0);
+  if (list != NULL) {
+    PT_Element elt = PT_ElementNew();
+    int i, isDir;
+    String html = STRING_EMPTY;
+    String escUrl = STRING_EMPTY;    /* url, escaped once */
+    String escName = STRING_EMPTY;   /* current entry, escaped */
+    StringClear(html);
+    StringClear(escUrl);
+    StringClear(escName);
+    if (url != NULL)
+      escapexml(url, &escUrl);
+    StringStrcat(html,
+      "<html>"
+      PROXYTRACK_COMMENT_HEADER
+      DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
+      "<head>\r\n"
+      "<title>ProxyTrack " PROXYTRACK_VERSION " Catalog</title>"
+      "</head>\r\n"
+      "<body>\r\n"
+      "<h3>Directory index:</h3><br />"
+      "<br />"
+      "<hr>"
+      "<tt>[DIR] <a href=\"..\">..</a></tt><br />"
+      );
+    /* two passes: directories first (isDir == 1), then files */
+    for(isDir = 1 ; isDir >= 0 ; isDir--) {
+      for(i = 0 ; list[i] != NULL ; i++) {
+        char * thisUrl = list[i];
+        unsigned int thisUrlLen = (unsigned int) strlen(thisUrl);
+        int thisIsDir = (thisUrlLen > 0 && thisUrl[thisUrlLen - 1] == '/') ? 1 : 0;
+        if (thisIsDir == isDir) {
+          StringClear(escName);
+          escapexml(thisUrl, &escName);
+          if (isDir)
+            StringStrcat(html, "<tt>[DIR] ");
+          else
+            StringStrcat(html, "<tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
+          StringStrcat(html, "<a href=\"");
+          if (isDir) {
+            StringStrcat(html, "http://proxytrack/");
+          }
+          StringStrcat(html, StringBuff(escUrl));
+          StringStrcat(html, StringBuff(escName));
+          StringStrcat(html, "\">");
+          StringStrcat(html, StringBuff(escName));
+          StringStrcat(html, "</a></tt><br />");
+        }
+      }
+    }
+    StringStrcat(html,
+      "</body>"
+      "</html>");
+    PT_Enumerate_Delete(&list);
+    StringFree(escUrl);
+    StringFree(escName);
+    /* the element takes over the page buffer */
+    elt->size = StringLength(html);
+    elt->adr = StringAcquire(&html);
+    elt->statuscode = 200;
+    strcpy(elt->charset, "iso-8859-1");
+    strcpy(elt->contenttype, "text/html");
+    strcpy(elt->msg, "OK");
+    StringFree(html);
+    return elt;
+  }
+  return NULL;
+}
+
+static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
+ int timeout=30;
+ int retour=0;
+ int willexit=0;
+ int buffer_size = 32768;
+ char * buffer = (char*)malloc(buffer_size);
+ int line1Size = 1024;
+ char * line1 = (char*)malloc(line1Size);
+ int lineSize = 8192;
+ char * line = (char*)malloc(lineSize);
+ int length = 0;
+ int keepAlive = 1;
+
+ String url = STRING_EMPTY;
+ String urlRedirect = STRING_EMPTY;
+ String headers = STRING_EMPTY;
+ String output = STRING_EMPTY;
+ String host = STRING_EMPTY;
+ String localhost = STRING_EMPTY;
+#ifndef NO_WEBDAV
+ String davHeaders = STRING_EMPTY;
+ String davRequest = STRING_EMPTY;
+#endif
+
+ StringRoom(localhost, 256);
+ if (gethostname(StringBuff(localhost), StringCapacity(localhost) - 1) == 0) {
+ StringLength(localhost) = (int) strlen(StringBuff(localhost));
+ } else {
+ StringStrcpy(localhost, "localhost");
+ }
+
+#ifdef _DEBUG
+ Sleep(1000);
+#endif
+
+ if (buffer == NULL || line == NULL || line1 == NULL) {
+ CRITICAL("proxytrack_process_HTTP:memory exhausted");
+#if HTS_WIN
+ closesocket(soc_c);
+#else
+ close(soc_c);
+#endif
+ return ;
+ }
+
+ do {
+ const char* msgError = NULL;
+ int msgCode = 0;
+ PT_Element element = NULL;
+ char* command;
+ char* proto;
+ char* surl;
+ int directHit = 0;
+ int headRequest = 0;
+ int listRequest = 0;
+#ifndef NO_WEBDAV
+ int davDepth = 0;
+#endif
+
+ /* Clear context */
+ line[0] = line1[0] = '\0';
+ buffer[0] = '\0';
+ command = line1;
+ StringClear(url);
+ StringClear(urlRedirect);
+ StringClear(headers);
+ StringClear(output);
+ StringClear(host);
+#ifndef NO_WEBDAV
+ StringClear(davHeaders);
+ StringClear(davRequest);
+#endif
+
+ /* line1: "GET http://www.example.com/ HTTP/1.0" */
+ if (linputsoc_t(soc_c, line1, line1Size - 2, timeout) > 0
+ && ( surl = strchr(line1, ' ') )
+ && !(*surl = '\0')
+ && ++surl
+ && (proto = strchr(surl, ' ')) && !(*proto = '\0') && ++proto)
+ {
+ /* Flush headers */
+ while(linputsoc_t(soc_c, line, lineSize - 2, timeout) > 0
+ && line[0] != 0)
+ {
+ int p;
+ if ((p = strfield(line, "Content-length:"))!=0) {
+ if (sscanf(line+p, "%d", &length) != 1) {
+ msgCode = 500;
+ msgError = "Bad HTTP Content-Length Field";
+ keepAlive = 0;
+ length = 0;
+ }
+ } else if (strcasecmp(line, "Connection: close") == 0) {
+ keepAlive = 0;
+ } else if (strcasecmp(line, "Connection: keep-alive") == 0) {
+ keepAlive = 1;
+ } else if ((p = strfield(line, "Host:"))) {
+ char* chost = line + p;
+ if (*chost == ' ')
+ chost++;
+ StringStrcpy(host, chost);
+ }
+#ifndef NO_WEBDAV
+ else if ((p = strfield(line, "Depth: "))) {
+ char* depth = line + p;
+ if (sscanf(depth, "%d", &davDepth) != 1) {
+ davDepth = 0;
+ }
+ }
+#endif
+ }
+
+ /* Flush body */
+#ifndef NO_WEBDAV
+ if (length > 0) {
+ if (length < 32768) {
+ StringRoom(davRequest, length + 1);
+ if (recv(soc_c, StringBuff(davRequest), length, 0) == length) {
+ StringBuff(davRequest)[length] = 0;
+ } else {
+ msgCode = 500;
+ msgError = "Posted Data Read Error";
+ keepAlive = 0;
+ }
+ } else {
+ msgCode = 500;
+ msgError = "Posted Data Too Large";
+ keepAlive = 0;
+ }
+ }
+#endif
+
+ /* Switch protocol ID */
+ if (strcasecmp(command, "post") == 0) {
+#ifndef NO_WEBDAV
+ msgCode = 404;
+#else
+ msgCode = 501;
+ keepAlive = 0;
+#endif
+ msgError = "Proxy Error (POST Request Forbidden)";
+ }
+ else if (strcasecmp(command, "get") == 0) {
+ headRequest = 0;
+ }
+ else if (strcasecmp(command, "head") == 0) {
+ headRequest = 1;
+ }
+#ifndef NO_WEBDAV
+ else if (strcasecmp(command, "options") == 0) {
+ const char * options = "GET, HEAD, OPTIONS, POST, PROPFIND, TRACE"
+ ", MKCOL, DELETE, PUT"; /* Not supported */
+ msgCode = 200;
+ StringRoom(headers, 8192);
+ sprintf(StringBuff(headers),
+ "HTTP/1.1 %d %s\r\n"
+ "DAV: 1, 2\r\n"
+ "MS-Author-Via: DAV\r\n"
+ "Cache-Control: private\r\n"
+ "Allow: %s\r\n",
+ msgCode, GetHttpMessage(msgCode), options);
+ StringLength(headers) = (int) strlen(StringBuff(headers));
+ }
+ else if (strcasecmp(command, "propfind") == 0) {
+ if (davDepth > 1) {
+ msgCode = 403;
+ msgError = "DAV Depth Limit Forbidden";
+ } else {
+ fprintf(stderr, "DEBUG: DAV-DATA=<%s>\n", StringBuff(davRequest));
+ listRequest = 2; /* propfind */
+ }
+ }
+ else if (strcasecmp(command, "mkcol") == 0
+ || strcasecmp(command, "delete") == 0
+ || strcasecmp(command, "put") == 0
+ || strcasecmp(command, "proppatch") == 0
+ || strcasecmp(command, "lock") == 0
+ || strcasecmp(command, "unlock") == 0
+ || strcasecmp(command, "copy") == 0
+ || strcasecmp(command, "trace") == 0)
+ {
+ msgCode = 403;
+ msgError = "Method Forbidden";
+ }
+#endif
+ else {
+ msgCode = 501;
+ msgError = "Proxy Error (Unsupported or Unknown HTTP Command Request)";
+ keepAlive = 0;
+ }
+ if (strcasecmp(proto, "http/1.1") == 0) {
+ keepAlive = 1;
+ } else if (strcasecmp(proto, "http/1.0") == 0) {
+ keepAlive = 0;
+ } else {
+ msgCode = 505;
+ msgError = "Proxy Error (Unknown HTTP Version)";
+ keepAlive = 0;
+ }
+
+ /* Post-process request */
+ if (link_has_authority(surl)) {
+ const unsigned int prefixLen = sizeof("http://proxytrack/") - 1;
+ if (strncasecmp(surl, "http://proxytrack/", prefixLen) == 0) {
+ directHit = 1; /* Another direct hit hack */
+ }
+ StringStrcpy(url, surl);
+ } else {
+ if (StringLength(host) > 0) {
+ /* Direct hit */
+ if (
+#ifndef NO_WEBDAV
+ listRequest != 2
+ &&
+#endif
+ strncasecmp(StringBuff(host), StringBuff(localhost), StringLength(localhost)) == 0
+ &&
+ (StringBuff(host)[StringLength(localhost)] == '\0'
+ || StringBuff(host)[StringLength(localhost)] == ':')
+ && surl[0] == '/'
+ )
+ {
+ const char * toHit = surl + 1;
+ if (strncmp(toHit, "webdav/", 7) == 0) {
+ toHit += 7;
+ }
+ /* Direct hit */
+ directHit = 1;
+ StringStrcpy(url, "");
+ if (!link_has_authority(toHit))
+ StringStrcat(url, "http://");
+ StringStrcat(url, toHit);
+ } else {
+ /* Transparent proxy */
+ StringStrcpy(url, "http://");
+ StringStrcat(url, StringBuff(host));
+ StringStrcat(url, surl);
+ }
+ } else {
+ msgCode = 500;
+ msgError = "Transparent Proxy Error ('Host' HTTP Request Header Field Missing)";
+ keepAlive = 0;
+ }
+ }
+
+ /* Response */
+ if (msgCode == 0) {
+ if (listRequest == 1) {
+ element = proxytrack_process_HTTP_List(indexes, StringBuff(url));
+ }
+#ifndef NO_WEBDAV
+ else if (listRequest == 2) {
+ if ((element = proxytrack_process_DAV_Request(indexes, StringBuff(url), davDepth)) != NULL) {
+ msgCode = element->statuscode;
+ StringRoom(davHeaders, 1024);
+ sprintf(StringBuff(davHeaders),
+ "DAV: 1, 2\r\n"
+ "MS-Author-Via: DAV\r\n"
+ "Cache-Control: private\r\n");
+ StringLength(davHeaders) = (int) strlen(StringBuff(davHeaders));
+ }
+ }
+#endif
+ else {
+ element = PT_ReadIndex(indexes, StringBuff(url), FETCH_BODY);
+ }
+ if (element == NULL
+#ifndef NO_WEBDAV
+ && listRequest == 2
+#endif
+ && StringLength(url) > 0
+ && StringBuff(url)[StringLength(url) - 1] == '/'
+ )
+ {
+ element = PT_Index_HTML_BuildRootInfo(indexes);
+ if (element != NULL) {
+ element->statuscode = 404; /* HTML page, but in error */
+ }
+ }
+ if (element != NULL) {
+ msgCode = element->statuscode;
+ StringRoom(headers, 8192);
+ sprintf(StringBuff(headers),
+ "HTTP/1.1 %d %s\r\n"
+#ifndef NO_WEBDAV
+ "%s"
+#endif
+ "Content-Type: %s%s%s%s\r\n"
+ "%s%s%s"
+ "%s%s%s"
+ "%s%s%s",
+ /* */
+ msgCode,
+ element->msg,
+#ifndef NO_WEBDAV
+ /* DAV */
+ StringBuff(davHeaders),
+#endif
+ /* Content-type: foo; [ charset=bar ] */
+ element->contenttype,
+ ( ( element->charset[0]) ? "; charset=\"" : ""),
+ element->charset,
+ ( ( element->charset[0]) ? "\"" : ""),
+ /* location */
+ ( ( element->location != NULL && element->location[0]) ? "Location: " : ""),
+ ( ( element->location != NULL && element->location[0]) ? element->location : ""),
+ ( ( element->location != NULL && element->location[0]) ? "\r\n" : ""),
+ /* last-modified */
+ ( ( element->lastmodified[0]) ? "Last-Modified: " : ""),
+ ( ( element->lastmodified[0]) ? element->lastmodified : ""),
+ ( ( element->lastmodified[0]) ? "\r\n" : ""),
+ /* etag */
+ ( ( element->etag[0]) ? "ETag: " : ""),
+ ( ( element->etag[0]) ? element->etag : ""),
+ ( ( element->etag[0]) ? "\r\n" : "")
+ );
+ StringLength(headers) = (int) strlen(StringBuff(headers));
+ } else {
+ /* No query string, no ending / : check the the <url>/ page */
+ if (StringLength(url) > 0 && StringBuff(url)[StringLength(url) - 1] != '/' && strchr(StringBuff(url), '?') == NULL) {
+ StringStrcpy(urlRedirect, StringBuff(url));
+ StringStrcat(urlRedirect, "/");
+ if (PT_LookupIndex(indexes, StringBuff(urlRedirect))) {
+ msgCode = 301; /* Moved Permanently */
+ StringRoom(headers, 8192);
+ sprintf(StringBuff(headers),
+ "HTTP/1.1 %d %s\r\n"
+ "Content-Type: text/html\r\n"
+ "Location: %s\r\n",
+ /* */
+ msgCode,
+ GetHttpMessage(msgCode),
+ StringBuff(urlRedirect)
+ );
+ StringLength(headers) = (int) strlen(StringBuff(headers));
+ /* */
+ StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES));
+ sprintf(StringBuff(output),
+ "<html>"
+ PROXYTRACK_COMMENT_HEADER
+ DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
+ "<head>"
+ "<title>ProxyTrack - Page has moved</title>"
+ "</head>\r\n"
+ "<body>"
+ "<h3>The correct location is:</h3><br />"
+ "<b><a href=\"%s\">%s</a></b><br />"
+ "<br />"
+ "<br />\r\n"
+ "<i>Generated by ProxyTrack " PROXYTRACK_VERSION ", (C) Xavier Roche and other contributors</i>"
+ "\r\n"
+ "</body>"
+ "</header>",
+ StringBuff(urlRedirect),
+ StringBuff(urlRedirect));
+ StringLength(output) = (int) strlen(StringBuff(output));
+ }
+ }
+ if (msgCode == 0) {
+ msgCode = 404;
+ msgError = "Not Found in this cache";
+ }
+ }
+ }
+ } else {
+ msgCode = 500;
+ msgError = "Server Error";
+ keepAlive = 0;
+ }
+ if (StringLength(headers) == 0) {
+ if (msgCode == 0) {
+ msgCode = 500;
+ msgError = "Internal Proxy Error";
+ } else if (msgError == NULL) {
+ msgError = GetHttpMessage(msgCode);
+ }
+ StringRoom(headers, 256);
+ sprintf(StringBuff(headers),
+ "HTTP/1.1 %d %s\r\n"
+ "Content-type: text/html\r\n",
+ msgCode,
+ msgError);
+ StringLength(headers) = (int) strlen(StringBuff(headers));
+ StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES));
+ sprintf(StringBuff(output),
+ "<html>"
+ PROXYTRACK_COMMENT_HEADER
+ DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
+ "<head>"
+ "<title>ProxyTrack - HTTP Proxy Error %d</title>"
+ "</head>\r\n"
+ "<body>"
+ "<h3>A proxy error has occured while processing the request.</h3><br />"
+ "<b>Error HTTP %d: <i>%s</i></b><br />"
+ "<br />"
+ "<br />\r\n"
+ "<i>Generated by ProxyTrack " PROXYTRACK_VERSION ", (C) Xavier Roche and other contributors</i>"
+ "\r\n"
+ "</body>"
+ "</html>",
+ msgCode,
+ msgCode,
+ msgError);
+ StringLength(output) = (int) strlen(StringBuff(output));
+ }
+ {
+ char tmp[20 + 1]; /* 2^64 = 18446744073709551616 */
+ unsigned int dataSize = 0;
+ if (!headRequest) {
+ dataSize = StringLength(output);
+ if (dataSize == 0 && element != NULL) {
+ dataSize = element->size;
+ }
+ }
+ sprintf(tmp, "%d", (int) dataSize);
+ StringStrcat(headers, "Content-length: ");
+ StringStrcat(headers, tmp);
+ StringStrcat(headers, "\r\n");
+ }
+ if (keepAlive) {
+ StringStrcat(headers,
+ "Connection: Keep-Alive\r\n"
+ "Proxy-Connection: Keep-Alive\r\n");
+ } else {
+ StringStrcat(headers,
+ "Connection: Close\r\n"
+ "Proxy-Connection: Close\r\n");
+ }
+ if (msgCode != 500)
+ StringStrcat(headers, "X-Cache: HIT from ");
+ else
+ StringStrcat(headers, "X-Cache: MISS from ");
+ StringStrcat(headers, StringBuff(localhost));
+ StringStrcat(headers, "\r\n");
+
+ /* Logging */
+ {
+ const char * contentType = "text/html";
+ unsigned long int size = StringLength(output) ? StringLength(output) : ( element ? element->size : 0 );
+ /* */
+ String ip = STRING_EMPTY;
+ SOCaddr serverClient;
+ int lenServerClient = (int) sizeof(serverClient);
+ memset(&serverClient, 0, sizeof(serverClient));
+ if (getsockname(soc_c, (struct sockaddr*) &serverClient, &lenServerClient) == 0) {
+ ip = getip(&serverClient, lenServerClient);
+ } else {
+ StringStrcpy(ip, "unknown");
+ }
+ if (element != NULL && element->contenttype[0] != '\0') {
+ contentType = element->contenttype;
+ }
+ LOG("HTTP %s %d %d %s %s %s" _ StringBuff(ip) _ msgCode _ (int)size _ command _ StringBuff(url) _ contentType);
+ StringFree(ip);
+ }
+
+ /* Send reply */
+ StringStrcat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n");
+ StringStrcat(headers, "\r\n"); /* Headers separator */
+ if (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers)
+ || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output))
+ || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, element->size, 0) != element->size)
+ )
+ {
+ keepAlive = 0; /* Error, abort connection */
+ }
+ PT_Element_Delete(&element);
+
+ /* Shutdown (FIN) and wait until confirmed */
+ if (!keepAlive) {
+ char c;
+#ifdef _WIN32
+ shutdown(soc_c, SD_SEND);
+#else
+ shutdown(soc_c, 1);
+#endif
+ while(recv(soc_c, ((char*)&c), 1, 0) > 0);
+ }
+ } while(keepAlive);
+
+#if HTS_WIN
+ closesocket(soc_c);
+#else
+ close(soc_c);
+#endif
+
+ StringFree(url);
+ StringFree(urlRedirect);
+ StringFree(headers);
+ StringFree(output);
+ StringFree(host);
+
+ if (buffer)
+ free(buffer);
+}
+
+/*
+ * Portability macros describing a thread entry point, used by startThread()
+ * and by the thread stubs that follow:
+ *   PTHREAD_RETURN    what an entry point returns (nothing on Win32, NULL otherwise)
+ *   PTHREAD_TYPE      return type of an entry point (void on Win32, void* otherwise)
+ *   PTHREAD_TYPE_FNC  calling convention of an entry point (__cdecl on Win32)
+ */
+#ifdef _WIN32
+#define PTHREAD_RETURN
+#define PTHREAD_TYPE void
+#define PTHREAD_TYPE_FNC __cdecl
+#else
+#define PTHREAD_RETURN NULL
+#define PTHREAD_TYPE void*
+#define PTHREAD_TYPE_FNC
+#endif
+
+/*
+ * Generic threaded function start.
+ *
+ * Runs funct(param) in a new thread (_beginthread on Win32, a detached
+ * pthread elsewhere).
+ *
+ * param must be a heap block: it is free()d here when the thread cannot be
+ * created; otherwise it is handed to funct.
+ *
+ * Returns 1 when the thread was started, 0 when param is NULL or the thread
+ * could not be created.
+ */
+static int startThread(PTHREAD_TYPE (PTHREAD_TYPE_FNC * funct)(void* ),
+                       void* param)
+{
+  if (param == NULL) {
+    return 0;
+  }
+#ifdef _WIN32
+  if (_beginthread(funct, 0, param) != -1) {
+    return 1;
+  }
+  free(param);
+  return 0;
+#else
+  {
+    pthread_t handle = 0;
+    const int retcode = pthread_create(&handle, NULL, funct, param);
+    if (retcode == 0) {
+      /* detach the thread from the main process so that it can be independent */
+      pthread_detach(handle);
+      return 1;
+    }
+    /* error */
+    free(param);
+    return 0;
+  }
+#endif
+}
+
+/* Generic socket/index structure */
+/*
+ * Argument bundle passed to proxytrack_process_th(): it is allocated by
+ * proxytrack_process_generic() and freed by the thread stub once its three
+ * fields have been copied out.
+ */
+typedef struct proxytrack_process_th_p {
+ T_SOC soc_c;
+ PT_Indexes indexes;
+ void (*process)(PT_Indexes indexes, T_SOC soc_c);
+} proxytrack_process_th_p;
+
+/*
+ * Generic socket/index function stub.
+ *
+ * Thread entry point: param_ is a heap-allocated proxytrack_process_th_p.
+ * Its contents are copied, the block is released, and only then is the
+ * handler invoked.
+ */
+static PTHREAD_TYPE PTHREAD_TYPE_FNC proxytrack_process_th(void* param_) {
+  proxytrack_process_th_p *const args = (proxytrack_process_th_p *) param_;
+  const proxytrack_process_th_p job = *args;
+  free(args);
+  job.process(job.indexes, job.soc_c);
+  return PTHREAD_RETURN ;
+}
+
+/*
+ * Process generic socket/index operation.
+ *
+ * Runs process(indexes, soc_c) in a new thread.
+ * Returns 1 when the thread was started, 0 on allocation or thread-creation
+ * failure (the argument block is released by startThread() in the latter
+ * case).
+ */
+static int proxytrack_process_generic(void (*process)(PT_Indexes indexes, T_SOC soc_c),
+                                      PT_Indexes indexes, T_SOC soc_c)
+{
+  proxytrack_process_th_p *const job = calloc(sizeof(proxytrack_process_th_p), 1);
+  if (job == NULL) {
+    CRITICAL("proxytrack_process_generic:Memory exhausted");
+    return 0;
+  }
+  job->soc_c = soc_c;
+  job->indexes = indexes;
+  job->process = process;
+  return startThread(proxytrack_process_th, job);
+}
+
+/*
+ * Process HTTP proxy requests: serve the client connected on soc through
+ * proxytrack_process_HTTP() in its own thread.
+ * Returns what proxytrack_process_generic() returns (1 started, 0 failed).
+ */
+static int proxytrack_process_HTTP_threaded(PT_Indexes indexes, T_SOC soc) {
+  const int started = proxytrack_process_generic(proxytrack_process_HTTP, indexes, soc);
+  return started;
+}
+
+/*
+ * HTTP Server.
+ *
+ * Accept loop on the listening socket soc: every accepted client is handed
+ * to proxytrack_process_HTTP_threaded(). When the worker thread cannot be
+ * started nobody else owns the client socket, so it is closed here instead
+ * of being leaked.
+ *
+ * The loop only ends if soc is INVALID_SOCKET; the function then returns 1.
+ */
+static int proxytrack_start_HTTP(PT_Indexes indexes, T_SOC soc) {
+  while(soc != INVALID_SOCKET) {
+    T_SOC soc_c;
+    struct sockaddr clientAddr;
+    int clientAddrLen = sizeof(struct sockaddr);
+    memset(&clientAddr, 0, sizeof(clientAddr));
+    soc_c = accept(soc, &clientAddr, &clientAddrLen);
+    if (soc_c == INVALID_SOCKET) {
+      continue;
+    }
+    if (!proxytrack_process_HTTP_threaded(indexes, soc_c)) {
+      CRITICAL("proxytrack_start_HTTP::Can not fork a thread");
+      /* No thread took ownership of the connection: release it */
+#ifdef _WIN32
+      closesocket(soc_c);
+#else
+      close(soc_c);
+#endif
+    }
+  }
+  if (soc != INVALID_SOCKET) {
+#ifdef _WIN32
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+  }
+  return 1;
+}
+
+/*
+ * Network order is big endian.
+ *
+ * READ_NETxx(buffer) decode an unsigned 16/32-bit big-endian value from the
+ * bytes at buffer; WRITE_NETxx(buffer, value) encode one. All arithmetic is
+ * done on unsigned int so that a high byte >= 0x80 never overflows a signed
+ * int when shifted. Arguments are parenthesized and may be arbitrary pointer
+ * expressions; they are evaluated more than once, so they must not have side
+ * effects.
+ */
+#define READ_NET16(buffer) ( ( ((unsigned int) ((const unsigned char*)(buffer))[0]) << 8 ) \
+                           | ((unsigned int) ((const unsigned char*)(buffer))[1]) )
+#define READ_NET32(buffer) ( ( READ_NET16(buffer) << 16 ) \
+                           | READ_NET16(((const unsigned char*)(buffer)) + 2) )
+#define WRITE_NET8(buffer, value) do { \
+  ((unsigned char*)(buffer))[0] = (unsigned char)(value); \
+} while(0)
+#define WRITE_NET16(buffer, value) do { \
+  ((unsigned char*)(buffer))[0] = (unsigned char) ((((unsigned short)(value)) >> 8) & 0xff); \
+  ((unsigned char*)(buffer))[1] = (unsigned char) (((unsigned short)(value)) & 0xff); \
+} while(0)
+#define WRITE_NET32(buffer, value) do { \
+  WRITE_NET16((buffer), ( ((unsigned int)(value)) >> 16 ) & 0xffff); \
+  WRITE_NET16(((unsigned char*)(buffer)) + 2, ( ((unsigned int)(value)) ) & 0xffff); \
+} while(0)
+
+/*
+ * Build an ICP packet (20-byte header followed by the optional Message) and
+ * send it to clientAddr through the datagram socket soc.
+ *
+ * When Message_Length is 0 and Message is not NULL, Message is taken as a
+ * NUL-terminated string and is sent with its terminator.
+ *
+ * Returns 1 when the whole packet was sent, 0 otherwise (allocation failure
+ * or short/failed sendto()).
+ */
+static int ICP_reply(struct sockaddr * clientAddr,
+                     int clientAddrLen,
+                     T_SOC soc,
+                     /* */
+                     unsigned char Opcode,
+                     unsigned char Version,
+                     unsigned short Message_Length,
+                     unsigned int Request_Number,
+                     unsigned int Options,
+                     unsigned int Option_Data,
+                     unsigned int Sender_Host_Address,
+                     unsigned char *Message
+                     )
+{
+  unsigned long int BufferSize;
+  unsigned char * packet;
+  int sent;
+  /* We have to get the message size */
+  if (Message != NULL && Message_Length == 0) {
+    Message_Length = (unsigned int) strlen(Message) + 1; /* NULL terminated */
+  }
+  BufferSize = 20 + Message_Length;
+  packet = malloc(BufferSize);
+  if (packet == NULL) {
+    return 0;
+  }
+  /* Header */
+  WRITE_NET8(&packet[0], Opcode);
+  WRITE_NET8(&packet[1], Version);
+  WRITE_NET16(&packet[2], Message_Length);
+  WRITE_NET32(&packet[4], Request_Number);
+  WRITE_NET32(&packet[8], Options);
+  WRITE_NET32(&packet[12], Option_Data);
+  WRITE_NET32(&packet[16], Sender_Host_Address);
+  /* Payload */
+  if (Message_Length > 0 && Message != NULL) {
+    memcpy(packet + 20, Message, Message_Length);
+  }
+  sent = (sendto(soc, packet, BufferSize, 0, clientAddr, clientAddrLen) == BufferSize) ? 1 : 0;
+  free(packet);
+  return sent;
+}
+
+/*
+ * ICP Server.
+ *
+ * Receives ICP datagrams on soc and answers them with ICP_reply():
+ *   - ICP_OP_QUERY: DENIED when there is no index, HIT when the URL (or the
+ *     URL followed by '/') is found in indexes, MISS_NOFETCH otherwise;
+ *   - ICP_OP_SECHO: echoed back;
+ *   - any other known opcode, a wrong version or an oversized
+ *     Message_Length: ICP_OP_ERR;
+ *   - unknown opcodes and datagrams shorter than the 20-byte header are
+ *     not answered.
+ * Every datagram is logged.
+ *
+ * The datagram is NUL-terminated at its real length (buffer[n]) so that the
+ * URL can never run into bytes left over from a previous, longer datagram.
+ * When the datagram is too short to carry a URL (fewer than 24 bytes), the
+ * URL is the empty string.
+ *
+ * Returns -1 when the receive buffer cannot be allocated (soc is closed),
+ * 1 when the loop ends.
+ */
+static int proxytrack_start_ICP(PT_Indexes indexes, T_SOC soc) {
+  /* "ICP messages MUST not exceed 16,384 octets in length." (RFC2186) */
+  int bufferSize = 16384;
+  /* One more byte than the maximum datagram, for the NUL terminator */
+  unsigned char * buffer = (unsigned char*) malloc(bufferSize + 1);
+  if (buffer == NULL) {
+    CRITICAL("proxytrack_start_ICP:memory exhausted");
+#ifdef _WIN32
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+    return -1;
+  }
+  while(soc != INVALID_SOCKET) {
+    struct sockaddr clientAddr;
+    int clientAddrLen = sizeof(struct sockaddr);
+    int n;
+    memset(&clientAddr, 0, sizeof(clientAddr));
+    n = recvfrom(soc, (char*)buffer, bufferSize, 0, &clientAddr, &clientAddrLen);
+    if (n != -1) {
+      const char * LogRequest = "ERROR";
+      const char * LogReply = "ERROR";
+      unsigned char * UrlRequest = NULL;
+      if (n >= 20) {
+        enum {
+          ICP_OP_MIN = 0,
+          ICP_OP_INVALID = 0,
+          ICP_OP_QUERY = 1,
+          ICP_OP_HIT = 2,
+          ICP_OP_MISS = 3,
+          ICP_OP_ERR = 4,
+          ICP_OP_SECHO = 10,
+          ICP_OP_DECHO = 11,
+          ICP_OP_MISS_NOFETCH = 21,
+          ICP_OP_DENIED = 22,
+          ICP_OP_HIT_OBJ = 23,
+          ICP_OP_MAX = ICP_OP_HIT_OBJ
+        };
+        /*
+         * Header layout: opcode(1) version(1) length(2) request number(4)
+         * options(4) option data(4) sender host address(4); the last three
+         * fields are not used here.
+         */
+        unsigned char Opcode = buffer[0];
+        unsigned char Version = buffer[1];
+        unsigned short Message_Length = READ_NET16(&buffer[2]);
+        unsigned int Request_Number = READ_NET32(&buffer[4]);        /* Session ID */
+        unsigned char* Payload = &buffer[20];
+        /* The URL follows 4 payload bytes; empty string if the datagram is too short */
+        unsigned char* PayloadUrl = ( n >= 24 ) ? &Payload[4] : &buffer[n];
+        buffer[n] = '\0';     /* Ensure payload is NULL terminated at its real end */
+        if (Message_Length <= bufferSize - 20) {
+          if (Opcode >= ICP_OP_MIN && Opcode <= ICP_OP_MAX) {
+            if (Version == 2) {
+              switch(Opcode) {
+              case ICP_OP_QUERY:
+                {
+                  unsigned int UrlRequestSize;
+                  UrlRequest = PayloadUrl;
+                  UrlRequestSize = (unsigned int)strlen((char*)UrlRequest);
+                  LogRequest = "ICP_OP_QUERY";
+                  if (indexes == NULL) {
+                    ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_DENIED, Version, 0, Request_Number, 0, 0, 0, UrlRequest);
+                    LogReply = "ICP_OP_DENIED";
+                  } else if (PT_LookupIndex(indexes, (char*)UrlRequest)) {
+                    ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_HIT, Version, 0, Request_Number, 0, 0, 0, UrlRequest);
+                    LogReply = "ICP_OP_HIT";
+                  } else {
+                    /* No query string, no ending / : check the <url>/ page */
+                    if (UrlRequestSize > 0 && UrlRequest[UrlRequestSize - 1] != '/' && strchr((char*)UrlRequest, '?') == NULL) {
+                      char * UrlRedirect = malloc(UrlRequestSize + 1 + 1);
+                      if (UrlRedirect != NULL) {
+                        sprintf(UrlRedirect, "%s/", (char*)UrlRequest);
+                        if (PT_LookupIndex(indexes, UrlRedirect)) {   /* We'll generate a redirect */
+                          ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_HIT, Version, 0, Request_Number, 0, 0, 0, UrlRequest);
+                          LogReply = "ICP_OP_HIT";
+                          free(UrlRedirect);
+                          break;
+                        }
+                        free(UrlRedirect);
+                      }
+                    }
+                    /* We won't retrieve the cache MISS online, no way! */
+                    ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_MISS_NOFETCH, Version, 0, Request_Number, 0, 0, 0, UrlRequest);
+                    LogReply = "ICP_OP_MISS_NOFETCH";
+                  }
+                }
+                break;
+              case ICP_OP_SECHO:
+                {
+                  UrlRequest = PayloadUrl;
+                  LogRequest = "ICP_OP_SECHO";
+                  LogReply = "ICP_OP_SECHO";
+                  ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_SECHO, Version, 0, Request_Number, 0, 0, 0, UrlRequest);
+                }
+                break;
+              default:
+                LogRequest = "NOTIMPLEMENTED";
+                LogReply = "ICP_OP_ERR";
+                ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_ERR, Version, 0, Request_Number, 0, 0, 0, NULL);
+                break;
+              }
+            } else {
+              ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_ERR, 2, 0, Request_Number, 0, 0, 0, NULL);
+            }
+          }  /* Ignored (RFC2186) */
+        } else {
+          ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_ERR, Version, 0, Request_Number, 0, 0, 0, NULL);
+        }
+      }
+
+      /* Logging */
+      {
+        String ip = STRING_EMPTY;
+        SOCaddr serverClient;
+        int lenServerClient = (int) sizeof(serverClient);
+        SOCaddr_copyaddr(serverClient, lenServerClient, &clientAddr, clientAddrLen);
+        if (lenServerClient > 0) {
+          ip = getip(&serverClient, lenServerClient);
+        } else {
+          StringStrcpy(ip, "unknown");
+        }
+        LOG("ICP %s %s/%s %s" _ StringBuff(ip) _ LogRequest _ LogReply _ (UrlRequest ? (const char*)UrlRequest : "-") );
+        StringFree(ip);
+      }
+
+    }
+  }
+  if (soc != INVALID_SOCKET) {
+#ifdef _WIN32
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+  }
+  free(buffer);
+  return 1;
+}
+
+/*
+ * Thread-compatible adapter for the ICP server: proxytrack_process_generic()
+ * expects a void (*)(PT_Indexes, T_SOC) handler whereas
+ * proxytrack_start_ICP() returns int. Calling a function through a pointer
+ * of an incompatible type is undefined, hence this wrapper.
+ */
+static void proxytrack_start_ICP_thread(PT_Indexes indexes, T_SOC socICP) {
+  (void) proxytrack_start_ICP(indexes, socICP);
+}
+
+/*
+ * Start both servers: the ICP server in its own thread, then the HTTP
+ * server in the calling thread.
+ * Returns 0 when the ICP thread cannot be started or when the HTTP server
+ * returns 0, and 1 otherwise.
+ */
+static int proxytrack_start(PT_Indexes indexes, T_SOC soc, T_SOC socICP) {
+  if (!proxytrack_process_generic(proxytrack_start_ICP_thread, indexes, socICP)) {
+    return 0;
+  }
+  return proxytrack_start_HTTP(indexes, soc) ? 1 : 0;
+}
+
diff --git a/src/proxy/proxytrack.h b/src/proxy/proxytrack.h
new file mode 100644
index 0000000..498f4d8
--- /dev/null
+++ b/src/proxy/proxytrack.h
@@ -0,0 +1,288 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: ProxyTrack, httrack cache-based proxy */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef WEBHTTRACK_PROXYTRACK
+#define WEBHTTRACK_PROXYTRACK
+
+/* Version */
+#define PROXYTRACK_VERSION "0.4"
+
+/* Store manager */
+#include "../minizip/mztools.h"
+#include "store.h"
+
+#include <sys/stat.h>
+
+/* generic */
+
+int proxytrack_main(char* proxyAddr, int proxyPort,
+ char* icpAddr, int icpPort,
+ PT_Indexes index);
+
+/* Spaces: CR,LF,TAB,FF */
+/*
+ * Character classes (arguments are evaluated several times):
+ *   is_space       space, LF, CR, TAB, FF, VT and also both quote characters
+ *   is_realspace   space, LF, CR, TAB, FF, VT (no quotes)
+ *   is_taborspace  space or TAB
+ *   is_quote       double or single quote
+ *   is_retorsep    LF, CR or TAB
+ */
+#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') )
+#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) )
+#define is_taborspace(c) ( ((c)==' ') || ((c)==9) )
+#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') )
+#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) )
+
+/* Static definitions */
+
+/*
+ * Logging macros, all writing to stderr.
+ *
+ * "_" expands to a comma: it lets a format string and its arguments travel
+ * as ONE macro argument, e.g. LOG("HTTP %s %d" _ ip _ code); the commas only
+ * appear once msg is expanded inside fprintf(). Consequently msg is used as
+ * a printf format and must start with a string literal.
+ *
+ * CRITICAL_ and DEBUG_ take an explicit file/line; CRITICAL uses
+ * __FILE__/__LINE__. DEBUG and DEBUG_ expand to nothing unless _DEBUG or
+ * DEBUG is defined; both are defined in either configuration.
+ */
+#define _ ,
+#define CRITICAL_(msg, file, line) do { \
+  fprintf(stderr, "* critical: "); \
+  fprintf(stderr, msg); \
+  fprintf(stderr, " at %s:%d\n", file, line); \
+  fflush(stderr); \
+} while(0)
+#define CRITICAL(msg) do { \
+  fprintf(stderr, "* critical: "); \
+  fprintf(stderr, msg); \
+  fprintf(stderr, " at %s:%d\n", __FILE__, __LINE__); \
+  fflush(stderr); \
+} while(0)
+
+#define WARNING(msg) do { \
+  fprintf(stderr, "* warning: "); \
+  fprintf(stderr, msg); \
+  fprintf(stderr, "\n"); \
+  fflush(stderr); \
+} while(0)
+
+#define LOG(msg) do { \
+  fprintf(stderr, "* log: "); \
+  fprintf(stderr, msg); \
+  fprintf(stderr, "\n"); \
+  fflush(stderr); \
+} while(0)
+
+#if defined(_DEBUG) || defined(DEBUG)
+#define DEBUG_(msg, file, line) do { \
+  fprintf(stderr, "* debug: "); \
+  fprintf(stderr, msg); \
+  fprintf(stderr, " at %s:%d\n", file, line); \
+  fflush(stderr); \
+} while(0)
+#define DEBUG(msg) do { \
+  fprintf(stderr, "* debug: "); \
+  fprintf(stderr, msg); \
+  fprintf(stderr, "\n"); \
+  fflush(stderr); \
+} while(0)
+#else
+#define DEBUG_(msg, file, line) do { } while(0)
+#define DEBUG(msg) do { } while(0)
+#endif
+
+/* Header for generated pages */
+/* String-literal fragment, meant to be concatenated at compile time into the HTML pages generated by the proxy */
+#define PROXYTRACK_COMMENT_HEADER \
+ "<!-- Generated by ProxyTrack " PROXYTRACK_VERSION " build " __DATE__ " -->\r\n" \
+ "<!-- This is an add-on for HTTrack " HTTRACK_VERSIONID " -->\r\n"
+
+/* See IE "feature" (MSKB Q294807) */
+/* String-literal fragment made of HTML comments only; presumably padding that makes generated error pages large enough for IE to display them as-is -- confirm against the KB article */
+#define DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES \
+ "<!-- Start Disable IE Friendly HTTP Error Messages -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- _-._.--._._-._.--._._-._.--._._-._.--._._-._.--._. -->\r\n" \
+ "<!-- End Disable IE Friendly HTTP Error Messages -->\r\n"
+
+/*
+ * Home directory of the current user: the value of the HOME environment
+ * variable, or "." when it is not set. The result must not be modified.
+ */
+static char* gethomedir(void) {
+  char* const home = getenv( "HOME" );
+  return (home != NULL) ? home : ".";
+}
+
+/*
+ * Read one line from fp into s (a buffer of max bytes).
+ *
+ * Reading stops at LF, at end of file, or once max-1 characters have been
+ * stored. CR, NUL, TAB and FF characters are dropped. s is always
+ * NUL-terminated when max >= 1; nothing is read or written when max <= 0,
+ * and nothing is read when max == 1 (there is only room for the
+ * terminator).
+ *
+ * Returns the number of characters stored in s, terminator excluded.
+ */
+static int linput(FILE* fp,char* s,int max) {
+  int j = 0;
+  if (max <= 0)
+    return 0;
+  while (j < max - 1) {
+    const int c = fgetc(fp);
+    if (c == EOF || c == 10)      /* end of file or end of line */
+      break;
+    switch(c) {
+    case 13:                      /* skip CR */
+    case 0: case 9: case 12:      /* skip these characters */
+      break;
+    default:
+      s[j++] = (char) c;
+      break;
+    }
+  }
+  s[j] = '\0';
+  return j;
+}
+
+/*
+ * Tell whether the link lien carries an authority part, that is whether it
+ * starts with "//", optionally preceded by an alphabetic scheme and ':'
+ * ("http://host/", "//host/").
+ * Returns 1 if so, 0 otherwise (including an alphabetic prefix that is not
+ * followed by ':').
+ */
+static int link_has_authority(const char* lien) {
+  const char* cursor = lien;
+  if (isalpha((const unsigned char)*cursor)) {
+    /* Skip scheme? */
+    do {
+      cursor++;
+    } while (isalpha((const unsigned char)*cursor));
+    if (*cursor != ':')
+      return 0;
+    cursor++;
+  }
+  return (strncmp(cursor,"//",2) == 0) ? 1 : 0;
+}
+
+/*
+ * Return a pointer inside source located after the scheme ("http:", "ftp:",
+ * "https:", "file:" or "mms:", tested in that order through strfield()) and
+ * after the "//" that may follow. Each part is skipped only if present.
+ */
+static const char* jump_protocol(const char* source) {
+  int p;
+  /* scheme */
+  /* "Comparisons of scheme names MUST be case-insensitive" (RFC2616) */
+  if ((p = strfield(source,"http:"))
+      || (p = strfield(source,"ftp:"))
+      || (p = strfield(source,"https:"))
+      || (p = strfield(source,"file:"))
+      || (p = strfield(source,"mms:")))
+  {
+    source += p;
+  }
+  /* net_path */
+  return (strncmp(source,"//",2) == 0) ? source + 2 : source;
+}
+
+/*
+ * Find the last occurrence of c in s located strictly before limit and
+ * return a pointer to the character FOLLOWING it, or NULL when there is
+ * none. With limit == NULL the whole string is searched.
+ */
+static const char* strrchr_limit(const char* s, char c, const char* limit) {
+  const char* after = NULL;
+  const char* hit;
+  if (limit == NULL) {
+    hit = strrchr(s, c);
+    return (hit != NULL) ? (hit + 1) : NULL;
+  }
+  /* Walk forward from match to match until the limit is reached */
+  for (hit = strchr(s, c); hit != NULL && hit < limit; hit = strchr(after, c)) {
+    after = hit + 1;
+  }
+  return after;
+}
+
+/*
+ * Return a pointer inside source located after the scheme, the "//" and
+ * the "...@" part (searched before the first '/'), when present.
+ * The exact string "file://" is returned unchanged.
+ */
+static const char* jump_protocol_and_auth(const char* source) {
+  const char* host;
+  const char* afterAuth;
+  if (strcmp(source, "file://") == 0)
+    return source;
+  host = jump_protocol(source);
+  afterAuth = strrchr_limit(host, '@', strchr(host,'/'));
+  if (afterAuth == NULL)
+    return host;
+  return afterAuth;
+}
+
+/* Fallback min/max, defined only when not already provided; arguments are evaluated twice, so they must not have side effects */
+#ifndef min
+#define min(a,b) ((a)>(b)?(b):(a))
+#endif
+#ifndef max
+#define max(a,b) ((a)>(b)?(a):(b))
+#endif
+/*
+ * Read one line from fp with linput() and store it in s without its
+ * trailing whitespace (is_realspace) and without its leading spaces/tabs.
+ * s is set to the empty string when nothing remains or when the temporary
+ * buffer cannot be allocated.
+ * Returns the number of characters stored in s, terminator excluded.
+ */
+static int linput_trim(FILE* fp,char* s,int max) {
+  int kept;
+  char* start;
+  char* const line = (char*) malloc(max+2);
+  s[0] = '\0';
+  if (line == NULL)
+    return 0;
+  /* read line */
+  kept = linput(fp, line, max);
+  /* drop trailing spaces and tabs */
+  while (kept > 0 && is_realspace(line[kept - 1])) {
+    line[--kept] = '\0';
+  }
+  /* skip leading spaces */
+  for (start = line; kept > 0 && (*start == ' ' || *start == '\t'); start++) {
+    kept--;
+  }
+  if (kept > 0) {
+    memcpy(s, start, kept);       /* can copy \0 chars */
+    s[kept] = '\0';
+  }
+  free(line);
+  return kept;
+}
+
+// copy of concat
+#define HTS_URLMAXSIZE 1024
+/* Ring of 16 scratch buffers handed out in turn by concat() */
+typedef struct concat_strc {
+  char buff[16][HTS_URLMAXSIZE*2*2];
+  int rol;
+} concat_strc;
+/*
+ * Concatenate a and b (either may be NULL, meaning "") into one of 16
+ * rotating static buffers and return that buffer.
+ *
+ * The result stays valid until concat() has been called 16 more times; it
+ * is truncated to the buffer capacity (HTS_URLMAXSIZE*4 - 1 characters)
+ * rather than overflowing. If the buffers cannot be allocated, a static
+ * empty string is returned.
+ *
+ * Not reentrant: the buffers and the rotation index are shared.
+ */
+static char* concat(const char* a,const char* b) {
+  static concat_strc* strc = NULL;
+  static char empty[1] = "";
+  char* dest;
+  size_t room;
+  if (strc == NULL) {
+    /* One structure already holds the 16 buffers */
+    strc = (concat_strc*) calloc(1, sizeof(concat_strc));
+    if (strc == NULL)
+      return empty;
+  }
+  strc->rol = ((strc->rol + 1) % 16);    // roving pointer
+  dest = strc->buff[strc->rol];
+  room = sizeof(strc->buff[0]) - 1;      // keep one byte for the terminator
+  dest[0] = '\0';
+  if (a != NULL)
+    strncat(dest, a, room);
+  if (b != NULL)
+    strncat(dest, b, room - strlen(dest));
+  return dest;
+}
+
+#ifndef S_ISREG
+#define S_ISREG(m) ((m) & _S_IFREG)
+#endif
+/* Return 1 when s names an existing regular file, 0 otherwise (missing, directory, stat() failure...) */
+static int fexist(char* s) {
+  struct stat info;
+  memset(&info, 0, sizeof(info));
+  if (stat(s, &info) != 0)
+    return 0;
+  return S_ISREG(info.st_mode) ? 1 : 0;
+}
+
+/*
+ * Path helpers.
+ *   fconcat(a,b)  concatenation of a and b in a concat() rotating buffer
+ *   fconv(a)      a itself outside Win32; on Win32 a copy of a, held in a
+ *                 concat() rotating buffer
+ * On Win32 every '/' of the result is turned into '\\'.
+ */
+#ifndef _WIN32
+#define fconv(a) (a)
+#define fconcat(a,b) concat(a,b)
+#endif
+
+#ifdef _WIN32
+/* Replace in place every '/' of a by '\\'; returns a */
+static char* __fconv(char* a) {
+  char* p;
+  for (p = a; *p != '\0'; p++) {
+    if (*p == '/')      // convert
+      *p = '\\';
+  }
+  return a;
+}
+static char* fconcat(char* a,char* b) {
+  char* const joined = concat(a,b);
+  return __fconv(joined);
+}
+static char* fconv(char* a) {
+  char* const copy = concat(a,"");
+  return __fconv(copy);
+}
+#endif
+
+#endif
diff --git a/src/proxy/store.c b/src/proxy/store.c
new file mode 100644
index 0000000..1d17574
--- /dev/null
+++ b/src/proxy/store.c
@@ -0,0 +1,1505 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: Cache manager for ProxyTrack */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Locking */
+#ifdef _WIN32
+#include <process.h> /* _beginthread, _endthread */
+#else
+#include <pthread.h>
+#endif
+
+#include "htsglobal.h"
+
+#define HTS_INTERNAL_BYTECODE
+#include "htsinthash.h"
+#undef HTS_INTERNAL_BYTECODE
+#include "../minizip/mztools.h"
+
+#include "htscore.h"
+#include "htsback.h"
+
+#include "store.h"
+#include "proxystrings.h"
+#include "proxytrack.h"
+
+/* Unlocked functions */
+
+static int PT_LookupCache__New_u(PT_Index index, const char* url);
+static PT_Element PT_ReadCache__New_u(PT_Index index, const char* url, int flags);
+
+static int PT_LookupCache__Old_u(PT_Index index, const char* url);
+static PT_Element PT_ReadCache__Old_u(PT_Index index, const char* url, int flags);
+
+
+/* Locking */
+
+/*
+ * Thin mutex layer over Win32 mutex handles or pthread mutexes:
+ * MutexInit creates the mutex, MutexLock/MutexUnlock acquire/release it and
+ * MutexFree destroys it. Return codes of the underlying calls are ignored.
+ */
+#ifdef _WIN32
+void MutexInit(PT_Mutex *pMutex) {
+  /* default security, not initially owned, unnamed */
+  *pMutex = CreateMutex(NULL, FALSE, NULL);
+}
+
+void MutexLock(PT_Mutex *pMutex) {
+  (void) WaitForSingleObject(*pMutex, INFINITE);
+}
+
+void MutexUnlock(PT_Mutex *pMutex) {
+  (void) ReleaseMutex(*pMutex);
+}
+
+void MutexFree(PT_Mutex *pMutex) {
+  (void) CloseHandle(*pMutex);
+  *pMutex = NULL;
+}
+#else
+void MutexInit(PT_Mutex *pMutex) {
+  /* default attributes */
+  (void) pthread_mutex_init(pMutex, NULL);
+}
+
+void MutexLock(PT_Mutex *pMutex) {
+  (void) pthread_mutex_lock(pMutex);
+}
+
+void MutexUnlock(PT_Mutex *pMutex) {
+  (void) pthread_mutex_unlock(pMutex);
+}
+
+void MutexFree(PT_Mutex *pMutex) {
+  (void) pthread_mutex_destroy(pMutex);
+}
+#endif
+
+/* Indexes */
+
+typedef struct _PT_Index__New _PT_Index__New;
+typedef struct _PT_Index__Old _PT_Index__Old;
+typedef struct _PT_Index_Functions _PT_Index_Functions;
+
+typedef struct _PT_Index__New *PT_Index__New;
+typedef struct _PT_Index__Old *PT_Index__Old;
+typedef struct _PT_Index_Functions *PT_Index_Functions;
+
+/*
+ * Cache formats. The value is stored in _PT_Index.type and is used as the
+ * row number in _IndexFuncts[]; PT_LoadCache() selects PT_CACHE__NEW for
+ * ".zip" files and PT_CACHE__OLD for ".ndx"/".dat" files.
+ */
+enum {
+ PT_CACHE_UNDEFINED = -1,
+ PT_CACHE_MIN = 0,
+ PT_CACHE__NEW = PT_CACHE_MIN,
+ PT_CACHE__OLD,
+ PT_CACHE_MAX = PT_CACHE__OLD
+};
+
+/* Per-format back-end functions, referenced by the dispatch table below */
+static int PT_LoadCache__New(PT_Index index, const char *filename);
+static void PT_Index_Delete__New(PT_Index *pindex);
+static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags);
+static int PT_LookupCache__New(PT_Index index, const char* url);
+/**/
+static int PT_LoadCache__Old(PT_Index index, const char *filename);
+static void PT_Index_Delete__Old(PT_Index *pindex);
+static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags);
+static int PT_LookupCache__Old(PT_Index index, const char* url);
+
+/* One set of back-end operations for a cache format */
+struct _PT_Index_Functions {
+ int (*PT_LoadCache)(PT_Index index, const char *filename);
+ void (*PT_Index_Delete)(PT_Index *pindex);
+ PT_Element (*PT_ReadCache)(PT_Index index, const char* url, int flags);
+ int (*PT_LookupCache)(PT_Index index, const char* url);
+};
+
+/* Dispatch table: row order must match the PT_CACHE__* values; the last row is an all-NULL terminator */
+static _PT_Index_Functions _IndexFuncts[] = {
+ { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New },
+ { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old },
+ { NULL, NULL, NULL, NULL }
+};
+
+/*
+ * Leading members shared by every index format. They are placed first in
+ * each format structure so that they can also be reached through
+ * _PT_Index.slots.common, whatever the actual format.
+ */
+#define PT_INDEX_COMMON_STRUCTURE \
+ time_t timestamp; \
+ inthash hash; \
+ char startUrl[1024]
+
+/* Index state for the PT_CACHE__NEW format (selected for ".zip" files) */
+struct _PT_Index__New {
+ PT_INDEX_COMMON_STRUCTURE;
+ char path[1024]; /* either empty, or must include ending / */
+ int fixedPath;
+ int safeCache;
+ unzFile zFile;
+ PT_Mutex zFileLock;
+};
+
+/* Index state for the PT_CACHE__OLD format (selected for ".ndx"/".dat" files) */
+struct _PT_Index__Old {
+ PT_INDEX_COMMON_STRUCTURE;
+ char filenameDat[1024];
+ char filenameNdx[1024];
+ FILE *dat,*ndx;
+ PT_Mutex fileLock;
+ int version;
+ char lastmodified[1024];
+ char path[1024]; /* either empty, or must include ending / */
+ int fixedPath;
+ int safeCache;
+};
+
+/* One loaded cache: "type" is a PT_CACHE__* value and tells which member of "slots" is in use */
+struct _PT_Index {
+ int type;
+ union {
+ _PT_Index__New formatNew;
+ _PT_Index__Old formatOld;
+ struct {
+ PT_INDEX_COMMON_STRUCTURE;
+ } common;
+ } slots;
+};
+
+/* Set of loaded caches: "index" is an array of index_size entries; "cil" maps a name to a position in that array (see PT_Enumerate) */
+struct _PT_Indexes {
+ inthash cil;
+ struct _PT_Index **index;
+ int index_size;
+};
+
+/* NOTE(review): the two structures below are not used in the visible part of this file; presumably an in-memory item cache -- confirm */
+struct _PT_CacheItem {
+ time_t lastUsed;
+ size_t size;
+ void* data;
+};
+
+struct _PT_Cache {
+ inthash index;
+ size_t maxSize;
+ size_t totalSize;
+ int count;
+};
+
+/*
+ * Create an empty set of indexes (no cache loaded yet).
+ * Returns NULL when memory is exhausted. Release with PT_Delete().
+ */
+PT_Indexes PT_New() {
+  PT_Indexes index = (PT_Indexes) calloc(1, sizeof(_PT_Indexes));
+  if (index == NULL) {
+    return NULL;
+  }
+  index->cil = inthash_new(127);
+  index->index_size = 0;
+  index->index = NULL;
+  return index;
+}
+
+/*
+ * Release a set of indexes created by PT_New(); NULL is accepted.
+ * NOTE(review): only the central hash and the structure itself are freed
+ * here, not index->index nor the entries it points to -- confirm who owns
+ * them.
+ */
+void PT_Delete(PT_Indexes index) {
+  if (index == NULL) {
+    return;
+  }
+  inthash_delete(&index->cil);
+  free(index);
+}
+
+/* Not implemented: the arguments are ignored and 0 is always returned */
+int PT_RemoveIndex(PT_Indexes index, int indexId) {
+  (void) index;
+  (void) indexId;
+  return 0;
+}
+
+#define assertf(exp)
+
+/*
+ * Copy one line from the memory buffer buff into s.
+ *
+ * Copying stops at '\n', at '\0', or once max characters have been stored;
+ * '\r' characters are dropped. s is then NUL-terminated, so it must hold at
+ * least max + 1 bytes.
+ *
+ * Returns the offset to add to buff to reach the next line, that is the
+ * number of characters consumed plus one for the terminator that stopped
+ * the copy (so an empty buffer returns 1).
+ */
+static int binput(char* buff,char* s,int max) {
+  const char* src = buff;
+  int stored = 0;
+
+  for ( ; stored < max && *src != '\0' && *src != '\n' ; src++) {
+    if (*src != '\r') {
+      s[stored++] = *src;
+    }
+  }
+  s[stored] = '\0';
+
+  // then return the supplemental jump offset
+  return (int) (src - buff) + 1;
+}
+
+/* Modification time of file, or 0 when stat() fails or reports 0 or -1 */
+static time_t file_timestamp(const char* file) {
+  struct stat info;
+  time_t mtime;
+  if (stat(file, &info) != 0) {
+    return (time_t) 0;
+  }
+  mtime = info.st_mtime;
+  if (mtime == (time_t) -1) {
+    return (time_t) 0;
+  }
+  return mtime;
+}
+
+/*
+ * Sanity check of an index: returns 1 when index is not NULL and its type
+ * is a known cache format, 0 otherwise. An out-of-range type is reported
+ * through CRITICAL_ with the caller's file/line.
+ */
+static int PT_Index_Check__(PT_Index index, const char* file, int line) {
+  if (index == NULL) {
+    return 0;
+  }
+  if (index->type < PT_CACHE_MIN || index->type > PT_CACHE_MAX) {
+    CRITICAL_("index corrupted in memory", file, line);
+    return 0;
+  }
+  return 1;
+}
+/* Check an index at the call site (true when usable) */
+#define SAFE_INDEX(index) PT_Index_Check__(index, __FILE__, __LINE__)
+
+
+/* ------------------------------------------------------------ */
+/* Generic cache dispatch */
+/* ------------------------------------------------------------ */
+
+/*
+ * Release *pindex and reset it to NULL. The format-specific cleanup is run
+ * first when the index type is valid; the structure itself is freed in any
+ * case. A NULL pindex or a NULL *pindex is ignored.
+ */
+void PT_Index_Delete(PT_Index *pindex) {
+  PT_Index index;
+  if (pindex == NULL || (*pindex) == NULL) {
+    return;
+  }
+  index = *pindex;
+  if (SAFE_INDEX(index)) {
+    _IndexFuncts[index->type].PT_Index_Delete(pindex);
+  }
+  free(index);
+  *pindex = NULL;
+}
+
+/*
+ * Format-specific cleanup (PT_CACHE__NEW): close the zip file, delete the
+ * hash table and destroy the lock. *pindex itself is neither freed nor
+ * reset here.
+ */
+static void PT_Index_Delete__New(PT_Index *pindex) {
+  PT_Index__New index;
+  if (pindex == NULL || (*pindex) == NULL) {
+    return;
+  }
+  index = &(*pindex)->slots.formatNew;
+  if (index->zFile != NULL) {
+    unzClose(index->zFile);
+    index->zFile = NULL;
+  }
+  if (index->hash != NULL) {
+    inthash_delete(&index->hash);
+    index->hash = NULL;
+  }
+  MutexFree(&index->zFileLock);
+}
+
+/*
+ * Format-specific cleanup (PT_CACHE__OLD): close the .dat and .ndx files,
+ * delete the hash table and destroy the lock. *pindex itself is neither
+ * freed nor reset here.
+ */
+static void PT_Index_Delete__Old(PT_Index *pindex) {
+  PT_Index__Old index;
+  if (pindex == NULL || (*pindex) == NULL) {
+    return;
+  }
+  index = &(*pindex)->slots.formatOld;
+  if (index->dat != NULL) {
+    fclose(index->dat);
+  }
+  if (index->ndx != NULL) {
+    fclose(index->ndx);
+  }
+  if (index->hash != NULL) {
+    inthash_delete(&index->hash);
+    index->hash = NULL;
+  }
+  MutexFree(&index->fileLock);
+}
+
+/*
+ * Load the cache file path and merge it into indexes.
+ * Returns -1 when the cache cannot be loaded, otherwise the value returned
+ * by PT_IndexMerge(). The loaded index is deleted here if PT_IndexMerge()
+ * left it non-NULL.
+ */
+int PT_AddIndex(PT_Indexes indexes, const char *path) {
+  int ret;
+  PT_Index loaded = PT_LoadCache(path);
+  if (loaded == NULL) {
+    return -1;
+  }
+  ret = PT_IndexMerge(indexes, &loaded);
+  if (loaded != NULL) {
+    PT_Index_Delete(&loaded);
+  }
+  return ret;
+}
+
+/*
+ * Build the HTML catalog page listing the start URL of every loaded cache.
+ *
+ * Returns a new element (status 200, text/html, iso-8859-1) whose adr/size
+ * hold the page, or NULL when indexes is NULL or the element cannot be
+ * allocated.
+ *
+ * NOTE(review): start URLs are inserted verbatim, without HTML escaping.
+ */
+PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) {
+  PT_Element elt;
+  int i;
+  String html = STRING_EMPTY;
+  if (indexes == NULL) {
+    return NULL;
+  }
+  elt = PT_ElementNew();
+  if (elt == NULL) {
+    return NULL;
+  }
+  StringClear(html);
+  StringStrcat(html,
+    "<html>"
+    PROXYTRACK_COMMENT_HEADER
+    DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
+    "<head>\r\n"
+    "<title>ProxyTrack " PROXYTRACK_VERSION " Catalog</title>"
+    "</head>\r\n"
+    "<body>\r\n"
+    "<h3>Available sites in this cache:</h3><br />"
+    "<br />"
+    );
+  StringStrcat(html, "<ul>\r\n");
+  for(i = 0 ; i < indexes->index_size ; i++) {
+    if (indexes->index[i] != NULL
+      && indexes->index[i]->slots.common.startUrl[0] != '\0')
+    {
+      const char * url = indexes->index[i]->slots.common.startUrl;
+      StringStrcat(html, "<li>\r\n");
+      StringStrcat(html, "<a href=\"");
+      StringStrcat(html, url);
+      StringStrcat(html, "\">");
+      StringStrcat(html, url);
+      StringStrcat(html, "</a>\r\n");
+      StringStrcat(html, "</li>\r\n");
+    }
+  }
+  StringStrcat(html, "</ul>\r\n");
+  StringStrcat(html, "</body></html>\r\n");
+  /* The size must be read before the buffer is handed over to the element */
+  elt->size = StringLength(html);
+  elt->adr = StringAcquire(&html);
+  elt->statuscode = 200;
+  strcpy(elt->charset, "iso-8859-1");
+  strcpy(elt->contenttype, "text/html");
+  strcpy(elt->msg, "OK");
+  StringFree(html);
+  return elt;
+}
+
+/*
+ * Like strchr(str, c), but the search also ends at the first occurrence of
+ * stop: returns a pointer to the first c found before any stop character
+ * and before the end of the string, NULL otherwise.
+ */
+static char* strchr_stop(char* str, char c, char stop) {
+  char* p = str;
+  while (*p != 0 && *p != stop && *p != c) {
+    p++;
+  }
+  return (*p == c) ? p : NULL;
+}
+
+/*
+ * List the entries of the central index located under url (an optional
+ * leading "http://" is ignored; an empty url matches everything).
+ *
+ * With subtree == 0 only the next path component of each entry is reported
+ * (folders get a trailing '/') and duplicates are removed; otherwise the
+ * whole remainder of each entry is reported.
+ *
+ * Returns a NULL-terminated array of strings held in ONE heap block (the
+ * pointers first, then the strings), to be released with
+ * PT_Enumerate_Delete(); or NULL when nothing matches or indexes is unusable.
+ *
+ * Implementation note: while collecting, string positions are stored in
+ * listindexes as offsets disguised as pointers (NULL + offset), because the
+ * final block address is not known yet; they are turned into real pointers
+ * once the block can no longer move.
+ */
+char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) {
+  // should be cached!
+  if (indexes != NULL && indexes->cil != NULL) {
+    unsigned int urlSize;
+    String list = STRING_EMPTY;
+    String listindexes = STRING_EMPTY;
+    String subitem = STRING_EMPTY;
+    unsigned int listCount = 0;
+    struct_inthash_enum en = inthash_enum_new(indexes->cil);
+    inthash_chain* chain;
+    inthash hdupes = NULL;      /* names already listed; unused in subtree mode */
+    if (!subtree)
+      hdupes = inthash_new(127);
+    StringClear(list);
+    StringClear(listindexes);
+    StringClear(subitem);
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    urlSize = (unsigned int) strlen(url);
+    while((chain = inthash_enum_next(&en))) {
+      long int index = (long int)chain->value.intg;
+      if (urlSize == 0 || strncmp(chain->name, url, urlSize) == 0) {
+        if (index >= 0 && index < indexes->index_size) {
+          char * item = chain->name + urlSize;
+          if (*item == '/')
+            item++;
+          {
+            char * pos = subtree ? 0 : strchr_stop(item, '/', '?');
+            unsigned int len = pos ? (unsigned int)( pos - item ) : (unsigned int)strlen(item);
+            if (len > 0 /* default document */ || *item == 0) {
+              int isFolder = ( item[len] == '/' );
+              StringClear(subitem);
+              if (len > 0)
+                StringMemcat(subitem, item, len);
+              /* No duplicate table in subtree mode: every entry is kept */
+              if (len == 0 || hdupes == NULL || !inthash_exists(hdupes, StringBuff(subitem))) {
+                /* offset of the new string inside list, stored as a fake pointer */
+                char* ptr = NULL;
+                ptr += StringLength(list);
+                if (len > 0)
+                  StringStrcat(list, StringBuff(subitem));
+                if (isFolder)
+                  StringStrcat(list, "/");
+                StringMemcat(list, "\0", 1);    /* NULL terminated strings */
+                StringMemcat(listindexes, &ptr, sizeof(ptr));
+                listCount++;
+                if (hdupes != NULL)
+                  inthash_write(hdupes, StringBuff(subitem), 0);
+              }
+            }
+          }
+        } else {
+          CRITICAL("PT_Enumerate:Corrupted central index locator");
+        }
+      }
+    }
+    StringFree(subitem);
+    inthash_delete(&hdupes);
+    if (listCount > 0) {
+      unsigned int i;
+      void* blk;
+      char *nullPointer = NULL;
+      char* startStrings;
+      /* NULL terminated index */
+      StringMemcat(listindexes, &nullPointer, sizeof(nullPointer));
+      /* start of all strings (index) */
+      startStrings = nullPointer + StringLength(listindexes);
+      /* copy list of URLs after indexes */
+      StringMemcat(listindexes, StringBuff(list), StringLength(list));
+      /* ---- no reallocation beyond this point (fixed addresses) ---- */
+      /* start of all strings (pointer) */
+      startStrings = (startStrings - nullPointer) + StringBuff(listindexes);
+      /* transform indexes into references */
+      for(i = 0 ; i < listCount ; i++) {
+        char *ptr = NULL;
+        unsigned int ndx;
+        memcpy(&ptr, &StringBuff(listindexes)[i*sizeof(char*)], sizeof(char*));
+        ndx = (unsigned int) (ptr - nullPointer);
+        ptr = startStrings + ndx;
+        memcpy(&StringBuff(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*));
+      }
+      blk = StringAcquire(&listindexes);
+      StringFree(list);
+      StringFree(listindexes);
+      return (char **)blk;
+    }
+    /* Nothing matched: release the working buffers instead of leaking them */
+    StringFree(list);
+    StringFree(listindexes);
+  }
+  return NULL;
+}
+
+/* Free the block referenced by *plist (presumably a list obtained from
+   PT_Enumerate()) and reset the caller's pointer to NULL.
+   A NULL plist or an already-NULL list is ignored. */
+void PT_Enumerate_Delete(char ***plist) {
+  if (plist == NULL || *plist == NULL)
+    return;
+  free(*plist);
+  *plist = NULL;
+}
+
+/* Open an HTTrack cache file and build its in-memory index.
+   The cache format is chosen from the file extension (case-insensitive):
+   ".zip" selects PT_CACHE__NEW, ".ndx" or ".dat" select PT_CACHE__OLD.
+   Returns a newly allocated index, or NULL when filename is NULL, the
+   extension is not recognized, memory is exhausted or the format-specific
+   loader reports a failure. */
+PT_Index PT_LoadCache(const char *filename) {
+  int type = PT_CACHE_UNDEFINED;
+  const char * dot;
+  PT_Index index;
+
+  if (filename == NULL)
+    return NULL;
+  dot = strrchr(filename, '.');
+  if (dot != NULL) {
+    if (strcasecmp(dot, ".zip") == 0) {
+      type = PT_CACHE__NEW;
+    } else if (strcasecmp(dot, ".ndx") == 0 || strcasecmp(dot, ".dat") == 0) {
+      type = PT_CACHE__OLD;
+    }
+  }
+  if (type == PT_CACHE_UNDEFINED)
+    return NULL;
+
+  index = calloc(1, sizeof(_PT_Index));
+  if (index == NULL)
+    return NULL;
+  index->type = type;
+  index->slots.common.timestamp = (time_t) time(NULL);
+  index->slots.common.startUrl[0] = '\0';
+  index->slots.common.hash = inthash_new(8191);
+  if (!_IndexFuncts[type].PT_LoadCache(index, filename)) {
+    DEBUG("reading httrack cache (format #%d) %s : error" _ type _ filename );
+    /* Release the hash table too, not only the index itself.
+       NOTE(review): this relies on inthash_delete() accepting an already
+       released (NULL) handle, as its use on a possibly-NULL table elsewhere
+       in this file suggests - confirm. */
+    inthash_delete(&index->slots.common.hash);
+    free(index);
+    return NULL;
+  }
+  DEBUG("reading httrack cache (format #%d) %s : success" _ type _ filename );
+
+  /* default starting URL is the first hash entry (skipping robots.txt) */
+  if (index->slots.common.startUrl[0] == '\0') {
+    struct_inthash_enum en = inthash_enum_new(index->slots.common.hash);
+    inthash_chain* chain = inthash_enum_next(&en);
+    if (chain != NULL && strstr(chain->name, "/robots.txt") != NULL) {
+      chain = inthash_enum_next(&en);
+    }
+    if (chain != NULL) {
+      /* bounded: an over-long entry name is truncated instead of
+         overflowing startUrl */
+      snprintf(index->slots.common.startUrl,
+               sizeof(index->slots.common.startUrl), "%s%s",
+               link_has_authority(chain->name) ? "" : "http://",
+               chain->name);
+    }
+  }
+  return index;
+}
+
+
+/* Size in bytes of the named file as reported by stat(), or -1 when
+   stat() fails. */
+static long int filesize(const char* filename) {
+  struct stat info;
+  memset(&info, 0, sizeof(info));
+  if (stat(filename, &info) != 0)
+    return -1;
+  return (long int) info.st_size;
+}
+
+/* Ask the format-specific lookup handler whether url is present in index.
+   Returns 0 for a NULL index or one that fails the SAFE_INDEX() check,
+   otherwise whatever the handler returns. */
+int PT_LookupCache(PT_Index index, const char* url) {
+  if (index == NULL || !(SAFE_INDEX(index)))
+    return 0;
+  return _IndexFuncts[index->type].PT_LookupCache(index, url);
+}
+
+/* Timestamp stored in the common slot of index.
+   Returns (time_t) -1 for a NULL index, in line with the NULL checks done
+   by the other public accessors of this file. */
+time_t PT_Index_Timestamp(PT_Index index) {
+  if (index == NULL)
+    return (time_t) -1;
+  return index->slots.common.timestamp;
+}
+
+/* Locked wrapper: runs PT_LookupCache__New_u() while holding the index's
+   zFileLock and returns its result. */
+static int PT_LookupCache__New(PT_Index index, const char* url) {
+  int found;
+  MutexLock(&index->slots.formatNew.zFileLock);
+  found = PT_LookupCache__New_u(index, url);
+  MutexUnlock(&index->slots.formatNew.zFileLock);
+  return found;
+}
+
+/* Unlocked lookup in a new-format index.
+   Returns 1 when url (an optional leading "http://" is skipped) is found in
+   the index hash table, 0 otherwise or when the index has no hash table or
+   zip handle, or url is NULL or empty. */
+static int PT_LookupCache__New_u(PT_Index index_, const char* url) {
+  PT_Index__New index;
+  if (index_ == NULL)
+    return 0;
+  index = &index_->slots.formatNew;
+  if (index->hash == NULL || index->zFile == NULL || url == NULL || *url == 0)
+    return 0;
+  if (strncmp(url, "http://", 7) == 0)
+    url += 7;
+  return inthash_read(index->hash, url, NULL) ? 1 : 0;
+}
+
+/* Append *pindex to the index collection and register its URLs in the
+   central index locator (indexes->cil).
+   On success the collection keeps the index and *pindex is set to NULL.
+   A URL already registered by an index with a newer timestamp keeps its
+   existing owner; every other URL is (re)assigned to the new index.
+   Returns the number of URLs assigned to the new index, 0 when the index
+   array could not be grown (the collection and *pindex are then left
+   untouched), or -1 on invalid arguments. */
+int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex)
+{
+  if (pindex != NULL && *pindex != NULL && (*pindex)->slots.common.hash != NULL
+      && indexes != NULL)
+  {
+    PT_Index index = *pindex;
+    struct_inthash_enum en = inthash_enum_new(index->slots.common.hash);
+    inthash_chain* chain;
+    const int index_id = indexes->index_size;
+    int nMerged = 0;
+    /* grow through a temporary so that the existing array (and index_size)
+       stay valid if realloc() fails; elements are index pointers */
+    void* grown = realloc(indexes->index,
+                          sizeof(*indexes->index) * ((size_t) index_id + 1));
+    if (grown == NULL) {
+      CRITICAL("PT_IndexMerge:Memory exhausted");
+      return nMerged;
+    }
+    indexes->index = grown;
+    indexes->index_size = index_id + 1;
+    indexes->index[index_id] = index;
+    *pindex = NULL;
+    while((chain = inthash_enum_next(&en)) != NULL) {
+      const char * url = chain->name;
+      long int previous_index_id = 0;
+      if (url == NULL || url[0] == '\0')
+        continue;
+      if (inthash_read(indexes->cil, url, (long int*)&previous_index_id)) {
+        if (previous_index_id >= 0 && previous_index_id < indexes->index_size) {
+          /* existing entry is newer: keep it, but go on with the other
+             URLs (stopping here would drop all remaining entries) */
+          if (indexes->index[previous_index_id]->slots.common.timestamp > index->slots.common.timestamp)
+            continue;
+        } else {
+          CRITICAL("PT_IndexMerge:Corrupted central index locator");
+        }
+      }
+      inthash_write(indexes->cil, chain->name, index_id);
+      nMerged++;
+    }
+    return nMerged;
+  }
+  return -1;
+}
+
+/* Destroy the element referenced by *pentry: its adr, headers and location
+   buffers are freed, then the element itself, and *pentry is reset to NULL.
+   A NULL pentry is ignored. */
+void PT_Element_Delete(PT_Element *pentry) {
+  PT_Element entry;
+  if (pentry == NULL)
+    return;
+  entry = *pentry;
+  if (entry != NULL) {
+    /* free(NULL) is a no-op, so the members need no individual guard */
+    free(entry->adr);
+    free(entry->headers);
+    free(entry->location);
+    free(entry);
+  }
+  *pentry = NULL;
+}
+
+/* Find which index owns url (an optional leading "http://" is skipped) in
+   the central index locator and read the element from that index.
+   flags is passed through to PT_ReadCache().
+   Returns the element, with indexId set to the owning index, or NULL when
+   indexes or url is NULL, the URL is unknown, the locator entry is out of
+   range or PT_ReadCache() returns NULL. */
+PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags)
+{
+  if (indexes != NULL && url != NULL)
+  {
+    long int index_id;
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    if (inthash_read(indexes->cil, url, &index_id)) {
+      /* valid slots are 0 .. index_size-1 ('<=' would read past the array) */
+      if (index_id >= 0 && index_id < indexes->index_size) {
+        PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags);
+        if (item != NULL) {
+          item->indexId = (int) index_id;
+          return item;
+        }
+      } else {
+        CRITICAL("PT_ReadCache:Corrupted central index locator");
+      }
+    }
+  }
+  return NULL;
+}
+
+/* Tell whether url (an optional leading "http://" is skipped) is registered
+   in the central index locator with a valid index slot.
+   Returns 1 if so, 0 otherwise (including NULL arguments and out-of-range
+   locator entries, which are also reported through CRITICAL). */
+int PT_LookupIndex(PT_Indexes indexes, const char* url) {
+  if (indexes != NULL && url != NULL)
+  {
+    long int index_id;
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    if (inthash_read(indexes->cil, url, &index_id)) {
+      /* valid slots are 0 .. index_size-1 */
+      if (index_id >= 0 && index_id < indexes->index_size) {
+        return 1;
+      } else {
+        CRITICAL("PT_ReadCache:Corrupted central index locator");
+      }
+    }
+  }
+  return 0;
+}
+
+/* Index stored in slot indexId of the collection, or NULL when indexes is
+   NULL or indexId is outside 0 .. index_size-1. */
+PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) {
+  if (indexes == NULL)
+    return NULL;
+  if (indexId < 0 || indexId >= indexes->index_size)
+    return NULL;
+  return indexes->index[indexId];
+}
+
+/* Allocate a zero-filled element with statuscode set to STATUSCODE_INVALID
+   and indexId set to -1. Returns NULL if the allocation fails. */
+PT_Element PT_ElementNew() {
+  PT_Element elt = calloc(1, sizeof(_PT_Element));
+  if (elt != NULL) {
+    elt->statuscode = STATUSCODE_INVALID;
+    elt->indexId = -1;
+  }
+  return elt;
+}
+
+/* Read url from index through the format-specific read handler.
+   Returns NULL for a NULL index or one that fails the SAFE_INDEX() check,
+   otherwise whatever the handler returns. */
+PT_Element PT_ReadCache(PT_Index index, const char* url, int flags) {
+  if (index == NULL || !(SAFE_INDEX(index)))
+    return NULL;
+  return _IndexFuncts[index->type].PT_ReadCache(index, url, flags);
+}
+
+/* Locked wrapper: runs PT_ReadCache__New_u() while holding the index's
+   zFileLock and returns its result. */
+static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags) {
+  PT_Element element;
+  MutexLock(&index->slots.formatNew.zFileLock);
+  element = PT_ReadCache__New_u(index, url, flags);
+  MutexUnlock(&index->slots.formatNew.zFileLock);
+  return element;
+}
+
+
+/* ------------------------------------------------------------ */
+/* New HTTrack cache (new.zip) format */
+/* ------------------------------------------------------------ */
+
+/* If 'line' is not empty and strfield2(line, refline) accepts it, copy
+   'value' into 'refvalue' and empty 'line' so that later field tests on the
+   same line are skipped.
+   NOTE(review): the copy is an unbounded strcpy(); 'refvalue' must be large
+   enough for any 'value'. */
+#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \
+  if ((line)[0] != '\0' && strfield2((line), (refline))) { \
+    strcpy((refvalue), (value)); \
+    (line)[0] = '\0'; \
+  } \
+} while(0)
+/* If 'line' is not empty and strfield2(line, refline) accepts it, parse
+   'value' as a decimal int (0 when nothing can be parsed), assign it to
+   'refvalue' and empty 'line' so that later field tests on the same line
+   are skipped. */
+#define ZIP_READFIELD_INT(line, value, refline, refvalue) do { \
+  if ((line)[0] != '\0' && strfield2((line), (refline))) { \
+    int zipFieldInt_ = 0; \
+    sscanf((value), "%d", &zipFieldInt_); \
+    (refvalue) = zipFieldInt_; \
+    (line)[0] = '\0'; \
+  } \
+} while(0)
+
+/* Load a new-format (zip) HTTrack cache into index_.
+   Opens the archive, records the file timestamp, initialises the zip lock,
+   derives the base path from the (absolute) cache filename and registers
+   every zip entry in the index hash table: the stored value is the entry
+   offset, positive when the extra field says "X-In-Cache: 1" and negated
+   otherwise. The first entry that is not a robots.txt becomes the start URL.
+   Returns 1 when the archive directory could be walked, 0 otherwise; on
+   failure after a successful open the hash table is deleted and the
+   archive is closed again. */
+int PT_LoadCache__New(PT_Index index_, const char *filename) {
+  if (index_ != NULL && filename != NULL) {
+    PT_Index__New index = &index_->slots.formatNew;
+    unzFile zFile = index->zFile = unzOpen(filename);
+    index->timestamp = file_timestamp(filename);
+    MutexInit(&index->zFileLock);
+
+    // Opened ?
+    if (zFile!=NULL) {
+      const char * abpath;
+      int slashes;
+      inthash hashtable = index->hash;
+
+      /* Compute base path for this index - the filename MUST be absolute!
+         (walk back from the end until the second path separator) */
+      for(slashes = 2, abpath = filename + (int)strlen(filename) - 1
+        ; abpath > filename && ( ( *abpath != '/'&& *abpath != '\\' ) || --slashes > 0)
+        ; abpath--);
+      index->path[0] = '\0';
+      if (slashes == 0 && *abpath != 0) {
+        int i;
+        size_t baseLen = (size_t) ( abpath - filename ) + 1;
+        /* never copy more than the path buffer can hold */
+        if (baseLen > sizeof(index->path) - 1)
+          baseLen = sizeof(index->path) - 1;
+        strncat(index->path, filename, baseLen);
+        for(i = 0 ; index->path[i] != 0 ; i++) {
+          if (index->path[i] == '\\') {
+            index->path[i] = '/';
+          }
+        }
+      }
+
+      /* Read directory entries */
+      if (unzGoToFirstFile(zFile) == Z_OK) {
+        char comment[128];
+        char entryName[HTS_URLMAXSIZE * 4];
+        int entries = 0;
+        int firstSeen = 0;
+        memset(comment, 0, sizeof(comment)); // for truncated reads
+        do {
+          int readSizeHeader = 0;
+          entryName[0] = '\0';
+          comment[0] = '\0';
+          if (unzOpenCurrentFile(zFile) == Z_OK) {
+            if (
+              (readSizeHeader = unzGetLocalExtrafield(zFile, comment, sizeof(comment) - 2)) > 0
+              &&
+              unzGetCurrentFileInfo(zFile, NULL, entryName, sizeof(entryName) - 2, NULL, 0, NULL, 0) == Z_OK
+              )
+            {
+              long int pos = (long int) unzGetOffset(zFile);
+              assertf(readSizeHeader < sizeof(comment));
+              comment[readSizeHeader] = '\0';
+              /* a name filling the whole buffer is not NUL-terminated by
+                 minizip: terminate it in the spare bytes */
+              entryName[sizeof(entryName) - 2] = '\0';
+              entries++;
+              if (pos > 0) {
+                int dataincache = 0;    // data in cache ?
+                char* filenameIndex = entryName;
+                if (strncmp(filenameIndex, "http://", 7) == 0) {
+                  filenameIndex += 7;
+                }
+                if (comment[0] != '\0') {
+                  int maxLine = 2;
+                  char* a = comment;
+                  while(*a && maxLine-- > 0) {      // parse only few first lines
+                    char line[1024];
+                    line[0] = '\0';
+                    a+=binput(a, line, sizeof(line) - 2);
+                    if (strncmp(line, "X-In-Cache:", 11) == 0) {
+                      if (strcmp(line, "X-In-Cache: 1") == 0) {
+                        dataincache = 1;
+                      } else {
+                        dataincache = 0;
+                      }
+                      break;
+                    }
+                  }
+                }
+                /* the sign of the stored offset encodes "data in cache" */
+                if (dataincache)
+                  inthash_add(hashtable, filenameIndex, pos);
+                else
+                  inthash_add(hashtable, filenameIndex, -pos);
+
+                /* First link as starting URL */
+                if (!firstSeen) {
+                  if (strstr(filenameIndex, "/robots.txt") == NULL) {
+                    size_t used = strlen(index->startUrl);
+                    firstSeen = 1;
+                    /* bounded append: entry names can be much longer than
+                       the startUrl buffer */
+                    if (used < sizeof(index->startUrl)) {
+                      snprintf(index->startUrl + used,
+                               sizeof(index->startUrl) - used, "%s%s",
+                               link_has_authority(filenameIndex) ? "" : "http://",
+                               filenameIndex);
+                    }
+                  }
+                }
+              } else {
+                fprintf(stderr, "Corrupted cache meta entry #%d"LF, (int)entries);
+              }
+            } else {
+              fprintf(stderr, "Corrupted cache entry #%d"LF, (int)entries);
+            }
+            unzCloseCurrentFile(zFile);
+          } else {
+            fprintf(stderr, "Corrupted cache entry #%d"LF, (int)entries);
+          }
+        } while( unzGoToNextFile(zFile) == Z_OK );
+        return 1;
+      } else {
+        /* unusable archive: release the table and the zip handle */
+        inthash_delete(&index->hash);
+        unzClose(zFile);
+        index->zFile = NULL;
+      }
+    }
+  }
+  return 0;
+}
+
+static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flags)
+{
+ PT_Index__New index = (PT_Index__New) &index_->slots.formatNew;
+ char location_default[HTS_URLMAXSIZE*2];
+ char previous_save[HTS_URLMAXSIZE*2];
+ char previous_save_[HTS_URLMAXSIZE*2];
+ long int hash_pos;
+ int hash_pos_return;
+ PT_Element r = NULL;
+ if (index == NULL || index->hash == NULL || index->zFile == NULL || url == NULL || *url == 0)
+ return NULL;
+ if ((r = PT_ElementNew()) == NULL)
+ return NULL;
+ location_default[0] = '\0';
+ previous_save[0] = previous_save_[0] = '\0';
+ memset(r, 0, sizeof(_PT_Element));
+ r->location = location_default;
+ strcpy(r->location, "");
+ if (strncmp(url, "http://", 7) == 0)
+ url += 7;
+ hash_pos_return = inthash_read(index->hash, url, (long int*)&hash_pos);
+
+ if (hash_pos_return) {
+ uLong posInZip;
+ if (hash_pos > 0) {
+ posInZip = (uLong) hash_pos;
+ } else {
+ posInZip = (uLong) -hash_pos;
+ }
+ if (unzSetOffset(index->zFile, posInZip) == Z_OK) {
+ /* Read header (Max 8KiB) */
+ if (unzOpenCurrentFile(index->zFile) == Z_OK) {
+ char headerBuff[8192 + 2];
+ int readSizeHeader;
+ int totalHeader = 0;
+ int dataincache = 0;
+
+ /* For BIG comments */
+ headerBuff[0]
+ = headerBuff[sizeof(headerBuff) - 1]
+ = headerBuff[sizeof(headerBuff) - 2]
+ = headerBuff[sizeof(headerBuff) - 3] = '\0';
+
+ if ( (readSizeHeader = unzGetLocalExtrafield(index->zFile, headerBuff, sizeof(headerBuff) - 2)) > 0)
+ {
+ int offset = 0;
+ char line[HTS_URLMAXSIZE + 2];
+ int lineEof = 0;
+ headerBuff[readSizeHeader] = '\0';
+ do {
+ char* value;
+ line[0] = '\0';
+ offset += binput(headerBuff + offset, line, sizeof(line) - 2);
+ if (line[0] == '\0') {
+ lineEof = 1;
+ }
+ value = strchr(line, ':');
+ if (value != NULL) {
+ *value++ = '\0';
+ if (*value == ' ' || *value == '\t') value++;
+ ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache);
+ ZIP_READFIELD_INT(line, value, "X-Statuscode", r->statuscode);
+ ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r->msg); // msg
+ ZIP_READFIELD_INT(line, value, "X-Size", r->size); // size
+ ZIP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype
+ ZIP_READFIELD_STRING(line, value, "X-Charset", r->charset); // contenttype
+ ZIP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified
+ ZIP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag
+ ZIP_READFIELD_STRING(line, value, "Location", r->location); // 'location' pour moved
+ ZIP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition
+ //ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address
+ //ZIP_READFIELD_STRING(line, value, "X-Fil", ..); // Original URI filename
+ ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename
+ }
+ } while(offset < readSizeHeader && !lineEof);
+ totalHeader = offset;
+
+ /* Previous entry */
+ if (previous_save_[0] != '\0') {
+ int pathLen = (int) strlen(index->path);
+ if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format
+ strcpy(previous_save, previous_save_);
+ }
+ // relative ? (hack)
+ else if (index->safeCache
+ || (previous_save_[0] != '/' // /home/foo/bar.gif
+ && ( !isalpha(previous_save_[0]) || previous_save_[1] != ':' ) ) // c:/home/foo/bar.gif
+ )
+ {
+ index->safeCache = 1;
+ sprintf(previous_save, "%s%s", index->path, previous_save_);
+ }
+ // bogus format (includes buggy absolute path)
+ else {
+ /* guess previous path */
+ if (index->fixedPath == 0) {
+ const char * start = jump_protocol_and_auth(url);
+ const char * end = start ? strchr(start, '/') : NULL;
+ int len = (int) (end - start);
+ if (start != NULL && end != NULL && len > 0 && len < 128) {
+ char piece[128 + 2];
+ const char * where;
+ piece[0] = '\0';
+ strncat(piece, start, len);
+ if ((where = strstr(previous_save_, piece)) != NULL) {
+ index->fixedPath = (int) (where - previous_save_); // offset to relative path
+ }
+ }
+ }
+ if (index->fixedPath > 0) {
+ int saveLen = (int) strlen(previous_save_);
+ if (index->fixedPath < saveLen) {
+ sprintf(previous_save, "%s%s", index->path, previous_save_ + index->fixedPath);
+ } else {
+ sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)", previous_save_, (int)index->fixedPath);
+ r->statuscode = STATUSCODE_INVALID;
+ }
+ } else {
+ sprintf(previous_save, "%s%s", index->path, previous_save_);
+ }
+ }
+ }
+
+ /* Complete fields */
+ r->adr=NULL;
+ if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */
+ int ok = 0;
+
+ // Court-circuit:
+ // Peut-on stocker le fichier directement sur disque?
+ if (ok) {
+ if (r->msg[0] == '\0') {
+ strcpy(r->msg,"Cache Read Error : Unexpected error");
+ }
+ } else { // lire en mémoire
+
+ if (!dataincache) {
+ /* Read in memory from cache */
+ if (flags & FETCH_BODY) {
+ if (strnotempty(previous_save)) {
+ FILE* fp = fopen(fconv(previous_save), "rb");
+ if (fp != NULL) {
+ r->adr = (char*) malloc(r->size + 4);
+ if (r->adr != NULL) {
+ if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) {
+ r->statuscode=STATUSCODE_INVALID;
+ sprintf(r->msg,"Read error in cache disk data: %s", strerror(errno));
+ }
+ } else {
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"Read error (memory exhausted) from cache");
+ }
+ fclose(fp);
+ } else {
+ r->statuscode=STATUSCODE_INVALID;
+ sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(previous_save));
+ }
+ } else {
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"Cached file name is invalid");
+ }
+ }
+ } else {
+ // lire fichier (d'un coup)
+ if (flags & FETCH_BODY) {
+ r->adr=(char*) malloc(r->size+1);
+ if (r->adr!=NULL) {
+ if (unzReadCurrentFile(index->zFile, r->adr, r->size) != r->size) { // erreur
+ free(r->adr);
+ r->adr=NULL;
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"Cache Read Error : Read Data");
+ } else
+ *(r->adr+r->size)='\0';
+ //printf(">%s status %d\n",back[p].r->contenttype,back[p].r->statuscode);
+ } else { // erreur
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"Cache Memory Error");
+ }
+ }
+ }
+ }
+ } // si save==null, ne rien charger (juste en tête)
+ } else {
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"Cache Read Error : Read Header Data");
+ }
+ unzCloseCurrentFile(index->zFile);
+ } else {
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"Cache Read Error : Open File");
+ }
+
+ } else {
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"Cache Read Error : Bad Offset");
+ }
+ } else {
+ r->statuscode=STATUSCODE_INVALID;
+ strcpy(r->msg,"File Cache Entry Not Found");
+ }
+ if (r->location[0] != '\0') {
+ r->location = strdup(r->location);
+ } else {
+ r->location = NULL;
+ }
+ return r;
+}
+
+
+/* ------------------------------------------------------------ */
+/* Old HTTrack cache (dat/ndx) format */
+/* ------------------------------------------------------------ */
+
+/* Read a length-prefixed string from the memory buffer adr: a first line
+   (read with binput) holds the decimal length, followed by that many bytes
+   which are copied to s and NUL-terminated.
+   A missing, unparsable or negative length is treated as 0.
+   Returns the number of bytes consumed from adr.
+   NOTE(review): the copy is not limited by the size of s; the caller must
+   provide a buffer large enough for the announced length. */
+static int cache_brstr(char* adr,char* s) {
+  int i = 0;
+  int off;
+  char buff[256 + 1];
+  off=binput(adr,buff,256);
+  adr+=off;
+  if (sscanf(buff,"%d",&i) != 1 || i < 0)
+    i=0;
+  if (i>0)
+    strncpy(s,adr,i);
+  *(s+i)='\0';
+  off+=i;
+  return off;
+}
+
+/* Read a length-prefixed string from fp: a first line (read with linput)
+   holds the decimal length, followed by that many bytes which are read
+   into s and NUL-terminated.
+   A missing or unparsable length, or one outside 0..32768, is treated as 0.
+   A short read trips assertf().
+   NOTE(review): s must be able to hold up to 32768 bytes plus the
+   terminator; the size of s is not known here. */
+static void cache_rstr(FILE* fp,char* s) {
+  INTsys i = 0;
+  char buff[256+4];
+  linput(fp,buff,256);
+  if (sscanf(buff,INTsysP,&i) != 1
+    || i < 0 || i > 32768)    /* error, something nasty happened */
+    i=0;
+  if (i>0) {
+    if ((int) fread(s,1,i,fp) != i) {
+      int fread_cache_failed = 0;
+      assertf(fread_cache_failed);
+    }
+  }
+  *(s+i)='\0';
+}
+
+/* Same wire format as cache_rstr(), but the string is returned in a newly
+   allocated, NUL-terminated buffer that the caller must free().
+   Returns NULL when the length is missing, unparsable, 0 or outside
+   0..32768, or when the allocation fails. A short read trips assertf(). */
+static char* cache_rstr_addr(FILE* fp) {
+  INTsys i = 0;
+  char* addr = NULL;
+  char buff[256+4];
+  linput(fp,buff,256);
+  /* INTsysP is the scanf format matching INTsys (as used by cache_rstr) */
+  if (sscanf(buff,INTsysP,&i) != 1
+    || i < 0 || i > 32768)    /* error, something nasty happened */
+    i=0;
+  if (i > 0) {
+    addr = malloc(i + 1);
+    if (addr != NULL) {
+      if ((int) fread(addr,1,i,fp) != i) {
+        int fread_cache_failed = 0;
+        assertf(fread_cache_failed);
+      }
+      *(addr+i)='\0';
+    }
+  }
+  return addr;
+}
+
+/* Read a length-prefixed string with cache_rstr() and parse it as a decimal
+   int into *i; *i is left unchanged when nothing can be parsed. */
+static void cache_rint(FILE* fp,int* i) {
+  /* cache_rstr() may store up to 32768 bytes plus the terminator */
+  char s[32768 + 1];
+  cache_rstr(fp,s);
+  sscanf(s,"%d",i);
+}
+
+/* Read a length-prefixed string with cache_rstr(), parse it as a decimal
+   int and store it in *i converted to unsigned long; *i is set to 0 when
+   nothing can be parsed. */
+static void cache_rLLint(FILE* fp,unsigned long* i) {
+  int l = 0;
+  /* cache_rstr() may store up to 32768 bytes plus the terminator */
+  char s[32768 + 1];
+  cache_rstr(fp,s);
+  sscanf(s,"%d",&l);
+  *i = (unsigned long)l;
+}
+
+/* Load an old-format (.dat/.ndx) HTTrack cache into index_.
+   Derives the base path and the .dat/.ndx file names from filename, opens
+   both files, reads the whole .ndx file in memory, checks its header
+   ("CACHE-1.x" with x <= 5, or the header-less 1.0 format) and registers
+   every "host/file" entry with its position in the index hash table.
+   The first entry that is not a robots.txt becomes the start URL.
+   Returns 1 on success (both files stay open for later reads), 0 otherwise;
+   on failure the files are closed again.
+   NOTE(review): fileLock is not initialised in this function, unlike
+   zFileLock in the new-format loader - confirm it is set up elsewhere. */
+static int PT_LoadCache__Old(PT_Index index_, const char *filename) {
+  PT_Index__Old cache;
+  const char * ext;
+  long int ndxSize;
+  char * use = NULL;
+  int loaded = 0;
+
+  if (index_ == NULL || filename == NULL)
+    return 0;
+  cache = &index_->slots.formatOld;
+  ext = strrchr(filename, '.');
+  cache->filenameDat[0] = '\0';
+  cache->filenameNdx[0] = '\0';
+  cache->path[0] = '\0';
+
+  {
+    const char * abpath;
+    int slashes;
+    /* Compute base path for this index - the filename MUST be absolute!
+       (same computation as in the new-format loader) */
+    for(slashes = 2, abpath = filename + (int)strlen(filename) - 1
+      ; abpath > filename && ( ( *abpath != '/'&& *abpath != '\\' ) || --slashes > 0)
+      ; abpath--);
+    if (slashes == 0 && *abpath != 0) {
+      int i;
+      size_t baseLen = (size_t) ( abpath - filename ) + 1;
+      /* never copy more than the path buffer can hold */
+      if (baseLen > sizeof(cache->path) - 1)
+        baseLen = sizeof(cache->path) - 1;
+      strncat(cache->path, filename, baseLen);
+      for(i = 0 ; cache->path[i] != 0 ; i++) {
+        if (cache->path[i] == '\\') {
+          cache->path[i] = '/';
+        }
+      }
+    }
+  }
+
+  /* Index/data filenames: same stem, ".dat" and ".ndx" extensions */
+  if (ext != NULL) {
+    int nLen = (int) (ext - filename);
+    snprintf(cache->filenameDat, sizeof(cache->filenameDat), "%.*s.dat", nLen, filename);
+    snprintf(cache->filenameNdx, sizeof(cache->filenameNdx), "%.*s.ndx", nLen, filename);
+  }
+  ndxSize = filesize(cache->filenameNdx);
+  cache->timestamp = file_timestamp(cache->filenameDat);
+  cache->dat = fopen(cache->filenameDat, "rb");
+  cache->ndx = fopen(cache->filenameNdx, "rb");
+  if (cache->dat != NULL && cache->ndx != NULL && ndxSize > 0)
+    use = malloc((size_t) ndxSize + 1);
+  if (use != NULL && fread(use, 1, (size_t) ndxSize, cache->ndx) == (size_t) ndxSize) {
+    char firstline[256];
+    char* a=use;
+    int supported = 1;
+    use[ndxSize] = '\0';
+    a += cache_brstr(a, firstline);
+    if (strncmp(firstline,"CACHE-",6)==0) {       // versioned cache
+      if (strncmp(firstline,"CACHE-1.",8)==0) {    // version 1.x
+        cache->version=(int)(firstline[8]-'0');    // cache 1.x
+        if (cache->version <= 5) {
+          a+=cache_brstr(a,firstline);
+          cache->lastmodified[0] = '\0';
+          strncat(cache->lastmodified, firstline, sizeof(cache->lastmodified) - 1);
+        } else {
+          supported = 0;                            // 1.x too recent
+        }
+      } else {
+        supported = 0;                              // unknown major version
+      }
+    } else {              // header-less cache 1.0
+      cache->version=0;
+      cache->lastmodified[0] = '\0';
+      strncat(cache->lastmodified, firstline, sizeof(cache->lastmodified) - 1);
+    }
+
+    /* Create hash table for the cache (MUCH FASTER!) */
+    if (supported) {
+      char line[HTS_URLMAXSIZE*2];
+      char linepos[256];
+      int firstSeen = 0;
+      while ( (a!=NULL) && (a < (use + ndxSize) ) ) {
+        a=strchr(a+1,'\n');     /* start of line */
+        if (a) {
+          int entryPos = 0;
+          a++;
+          /* read "host/file" */
+          a+=binput(a,line,HTS_URLMAXSIZE);
+          a+=binput(a,line+strlen(line),HTS_URLMAXSIZE);
+          /* read position */
+          a+=binput(a,linepos,200);
+          /* an entry without a readable position cannot be located */
+          if (sscanf(linepos,"%d",&entryPos) != 1)
+            continue;
+
+          /* Add entry */
+          inthash_add(cache->hash,line,entryPos);
+
+          /* First link as starting URL */
+          if (!firstSeen) {
+            if (strstr(line, "/robots.txt") == NULL) {
+              size_t used = strlen(cache->startUrl);
+              firstSeen = 1;
+              /* bounded append: line can be longer than startUrl */
+              if (used < sizeof(cache->startUrl)) {
+                snprintf(cache->startUrl + used,
+                         sizeof(cache->startUrl) - used, "%s%s",
+                         link_has_authority(line) ? "" : "http://", line);
+              }
+            }
+          }
+        }
+      }
+      loaded = 1;
+    }
+  }
+  /* the in-memory copy of the index file is not needed anymore */
+  free(use);
+  if (!loaded) {
+    /* do not keep half-opened caches around */
+    if (cache->dat != NULL) {
+      fclose(cache->dat);
+      cache->dat = NULL;
+    }
+    if (cache->ndx != NULL) {
+      fclose(cache->ndx);
+      cache->ndx = NULL;
+    }
+  }
+  return loaded;
+}
+
+/* Return a String holding url with URL escapes decoded:
+   '+' becomes a space, "%%" becomes '%', and '%' followed by two characters
+   that sscanf("%x") can parse becomes the corresponding byte (the two
+   characters are consumed even when they cannot be parsed).
+   A '%' with fewer than two characters after it is dropped. */
+static String DecodeUrl(const char * url) {
+  const char * p;
+  String decoded = STRING_EMPTY;
+  StringClear(decoded);
+  for(p = url ; *p != '\0' ; p++) {
+    if (*p == '+') {
+      StringAddchar(decoded, ' ');
+    } else if (*p != '%') {
+      StringAddchar(decoded, *p);
+    } else if (p[1] == '%') {
+      StringAddchar(decoded, '%');
+      p++;
+    } else if (p[1] != 0 && p[2] != 0) {
+      char hex[3];
+      int codepoint = 0;
+      hex[0] = p[1];
+      hex[1] = p[2];
+      hex[2] = 0;
+      if (sscanf(hex, "%x", &codepoint) == 1) {
+        StringAddchar(decoded, (char)codepoint);
+      }
+      p += 2;
+    }
+  }
+  return decoded;
+}
+
+/* Locked wrapper: runs PT_ReadCache__Old_u() while holding the index's
+   fileLock and returns its result. */
+static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags) {
+  PT_Element element;
+  MutexLock(&index->slots.formatOld.fileLock);
+  element = PT_ReadCache__Old_u(index, url, flags);
+  MutexUnlock(&index->slots.formatOld.fileLock);
+  return element;
+}
+
+/* Read one length-prefixed string with cache_rstr() and keep at most
+   dstSize-1 characters of it in dst (always NUL-terminated).
+   cache_rstr() never stores more than 32768 bytes plus the terminator,
+   which is what the temporary buffer is sized for. */
+static void cache_rstr_bounded(FILE* fp, char* dst, size_t dstSize) {
+  char tmp[32768 + 1];
+  tmp[0] = '\0';
+  cache_rstr(fp, tmp);
+  dst[0] = '\0';
+  strncat(dst, tmp, dstSize - 1);
+}
+
+/* Unlocked read of url from an old-format (.dat/.ndx) cache.
+   Looks url up in the index hash table (an optional leading "http://" is
+   skipped), seeks to the entry in the .dat file and reads its meta-data:
+   a raw OLD_htsblk record for cache 1.0, length-prefixed fields for 1.x.
+   When flags has FETCH_BODY set the body is loaded from the .dat file
+   (positive position) or from the previously saved file (negative position).
+   Returns NULL on invalid arguments or allocation failure of the element;
+   otherwise a new element whose statuscode is STATUSCODE_INVALID and whose
+   msg describes the problem when something failed. The element's location
+   is either NULL or a heap copy. */
+static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flags) {
+  PT_Index__Old cache = (PT_Index__Old) &index_->slots.formatOld;
+  long int hash_pos;
+  int hash_pos_return;
+  char location_default[HTS_URLMAXSIZE*2];
+  char previous_save[HTS_URLMAXSIZE*2];
+  char previous_save_[HTS_URLMAXSIZE*2];
+  PT_Element r;
+  int ok=0;
+
+  if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
+    return NULL;
+  if ((r = PT_ElementNew()) == NULL)
+    return NULL;
+  location_default[0] = '\0';
+  previous_save[0] = previous_save_[0] = '\0';
+  memset(r, 0, sizeof(_PT_Element));
+  /* location temporarily points to a stack buffer; it is replaced by a heap
+     copy (or NULL) before returning */
+  r->location = location_default;
+  strcpy(r->location, "");
+  if (strncmp(url, "http://", 7) == 0)
+    url += 7;
+  hash_pos_return=inthash_read(cache->hash, url, (long int*)&hash_pos);
+
+  if (hash_pos_return) {
+    int pos = (int) hash_pos;     /* simply */
+
+    if (fseek(cache->dat, (pos>0) ? pos : (-pos), SEEK_SET) == 0) {
+      /* Import cache 1.0 */
+      if (cache->version==0) {
+        OLD_htsblk old_r;
+        if (fread((char*) &old_r,1,sizeof(old_r),cache->dat) == sizeof(old_r)) { // read everything (including statuscode etc)
+          int i;
+          const char * relUrl = jump_protocol_and_auth(url);
+          r->statuscode = old_r.statuscode;
+          r->size = old_r.size;   // file size
+          /* the record comes from the cache file: clip to our field sizes */
+          r->msg[0] = '\0';
+          strncat(r->msg, old_r.msg, sizeof(r->msg) - 1);
+          r->contenttype[0] = '\0';
+          strncat(r->contenttype, old_r.contenttype, sizeof(r->contenttype) - 1);
+
+          /* Guess the destination filename.. this sucks, because this method is not reliable.
+             Yes, the old 1.0 cache format was *that* bogus. /rx */
+#define FORBIDDEN_CHAR(c) (c == '~' \
+  || c == '\\' \
+  || c == ':' \
+  || c == '*' \
+  || c == '?' \
+  || c == '\"' \
+  || c == '<' \
+  || c == '>' \
+  || c == '|' \
+  || c == '@' \
+  || ((unsigned char) c ) <= 31 \
+  || ((unsigned char) c ) == 127 \
+  )
+          previous_save_[0] = '\0';
+          if (relUrl != NULL) {
+            String urlDecoded = DecodeUrl(relUrl);
+            /* the URL length is not limited: clip to the buffer */
+            strncat(previous_save_, StringBuff(urlDecoded), sizeof(previous_save_) - 1);
+            StringFree(urlDecoded);
+          }
+          for(i = 0 ; previous_save_[i] != '\0' && previous_save_[i] != '?' ; i++) {
+            if (FORBIDDEN_CHAR(previous_save_[i])) {
+              previous_save_[i] = '_';
+            }
+          }
+          previous_save_[i] = '\0';
+#undef FORBIDDEN_CHAR
+          ok = 1;     /* import ok */
+        }
+        /* */
+        /* Cache 1.1 */
+      } else {
+        char check[256];
+        unsigned long size_read;
+        check[0]='\0';
+        /* every string is clipped to its destination: the stored lengths
+           come from the cache file and can exceed the field sizes */
+        cache_rint(cache->dat,&r->statuscode);
+        cache_rLLint(cache->dat,&r->size);
+        cache_rstr_bounded(cache->dat,r->msg,sizeof(r->msg));
+        cache_rstr_bounded(cache->dat,r->contenttype,sizeof(r->contenttype));
+        if (cache->version >= 3)
+          cache_rstr_bounded(cache->dat,r->charset,sizeof(r->charset));
+        cache_rstr_bounded(cache->dat,r->lastmodified,sizeof(r->lastmodified));
+        cache_rstr_bounded(cache->dat,r->etag,sizeof(r->etag));
+        cache_rstr_bounded(cache->dat,r->location,sizeof(location_default));
+        if (cache->version >= 2)
+          cache_rstr_bounded(cache->dat,r->cdispo,sizeof(r->cdispo));
+        if (cache->version >= 4) {
+          cache_rstr_bounded(cache->dat, previous_save_, sizeof(previous_save_));  // adr
+          cache_rstr_bounded(cache->dat, previous_save_, sizeof(previous_save_));  // fil
+          previous_save[0] = '\0';
+          cache_rstr_bounded(cache->dat, previous_save_, sizeof(previous_save_));  // save
+        }
+        if (cache->version >= 5) {
+          r->headers = cache_rstr_addr(cache->dat);
+        }
+        //
+        cache_rstr_bounded(cache->dat,check,sizeof(check));
+        if (strcmp(check,"HTS")==0) {           /* integrity OK */
+          ok=1;
+        }
+        cache_rLLint(cache->dat, &size_read);       /* re-read the size to be sure of the declared length */
+        if (size_read > 0) {                         /* size stored here */
+          r->size = size_read;
+        } else {                              /* no data directly in the cache */
+          r->size = 0;
+        }
+      }
+
+      /* Check destination filename */
+
+      {
+        PT_Index__Old index = cache;
+        /* same resolution rules as in the new-format reader */
+        if (previous_save_[0] != '\0') {
+          int pathLen = (int) strlen(index->path);
+          if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format
+            /* both buffers have the same size */
+            strcpy(previous_save, previous_save_);
+          }
+          // relative ? (hack)
+          else if (index->safeCache
+            || (previous_save_[0] != '/' // /home/foo/bar.gif
+            && ( !isalpha((unsigned char) previous_save_[0]) || previous_save_[1] != ':' ) ) // c:/home/foo/bar.gif
+            )
+          {
+            index->safeCache = 1;
+            snprintf(previous_save, sizeof(previous_save), "%s%s", index->path, previous_save_);
+          }
+          // bogus format (includes buggy absolute path)
+          else {
+            /* guess previous path */
+            if (index->fixedPath == 0) {
+              const char * start = jump_protocol_and_auth(url);
+              const char * end = start ? strchr(start, '/') : NULL;
+              /* only subtract when both pointers are valid */
+              int len = (start != NULL && end != NULL) ? (int) (end - start) : 0;
+              if (len > 0 && len < 128) {
+                char piece[128 + 2];
+                const char * where;
+                piece[0] = '\0';
+                strncat(piece, start, len);
+                if ((where = strstr(previous_save_, piece)) != NULL) {
+                  index->fixedPath = (int) (where - previous_save_); // offset to relative path
+                }
+              }
+            }
+            if (index->fixedPath > 0) {
+              int saveLen = (int) strlen(previous_save_);
+              if (index->fixedPath < saveLen) {
+                snprintf(previous_save, sizeof(previous_save), "%s%s", index->path, previous_save_ + index->fixedPath);
+              } else {
+                snprintf(r->msg, sizeof(r->msg), "Bogus fixePath prefix for %s (prefixLen=%d)", previous_save_, (int)index->fixedPath);
+                r->statuscode = STATUSCODE_INVALID;
+              }
+            } else {
+              snprintf(previous_save, sizeof(previous_save), "%s%s", index->path, previous_save_);
+            }
+          }
+        }
+      }
+
+      /* Read data */
+      if (ok) {
+        r->adr = NULL;
+        if ( (r->statuscode>=0) && (r->statuscode<=999)) {
+          if (pos<0) {
+            /* body is in the previously saved file */
+            if (flags & FETCH_BODY) {
+              FILE* fp = fopen(previous_save, "rb");
+              if (fp != NULL) {
+                r->adr = (char*) malloc(r->size + 1);
+                if (r->adr != NULL) {
+                  if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) {
+                    r->statuscode=STATUSCODE_INVALID;
+                    strcpy(r->msg,"Read error in cache disk data");
+                  }
+                  r->adr[r->size] = '\0';
+                } else {
+                  r->statuscode=STATUSCODE_INVALID;
+                  strcpy(r->msg,"Read error (memory exhausted) from cache");
+                }
+                fclose(fp);
+              } else {
+                r->statuscode = STATUSCODE_INVALID;
+                strcpy(r->msg, "Previous cache file not found (2)");
+              }
+            }
+          } else {
+            /* body follows in the .dat file: read it at once */
+            if (flags & FETCH_BODY) {
+              r->adr=(char*) malloc(r->size + 1);
+              if (r->adr!=NULL) {
+                if (fread(r->adr, 1, r->size,cache->dat) != r->size) {  // error
+                  free(r->adr);
+                  r->adr=NULL;
+                  r->statuscode=STATUSCODE_INVALID;
+                  strcpy(r->msg,"Cache Read Error : Read Data");
+                } else
+                  r->adr[r->size] = '\0';
+              } else {  // error
+                r->statuscode=STATUSCODE_INVALID;
+                strcpy(r->msg,"Cache Memory Error");
+              }
+            }
+          }
+        } else {
+          r->statuscode=STATUSCODE_INVALID;
+          strcpy(r->msg,"Cache Read Error : Bad Data");
+        }
+      } else {  // error
+        r->statuscode=STATUSCODE_INVALID;
+        strcpy(r->msg,"Cache Read Error : Read Header");
+      }
+    } else {
+      r->statuscode=STATUSCODE_INVALID;
+      strcpy(r->msg,"Cache Read Error : Seek Failed");
+    }
+  } else {
+    r->statuscode=STATUSCODE_INVALID;
+    strcpy(r->msg,"File Cache Entry Not Found");
+  }
+  /* hand over a heap copy of the location (the stack buffer dies here) */
+  if (r->location[0] != '\0') {
+    r->location = strdup(r->location);
+  } else {
+    r->location = NULL;
+  }
+  return r;
+}
+
+/* Locked wrapper: runs PT_LookupCache__Old_u() while holding the index's
+   fileLock and returns its result. */
+static int PT_LookupCache__Old(PT_Index index, const char* url) {
+  int found;
+  MutexLock(&index->slots.formatOld.fileLock);
+  found = PT_LookupCache__Old_u(index, url);
+  MutexUnlock(&index->slots.formatOld.fileLock);
+  return found;
+}
+
+/* Unlocked lookup in an old-format index.
+   Returns 1 when url (an optional leading "http://" is skipped) is found in
+   the index hash table, 0 otherwise or when the index has no hash table or
+   url is NULL or empty. */
+static int PT_LookupCache__Old_u(PT_Index index_, const char* url) {
+  if (index_ != NULL) {
+    /* use the old-format slot: this is the old-format lookup */
+    PT_Index__Old cache = (PT_Index__Old) &index_->slots.formatOld;
+    if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
+      return 0;
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    if (inthash_read(cache->hash, url, NULL))
+      return 1;
+  }
+  return 0;
+}
+
diff --git a/src/proxy/store.h b/src/proxy/store.h
new file mode 100644
index 0000000..805bc20
--- /dev/null
+++ b/src/proxy/store.h
@@ -0,0 +1,105 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: Cache manager for ProxyTrack */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef WEBHTTRACK_PROXYTRACK_STORE
+#define WEBHTTRACK_PROXYTRACK_STORE
+
+/* Proxy: handle types.
+   The structures named here are only declared in this header, not defined;
+   the pointer typedefs are the handle types, and PT_Index / PT_Indexes are
+   the ones used by the prototypes further down. */
+
+typedef struct _PT_Index _PT_Index;
+typedef struct _PT_Indexes _PT_Indexes;
+
+typedef struct _PT_Index *PT_Index;
+typedef struct _PT_Indexes *PT_Indexes;
+
+typedef struct _PT_Cache _PT_Cache;
+typedef struct _PT_Cache *PT_Cache;
+
+typedef struct _PT_CacheItem _PT_CacheItem;
+typedef struct _PT_CacheItem *PT_CacheItem;
+
+typedef struct _PT_Element { /* one element (status, headers, body, metadata) handed out through a PT_Element handle */
+ int indexId; // index identifier, if suitable (!= -1)
+ //
+ int statuscode; // status code, -1=error, 200=OK, 201=..etc (cf RFC1945)
+ char* adr; // address of the memory block, NULL=empty
+ char* headers; // address of the headers, if present
+ unsigned long int size; // file size
+ char msg[1024]; // error message ("\0"=undefined)
+ char contenttype[64]; // content-type ("text/html" for example)
+ char charset[64]; // charset ("iso-8859-1" for example)
+ char* location; // the actual 'location' may be copied here
+ char lastmodified[64]; // Last-Modified
+ char etag[64]; // Etag
+ char cdispo[256]; // Content-Disposition, truncated
+} _PT_Element;
+typedef struct _PT_Element *PT_Element;
+
+typedef enum PT_Fetch_Flags { /* selects what a read should fetch.
+   NOTE(review): the enumerators have no initializers, so they are 0 and 1.
+   The cache reader earlier in this patch tests `flags & FETCH_BODY`; the
+   same kind of test on FETCH_HEADERS could never be true -- confirm that no
+   caller relies on FETCH_HEADERS as a mask bit. */
+ FETCH_HEADERS, // fetch headers (implicit value 0)
+ FETCH_BODY // fetch body (implicit value 1)
+} PT_Fetch_Flags;
+
+/* Locking.
+   PT_Mutex is a void* when _WIN32 is defined and a pthread_mutex_t
+   otherwise.
+   NOTE(review): this header contains no #include of its own, so on
+   non-Windows builds pthread_mutex_t must already be declared (e.g. by
+   <pthread.h>) before this header is included -- confirm the include order
+   in the files that use it. */
+#ifdef _WIN32
+typedef void* PT_Mutex;
+#else
+typedef pthread_mutex_t PT_Mutex;
+#endif
+
+void MutexInit(PT_Mutex *pMutex);
+void MutexLock(PT_Mutex *pMutex);
+void MutexUnlock(PT_Mutex *pMutex);
+void MutexFree(PT_Mutex *pMutex);
+
+/* Indexes: operations taking a PT_Indexes handle.
+   Presumably PT_New / PT_Delete create and destroy that handle, 'indexId'
+   matches the indexId field of _PT_Element, and 'flags' carries
+   PT_Fetch_Flags values -- TODO confirm against the implementation. */
+PT_Indexes PT_New(void);
+void PT_Delete(PT_Indexes index);
+PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags);
+int PT_LookupIndex(PT_Indexes indexes, const char* url);
+int PT_AddIndex(PT_Indexes index, const char *path);
+int PT_RemoveIndex(PT_Indexes index, int indexId);
+int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex);
+PT_Index PT_GetIndex(PT_Indexes indexes, int indexId);
+
+/* Indexes list.
+   PT_Enumerate returns a char** and PT_Enumerate_Delete takes a char***;
+   presumably the latter releases a list obtained from the former and
+   resets the caller's pointer -- TODO confirm against the implementation. */
+PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes);
+char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree);
+void PT_Enumerate_Delete(char ***plist);
+
+/* Index: operations on a single PT_Index handle.
+   'flags' presumably carries PT_Fetch_Flags values -- TODO confirm.
+   NOTE(review): time_t is used below but this header contains no #include,
+   so <time.h> must already have been included by the including file. */
+PT_Index PT_LoadCache(const char *filename);
+void PT_Index_Delete(PT_Index *pindex);
+PT_Element PT_ReadCache(PT_Index index, const char* url, int flags);
+int PT_LookupCache(PT_Index index, const char* url);
+time_t PT_Index_Timestamp(PT_Index index);
+
+/* Elements.
+   PT_ElementNew returns a PT_Element handle; PT_Element_Delete takes a
+   pointer to such a handle (presumably so it can reset the caller's copy --
+   TODO confirm against the implementation). */
+PT_Element PT_ElementNew(void);
+void PT_Element_Delete(PT_Element *pentry);
+
+#endif