diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:57:43 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:57:43 +0000 |
commit | 64cc4a88da8887ef1f7f4d90be0158d2cc76222d (patch) | |
tree | e72af709fbce8bc495f51e7f0518de9a9a2c3b7f /src/proxy | |
parent | 844ecc37072d515513177c65a8c9dc35c9cdfc1a (diff) |
httrack 3.40.4
Diffstat (limited to 'src/proxy')
-rw-r--r-- | src/proxy/AUTHORS | 1 | ||||
-rw-r--r-- | src/proxy/COPYING | 340 | ||||
-rw-r--r-- | src/proxy/changelog.txt | 20 | ||||
-rw-r--r-- | src/proxy/main.c | 164 | ||||
-rwxr-xr-x | src/proxy/proxystrings.h | 153 | ||||
-rw-r--r-- | src/proxy/proxytrack.c | 1621 | ||||
-rw-r--r-- | src/proxy/proxytrack.h | 288 | ||||
-rw-r--r-- | src/proxy/store.c | 1505 | ||||
-rw-r--r-- | src/proxy/store.h | 105 |
9 files changed, 4197 insertions, 0 deletions
diff --git a/src/proxy/AUTHORS b/src/proxy/AUTHORS new file mode 100644 index 0000000..66da09f --- /dev/null +++ b/src/proxy/AUTHORS @@ -0,0 +1 @@ +Xavier Roche <roche@httrack.com>
diff --git a/src/proxy/COPYING b/src/proxy/COPYING new file mode 100644 index 0000000..d60c31a --- /dev/null +++ b/src/proxy/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/src/proxy/changelog.txt b/src/proxy/changelog.txt new file mode 100644 index 0000000..f5ae48b --- /dev/null +++ b/src/proxy/changelog.txt @@ -0,0 +1,20 @@ +0.4 - Sept 18 2005
+- implemented very limited WebDAV (RFC2518) primitives
+- index enumeration fixes
+- limited access to the proxy server through HTTP in non-proxy mode
+
+0.3 - Sept 10 2005
+- implemented ICPv2 server (tested with Squid Web Proxy Cache) implementing ICP_OP_QUERY and ICP_OP_SECHO
+- redirects for URLs with missing ending '/'
+- fixed htsnet.h macro errors (bogus port during address copy)
+- keep-alive fixes
+
+0.2 - Sept 4 2005
+- hack to fix the "external files stored as absolute references" bug
+- proper locking for indexes (unlocked zFile)
+- added previous httrack .dat/.ndx cache format
+- added catalog as index fallback
+- started to write ICPv2 server (RFC2186), but not yet ready
+
+0.1 - Aug 27 2005
+- initial release: HTTP (RFC2616) proxy and aggregation ready
diff --git a/src/proxy/main.c b/src/proxy/main.c new file mode 100644 index 0000000..e48b51d --- /dev/null +++ b/src/proxy/main.c @@ -0,0 +1,164 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +Please visit our Website: http://www.httrack.com +*/ + +/* ------------------------------------------------------------ */ +/* File: ProxyTrack, httrack cache-based proxy */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +/* Standard includes */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <ctype.h> + +#include "htsbase.h" +#include "htsnet.h" +#include "htslib.h" +#include "store.h" +#include "proxytrack.h" + +#ifndef _WIN32 +#include <signal.h> +static void sig_brpipe( int code ) { + /* ignore */ +} +#endif + +static int scanHostPort(const char* str, char *host, int *port) { + char* pos = strrchr(str, ':'); + if (pos != NULL) { + int n = (int) ( pos - str ); + if (n < 256) { + host[0] = '\0'; + strncat(host, str, n); + if (sscanf(pos + 1, "%d", port) == 1) { + return 1; + } + } + } + return 0; +} + +int main(int argc, char* argv[]) +{ + int i; + int ret = 0; + int proxyPort, icpPort; + char proxyAddr[256 + 1], icpAddr[256 + 1]; + PT_Indexes index; + +#ifdef _WIN32 + { + WORD wVersionRequested; // requested version WinSock API + WSADATA wsadata; // Windows Sockets API data + int stat; + wVersionRequested = 0x0101; + stat = WSAStartup( wVersionRequested, &wsadata ); + if (stat != 0) { + fprintf(stderr, "Winsock not found!\n"); + return -1; + } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) { + fprintf(stderr, "WINSOCK.DLL does not support version 1.1\n"); + WSACleanup(); + return -1; + } + } +#endif + + /* Args */ + printf("ProxyTrack %s, build proxies upon HTTrack Website Copier Archives\n", PROXYTRACK_VERSION); + printf("Copyright (C) Xavier Roche and other contributors\n"); + printf("\n"); + printf("This program is free software; you can redistribute it and/or\n"); + printf("modify it under the terms of the GNU General Public License\n"); + printf("as published by the Free Software Foundation; either version 2\n"); + printf("of the 
License, or any later version.\n"); + printf("\n"); + printf("*** This version is a development release ***\n"); + printf("\n"); + if (argc < 3 + || !scanHostPort(argv[1], proxyAddr, &proxyPort) + || !scanHostPort(argv[2], icpAddr, &icpPort)) + { + fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | --list <file-list> ) ..]\n", argv[0]); + fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]); + return 1; + } + index = PT_New(); + for(i = 3 ; i < argc ; i++) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "--list") == 0) { + if (i + 1 < argc) { + char line[256 + 1]; + FILE *fp = fopen(argv[++i], "rb"); + if (fp == NULL) { + fprintf(stderr, "error: could not process list %s\n", argv[i]); + exit(1); + } + while(linput(fp, line, 256)) { + int itemsAdded = PT_AddIndex(index, line); + if (itemsAdded > 0) { + fprintf(stderr, "processed: %s (%d items added)\n", line, itemsAdded); + } else if (itemsAdded == 0) { + fprintf(stderr, "processed: %s (no items added)\n", line); + } else { + fprintf(stderr, "error: could not process %s\n", line); + } + } + fclose(fp); + } + } else { + fprintf(stderr, "* bad arg %s\n", argv[i]); + exit(1); + } + } else { + int itemsAdded = PT_AddIndex(index, argv[i]); + if (itemsAdded > 0) { + fprintf(stderr, "processed: %s (%d items added)\n", argv[i], itemsAdded); + } else if (itemsAdded == 0) { + fprintf(stderr, "processed: %s (no items added)\n", argv[i]); + } else { + fprintf(stderr, "error: could not process %s\n", argv[i]); + } + } + } + + /* sigpipe */ +#ifndef _WIN32 + signal( SIGPIPE , sig_brpipe ); // broken pipe (write into non-opened socket) +#endif + + /* Go */ + ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index); + + /* Wipe */ + PT_Delete(index); + +#ifdef _WIN32 + WSACleanup(); +#endif + + return ret; +} + diff --git a/src/proxy/proxystrings.h 
b/src/proxy/proxystrings.h new file mode 100755 index 0000000..87bcf34 --- /dev/null +++ b/src/proxy/proxystrings.h @@ -0,0 +1,153 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: Strings */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +// Strings a bit safer than static buffers + +#ifndef HTS_STRINGS_DEFSTATIC +#define HTS_STRINGS_DEFSTATIC + +typedef struct String { + char* buff; + int len; + int capa; +} String; + +#define STRING_EMPTY {NULL, 0, 0} +#define STRING_BLK_SIZE 256 +#define StringBuff(blk) ((blk).buff) +#define StringLength(blk) ((blk).len) +#define StringCapacity(blk) ((blk).capa) +#define StringRoom(blk, size) do { \ + if ((blk).len + (int)(size) + 1 > (blk).capa) { \ + (blk).capa = ((blk).len + (size) + 1) * 2; \ + (blk).buff = (char*) realloc((blk).buff, (blk).capa); \ + } \ +} while(0) +#define StringBuffN(blk, size) StringBuffN_(&(blk), size) +static char* StringBuffN_(String* blk, int size) { + StringRoom(*blk, (blk->len) + size); + return StringBuff(*blk); 
+} +#define StringClear(blk) do { \ + StringRoom(blk, 0); \ + (blk).buff[0] = '\0'; \ + (blk).len = 0; \ +} while(0) +#define StringFree(blk) do { \ + if ((blk).buff != NULL) { \ + free((blk).buff); \ + (blk).buff = NULL; \ + } \ + (blk).capa = 0; \ + (blk).len = 0; \ +} while(0) +#define StringMemcat(blk, str, size) do { \ + StringRoom(blk, size); \ + if ((int)(size) > 0) { \ + memcpy((blk).buff + (blk).len, (str), (size)); \ + (blk).len += (size); \ + } \ + *((blk).buff + (blk).len) = '\0'; \ +} while(0) +#define StringAddchar(blk, c) do { \ + char __c = (c); \ + StringMemcat(blk, &__c, 1); \ +} while(0) +static void* StringAcquire(String* blk) { + void* buff = blk->buff; + blk->buff = NULL; + blk->capa = 0; + blk->len = 0; + return buff; +} +static StringAttach(String* blk, char** str) { + StringFree(*blk); + if (str != NULL && *str != NULL) { + blk->buff = *str; + blk->capa = (int)strlen(blk->buff); + blk->len = blk->capa; + *str = NULL; + } +} +#define StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? 
(int)strlen(str) : 0) +#define StringStrcpy(blk, str) do { \ + StringClear(blk); \ + StringStrcat(blk, str); \ +} while(0) + +/* Tools */ + +static int ehexh(char c) { + if ((c>='0') && (c<='9')) return c-'0'; + if ((c>='a') && (c<='f')) c-=('a'-'A'); + if ((c>='A') && (c<='F')) return (c-'A'+10); + return 0; +} + +static int ehex(const char* s) { + return 16*ehexh(*s)+ehexh(*(s+1)); +} + +static void unescapehttp(const char* s, String* tempo) { + int i; + for (i = 0; s[i] != '\0' ; i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(*tempo, '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + StringAddchar(*tempo, (char) hc); + i++; // sauter 2 caractères finalement + } + else if (s[i]=='+') { + StringAddchar(*tempo, ' '); + } + else + StringAddchar(*tempo, s[i]); + } +} + +static void escapexml(const char* s, String* tempo) { + int i; + for (i=0 ; s[i] != '\0' ; i++) { + if (s[i] == '&') + StringStrcat(*tempo, "&"); + else if (s[i] == '<') + StringStrcat(*tempo, "<"); + else if (s[i] == '>') + StringStrcat(*tempo, ">"); + else if (s[i] == '\"') + StringStrcat(*tempo, """); + else + StringAddchar(*tempo, s[i]); + } +} + +#endif diff --git a/src/proxy/proxytrack.c b/src/proxy/proxytrack.c new file mode 100644 index 0000000..7604804 --- /dev/null +++ b/src/proxy/proxytrack.c @@ -0,0 +1,1621 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Please visit our Website: http://www.httrack.com +*/ + +/* ------------------------------------------------------------ */ +/* File: ProxyTrack, httrack cache-based proxy */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + + +/* + +/\/\/\/\/\/\/\/\/\/\/\/\/\ PENDING WORK /\/\/\/\/\/\/\/\/\/\/\/\/\ +- Etag update handling +- Other cache archive handling (.arc) +- Live plug/unplug of archives +- Listing +\/\/\/\/\/\/\/\/\/\/\/\/\/ PENDING WORK \/\/\/\/\/\/\/\/\/\/\/\/\/ + +*/ + +/* +Architecture rough draft +Xavier Roche 2005 + +Aim: Building a sub-proxy to be linked with other top level proxies (such as Squid) +Basic design: Classical HTTP/1.0 proxy server, with ICP server support +Internal data design: HTTrack cache indexing in fast hashtables, with 'pluggable' design (add/removal of caches on-the-fly) + + +Index structure organization: +----------------------------- + +foo/hts-cache/new.zip -----> Index[0] \ +bar/hts-cache/new.zip -----> Index[1] > Central Index Lookup (CIL) +baz/hts-cache/new.zip -----> Index[2] / +.. -----> .. + +Indexes are hashtables with URL (STRING) -> INTEGER lookup. 
+ +URL -----> CIL Ask for index ID +URL -----> Index[ID] Ask for index properties (ZIP cache index) + + +Lookup of an entry: +------------------- + +ID = CIL[URL] +If ID is valid Then + return SUCCESS +Else + return FAILURE +EndIf + + +Fetching of an entry: +--------------------- + +RESOURCE = null +ID = CIL[URL] +If ID is valid Then + OFFSET = Index[ID][URL] + If OFFSET is valid Then + RESOURCE = Fetch(ID, OFFSET) + EndIf +EndIf + + +Removal of index N: +------------------- + +For all entries in Index[N] + URL(key) -----> Lookup all other caches + Found: Replace in CIL + Not Found: Delete entry in CIL +Done +Delete Index[N] + + +Adding of index N: +------------------ + +Build Index[N] +For all entries in Index[N] + URL(key) -----> Lookup in CIL + Found: Do nothing if corresponding Cache is newer than this one + Not Found: Add/Replace entry in CIL +Done + +Remark: If no cache newer than the added one is found, all entries can be added without any lookup (optim) + +*/ + +/* HTTrack definitions */ +#include "htsbase.h" +#include "htsnet.h" +#include "htslib.h" +#include "htsglobal.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <fcntl.h> +#if HTS_WIN +#else +#include <arpa/inet.h> +#endif +#ifndef _WIN32 +#include <signal.h> +#endif +/* END specific definitions */ + +/* String */ +#include "proxystrings.h" + +/* Network base */ +#include "htsbasenet.h" + +/* définitions globales */ +#include "htsglobal.h" + +/* htslib */ +/*#include "htslib.h"*/ + +/* HTTrack Website Copier Library */ +#include "httrack-library.h" + +/* htsweb */ +#include "htsinthash.h" + +/* ProxyTrack */ +#include "proxytrack.h" + +/* Store manager */ +#include "../minizip/mztools.h" +#include "store.h" + +/* threads */ +#ifdef _WIN32 +#include <process.h> /* _beginthread, _endthread */ +#else +#include <pthread.h> +#endif + +/* External references */ +// htsErrorCallback htsCallbackErr = NULL; +int htsMemoryFastXfr = 1; /* fast xfr by default */ +void 
abortLog__fnc(char* msg, char* file, int line); +void abortLog__fnc(char* msg, char* file, int line) { + FILE* fp = fopen("CRASH.TXT", "wb"); + if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); + if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); + if (!fp) fp = fopen("CRASH.TXT", "wb"); + if (fp) { + fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '%s', line %d\r\n", file, line); + fprintf(fp, "Reason:\r\n%s\r\n", msg); + fflush(fp); + fclose(fp); + } +} +// HTSEXT_API t_abortLog abortLog__ = abortLog__fnc; /* avoid VC++ inlining */ +#define webhttrack_lock(A) do{}while(0) + +/* Static definitions */ + +static int linputsoc(T_SOC soc, char* s, int max) { + int c; + int j=0; + do { + unsigned char ch; + if (recv(soc, &ch, 1, 0) == 1) { + c = ch; + } else { + c = EOF; + } + if (c!=EOF) { + switch(c) { + case 13: break; // sauter CR + case 10: c=-1; break; + case 9: case 12: break; // sauter ces caractères + default: s[j++]=(char) c; break; + } + } + } while((c!=-1) && (c!=EOF) && (j<(max-1))); + s[j]='\0'; + return j; +} + +static int check_readinput_t(T_SOC soc, int timeout) { + if (soc != INVALID_SOCKET) { + fd_set fds; // poll structures + struct timeval tv; // structure for select + FD_ZERO(&fds); + FD_SET(soc,&fds); + tv.tv_sec=timeout; + tv.tv_usec=0; + select((int)(soc + 1),&fds,NULL,NULL,&tv); + if (FD_ISSET(soc,&fds)) + return 1; + else + return 0; + } else + return 0; +} + +static int linputsoc_t(T_SOC soc, char* s, int max, int timeout) { + if (check_readinput_t(soc, timeout)) { + return linputsoc(soc, s, max); + } + return -1; +} + +static void unescapeini(char* s, String* tempo) { + int i; + char lastc=0; + for (i=0;i<(int) strlen(s);i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(*tempo, lastc = '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + if (!is_retorsep(hc) || !is_retorsep(lastc)) { + StringAddchar(*tempo, lastc = (char) hc); + } + i++; // sauter 2 caractères finalement + } + else + StringAddchar(*tempo, 
lastc = s[i]); + } +} + +static int gethost(const char* hostname, SOCaddr *server, size_t server_size) { + if (hostname != NULL && *hostname != '\0') { +#if HTS_INET6==0 + /* + ipV4 resolver + */ + t_hostent* hp=gethostbyname(hostname); + if (hp!=NULL) { + if ( (hp->h_length) && ( ((unsigned int) hp->h_length) <= buffer->addr_maxlen) ) { + SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length); + return 1; + } + } +#else + /* + ipV6 resolver + */ + struct addrinfo* res = NULL; + struct addrinfo hints; + memset(&hints, 0, sizeof(hints)); +#if 0 + if (IPV6_resolver == 1) // V4 only (for bogus V6 entries) + hints.ai_family = PF_INET; + else if (IPV6_resolver == 2) // V6 only (for testing V6 only) + hints.ai_family = PF_INET6; + else +#endif + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + if (getaddrinfo(hostname, NULL, &hints, &res) == 0) { + if (res) { + if ( (res->ai_addr) && (res->ai_addrlen) ) { + SOCaddr_copyaddr(*server, server_size, res->ai_addr, res->ai_addrlen); + freeaddrinfo(res); + return 1; + } + } + } + if (res) { + freeaddrinfo(res); + } + +#endif + } + return 0; +} + +static String getip(SOCaddr *server, int serverLen) { + String s = STRING_EMPTY; +#if HTS_INET6==0 + unsigned int sizeMax = sizeof("999.999.999.999:65535"); +#else + unsigned int sizeMax = sizeof("ffff:ffff:ffff:ffff:ffff:ffff:ffff:65535"); +#endif + char * dotted = malloc(sizeMax + 1); + unsigned short port = ntohs(SOCaddr_sinport(*server)); + if (dotted == NULL) { + CRITICAL("memory exhausted"); + return s; + } + SOCaddr_inetntoa(dotted, sizeMax, *server, serverLen); + sprintf(dotted + strlen(dotted), ":%d", port); + StringAttach(&s, &dotted); + return s; +} + + +static T_SOC smallserver_init(const char* adr, int port, int family) { + SOCaddr server; + size_t server_size = sizeof(server); + + memset(&server, 0, sizeof(server)); + SOCaddr_initany(server, server_size); + if (gethost(adr, &server, server_size)) { // 
host name + T_SOC soc = INVALID_SOCKET; + if ( (soc = socket(SOCaddr_sinfamily(server), family, 0)) != INVALID_SOCKET) { + SOCaddr_initport(server, port); + if ( bind(soc,(struct sockaddr*) &server, (int)server_size) == 0 ) { + if (family != SOCK_STREAM + || listen(soc, 10) >=0 ) { + return soc; + } else { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + soc=INVALID_SOCKET; + } + } else { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + soc=INVALID_SOCKET; + } + } + } + return INVALID_SOCKET; +} + +static int proxytrack_start(PT_Indexes indexes, T_SOC soc, T_SOC socICP); +int proxytrack_main(char* proxyAddr, int proxyPort, + char* icpAddr, int icpPort, + PT_Indexes index) { + int returncode = 0; + T_SOC soc = smallserver_init(proxyAddr, proxyPort, SOCK_STREAM); + T_SOC socICP = smallserver_init(proxyAddr, icpPort, SOCK_DGRAM); + if (soc != INVALID_SOCKET + && socICP != INVALID_SOCKET) + { + char url[HTS_URLMAXSIZE*2]; + char method[32]; + char data[32768]; + url[0]=method[0]=data[0]='\0'; + // + printf("HTTP Proxy installed on %s:%d/\n", proxyAddr, proxyPort); + printf("ICP Proxy installed on %s:%d/\n", icpAddr, icpPort); +#ifndef _WIN32 + { + pid_t pid = getpid(); + printf("PID=%d\n", (int)pid); + } +#endif + fflush(stdout); + fflush(stderr); + // + if (!proxytrack_start(index, soc, socICP)) { + fprintf(stderr, "Unable to create the server: %s\n", strerror(errno)); +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + printf("Done\n"); + returncode = 1; + } else { + returncode = 0; + } + } else { + fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(errno)); + returncode = 1; + } + printf("EXITED\n"); + fflush(stdout); + fflush(stderr); + return returncode; +} + +static const char* GetHttpMessage(int statuscode) { + // Erreurs HTTP, selon RFC + switch( statuscode) { + case 100: return "Continue"; break; + case 101: return "Switching Protocols"; break; + case 200: return "OK"; break; + case 201: 
return "Created"; break; + case 202: return "Accepted"; break; + case 203: return "Non-Authoritative Information"; break; + case 204: return "No Content"; break; + case 205: return "Reset Content"; break; + case 206: return "Partial Content"; break; + case 207: return "Multi-Status"; break; + case 300: return "Multiple Choices"; break; + case 301: return "Moved Permanently"; break; + case 302: return "Moved Temporarily"; break; + case 303: return "See Other"; break; + case 304: return "Not Modified"; break; + case 305: return "Use Proxy"; break; + case 306: return "Undefined 306 error"; break; + case 307: return "Temporary Redirect"; break; + case 400: return "Bad Request"; break; + case 401: return "Unauthorized"; break; + case 402: return "Payment Required"; break; + case 403: return "Forbidden"; break; + case 404: return "Not Found"; break; + case 405: return "Method Not Allowed"; break; + case 406: return "Not Acceptable"; break; + case 407: return "Proxy Authentication Required"; break; + case 408: return "Request Time-out"; break; + case 409: return "Conflict"; break; + case 410: return "Gone"; break; + case 411: return "Length Required"; break; + case 412: return "Precondition Failed"; break; + case 413: return "Request Entity Too Large"; break; + case 414: return "Request-URI Too Large"; break; + case 415: return "Unsupported Media Type"; break; + case 416: return "Requested Range Not Satisfiable"; break; + case 417: return "Expectation Failed"; break; + case 500: return "Internal Server Error"; break; + case 501: return "Not Implemented"; break; + case 502: return "Bad Gateway"; break; + case 503: return "Service Unavailable"; break; + case 504: return "Gateway Time-out"; break; + case 505: return "HTTP Version Not Supported"; break; + default: return "Unknown HTTP Error"; break; + } +} + +#ifndef NO_WEBDAV +static void proxytrack_add_DAV_Item(String *item, String *buff, + const char* filename, + unsigned long int size, + time_t timestamp, + const char* 
mime, + int isDir, + int isRoot, + int isDefault) +{ + struct tm * timetm; + if (timestamp == (time_t) 0 || timestamp == (time_t) -1) { + timestamp = time(NULL); + } + if ((timetm = gmtime(×tamp)) != NULL) { + char tms[256 + 1]; + const char * name; + strftime(tms, 256, "%a, %d %b %Y %H:%M:%S GMT", timetm); /* Sun, 18 Sep 2005 11:45:45 GMT */ + + if (mime == NULL || *mime == 0) + mime = "application/octet-stream"; + + StringLength(*buff) = 0; + escapexml(filename, buff); + + name = strrchr(StringBuff(*buff), '/'); + if (name != NULL) + name++; + if (name == NULL || *name == 0) { + if (strcmp(mime, "text/html") == 0) + name = "Default Document for the Folder.html"; + else + name = "Default Document for the Folder"; + } + + StringRoom(*item, 1024); + sprintf(StringBuff(*item), + "<response xmlns=\"DAV:\">\r\n" + "<href>/webdav%s%s</href>\r\n" + "<propstat>\r\n" + "<prop>\r\n" + "<displayname>%s</displayname>\r\n" + "<iscollection>%d</iscollection>\r\n" + "<haschildren>%d</haschildren>\r\n" + "<isfolder>%d</isfolder>\r\n" + "<resourcetype>%s</resourcetype>\r\n" + "<creationdate>%d-%02d-%02dT%02d:%02d:%02dZ</creationdate>\r\n" + "<getlastmodified>%s</getlastmodified>\r\n" + "<supportedlock></supportedlock>\r\n" + "<lockdiscovery/>\r\n" + "<getcontenttype>%s</getcontenttype>\r\n" + "<getcontentlength>%d</getcontentlength>\r\n" + "<isroot>%d</isroot>\r\n" + "</prop>\r\n" + "<status>HTTP/1.1 200 OK</status>\r\n" + "</propstat>\r\n" + "</response>\r\n", + /* */ + ( StringBuff(*buff)[0] == '/' ) ? "" : "/", StringBuff(*buff), + name, + isDir ? 1 : 0, + isDir ? 1 : 0, + isDir ? 1 : 0, + isDir ? "<collection/>" : "", + timetm->tm_year + 1900, timetm->tm_mon + 1, timetm->tm_mday, timetm->tm_hour, timetm->tm_min, timetm->tm_sec, + tms, + isDir ? "httpd/unix-directory" : mime, + (int)size, + isRoot ? 
/*
 * Number of days between 1970-01-01 and the given proleptic Gregorian date.
 * `month` is 1..12. `day` is normally 1..31 but out-of-range values are
 * carried linearly (day 32 of a 31-day month is the 1st of the next one).
 */
static long long rfc822_days_from_civil(long long year, int month, long long day) {
  long long era, yoe, doy, doe;
  if (month <= 2)
    year--;                               /* count years from March 1st */
  era = (year >= 0 ? year : year - 399) / 400;
  yoe = year - era * 400;                 /* year of era: [0, 399] */
  doy = (153 * (month + (month > 2 ? -3 : 9)) + 2) / 5 + day - 1;
  doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
  return era * 146097 + doe - 719468;
}

/*
 * Convert an HTTP / RFC822 style date to time_t.
 *
 * Recognized layouts (case-insensitive):
 *   Sun, 06 Nov 1994 08:49:37 GMT
 *   Sunday, 06-Nov-94 08:49:37 GMT
 *   Sun Nov  6 08:49:37 1994
 *
 * The fields are interpreted as GMT and converted with plain calendar
 * arithmetic, so the result does not depend on the local time zone.
 * Two-digit years 00..50 mean 2000..2050; other values below 1000 are
 * offsets from 1900.
 *
 * Returns the number of seconds since 1970-01-01 00:00:00 GMT, or
 * (time_t) 0 when `s` is NULL, longer than 200 characters, or does not
 * contain a month name, a day and four further numbers.
 */
time_t get_time_rfc822(const char* s) {
  static const char months[12][4] = {
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec"
  };
  char str[256];
  char* a;
  size_t len, i;
  /* */
  int result_mm = -1;
  int result_dd = -1;
  int result_n[4] = { -1, -1, -1, -1 };
  int year, hour, min, sec;
  /* */

  if (s == NULL)
    return (time_t) 0;
  len = strlen(s);
  if (len > 200)
    return (time_t) 0;

  /* Lower-case copy in which '-', ':' and ',' become plain separators */
  for (i = 0; i < len; i++) {
    char c = s[i];
    if (c >= 'A' && c <= 'Z')
      c = (char) (c + ('a' - 'A'));
    else if (c == '-' || c == ':' || c == ',')
      c = ' ';
    str[i] = c;
  }
  str[len] = '\0';

  /* Tokenize on spaces */
  a = str;
  while (*a) {
    const char* first;
    size_t toklen;
    int m, number;

    while (*a == ' ')
      a++;                                /* skip spaces */
    first = a;
    while (*a && *a != ' ')
      a++;
    toklen = (size_t) (a - first);
    if (toklen == 0)
      continue;                           /* trailing spaces: end of string */

    /* The month is always spelled with its exact three-letter abbreviation */
    if (toklen == 3) {
      for (m = 0; m < 12 && memcmp(first, months[m], 3) != 0; m++)
        ;
      if (m < 12) {
        result_mm = m;
        continue;
      }
    }

    /* Number token; sscanf stops at the separator ending the token */
    if (sscanf(first, "%d", &number) == 1) {
      if (result_dd < 0) {                /* the day is always the first number */
        result_dd = number;
      } else {
        for (i = 0; i < 4; i++) {
          if (result_n[i] < 0) {
            result_n[i] = number;
            break;
          }
        }
      }
    }
    /* anything else is noise (day name, "gmt", ...) */
  }

  if (result_mm < 0 || result_dd < 0
    || result_n[0] < 0 || result_n[1] < 0 || result_n[2] < 0 || result_n[3] < 0)
    return (time_t) 0;

  if (result_n[3] >= 1000) {              /* Sun Nov  6 08:49:37 1994 */
    year = result_n[3];
    hour = result_n[0];
    min = result_n[1];
    sec = result_n[2];
  } else {  /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
    hour = result_n[1];
    min = result_n[2];
    sec = result_n[3];
    if (result_n[0] <= 50)                /* 00 means 2000 */
      year = result_n[0] + 2000;
    else if (result_n[0] < 1000)          /* 99 means 1999 */
      year = result_n[0] + 1900;
    else                                  /* 2000 */
      year = result_n[0];
  }

  return (time_t) (rfc822_days_from_civil(year, result_mm + 1, result_dd) * 86400LL
                   + (long long) hour * 3600LL
                   + (long long) min * 60LL
                   + (long long) sec);
}
1 : 0)); + if (StringLength(url) > 0) { + if (StringBuff(url)[StringLength(url) - 1] == '/') { + StringBuff(url)[StringLength(url) - 1] = '\0'; + StringLength(url)--; + } + } + + /* Form response */ + StringRoom(response, 1024); + sprintf(StringBuff(response), + "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n" + "<multistatus xmlns=\"DAV:\">\r\n"); + StringLength(response) = (int) strlen(StringBuff(response)); + /* */ + + /* Root */ + StringLength(item) = 0; + proxytrack_add_DAV_Item(&item, &buff, + StringBuff(url), /*size*/0, /*timestamp*/(time_t) 0, /*mime*/NULL, /*isDir*/1, /*isRoot*/1, /*isDefault*/0); + StringMemcat(response, StringBuff(item), StringLength(item)); + + /* Childrens (Depth > 0) */ + if (depth > 0) { + time_t timestampRep = (time_t) -1; + const char * prefix = StringBuff(url); + unsigned int prefixLen = (unsigned int) strlen(prefix); + char ** list = PT_Enumerate(indexes, prefix, 0); + if (list != NULL) { + for(isDir = 1 ; isDir >= 0 ; isDir--) { + for(i = 0 ; list[i] != NULL ; i++) { + const char * thisUrl = list[i]; + const char * mimeType = "application/octet-stream"; + unsigned int thisUrlLen = (unsigned int) strlen(thisUrl); + int thisIsDir = (thisUrl[thisUrlLen - 1] == '/') ? 
1 : 0; + + /* Item URL */ + StringRoom(itemUrl, thisUrlLen + prefixLen + sizeof("/webdav/") + 1); + StringClear(itemUrl); + sprintf(StringBuff(itemUrl), "/%s/%s", prefix, thisUrl); + if (!thisIsDir) + StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)); + else + StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)) - 1; + StringBuff(itemUrl)[StringLength(itemUrl)] = '\0'; + + if (thisIsDir == isDir) { + unsigned long size = 0; + time_t timestamp = (time_t) 0; + PT_Element file = NULL; + + /* Item stats */ + if (!isDir) { + file = PT_ReadIndex(indexes, StringBuff(itemUrl) + 1, FETCH_HEADERS); + if (file != NULL && file->statuscode == 200 ) { + size = file->size; + if (file->lastmodified) { + timestamp = get_time_rfc822(file->lastmodified); + } + if (timestamp == (time_t) 0) { + if (timestampRep == (time_t) -1) { + timestampRep = 0; + if (file->indexId != -1) { + timestampRep = PT_Index_Timestamp(PT_GetIndex(indexes, file->indexId)); + } + } + timestamp = timestampRep; + } + if (file->contenttype) { + mimeType = file->contenttype; + } + } + } + + /* Add item */ + StringLength(item) = 0; + proxytrack_add_DAV_Item(&item, &buff, + StringBuff(itemUrl), size, timestamp, mimeType, isDir, /*isRoot*/0, /*isDefault*/(thisUrlLen == 0)); + StringMemcat(response, StringBuff(item), StringLength(item)); + + /* Wipe element */ + if (file != NULL) + PT_Element_Delete(&file); + } + } + } + PT_Enumerate_Delete(&list); + } /* items != NULL */ + } /* Depth > 0 */ + + /* End of responses */ + StringStrcat(response, + "</multistatus>\r\n" + ); + + StringFree(item); + StringFree(itemUrl); + StringFree(url); + StringFree(buff); + + elt->size = StringLength(response); + elt->adr = StringAcquire(&response); + elt->statuscode = 207; /* Multi-Status */ + strcpy(elt->charset, "utf-8"); + strcpy(elt->contenttype, "text/xml"); + strcpy(elt->msg, "Multi-Status"); + StringFree(response); + + fprintf(stderr, "RESPONSE:\n%s\n", elt->adr); + + return elt; + } + return NULL; +} +#endif + 
+static PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char * url) { + char ** list = PT_Enumerate(indexes, url, 0); + if (list != NULL) { + PT_Element elt = PT_ElementNew(); + int i, isDir; + String html = STRING_EMPTY; + StringClear(html); + StringStrcat(html, + "<html>" + PROXYTRACK_COMMENT_HEADER + DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES + "<head>\r\n" + "<title>ProxyTrack " PROXYTRACK_VERSION " Catalog</title>" + "</head>\r\n" + "<body>\r\n" + "<h3>Directory index:</h3><br />" + "<br />" + "<hr>" + "<tt>[DIR] <a href=\"..\">..</a></tt><br />" + ); + for(isDir = 1 ; isDir >= 0 ; isDir--) { + for(i = 0 ; list[i] != NULL ; i++) { + char * thisUrl = list[i]; + unsigned int thisUrlLen = (unsigned int) strlen(thisUrl); + int thisIsDir = (thisUrl[thisUrlLen - 1] == '/') ? 1 : 0; + if (thisIsDir == isDir) { + if (isDir) + StringStrcat(html, "<tt>[DIR] "); + else + StringStrcat(html, "<tt> "); + StringStrcat(html, "<a href=\""); + if (isDir) { + StringStrcat(html, "http://proxytrack/"); + } + StringStrcat(html, url); + StringStrcat(html, list[i]); + StringStrcat(html, "\">"); + StringStrcat(html, list[i]); + StringStrcat(html, "</a></tt><br />"); + } + } + } + StringStrcat(html, + "</body>" + "</html>"); + PT_Enumerate_Delete(&list); + elt->size = StringLength(html); + elt->adr = StringAcquire(&html); + elt->statuscode = 200; + strcpy(elt->charset, "iso-8859-1"); + strcpy(elt->contenttype, "text/html"); + strcpy(elt->msg, "OK"); + StringFree(html); + return elt; + } + return NULL; +} + +static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { + int timeout=30; + int retour=0; + int willexit=0; + int buffer_size = 32768; + char * buffer = (char*)malloc(buffer_size); + int line1Size = 1024; + char * line1 = (char*)malloc(line1Size); + int lineSize = 8192; + char * line = (char*)malloc(lineSize); + int length = 0; + int keepAlive = 1; + + String url = STRING_EMPTY; + String urlRedirect = STRING_EMPTY; + String headers = STRING_EMPTY; + 
String output = STRING_EMPTY; + String host = STRING_EMPTY; + String localhost = STRING_EMPTY; +#ifndef NO_WEBDAV + String davHeaders = STRING_EMPTY; + String davRequest = STRING_EMPTY; +#endif + + StringRoom(localhost, 256); + if (gethostname(StringBuff(localhost), StringCapacity(localhost) - 1) == 0) { + StringLength(localhost) = (int) strlen(StringBuff(localhost)); + } else { + StringStrcpy(localhost, "localhost"); + } + +#ifdef _DEBUG + Sleep(1000); +#endif + + if (buffer == NULL || line == NULL || line1 == NULL) { + CRITICAL("proxytrack_process_HTTP:memory exhausted"); +#if HTS_WIN + closesocket(soc_c); +#else + close(soc_c); +#endif + return ; + } + + do { + const char* msgError = NULL; + int msgCode = 0; + PT_Element element = NULL; + char* command; + char* proto; + char* surl; + int directHit = 0; + int headRequest = 0; + int listRequest = 0; +#ifndef NO_WEBDAV + int davDepth = 0; +#endif + + /* Clear context */ + line[0] = line1[0] = '\0'; + buffer[0] = '\0'; + command = line1; + StringClear(url); + StringClear(urlRedirect); + StringClear(headers); + StringClear(output); + StringClear(host); +#ifndef NO_WEBDAV + StringClear(davHeaders); + StringClear(davRequest); +#endif + + /* line1: "GET http://www.example.com/ HTTP/1.0" */ + if (linputsoc_t(soc_c, line1, line1Size - 2, timeout) > 0 + && ( surl = strchr(line1, ' ') ) + && !(*surl = '\0') + && ++surl + && (proto = strchr(surl, ' ')) && !(*proto = '\0') && ++proto) + { + /* Flush headers */ + while(linputsoc_t(soc_c, line, lineSize - 2, timeout) > 0 + && line[0] != 0) + { + int p; + if ((p = strfield(line, "Content-length:"))!=0) { + if (sscanf(line+p, "%d", &length) != 1) { + msgCode = 500; + msgError = "Bad HTTP Content-Length Field"; + keepAlive = 0; + length = 0; + } + } else if (strcasecmp(line, "Connection: close") == 0) { + keepAlive = 0; + } else if (strcasecmp(line, "Connection: keep-alive") == 0) { + keepAlive = 1; + } else if ((p = strfield(line, "Host:"))) { + char* chost = line + p; + if 
(*chost == ' ') + chost++; + StringStrcpy(host, chost); + } +#ifndef NO_WEBDAV + else if ((p = strfield(line, "Depth: "))) { + char* depth = line + p; + if (sscanf(depth, "%d", &davDepth) != 1) { + davDepth = 0; + } + } +#endif + } + + /* Flush body */ +#ifndef NO_WEBDAV + if (length > 0) { + if (length < 32768) { + StringRoom(davRequest, length + 1); + if (recv(soc_c, StringBuff(davRequest), length, 0) == length) { + StringBuff(davRequest)[length] = 0; + } else { + msgCode = 500; + msgError = "Posted Data Read Error"; + keepAlive = 0; + } + } else { + msgCode = 500; + msgError = "Posted Data Too Large"; + keepAlive = 0; + } + } +#endif + + /* Switch protocol ID */ + if (strcasecmp(command, "post") == 0) { +#ifndef NO_WEBDAV + msgCode = 404; +#else + msgCode = 501; + keepAlive = 0; +#endif + msgError = "Proxy Error (POST Request Forbidden)"; + } + else if (strcasecmp(command, "get") == 0) { + headRequest = 0; + } + else if (strcasecmp(command, "head") == 0) { + headRequest = 1; + } +#ifndef NO_WEBDAV + else if (strcasecmp(command, "options") == 0) { + const char * options = "GET, HEAD, OPTIONS, POST, PROPFIND, TRACE" + ", MKCOL, DELETE, PUT"; /* Not supported */ + msgCode = 200; + StringRoom(headers, 8192); + sprintf(StringBuff(headers), + "HTTP/1.1 %d %s\r\n" + "DAV: 1, 2\r\n" + "MS-Author-Via: DAV\r\n" + "Cache-Control: private\r\n" + "Allow: %s\r\n", + msgCode, GetHttpMessage(msgCode), options); + StringLength(headers) = (int) strlen(StringBuff(headers)); + } + else if (strcasecmp(command, "propfind") == 0) { + if (davDepth > 1) { + msgCode = 403; + msgError = "DAV Depth Limit Forbidden"; + } else { + fprintf(stderr, "DEBUG: DAV-DATA=<%s>\n", StringBuff(davRequest)); + listRequest = 2; /* propfind */ + } + } + else if (strcasecmp(command, "mkcol") == 0 + || strcasecmp(command, "delete") == 0 + || strcasecmp(command, "put") == 0 + || strcasecmp(command, "proppatch") == 0 + || strcasecmp(command, "lock") == 0 + || strcasecmp(command, "unlock") == 0 + || 
strcasecmp(command, "copy") == 0 + || strcasecmp(command, "trace") == 0) + { + msgCode = 403; + msgError = "Method Forbidden"; + } +#endif + else { + msgCode = 501; + msgError = "Proxy Error (Unsupported or Unknown HTTP Command Request)"; + keepAlive = 0; + } + if (strcasecmp(proto, "http/1.1") == 0) { + keepAlive = 1; + } else if (strcasecmp(proto, "http/1.0") == 0) { + keepAlive = 0; + } else { + msgCode = 505; + msgError = "Proxy Error (Unknown HTTP Version)"; + keepAlive = 0; + } + + /* Post-process request */ + if (link_has_authority(surl)) { + const unsigned int prefixLen = sizeof("http://proxytrack/") - 1; + if (strncasecmp(surl, "http://proxytrack/", prefixLen) == 0) { + directHit = 1; /* Another direct hit hack */ + } + StringStrcpy(url, surl); + } else { + if (StringLength(host) > 0) { + /* Direct hit */ + if ( +#ifndef NO_WEBDAV + listRequest != 2 + && +#endif + strncasecmp(StringBuff(host), StringBuff(localhost), StringLength(localhost)) == 0 + && + (StringBuff(host)[StringLength(localhost)] == '\0' + || StringBuff(host)[StringLength(localhost)] == ':') + && surl[0] == '/' + ) + { + const char * toHit = surl + 1; + if (strncmp(toHit, "webdav/", 7) == 0) { + toHit += 7; + } + /* Direct hit */ + directHit = 1; + StringStrcpy(url, ""); + if (!link_has_authority(toHit)) + StringStrcat(url, "http://"); + StringStrcat(url, toHit); + } else { + /* Transparent proxy */ + StringStrcpy(url, "http://"); + StringStrcat(url, StringBuff(host)); + StringStrcat(url, surl); + } + } else { + msgCode = 500; + msgError = "Transparent Proxy Error ('Host' HTTP Request Header Field Missing)"; + keepAlive = 0; + } + } + + /* Response */ + if (msgCode == 0) { + if (listRequest == 1) { + element = proxytrack_process_HTTP_List(indexes, StringBuff(url)); + } +#ifndef NO_WEBDAV + else if (listRequest == 2) { + if ((element = proxytrack_process_DAV_Request(indexes, StringBuff(url), davDepth)) != NULL) { + msgCode = element->statuscode; + StringRoom(davHeaders, 1024); + 
sprintf(StringBuff(davHeaders), + "DAV: 1, 2\r\n" + "MS-Author-Via: DAV\r\n" + "Cache-Control: private\r\n"); + StringLength(davHeaders) = (int) strlen(StringBuff(davHeaders)); + } + } +#endif + else { + element = PT_ReadIndex(indexes, StringBuff(url), FETCH_BODY); + } + if (element == NULL +#ifndef NO_WEBDAV + && listRequest == 2 +#endif + && StringLength(url) > 0 + && StringBuff(url)[StringLength(url) - 1] == '/' + ) + { + element = PT_Index_HTML_BuildRootInfo(indexes); + if (element != NULL) { + element->statuscode = 404; /* HTML page, but in error */ + } + } + if (element != NULL) { + msgCode = element->statuscode; + StringRoom(headers, 8192); + sprintf(StringBuff(headers), + "HTTP/1.1 %d %s\r\n" +#ifndef NO_WEBDAV + "%s" +#endif + "Content-Type: %s%s%s%s\r\n" + "%s%s%s" + "%s%s%s" + "%s%s%s", + /* */ + msgCode, + element->msg, +#ifndef NO_WEBDAV + /* DAV */ + StringBuff(davHeaders), +#endif + /* Content-type: foo; [ charset=bar ] */ + element->contenttype, + ( ( element->charset[0]) ? "; charset=\"" : ""), + element->charset, + ( ( element->charset[0]) ? "\"" : ""), + /* location */ + ( ( element->location != NULL && element->location[0]) ? "Location: " : ""), + ( ( element->location != NULL && element->location[0]) ? element->location : ""), + ( ( element->location != NULL && element->location[0]) ? "\r\n" : ""), + /* last-modified */ + ( ( element->lastmodified[0]) ? "Last-Modified: " : ""), + ( ( element->lastmodified[0]) ? element->lastmodified : ""), + ( ( element->lastmodified[0]) ? "\r\n" : ""), + /* etag */ + ( ( element->etag[0]) ? "ETag: " : ""), + ( ( element->etag[0]) ? element->etag : ""), + ( ( element->etag[0]) ? 
"\r\n" : "") + ); + StringLength(headers) = (int) strlen(StringBuff(headers)); + } else { + /* No query string, no ending / : check the the <url>/ page */ + if (StringLength(url) > 0 && StringBuff(url)[StringLength(url) - 1] != '/' && strchr(StringBuff(url), '?') == NULL) { + StringStrcpy(urlRedirect, StringBuff(url)); + StringStrcat(urlRedirect, "/"); + if (PT_LookupIndex(indexes, StringBuff(urlRedirect))) { + msgCode = 301; /* Moved Permanently */ + StringRoom(headers, 8192); + sprintf(StringBuff(headers), + "HTTP/1.1 %d %s\r\n" + "Content-Type: text/html\r\n" + "Location: %s\r\n", + /* */ + msgCode, + GetHttpMessage(msgCode), + StringBuff(urlRedirect) + ); + StringLength(headers) = (int) strlen(StringBuff(headers)); + /* */ + StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES)); + sprintf(StringBuff(output), + "<html>" + PROXYTRACK_COMMENT_HEADER + DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES + "<head>" + "<title>ProxyTrack - Page has moved</title>" + "</head>\r\n" + "<body>" + "<h3>The correct location is:</h3><br />" + "<b><a href=\"%s\">%s</a></b><br />" + "<br />" + "<br />\r\n" + "<i>Generated by ProxyTrack " PROXYTRACK_VERSION ", (C) Xavier Roche and other contributors</i>" + "\r\n" + "</body>" + "</header>", + StringBuff(urlRedirect), + StringBuff(urlRedirect)); + StringLength(output) = (int) strlen(StringBuff(output)); + } + } + if (msgCode == 0) { + msgCode = 404; + msgError = "Not Found in this cache"; + } + } + } + } else { + msgCode = 500; + msgError = "Server Error"; + keepAlive = 0; + } + if (StringLength(headers) == 0) { + if (msgCode == 0) { + msgCode = 500; + msgError = "Internal Proxy Error"; + } else if (msgError == NULL) { + msgError = GetHttpMessage(msgCode); + } + StringRoom(headers, 256); + sprintf(StringBuff(headers), + "HTTP/1.1 %d %s\r\n" + "Content-type: text/html\r\n", + msgCode, + msgError); + StringLength(headers) = (int) strlen(StringBuff(headers)); + StringRoom(output, 1024 + 
sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES)); + sprintf(StringBuff(output), + "<html>" + PROXYTRACK_COMMENT_HEADER + DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES + "<head>" + "<title>ProxyTrack - HTTP Proxy Error %d</title>" + "</head>\r\n" + "<body>" + "<h3>A proxy error has occured while processing the request.</h3><br />" + "<b>Error HTTP %d: <i>%s</i></b><br />" + "<br />" + "<br />\r\n" + "<i>Generated by ProxyTrack " PROXYTRACK_VERSION ", (C) Xavier Roche and other contributors</i>" + "\r\n" + "</body>" + "</html>", + msgCode, + msgCode, + msgError); + StringLength(output) = (int) strlen(StringBuff(output)); + } + { + char tmp[20 + 1]; /* 2^64 = 18446744073709551616 */ + unsigned int dataSize = 0; + if (!headRequest) { + dataSize = StringLength(output); + if (dataSize == 0 && element != NULL) { + dataSize = element->size; + } + } + sprintf(tmp, "%d", (int) dataSize); + StringStrcat(headers, "Content-length: "); + StringStrcat(headers, tmp); + StringStrcat(headers, "\r\n"); + } + if (keepAlive) { + StringStrcat(headers, + "Connection: Keep-Alive\r\n" + "Proxy-Connection: Keep-Alive\r\n"); + } else { + StringStrcat(headers, + "Connection: Close\r\n" + "Proxy-Connection: Close\r\n"); + } + if (msgCode != 500) + StringStrcat(headers, "X-Cache: HIT from "); + else + StringStrcat(headers, "X-Cache: MISS from "); + StringStrcat(headers, StringBuff(localhost)); + StringStrcat(headers, "\r\n"); + + /* Logging */ + { + const char * contentType = "text/html"; + unsigned long int size = StringLength(output) ? StringLength(output) : ( element ? 
element->size : 0 ); + /* */ + String ip = STRING_EMPTY; + SOCaddr serverClient; + int lenServerClient = (int) sizeof(serverClient); + memset(&serverClient, 0, sizeof(serverClient)); + if (getsockname(soc_c, (struct sockaddr*) &serverClient, &lenServerClient) == 0) { + ip = getip(&serverClient, lenServerClient); + } else { + StringStrcpy(ip, "unknown"); + } + if (element != NULL && element->contenttype[0] != '\0') { + contentType = element->contenttype; + } + LOG("HTTP %s %d %d %s %s %s" _ StringBuff(ip) _ msgCode _ (int)size _ command _ StringBuff(url) _ contentType); + StringFree(ip); + } + + /* Send reply */ + StringStrcat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n"); + StringStrcat(headers, "\r\n"); /* Headers separator */ + if (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers) + || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output)) + || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, element->size, 0) != element->size) + ) + { + keepAlive = 0; /* Error, abort connection */ + } + PT_Element_Delete(&element); + + /* Shutdown (FIN) and wait until confirmed */ + if (!keepAlive) { + char c; +#ifdef _WIN32 + shutdown(soc_c, SD_SEND); +#else + shutdown(soc_c, 1); +#endif + while(recv(soc_c, ((char*)&c), 1, 0) > 0); + } + } while(keepAlive); + +#if HTS_WIN + closesocket(soc_c); +#else + close(soc_c); +#endif + + StringFree(url); + StringFree(urlRedirect); + StringFree(headers); + StringFree(output); + StringFree(host); + + if (buffer) + free(buffer); +} + +#ifdef _WIN32 +#define PTHREAD_RETURN +#define PTHREAD_TYPE void +#define PTHREAD_TYPE_FNC __cdecl +#else +#define PTHREAD_RETURN NULL +#define PTHREAD_TYPE void* +#define PTHREAD_TYPE_FNC +#endif + +/* Generic threaded function start */ +static int startThread(PTHREAD_TYPE (PTHREAD_TYPE_FNC * 
funct)(void* ), + void* param) +{ + if (param != NULL) { +#ifdef _WIN32 + if (_beginthread(funct, 0, param) == -1) { + free(param); + return 0; + } + return 1; +#else + pthread_t handle = 0; + int retcode; + retcode = pthread_create(&handle, NULL, funct, param); + if (retcode != 0) { /* error */ + free(param); + return 0; + } else { + /* detach the thread from the main process so that is can be independent */ + pthread_detach(handle); + return 1; + } +#endif + } else { + return 0; + } +} + +/* Generic socket/index structure */ +typedef struct proxytrack_process_th_p { + T_SOC soc_c; + PT_Indexes indexes; + void (*process)(PT_Indexes indexes, T_SOC soc_c); +} proxytrack_process_th_p; + +/* Generic socket/index function stub */ +static PTHREAD_TYPE PTHREAD_TYPE_FNC proxytrack_process_th(void* param_) { + proxytrack_process_th_p *param = (proxytrack_process_th_p *) param_; + T_SOC soc_c = param->soc_c; + PT_Indexes indexes = param->indexes; + void (*process)(PT_Indexes indexes, T_SOC soc_c) = param->process; + free(param); + process(indexes, soc_c); + return PTHREAD_RETURN ; +} + +/* Process generic socket/index operation */ +static int proxytrack_process_generic(void (*process)(PT_Indexes indexes, T_SOC soc_c), + PT_Indexes indexes, T_SOC soc_c) +{ + proxytrack_process_th_p *param = calloc(sizeof(proxytrack_process_th_p), 1); + if (param != NULL) { + param->soc_c = soc_c; + param->indexes = indexes; + param->process = process; + return startThread(proxytrack_process_th, param); + } else { + CRITICAL("proxytrack_process_generic:Memory exhausted"); + return 0; + } + return 0; +} + +/* Process HTTP proxy requests */ +static int proxytrack_process_HTTP_threaded(PT_Indexes indexes, T_SOC soc) { + return proxytrack_process_generic(proxytrack_process_HTTP, indexes, soc); +} + +/* HTTP Server */ +static int proxytrack_start_HTTP(PT_Indexes indexes, T_SOC soc) { + while(soc != INVALID_SOCKET) { + T_SOC soc_c; + struct sockaddr clientAddr; + int clientAddrLen = sizeof(struct 
sockaddr); + memset(&clientAddr, 0, sizeof(clientAddr)); + if ( (soc_c = accept(soc, &clientAddr, &clientAddrLen)) != INVALID_SOCKET) { + if (!proxytrack_process_HTTP_threaded(indexes, soc_c)) { + CRITICAL("proxytrack_start_HTTP::Can not fork a thread"); + } + } + } + if (soc != INVALID_SOCKET) { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + } + return 1; +} + +/* Network order is big endian */ +#define READ_NET16(buffer) ( ( ((unsigned char*)buffer)[0] << 8 ) + ((unsigned char*)buffer)[1] ) +#define READ_NET32(buffer) ( ( READ_NET16(buffer) << 16 ) + READ_NET16(((unsigned char*)buffer) + 2) ) +#define WRITE_NET8(buffer, value) do { \ + ((unsigned char*)buffer)[0] = (unsigned char)(value); \ +} while(0) +#define WRITE_NET16(buffer, value) do { \ + ((unsigned char*)buffer)[0] = (((unsigned short)(value)) >> 8) & 0xff; \ + ((unsigned char*)buffer)[1] = ((unsigned short)(value)) & 0xff; \ +} while(0) +#define WRITE_NET32(buffer, value) do { \ + WRITE_NET16(buffer, ( ((unsigned int)(value)) >> 16 ) & 0xffff); \ + WRITE_NET16(((unsigned char*)buffer) + 2, ( ((unsigned int)(value)) ) & 0xffff); \ +} while(0) + +static int ICP_reply(struct sockaddr * clientAddr, + int clientAddrLen, + T_SOC soc, + /* */ + unsigned char Opcode, + unsigned char Version, + unsigned short Message_Length, + unsigned int Request_Number, + unsigned int Options, + unsigned int Option_Data, + unsigned int Sender_Host_Address, + unsigned char *Message + ) +{ + int ret = 0; + unsigned long int BufferSize; + unsigned char * buffer; + if (Message_Length == 0 && Message != NULL) /* We have to get the message size */ + Message_Length = (unsigned int) strlen(Message) + 1; /* NULL terminated */ + BufferSize = 20 + Message_Length; + buffer = malloc(BufferSize); + if (buffer != NULL) { + WRITE_NET8(&buffer[0], Opcode); + WRITE_NET8(&buffer[1], Version); + WRITE_NET16(&buffer[2], Message_Length); + WRITE_NET32(&buffer[4], Request_Number); + WRITE_NET32(&buffer[8], Options); + 
WRITE_NET32(&buffer[12], Option_Data); + WRITE_NET32(&buffer[16], Sender_Host_Address); + if (Message != NULL && Message_Length > 0) { + memcpy(buffer + 20, Message, Message_Length); + } + if (sendto(soc, buffer, BufferSize, 0, clientAddr, clientAddrLen) == BufferSize) { + ret = 1; + } + free(buffer); + } + return ret; +} + +/* ICP Server */ +static int proxytrack_start_ICP(PT_Indexes indexes, T_SOC soc) { + /* "ICP messages MUST not exceed 16,384 octets in length." (RFC2186) */ + int bufferSize = 16384; + unsigned char * buffer = (unsigned char*) malloc(bufferSize + 1); + if (buffer == NULL) { + CRITICAL("proxytrack_start_ICP:memory exhausted"); +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + return -1; + } + while(soc != INVALID_SOCKET) { + struct sockaddr clientAddr; + int clientAddrLen = sizeof(struct sockaddr); + int n; + memset(&clientAddr, 0, sizeof(clientAddr)); + n = recvfrom(soc, (char*)buffer, bufferSize, 0, &clientAddr, &clientAddrLen); + if (n != -1) { + const char * LogRequest = "ERROR"; + const char * LogReply = "ERROR"; + unsigned char * UrlRequest = NULL; + if (n >= 20) { + enum { + ICP_OP_MIN = 0, + ICP_OP_INVALID = 0, + ICP_OP_QUERY = 1, + ICP_OP_HIT = 2, + ICP_OP_MISS = 3, + ICP_OP_ERR = 4, + ICP_OP_SECHO = 10, + ICP_OP_DECHO = 11, + ICP_OP_MISS_NOFETCH = 21, + ICP_OP_DENIED = 22, + ICP_OP_HIT_OBJ = 23, + ICP_OP_MAX = ICP_OP_HIT_OBJ + }; + unsigned char Opcode = buffer[0]; + unsigned char Version = buffer[1]; + unsigned short Message_Length = READ_NET16(&buffer[2]); + unsigned int Request_Number = READ_NET32(&buffer[4]); /* Session ID */ + unsigned int Options = READ_NET32(&buffer[8]); + unsigned int Option_Data = READ_NET32(&buffer[12]); /* ICP_FLAG_SRC_RTT */ + unsigned int Sender_Host_Address = READ_NET32(&buffer[16]); /* ignored */ + unsigned char* Payload = &buffer[20]; + buffer[bufferSize] = '\0'; /* Ensure payload is NULL terminated */ + if (Message_Length <= bufferSize - 20) { + if (Opcode >= ICP_OP_MIN && Opcode <= 
ICP_OP_MAX) { + if (Version == 2) { + switch(Opcode) { + case ICP_OP_QUERY: + { + unsigned int UrlRequestSize; + UrlRequest = &Payload[4]; + UrlRequestSize = (unsigned int)strlen((char*)UrlRequest); + LogRequest = "ICP_OP_QUERY"; + if (indexes == NULL) { + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_DENIED, Version, 0, Request_Number, 0, 0, 0, UrlRequest); + LogReply = "ICP_OP_DENIED"; + } else if (PT_LookupIndex(indexes, UrlRequest)) { + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_HIT, Version, 0, Request_Number, 0, 0, 0, UrlRequest); + LogReply = "ICP_OP_HIT"; + } else { + if (UrlRequestSize > 0 && UrlRequest[UrlRequestSize - 1] != '/' && strchr(UrlRequest, '?') == NULL) { + char * UrlRedirect = malloc(UrlRequestSize + 1 + 1); + if (UrlRedirect != NULL) { + sprintf(UrlRedirect, "%s/", UrlRequest); + if (PT_LookupIndex(indexes, UrlRedirect)) { /* We'll generate a redirect */ + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_HIT, Version, 0, Request_Number, 0, 0, 0, UrlRequest); + LogReply = "ICP_OP_HIT"; + free(UrlRedirect); + break; + } + free(UrlRedirect); + } + } + /* We won't retrive the cache MISS online, no way! 
*/ + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_MISS_NOFETCH, Version, 0, Request_Number, 0, 0, 0, UrlRequest); + LogReply = "ICP_OP_MISS_NOFETCH"; + } + } + break; + case ICP_OP_SECHO: + { + UrlRequest = &Payload[4]; + LogRequest = "ICP_OP_QUERY"; + LogReply = "ICP_OP_QUERY"; + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_SECHO, Version, 0, Request_Number, 0, 0, 0, UrlRequest); + } + break; + default: + LogRequest = "NOTIMPLEMENTED"; + LogReply = "ICP_OP_ERR"; + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_ERR, Version, 0, Request_Number, 0, 0, 0, NULL); + break; + } + } else { + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_ERR, 2, 0, Request_Number, 0, 0, 0, NULL); + } + } /* Ignored (RFC2186) */ + } else { + ICP_reply(&clientAddr, clientAddrLen, soc, ICP_OP_ERR, Version, 0, Request_Number, 0, 0, 0, NULL); + } + } + + /* Logging */ + { + String ip = STRING_EMPTY; + SOCaddr serverClient; + int lenServerClient = (int) sizeof(serverClient); + SOCaddr_copyaddr(serverClient, lenServerClient, &clientAddr, clientAddrLen); + if (lenServerClient > 0) { + ip = getip(&serverClient, lenServerClient); + } else { + StringStrcpy(ip, "unknown"); + } + LOG("ICP %s %s/%s %s" _ StringBuff(ip) _ LogRequest _ LogReply _ (UrlRequest ? 
UrlRequest : "-") ); + StringFree(ip); + } + + } + } + if (soc != INVALID_SOCKET) { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + } + free(buffer); + return 1; +} + +static int proxytrack_start(PT_Indexes indexes, T_SOC soc, T_SOC socICP) { + int ret = 1; + if (proxytrack_process_generic(proxytrack_start_ICP, indexes, socICP)) { + //if (!proxytrack_process_generic(proxytrack_start_HTTP, indexes, soc)) + if (!proxytrack_start_HTTP(indexes, soc)) { + ret = 0; + } + } else { + ret = 0; + } + return ret; +} + diff --git a/src/proxy/proxytrack.h b/src/proxy/proxytrack.h new file mode 100644 index 0000000..498f4d8 --- /dev/null +++ b/src/proxy/proxytrack.h @@ -0,0 +1,288 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +Please visit our Website: http://www.httrack.com +*/ + +/* ------------------------------------------------------------ */ +/* File: ProxyTrack, httrack cache-based proxy */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +#ifndef WEBHTTRACK_PROXYTRACK +#define WEBHTTRACK_PROXYTRACK + +/* Version */ +#define PROXYTRACK_VERSION "0.4" + +/* Store manager */ +#include "../minizip/mztools.h" +#include "store.h" + +#include <sys/stat.h> + +/* generic */ + +int proxytrack_main(char* proxyAddr, int proxyPort, + char* icpAddr, int icpPort, + PT_Indexes index); + +/* Spaces: CR,LF,TAB,FF */ +#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) +#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) ) +#define is_taborspace(c) ( ((c)==' ') || ((c)==9) ) +#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') ) +#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) ) + +/* Static definitions */ + +#define _ , +#define CRITICAL_(msg, file, line) do { \ + fprintf(stderr, "* critical: "); \ + fprintf(stderr, msg); \ + fprintf(stderr, " at %s:%d\n", file, line); \ + fflush(stderr); \ +} while(0) +#define CRITICAL(msg) do { \ + fprintf(stderr, "* critical: "); \ + fprintf(stderr, msg); \ + fprintf(stderr, " at %s:%d\n", __FILE__, __LINE__); \ + fflush(stderr); \ +} while(0) + +#define WARNING(msg) do { \ + fprintf(stderr, "* warning: "); \ + fprintf(stderr, msg); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} while(0) + +#define LOG(msg) do { \ + fprintf(stderr, "* log: "); \ + fprintf(stderr, msg); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} while(0) + +#if defined(_DEBUG) || defined(DEBUG) +#define DEBUG(msg) do { \ + fprintf(stderr, "* debug: "); \ + fprintf(stderr, msg); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} while(0) +#else +#define DEBUG_(msg, file, line) do { } while(0) 
/*
 * Read one line from fp into s.
 *
 * At most max-1 characters are stored, followed by a terminating '\0'.
 * CR (13), NUL (0), TAB (9) and FF (12) characters are dropped. Reading stops
 * after a LF (10, consumed but not stored), at end of file, or as soon as the
 * buffer is full.
 *
 * fp:  stream to read from.
 * s:   destination buffer of at least max bytes.
 * max: size of s; nothing is read or written when max <= 0.
 *
 * Returns the number of characters stored (terminator excluded).
 */
static int linput(FILE* fp,char* s,int max) {
  int j = 0;

  if (s == NULL || max <= 0)
    return 0;
  /* Test for room BEFORE reading: a buffer with room for the terminator
     only (max == 1) must not receive a character, otherwise the terminator
     lands one byte past the end. */
  while (j < max - 1) {
    const int c = fgetc(fp);
    if (c == EOF || c == 10)    /* end of file or end of line */
      break;
    switch(c) {
      case 13:                  /* skip CR */
      case 0: case 9: case 12:  /* skip these characters */
        break;
      default:
        s[j++] = (char) c;
        break;
    }
  }
  s[j] = '\0';
  return j;
}
// copy of concat
#define HTS_URLMAXSIZE 1024
/* Ring of 16 scratch strings handed out in turn by concat() */
typedef struct concat_strc {
  char buff[16][HTS_URLMAXSIZE*2*2];
  int rol;
} concat_strc;

/*
 * Concatenate a and b into one of 16 rotating static buffers.
 *
 * a: first part (NULL is treated as an empty string).
 * b: second part, may be NULL.
 *
 * Returns a pointer to the buffer holding the result. The result is cut to
 * the buffer capacity (HTS_URLMAXSIZE*2*2 - 1 characters) instead of
 * overflowing it, and is overwritten by the 16th following call. The rotation
 * state is a plain static: calls are not synchronized between threads.
 */
static char* concat(const char* a,const char* b) {
  /* Static storage: zero-initialized, and no allocation that could fail */
  static concat_strc strc;
  char* slot;
  size_t room, lenA, lenB;

  strc.rol = (strc.rol + 1) % 16;       // roving pointer
  slot = strc.buff[strc.rol];
  room = sizeof(strc.buff[0]) - 1;      /* keep one byte for the terminator */

  lenA = (a != NULL) ? strlen(a) : 0;
  if (lenA > room)
    lenA = room;
  if (lenA > 0)
    memcpy(slot, a, lenA);
  room -= lenA;

  lenB = (b != NULL) ? strlen(b) : 0;
  if (lenB > room)
    lenB = room;
  if (lenB > 0)
    memcpy(slot + lenA, b, lenB);

  slot[lenA + lenB] = '\0';
  return slot;
}
+ +Please visit our Website: http://www.httrack.com +*/ + +/* ------------------------------------------------------------ */ +/* File: Cache manager for ProxyTrack */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Locking */ +#ifdef _WIN32 +#include <process.h> /* _beginthread, _endthread */ +#else +#include <pthread.h> +#endif + +#include "htsglobal.h" + +#define HTS_INTERNAL_BYTECODE +#include "htsinthash.h" +#undef HTS_INTERNAL_BYTECODE +#include "../minizip/mztools.h" + +#include "htscore.h" +#include "htsback.h" + +#include "store.h" +#include "proxystrings.h" +#include "proxytrack.h" + +/* Unlocked functions */ + +static int PT_LookupCache__New_u(PT_Index index, const char* url); +static PT_Element PT_ReadCache__New_u(PT_Index index, const char* url, int flags); + +static int PT_LookupCache__Old_u(PT_Index index, const char* url); +static PT_Element PT_ReadCache__Old_u(PT_Index index, const char* url, int flags); + + +/* Locking */ + +#ifdef _WIN32 +void MutexInit(PT_Mutex *pMutex) { + *pMutex = CreateMutex(NULL,FALSE,NULL); +} + +void MutexLock(PT_Mutex *pMutex) { + WaitForSingleObject(*pMutex, INFINITE); +} + +void MutexUnlock(PT_Mutex *pMutex) { + ReleaseMutex(*pMutex); +} + +void MutexFree(PT_Mutex *pMutex) { + CloseHandle(*pMutex); + *pMutex = NULL; +} +#else +void MutexInit(PT_Mutex *pMutex) { + (void) pthread_mutex_init(pMutex, 0); +} + +void MutexLock(PT_Mutex *pMutex) { + pthread_mutex_lock(pMutex); +} + +void MutexUnlock(PT_Mutex *pMutex) { + pthread_mutex_unlock(pMutex); +} + +void MutexFree(PT_Mutex *pMutex) { + pthread_mutex_destroy(pMutex); +} +#endif + +/* Indexes */ + +typedef struct _PT_Index__New _PT_Index__New; +typedef struct _PT_Index__Old _PT_Index__Old; +typedef struct _PT_Index_Functions _PT_Index_Functions; + +typedef struct _PT_Index__New *PT_Index__New; +typedef struct _PT_Index__Old *PT_Index__Old; +typedef struct 
_PT_Index_Functions *PT_Index_Functions; + +enum { + PT_CACHE_UNDEFINED = -1, + PT_CACHE_MIN = 0, + PT_CACHE__NEW = PT_CACHE_MIN, + PT_CACHE__OLD, + PT_CACHE_MAX = PT_CACHE__OLD +}; + +static int PT_LoadCache__New(PT_Index index, const char *filename); +static void PT_Index_Delete__New(PT_Index *pindex); +static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags); +static int PT_LookupCache__New(PT_Index index, const char* url); +/**/ +static int PT_LoadCache__Old(PT_Index index, const char *filename); +static void PT_Index_Delete__Old(PT_Index *pindex); +static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags); +static int PT_LookupCache__Old(PT_Index index, const char* url); + +struct _PT_Index_Functions { + int (*PT_LoadCache)(PT_Index index, const char *filename); + void (*PT_Index_Delete)(PT_Index *pindex); + PT_Element (*PT_ReadCache)(PT_Index index, const char* url, int flags); + int (*PT_LookupCache)(PT_Index index, const char* url); +}; + +static _PT_Index_Functions _IndexFuncts[] = { + { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New }, + { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old }, + { NULL, NULL, NULL, NULL } +}; + +#define PT_INDEX_COMMON_STRUCTURE \ + time_t timestamp; \ + inthash hash; \ + char startUrl[1024] + +struct _PT_Index__New { + PT_INDEX_COMMON_STRUCTURE; + char path[1024]; /* either empty, or must include ending / */ + int fixedPath; + int safeCache; + unzFile zFile; + PT_Mutex zFileLock; +}; + +struct _PT_Index__Old { + PT_INDEX_COMMON_STRUCTURE; + char filenameDat[1024]; + char filenameNdx[1024]; + FILE *dat,*ndx; + PT_Mutex fileLock; + int version; + char lastmodified[1024]; + char path[1024]; /* either empty, or must include ending / */ + int fixedPath; + int safeCache; +}; + +struct _PT_Index { + int type; + union { + _PT_Index__New formatNew; + _PT_Index__Old formatOld; + struct { + PT_INDEX_COMMON_STRUCTURE; + } 
common; + } slots; +}; + +struct _PT_Indexes { + inthash cil; + struct _PT_Index **index; + int index_size; +}; + +struct _PT_CacheItem { + time_t lastUsed; + size_t size; + void* data; +}; + +struct _PT_Cache { + inthash index; + size_t maxSize; + size_t totalSize; + int count; +}; + +PT_Indexes PT_New() { + PT_Indexes index = (PT_Indexes) calloc(sizeof(_PT_Indexes), 1); + index->cil = inthash_new(127); + index->index_size = 0; + index->index = NULL; + return index; +} + +void PT_Delete(PT_Indexes index) { + if (index != NULL) { + inthash_delete(&index->cil); + free(index); + } +} + +int PT_RemoveIndex(PT_Indexes index, int indexId) { + return 0; +} + +#define assertf(exp) + +static int binput(char* buff,char* s,int max) { + int count = 0; + int destCount = 0; + + // Note: \0 will return 1 + while(destCount < max && buff[count] != '\0' && buff[count] != '\n') { + if (buff[count] != '\r') { + s[destCount++] = buff[count]; + } + count++; + } + s[destCount] = '\0'; + + // then return the supplemental jump offset + return count + 1; +} + +static time_t file_timestamp(const char* file) { + struct stat buf; + if (stat(file, &buf) == 0) { + time_t tt = buf.st_mtime; + if (tt != (time_t) 0 && tt != (time_t) -1) { + return tt; + } + } + return (time_t) 0; +} + +static int PT_Index_Check__(PT_Index index, const char* file, int line) { + if (index == NULL) + return 0; + if (index->type >= PT_CACHE_MIN && index->type <= PT_CACHE_MAX) + return 1; + CRITICAL_("index corrupted in memory", file, line); + return 0; +} +#define SAFE_INDEX(index) PT_Index_Check__(index, __FILE__, __LINE__) + + +/* ------------------------------------------------------------ */ +/* Generic cache dispatch */ +/* ------------------------------------------------------------ */ + +void PT_Index_Delete(PT_Index *pindex) { + if (pindex != NULL && (*pindex) != NULL) { + PT_Index index = *pindex; + if (SAFE_INDEX(index)) { + _IndexFuncts[index->type].PT_Index_Delete(pindex); + } + free(index); + *pindex = 
NULL; + } +} + +static void PT_Index_Delete__New(PT_Index *pindex) { + if (pindex != NULL && (*pindex) != NULL) { + PT_Index__New index = &(*pindex)->slots.formatNew; + if (index->zFile != NULL) { + unzClose(index->zFile); + index->zFile = NULL; + } + if (index->hash != NULL) { + inthash_delete(&index->hash); + index->hash = NULL; + } + MutexFree(&index->zFileLock); + } +} + +static void PT_Index_Delete__Old(PT_Index *pindex) { + if (pindex != NULL && (*pindex) != NULL) { + PT_Index__Old index = &(*pindex)->slots.formatOld; + if (index->dat != NULL) { + fclose(index->dat); + } + if (index->ndx != NULL) { + fclose(index->ndx); + } + if (index->hash != NULL) { + inthash_delete(&index->hash); + index->hash = NULL; + } + MutexFree(&index->fileLock); + } +} + +int PT_AddIndex(PT_Indexes indexes, const char *path) { + PT_Index index = PT_LoadCache(path); + if (index != NULL) { + int ret = PT_IndexMerge(indexes, &index); + if (index != NULL) { + PT_Index_Delete(&index); + } + return ret; + } + return -1; +} + +PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) { + if (indexes != NULL) { + PT_Element elt = PT_ElementNew(); + int i; + String html = STRING_EMPTY; + StringClear(html); + StringStrcat(html, + "<html>" + PROXYTRACK_COMMENT_HEADER + DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES + "<head>\r\n" + "<title>ProxyTrack " PROXYTRACK_VERSION " Catalog</title>" + "</head>\r\n" + "<body>\r\n" + "<h3>Available sites in this cache:</h3><br />" + "<br />" + ); + StringStrcat(html, "<ul>\r\n"); + for(i = 0 ; i < indexes->index_size ; i++) { + if (indexes->index[i] != NULL + && indexes->index[i]->slots.common.startUrl[0] != '\0') + { + const char * url = indexes->index[i]->slots.common.startUrl; + StringStrcat(html, "<li>\r\n"); + StringStrcat(html, "<a href=\""); + StringStrcat(html, url); + StringStrcat(html, "\">"); + StringStrcat(html, url); + StringStrcat(html, "</a>\r\n"); + StringStrcat(html, "</li>\r\n"); + } + } + StringStrcat(html, "</ul>\r\n"); + StringStrcat(html, 
"</body></html>\r\n"); + elt->size = StringLength(html); + elt->adr = StringAcquire(&html); + elt->statuscode = 200; + strcpy(elt->charset, "iso-8859-1"); + strcpy(elt->contenttype, "text/html"); + strcpy(elt->msg, "OK"); + StringFree(html); + return elt; + } + return NULL; +} + +static char* strchr_stop(char* str, char c, char stop) { + for( ; *str != 0 && *str != stop && *str != c ; str++); + if (*str == c) + return str; + return NULL; +} + +char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) { + // should be cached! + if (indexes != NULL && indexes->cil != NULL) { + unsigned int urlSize; + String list = STRING_EMPTY; + String listindexes = STRING_EMPTY; + String subitem = STRING_EMPTY; + unsigned int listCount = 0; + struct_inthash_enum en = inthash_enum_new(indexes->cil); + inthash_chain* chain; + inthash hdupes = NULL; + if (!subtree) + hdupes= inthash_new(127); + StringClear(list); + StringClear(listindexes); + StringClear(subitem); + if (strncmp(url, "http://", 7) == 0) + url += 7; + urlSize = (unsigned int) strlen(url); + while((chain = inthash_enum_next(&en))) { + long int index = (long int)chain->value.intg; + if (urlSize == 0 || strncmp(chain->name, url, urlSize) == 0) { + if (index >= 0 && index < indexes->index_size) { + char * item = chain->name + urlSize; + if (*item == '/') + item++; + { + char * pos = subtree ? 0 : strchr_stop(item, '/', '?'); + unsigned int len = pos ? 
(unsigned int)( pos - item ) : (unsigned int)strlen(item); + if (len > 0 /* default document */ || *item == 0) { + int isFolder = ( item[len] == '/' ); + StringClear(subitem); + if (len > 0) + StringMemcat(subitem, item, len); + if (len == 0 || !inthash_exists(hdupes, StringBuff(subitem))) { + char* ptr = NULL; + ptr += StringLength(list); + if (len > 0) + StringStrcat(list, StringBuff(subitem)); + if (isFolder) + StringStrcat(list, "/"); + StringMemcat(list, "\0", 1); /* NULL terminated strings */ + StringMemcat(listindexes, &ptr, sizeof(ptr)); + listCount++; + inthash_write(hdupes, StringBuff(subitem), 0); + } + } + } + } else { + CRITICAL("PT_Enumerate:Corrupted central index locator"); + } + } + } + StringFree(subitem); + inthash_delete(&hdupes); + if (listCount > 0) { + unsigned int i; + void* blk; + char *nullPointer = NULL; + char* startStrings; + /* NULL terminated index */ + StringMemcat(listindexes, &nullPointer, sizeof(nullPointer)); + /* start of all strings (index) */ + startStrings = nullPointer + StringLength(listindexes); + /* copy list of URLs after indexes */ + StringMemcat(listindexes, StringBuff(list), StringLength(list)); + /* ---- no reallocation beyond this point (fixed addresses) ---- */ + /* start of all strings (pointer) */ + startStrings = (startStrings - nullPointer) + StringBuff(listindexes); + /* transform indexes into references */ + for(i = 0 ; i < listCount ; i++) { + char *ptr = NULL; + unsigned int ndx; + memcpy(&ptr, &StringBuff(listindexes)[i*sizeof(char*)], sizeof(char*)); + ndx = (unsigned int) (ptr - nullPointer); + ptr = startStrings + ndx; + memcpy(&StringBuff(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*)); + } + blk = StringAcquire(&listindexes); + StringFree(list); + StringFree(listindexes); + return (char **)blk; + } + } + return NULL; +} + +void PT_Enumerate_Delete(char ***plist) { + if (plist != NULL && *plist != NULL) { + free(*plist); + *plist = NULL; + } +} + +PT_Index PT_LoadCache(const char *filename) { + int 
/* Size in bytes of the file named 'filename' as reported by stat(),
   converted to long int; -1 when stat() fails. */
static long int filesize(const char *filename) {
  struct stat info;

  memset(&info, 0, sizeof(info));
  return stat(filename, &info) == 0 ? (long int) info.st_size : -1;
}
PT_LookupCache__New_u(index, url); + } + MutexUnlock(&index->slots.formatNew.zFileLock); + return retCode; +} + +static int PT_LookupCache__New_u(PT_Index index_, const char* url) { + if (index_ != NULL) { + PT_Index__New index = &index_->slots.formatNew; + if (index->hash != NULL && index->zFile != NULL && url != NULL && *url != 0) { + int hash_pos_return; + if (strncmp(url, "http://", 7) == 0) + url += 7; + hash_pos_return = inthash_read(index->hash, url, NULL); + if (hash_pos_return) + return 1; + } + } + return 0; +} + +int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex) +{ + if (pindex != NULL && *pindex != NULL && (*pindex)->slots.common.hash != NULL + && indexes != NULL) + { + PT_Index index = *pindex; + struct_inthash_enum en = inthash_enum_new(index->slots.common.hash); + inthash_chain* chain; + int index_id = indexes->index_size++; + int nMerged = 0; + if ((indexes->index = realloc(indexes->index, sizeof(struct _PT_Index)*indexes->index_size)) != NULL) { + indexes->index[index_id] = index; + *pindex = NULL; + while((chain = inthash_enum_next(&en)) != NULL) { + const char * url = chain->name; + if (url != NULL && url[0] != '\0') { + long int previous_index_id = 0; + if (inthash_read(indexes->cil, url, (long int*)&previous_index_id)) { + if (previous_index_id >= 0 && previous_index_id < indexes->index_size) { + if (indexes->index[previous_index_id]->slots.common.timestamp > index->slots.common.timestamp) // existing entry is newer + break; + } else { + CRITICAL("PT_IndexMerge:Corrupted central index locator"); + } + } + inthash_write(indexes->cil, chain->name, index_id); + nMerged++; + } + } + } else { + CRITICAL("PT_IndexMerge:Memory exhausted"); + } + return nMerged; + } + return -1; +} + +void PT_Element_Delete(PT_Element *pentry) { + if (pentry != NULL) { + PT_Element entry = *pentry; + if (entry != NULL) { + if (entry->adr != NULL) { + free(entry->adr); + entry->adr = NULL; + } + if (entry->headers != NULL) { + free(entry->headers); + 
entry->headers = NULL; + } + if (entry->location != NULL) { + free(entry->location); + entry->location = NULL; + } + free(entry); + } + *pentry = NULL; + } +} + +PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags) +{ + if (indexes != NULL) + { + long int index_id; + if (strncmp(url, "http://", 7) == 0) + url += 7; + if (inthash_read(indexes->cil, url, &index_id)) { + if (index_id >= 0 && index_id <= indexes->index_size) { + PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags); + if (item != NULL) { + item->indexId = index_id; + return item; + } + } else { + CRITICAL("PT_ReadCache:Corrupted central index locator"); + } + } + } + return NULL; +} + +int PT_LookupIndex(PT_Indexes indexes, const char* url) { + if (indexes != NULL) + { + long int index_id; + if (strncmp(url, "http://", 7) == 0) + url += 7; + if (inthash_read(indexes->cil, url, &index_id)) { + if (index_id >= 0 && index_id <= indexes->index_size) { + return 1; + } else { + CRITICAL("PT_ReadCache:Corrupted central index locator"); + } + } + } + return 0; +} + +PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) { + if (indexes != NULL && indexId >= 0 && indexId < indexes->index_size) + { + return indexes->index[indexId]; + } + return NULL; +} + +PT_Element PT_ElementNew() { + PT_Element r = NULL; + if ((r = calloc(sizeof(_PT_Element), 1)) == NULL) + return NULL; + r->statuscode=STATUSCODE_INVALID; + r->indexId = -1; + return r; +} + +PT_Element PT_ReadCache(PT_Index index, const char* url, int flags) { + if (index != NULL && SAFE_INDEX(index)) { + return _IndexFuncts[index->type].PT_ReadCache(index, url, flags); + } + return NULL; +} + +static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags) { + PT_Element retCode; + MutexLock(&index->slots.formatNew.zFileLock); + { + retCode = PT_ReadCache__New_u(index, url, flags); + } + MutexUnlock(&index->slots.formatNew.zFileLock); + return retCode; +} + + +/* 
/* Header-field readers for the new.zip cache format.
 *
 * Both macros test whether the field name held in 'line' matches 'refline'
 * (through strfield2). On a match the value is stored into 'refvalue' and
 * line[0] is set to '\0', so that the following ZIP_READFIELD_* invocations
 * on the same line are skipped.
 *
 * ZIP_READFIELD_STRING copies 'value' with strcpy(): the copy is NOT bounded,
 * 'refvalue' must be large enough for any value that can be read.
 * ZIP_READFIELD_INT parses 'value' as a decimal int; an unparsable value
 * stores 0.
 *
 * Arguments are parenthesized so that expressions can be passed safely;
 * 'line' is still evaluated several times and must have no side effects.
 */
#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \
  if ((line)[0] != '\0' && strfield2((line), (refline))) { \
    strcpy((refvalue), (value)); \
    (line)[0] = '\0'; \
  } \
} while(0)
#define ZIP_READFIELD_INT(line, value, refline, refvalue) do { \
  if ((line)[0] != '\0' && strfield2((line), (refline))) { \
    int zip_readfield_intval_ = 0; \
    sscanf((value), "%d", &zip_readfield_intval_); \
    (refvalue) = zip_readfield_intval_; \
    (line)[0] = '\0'; \
  } \
} while(0)
*/ + for(slashes = 2, abpath = filename + (int)strlen(filename) - 1 + ; abpath > filename && ( ( *abpath != '/'&& *abpath != '\\' ) || --slashes > 0) + ; abpath--); + index->path[0] = '\0'; + if (slashes == 0 && *abpath != 0) { + int i; + strncat(index->path, filename, (int) ( abpath - filename ) + 1 ); + for(i = 0 ; index->path[i] != 0 ; i++) { + if (index->path[i] == '\\') { + index->path[i] = '/'; + } + } + } + + /* Ready directory entries */ + if (unzGoToFirstFile(zFile) == Z_OK) { + char comment[128]; + char filename[HTS_URLMAXSIZE * 4]; + int entries = 0; + int firstSeen = 0; + memset(comment, 0, sizeof(comment)); // for truncated reads + do { + int readSizeHeader = 0; + filename[0] = '\0'; + comment[0] = '\0'; + if (unzOpenCurrentFile(zFile) == Z_OK) { + if ( + (readSizeHeader = unzGetLocalExtrafield(zFile, comment, sizeof(comment) - 2)) > 0 + && + unzGetCurrentFileInfo(zFile, NULL, filename, sizeof(filename) - 2, NULL, 0, NULL, 0) == Z_OK + ) + { + long int pos = (long int) unzGetOffset(zFile); + assertf(readSizeHeader < sizeof(comment)); + comment[readSizeHeader] = '\0'; + entries++; + if (pos > 0) { + int dataincache = 0; // data in cache ? 
+ char* filenameIndex = filename; + if (strncmp(filenameIndex, "http://", 7) == 0) { + filenameIndex += 7; + } + if (comment[0] != '\0') { + int maxLine = 2; + char* a = comment; + while(*a && maxLine-- > 0) { // parse only few first lines + char line[1024]; + line[0] = '\0'; + a+=binput(a, line, sizeof(line) - 2); + if (strncmp(line, "X-In-Cache:", 11) == 0) { + if (strcmp(line, "X-In-Cache: 1") == 0) { + dataincache = 1; + } else { + dataincache = 0; + } + break; + } + } + } + if (dataincache) + inthash_add(hashtable, filenameIndex, pos); + else + inthash_add(hashtable, filenameIndex, -pos); + + /* First link as starting URL */ + if (!firstSeen) { + if (strstr(filenameIndex, "/robots.txt") == NULL) { + firstSeen = 1; + if (!link_has_authority(filenameIndex)) + strcat(index->startUrl, "http://"); + strcat(index->startUrl, filenameIndex); + } + } + } else { + fprintf(stderr, "Corrupted cache meta entry #%d"LF, (int)entries); + } + } else { + fprintf(stderr, "Corrupted cache entry #%d"LF, (int)entries); + } + unzCloseCurrentFile(zFile); + } else { + fprintf(stderr, "Corrupted cache entry #%d"LF, (int)entries); + } + } while( unzGoToNextFile(zFile) == Z_OK ); + return 1; + } else { + inthash_delete(&index->hash); + index = NULL; + } + } else { + index = NULL; + } + } + return 0; +} + +static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flags) +{ + PT_Index__New index = (PT_Index__New) &index_->slots.formatNew; + char location_default[HTS_URLMAXSIZE*2]; + char previous_save[HTS_URLMAXSIZE*2]; + char previous_save_[HTS_URLMAXSIZE*2]; + long int hash_pos; + int hash_pos_return; + PT_Element r = NULL; + if (index == NULL || index->hash == NULL || index->zFile == NULL || url == NULL || *url == 0) + return NULL; + if ((r = PT_ElementNew()) == NULL) + return NULL; + location_default[0] = '\0'; + previous_save[0] = previous_save_[0] = '\0'; + memset(r, 0, sizeof(_PT_Element)); + r->location = location_default; + strcpy(r->location, ""); + if 
(strncmp(url, "http://", 7) == 0) + url += 7; + hash_pos_return = inthash_read(index->hash, url, (long int*)&hash_pos); + + if (hash_pos_return) { + uLong posInZip; + if (hash_pos > 0) { + posInZip = (uLong) hash_pos; + } else { + posInZip = (uLong) -hash_pos; + } + if (unzSetOffset(index->zFile, posInZip) == Z_OK) { + /* Read header (Max 8KiB) */ + if (unzOpenCurrentFile(index->zFile) == Z_OK) { + char headerBuff[8192 + 2]; + int readSizeHeader; + int totalHeader = 0; + int dataincache = 0; + + /* For BIG comments */ + headerBuff[0] + = headerBuff[sizeof(headerBuff) - 1] + = headerBuff[sizeof(headerBuff) - 2] + = headerBuff[sizeof(headerBuff) - 3] = '\0'; + + if ( (readSizeHeader = unzGetLocalExtrafield(index->zFile, headerBuff, sizeof(headerBuff) - 2)) > 0) + { + int offset = 0; + char line[HTS_URLMAXSIZE + 2]; + int lineEof = 0; + headerBuff[readSizeHeader] = '\0'; + do { + char* value; + line[0] = '\0'; + offset += binput(headerBuff + offset, line, sizeof(line) - 2); + if (line[0] == '\0') { + lineEof = 1; + } + value = strchr(line, ':'); + if (value != NULL) { + *value++ = '\0'; + if (*value == ' ' || *value == '\t') value++; + ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache); + ZIP_READFIELD_INT(line, value, "X-Statuscode", r->statuscode); + ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r->msg); // msg + ZIP_READFIELD_INT(line, value, "X-Size", r->size); // size + ZIP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype + ZIP_READFIELD_STRING(line, value, "X-Charset", r->charset); // contenttype + ZIP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified + ZIP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag + ZIP_READFIELD_STRING(line, value, "Location", r->location); // 'location' pour moved + ZIP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition + //ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address + 
//ZIP_READFIELD_STRING(line, value, "X-Fil", ..); // Original URI filename + ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename + } + } while(offset < readSizeHeader && !lineEof); + totalHeader = offset; + + /* Previous entry */ + if (previous_save_[0] != '\0') { + int pathLen = (int) strlen(index->path); + if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format + strcpy(previous_save, previous_save_); + } + // relative ? (hack) + else if (index->safeCache + || (previous_save_[0] != '/' // /home/foo/bar.gif + && ( !isalpha(previous_save_[0]) || previous_save_[1] != ':' ) ) // c:/home/foo/bar.gif + ) + { + index->safeCache = 1; + sprintf(previous_save, "%s%s", index->path, previous_save_); + } + // bogus format (includes buggy absolute path) + else { + /* guess previous path */ + if (index->fixedPath == 0) { + const char * start = jump_protocol_and_auth(url); + const char * end = start ? strchr(start, '/') : NULL; + int len = (int) (end - start); + if (start != NULL && end != NULL && len > 0 && len < 128) { + char piece[128 + 2]; + const char * where; + piece[0] = '\0'; + strncat(piece, start, len); + if ((where = strstr(previous_save_, piece)) != NULL) { + index->fixedPath = (int) (where - previous_save_); // offset to relative path + } + } + } + if (index->fixedPath > 0) { + int saveLen = (int) strlen(previous_save_); + if (index->fixedPath < saveLen) { + sprintf(previous_save, "%s%s", index->path, previous_save_ + index->fixedPath); + } else { + sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)", previous_save_, (int)index->fixedPath); + r->statuscode = STATUSCODE_INVALID; + } + } else { + sprintf(previous_save, "%s%s", index->path, previous_save_); + } + } + } + + /* Complete fields */ + r->adr=NULL; + if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */ + int ok = 0; + + // Court-circuit: + // Peut-on stocker le fichier directement sur disque? 
+ if (ok) { + if (r->msg[0] == '\0') { + strcpy(r->msg,"Cache Read Error : Unexpected error"); + } + } else { // lire en mémoire + + if (!dataincache) { + /* Read in memory from cache */ + if (flags & FETCH_BODY) { + if (strnotempty(previous_save)) { + FILE* fp = fopen(fconv(previous_save), "rb"); + if (fp != NULL) { + r->adr = (char*) malloc(r->size + 4); + if (r->adr != NULL) { + if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) { + r->statuscode=STATUSCODE_INVALID; + sprintf(r->msg,"Read error in cache disk data: %s", strerror(errno)); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Read error (memory exhausted) from cache"); + } + fclose(fp); + } else { + r->statuscode=STATUSCODE_INVALID; + sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(previous_save)); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cached file name is invalid"); + } + } + } else { + // lire fichier (d'un coup) + if (flags & FETCH_BODY) { + r->adr=(char*) malloc(r->size+1); + if (r->adr!=NULL) { + if (unzReadCurrentFile(index->zFile, r->adr, r->size) != r->size) { // erreur + free(r->adr); + r->adr=NULL; + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Read Data"); + } else + *(r->adr+r->size)='\0'; + //printf(">%s status %d\n",back[p].r->contenttype,back[p].r->statuscode); + } else { // erreur + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Memory Error"); + } + } + } + } + } // si save==null, ne rien charger (juste en tête) + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Read Header Data"); + } + unzCloseCurrentFile(index->zFile); + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Open File"); + } + + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Bad Offset"); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"File Cache Entry Not Found"); + } + if (r->location[0] != '\0') 
{ + r->location = strdup(r->location); + } else { + r->location = NULL; + } + return r; +} + + +/* ------------------------------------------------------------ */ +/* Old HTTrack cache (dat/ndx) format */ +/* ------------------------------------------------------------ */ + +static int cache_brstr(char* adr,char* s) { + int i; + int off; + char buff[256 + 1]; + off=binput(adr,buff,256); + adr+=off; + sscanf(buff,"%d",&i); + if (i>0) + strncpy(s,adr,i); + *(s+i)='\0'; + off+=i; + return off; +} + +static void cache_rstr(FILE* fp,char* s) { + INTsys i; + char buff[256+4]; + linput(fp,buff,256); + sscanf(buff,INTsysP,&i); + if (i < 0 || i > 32768) /* error, something nasty happened */ + i=0; + if (i>0) { + if ((int) fread(s,1,i,fp) != i) { + int fread_cache_failed = 0; + assertf(fread_cache_failed); + } + } + *(s+i)='\0'; +} + +static char* cache_rstr_addr(FILE* fp) { + INTsys i; + char* addr = NULL; + char buff[256+4]; + linput(fp,buff,256); + sscanf(buff,"%d",&i); + if (i < 0 || i > 32768) /* error, something nasty happened */ + i=0; + if (i > 0) { + addr = malloc(i + 1); + if (addr != NULL) { + if ((int) fread(addr,1,i,fp) != i) { + int fread_cache_failed = 0; + assertf(fread_cache_failed); + } + *(addr+i)='\0'; + } + } + return addr; +} + +static void cache_rint(FILE* fp,int* i) { + char s[256]; + cache_rstr(fp,s); + sscanf(s,"%d",i); +} + +static void cache_rLLint(FILE* fp,unsigned long* i) { + int l; + char s[256]; + cache_rstr(fp,s); + sscanf(s,"%d",&l); + *i = (unsigned long)l; +} + +static int PT_LoadCache__Old(PT_Index index_, const char *filename) { + if (index_ != NULL && filename != NULL) { + char * pos = strrchr(filename, '.'); + PT_Index__Old cache = &index_->slots.formatOld; + long int ndxSize; + cache->filenameDat[0] = '\0'; + cache->filenameNdx[0] = '\0'; + cache->path[0] = '\0'; + + { + PT_Index__Old index = cache; + const char * abpath; + int slashes; + /* -------------------- COPY OF THE __New() CODE -------------------- */ + /* Compute base path 
for this index - the filename MUST be absolute! */ + for(slashes = 2, abpath = filename + (int)strlen(filename) - 1 + ; abpath > filename && ( ( *abpath != '/'&& *abpath != '\\' ) || --slashes > 0) + ; abpath--); + index->path[0] = '\0'; + if (slashes == 0 && *abpath != 0) { + int i; + strncat(index->path, filename, (int) ( abpath - filename ) + 1 ); + for(i = 0 ; index->path[i] != 0 ; i++) { + if (index->path[i] == '\\') { + index->path[i] = '/'; + } + } + } + /* -------------------- END OF COPY OF THE __New() CODE -------------------- */ + } + + /* Index/data filenames */ + if (pos != NULL) { + int nLen = (int) (pos - filename); + strncat(cache->filenameDat, filename, nLen); + strncat(cache->filenameNdx, filename, nLen); + strcat(cache->filenameDat, ".dat"); + strcat(cache->filenameNdx, ".ndx"); + } + ndxSize = filesize(cache->filenameNdx); + cache->timestamp = file_timestamp(cache->filenameDat); + cache->dat = fopen(cache->filenameDat, "rb"); + cache->ndx = fopen(cache->filenameNdx, "rb"); + if (cache->dat != NULL && cache->ndx != NULL && ndxSize > 0) { + char * use = malloc(ndxSize + 1); + if (fread(use, 1, ndxSize, cache->ndx) == ndxSize) { + char firstline[256]; + char* a=use; + use[ndxSize] = '\0'; + a += cache_brstr(a, firstline); + if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache + if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x + cache->version=(int)(firstline[8]-'0'); // cache 1.x + if (cache->version <= 5) { + a+=cache_brstr(a,firstline); + strcpy(cache->lastmodified,firstline); + } else { + // fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version); + fclose(cache->dat); + cache->dat=NULL; + free(use); + use=NULL; + } + } else { // non supporté + // fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline); + fclose(cache->dat); + cache->dat=NULL; + free(use); + use=NULL; + } + /* */ + } else { // Vieille version du cache + /* */ 
+ // fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF); + cache->version=0; // cache 1.0 + strcpy(cache->lastmodified,firstline); + } + + /* Create hash table for the cache (MUCH FASTER!) */ + if (use) { + char line[HTS_URLMAXSIZE*2]; + char linepos[256]; + int pos; + int firstSeen = 0; + while ( (a!=NULL) && (a < (use + ndxSize) ) ) { + a=strchr(a+1,'\n'); /* start of line */ + if (a) { + a++; + /* read "host/file" */ + a+=binput(a,line,HTS_URLMAXSIZE); + a+=binput(a,line+strlen(line),HTS_URLMAXSIZE); + /* read position */ + a+=binput(a,linepos,200); + sscanf(linepos,"%d",&pos); + + /* Add entry */ + inthash_add(cache->hash,line,pos); + + /* First link as starting URL */ + if (!firstSeen) { + if (strstr(line, "/robots.txt") == NULL) { + PT_Index__Old index = cache; + firstSeen = 1; + if (!link_has_authority(line)) + strcat(index->startUrl, "http://"); + strcat(index->startUrl, line); + } + } + + } + } + /* Not needed anymore! */ + free(use); + use=NULL; + return 1; + } + } + } + } + return 0; +} + +static String DecodeUrl(const char * url) { + int i; + String s = STRING_EMPTY; + StringClear(s); + for(i = 0 ; url[i] != '\0' ; i++) { + if (url[i] == '+') { + StringAddchar(s, ' '); + } else if (url[i] == '%') { + if (url[i + 1] == '%') { + StringAddchar(s, '%'); + i++; + } else if (url[i + 1] != 0 && url[i + 2] != 0) { + char tmp[3]; + int codepoint = 0; + tmp[0] = url[i + 1]; + tmp[1] = url[i + 2]; + tmp[2] = 0; + if (sscanf(tmp, "%x", &codepoint) == 1) { + StringAddchar(s, (char)codepoint); + } + i += 2; + } + } else { + StringAddchar(s, url[i]); + } + } + return s; +} + +static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags) { + PT_Element retCode; + MutexLock(&index->slots.formatOld.fileLock); + { + retCode = PT_ReadCache__Old_u(index, url, flags); + } + MutexUnlock(&index->slots.formatOld.fileLock); + return retCode; +} + +static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flags) { 
+ PT_Index__Old cache = (PT_Index__Old) &index_->slots.formatOld; + long int hash_pos; + int hash_pos_return; + char location_default[HTS_URLMAXSIZE*2]; + char previous_save[HTS_URLMAXSIZE*2]; + char previous_save_[HTS_URLMAXSIZE*2]; + PT_Element r; + int ok=0; + + if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0) + return NULL; + if ((r = PT_ElementNew()) == NULL) + return NULL; + location_default[0] = '\0'; + previous_save[0] = previous_save_[0] = '\0'; + memset(r, 0, sizeof(_PT_Element)); + r->location = location_default; + strcpy(r->location, ""); + if (strncmp(url, "http://", 7) == 0) + url += 7; + hash_pos_return=inthash_read(cache->hash, url, (long int*)&hash_pos); + + if (hash_pos_return) { + int pos = (int) hash_pos; /* simply */ + + if (fseek(cache->dat, (pos>0) ? pos : (-pos), SEEK_SET) == 0) { + /* Importer cache1.0 */ + if (cache->version==0) { + OLD_htsblk old_r; + if (fread((char*) &old_r,1,sizeof(old_r),cache->dat) == sizeof(old_r)) { // lire tout (y compris statuscode etc) + int i; + String urlDecoded; + r->statuscode = old_r.statuscode; + r->size = old_r.size; // taille fichier + strcpy(r->msg, old_r.msg); + strcpy(r->contenttype, old_r.contenttype); + + /* Guess the destination filename.. this sucks, because this method is not reliable. + Yes, the old 1.0 cache format was *that* bogus. /rx */ +#define FORBIDDEN_CHAR(c) (c == '~' \ + || c == '\\' \ + || c == ':' \ + || c == '*' \ + || c == '?' \ + || c == '\"' \ + || c == '<' \ + || c == '>' \ + || c == '|' \ + || c == '@' \ + || ((unsigned char) c ) <= 31 \ + || ((unsigned char) c ) == 127 \ + ) + urlDecoded = DecodeUrl(jump_protocol_and_auth(url)); + strcpy(previous_save_, StringBuff(urlDecoded)); + StringFree(urlDecoded); + for(i = 0 ; previous_save_[i] != '\0' && previous_save_[i] != '?' 
; i++) { + if (FORBIDDEN_CHAR(previous_save_[i])) { + previous_save_[i] = '_'; + } + } + previous_save_[i] = '\0'; +#undef FORBIDDEN_CHAR + ok = 1; /* import ok */ + } + /* */ + /* Cache 1.1 */ + } else { + char check[256]; + unsigned long size_read; + check[0]='\0'; + // + cache_rint(cache->dat,&r->statuscode); + cache_rLLint(cache->dat,&r->size); + cache_rstr(cache->dat,r->msg); + cache_rstr(cache->dat,r->contenttype); + if (cache->version >= 3) + cache_rstr(cache->dat,r->charset); + cache_rstr(cache->dat,r->lastmodified); + cache_rstr(cache->dat,r->etag); + cache_rstr(cache->dat,r->location); + if (cache->version >= 2) + cache_rstr(cache->dat,r->cdispo); + if (cache->version >= 4) { + cache_rstr(cache->dat, previous_save_); // adr + cache_rstr(cache->dat, previous_save_); // fil + previous_save[0] = '\0'; + cache_rstr(cache->dat, previous_save_); // save + } + if (cache->version >= 5) { + r->headers = cache_rstr_addr(cache->dat); + } + // + cache_rstr(cache->dat,check); + if (strcmp(check,"HTS")==0) { /* intégrité OK */ + ok=1; + } + cache_rLLint(cache->dat, &size_read); /* lire size pour être sûr de la taille déclarée (réécrire) */ + if (size_read > 0) { /* si inscrite ici */ + r->size = size_read; + } else { /* pas de données directement dans le cache, fichier présent? */ + r->size = 0; + } + } + + /* Check destination filename */ + + { + PT_Index__Old index = cache; + /* -------------------- COPY OF THE __New() CODE -------------------- */ + if (previous_save_[0] != '\0') { + int pathLen = (int) strlen(index->path); + if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format + strcpy(previous_save, previous_save_); + } + // relative ? 
(hack) + else if (index->safeCache + || (previous_save_[0] != '/' // /home/foo/bar.gif + && ( !isalpha(previous_save_[0]) || previous_save_[1] != ':' ) ) // c:/home/foo/bar.gif + ) + { + index->safeCache = 1; + sprintf(previous_save, "%s%s", index->path, previous_save_); + } + // bogus format (includes buggy absolute path) + else { + /* guess previous path */ + if (index->fixedPath == 0) { + const char * start = jump_protocol_and_auth(url); + const char * end = start ? strchr(start, '/') : NULL; + int len = (int) (end - start); + if (start != NULL && end != NULL && len > 0 && len < 128) { + char piece[128 + 2]; + const char * where; + piece[0] = '\0'; + strncat(piece, start, len); + if ((where = strstr(previous_save_, piece)) != NULL) { + index->fixedPath = (int) (where - previous_save_); // offset to relative path + } + } + } + if (index->fixedPath > 0) { + int saveLen = (int) strlen(previous_save_); + if (index->fixedPath < saveLen) { + sprintf(previous_save, "%s%s", index->path, previous_save_ + index->fixedPath); + } else { + sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)", previous_save_, (int)index->fixedPath); + r->statuscode = STATUSCODE_INVALID; + } + } else { + sprintf(previous_save, "%s%s", index->path, previous_save_); + } + } + } + /* -------------------- END OF COPY OF THE __New() CODE -------------------- */ + } + + /* Read data */ + if (ok) { + r->adr = NULL; + if ( (r->statuscode>=0) && (r->statuscode<=999)) { + r->adr = NULL; + if (pos<0) { + if (flags & FETCH_BODY) { + FILE* fp = fopen(previous_save, "rb"); + if (fp != NULL) { + r->adr = (char*) malloc(r->size + 1); + if (r->adr != NULL) { + if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Read error in cache disk data"); + } + r->adr[r->size] = '\0'; + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Read error (memory exhausted) from cache"); + } + fclose(fp); + } else { + r->statuscode = 
STATUSCODE_INVALID; + strcpy(r->msg, "Previous cache file not found (2)"); + } + } + } else { + // lire fichier (d'un coup) + if (flags & FETCH_BODY) { + r->adr=(char*) malloc(r->size + 1); + if (r->adr!=NULL) { + if (fread(r->adr, 1, r->size,cache->dat) != r->size) { // erreur + free(r->adr); + r->adr=NULL; + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Read Data"); + } else + r->adr[r->size] = '\0'; + } else { // erreur + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Memory Error"); + } + } + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Bad Data"); + } + } else { // erreur + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Read Header"); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Cache Read Error : Seek Failed"); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"File Cache Entry Not Found"); + } + if (r->location[0] != '\0') { + r->location = strdup(r->location); + } else { + r->location = NULL; + } + return r; +} + +static int PT_LookupCache__Old(PT_Index index, const char* url) { + int retCode; + MutexLock(&index->slots.formatOld.fileLock); + { + retCode = PT_LookupCache__Old_u(index, url); + } + MutexUnlock(&index->slots.formatOld.fileLock); + return retCode; +} + +static int PT_LookupCache__Old_u(PT_Index index_, const char* url) { + if (index_ != NULL) { + PT_Index__New cache = (PT_Index__New) &index_->slots.formatNew; + if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0) + return 0; + if (strncmp(url, "http://", 7) == 0) + url += 7; + if (inthash_read(cache->hash, url, NULL)) + return 1; + } + return 0; +} + diff --git a/src/proxy/store.h b/src/proxy/store.h new file mode 100644 index 0000000..805bc20 --- /dev/null +++ b/src/proxy/store.h @@ -0,0 +1,105 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) 
/* One cache entry as returned by the PT_ReadCache()/PT_ReadIndex() readers.
   Elements are created with PT_ElementNew() and released with
   PT_Element_Delete(), which frees 'adr', 'headers' and 'location' when they
   are not NULL. */
typedef struct _PT_Element {
  int indexId;           // index identifier, if suitable (!= -1)
  //
  int statuscode;        // status code, -1=error, 200=OK, 201=.. etc. (see RFC1945)
  char* adr;             // address of the memory block holding the body, NULL=empty
  char* headers;         // address of the headers, if present
  unsigned long int size;         // file size
  char msg[1024];         // error message ("\0"=undefined)
  char contenttype[64];  // content-type (e.g. "text/html")
  char charset[64];      // charset (e.g. "iso-8859-1")
  char* location;        // receives, when available, the actual 'location'
  char lastmodified[64]; // Last-Modified
  char etag[64];         // Etag
  char cdispo[256];      // Content-Disposition (truncated)
} _PT_Element;
typedef struct _PT_Element *PT_Element;