summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2012-03-19 12:36:11 +0000
committerXavier Roche <xroche@users.noreply.github.com>2012-03-19 12:36:11 +0000
commitad5b7acc19290ff91e0f42a0de448a26760fcf99 (patch)
tree2d1867758835fd0c4e443ff3cc7e5c774af85874
Imported httrack 3.20.2
-rw-r--r--COPYING340
-rw-r--r--HelpHtml/abuse.html580
-rw-r--r--HelpHtml/addurl.html155
-rw-r--r--HelpHtml/cmddoc.html155
-rw-r--r--HelpHtml/contact.html227
-rw-r--r--HelpHtml/dev.html262
-rw-r--r--HelpHtml/div/search.sh59
-rw-r--r--HelpHtml/faq.html902
-rw-r--r--HelpHtml/fcguide.html2719
-rw-r--r--HelpHtml/filters.html261
-rw-r--r--HelpHtml/httrack.css70
-rw-r--r--HelpHtml/httrack.man.html2122
-rw-r--r--HelpHtml/images/bg_rings.gifbin0 -> 4243 bytes
-rw-r--r--HelpHtml/images/header_title_4.gifbin0 -> 2000 bytes
-rw-r--r--HelpHtml/images/screenshot_01.jpgbin0 -> 6046 bytes
-rw-r--r--HelpHtml/img/addurl1.gifbin0 -> 9690 bytes
-rw-r--r--HelpHtml/img/addurl2.gifbin0 -> 9834 bytes
-rw-r--r--HelpHtml/img/addurl3.gifbin0 -> 10533 bytes
-rw-r--r--HelpHtml/img/addurl4.gifbin0 -> 9686 bytes
-rw-r--r--HelpHtml/img/addurl5.gifbin0 -> 9680 bytes
-rw-r--r--HelpHtml/img/backblue.gifbin0 -> 5616 bytes
-rw-r--r--HelpHtml/img/fade.gifbin0 -> 828 bytes
-rw-r--r--HelpHtml/img/httrack.gifbin0 -> 4908 bytes
-rw-r--r--HelpHtml/img/snap1_a.gifbin0 -> 9925 bytes
-rw-r--r--HelpHtml/img/snap1_b.gifbin0 -> 10569 bytes
-rw-r--r--HelpHtml/img/snap1_c.gifbin0 -> 21952 bytes
-rw-r--r--HelpHtml/img/snap2_a.gifbin0 -> 49391 bytes
-rw-r--r--HelpHtml/img/snap2_b.gifbin0 -> 41329 bytes
-rw-r--r--HelpHtml/img/snap3_a.gifbin0 -> 40089 bytes
-rw-r--r--HelpHtml/img/snap4_a.gifbin0 -> 37990 bytes
-rw-r--r--HelpHtml/img/snap5_a.gifbin0 -> 14993 bytes
-rw-r--r--HelpHtml/img/snap9.gifbin0 -> 4847 bytes
-rw-r--r--HelpHtml/img/snap9_a.gifbin0 -> 3450 bytes
-rw-r--r--HelpHtml/img/snap9_b.gifbin0 -> 4458 bytes
-rw-r--r--HelpHtml/img/snap9_c.gifbin0 -> 3445 bytes
-rw-r--r--HelpHtml/img/snap9_d.gifbin0 -> 6850 bytes
-rw-r--r--HelpHtml/img/snap9_d2.gifbin0 -> 9871 bytes
-rw-r--r--HelpHtml/img/snap9_d3.gifbin0 -> 8199 bytes
-rw-r--r--HelpHtml/img/snap9_d4.gifbin0 -> 8083 bytes
-rw-r--r--HelpHtml/img/snap9_d5.gifbin0 -> 10404 bytes
-rw-r--r--HelpHtml/img/snap9_d6.gifbin0 -> 9051 bytes
-rw-r--r--HelpHtml/img/snap9_d7.gifbin0 -> 9088 bytes
-rw-r--r--HelpHtml/img/snap9_d8.gifbin0 -> 3525 bytes
-rw-r--r--HelpHtml/img/snap9_e.gifbin0 -> 3491 bytes
-rw-r--r--HelpHtml/img/snap9_f.gifbin0 -> 3380 bytes
-rw-r--r--HelpHtml/img/snap9_g.gifbin0 -> 2888 bytes
-rw-r--r--HelpHtml/img/snap9_g2.gifbin0 -> 9388 bytes
-rw-r--r--HelpHtml/img/snap9_g3.gifbin0 -> 13593 bytes
-rw-r--r--HelpHtml/img/snap9_h.gifbin0 -> 2874 bytes
-rw-r--r--HelpHtml/img/snap9_i.gifbin0 -> 2459 bytes
-rw-r--r--HelpHtml/img/snap9_j.gifbin0 -> 4844 bytes
-rw-r--r--HelpHtml/img/snap9_k.gifbin0 -> 4935 bytes
-rw-r--r--HelpHtml/index.html153
-rw-r--r--HelpHtml/options.html363
-rw-r--r--HelpHtml/overview.html156
-rw-r--r--HelpHtml/shelldoc.html135
-rw-r--r--HelpHtml/start.html16
-rw-r--r--HelpHtml/step.html139
-rw-r--r--HelpHtml/step1.html154
-rw-r--r--HelpHtml/step2.html168
-rw-r--r--HelpHtml/step3.html140
-rw-r--r--HelpHtml/step4.html139
-rw-r--r--HelpHtml/step5.html138
-rw-r--r--HelpHtml/step9.html155
-rw-r--r--HelpHtml/step9_opt1.html156
-rw-r--r--HelpHtml/step9_opt10.html162
-rw-r--r--HelpHtml/step9_opt11.html193
-rw-r--r--HelpHtml/step9_opt2.html192
-rw-r--r--HelpHtml/step9_opt3.html156
-rw-r--r--HelpHtml/step9_opt4.html187
-rw-r--r--HelpHtml/step9_opt5.html176
-rw-r--r--HelpHtml/step9_opt6.html173
-rw-r--r--HelpHtml/step9_opt7.html162
-rw-r--r--HelpHtml/step9_opt8.html152
-rw-r--r--HelpHtml/step9_opt9.html167
-rw-r--r--INSTALL36
-rw-r--r--Makefile9
-rw-r--r--README58
-rw-r--r--gpl-fr.txt197
-rw-r--r--gpl.txt287
-rw-r--r--greetings.txt78
-rw-r--r--history.txt430
-rw-r--r--httrack-doc.html10
-rw-r--r--lib/example.c156
-rw-r--r--lib/example.dsp311
-rw-r--r--lib/example.dsw29
-rw-r--r--lib/example.h27
-rw-r--r--lib/htssystem.h12
-rw-r--r--lib/readme.txt35
-rw-r--r--license.txt40
-rw-r--r--man/httrack.1.gzbin0 -> 6494 bytes
-rw-r--r--src/Makefile10
-rw-r--r--src/Makefile.in417
-rwxr-xr-xsrc/configure603
-rw-r--r--src/gpl.txt287
-rw-r--r--src/hts-indextmpl.h924
-rw-r--r--src/htsalias.c520
-rw-r--r--src/htsalias.h58
-rw-r--r--src/htsback.c2462
-rw-r--r--src/htsback.h75
-rw-r--r--src/htsbase.h136
-rw-r--r--src/htsbasenet.h86
-rw-r--r--src/htsbauth.c401
-rw-r--r--src/htsbauth.h74
-rw-r--r--src/htscache.c881
-rw-r--r--src/htscache.h64
-rw-r--r--src/htscatchurl.c296
-rw-r--r--src/htscatchurl.h76
-rw-r--r--src/htsconfig.h133
-rw-r--r--src/htscore.c4158
-rw-r--r--src/htscore.h363
-rw-r--r--src/htscoremain.c2001
-rw-r--r--src/htscoremain.h62
-rw-r--r--src/htsdefines.h100
-rw-r--r--src/htsfilters.c316
-rw-r--r--src/htsfilters.h49
-rw-r--r--src/htsftp.c1135
-rw-r--r--src/htsftp.h68
-rw-r--r--src/htsglobal.h332
-rw-r--r--src/htshash.c453
-rw-r--r--src/htshash.h104
-rw-r--r--src/htshelp.c622
-rw-r--r--src/htshelp.h53
-rw-r--r--src/htsindex.c483
-rw-r--r--src/htsindex.h48
-rw-r--r--src/htsjava.c395
-rw-r--r--src/htsjava.h69
-rw-r--r--src/htslib.c4279
-rw-r--r--src/htslib.h339
-rw-r--r--src/htsmd5.c76
-rw-r--r--src/htsmd5.h52
-rw-r--r--src/htsname.c1266
-rw-r--r--src/htsname.h50
-rw-r--r--src/htsnet.h242
-rw-r--r--src/htsnostatic.c260
-rw-r--r--src/htsnostatic.h223
-rw-r--r--src/htsopt.h186
-rw-r--r--src/htsparse.c2377
-rw-r--r--src/htsrobots.c118
-rw-r--r--src/htsrobots.h56
-rw-r--r--src/htssystem.h15
-rw-r--r--src/htssystem.h.windows9x11
-rw-r--r--src/htsthread.c97
-rw-r--r--src/htsthread.h95
-rw-r--r--src/htstools.c785
-rw-r--r--src/htstools.h138
-rw-r--r--src/htswizard.c880
-rw-r--r--src/htswizard.h53
-rw-r--r--src/htswrap.c69
-rw-r--r--src/htswrap.h48
-rw-r--r--src/htszlib.c84
-rw-r--r--src/htszlib.h49
-rw-r--r--src/httrack-library.h50
-rw-r--r--src/httrack.c571
-rw-r--r--src/httrack.dsp324
-rw-r--r--src/httrack.dsw29
-rw-r--r--src/httrack.h107
-rw-r--r--src/md5.c271
-rw-r--r--src/md5.h36
-rwxr-xr-xsrc/postinst-config.in55
-rwxr-xr-xsrc/strip_cr.in32
-rw-r--r--templates/index-body.html8
-rw-r--r--templates/index-footer.html33
-rw-r--r--templates/index-header.html101
-rw-r--r--templates/topindex-body.html5
-rw-r--r--templates/topindex-footer.html30
-rw-r--r--templates/topindex-header.html100
167 files changed, 45213 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..196760e
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/HelpHtml/abuse.html b/HelpHtml/abuse.html
new file mode 100644
index 0000000..4be36a1
--- /dev/null
+++ b/HelpHtml/abuse.html
@@ -0,0 +1,580 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h3>For HTTrack users:</h3>
+<ul>
+<li><a href="#USERS">Advice &amp; what <u>not</u> to do when you are using HTTrack</a></li>
+</ul>
+<br>
+
+<h3>For webmasters having problems with bandwidth abuse / other abuses related to HTTrack:</h3>
+<ul>
+<li><a href="#WEBMASTERS">Abuse FAQ for <u>webmasters</u></a></li>
+</ul>
+
+<br><br>
+<table width="100%" height="100%" border="0" cellspacing="0" cellpadding="0"><tr><td valign="top" align="left">
+
+<a name="USERS"></a>
+
+
+
+<!-- ------------------------------------------------------------ -->
+
+
+
+<h2 align="center"><em>Advice &amp; what <u>not</u> to do</em></h2>
+
+<center><h3>Please follow these <i>common sense</i> rules to avoid any network abuse</h3></center>
+
+<br>
+
+<ul>
+<li>Do not overload the websites!</li>
+<br>
+Downloading a site can overload it, if you have a fast pipe, or if you capture too many simultaneous cgi (dynamically generated pages).
+<br>
+<ul>
+<li>Do not download too large websites: use filters</li>
+<li>Do not use too many simultaneous connections</li>
+<li>Use bandwidth limits</li>
+<li>Use connection limits</li>
+<li>Use size limits</li>
+<li>Use time limits</li>
+<li>Only disable robots.txt rules with great care</li>
+<li>Try not to download during working hours</li>
+<li>Check your mirror transfer rate/size</li>
+<li>For large mirrors, first ask the webmaster of the site</li>
+</ul>
+<br>
+<li>Ensure that you can copy the website</li>
+<ul>
+<li>Are the pages copyrighted?</li>
+<li>Can you copy them only for private purpose?</li>
+<li>Do not make online mirrors unless you are authorized to do so</li>
+</ul>
+<br>
+<li>Do not overload your network</li>
+<ul>
+<li>Is your (corporate, private..) network connected through dialup ISP?</li>
+<li>Is your network bandwidth limited (and expensive)?</li>
+<li>Are you slowing down the traffic?</li>
+</ul>
+<br>
+<li>Do not steal private information</li>
+<ul>
+<li>Do not grab emails</li>
+<li>Do not grab private information</li>
+</ul>
+</ul>
+
+</table>
+<br><br>
+<table width="100%" height="100%" border="0" cellspacing="0" cellpadding="0"><tr><td valign="top" align="left">
+
+<a name="WEBMASTERS"></a>
+
+
+
+<!-- ------------------------------------------------------------ -->
+
+
+
+<h2 align="center"><em>Abuse FAQ for <u>webmasters</u></em></h2>
+
+<center><h3>How to limit network abuse
+<br>
+HTTrack Website Copier FAQ (updated - DRAFT)
+</h3></center>
+
+<br>
+Q: <strong>How to block offline browsers, like HTTrack?</strong><br>
+<br>
+A: <strong>This is a complex question, let's study it</strong><br>
+<br>
+First, there are several different reasons for that<br>
+Why do you want to block offline browsers? :<br>
+<br>
+<ol>
+<li><a href="#ABUSE1">Because a large part of your bandwidth is used by some users, who are slowing down the rest</a></li>
+<li><a href="#ABUSE2">Because of copyright questions (you do not want people to copy parts of your website)</a></li>
+<li><a href="#ABUSE3">Because of privacy (you do not want email grabbers to steal all your users' emails)</a></li>
+</ol>
+<br>
+<br>
+<ol>
+
+<a name="ABUSE1"></a>
+<li>Bandwidth abuse:<br>
+<br>
+Many Webmasters are concerned about bandwidth abuse, even if this problem is caused by
+a minority of people. Offline browser tools, like HTTrack, can be used in a WRONG way,
+and
+therefore are sometimes considered as a potential danger.<br>
+But before thinking that all offline browsers are BAD, consider this:
+students, teachers, IT consultants, websurfers and many people who like your website, may
+want to copy
+parts of it, for their work, their studies, to teach or demonstrate to people during class
+school or
+shows. They might do that because they are connected through expensive modem connection,
+or because they would like to consult pages while travelling, or archive sites that may be
+removed
+one day, do some data mining, compile information (&quot;if only I could find this
+website I saw one day..&quot;). <br>
+There are many good reasons to mirror websites, and this helps many good people.<br>
+As a webmaster, you might be interested to use such tools, too: test broken links, move a
+website to
+another location, control which external links are put on your website for legal/content
+control,
+test the webserver response and performances, index it..<br>
+<br>
+Anyway, bandwidth abuse can be a problem. If your site is regularly &quot;clobbered&quot;
+by evil downloaders, you have <br>
+various solutions. You have radical solutions, and intermediate solutions. I strongly
+recommend not using<br>
+radical solutions, because of the previous remarks (good people often mirror websites).<br>
+<br>
+In general, for all solutions,<br>
+the good thing: it will limit the bandwidth abuse<br>
+the bad thing: depending on the solution, it will be either a small constraint, or a fatal
+nuisance (you'll get 0 visitors)<br>
+or, to be extreme: if you unplug the wire, there will be no bandwidth abuse<br>
+<br>
+<ol type="a">
+
+<li>Inform people, explain why (&quot;please do not clobber the bandwidth&quot;)<br>
+Good: Will work with good people. Many good people just don't KNOW that they can slow down
+a network.<br>
+Bad: Will **only** work with good people<br>
+How to do: Obvious - place a note, a warning, an article, a drawing, a poem or whatever you
+want<br>
+<br>
+</li><li>Use &quot;robots.txt&quot; file<br>
+Good: Easy to setup<br>
+Bad: Easy to override<br>
+How to do: Create a robots.txt file on top dir, with proper parameters<br>
+Example:<br>
+&nbsp;&nbsp;&nbsp; User-agent: *<br>
+<br>
+&nbsp;&nbsp;&nbsp; Disallow: /bigfolder<br>
+<br>
+</li><li>Ban registered offline-browsers User-agents<br>
+Good: Easy to setup<br>
+Bad: Radical, and easy to override<br>
+How to do: Filter the &quot;User-agent&quot; HTTP header field<br>
+<br>
+</li><li>Limit the bandwidth per IP (or by folders)<br>
+Good: Efficient<br>
+Bad: Multiple users behind proxies will be slowed down, not really easy to setup<br>
+How to do: Depends on webserver. Might be done with low-level IP rules (QoS)<br>
+<br>
+</li><li>Prioritize small files over large files<br>
+Good: Efficient if large files are the cause of abuse<br>
+Bad: Not always efficient<br>
+How to do: Depends on the webserver<br>
+<br>
+</li><li>Ban abuser IPs<br>
+Good: Immediate solution<br>
+Bad: Annoying to do, useless for dynamic IPs, and not very user friendly<br>
+How to do: Either ban IP's on the firewall, or on the webserver (see ACLs)<br>
+<br>
+</li><li>Limit abusers IPs<br>
+Good: Intermediate and immediate solution<br>
+Bad: Annoying to do, useless for dynamic IPs, and annoying to maintain..<br>
+How to do: Use routine QoS (fair queuing), or webserver options<br>
+<br>
+</li><li>Use technical tricks (like javascript) to hide URLs<br>
+Good: Efficient<br>
+Bad: The most efficient tricks will also cause your website to be heavy, and not
+user-friendly (and therefore less attractive, even for surfing users). Remember: clients
+or visitors might want to consult your website offline. Advanced users will also still be
+able to note the URLs and catch them. Will not work on non-javascript browsers. It will
+not work if the user clicks 50 times and puts downloads in background with a standard
+browser<br>
+How to do: Most offline browsers (I would say all, but let's say most) are unable to
+&quot;understand&quot; javascript/java properly. Reason: very tricky to handle!<br>
+Example: <br>
+You can replace:<br>
+
+<tt>
+&nbsp;&nbsp;&nbsp; &lt;a href=&quot;bigfile.zip&quot;&gt;Foo&lt;/a&gt;<br>
+</tt>
+
+by:<br>
+<tt>
+&nbsp;&nbsp;&nbsp; &lt;script language=&quot;javascript&quot;&gt;<br>
+&nbsp;&nbsp;&nbsp; &lt;!--<br>
+&nbsp;&nbsp;&nbsp; document.write('&lt;a h' + 're' + 'f=&quot;');<br>
+&nbsp;&nbsp;&nbsp; document.write('bigfile' + '.' + 'zip&quot;&gt;');<br>
+&nbsp;&nbsp;&nbsp; // --&gt;<br>
+&nbsp;&nbsp;&nbsp; &lt;/script&gt;<br>
+&nbsp;&nbsp;&nbsp; Foo<br>
+&nbsp;&nbsp;&nbsp; &lt;/a&gt;<br>
+</tt>
+<br>
+You can also use java-based applets. I would say that it is the &quot;best of the
+horrors&quot;. A big, fat, slow, bogus java applet. Avoid!<br>
+<br>
+</li><li>Use technical tricks to lag offline browsers<br>
+Good: Efficient<br>
+Bad: Can be avoided by advanced users, annoying to maintain, AND potentially worse than
+the illness (cgi's are often taking some CPU usage). It will not work if the user clicks
+50 times and puts downloads in background with a standard browser<br>
+How to do: Create fake empty links that point to cgi's, with long delays<br>
+Example: Use things like
+
+<tt>
+&lt;a href=&quot;slow.cgi?p=12786549&quot;&gt;&lt;nothing&gt;&lt;/a&gt; (example in php:)<br>
+&nbsp;&nbsp;&nbsp; &lt;?php<br>
+&nbsp;&nbsp;&nbsp; for($i=0;$i&lt;10;$i++) {<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; sleep(6);<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; echo &quot; &quot;;<br>
+&nbsp;&nbsp;&nbsp; }<br>
+&nbsp;&nbsp;&nbsp; ?&gt;<br>
+</tt>
+
+<br>
+</li><li>Use technical tricks to temporarily ban IPs<br>
+Good: Efficient<br>
+Bad: Radical (your site will only be available online for all users), not easy to setup<br>
+How to do: Create fake links with &quot;killing&quot; targets<br>
+Example: Use things like &lt;a href=&quot;killme.cgi&quot;&gt;&lt;nothing&gt;&lt;/a&gt;
+(again an example in php:)<br>
+<tt>
+&nbsp;&nbsp;&nbsp; &lt;?php<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; // Of course,
+&quot;add_temp_firewall_rule&quot; has to be written..<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; add_temp_firewall_rule($REMOTE_ADDR,&quot;30s&quot;);<br>
+&nbsp;&nbsp;&nbsp; ?&gt;<br>
+</tt>
+<br>
+<br>
+</li>
+</ol>
+
+<a name="ABUSE2"></a>
+<li>Copyright issues<br>
+<br>
+You do not want people to &quot;steal&quot; your website, or even copy parts of it. First,
+stealing a website does not<br>
+require an offline browser. Second, a direct (and credited) copy is sometimes better
+than disguised <br>
+plagiarism. Besides, several previous remarks are also interesting here: the more
+protected your website will be,<br>
+the potentially less attractive it will also be. There is no perfect solution, too. A
+webmaster asked me one day<br>
+to give him a solution to prevent any website copy. Not only for offline browsers, but
+also against &quot;save as&quot;, <br>
+cut and paste, print.. and print screen. I replied that it was not possible, especially
+for the print screen - and<br>
+that another potential threat was the evil photographer. Maybe with a &quot;this document
+will self-destruct in 5 seconds..&quot;<br>
+or by shooting users after consulting the document.<br>
+More seriously, once a document is being placed on a website, there will always be the
+risks of copy (or plagiarism)<br>
+<br>
+To limit the risk, previous a- and h- solutions, in &quot;bandwidth abuse&quot; section,
+can be used<br>
+<br>
+<br>
+</li>
+
+<a name="ABUSE3"></a>
+<li>Privacy<br>
+<br>
+Might be related to section 2.<br>
+But the greatest risk is maybe email grabbers. <br>
+<br>
+<ol type="a">
+<li>A solution can be to use javascript to hide emails. <br>
+Good: Efficient<br>
+Bad: Will not work on non-javascript browsers<br>
+How to do: Use javascript to build mailto: links<br>
+Example: (in javascript)<br>
+<tt>
+&nbsp;&nbsp;&nbsp; &lt;script language=&quot;javascript&quot;&gt;<br>
+&nbsp;&nbsp;&nbsp; &lt;!--<br>
+&nbsp;&nbsp;&nbsp; function FOS(host,nom,info) {<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; var s;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if (info == &quot;&quot;) info=nom+&quot;@&quot;+host;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; s=&quot;mail&quot;;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; document.write(&quot;&lt;a href='&quot;+s+&quot;to:&quot;+nom+&quot;@&quot;+host+&quot;'&gt;&quot;+info+&quot;&lt;/a&gt;&quot;);<br>
+&nbsp;&nbsp;&nbsp; }<br>
+&nbsp;&nbsp;&nbsp; FOS('mycompany.com','smith?subject=Hi, John','Click here to email me!')<br>
+&nbsp;&nbsp;&nbsp; // --&gt;<br>
+&nbsp;&nbsp;&nbsp; &lt;/script&gt;<br>
+</tt>
+<br>
+</li><li>Another one is to create images of emails<br>
+Good: Efficient, does not require javascript<br>
+Bad: There is still the problem of the link (mailto:), images are bigger than text<br>
+How to do: Not so obvious if you do not want to create images by yourself<br>
+Example: (php, Unix)<br>
+
+<tt>
+&lt;?php<br>
+/*<br>
+Email contact displayer<br>
+Usage: email.php3?id=&lt;4 bytes of user's md5&gt;<br>
+The &lt;4 bytes of user's md5&gt; can be calculated using the 2nd script (see below)<br>
+Example: http://yourhost/email.php3?id=91ff1a48<br>
+*/<br>
+$domain=&quot;mycompany.com&quot;;<br>
+$size=12;<br>
+<br>
+/* Find the user in the system database */<br>
+if (!$id)<br>
+&nbsp;&nbsp;exit;<br>
+unset($email);<br>
+unset($name);<br>
+unset($pwd);<br>
+unset($apwd);<br>
+$email=&quot;&quot;;<br>
+$name=&quot;&quot;;<br>
+$fp=@fopen(&quot;/etc/passwd&quot;,&quot;r&quot;);<br>
+if ($fp) {<br>
+&nbsp;&nbsp;$pwd=@fread($fp,filesize(&quot;/etc/passwd&quot;));<br>
+&nbsp;&nbsp;@fclose($fp);<br>
+}<br>
+$apwd=split(&quot;\n&quot;,$pwd);<br>
+foreach($apwd as $line) {<br>
+&nbsp;&nbsp;$fld=split(&quot;:&quot;,$line);<br>
+&nbsp;&nbsp;if (substr(md5($fld[0]),0,8) == $id) {<br>
+&nbsp;&nbsp;&nbsp;&nbsp;$email=$fld[0].&quot;@&quot;.$domain;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;$nm=substr($fld[4],0,strpos($fld[4],&quot;,&quot;));<br>
+&nbsp;&nbsp;&nbsp;&nbsp;$name=$email;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;if ($nm)<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;$name=&quot;\&quot;&quot;.$nm.&quot;\&quot; &lt;&quot;.$email.&quot;&gt;&quot;;<br>
+&nbsp;&nbsp;}<br>
+}<br>
+if (!$name)<br>
+&nbsp;&nbsp;exit;<br>
+<br>
+/* Create and show the image */<br>
+Header (&quot;Content-type: image/gif&quot;);<br>
+$im = imagecreate ($size*strlen($name), $size*1.5);<br>
+$black = ImageColorAllocate ($im, 255, 255, 255);<br>
+$white = ImageColorAllocate ($im, 0,0,0);<br>
+ImageTTFText($im, $size, 0, 0, $size , $white,
+&quot;/usr/share/enlightenment/E-docs/aircut3.ttf&quot;,$name);<br>
+ImageGif ($im);<br>
+ImageDestroy ($im);<br>
+?&gt;<br>
+<br>
+</tt>
+
+The script to find the id:<br>
+<br>
+
+<tt>
+#!/bin/sh<br>
+<br>
+# small script for email.php3<br>
+echo &quot;Enter login:&quot;<br>
+read login<br>
+echo &quot;The URL is:&quot;<br>
+printf &quot;http://yourhost/email.php3?id=&quot;<br>
+printf $login|md5sum|cut -c1-8<br>
+echo <br>
+<br>
+</tt>
+
+</li><li>You can also create temporary email aliases, each week, for all users<br>
+Good: Efficient, and you can give your real email in your reply-to address<br>
+Bad: Temporary emails<br>
+How to do: Not so hard to do<br>
+Example: (script &amp; php, Unix)<br>
+
+<tt>
+#!/bin/sh<br>
+#<br>
+# Anonymous random aliases for all users<br>
+# changed each week, to avoid spam problems<br>
+# on websites<br>
+# (to put into /etc/cron.weekly/)<br>
+<br>
+# Each alias is regenerated each week, and valid for 2 weeks<br>
+<br>
+# prefix for all users<br>
+# must not be the prefix of another alias!<br>
+USER_PREFIX=&quot;user-&quot;<br>
+<br>
+# valid for 2 weeks<br>
+ALIAS_VALID=2<br>
+<br>
+# random string<br>
+SECRET=&quot;my secret string `hostname -f`&quot;<br>
+<br>
+# build<br>
+grep -vE &quot;^$USER_PREFIX&quot; /etc/aliases &gt; /etc/aliases.new<br>
+for i in `cut -f1 -d':' /etc/passwd`; do<br>
+&nbsp;&nbsp;if test `id -u $i` -ge 500; then<br>
+&nbsp;&nbsp;&nbsp;&nbsp;off=0<br>
+&nbsp;&nbsp;&nbsp;&nbsp;while test &quot;$off&quot; -lt $ALIAS_VALID; do<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;THISWEEK=&quot;`date +'%Y'` $[`date +'%U'`-$off]&quot;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;SECRET=&quot;`echo \&quot;$SECRET $i $THISWEEK\&quot; | md5sum | cut -c1-4`&quot;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;FIRST=`echo $i | cut -c1-3`<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;NAME=&quot;$USER_PREFIX$FIRST$SECRET&quot;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;echo &quot;$NAME : $i&quot; &gt;&gt; /etc/aliases.new<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;off=$[$off+1]<br>
+&nbsp;&nbsp;&nbsp;&nbsp;done<br>
+&nbsp;&nbsp;fi<br>
+done<br>
+<br>
+# move file<br>
+mv -f /etc/aliases /etc/aliases.old<br>
+mv -f /etc/aliases.new /etc/aliases<br>
+<br>
+# update aliases<br>
+newaliases<br>
+<br>
+</tt>
+
+And then, put the email address in your pages through:
+<br>
+<br>
+
+<tt>
+&lt;a href="mailto:&lt;?php<br>
+&nbsp;&nbsp;&nbsp; $user="smith";<br>
+&nbsp;&nbsp;&nbsp; $alias=exec("grep ".$user." /etc/aliases | cut -f1 -d' ' | head -n1");<br>
+&nbsp;&nbsp;&nbsp; print $alias;<br>
+?&gt;@mycompany.com"&gt;
+</tt>
+
+</li>
+</ol>
+
+</li>
+
+
+<!-- ------------------------------------------------------------ -->
+
+
+
+</table>
+
+<br>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
+
diff --git a/HelpHtml/addurl.html b/HelpHtml/addurl.html
new file mode 100644
index 0000000..a09d01b
--- /dev/null
+++ b/HelpHtml/addurl.html
@@ -0,0 +1,155 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Add a URL</em></h2>
+
+<br>
+
+<ol>
+ <li>Enter a typical Web address</li>
+ <br><small>Just type in your address in the field</small><br><br>
+ <center><img src="img/addurl1.gif" border="0"></center>
+<br><br>OR<br><br>
+ <li>Enter a Web address with authentication</li>
+ <br><small>Useful when you need basic authentication to watch the Web page</small><br><br>
+ <center><img src="img/addurl2.gif" border="0"></center>
+<br><br>OR<br><br>
+ <li>Capture a link from your Web browser to HTTrack</li>
+ <br><small>Use this tool only for form-based pages (pages delivered after submitting a form) that need some analysis</small><br><br>
+ <center><img src="img/addurl3.gif" border="0"></center>
+ <br>
+ <br>
+<small>Set, as explained, your Web browser proxy preferences to the values indicated : set the proxy's address, and the proxy's port,
+ then click on the button or link as you usually do in your Web browser.
+ <br>The temporary proxy, installed by HTTrack, will then
+ capture the link and display a confirmation page.
+ <br><br>
+</small>
+ <center><img src="img/addurl4.gif" border="0"></center>
+ <br>
+ <center><img src="img/addurl5.gif" border="0"></center>
+</ol>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/cmddoc.html b/HelpHtml/cmddoc.html
new file mode 100644
index 0000000..322f5fd
--- /dev/null
+++ b/HelpHtml/cmddoc.html
@@ -0,0 +1,155 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Command-Line Documentation</em></h2>
+
+<br>
+
+The command-line version
+
+<ul>
+ <li><a href="options.html">Command line Options</a></li>
+ <br>List of all powerful command line options<br><br>
+ <li>How to use httrack command-line version:</li>
+ <ul>
+ <li>Open a shell window</li>
+ <br>
+ <br>
+ <li>Type in <tt>httrack</tt> (or the complete path to the httrack executable)</li>
+ <br><small><tt>httrack</tt></small>
+ <br>
+ <br>
+ <li>Add the URLs, separated by a blank space</li>
+ <br><small><tt>httrack www.someweb.com/foo/</tt></small>
+ <br>
+ <br>
+ <li>If you need, add some options (see the <a href="options.html">option list</a>)</li>
+ <br><small><tt>httrack www.someweb.com/foo/ -O "/webs" -N4 -P proxy.myhost.com:3128</tt></small>
+ <br>
+ <br>
+ <li>Launch the command line, and wait until the mirror is finished</li>
+ <br><small>You can (especially on the Unix release) press ^C to stop the mirror or put httrack in background</small>
+ <br>
+ <br>
+ </ul>
+</ul>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
diff --git a/HelpHtml/contact.html b/HelpHtml/contact.html
new file mode 100644
index 0000000..09ff760
--- /dev/null
+++ b/HelpHtml/contact.html
@@ -0,0 +1,227 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Contact Us!</em></h2>
+
+<br>
+
+<h3>
+<center>
+<a href="http://www.httrack.com/" target="_blank">Please visit our website!</a>
+<br>
+</center>
+</h3>
+
+<b>About this project:</b>
+
+
+<pre>
+
+Information on this project:
+
+This project has been developed by:
+Xavier Roche (xroche@httrack.com)
+ for the main engine and Windows interface
+ and maintainer for v2.0 and v3.0
+Yann Philippot (yphilippot@lemel.fr)
+ for the java binary .class parser
+Robert Lagadec (rlagadec@yahoo.fr)
+ for checking both English & French translations
+Juan Pablo Barrio Lera (University of León)
+ for Spanish translations
+Rainer Klueting (rainer@klueting.de)
+Bastian Gorke (bastiang@yahoo.com)
+Rudi Ferrari (Wyando@netcologne.de)
+Marcus Gaza (MarcusGaza@t-online.de)
+ for German translations
+Rudi Ferrari (Wyando@netcologne.de)
+ for Dutch translations
+Lukasz Jokiel (Opole University of Technology, Lukasz.Jokiel@po.opole.pl)
+ for Polish translations
+Rui Fernandes (CANTIC, ruiefe@mail.malhatlantica.pt)
+Pedro T. Pinheiro (Universidade Nova de Lisboa-FCT, ptiago@mail.iupi.pt)
+ for Portuguese translations
+Andrei Iliev (iliev@vitaplus.ru)
+ for Russian translations
+Witold Krakowski (wkrakowski@libero.it)
+ for Italian translations
+Jozsef Tamas Herczeg (hdodi@freemail.hu)
+ for Hungarian translation
+Paulo Neto (company@layout.com.br)
+ for Brazilian translation
+Brook Qin (brookqwr@sina.com)
+ for simplified Chinese translation
+Addy Lin (addy1975@pchome.com.tw)
+ for traditional Chinese translation
+Jesper Bramm (bramm@get2net.dk)
+ for Danish translation
+Tõnu Virma
+ for Estonian translation
+Staffan Ström (staffan@fam-strom.org)
+ for Swedish translation
+Mehmet Akif Köeo&#287;lu (mak@ttnet.net.tr)
+ for Turkish translation
+Aleksandar Savic (aleks@macedonia.eu.org)
+ for Macedonian translation
+Takayoshi Nakasikiryo
+ for Japanese translation
+Martin Sereday (sereday@slovanet.sk)
+ for Slovak translation
+Antonín Mat&#283;j&#269;ík (matejcik@volny.cz)
+ for Czech translation
+
+Special Thanks to:
+Patrick Ducrot & Daniel Carré (ENSI of Caen)
+ for their initial support
+Fred Cohen (fc@all.net)
+ for HTTrack user's guide
+
+Greetings to:
+Jean-loup Gailly from gzip.org
+ for the help provided with Zlib
+Russ Freeman from gipsymedia
+ for hints on DLL dynamic load
+Paul DiLascia
+ for helping to fix problems with CHtmlView
+ISMRA/Ensi of Caen
+ for their initial support
+..and all users that are using and supporting HTTrack!
+
+</pre>
+<hr>
+<br>
+
+ If you want to ask any question to the authors, report bugs/problems,
+ please first check the <a href="http://forum.httrack.com">HTTrack Website Copier forum</a>.<br>
+ You can also contact us by email, but due to the large volume of messages, it is impossible
+ to always respond (especially for configuration help or other configuration-related
+ questions).
+<br>
+ <a href="mailto:httrack@httrack.com">HTTrack project</a>,
+ <a href="mailto:xroche@httrack.com">Xavier ROCHE</a>,
+<br><hr><br>
+<br>
+ This program is covered by the GNU General Public License.<br>
+ HTTrack/HTTrack Website Copier is Copyright (C) 1998-2002 Xavier Roche and other contributors
+<br>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/dev.html b/HelpHtml/dev.html
new file mode 100644
index 0000000..cba5613
--- /dev/null
+++ b/HelpHtml/dev.html
@@ -0,0 +1,262 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+-->
+</style>
+
+</head>
+<body>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>HTTrack Programming page</em></h2>
+
+<br>
+
+HTTrack can be used as a third-party program in batch files, or as a library (see the <tt>lib/</tt> directory).
+We will see here several examples, written in batch script (which can be adapted to almost all batch script languages) or in C.
+
+<br><br>
+
+<br><br>
+
+<table width="100%" border="0" background="img/fade.gif">
+<tr><td>
+<b>How to get one single file</b><br><br>
+<tt>
+httrack --get http://localhost/
+</tt>
+</td></tr>
+</table>
+
+<br><br>
+
+<table width="100%" border="0" background="img/fade.gif">
+<tr><td>
+<b>How to get one single file and pipe it to stdout</b><br><br>
+<tt>
+httrack --quiet --get http://localhost/ -O tmpget -V "cat \$0" | grep -iE "TITLE"<br>
+rm -rf tmpget<br>
+</tt>
+</td></tr>
+</table>
+
+<br><br>
+
+<table width="100%" border="0" background="img/fade.gif">
+<tr><td>
+<b>How to search in all HTML files on a website</b><br><br>
+<tt>
+httrack --skeleton http://localhost/ -V "if grep -iE \"TITLE\" \"\$0\">/dev/null; then echo \"Match found at \$0\"; fi"<br>
+rm -rf tmpget<br>
+</tt>
+
+<br>Same thing but matches only the first file:<br>
+
+<tt>
+httrack --skeleton http://localhost/ -V "if grep -iE \"TITLE\" \"\$0\">/dev/null; then echo \"Match found at \$0\"; kill -9 \$PPID; fi"<br>
+rm -rf tmpget<br>
+</tt>
+
+</td></tr>
+</table>
+
+<br><br>
+
+<table width="100%" border="0" background="img/fade.gif">
+
+<tr><td>
+<b>Indexing a website, and using the index as a search engine</b><br><br>
+
+<tt>httrack localhost -%I</tt><br>
+
+Will generate an index.txt file, which contains all detected keywords, sorted and indexed using this format:<br>
+
+<br>
+<table border="0" bgcolor="white"><tr><td>
+<i>keyword</i><br>
+&lt;tab&gt; &nbsp; <i>number_of_hits_in_current_page_for_this_keyword</i> &nbsp; <i>page_location</i><br>
+&lt;tab&gt; &nbsp; <i>number_of_hits_in_current_page_for_this_keyword</i> &nbsp; <i>page_location</i><br>
+&lt;tab&gt; &nbsp; <i>number_of_hits_in_current_page_for_this_keyword</i> &nbsp; <i>page_location</i><br>
+...<br>
+=<i>total_number_of_hits_for_this_keyword</i><br>
+(<i>(total_number_of_hits_for_this_keyword*1000)/total_number_of_keywords</i>)<br>
+</td></tr></table>
+<br>
+
+Example:
+
+<table border="0" bgcolor="white"><tr><td>
+<pre>
+
+abilities
+ 1 localhost/manual/mod/index-2.html
+ 1 localhost/manual/mod/index.html
+ 1 localhost/manual/mod/mod_negotiation.html
+ =3
+ (0)
+ability
+ 2 localhost/manual/misc/FAQ.html
+ 2 localhost/manual/suexec.html
+ 1 localhost/manual/handler.html
+ 1 localhost/manual/misc/security_tips.html
+ 1 localhost/manual/mod/mod_rewrite.html
+ 1 localhost/manual/mod/mod_setenvif.html
+ 1 localhost/manual/multilogs.html
+ 1 localhost/manual/netware.html
+ 1 localhost/manual/new_features_1_3.html
+ 1 localhost/manual/windows.html
+ =12
+ (0)
+able
+ 4 localhost/manual/dso.html
+ 4 localhost/manual/mod/core.html
+ 3 localhost/manual/dns-caveats.html
+ 3 localhost/manual/mod/mod_auth.html
+ 3 localhost/manual/mod/mod_rewrite.html
+ 3 localhost/manual/upgrading_to_1_3.html
+ 2 localhost/manual/misc/API.html
+ 2 localhost/manual/misc/FAQ.html
+ 2 localhost/manual/misc/windoz_keepalive.html
+ 2 localhost/manual/mod/mod_auth_db.html
+ 2 localhost/manual/mod/mod_auth_dbm.html
+ 1 localhost/manual/misc/descriptors.html
+ 1 localhost/manual/misc/fin_wait_2.html
+ 1 localhost/manual/misc/security_tips.html
+ 1 localhost/manual/mod/mod_auth_digest.html
+ 1 localhost/manual/mod/mod_cern_meta.html
+ 1 localhost/manual/mod/mod_env.html
+ 1 localhost/manual/mod/mod_example.html
+ 1 localhost/manual/mod/mod_unique_id.html
+ 1 localhost/manual/mod/mod_usertrack.html
+ 1 localhost/manual/stopping.html
+ 1 localhost/manual/suexec.html
+ 1 localhost/manual/vhosts/ip-based.html
+ 1 localhost/manual/windows.html
+ =43
+ (0)
+</pre>
+...
+</td></tr></table>
+<br>
+
+Script example:
+<a href="div/search.sh">search.sh</a>
+
+</td></tr>
+</table>
+
+<br><br>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/div/search.sh b/HelpHtml/div/search.sh
new file mode 100644
index 0000000..fb0c7cd
--- /dev/null
+++ b/HelpHtml/div/search.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+# Simple indexing test using HTTrack: builds index.txt for $TESTSITE when it
+# is missing, then looks up keywords typed on stdin in that index.
+# A "real" script/program would use advanced search, and use dichotomy to find
+# the word in the index.txt file. This script is really basic and NOT
+# optimized, and should not be used for professional purpose :)
+
+TESTSITE="http://localhost/"
+
+# Create an index if necessary; give up if no index could be moved in place
+if ! test -f "index.txt"; then
+  echo "Building the index .."
+  rm -rf test
+  httrack --display "$TESTSITE" -%I -O test
+  mv test/index.txt ./ || exit 1
+fi
+
+# Convert crlf to lf (only the first line is probed for a CR)
+if test "$(head -n 1 index.txt | tr '\r' '#' | grep -c '#')" = "1"; then
+  echo "Converting index to Unix LF style (not CR/LF) .."
+  mv -f index.txt index.txt.old
+  tr -d '\r' < index.txt.old > index.txt
+fi
+
+# Prompt until an empty keyword is read
+keyword=-
+while test -n "$keyword"; do
+  printf "Enter a keyword: "
+  read -r keyword
+  if test -n "$keyword"; then
+    # The keyword is used as an extended regex anchored at line start
+    FOUNDK="$(grep -niE "^$keyword" index.txt)"
+    if test -n "$FOUNDK"; then
+      if test $(printf '%s\n' "$FOUNDK" | wc -l) -ne 1; then
+        # Multiple matches
+        printf "Found multiple keywords: "
+        printf '%s\n' "$FOUNDK" | cut -f2 -d':' | tr '\n' ' '
+        echo ""
+        echo "Use keyword$ to find only one"
+      else
+        # One match: N is the index line number of the keyword entry
+        N=$(printf '%s\n' "$FOUNDK" | cut -f1 -d':')
+        # First line from N on that contains "(", numbered relative to N
+        PM=$(tail -n "+$N" index.txt | grep -nE "\(" | head -n 1)
+        if ! echo "$PM" | grep "ignored" >/dev/null; then
+          M=$(echo "$PM" | cut -f1 -d':')
+          echo "Found in:"
+          tail -n "+$N" index.txt | head -n "$M" | grep -E "[0-9]* " | cut -f2 -d' '
+        else
+          echo "keyword ignored (too many hits)"
+        fi
+      fi
+    else
+      echo "not found"
+    fi
+  fi
+done
+
diff --git a/HelpHtml/faq.html b/HelpHtml/faq.html
new file mode 100644
index 0000000..1beee2b
--- /dev/null
+++ b/HelpHtml/faq.html
@@ -0,0 +1,902 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+-->
+</style>
+
+</head>
+<body>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>F A Q</em></h2>
+
+<br>
+
+<p><em><br>
+<ul>
+<strong>Tips:</strong>
+<li>In case of trouble/problems during transfer, <b><u><font color="red">first check the hts-log.txt (and hts-err.txt) files to figure out what happened</font></u></b>. These log files report all
+events that may be useful to detect a problem. You can also adjust the debug level of the log files in the options.
+</li><li>
+The tutorial written by Fred Cohen is a very good document to read, to understand how to use the engine,
+how the command line version works, and how the window version works, too! All options are described and explained in
+clear language!
+</li>
+</ul>
+</em><br></p>
+
+<ul><br>
+
+<h3><b>Very</b> Frequently Asked Questions:<br></h3>
+
+<li><a href="#VF1">HTTrack does not capture all files I want to capture!</a><br></li>
+
+<br>
+<h3>General questions:<br></h3>
+
+<li><a href="#QG0">Is there any 'spyware' or 'adware' in this program? Can you prove that there isn't any?</a><br></li>
+
+<li><a href="#QG0b">Are there any risks of viruses with this software?</a><br></li>
+
+<li><a href="#QG1">The install is not working on NT without administrator rights!</a><br></li>
+
+<li><a href="#QG2">Where can I find French/other languages documentation?</a><br></li>
+
+<li><a href="#QG3">Is HTTrack working on NT/2000?</a><br></li>
+
+<li><a href="#QG4">What's the difference between HTTrack and WinHTTrack?</a><br></li>
+
+<li><a href="#QG5">Is HTTrack Mac compatible?</a><br></li>
+
+<li><a href="#QG6">Can HTTrack be compiled on all Un*x?</a><br></li>
+
+<li><a href="#QG7">I use HTTrack for professional purpose. What about restrictions/license fee?</a><br></li>
+
+<li><a href="#QG7b">Is there any license royalties for distributing a mirror made with HTTrack?</a><br></li>
+
+<li><a href="#QG8">Is a DLL/library version available?</a><br></li>
+
+<li><a href="#QG9">Is there a X11/KDE shell available for Linux and Un*x?</a><br></li>
+
+
+<h3>Troubleshooting:<br></h3>
+
+<li><a href="#Q0">Some sites are captured very well, others aren't. Why?</a><br></li>
+
+<li><a href="#Q1">When I use HTTrack, nothing is mirrored (no files) What's happening?</a><br></li>
+
+<li><a href="#QT1">Only the first page is caught. What's wrong?</a><br></li>
+
+<li><a href="#Q1b">There are missing files! What's happening?</a><br></li>
+
+<li><a href="#Q1bc">There are corrupted images/files! How to fix them?</a><br></li>
+
+<li><a href="#Q1bb">FTP links are not caught! What's happening?</a><br></li>
+
+<li><a href="#Q1b1">I got some weird messages telling that robots.txt do not allow several files to be captured. What's going on?</a><br></li>
+
+<li><a href="#Q1b11">I have duplicate files! What's going on?</a><br></li>
+
+<li><a href="#Q1b2">I'm downloading too many files! What can I do?</a><br></li>
+
+<li><a href="#Q1b22">The engine turns crazy, getting thousands of files! What's going on?</a><br></li>
+
+<li><a href="#Q1b3">File are sometimes renamed (the type is changed)! Why?</a><br></li>
+
+<li><a href="#Q1b3b">File are sometimes *incorrectly* renamed! Why?</a><br></li>
+
+<li><a href="#Q1b4b">How do I rename all ".dat" files into ".zip" files?</a><br></li>
+
+<li><a href="#Q1c">I can not access several pages (access forbidden, or redirect to another location), but I can with my browser, what's going on?</a><br></li>
+
+<li><a href="#Q2">Some pages can't be seen, or are displayed with errors!</a><br></li>
+
+<li><a href="#QT4">Files are created with strange names, like '-1.html'!</a><br></li>
+
+<li><a href="#Q2b">Some Java applets do not work properly!</a><br></li>
+
+<li><a href="#QT5">When capturing real audio/video links (.ram), I only get a shortcut!</a><br></li>
+
+<li><a href="#QT6">Using user:password@address is not working!</a><br></li>
+
+<li><a href="#QT3">Are https URL working?</a><br></li>
+
+<li><a href="#QT3b">Are ipv6 URL working?</a><br></li>
+
+<li><a href="#QP3">HTTrack is taking too much time for parsing, it is very slow. What's wrong?</a><br></li>
+
+<li><a href="#Q3">HTTrack is being idle for a long time without transfering. What's happening?</a><br></li>
+
+<li><a href="#Q3b">I want to update a site, but it's taking too much time! What's happening?</a><br></li>
+
+<li><a href="#Q3b2">I wanted to update a site, but after the update the site disappeared!! What's going on?</a><br></li>
+
+<li><a href="#Q4">I am behind a firewall. What can I do?</a><br></li>
+
+<li><a href="#Q14">HTTrack has crashed during a mirror, what's happening?</a><br></li>
+
+<li><a href="#Q100">I want to update a mirrored project, but HTTrack is retransfering all pages. What's going on?</a><br></li>
+
+<li><a href="#Q10a">I want to continue a mirrored project, but HTTrack is rescanning all pages. What's going on?</a><br></li>
+
+<li><a href="#Q101">HTTrack window sometimes "disappears" at the end of a mirrored project. What's going on?</a><br></li>
+
+<li><a href="#QT2">With WinHTTrack, sometimes the minimize in system tray causes a crash!</a><br></li>
+
+
+<h3><br>Questions concerning a mirror:<br></h3>
+
+<li><a href="#Q5">I want to mirror a Web site, but there are some files outside the domain, too. How to retrieve them?</a><br></li>
+
+<li><a href="#Q6">I have forgotten some URLs of files during a long mirror.. Should I redo all?</a><br></li>
+
+<li><a href="#Q7">I just want to retrieve all ZIP files or other files in a web site/in a page. How do I do it?</a><br></li>
+
+<li><a href="#Q8">There are ZIP files in a page, but I don't want to transfer them. How do I do it?</a><br></li>
+
+<li><a href="#Q9">I don't want to load gif files.. but what may happen if I watch the page?</a><br></li>
+
+<li><a href="#Q15">I get all types of files on a web site, but I didn't select them on filters!</a><br></li>
+
+<li><a href="#Q10">When I use filters, I get too many files!</a><br></li>
+
+<li><a href="#Q11">When I use filters, I can't access another domain, but I have filtered it!</a><br></li>
+
+<li><a href="#Q12">Must I add a&nbsp; '+' or '-' in the filter list when I want to use filters?</a><br></li>
+
+<li><a href="#Q13">I want to find file(s) in a web-site. How do I do it?</a><br></li>
+
+<li><a href="#Q200">I want to download ftp files/ftp site. How do I do it?</a><br></li>
+
+<li><a href="#QM1">How can I retrieve .asp or .cgi sources instead of .html result?</a><br></li>
+
+<li><a href="#QM2">How can I remove these annoying <tt>&lt;!-- Mirrored from... --&gt;</tt> from html files?</a><br></li>
+
+<li><a href="#QM3">Do I have to select between ascii/binary transfer mode?</a><br></li>
+
+<li><a href="#QM4">Can HTTrack perform form-based authentication?</a><br></li>
+
+<li><a href="#QM5">Can I redirect downloads to tar/zip archive?</a><br></li>
+
+<li><a href="#QM6">Can I use username/password authentication on a site?</a><br></li>
+
+<li><a href="#QM7">Can I use username/password authentication for a proxy?</a><br></li>
+
+<li><a href="#QM8">Can HTTrack generate HP-UX or ISO9660 compatible files?</a><br></li>
+
+<li><a href="#QM9">Is there any SOCKS support?</a><br></li>
+
+<li><a href="#QM10">What's this hts-cache directory? Can I remove it?</a><br></li>
+
+<li><a href="#QM11">Can I start a mirror from my bookmarks?</a><br></li>
+
+<li><a href="#QM11c">Can I convert a local website (file:// links) to a standard website?</a><br></li>
+
+<li><a href="#QM11b">Can I copy a project to another folder - Will the mirror work?</a><br></li>
+
+<li><a href="#QM12">Can I copy a project to another computer/system? Can I then update it ?</a><br></li>
+
+<li><a href="#QM13">How can I grab email addresses in web pages?</a><br></li>
+
+<h3>Other problems:<br></h3>
+
+<li><a href="#Q300">My problem is not listed!</a><br></li>
+
+</ul>
+
+</p>
+
+<br>
+<hr>
+<br>
+
+<u><strong>Very Frequently Asked Questions:</strong></u><br><br>
+
+<a name="VF1">Q: <strong>HTTrack does not capture all files I want to capture!</strong></a><br>
+A: <em>This is a frequent question, generally related to the filters.
+<u>BUT first check if your problem is not related to the <a href="#Q1b1">robots.txt</a> website rules.</u>
+<br>
+<br>
+Okay, let me explain how to precisely control the capture process.<br>
+<br>
+Let's take an example:<br>
+<br>
+Imagine you want to capture the following site:<br>
+<tt>www.someweb.com/gallery/flowers/</tt><br>
+<br>
+HTTrack, by default, will capture all links encountered in <tt>www.someweb.com/gallery/flowers/</tt> or in lower directories, like
+<tt>www.someweb.com/gallery/flowers/roses/</tt>.<br>
+It will not follow links to other websites, because this behaviour might end up capturing the entire Web!<br>
+It will not follow links located in higher directories either (for example, <tt>www.someweb.com/gallery/</tt> itself), because this
+might capture too much data.<br>
+<br>
+This is the <b><u>default behaviour</u></b> of HTTrack, BUT, of course, if you want, you can tell HTTrack to capture other directories or websites!
+<br>
+In our example, we might want also to capture all links in <tt>www.someweb.com/gallery/trees/</tt>, and in <tt>www.someweb.com/photos/</tt><br>
+<br>
+This can easily be done by using filters: go to the Option panel, select the Filters tab, and enter these lines:
+(you can leave a blank space between each rule, instead of entering a carriage return)<br>
+<tt>+www.someweb.com/gallery/trees/*<br>
++www.someweb.com/photos/*</tt><br>
+<br>
+This means "accept all links beginning with <tt>www.someweb.com/gallery/trees/</tt> and <tt>www.someweb.com/photos/</tt>"
+- the <tt>+</tt> means "accept" and the final <tt>*</tt> means "any character will match after the previous ones".
+Remember the <tt>*.doc</tt> or <tt>*.zip</tt> encountered when you want to select all files from a certain type on your computer:
+it is almost the same here, except the beginning "+"<br>
+<br>
+Now, we might want to exclude all links in <tt>www.someweb.com/gallery/trees/hugetrees/</tt>, because with the previous filter,
+we accepted too many files. Here again, you can add a filter rule to refuse these links. Modify the previous filters to:<br>
+<tt>+www.someweb.com/gallery/trees/*<br>
++www.someweb.com/photos/*<br>
+-www.someweb.com/gallery/trees/hugetrees/*</tt><br>
+<br>
+You have noticed the <tt>-</tt> at the beginning of the third rule: this means "refuse links matching the rule"
+; and the rule is "any files beginning with <tt>www.someweb.com/gallery/trees/hugetrees/</tt>"<br>
+
+Voila! With these three rules, you have precisely defined what you wanted to capture.<br>
+<br>
+A more complex example?<br>
+<br>
+Imagine that you want to accept all jpg files (files with .jpg type) that have "blue" in the name and located in www.someweb.com<br>
+<tt>+www.someweb.com/*blue*.jpg</tt><br>
+<br>
+More detailed information can be found <a href="filters.html">here</a>!<br>
+<br>
+</em>
+
+<br>
+<u><strong>General questions:<br>
+</strong></u><br>
+
+<a NAME="QG0">Q: <strong>Is there any 'spyware' or 'adware' in this program? Can you prove that there isn't any?</strong></a><br>
+A: <em>No ads (banners), and absolutely no 'spy' features inside the program.<br>
+The best proof is the software status: all sources are released, and everybody can check them. Open source is the best protection against privacy problems - HTTrack is an open source project, free of charge and free of any spy 'features'.</em>
+
+<br><br><a NAME="QG0b">Q: <strong>Are there any risks of viruses with this software?</strong></a><br>
+A: <em>For the software itself:
+All official releases (at httrack.com) are checked against all known viruses, and the packaging process is also checked. Archives are stored on Un*x servers, not really concerned by viruses.<br>
+For files you are downloading on the WWW using HTTrack: You may encounter websites which were corrupted by viruses, and downloading data on these websites might be dangerous (as dangerous as if using a regular Browser). Always ensure that websites you are crawling are safe.
+ (Note: remember that using an antivirus software is a good idea once you are connected to the Internet)</em>
+
+<br><br><a NAME="QG1">Q: <strong>The install is not working on NT without administrator rights!</strong></a><br>
+A: <em>That's right. You can, however, install WinHTTrack on your own machine, and then copy your <tt>WinHTTrack</tt> folder from your <tt>Program Files</tt> folder to another machine, in a temporary directory (e.g. <tt>C:\temp\</tt>)</em>
+
+<br><br><a NAME="QG2">Q: <strong>Where can I find French/other languages documentation?</strong></a><br>
+A: <em>Windows interface is available on several languages, but not yet the documentation!</em>
+
+<br><br><a NAME="QG3">Q: <strong>Is HTTrack working on NT/2000?</strong></a><br>
+A: <em>Yes, it does</em>
+
+<br><br><a NAME="QG4">Q: <strong>What's the difference between HTTrack and WinHTTrack?</strong></a><br>
+A: <em>WinHTTrack is the Windows release of HTTrack (with a graphic shell)</em>
+
+<br><br><a NAME="QG5">Q: <strong>Is HTTrack Mac compatible?</strong></a><br>
+A: <em>No, because of a lack of time. But sources are available</em>
+
+<br><br><a NAME="QG6">Q: <strong>Can HTTrack be compiled on all Un*x?</strong></a><br>
+A: <em>It should. The <tt>Makefile</tt> may be modified in some cases, however</em>
+
+<br><br><a NAME="QG7">Q: <strong>I use HTTrack for professional purpose. What about restrictions/license fee?</strong></a><br>
+A: <em>HTTrack is covered by the GNU General Public License (GPL). There is no restrictions using HTTrack for professional purpose,
+except if you develop a software which uses HTTrack components (parts of the source, or any other component).
+See the <tt>license.txt</tt> file for more information</em>
+
+<br><br><a NAME="QG7b">Q: <strong>Is there any license royalties for distributing a mirror made with HTTrack?</strong></a><br>
+A: <em>No.</em>
+
+<br><br><a NAME="QG8">Q: <strong>Is a DLL/library version available?</strong></a><br>
+A: <em>Not yet. But, again, sources are available (see <tt>license.txt</tt> for distribution infos)</em>
+
+<br><br><a NAME="QG9">Q: <strong>Is there a X11/KDE shell available for Linux and Un*x?</strong></a><br>
+A: <em>Yes. See the download/contribution section at <a href="http://www.httrack.com">www.httrack.com!</a></em>
+
+
+<p><br>
+<u><strong>Troubleshooting:<br>
+</strong></u><br>
+
+<a NAME="Q0">Q: <strong>Some sites are captured very well, others aren't. Why?</strong></a><br>
+A: <em>
+There are several reasons (and solutions) for a mirror to fail. Reading the log files (and this FAQ!) is generally a VERY good idea to figure out what occurred.
+
+<ul>
+ <li>Links within the site refer to external links, or links located in other (or upper) directories, not captured by default - the use of filters is generally THE solution, as this is one of the most powerful options in HTTrack. <u>See the above questions/answers</u>.</li>
+ <li>Website <a href="#Q1b1">'robots.txt' rules</a> forbid access to several website parts - you can disable them, but only with great care!</li>
+ <li>HTTrack is filtered (by its default User-agent IDentity) - you can change the Browser User-Agent identity to an anonymous one (MSIE, Netscape..) - here again, use this option with care, as this measure might have been put to avoid some bandwidth abuse (see also the <a href="abuse.html">abuse faq</a>!)</li>
+</ul>
+
+There are cases, however, that can not be (yet) handled:
+
+<ul>
+ <li>Flash sites - not handled</li>
+ <li>Intensive Java/Javascript sites - might be bogus/incomplete</li>
+ <li>Complex CGI with built-in redirect, and other tricks - very complicated to handle, and therefore might cause problems</li>
+ <li>Parsing problem in the HTML code (cases where the engine is fooled, for example by a false comment (&lt;!--) which has no closing comment (--&gt;) detected).
+ Rare cases, but might occur.
+ A bug report is then generally good!
+ </li>
+</ul>
+
+Note:
+For some sites, setting "Force old HTTP/1.0 requests" option can be useful, as this option uses more basic requests (no HEAD request for example).
+This will cause a performance loss, but will increase the compatibility with some cgi-based sites.
+<br>
+
+<br></em>
+
+<a NAME="QT1">Q: <strong>Only the first page is caught. What's wrong?</strong></a><br>
+A: <em>First, check the <tt>hts-log.txt</tt> file (and/or <tt>hts-err.txt</tt> error log file) - this can give you precious information.<br>
+The problem can be a website that redirects you to another site (for example, <tt>www.someweb.com</tt> to <tt>public.someweb.com</tt>) :
+in this case, use filters to accept this site<br>
+This can be, also, a problem in the HTTrack options (link depth too low, for example)</em>
+
+<br><br><a NAME="QT2">Q: <strong>With WinHTTrack, sometimes the minimize in system tray causes a crash!</strong></a><br>
+A: <em>This bug sometimes appears in the shell on some systems. If you encounter this problem, avoid minimizing the window!</em>
+
+<br><br><a NAME="QT3">Q: <strong>Are https URL working?</strong></a><br>
+A: <em>Yes, HTTrack does support (since 3.20 release) https (secure socket layer protocol) sites</em>
+
+<br><br><a NAME="QT3b">Q: <strong>Are ipv6 URL working?</strong></a><br>
+A: <em>Yes, HTTrack does support (since 3.20 release) ipv6 sites, using A/AAAA entries, or direct v6 addresses (like http://[3ffe:b80:12:34:56::78]/)</em>
+
+<br><br><a NAME="QT4">Q: <strong>Files are created with strange names, like '-1.html'!</strong></a><br>
+A: <em>Check the build options (you may have selected user-defined structure with wrong parameters!)</em>
+
+<br><br><a NAME="QT5">Q: <strong>When capturing real audio/video links (.ram), I only get a shortcut!</strong></a><br>
+A: <em>Yes, but .ra/.rm associated file should be captured together - except if rtsp:// protocol is used (not supported by HTTrack yet), or if proper filters are needed</em>
+
+<br><br><a NAME="QT6">Q: <strong>Using user:password@address is not working!</strong></a><br>
+A: <em>Again, first check the <tt>hts-log.txt</tt> and <tt>hts-err.txt</tt> error log files - this can give you precious information<br>
+The site may have a different authentication scheme - form based authentication, for example.
+In this case, use the URL capture features of HTTrack, it might work</em>
+<br><br>
+
+<a NAME="Q1">Q: <strong>When I use HTTrack, nothing is mirrored (no files) What's
+happening?</strong><br>
+A: <em>First, be sure that the URL typed is correct. Then, check if you need to use a
+proxy server (see proxy options in WinHTTrack or the <tt>-P proxy:port</tt> option in the
+command line program). The site you want to mirror may only accept certain browsers. You
+can change your &quot;browser identity&quot; with the Browser ID option in the OPTION box.
+Finally, you can have a look at the hts-log.txt (and hts-err.txt) file to see what
+happened. <br>
+<br></em>
+
+<a NAME="Q1b">Q: <strong>There are missing files! What's happening?</strong><br>
+A: <em>You may want to capture files that exist in a different folder, or in another web site.
+You may also want to capture files that are forbidden by default by the <a href="#Q1b1">robots.txt</a> website rules.
+In these cases, HTTrack does not capture these links automatically, you have to tell it to do so.
+<br><br>
+<ul><li>Either use the <a href="filters.html">filters</a>.<br>
+Example: You are downloading <tt>http://www.someweb.com/foo/</tt> and can not get .jpg images located
+in <tt>http://www.someweb.com/bar/</tt> (for example, http://www.someweb.com/bar/blue.jpg)<br>
+Then, add the filter rule <tt>+www.someweb.com/bar/*.jpg</tt> to accept all .jpg files from this location<br>
+You can, also, accept all files from the /bar folder with <tt>+www.someweb.com/bar/*</tt>, or only html files with <tt>+www.someweb.com/bar/*.html</tt> and so on..<br><br>
+</li><li>
+If the problems are related to robots.txt rules, that do not let you access some folders (check in the logs if you are not sure),
+you may want to disable the default robots.txt rules in the options. (but only disable this option with great care,
+some restricted parts of the website might be huge or not downloadable)
+</ul>
+</em>
+<br>
+
+<a NAME="Q1bc">Q: <strong>There are corrupted images/files! How to fix them?</strong><br>
+A: <em>First check the log files to ensure that the images do really exist remotely and are not fake html error pages renamed into .jpg ("Not found" errors, for example).
+Rescan the website with "Continue an interrupted download" to catch images that might be broken due to various errors (transfer timeout, for example).
+Then, check if the broken image/file name is present in the log (hts-log.txt) - in this case you will find there the reason why the file has not been properly caught.
+<br><u>If</u> this doesn't work, delete the corrupted files (Note: to detect corrupted images, you can browse the directories with a tool like ACDSee and then delete them)
+and rescan the website as described before. HTTrack will be obliged to recatch the deleted files, and this time it should work, if they do really exist remotely!.</em>
+<br>
+<br>
+
+<a NAME="Q1bb">Q: <strong>FTP links are not caught! What's happening?</strong><br>
+A: <em>FTP files might be seen as external links, especially if they are located in outside domain. You have either to accept all external links (See the links options, -n option) or
+only specific files (see <a href="filters.html">filters</a> section). <br>
+Example: You are downloading <tt>http://www.someweb.com/foo/</tt> and can not get ftp://ftp.someweb.com files<br>
+Then, add the filter rule <tt>+ftp.someweb.com/*</tt> to accept all files from this (ftp) location<br>
+</em>
+<br>
+
+<a NAME="Q1b1">Q: <strong>I got some weird messages telling that robots.txt do not allow several files to be captured. What's going on?</strong><br>
+A: <em>
+These rules, stored in a file called robots.txt, are given by the website, to specify which links or folders should not be caught by robots and spiders
+- for example, /cgi-bin or large images files.
+They are followed by default by HTTrack, as it is advised. Therefore, you may miss some files that would have been downloaded without
+these rules - check in your logs if it is the case:<br>
+<tt>Info: Note: due to www.foobar.com remote robots.txt rules, links begining with these path will be forbidden: /cgi-bin/,/images/ (see in the options to disable this)
+</tt>
+<br>
+If you want to disable them, just change the corresponding option in the option list! (but only disable this option with great care,
+some restricted parts of the website might be huge or not downloadable)
+</em>
+<br>
+<br>
+
+<a NAME="Q1b11"><strong>Q: I have duplicate files! What's going on?</strong><br>
+A: <em>This is generally the case for top indexes (index.html and index-2.html), isn't it?
+<br>
+This is a common issue, but that can not be easily avoided!<br>
+For example, http://www.foobar.com/ and http://www.foobar.com/index.html might be the same pages.
+But if links in the website refer both to http://www.foobar.com/ and http://www.foobar.com/index.html, these two pages will be caught.
+And because http://www.foobar.com/ must have a name, as you may want to browse the website locally (the / would give a directory listing, NOT the index itself!),
+HTTrack must find one. Therefore, two index.html will be produced, one with the -2 to show that the file had to be renamed.
+<br>
+It might be a good idea to consider that http://www.foobar.com/ and http://www.foobar.com/index.html are the same links, to avoid
+duplicate files, isn't it?
+NO, because the top index (/) can refer to ANY filename, and if index.html is generally the default name, index.htm can be chosen,
+or index.php3, mydog.jpg, or anything you may imagine. (some webmasters are really crazy)
+<br>
+<br>
+Note: In some rare cases, duplicate data files can be found when the website redirects to another file. This issue should be rare, and might be avoided using filters.
+</em>
+<br>
+<br>
+
+<a NAME="Q1b2">Q: <strong>I'm downloading too many files! What can I do?</strong><br>
+A: <em>This is often the case when you use too large a filter, for example <tt>+*.html</tt>, which asks the
+engine to catch all .html pages (even ones on other sites!). In this case, try to use more specific filters, like <tt>+www.someweb.com/specificfolder/*.html</tt><br>
+If you still have too many files, use filters to avoid some files. For example, if you have too many files from www.someweb.com/big/,
+use <tt>-www.someweb.com/big/*</tt> to avoid all files from this folder. Remember that the default behaviour of the engine, when
+mirroring http://www.someweb.com/big/index.html, is to catch everything in http://www.someweb.com/big/. Filters are your friends,
+use them!
+</em>
+<br>
+<br>
+
+<a NAME="Q1b22">Q: <strong>The engine turns crazy, getting thousands of files! What's going on?</strong><br>
+A: <em>This can happen if a loop occurs in some bogus website. For example, a page that refers to itself, with a timestamp
+in the query string (e.g. <tt>http://www.someweb.com/foo.asp?ts=2000/10/10,09:45:17:147</tt>).
+These are really annoying, as it is VERY difficult to detect the loop (the timestamp might be a page number).
+To limit the problem: set a recurse level (for example to 6), or avoid the bogus pages (use the filters)
+</em>
+<br>
+<br>
+
+<a NAME="Q1b3">Q: <strong>Files are sometimes renamed (the type is changed)! Why?</strong><br>
+A: <em>By default, HTTrack tries to know the type of remote files. This is useful when links like
+<tt>http://www.someweb.com/foo.cgi?id=1</tt> can be either HTML pages, images or anything else.
+Locally, foo.cgi will not be recognized as an html page, or as an image, by your browser. HTTrack has to rename the file
+as foo.html or foo.gif so that it can be viewed.<br>
+</em>
+<br>
+
+<a NAME="Q1b3b">Q: <strong>Files are sometimes *incorrectly* renamed! Why?</strong><br>
+A: <em>Sometimes, some data files are seen by the remote server as html files, or images : in this case HTTrack is
+being fooled.. and renames the file. This can generally be avoided by using the "use HTTP/1.0 requests" option.
+You might also avoid this by disabling the type checking in the option panel.
+</em>
+<br>
+<br>
+
+<a NAME="Q1b4b">Q: <strong>How do I rename all ".dat" files into ".zip" files?</strong><br>
+A: <em>Simply use the <tt>--assume dat=application/x-zip</tt> option
+</em>
+<br>
+<br>
+
+<a NAME="Q1c">Q: <strong>I can not access several pages (access forbidden, or redirect to another location), but I can with my browser, what's going on?</strong><br>
+A: <em>You may need cookies! Cookies are specific data (for example, your username or password) that are sent to your browser once
+you have logged in certain sites so that you only have to log-in once. For example, after having entered your username in a website, you can
+view pages and articles, and the next time you will go to this site, you will not have to re-enter your username/password.<br>
+To "merge" your personal cookies to an HTTrack project, just copy the cookies.txt file from your Netscape folder (or the cookies located into the Temporary Internet Files folder for IE)
+into your project folder (or even the HTTrack folder)
+</em>
+<br>
+<br>
+
+</a><a NAME="Q2">Q: <strong>Some pages can't be seen, or are displayed with errors!</strong><br>
+A: <em>Some pages may include javascript or java files that are not recognized. For
+example, generated filenames. There may be transfer problems, too (broken pipe, etc.). But
+most mirrors do work. We still are working to improve the mirror quality of HTTrack.<br>
+</em>
+<br>
+
+</a><a NAME="Q2b">Q: <strong>Some Java applets do not work properly!</strong><br>
+A: <em>Java applets may not work in some cases, for example if HTTrack failed to detect all included classes
+or files called within the class file. Sometimes, Java applets need to be online, because remote files are
+directly caught. Finally, the site structure can be incompatible with the class (always try to keep the original site structure
+when you want to get Java classes)<br>
+If there is no way to make some classes work properly, you can exclude them with the filters.
+They will be available, but only online.
+</em>
+<br>
+<br>
+
+</a><a NAME="QP3">Q: <strong>HTTrack is taking too much time for parsing, it is very slow. What's wrong?</strong><br>
+A: <em>Former (before 3.04) releases of HTTrack had problems with parsing. It was really slow, and performances -especially
+with huge HTML files- were not really good. The engine is now optimized, and should parse very quickly all html files.
+For example, a 10MB HTML file should be scanned in less than 3 or 4 seconds.<br>
+<br>
+Therefore, higher values mean that the engine had to wait a bit for testing several links.
+
+<ul>
+<li>Sometimes, links are malformed in pages.
+"<tt>a href="/foo"</tt>" instead of "<tt>a href="/foo/"</tt>", for example, is a common mistake. It will force the engine to
+make a supplemental request, and find the real <tt>/foo/</tt> location.
+</li>
+<br><br>
+<li>Dynamic pages. Links with names terminated by <tt>.php3</tt>, <tt>.asp</tt> or other type which are different from the regular
+<tt>.html</tt> or <tt>.htm</tt> will require a supplemental request, too. HTTrack has to "know" the type (called "MIME type") of a file
+before forming the destination filename. Files like foo.gif are "known" to be images, ".html" are obviously HTML pages - but ".php3"
+pages may be either dynamically generated html pages, images, data files...<br>
+<br>
+If you KNOW that ALL ".php3" and ".asp" pages are in fact HTML pages on a mirror, use the <tt>assume</tt> option:<br>
+<tt>--assume php3=text/html,asp=text/html</tt>
+<br><br>
+This option can be used to change the type of a file, too : the MIME type "application/x-MYTYPE" will always have the "MYTYPE" type.
+Therefore, <br>
+<tt>--assume dat=application/x-zip</tt>
+<br>
+will force the engine to rename all dat files into zip files
+</li>
+</ul>
+
+
+</em><br>
+<br>
+
+</a><a NAME="Q3">Q: <strong>HTTrack is being idle for a long time without
+transferring. What's happening?</strong><br>
+A: <em>Maybe you try to reach some very slow sites. Try a lower TimeOut value (see
+options, or <tt>-Txx</tt> option in the command line program). Note that you will abandon
+the entire site (except if the option is unchecked) if a timeout happens. You can, with the
+Shell version, skip some slow files, too.</em><br>
+<br>
+
+</a><a NAME="Q3b">Q: <strong>I want to update a site, but it's taking too much time! What's happening?</strong><br>
+A: <em>First, HTTrack always tries to minimize the download flow by interrogating the server about the
+file changes. But, because HTTrack has to rescan all files from the beginning to rebuild the local site structure,
+it can take some time.
+Besides, some servers are not very smart and always consider that they get newer files, forcing HTTrack to reload them,
+even if no changes have been made!
+</em><br>
+<br>
+
+</a><a NAME="Q3b2">Q: <strong>I wanted to update a site, but after the update the site disappeared!! What's going on?</strong><br>
+A: <em>You may have done something wrong, but not always
+
+<ul>
+<li>The site has moved : the current location only shows a notification. Therefore, all other files have been deleted to show the current state of the website!</li>
+<li>The connection failed: the engine could not catch the first files, and therefore deleted everything.
+To avoid that, using the option "do not purge old files" might be a good idea</li>
+<li>You tried to add a site to the project BUT in fact deleted the former addresses.<br>
+Example: A project contains '<tt>www.foo.com www.bar.com</tt>' and you want to add '<tt>www.doe.com</tt>'.
+Ensure that '<tt>www.foo.com www.bar.com www.doe.com</tt>' is the new URL list, and NOT '<tt>www.doe.com</tt>'!
+</li>
+</ul>
+
+</em><br>
+
+</a><a NAME="Q4">Q: <strong>I am behind a firewall. What can I do?</strong><br>
+A: <em>You need to use a proxy, too. Ask your administrator to know the proxy server's
+name/port. Then, use the proxy field in HTTrack or use the <tt>-P proxy:port</tt> option
+in the command line program.</em><br>
+</a></p>
+
+<p><a NAME="Q14">Q: <strong>HTTrack has crashed during a mirror, what's happening?</strong><br>
+A: <em>We are trying to avoid bugs and problems so that the program can be as reliable as
+possible. But we can not be infallible. If you encounter a bug, please check if you have the
+latest release of HTTrack, and send us an email with a detailed description of your
+problem (OS type, addresses concerned, crash description, and everything you deem to be
+necessary). This may help the other users too.</em><br>
+<br>
+<br>
+
+<a NAME="Q100">Q: <strong>I want to update a mirrored project, but HTTrack is retransferring all pages. What's going on?</strong><br>
+A: <em>First, HTTrack always rescans all local pages to reconstitute the website structure, and it can take some time.
+Then, it asks the server if the files that are stored locally are up-to-date. On most sites, pages are not
+updated frequently, and the update process is fast. But some sites have dynamically-generated pages that are considered as
+"newer" than the local ones.. even if they are identical! Unfortunately, there is no possibility to avoid this problem,
+which is strongly linked with the server abilities.
+</em>
+<br>
+<br>
+
+<a NAME="Q10a">Q: <strong>I want to continue a mirrored project, but HTTrack is rescanning all pages. What's going on?</strong><br>
+A: <em>HTTrack has to (quickly) rescan all pages from the cache, without retransferring them, to rebuild the internal file structure. However, this process can take some time with huge sites
+with numerous links.
+</em>
+<br>
+<br>
+
+<a NAME="Q101">Q: <strong>HTTrack window sometimes "disappears" at the end of a mirrored project. What's going on?</strong><br>
+A: <em>This is a known bug in the interface. It does NOT affect the quality of the mirror, however. We are still hunting it down,
+but this is a smart bug..
+</em>
+<br>
+<br>
+
+<br><u><strong>Questions concerning a mirror:</strong></u><br>
+
+<br>
+<a NAME="Q5">Q: <strong>I want to mirror a Web site, but there are some files outside
+the domain, too. How to retrieve them?</strong><br>
+A: <em>If you just want to retrieve files that can be reached through links, just activate
+the 'get file near links' option. But if you want to retrieve html pages too, you can both
+use wildcards or explicit addresses ; e.g. add <tt>www.someweb.com/*</tt> to accept all
+files and pages from www.someweb.com.<br>
+<br>
+</em></a><a NAME="Q6">Q: <strong>I have forgotten some URLs of files during a long
+mirror.. Should I redo all?</strong><br>
+A: <em>No, if you have kept the 'cache' files (in hts-cache), cached files will not be
+retransferred.</em><br>
+<br>
+</a><a NAME="Q7">Q: <strong>I just want to retrieve all ZIP files or other files in a web
+site/in a page. How do I do it?</strong><br>
+A: <em>You can use different methods. You can use the 'get files near a link' option if
+files are in a foreign domain. You can use, too, a filter address: adding <tt>+*.zip</tt>
+in the URL list (or in the filter list) will accept all ZIP files, even if these files are
+outside the address. <br>
+Example : <tt>httrack www.someweb.com/someaddress.html +*.zip</tt> will allow
+you to retrieve all zip files that are linked on the site.</em><br>
+<br>
+</a><a NAME="Q8">Q: <strong>There are ZIP files in a page, but I don't want to transfer
+them. How do I do it?</strong><br>
+A: <em>Just filter them: add <tt>-*.zip</tt></em> in the filter list.<br>
+<br>
+</a><a NAME="Q9">Q: <strong>I don't want to load gif files.. but what may happen if I
+watch the page?</strong><br>
+A: <em>If you have filtered gif files (<tt>-*.gif</tt>), links to gif files will be
+rebuilt so that your browser can find them on the server.</em><br>
+<br>
+</a><a NAME="Q10">Q: <strong>I get all types of files on a web site, but I didn't select
+them on filters!</strong><br>
+A: <em>By default, HTTrack retrieves all types of files on authorized links. To avoid
+that, define filters like </a><a NAME="Q7"><tt>-* +&lt;website&gt;/*.html
++&lt;website&gt;/*.htm +&lt;website&gt;/ +*.&lt;type wanted&gt;</tt></a><a NAME="Q10"><br>
+Example: <tt>httrack www.someweb.com/index.html -* +www.someweb.com/*.htm* +www.someweb.com/*.gif +www.someweb.com/*.jpg</tt><br>
+<br>
+</em>Q: <strong>When I use filters, I get too many files!</strong><br>
+A: <em>You might use too large a filter, for example <tt>*.html</tt> will get ALL html
+files identified. If you want to get all files on an address, use <tt>www.&lt;address&gt;/*.html</tt>.<br>
+If you want to get ONLY files defined by your filters, use something like <tt>-* +www.foo.com/*</tt>, because
+<tt>+www.foo.com/*</tt> will only accept selected links without forbidding other ones!<br>
+There are lots of possibilities using filters.<br>
+Example:<tt>httrack www.someweb.com +*.someweb.com/*.htm*</tt><br>
+<br>
+</em></a><a NAME="Q11">Q: <strong>When I use filters, I can't access another domain, but I
+have filtered it!</strong><br>
+A: <em>You may have made a mistake declaring filters, for example <tt>+www.someweb.com/*
+-*someweb* </tt></em>will not work, because -*someweb* has an upper priority (because it has
+been declared after +www.someweb.com)<br>
+<br>
+</a><a NAME="Q12">Q: <strong>Must I add a&nbsp; '+' or '-' in the filter list when I want
+to use filters?</strong><br>
+A: <em>YES. '+' is for accepting links and '-' to avoid them. If you forget it, HTTrack
+will consider that you want to accept a filter if there is a wild card in the syntax - e.g.
++&lt;filter&gt; is identical to &lt;filter&gt; if &lt;filter&gt; contains a wild card (*)
+(else it will be considered as a normal link to mirror)</em></a><br>
+<br>
+Q: <strong>I want to find file(s) in a web-site. How do I do it?</strong><br>
+A: <a NAME="Q13"><em>You can use the filters: forbid all files (add a <tt>-*</tt> in the
+filter list) and accept only html files and the file(s) you want to retrieve (BUT do not
+forget to add <tt>+&lt;website&gt;*.html</tt> in the filter list, or pages will not be
+scanned! Add the name of files you want with a <tt>*/</tt> before ; i.e. if you want to
+retrieve file.zip, add <tt>*/file.zip</tt>)<br>
+Example:<tt>httrack www.someweb.com +www.someweb.com/*.htm* +thefileiwant.zip</tt><br>
+<br>
+</em>
+
+<a NAME="Q200">Q: <strong>I want to download ftp files/ftp site. How do I do it?</strong><br>
+A: <em>First, HTTrack is not the best tool to download many ftp files. Its ftp engine is basic (even if reget are
+possible) and if your purpose is to download a complete site, use a specific client.<br>
+You can download ftp files just by typing the URL, such as <tt>ftp://ftp.somesite.com/pub/files/file010.zip</tt> and list ftp directories
+like <tt>ftp://ftp.somesite.com/pub/files/</tt></em>.<br>
+Note: For the filters, use something like <tt>+ftp.somesite.com/*</tt>
+<br>
+
+<br><a NAME="QM1">Q: <strong>How can I retrieve .asp or .cgi sources instead of .html result?</strong></a><br>
+A: <em>You can't! For security reasons, web servers do not allow that.</em>
+
+<br><br><a NAME="QM2">Q: <strong>How can I remove these annoying <tt>&lt;!-- Mirrored from... --&gt;</tt> from html files?</strong></a><br>
+A: <em>Use the footer option (-%F, or see the WinHTTrack options)</em>
+
+<br><br><a NAME="QM3">Q: <strong>Do I have to select between ascii/binary transfer mode?</strong></a><br>
+A: <em>No, http files are always transferred as binary files. Ftp files, too (even if ascii mode could be selected)</em>
+
+<br><br><a NAME="QM4">Q: <strong>Can HTTrack perform form-based authentication?</strong></a><br>
+A: <em>Yes. See the URL capture abilities (--catchurl for command-line release, or in the WinHTTrack interface)</em>
+
+<br><br><a NAME="QM5">Q: <strong>Can I redirect downloads to tar/zip archive?</strong></a><br>
+A: <em>Yes. See the shell system command option (-V option for command-line release)</em>
+
+<br><br><a NAME="QM6">Q: <strong>Can I use username/password authentication on a site?</strong></a><br>
+A: <em>Yes. Use user:password@your_url (example: <tt>http://foo:bar@www.someweb.com/private/mybox.html</tt>)</em>
+
+<br><br><a NAME="QM7">Q: <strong>Can I use username/password authentication for a proxy?</strong></a><br>
+A: <em>Yes. Use user:password@your_proxy_name as your proxy name (example: <tt>smith:foo@proxy.mycorp.com</tt>)</em>
+
+<br><br><a NAME="QM8">Q: <strong>Can HTTrack generate HP-UX or ISO9660 compatible files?</strong></a><br>
+A: <em>Yes. See the build options (-N, or see the WinHTTrack options)</em>
+
+<br><br><a NAME="QM9">Q: <strong>Is there any SOCKS support?</strong></a><br>
+A: <em>Not yet!</em>
+
+<br><br><a NAME="QM10">Q: <strong>What's this hts-cache directory? Can I remove it?</strong></a><br>
+A: <em>NO if you want to update the site, because this directory is used by HTTrack for this purpose.
+If you remove it, options and URLs will not be available for updating the site</em>
+
+<br><br><a NAME="QM11">Q: <strong>Can I start a mirror from my bookmarks?</strong></a><br>
+A: <em>Yes. Drag&amp;Drop your bookmark.html file to the WinHTTrack window (or use file://filename for command-line release) and select
+bookmark mirroring (mirror all links in pages, -Y) or bookmark testing (--testlinks)</em>
+
+<br><br><a NAME="QM11c">Q: <strong>Can I convert a local website (file:// links) to a standard website?</strong></a><br>
+A: <em>Yes. Just start from the top index (example: file://C:\foopages\index.html) and mirror the local website.
+HTTrack will convert all file:// links to relative ones.
+</em>
+
+<br><br><a NAME="QM11b">Q: <strong>Can I copy a project to another folder - Will the mirror work?</strong></a><br>
+A: <em>Yes. There are no absolute links, all links are relative.
+You can copy a project to another drive/computer/OS, and browse it without installing anything.</em>
+
+<br><br><a NAME="QM12">Q: <strong>Can I copy a project to another computer/system? Can I then update it ?</strong></a><br>
+A: <em>Absolutely! You can keep your HTTrack favorite folder (C:\My Web Sites) in your local hard disk, copy it
+for a friend, and possibly update it, and then bring it back!<br>You can copy individual folders (projects), too: exchange
+your favorite websites with your friends, or send an old version of a site to someone who has a faster connection, and
+ask him to update it!<br>
+
+
+<br><small>
+Note: Export (Windows &lt;-&gt; Linux)<br>
+The file and cache structure is compatible between Linux/Windows, but you may have to do some changes, like the path<br>
+<table border="1">
+ <tr><th>
+ Windows -&gt; Linux/Unix
+ </th></tr>
+ <tr><td>
+ Copy (in binary mode) the entire folder and then to update it, enter into it and do a<br>
+ <tt>
+ httrack --update -O ./
+ </tt>
+ <br><br>
+ <i>
+ Note: You can then safely replace the existing folder (under Windows) with this one, because
+ the Linux/Unix version did not change any options<br>
+ Note: If you often switch between Windows/Linux with the same project, it might be a good idea to edit the hts-cache/doit.log file
+ and delete old "-O" entries, because each time you do a <tt>httrack --update -O ./</tt> an entry is added,
+ causing the command line to be long
+ </i>
+ </td></tr>
+ <tr><th>
+ Linux/Unix -&gt; Windows
+ </th></tr>
+ <tr><td>
+ Copy (in binary mode) the entire folder in your favorite Web mirror folder.
+ Then, select this project, AND retype ALL URLs AND redefine all options as if you were
+ creating a new project.
+ This is necessary because the profile (winprofile.ini) has not been created with the Linux/Unix version.
+ But do not be afraid, WinHTTrack will use cached files to update the project!
+ </td></tr>
+</table>
+</small>
+
+</em>
+
+<br><br><a NAME="QM13">Q: <strong>How can I grab email addresses in web pages?</strong></a><br>
+A: <em>You can not. HTTrack has not been designed to be an email grabber, like many other (bad) products.
+</em>
+
+<br>
+<br>
+<br>
+<u><strong>Other problems:</strong></u><br>
+<br>
+
+<a NAME="Q300">Q: <strong>My problem is not listed!</strong><br>
+A: <em>Feel free to <a href="contact.html">contact us</a>!
+</em><br>
+
+</em></p><h3><br>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/fcguide.html b/HelpHtml/fcguide.html
new file mode 100644
index 0000000..34f9edd
--- /dev/null
+++ b/HelpHtml/fcguide.html
@@ -0,0 +1,2719 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<center><h1> Httrack Users Guide (3.10)</h1>
+By Fred Cohen </center>
+
+<hr>
+
+<h2> Background and Introduction </h2>
+
+<p align=justify> I started using httrack in mid-2000 and found it to be
+an excellent tool for imaging web sites. Various words are used to
+describe this process - from imaging to mirroring to snaking and so on.
+I will be using a variety of these words in my description.
+
+<p align=justify> I have used many such tools over the years, have
+performed many manual and semi-automatic operations of similar sorts,
+and written partial programs to do similar functions, but - at least for
+now - httrack seems to me to be the best option for this function.
+
+<p align=justify> The only problem I encountered when using httrack was
+that it is so rich with features that I could never really figure out
+precisely the right thing to do at any given point. I was using
+recipes rather than knowledge to get the job done - and I was pestering
+the authors for those recipes. After a few days of very helpful
+assistance from the authors I volunteered to write a users manual for
+httrack - and here it is. I hope it gets the job done.
+
+<hr>
+<h2> Basics </h2>
+
+<p align=justify> Httrack is a program that gets information from the
+Internet, looks for pointers to other information, gets that
+information, and so forth. If you ask it to, and have enough disk
+space, it will try to make a copy of the whole Internet on your
+computer. While this may be the answer to Dilbert's boss when he asks
+to get a printout of the Internet for some legal document, for most of
+us, we want to get copies of just the right part of the Internet, and
+have them nicely organized for our use. This is where httrack does a
+great job. Here's a simple example:
+
+<pre><b><i>
+httrack "http://www.all.net/" -O "/tmp/www.all.net" "+*.all.net/*" -v
+</i></b></pre>
+
+<p align=justify> In this example, we ask httrack to start the Universal
+Resource Locator (URL) http://www.all.net/ and store the results under
+the directory /tmp/www.all.net (the -O stands for "output to") while not
+going beyond the bounds of all the files in the www.all.net domain and
+printing out any error messages along the way (-v means verbose). This
+is the most common way that I use httrack. Please note that this
+particular command might take you a while - and run you out of disk
+space.
+
+<p align=justify> This sort of a mirror image is not an identical copy
+of the original web site - in some ways it's better such as for local
+use - while in other ways it may be problematic - such as for legal use.
+This default mirroring method changes the URLs within the web site so
+that the references are made relative to the location the copy is stored
+in. This makes it very useful for navigating through the web site on
+your local machine with a web browser since most things will work as you
+would expect them to work. In this example, URLs that point outside of
+the www.all.net domain space will still point there, and if you encounter
+one, the web browser will try to get the data from that location.
+
+<p align=justify> For each of the issues discussed here - and many more
+- httrack has options to allow you to make different choices and get
+different results. This is one of the great things about httrack - and
+one of the real major problems with using it without the knowledge
+of all that it can do. If you want to know all the things httrack can
+do, you might try typing:
+
+<pre><b><i>
+httrack --help
+</i></b></pre>
+
+<p align=justify> Unfortunately, while this outputs a thorough list of
+options, it is somewhat less helpful than it might be for those who don't
+know what the options all mean and haven't used them before. On the
+other hand, this is most useful for those who already know how to use
+the program but don't remember some obscure option that they haven't
+used for some time.
+
+<p align=justify> The rest of this manual is dedicated to detailing what
+you find in the help message and providing examples - lots and lots of
+examples... Here is what you get (page by page - use &lt;enter&gt; to move to
+the next page in the real program) if you type 'httrack --help':
+
+<pre>
+>httrack --help
+ HTTrack version 3.03BETAo4 (compiled Jul 1 2001)
+ usage: ./httrack &lt;URLs&gt; [-option] [+&lt;FILTERs&gt;] [-&lt;FILTERs&gt;]
+ with options listed below: (* is the default value)
+
+General options:
+ O path for mirror/logfiles+cache (-O path_mirror[,path_cache_and_logfiles]) (--path <param>)
+ %O top path if no path defined (-O path_mirror[,path_cache_and_logfiles])
+
+Action options:
+ w *mirror web sites (--mirror)
+ W mirror web sites, semi-automatic (asks questions) (--mirror-wizard)
+ g just get files (saved in the current directory) (--get-files)
+ i continue an interrupted mirror using the cache
+ Y mirror ALL links located in the first level pages (mirror links) (--mirrorlinks)
+
+Proxy options:
+ P proxy use (-P proxy:port or -P user:pass@proxy:port) (--proxy <param>)
+ %f *use proxy for ftp (f0 don't use) (--httpproxy-ftp[=N])
+
+Limits options:
+ rN set the mirror depth to N (* r9999) (--depth[=N])
+ %eN set the external links depth to N (* %e0) (--ext-depth[=N])
+ mN maximum file length for a non-html file (--max-files[=N])
+ mN,N' for non html (N) and html (N')
+ MN maximum overall size that can be uploaded/scanned (--max-size[=N])
+ EN maximum mirror time in seconds (60=1 minute, 3600=1 hour) (--max-time[=N])
+ AN maximum transfer rate in bytes/seconds (1000=1kb/s max) (--max-rate[=N])
+ %cN maximum number of connections/seconds (*%c10)
+ GN pause transfer if N bytes reached, and wait until lock file is deleted (--max-pause[=N])
+
+Flow control:
+ cN number of multiple connections (*c8) (--sockets[=N])
+ TN timeout, number of seconds after a non-responding link is shutdown (--timeout)
+ RN number of retries, in case of timeout or non-fatal errors (*R1) (--retries[=N])
+ JN traffic jam control, minimum transfert rate (bytes/seconds) tolerated for a link (--min-rate[=N])
+ HN host is abandonned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow (--host-control[=N])
+
+Links options:
+ %P *extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use) (--extended-parsing[=N])
+ n get non-html files 'near' an html file (ex: an image located outside) (--near)
+ t test all URLs (even forbidden ones) (--test)
+ %L <file add all URL located in this text file (one URL per line) (--list <param>)
+
+Build options:
+ NN structure type (0 *original structure, 1+: see below) (--structure[=N])
+ or user defined structure (-N "%h%p/%n%q.%t")
+ LN long names (L1 *long names / L0 8-3 conversion) (--long-names[=N])
+ KN keep original links (e.g. http://www.adr/link) (K0 *relative link, K absolute links, K3 absolute URI links) (--keep-links[=N])
+ x replace external html links by error pages (--replace-external)
+ %x do not include any password for external password protected websites (%x0 include) (--no-passwords)
+ %q *include query string for local files (useless, for information purpose only) (%q0 don't include) (--include-query-string)
+ o *generate output html file in case of error (404..) (o0 don't generate) (--generate-errors)
+ X *purge old files after update (X0 keep delete) (--purge-old[=N])
+
+Spider options:
+ bN accept cookies in cookies.txt (0=do not accept,* 1=accept) (--cookies[=N])
+ u check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always) (--check-type[=N])
+ j *parse Java Classes (j0 don't parse) (--parse-java[=N])
+ sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always) (--robots[=N])
+ %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies) (--http-10)
+ %B tolerant requests (accept bogus responses on some servers, but not standard!) (--tolerant)
+ %s update hacks: various hacks to limit re-transfers when updating (identical size, bogus response..) (--updatehack)
+ %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3=text/html) (--assume <param>)
+
+Browser ID:
+ F user-agent field (-F "user-agent name") (--user-agent <param>)
+ %F footer string in Html code (-%F "Mirrored [from host %s [file %s [at %s]]]" (--footer <param>)
+ %l preffered language (-%l "fr, en, jp, *" (--language <param>)
+
+Log, index, cache
+ C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before) (--cache[=N])
+ k store all files in cache (not useful if files on disk) (--store-all-in-cache)
+ %n do not re-download locally erased files (--do-not-recatch)
+ %v display on screen filenames downloaded (in realtime) (--display)
+ Q no log - quiet mode (--do-not-log)
+ q no questions - quiet mode (--quiet)
+ z log - extra infos (--extra-log)
+ Z log - debug (--debug-log)
+ v log on screen (--verbose)
+ f *log in files (--file-log)
+ f2 one single log file (--single-log)
+ I *make an index (I0 don't make) (--index)
+ %I make an searchable index for this mirror (* %I0 don't make) (--search-index)
+
+Expert options:
+ pN priority mode: (* p3) (--priority[=N])
+ 0 just scan, don't save anything (for checking links)
+ 1 save only html files
+ 2 save only non html files
+ *3 save all files
+ 7 get html files before, then treat other files
+ S stay on the same directory
+ D *can only go down into subdirs
+ U can only go to upper directories
+ B can both go up&down into the directory structure
+ a *stay on the same address
+ d stay on the same principal domain
+ l stay on the same TLD (eg: .com)
+ e go everywhere on the web
+ %H debug HTTP headers in logfile (--debug-headers)
+
+Guru options: (do NOT use)
+ #0 Filter test (-#0 '*.gif' 'www.bar.com/foo.gif')
+ #f Always flush log files
+ #FN Maximum number of filters
+ #h Version info
+ #K Scan stdin (debug)
+ #L Maximum number of links (-#L1000000)
+ #p Display ugly progress information
+ #P Catch URL
+ #R Old FTP routines (debug)
+ #T Generate transfer ops. log every minutes
+ #u Wait time
+ #Z Generate transfer rate statictics every minutes
+ #! Execute a shell command (-#! "echo hello")
+
+Command-line specific options:
+ V execute system command after each files ($0 is the filename: -V "rm \$0") (--userdef-cmd <param>)
+ %U run the engine with another id when called as root (-%U smith) (--user <param>)
+
+Details: Option N
+ N0 Site-structure (default)
+ N1 HTML in web/, images/other files in web/images/
+ N2 HTML in web/HTML, images/other in web/images
+ N3 HTML in web/, images/other in web/
+ N4 HTML in web/, images/other in web/xxx, where xxx is the file extension (all gif will be placed onto web/gif, for example)
+ N5 Images/other in web/xxx and HTML in web/HTML
+ N99 All files in web/, with random names (gadget !)
+ N100 Site-structure, without www.domain.xxx/
+ N101 Identical to N1 exept that "web" is replaced by the site's name
+ N102 Identical to N2 exept that "web" is replaced by the site's name
+ N103 Identical to N3 exept that "web" is replaced by the site's name
+ N104 Identical to N4 exept that "web" is replaced by the site's name
+ N105 Identical to N5 exept that "web" is replaced by the site's name
+ N199 Identical to N99 exept that "web" is replaced by the site's name
+ N1001 Identical to N1 exept that there is no "web" directory
+ N1002 Identical to N2 exept that there is no "web" directory
+ N1003 Identical to N3 exept that there is no "web" directory (option set for g option)
+ N1004 Identical to N4 exept that there is no "web" directory
+ N1005 Identical to N5 exept that there is no "web" directory
+ N1099 Identical to N99 exept that there is no "web" directory
+Details: User-defined option N
+ %n Name of file without file type (ex: image) (--do-not-recatch)
+ %N Name of file, including file type (ex: image.gif)
+ %t File type (ex: gif)
+ %p Path [without ending /] (ex: /someimages)
+ %h Host name (ex: www.someweb.com) (--http-10)
+ %M URL MD5 (128 bits, 32 ascii bytes)
+ %Q query string MD5 (128 bits, 32 ascii bytes)
+ %q small query string MD5 (16 bits, 4 ascii bytes) (--include-query-string)
+ %s? Short name version (ex: %sN)
+ %[param] param variable in query string
+
+Shortcuts:
+--mirror <URLs *make a mirror of site(s) (default)
+--get <URLs get the files indicated, do not seek other URLs (-qg)
+--list <text file add all URL located in this text file (-%L)
+--mirrorlinks <URLs mirror all links in 1st level pages (-Y)
+--testlinks <URLs test links in pages (-r1p0C0I0t)
+--spider <URLs spider site(s), to test links: reports Errors & Warnings (-p0C0I0t)
+--testsite <URLs identical to --spider
+--skeleton <URLs make a mirror, but gets only html files (-p1)
+--update update a mirror, without confirmation (-iC2)
+--continue continue a mirror, without confirmation (-iC1)
+
+--catchurl create a temporary proxy to capture an URL or a form post URL
+--clean erase cache & log files
+
+--http10 force http/1.0 requests (-%h)
+
+example: httrack www.someweb.com/bob/
+means: mirror site www.someweb.com/bob/ and only this site
+
+example: httrack www.someweb.com/bob/ www.anothertest.com/mike/ +*.com/*.jpg
+means: mirror the two sites together (with shared links) and accept any .jpg files on .com sites
+
+example: httrack www.someweb.com/bob/bobby.html +* -r6
+means get all files starting from bobby.html, with 6 link-depth, and possibility of going everywhere on the web
+
+example: httrack www.someweb.com/bob/bobby.html --spider -P proxy.myhost.com:8080
+runs the spider on www.someweb.com/bob/bobby.html using a proxy
+
+example: httrack --update
+updates a mirror in the current folder
+
+example: httrack --continue
+continues a mirror in the current folder
+
+HTTrack version 3.03BETAo4 (compiled Jul 1 2001)
+Copyright (C) Xavier Roche and other contributors
+[compiled: Linux linux 2.2.18 #9 SMP Sat Dec 30 09:51:39 CET 2000 i586 unknown]
+
+</pre>
+
+<p align=justify> For many of you, the manual is now complete, but for
+the rest of us, I will now go through this listing one item at a time
+with examples... I will be here a while...
+
+<hr>
+<h2> Syntax </h2>
+
+<pre><b><i>httrack &lt;URLs&gt; [-option] [+&lt;FILTERs&gt;] [-&lt;FILTERs&gt;] </i></b></pre>
+
+<p align=justify> The syntax of httrack is quite simple. You specify
+the URLs you wish to start the process from (&lt;URLs&gt;), any options you
+might want to add ([-option]), any filters specifying places you should
+([+&lt;FILTERs&gt;]) and should not ([-&lt;FILTERs&gt;]) go, and end the command
+line by pressing &lt;enter&gt;. Httrack then goes off and does your bidding.
+For example:
+
+<pre><b><i>
+httrack www.all.net/bob/
+</i></b></pre>
+
+<p align=justify> This will use the 'defaults' (those selections from
+the help page marked with '*' in the listing above) to image the web
+site. Specifically, the defaults are:
+
+<pre>
+ w *mirror web sites
+ %f *use proxy for ftp (f0 don't use)
+ cN number of multiple connections (*c8)
+ RN number of retries, in case of timeout or non-fatal errors (*R1)
+ %P *extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use)
+ NN name conversion type (0 *original structure, 1+: see below)
+ LN long names (L1 *long names / L0 8-3 conversion)
+ K keep original links (e.g. http://www.adr/link) (K0 *relative link)
+ o *generate output html file in case of error (404..) (o0 don't generate)
+ X *purge old files after update (X0 keep delete)
+ bN accept cookies in cookies.txt (0=do not accept,* 1=accept)
+ u check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always)
+ j *parse Java Classes (j0 don't parse)
+ sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)
+ C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before)
+ f *log file mode
+ I *make an index (I0 don't make)
+ pN priority mode: (* p3) *3 save all files
+ D *can only go down into subdirs
+ a *stay on the same address
+ --mirror &lt;URLs&gt; *make a mirror of site(s) (default)
+</pre>
+
+<p align=justify> Here's what all of that means:
+
+<ul>
+<pre><b><i> w *mirror web sites </i></b></pre>
+
+<p align=justify> Automatically go through each URL you download and look
+for links to other URLs inside it, downloading them as well.
+
+<pre><b><i> %f *use proxy for ftp (f0 don't use) </i></b></pre>
+
+<p align=justify> If there are any links to ftp URLs (URLs using the
+file transfer protocol (FTP) rather than the hypertext transfer protocol
+(HTTP)), go through an ftp proxy server to get them.
+
+<pre><b><i> cN number of multiple connections (*c8) </i></b></pre>
+
+<p align=justify> Use up to 8 simultaneous downloads so that at any
+given time, up to 8 URLs may be underway.
+
+<pre><b><i> RN number of retries, in case of timeout or non-fatal errors (*R1) </i></b></pre>
+
+<p align=justify> Retry once if anything goes wrong with a download.
+
+<pre><b><i> %P *extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use) </i></b></pre>
+
+<p align=justify> Try to parse all URLs - even if they are in
+Javascript, Java, tags of unknown types, or anywhere else the program
+can find things.
+
+<pre><b><i> NN name conversion type (0 *original structure, 1+: see below) </i></b></pre>
+
+<p align=justify> Use the original directory and file structure of the
+web site in your mirror image of the site.
+
+<pre><b><i> LN long names (L1 *long names / L0 8-3 conversion) </i></b></pre>
+
+<p align=justify> If filenames do not follow the old DOS conventions,
+store them with the same names used on the web site.
+
+<pre><b><i> K keep original links (e.g. http://www.adr/link) (K0 *relative link) </i></b></pre>
+
+<p align=justify> Use relative rather than the original links so that
+URLs within this web site are adjusted to point to the files in the
+mirror.
+
+<pre><b><i> o *generate output html file in case of error (404..) (o0 don't generate) </i></b></pre>
+
+<p align=justify> If there are errors in downloading, create a file that
+indicates that the URL was not found. This makes browsing go a lot
+smoother.
+
+<pre><b><i> X *purge old files after update (X0 keep delete) </i></b></pre>
+
+<p align=justify> Files not found on the web site that were previously
+there get deleted so that you have an accurate snapshot of the site as
+it is today - losing historical data.
+
+<pre><b><i> bN accept cookies in cookies.txt (0=do not accept,* 1=accept) </i></b></pre>
+
+<p align=justify> Accept all cookies sent to you and return them if
+requested. This is required for many sites to function. These cookies
+are only kept relative to the specific site, so you don't have to worry
+about your browser retaining them.
+
+<pre><b><i> u check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always) </i></b></pre>
+
+<p align=justify> This causes different document types to be analyzed
+differently.
+
+<pre><b><i> j *parse Java Classes (j0 don't parse) </i></b></pre>
+
+<p align=justify> This causes Java class files to be parsed looking for
+URLs.
+
+<pre><b><i> sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always) </i></b></pre>
+
+<p align=justify> This tells the program to follow the wishes of the
+site owner with respect to limiting where robots like this one search.
+
+<pre><b><i> C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before) </i></b></pre>
+
+<p align=justify> If you are downloading a site you have a previous copy
+of, supplemental parameters are transmitted to the server, for example
+the 'If-Modified-Since:' field will be used to see if files are newer
+than the last copy you have. If they are newer, they will be
+downloaded, otherwise, they will not.
+
+<pre><b><i> f *log file mode </i></b></pre>
+
+<p align=justify> This retains a detailed log of any important events
+that took place.
+
+<pre><b><i> I *make an index (I0 don't make) </i></b></pre>
+
+<p align=justify> This makes a top-level index.html file so that if you
+image a set of sites, you can have one place to start reviewing the set
+of sites.
+
+<pre><b><i> pN priority mode: (* p3) *3 save all files </i></b></pre>
+
+<p align=justify> This will cause all downloaded files to be saved.
+
+<pre><b><i> D *can only go down into subdirs </i></b></pre>
+
+<p align=justify> This prevents the program from going to higher level
+directories than the initial subdirectory, but allows lower-level
+subdirectories of the starting directory to be investigated.
+
+<pre><b><i> a *stay on the same address </i></b></pre>
+
+<p align=justify> This indicates that only the web site(s) where the
+search started are to be collected. Other sites they point to are not
+to be imaged.
+
+<pre><b><i> --mirror &lt;URLs&gt; *make a mirror of site(s) (default) </i></b></pre>
+
+<p align=justify> This indicates that the program should try to make a
+copy of the site as well as it can.
+
+</ul>
+
+<p align=justify> Now that's a lot of options for the default - but of
+course there are a lot more options to go. For the most part, the rest
+of the options represent variations on these themes. For example,
+instead of saving all files, we might only want to save html files, or
+instead of 8 simultaneous sessions, we might want only 4.
+
+<p align=justify> If we wanted to make one of these changes, we would
+specify the option on the command line. For example:
+
+<pre><b><i>
+httrack www.all.net/bob/ -c4 -B
+</i></b></pre>
+
+<p align=justify> This would restrict httrack to only use 4
+simultaneous sessions but allow it to go up the directory structure
+(for example to www.all.net/joe/) as well as down it (for example to
+www.all.net/bob/deeper/).
+
+<p align=justify> You can add a lot of options to a command line!
+
+<hr>
+
+<h2> A Thorough Going Over </h2>
+
+<p align=justify> Now that you have an introduction, it's time for a
+more thorough coverage. This is where I go through each of the options
+and describe it in detail with examples... Actually, I won't quite do
+that. But I will get close.
+
+<p align=justify> Options tend to come in groups. Each group tends to
+be interrelated, so it's easier and more useful to go through them a
+group at a time with some baseline project in mind. In my case, the
+project is to collect all of the information on the Internet about some
+given subject. We will assume that, through a previous process, I have
+gotten a list of URLs of interest to me. Typically there will be
+hundreds of these URLs, and they will be a mixed bag of sites that are
+full of desired information, pages with lists of pointers to other
+sites, URLs of portions of a web site that are of interest (like Bob's
+home pages and subdirectories), and so forth. Let us say that for today
+we are looking for the definitive collection of Internet information on
+shoe sizes from around the world.
+
+<hr>
+<h3>General Options</h3>
+
+<pre><b><i>
+General options:
+ O path for mirror/logfiles+cache (-O path_mirror[,path_cache_and_logfiles])
+</i></b></pre>
+
+<p align=justify> For this project, I will want to keep all of the
+information I gather in one place, so I will specify that output area of
+the project as /tmp/shoesizes by adding '<b><i>-O
+/tmp/shoesizes</i></b>' to every command line I use. For example:
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes
+</i></b></pre>
+
+<p align=justify> The action options tell httrack how to operate at the
+larger level.
+
+<hr>
+<h3>Action Options</h3>
+
+<pre><b><i>
+Action options:
+ w *mirror web sites
+ W mirror web sites, semi-automatic (asks questions)
+ g just get files (saved in the current directory)
+ i continue an interrupted mirror using the cache
+ Y mirror ALL links located in the first level pages (mirror links)
+</i></b></pre>
+
+<p align=justify> If I want httrack to ask me questions - such as what
+options to use, what sites to mirror, etc. I can tell it to ask these
+questions as follows:
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -W
+</i></b></pre>
+
+
+<p align=justify> I can also do:
+
+<pre><b><i>httrack</i></b></pre>
+OR
+<pre><b><i>httrack -W</i></b></pre>
+OR
+<pre><b><i>httrack -w</i></b></pre>
+
+<p align=justify> The '-W' option asks whether or not a site has to
+be mirrored, while the '-w' option does not ask this question but asks
+the remainder of the questions required to mirror the site.
+
+<p align=justify> The -g option allows you to get the files exactly as
+they are and store them in the current directory. This is handy for a
+relatively small collection of information where organization isn't
+important. With this option, the html files will not even be parsed to
+look for other URLs. This option is useful for getting isolated files
+(e.g., httrack -g www.mydrivers.com/drivers/windrv32.exe).
+
+
+<p align=justify> If I start a collection process and it fails for some
+reason or another - such as me interrupting it because I am running out
+of disk space - or a network outage - then I can restart the process by
+using the -i option:
+
+<pre><b><i>httrack http://www.shoesizes.com -O /tmp/shoesizes -i </i></b></pre>
+
+<p align=justify> Finally, I can mirror all links in the first level
+pages of the URLs I specify. A good example of where to use this would
+be in a case where I have a page that points to a lot of other sites and
+I want to get the initial information on those sites before mirroring
+them:
+
+<pre><b><i>httrack http://www.shoesizes.com/othersites.html -O /tmp/shoesizes -Y </i></b></pre>
+
+<hr>
+<h3>Proxy Options</h3>
+
+<p align=justify> Many users use a proxy for many of their functions.
+This is a key component in many firewalls, but it is also commonly used
+for anonymizing access and for exploiting higher speed communications at
+a remote server.
+
+<pre><b><i>Proxy options:
+ P proxy use (-P proxy:port or -P user:pass@proxy:port)
+ %f *use proxy for ftp (f0 don't use)
+</i></b></pre>
+
+<p align=justify> If you are using a standard proxy that doesn't require
+a user ID and password, you would do something like this:
+
+<pre><b><i>httrack http://www.shoesizes.com -O /tmp/shoesizes -P proxy.www.all.net:8080 </i></b></pre>
+
+<p align=justify> In this case, we have assumed that proxy.www.all.net is
+the host that does the proxy service and that it uses port 8080 for this
+service. In some cases you will have to ask your network or firewall
+administrator for these details, however, in most cases they should be
+the same as the options used in your web browser.
+
+<p align=justify> In some cases, a user ID and password are required for
+the proxy server. This is common in corporate environments where only
+authorized users may access the Internet.
+
+<pre><b><i>httrack http://www.shoesizes.com -O /tmp/shoesizes -P fc:password@proxy.www.all.net:8080 </i></b></pre>
+
+<p align=justify> In this case, the user ID 'fc' and the password
+'password' are used on proxy.www.all.net port 8080. Again, your network or
+firewall administrator can be most helpful in addressing the specifics
+for your environment.
+
+<p align=justify> FTP normally operates through a proxy server, but for systems
+that have direct connections to the Internet, the following option should help:
+
+<pre><b><i>httrack ftp://ftp.shoesizes.com -O /tmp/shoesizes -%f0 </i></b></pre>
+
+<hr>
+<h3>Limits Options</h3>
+
+<pre><b><i>
+Limits options:
+ rN set the mirror depth to N
+ mN maximum file length for a non-html file
+ mN,N' for non html (N) and html (N')
+ MN maximum overall size that can be uploaded/scanned
+ EN maximum mirror time in seconds (60=1 minute, 3600=1 hour)
+ AN maximum transfer rate in bytes/seconds (1000=1kb/s max)
+ GN pause transfer if N bytes reached, and wait until lock file is deleted
+ %eN set the external links depth to N (* %e0) (--ext-depth[=N])
+ %cN maximum number of connections/seconds (*%c10)
+</i></b></pre>
+
+<p align=justify> Setting limits provides the means by which you can
+avoid running out of disk space, CPU time, and so forth. This may be
+particularly helpful for those who accidentally try to image the whole
+Internet.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -r50
+</i></b></pre>
+
+<p align=justify> In this example, we limit the directory depth to 50
+levels deep. As a general rule, web sites don't go much deeper than 20
+levels or so, and if you think about it, if there are only 2
+subdirectories per directory level, a directory structure 50 deep would
+have about a quadrillion (2^50) directories. Of course many sites have a small
+number of files many levels deep in a directory structure for various
+reasons. In some cases, a symbolic link will cause an infinite
+recursion of directory levels as well, so placing a limit may be
+advisable.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -m50000000
+</i></b></pre>
+
+<p align=justify> This example sets the maximum file length for non-HTML
+files to 50 megabytes. This is not an unusual length for things like
+tar files, and in some cases - for example when there are images of
+CD-ROMs to fetch from sites, you might want a limit more like 750
+megabytes.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -m50000000,100000
+</i></b></pre>
+
+<p align=justify> In this example, we have set a limit for html files
+as well - at 100,000 bytes. HTML files are rarely larger than this,
+however, in some cases larger sizes may be needed.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -M1000000000
+</i></b></pre>
+
+<p align=justify> This option sets the maximum total size - in bytes -
+that can be uploaded from a site - in this case to 1 gigabyte.
+Depending on how much disk space you have, such an option may be
+worthwhile.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -E3600
+</i></b></pre>
+
+<p align=justify> This sets the maximum runtime for the download
+process. Of course depending on the speed of your connection it may
+take longer or shorter runtimes to get the same job done, and network
+traffic is also a factor. 3600 seconds corresponds to one hour.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -A100000000
+</i></b></pre>
+
+<p align=justify> This option specifies the largest number of bytes per
+second that should be used for transfers. For example, you might want
+to go slow for some servers that are heavily loaded in the middle of the
+day, or to download slowly so that the servers at the other end are less
+likely to identify you as mirroring their site. The setting above
+limits my bandwidth to 100 million bytes per second - slow I know, but I
+wouldn't want to stress the rest of the Internet.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -G1000000000
+</i></b></pre>
+
+<p align=justify> In this case, the G option is used to 'pause' a
+download after the first gigabyte is downloaded pending manual removal
+of the lockfile. This is handy if you want to download some portion of
+the data, move it to secondary storage, and then continue - or if you
+want to only download overnight and want to stop before daylight and
+continue the next evening. You could even combine this option with a
+cron job to remove the lock file so that the job automatically restarts
+at 7PM every night and gets another gigabyte.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%e5
+</i></b></pre>
+
+<p align=justify> In this case, httrack will only go to depth 5 for external links,
+thus not imaging the entire web, but only those links within 5 links of these web pages.
+
+<p align=justify> Also note that the interaction of these options may
+cause unintended consequences. For example, limiting bandwidth and
+download time conspire to limit the total amount of data that can
+be downloaded.
+
+<hr>
+<h3>Flow Control Options</h3>
+
+<pre><b><i>
+Flow control:
+ cN number of multiple connections (*c8)
+ %cN maximum number of connections/seconds (*%c10)
+ TN timeout, number of seconds after a non-responding link is shutdown
+ RN number of retries, in case of timeout or non-fatal errors (*R1)
+ JN traffic jam control, minimum transfer rate (bytes/seconds) tolerated for a link
+ HN host is abandoned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow
+</i></b></pre>
+
+<p align=justify> This example allows up to 128 simultaneous downloads.
+Note that this is likely to crash remote web servers - or at least fail
+to download many of the files - because of limits on the number of
+simultaneous sessions at many sites. At busy times of day, you might
+want to lower this to 1 or 2, especially at sites that limit the number
+of simultaneous users. Otherwise you will not get all of the downloads.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -c128
+</i></b></pre>
+
+<p align=justify> Many operating systems have a limit of 64 file
+handles, including internet connections and all other files that can be
+opened. Therefore, in many cases, more than 48 connections might cause
+a "socket error" because the OS can not handle that many sockets. This
+is also true for many servers. As an example, a test with 48 sockets on
+a cgi-based web server (Pentium 166,80Meg RAM) overloaded the machine
+and stopped other services from running correctly. Some servers will
+ban users that try to brutally download the website. 8 sockets is
+generally good, but when I'm getting large files (e.g., from a site
+with large graphical images) 1 or 2 sockets is a better selection. Here
+are some other figures from one sample set of runs:
+
+<ul><pre>
+Tests: on a 10/100Mbps network, 30MB website, 99 files (70 images (some are
+little, other are big (few MB)), 23 HTML)
+With 8 sockets: 1,24MB/s
+With 48 sockets: 1,30MB/s
+With 128 sockets: 0,93MB/s
+</pre></ul>
+
+<p align=justify> The timeout option causes downloads to time out after
+a non-response from a download attempt. 30 seconds is pretty reasonable
+for many sites. You might want to increase the number of retries as
+well so that you try again and again after such timeouts.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%c20
+</i></b></pre>
+
+<p align=justify> This limits the number of connections per second. It
+is similar to the above option but allows the pace to be controlled
+rather than the simultaneity. It is particularly useful for long-term
+pulls at low rates that allow little impact on remote infrastructure.
+The default is 10 connections per second.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -T30
+</i></b></pre>
+
+<p align=justify> This example increases the number of retries to 5.
+This means that if a download fails 5 times, httrack will give up on it.
+For relatively unreliable sites - or for busy times of day, this number
+should be higher.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -R5
+</i></b></pre>
+
+<p align=justify> This is an interesting option. It says that in a
+traffic jam - where downloads are excessively slow - we might decide to
+back off the download. In this case, we have limited downloads to stop
+bothering once we reach 10 bytes per second.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -J10
+</i></b></pre>
+
+<p align=justify> These three options will cause the download from a
+host to be abandoned if (respectively) (0) never, (1) a timeout is
+reached, (2) slow traffic is detected, (or) (3) a timeout is reached OR
+slow traffic is detected.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -H0
+httrack http://www.shoesizes.com -O /tmp/shoesizes -H1
+httrack http://www.shoesizes.com -O /tmp/shoesizes -H2
+httrack http://www.shoesizes.com -O /tmp/shoesizes -H3
+</i></b></pre>
+
+<p align=justify> Of course these options can be combined to provide a
+powerful set of criteria for when to continue a download and when to
+give it up, how hard to push other sites, and how much to stress
+infrastructures.
+
+<hr>
+<h3>Link Following Options</h3>
+
+<pre><b><i>
+Links options:
+ %P *extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use)
+ n get non-html files 'near' an html file (ex: an image located outside)
+ t test all URLs (even forbidden ones)
+ %L &lt;file&gt; add all URL located in this text file (one URL per line)
+</i></b></pre>
+
+<p align=justify> The links options allow you to control what links are
+followed and what links are not as well as to provide long lists of
+links to investigate. Any setting other than the default for this
+option forces the engine to use less reliable and more complex parsing.
+'Dirty' parsing means that links like 'xsgfd syaze="foo.gif"' will cause
+HTTrack to download foo.gif, even if HTTrack doesn't know what the "xsgfd
+syaze=" tag actually means! This option is powerful because some links
+might otherwise be missed, but it can cause errors in HTML or javascript.
+
+<p align=justify> This will direct the program to NOT search Javascript
+for unknown tag fields (e.g., it will find things like
+foo.location="bar.html"; but will not find things like bar="foo.gif";).
+While I have never had a reason to use this, some users may decide that
+they want to be more conservative in their searches. As a note,
+javascript imported files (.js) are not currently searched for URLs.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%P0
+</i></b></pre>
+
+<p align=justify> Now here is a classic bit of cleverness that 'does
+the right thing' for some cases. In this instance, we are asking
+httrack to get images - like gif and jpeg files that are used by a web
+page in its display, even though we would not normally get them. For
+example, if we were only getting a portion of a web site (e.g.,
+everything under the 'bob directory') we might want to get graphics from
+the rest of the web site - or the rest of the web - that are used in
+those pages as well so that our mirror will look right.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -n
+</i></b></pre>
+
+<p align=justify> Here, we limit the collection to bob's area of the
+server - except that we get images and other such things that are used
+by bob in his area of the server.
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -n
+</i></b></pre>
+
+<p align=justify> This option 'tests' all links - even those forbidden
+(by the robot exclusion protocol) - by using the 'HEAD' protocol to test
+for the presence of a file.
+
+<pre><b><i>
+httrack http://www.shoesizes.com/ -O /tmp/shoesizes -t
+</i></b></pre>
+
+<p align=justify> In this case, we use a file to list the URLs we wish
+to mirror. This is particularly useful when we have a lot to do and
+don't want to tirelessly type in URLs on command line after command line.
+It's also useful - for example - if you update a set of mirrored sites
+every evening. You can set up a command like this to run automatically
+from your cron file.
+
+<pre><b><i>
+httrack -%L linkfile -O /tmp/shoesizes
+</i></b></pre>
+
+<p align=justify> This will update the mirror of your list of sites
+whenever it is run.
+
+<pre><b><i>
+httrack -%L linkfile -O /tmp/shoesizes -B --update
+</i></b></pre>
+
+<p align=justify> The link file is also useful for things like this
+example where, after a binary image of a hard disk was analyzed (image)
+URLs found on that disk were collected by httrack:
+
+<pre><b><i>
+strings image | grep "http://" > list;
+httrack -%L list -O /tmp/shoesizes
+</i></b></pre>
+
+
+<hr>
+<h3>Mirror Build Options</h3>
+
+<pre><b><i>
+Build options:
+ NN name conversion type (0 *original structure, 1+: see below)
+ N user defined structure (-N "%h%p/%n%q.%t")
+ LN long names (L1 *long names / L0 8-3 conversion)
+ K keep original links (e.g. http://www.adr/link) (K0 *relative link)
+ x replace external html links by error pages
+ o *generate output html file in case of error (404..) (o0 don't generate)
+ X *purge old files after update (X0 keep delete)
+ %x do not include any password for external password protected websites (%x0 include) (--no-passwords)
+ %q *include query string for local files (information only) (%q0 don't include) (--include-query-string)
+</i></b></pre>
+
+<p align=justify> The user can define naming conventions for building
+the mirror of a site by using these options. For example, to retain the
+original structure, the default is used. This only modifies the
+structure to the extent that select characters (e.g., ~, :, &lt;, &gt;, \, and
+@) are replaced by _ in all pathnames.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -N0
+</i></b></pre>
+<p align=justify> OR
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes
+</i></b></pre>
+
+<p align=justify> In either case, the mirror will build with the same
+directory hierarchy and name structure as the original site. For cases
+when you want to define your own structure, you use a string like this:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -N "%h%p/%n.%t"
+</i></b></pre>
+
+<p align=justify> In this case, %h stands for the host name
+(e.g., www.shoesizes.com or ftp.shoesizes.com), %p
+stands for the pathname (e.g., /bob/), %n stands for the name of the
+file, and %t stands for type (file extension). The full list of these
+options follows:
+
+<ul><pre>
+%n Name of file without file type (ex: image)
+%N Name of file, including file type (ex: image.gif)
+%t File type (ex: gif)
+%p Path [without ending /] (ex: /someimages)
+%h Host name (ex: www.all.net)
+%M URL MD5 (128 bits, 32 ascii bytes)
+%Q query string MD5 (128 bits, 32 ascii bytes)
+%q small query string MD5 (16 bits, 4 ascii bytes)
+%s? Short name version (ex: %sN)
+</pre></ul>
+
+<p align=justify> Other 'N' options include:
+
+<ul>
+<pre><b><i>
+Details: Option N
+ N0 Site-structure (default)
+ N1 HTML in web/, images/other files in web/images/
+ N2 HTML in web/HTML, images/other in web/images
+ N3 HTML in web/, images/other in web/
+ N4 HTML in web/, images/other in web/xxx, where xxx is the file extension (all gif will be placed onto web/gif, for example)
+ N5 Images/other in web/xxx and HTML in web/HTML
+ N99 All files in web/, with random names (gadget !)
+ N100 Site-structure, without www.domain.xxx/
+ N101 Identical to N1 except that "web" is replaced by the site's name
+ N102 Identical to N2 except that "web" is replaced by the site's name
+ N103 Identical to N3 except that "web" is replaced by the site's name
+ N104 Identical to N4 except that "web" is replaced by the site's name
+ N105 Identical to N5 except that "web" is replaced by the site's name
+ N199 Identical to N99 except that "web" is replaced by the site's name
+ N1001 Identical to N1 except that there is no "web" directory
+ N1002 Identical to N2 except that there is no "web" directory
+ N1003 Identical to N3 except that there is no "web" directory (option set for g option)
+ N1004 Identical to N4 except that there is no "web" directory
+ N1005 Identical to N5 except that there is no "web" directory
+ N1099 Identical to N99 except that there is no "web" directory
+</i></b></pre>
+</ul>
+
+<p align=justify> Long names are normally used (the <b><i>-L1</i></b>
+option) but if you are imaging to a DOS file system or want
+accessibility from older versions of DOS and Windows, you can use the
+<b><i>-L0</i></b> option to generate 8.3 filenames.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -L0
+</i></b></pre>
+
+<p align=justify> With the 'K' option, you can keep the original links
+in files. While this is less useful in being able to view a web site
+from the mirrored copy, it is vitally important if you want an accurate
+copy of exactly what was on the web site in the first place. In a
+forensic image, for example, you might want to use this option to
+prevent the program from modifying the data as it is collected.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -K
+</i></b></pre>
+
+<p align=justify> In this case, instead of leaving external links (URLs
+that point to sites not being mirrored) in the pages, these links are
+replaced by pages that leave messages indicating that they could not be
+found. This is useful for local mirrors not on the Internet or mirrors
+that are on the Internet but that are not supposed to lead users to
+external sites. A really good use for this is that 'bugging' devices
+placed in web pages to track who is using them and from where will be
+deactivated by this process.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -x
+</i></b></pre>
+
+<p align=justify> This option prevents the generation of '404' error
+files to replace files that were not found even though there were URLs
+pointing to them. It is useful for saving space as well as eliminating
+unnecessary files in operations where a working web site is not the
+desired result.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -o0
+</i></b></pre>
+
+<p align=justify> This option prevents the automatic purging of files
+from the mirror site that were not found in the original web site after
+an 'update' is done. If you want to retain old data and old names for
+files that were renamed, this option should be used. If you want an
+up-to-date reflection of the current web site, you should not use this option.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -X0
+</i></b></pre>
+
+<p align=justify> These options can be combined as desired to produce a
+wide range of different arrangements, from collections of only graphical
+files stored in a graphics area, to files identified by their MD5
+checksums only, all stored in the same directory.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%x
+</i></b></pre>
+
+<p align=justify> This will not include passwords for web sites. If you
+mirror http://smith_john:foobar@www.privatefoo.com/smith/, and exclude
+using filters some links, these links will be by default rewritten with
+password data. For example, "bar.html" will be renamed into
+http://smith_john:foobar@www.privatefoo.com/smith/bar.html This can be a
+problem if you don't want to disclose the username/password! The %x
+option tells the engine not to include username/password data in
+rewritten URLs.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%q
+</i></b></pre>
+
+<p align=justify> This option is not very useful, because parameters are
+useless, as pages are not dynamic anymore when mirrored. But some
+javascript code may use the query string, and it can give useful
+information. For example: catalog4FB8.html?page=computer-science is
+clearer than catalog4FB8.html Therefore, this option is activated by
+default.
+
+<hr>
+<h3>Spider Options</h3>
+
+<p align=justify> These options provide for automation with regard to
+the remote server. For example, some sites require that cookies be
+accepted and sent back in order to allow access.
+
+<pre><b><i>
+Spider options:
+ bN accept cookies in cookies.txt (0=do not accept,* 1=accept)
+ u check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always)
+ j *parse Java Classes (j0 don't parse)
+ sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)
+ %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)
+ %B tolerant requests (accept bogus responses on some servers, but not standard!)
+ %s update hacks: various hacks to limit re-transfers when updating
+ %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3=text/html) (--assume &lt;param&gt;)
+</i></b></pre>
+
+<p align=justify> By default, cookies are universally accepted and
+returned. This makes for more effective collection of data, but allows
+the site to be identified with its collection of data more easily. To
+disable cookies, use this option:
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -b0
+</i></b></pre>
+
+<p align=justify> Some documents have known extension types (e.g.,
+html), while others have unknown types (e.g., iuh87Zs) and others may
+have misleading types (e.g., an html file with a 'gif' file extension).
+These options provide for (0) not checking file types, (1) checking all
+file types except directories, and (2) checking all file types including
+directories. Choose from these options:
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -u0
+httrack http://www.shoesizes.com -O /tmp/shoesizes -u1
+httrack http://www.shoesizes.com -O /tmp/shoesizes -u2
+</i></b></pre>
+
+<p align=justify> Meta tags or 'robots.txt' files on a web site are used
+to indicate what files should and should not be visited by automatic
+programs when collecting data. The polite and prudent move for normal
+data collection (and the default) is to follow this indication:
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -s2
+</i></b></pre>
+
+<p align=justify> This follows the robots protocol and meta-tags EXCEPT
+in cases where the filters disagree with the robots protocols or
+meta-tags.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -s1
+</i></b></pre>
+
+<p align=justify> In this next case, we ignore meta-tags and robots.txt
+files completely and just take whatever we can get from the site. The
+danger of this includes the fact that automated programs - like games or
+search engines may generate an unlimited number of nearly identical or
+identical outputs that will put us in an infinite loop collecting
+useless data under different names. The benefit is that we will get all
+the data there is to get.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -s0
+</i></b></pre>
+
+<p align=justify> This next option uses strict HTTP/1.0 protocol. This
+means the program will use HTTP/1.0 headers (as in RFC1945.TXT) and NOT
+extended 1.1 features described in RFC2616.TXT. For example, reget
+(complete a partially downloaded file) is a HTTP/1.1 feature. The Etag
+feature is also a HTTP/1.1 feature (Etag is a special identifier that
+allows file changes to be detected easily).
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%h
+</i></b></pre>
+
+<p align=justify> Some servers give responses not strictly within the
+requirements of the official http protocol. These 'Bogus' responses can
+be accepted by using this option. For example, when requesting foo.gif
+(5132 bytes), the server can, optionally, add:
+<pre>
+Content-length: 5132
+</pre>
+
+<p align=justify> This helps the client by allowing it to reserve a
+block of memory, instead of collecting each byte and re-reserving memory
+each time data is being received. But some servers are bogus, and send
+a wrong filesize. When HTTrack detects the end of file (connection
+broken), there are three cases:
+
+<ul>
+
+<p align=justify> 1- The connection has been closed by the server, and we
+have received all data (we have received the number of bytes indicated
+by the server). This is fine because we have successfully received the
+file.
+
+<p align=justify> 2- The connection has been closed by the server, BUT
+the filesize received is different from the server's headers: the
+connection has been suddenly closed, due to network problems, so we
+reget the file
+
+<p align=justify> 3- The connection has been closed by the server, the
+filesize received is different from the server's headers, BUT the file
+is complete, because the server gave us WRONG information! In this
+case, we use the bogus server option:
+</ul>
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%B
+</i></b></pre>
+
+<p align=justify> These options can be combined for the particular needs
+of the situation and are often adapted as a result of site-specific
+experiences.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%s
+</i></b></pre>
+
+<p align=justify> This is a collection of "tricks" which are not really
+"RFC compliant" but which can save bandwidth by trying not to retransfer
+data in several cases.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%A asp=text/html
+</i></b></pre>
+
+<p align=justify> The most important new feature for some people, maybe.
+This option tells the engine that if a link is encountered, with a
+specific type (.cgi, .asp, or .php3 for example), it MUST assume that
+this link has always the same MIME type, for example the "text/html"
+MIME type. This is VERY important to speed up many mirrors.
+
+<p align=justify> We have done tests on big HTML files (approx. 150 MB,
+150,000,000 bytes!) with 100,000 links inside. Such files are being
+parsed in approx. 20 seconds on my own PC by the latest optimized
+releases of HTTrack. But these tests have been done with links of
+known types, that is, html, gif, and so on.. If you have, say, 10,000
+links of unknown type, such as ".asp", this will cause the engine to
+test ALL these files, and this will SLOOOOW down the parser. In this
+example, the parser will take hours, instead of 20 seconds! In this
+case, it would be great to tell HTTrack: ".asp pages are in fact HTML
+pages" This is possible, using: -%A asp=text/html
+
+<p align=justify> The -%A option can be replaced by the alias --assume
+asp=text/html which is MUCH more clear. You can use multiple
+definitions, separated by ",", or use multiple options. Therefore, these
+two lines are identical:
+
+<pre>
+--assume asp=text/html --assume php3=text/html --assume cgi=image/gif
+--assume asp=text/html,php3=text/html,cgi=image/gif
+</pre>
+
+<p align=justify> The MIME type is the standard well known "MIME" type.
+Here are the most important ones:
+<pre>
+text/html Html files, parsed by HTTrack
+image/gif GIF files
+image/jpeg Jpeg files
+image/png PNG files
+</pre>
+
+<p align=justify> There is also a collection of "non standard" MIME types. Example:
+
+<pre>
+application/x-foo Files with "foo" type
+</pre>
+
+<p align=justify> Therefore, you can give to all files terminated by
+".mp3" the MIME type: application/x-mp3
+
+<p align=justify> This allows you to rename files on a mirror. If you
+KNOW that all "dat" files are in fact "zip" files renamed into "dat",
+you can tell httrack:
+
+<pre>
+--assume dat=application/x-zip
+</pre>
+
+<p align=justify> You can also "name" a file type, with its original
+MIME type, if this type is not known by HTTrack. This will avoid a test
+when the link will be reached:
+
+<pre>
+--assume foo=application/foobar
+</pre>
+
+<p align=justify> In this case, HTTrack won't check the type, because it
+has learned that "foo" is a known type, or MIME type
+"application/foobar". Therefore, it will leave the "foo" type untouched.
+
+<p align=justify> A last remark, you can use complex definitions like:
+
+<pre>
+--assume asp=text/html,php3=text/html,cgi=image/gif,dat=application/x-zip,mpg=application/x-mp3,foo=application/foobar
+</pre>
+
+<p align=justify> ..and save it on your .httrackrc file:
+
+<pre>
+set assume asp=text/html,php3=text/html,cgi=image/gif,dat=application/x-zip,mpg=application/x-mp3,foo=application/foobar
+</pre>
+
+<hr>
+<h3>Browser Options</h3>
+
+<p align=justify> Browsers commonly leave footprints in web servers - as
+web servers leave footprints in the browser.
+
+<pre><b><i>
+Browser ID:
+ F user-agent field (-F "user-agent name")
+ %F footer string in Html code (-%F "Mirrored [from host %s [file %s [at %s]]]")
+ %l preferred language (-%l "fr, en, jp, *") (--language &lt;param&gt;)
+</i></b></pre>
+
+<p align=justify> The user-agent field is used by servers to determine
+what kind of browser you are using as well as other information - such
+as your system type and operating system version. The 'User Agent'
+field can be set to indicate whatever is desired to the server. In this
+case, we are claiming to be a netscape browser (version 1.0) running a
+non-existent Solaris operating system version on a Sun Sparcstation.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -F "Mozilla 1.0, Sparc, Solaris 23.54.34"
+</i></b></pre>
+
+<p align=justify> On the other side, we may wish to mark each page
+collected with footer information so that we can see from the page where
+it was collected from, when, and under what name it was stored.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%F "Mirrored [from host %s [file %s [at %s]]]"
+</i></b></pre>
+
+<p align=justify> This makes a modified copy of the file that may be
+useful in future identification. While it is not 'pure' in some senses,
+it may (or may not) be considered similar to a camera that adds time and
+date stamps from a legal perspective.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%l "fr, en, jp, *"
+</i></b></pre>
+
+<p align=justify> "I prefer to have pages in French, then
+English, then Japanese, then any other language"
+
+<hr>
+<h3>Log, Cache, and Index Options</h3>
+
+<p align=justify> A lot of options are available for log files, indexing
+of sites, and cached results:
+
+<pre><b><i>
+Log, index, cache
+ C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before)
+ k store all files in cache (not useful if files on disk)
+ %n do not re-download locally erased files
+ Q log quiet mode (no log)
+ q quiet mode (no questions)
+ z extra infos log
+ Z debug log
+ v verbose screen mode
+ %v display on screen filenames downloaded (in realtime) (--display)
+ f log file mode
+ f2 one single log file (--single-log)
+ I *make an index (I0 don't make)
+ %I make a searchable index for this mirror (* %I0 don't make) (--search-index)
+
+</i></b></pre>
+
+<p align=justify> A cache memory area is used for updates and retries to
+make the process far more efficient than it would otherwise be. You can
+choose to (0) go without a cache, (1) do not check remotely if the file
+has been updated or not, just load the cache content, or (2) see what
+works best and use it (the default). Here is the no cache example.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -C0
+</i></b></pre>
+
+<p align=justify> The cache can be used to store all files - if desired
+- but if files are being stored on disk anyway (the normal process for a
+mirroring operation), this is not helpful.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -k
+</i></b></pre>
+
+<p align=justify> In some cases, a file from a mirror site is erased
+locally. For example, if a file contains inappropriate content, it may
+be erased from the mirror site but remain on the remote site. This
+option allows you to leave deleted files permanently deleted when you
+do a site update.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes --update -%n
+</i></b></pre>
+
+<p align=justify> If no log is desired, the following option should be
+added.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -Q
+</i></b></pre>
+
+<p align=justify> If no questions should be asked of the user (in a mode
+that would otherwise ask questions), the following option should be
+added.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -q
+</i></b></pre>
+
+<p align=justify> By adding these options, you get (-z) extra log
+information or (-Z) debugging information, and (-v) verbose screen
+output.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -z -Z -v
+</i></b></pre>
+
+<p align=justify> Multiple log files can be created, but by default,
+this option is used to put all logs into a single log file.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -f2
+</i></b></pre>
+
+<p align=justify> Finally, an index is normally made of the sites
+mirrored (a pointer to the first page found from each specified URL) in
+an index.html file in the project directory. This can be prevented
+through the use of this option:
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -I0
+</i></b></pre>
+
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%v
+</i></b></pre>
+
+<p align=justify> Animated information when using console-based version,
+example:
+<pre>
+17/95: localhost/manual/handler.html (6387 bytes) - OK
+</pre>
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -f2
+</i></b></pre>
+
+<p align=justify> Do not split error and information log (hts-log.txt
+and hts-err.txt) - use only one file (hts-log.txt)
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -%I linux.localdomain
+</i></b></pre>
+
+<p align=justify> Still in testing, this option asks the engine to
+generate an index.txt, useable by third-party programs or scripts, to
+index all words contained in html files. The above example will produce
+index.txt:
+
+<pre>
+..
+abridged
+ 1 linux/manual/misc/API.html
+ =1
+ (0)
+absence
+ 3 linux/manual/mod/core.html
+ 2 linux/manual/mod/mod_imap.html
+ 1 linux/manual/misc/nopgp.html
+ 1 linux/manual/mod/mod_proxy.html
+ 1 linux/manual/new_features_1_3.html
+ =8
+ (0)
+absolute
+ 3 linux/manual/mod/mod_auth_digest.html
+ 1 linux/manual/mod/mod_so.html
+ =4
+ (0)
+..
+</pre>
+
+<hr>
+<h3>Expert User Options</h3>
+
+<p align=justify> For expert users, the following options provide further
+options.
+
+<pre><b><i>
+Expert options:
+ pN priority mode: (* p3)
+ 0 just scan, don't save anything (for checking links)
+ 1 save only html files
+ 2 save only non html files
+ *3 save all files
+ 7 get html files before, then treat other files
+ S stay on the same directory
+ D *can only go down into subdirs
+ U can only go to upper directories
+ B can both go up&down into the directory structure
+ a *stay on the same address
+ d stay on the same principal domain
+ l stay on the same location (.com, etc.)
+ e go everywhere on the web
+ %H debug HTTP headers in logfile
+</i></b></pre>
+
+<p align=justify> One interesting application allows the mirror utility
+to check for valid and invalid links on a site. This is commonly used
+in site tests to look for missing pages or other html errors. I often
+run such programs against my web sites to verify that nothing is missing.
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -p0
+</i></b></pre>
+
+<p align=justify> To check for valid links outside of a site, the '-t'
+option can be used:
+
+<pre><b><i>
+httrack http://www.shoesizes.com -O /tmp/shoesizes -t
+</i></b></pre>
+
+<p align=justify> These options can be combined, for example, to provide
+a service that checks sites for validity of links and reports back a
+list of missing files and statistics.
+
+<p align=justify> Other options allow the retention of select files -
+for example - (1) only html files, (2) only non-html files, (3) all
+files, and (7) get all html files first, then get other files. This
+last option provides a fast way to get the web pointers so that, for
+example, a time limited collection process will tend to get the most
+important content first.
+
+<p align=justify> In many cases, we only want the files from a given
+directory. In this case, we specify this option:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -S
+</i></b></pre>
+
+<p align=justify> This option allows the mirror to go only into
+subdirectories of the initial directory on the remote host. You might
+want to combine it with the <b><i> -n </i></b> option to get all
+non-html files linked from the pages you find.
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -D -n
+</i></b></pre>
+
+<p align=justify> If you only want to work your way up the directory
+structure from the specified URL (don't ask me why you might want to do
+this), the following command line is for you:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -U
+</i></b></pre>
+
+<p align=justify> If you want to go both up and down the directory
+structure (i.e., anywhere on this site that the requested page leads
+you to), this option will be best:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -B
+</i></b></pre>
+
+<p align=justify> The default is to remain on the same IP address - or
+host name. This option specifies this explicitly:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -a
+</i></b></pre>
+
+<p align=justify> If you want to restrict yourself only to the same
+principal domain (e.g., include sites like ftp.shoesizes.com), you would
+use this option.
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -d
+</i></b></pre>
+
+<p align=justify> To restrict yourself to the same major portion of the
+Internet (e.g., .com, .net, .edu, etc.) try this option:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -l
+</i></b></pre>
+
+<p align=justify> Finally, if you want to mirror the whole Internet - at
+least every place on the internet that is ever led to - either directly
+or indirectly - from the starting point, use this one... Please note
+that this will almost always run you out of resources unless you use
+other options - like limiting the depth of search.
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -e
+</i></b></pre>
+
+<p align=justify> Last but not least, you can include debugging
+information on all headers from a collection process by using this
+option:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -'%H'
+</i></b></pre>
+
+<p align=justify> The options S, D, U, B, a, d, l, and e can be replaced
+with filter options approximately as follows:
+
+<pre><b><i>
+S -www.foo.com/* +www.foo.com/bar/*[file]
+D (default)
+U +www.foo.com/bar/* -www.foo.com/*[name]/*
+B +www.foo.com/bar/*
+a (default)
+d +*[name].foo.com/*
+l +*[name].com/*
+e +* (this is crazy unless a depth limit is used!)
+</i></b></pre>
+
+<hr>
+<h3>Guru Options - DO NOT USE!!!</h3>
+
+<p align=justify> This is a new section, for all "not very well
+documented options". You can use them, in fact, do not believe what is
+written above!
+
+<pre>
+ #0 Filter test (-#0 '*.gif' 'www.bar.com/foo.gif')
+</pre>
+
+<p align=justify> To test the filter system. Example:
+
+<pre>
+$ httrack -#0 'www.*.com/*foo*bar.gif' 'www.mysite.com/test/foo4bar.gif'
+www.mysite.com/test/foo4bar.gif does match www.*.com/*foo*bar.gif
+</pre>
+
+<pre>
+ #f Always flush log files
+</pre>
+
+<p align=justify> Useful if you want the hts-log.txt file to be flushed
+regularly (not buffered)
+
+<pre>
+ #FN Maximum number of filters
+</pre>
+
+<p align=justify> Use it if you want to use more than the maximum
+default number of filters, that is, 500 filters: -#F2000 for 2,000 filters
+
+<pre>
+ #h Version info
+</pre>
+
+<p align=justify> Information on the version number
+
+<pre>
+ #K Scan stdin (debug)
+</pre>
+
+<p align=justify> Not useful (debug only)
+
+<pre>
+ #L Maximum number of links (-#L1000000)
+</pre>
+
+<p align=justify> Use it if you want to use more than the maximum
+default number of links, that is, 100,000 links: -#L2000000 for 2,000,000 links
+
+<pre>
+ #p Display ugly progress information
+</pre>
+
+<p align=justify> Self-explanatory :) I will have to improve this one
+
+<pre>
+ #P Catch URL
+</pre>
+
+<p align=justify> "Catch URL" feature, allows to setup a temporary proxy
+to capture complex URLs, often linked with POST action (when using form
+based authentication)
+
+<pre>
+ #R Old FTP routines (debug)
+</pre>
+
+<p align=justify> Debug..
+
+<pre>
+ #T Generate transfer ops. log every minutes
+</pre>
+
+<p align=justify> Generate a log file with transfer statistics
+
+<pre>
+ #u Wait time
+</pre>
+
+<p align=justify> "On hold" option, in seconds
+
+<pre>
+ #Z Generate transfer rate statictics every minutes
+</pre>
+
+<p align=justify> Generate a log file with transfer statistics
+
+<pre>
+ #! Execute a shell command (-#! "echo hello")
+</pre>
+
+<p align=justify> Debug..
+
+<hr>
+<h3>Command-line Specific Options</h3>
+
+<pre><b><i>
+Command-line specific options:
+ V execute system command after each files ($0 is the filename: -V "rm \$0") (--userdef-cmd &lt;param&gt;)
+</i></b></pre>
+
+<p align=justify> This option is very nice for a wide array of actions
+that might be based on file details. For example, a simple log of all
+files collected could be generated by using:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes -V "/bin/echo \$0"
+</i></b></pre>
+
+<pre>
+ %U run the engine with another id when called as root (-%U smith) (--user &lt;param&gt;)
+</pre>
+
+<p align=justify> Change the UID of the owner when running as r00t
+
+<pre>
+ Details: User-defined option N
+ %[param] param variable in query string
+</pre>
+
+<p align=justify>
+This new option is important: you can include query-string content when forming the destination filename!
+
+<pre>
+Example: you are mirroring a huge website, with many pages named as:
+www.foo.com/catalog.php3?page=engineering
+www.foo.com/catalog.php3?page=biology
+www.foo.com/catalog.php3?page=computing
+..
+</pre>
+
+<p align=justify> Then you can use the -N option:
+
+<pre>
+httrack www.foo.com -N "%h%p/%n%[page].%t"
+</pre>
+
+<p align=justify> If found, the "page" parameter will be included after
+the filename, and the URLs above will be saved as:
+
+<pre>
+/home/mywebsites/foo/www.foo.com/catalogengineering.php3
+/home/mywebsites/foo/www.foo.com/catalogbiology.php3
+/home/mywebsites/foo/www.foo.com/catalogcomputing.php3
+...
+</pre>
+
+<hr>
+<h3>Shortcuts</h3>
+
+<p align=justify> These options provide shortcuts to combinations
+of other options that are commonly used.
+
+<pre><b><i>
+Shortcuts:
+--mirror &lt;URLs&gt; *make a mirror of site(s) (default)
+--get &lt;URLs&gt; get the files indicated, do not seek other URLs (-qg)
+--list &lt;text file&gt; add all URL located in this text file (-%L)
+--mirrorlinks &lt;URLs&gt; mirror all links in 1st level pages (-Y)
+--testlinks &lt;URLs&gt; test links in pages (-r1p0C0I0t)
+--spider &lt;URLs&gt; spider site(s), to test links: reports Errors &amp; Warnings (-p0C0I0t)
+--testsite &lt;URLs&gt; identical to --spider
+--skeleton &lt;URLs&gt; make a mirror, but gets only html files (-p1)
+--update update a mirror, without confirmation (-iC2)
+--continue continue a mirror, without confirmation (-iC1)
+--catchurl create a temporary proxy to capture an URL or a form post URL
+--clean erase cache & log files
+--http10 force http/1.0 requests (-%h)
+</i></b></pre>
+
+<p align=justify> Mirror is the default behavior. It is detailed
+earlier.
+
+<p align=justify> get simply gets the files specified on the command
+line.
+
+<p align=justify> The list option is useful for including a list of
+sites to collect data from.
+
+<p align=justify> The mirrorlinks option is ideal for using the result
+of a previous search (like a list of pages found in a web search or
+somebody's URL collection) to guide the collection of data. With
+additional options (such as depth 1) it can be used to collect all of
+the pages linked to a given page without going further. Here is an example:
+
+<pre><b><i>
+httrack http://www.shoesizes.com/bob/ -O /tmp/shoesizes --mirrorlinks -e -r1
+</i></b></pre>
+
+<p align=justify> Testing links in pages is useful for automating the
+verification that a link from a file is not pointing to a non-existent
+page.
+
+<p align=justify> The spider option does a site test automatically and
+returns errors for broken links.
+
+<p align=justify> The skeleton option makes a mirror of html files only.
+
+<p align=justify> The update option updates a site to match a remote
+mirror.
+
+<p align=justify> The continue option continues a previously terminated
+mirroring activity. This is useful for all sorts of mirror failures.
+
+<p align=justify> The catchurl option is a small application designed to
+catch difficult pages, like sites protected via forms. You can see
+at http://httrack.free.fr/HelpHtml/addurl.html a Windows description of
+this application. The purpose is to create a temporary proxy, that will
+catch the user request to a page, and then store this request to
+continue the mirror. For example,
+
+<ul><pre>
+1. browse www.foo.com/bar/ until you have a page with a form
+2. fill this form to enter the site BUT do not click "submit"
+3. start the --catchurl application
+4. change your browser proxy settings according to the --catchurl application
+5. click on "submit" on your browser
+6. HTTrack has now captured this click and has stored it
+7. restore your proxy settings
+8. (click back in your browser)
+</pre></ul>
+
+<p align=justify> The clean option erases cache and log files.
+
+<p align=justify> The http10 option forces http/1.0 requests (the same
+as -%h).
+
+<hr>
+
+<h2> Filters </h2>
+
+<p align=justify> Filters are normally placed at the end of the command
+line, but can be intermixed with other command line options if desired,
+except that if they are placed between (for example) the '-O' and the
+pathname, your results may be different than you might otherwise
+predict. There are two sorts of filters, filters that indicate what to
+include (+) and filters that indicate what to exclude (-).
+
+<p align=justify> Starting with the initially specified URLs, the
+default operation mode is to mirror starting from these URLs downward
+into the directory structure of the host (i.e. if one of your starting
+pages was www.all.net/test/a.html, all links starting with www.all.net/test/
+will be collected but links in www.all.net/anything-else will not be
+collected, because they are in a higher directory structure level). This
+prevents HTTrack from mirroring the whole site. If you want to
+download files that are in other parts of the site or of particular types -
+or to not download files in a particular part of the site or of a
+particular type, you can use filters to specify more precisely what to
+collect and what not to collect.
+
+<p align=justify> The syntax for filters is similar to Unix regular
+expressions. A simple filter can be made by using characters from the
+URL with '*' as a wildcard for 0 or more characters - with the last
+filter rule having the highest precedence. An initial '+' indicates
+URLs to include and an initial '-' indicates URLs to not include. For
+example:
+
+<pre><b><i>
+'-*' '+*jpg'
+</i></b></pre>
+
+<p align=justify> would only get files ending in the 'jpg' extension,
+while:
+
+<pre><b><i>
+'-*jpg'
+</i></b></pre>
+
+<p align=justify> would not get any files ending in the jpg extension.
+You can add more filter lines to restrict or expand the scope as
+desired. The last rule is checked first, and so on - so that the rules
+are in reverse priority order. Here's an example:
+
+ <table BORDER="1" CELLPADDING="2">
+ <tr><td>
+ <b>+*.gif -image*.gif</b>
+ </td><td>
+ Will accept all gif files BUT image1.gif,imageblue.gif,imagery.gif and so on
+ </tr>
+ <tr><td>
+ <b>-image*.gif +*.gif</b>
+ </td><td>
+ Will accept all gif files, because the second pattern is prioritary (because it is defined AFTER the first one)
+ </tr>
+ </table>
+
+<p align="JUSTIFY"> The full syntax for filters follows:
+
+ <table BORDER="1" CELLPADDING="2">
+ <tr>
+ <td><b>*</b></td>
+ <td>any characters (the most commonly used)</td>
+ </tr>
+ <tr>
+ <td><b>*[file] or *[name]</b></td>
+ <td>any filename or name, e.g. not /,? and ; characters</td>
+ </tr>
+ <tr>
+ <td><b>*[path]</b></td>
+ <td>any path (and filename), e.g. not ? and ; characters</td>
+ </tr>
+ <tr>
+ <td><b>*[a,z,e,r,t,y]</b></td>
+ <td>any letters among a,z,e,r,t,y</td>
+ </tr>
+ <tr>
+ <td><b>*[a-z]</b></td>
+ <td>any letters</td>
+ </tr>
+ <tr>
+ <td><b>*[0-9,a,z,e,r,t,y]</b></td>
+ <td>any characters among 0..9 and a,z,e,r,t,y</td>
+ </tr>
+ <tr>
+ <td><b>*[]</b></td>
+ <td>no characters must be present after</td>
+ </tr>
+ <tr>
+ <td> <b> &lt;filter&gt;*[&lt; NN]</b></td>
+ <td> size less than NN Kbytes</td>
+ </tr>
+ <tr>
+ <td> <b> &lt;filter&gt;*[&gt; PP]</b></td>
+ <td> size more than PP Kbytes</td>
+ </tr>
+ <tr>
+ <td> <b> &lt;filter&gt;*[&lt; NN &gt; PP]</b></td>
+ <td> size less than NN Kbytes and more than PP Kbytes</td>
+ </tr>
+ </table>
+
+
+<p align="justify"> Here are some examples of filters: (that can be
+generated automatically using the interface)
+
+ <table BORDER="1" CELLPADDING="2">
+ <tr>
+ <td><b>-www.all.net* </b></td>
+ <td>This will refuse/accept this web site (all links located in it will be rejected)</td>
+ </tr>
+ <tr>
+ <td><b>+*.com/*</b></td>
+ <td>This will accept all links that contain .com in them</td>
+ </tr>
+ <tr>
+ <td><b>-*cgi-bin* </b></td>
+ <td>This will refuse all links that contain cgi-bin in them</td>
+ </tr>
+ <tr>
+ <td><b>+*.com/*[path].zip </b></td>
+ <td>This will accept all zip files in .com addresses</td>
+ </tr>
+ <tr>
+ <td><b>-*someweb*/*.tar*</b></td>
+ <td>This will refuse all tar (or tar.gz etc.) files in hosts containing someweb</td>
+ </tr>
+ <tr>
+ <td><b>+*/*somepage*</b></td>
+ <td>This will accept all links containing somepage (but not in the address)</td>
+ </tr>
+ <tr>
+ <td><b>-*.html</b></td>
+ <td>This will refuse all html files from anywhere in the world. </td>
+ </tr>
+ <tr>
+ <td><b>+*.html*[]</b></td>
+ <td>Accept <b>*.html</b>, but the link must not have any supplemental characters
+ at the end (e.g., links with parameters, like <b>www.all.net/index.html?page=10</b>
+ will not match this filter)</td>
+ </tr>
+ <tr>
+ <td> <b> -*.gif*[&gt; 5] -*.zip +*.zip*[&lt; 10]</b></td>
+ <td> refuse all gif files smaller than 5KB, exclude all zip files, EXCEPT zip files smaller than 10KB </td>
+ </tr>
+ </table>
+
+<hr>
+
+<h2> User Authentication Protocols </h2>
+
+<p align=justify> Some servers require user ID and password information
+in order to gain access. In this example, the user ID smith with
+password foobar is accessing www.all.net/private/index.html
+
+<pre><b><i>
+httrack smith:foobar@www.all.net/private/index.html
+</i></b></pre>
+
+<p align=justify> For more advanced forms of authentication, such as
+those involving forms and cookies of various sorts, an emerging
+capability is being provided through the URL capture features
+(--catchurl). This feature doesn't work all of the time.
+
+<hr>
+
+<h2> .httrackrc </h2>
+
+<p align=justify> A file called '.httrackrc' can be placed in the
+current directory, or if not found there, in the home directory, to
+include command line options. These options are included whenever
+httrack is run. A sample .httrackrc follows:
+
+<ul><pre><b><i>
+ set sockets 8
+ set retries 4
+ index on
+ set useragent "Mozilla [en] (foo)"
+ set proxy proxy:8080
+</i></b></pre></ul>
+
+<p align=justify> But the syntax is not strict, you can use any of
+these:
+
+<ul><pre><b><i>
+ set sockets 8
+ set sockets=8
+ sockets=8
+ sockets 8
+</i></b></pre></ul>
+
+
+<p align=justify> .httrackrc is sought in the following sequence with
+the first occurrence used:
+
+<ul>
+<li> in the directory indicated by -O option (.httrackrc)
+<li> in the current directory (.httrackrc)
+<li> in the user's home directory (.httrackrc)
+<li> in /etc/httrack.conf (named httrack.conf to be "standard")
+</ul>
+
+<p align=justify> An example .httrackrc looks like:
+
+<ul><pre><b><i>
+set sockets=8
+set index on
+retries=2
+allow *.gif
+deny ad.doubleclick.net/*
+</i></b></pre></ul>
+
+<p align=justify> Each line is composed of an option name and a
+parameter. The "set" token can be used, but is not mandatory (it is
+ignored, in fact). The "=" is also optional, and is replaced by a
+space internally. The "on" and "off" are the same as "1" and "0"
+respectively. Therefore, the example .httrackrc above is equivalent to:
+
+<ul><pre><b><i>
+sockets=8
+index=1
+retries=2
+allow=*.gif
+deny=ad.doubleclick.net/*
+</i></b></pre></ul>
+
+<p align=justify> Because the "=" seems to (wrongly) imply a variable
+assignment (the option can be defined more than once to define more than
+one filter) the following .httrackrc:
+
+<ul><pre><b><i>
+allow *.gif
+allow *.jpg
+</i></b></pre></ul>
+
+<p align=justify> looks better for a human than:
+
+<ul><pre><b><i>
+allow=*.gif
+allow=*.jpg
+</i></b></pre></ul>
+
+<p align=justify> Here's an example run with the example .httrackrc file:
+
+<ul><pre><b><i>
+$ httrack ghost
+$ cat hts-cache/doit.log
+-c8 -C1 -R2 +*.gif -ad.doubleclick.net/* ghost
+</i></b></pre></ul>
+
+<p align=justify> The "-c8 -C1 -R2 +*.gif -ad.doubleclick.net/*" was
+added by the .httrackrc
+
+<hr>
+
+<h2> Release Notes </h2>
+
+<p align=justify> Some things change between releases. Here are some
+recent changes in httrack that may affect some of these options:
+
+<p align=justify> Options S,D,U,B, and a,d,l,e are default behaviours of
+HTTrack. They were the only options in old versions (1.0). With the
+introduction of filters, their roles are now limited, because filters
+can override them.
+
+<p align=justify> Note for the -N option: "%h%p/%n%q.%t" will now be
+used if possible. In normal cases, when a file does not have any
+parameters (www.foo.com/bar.gif) the %q option does not add anything, so
+there are no differences in file names. But when parameters are present
+(for example, www.foo.com/bar.cgi?FileCollection=133.gif), the
+additional query string (in this case, FileCollection=133.gif) will be
+"hashed" and added to the filename. For example:
+
+<pre><i><b>'www.all.net/bar.cgi?FileCollection=133.gif'</b></i></pre>
+<p align=justify> will be named
+<pre><i><b>'/tmp/mysite/bar4F2E.gif'</b></i></pre>
+
+<p align=justify> The additional 4 letters/digits are VERY useful in
+cases where there are a substantial number of identical files:
+
+<pre><i><b>
+www.all.net/bar.cgi?FileCollection=133.gif
+www.all.net/bar.cgi?FileCollection=rose.gif
+www.all.net/bar.cgi?FileCollection=plant4.gif
+www.all.net/bar.cgi?FileCollection=silver.gif
+and so on...
+</b></i></pre>
+
+<p align=justify> In these cases, there is a small probability of a hash
+collision for large numbers of files.
+
+<hr>
+
+<h2> Some More Examples </h2>
+
+<p align=justify> Here are some examples of special purpose httrack
+command lines that might be useful for your situation.
+
+<p align=justify> This is a 'forensic' dump of a web site - intended to
+collect all URLs reachable from the initial point and at that particular
+site. It is intended to make no changes whatsoever to the image. It
+also prints out an MD5 checksum of each file imaged so that the image
+can be verified later to detect any changes after imaging. It uses 5
+retries to be more certain than normal of getting the files, never
+abandons its efforts, keeps original links, does not generate error
+files, ignores site restrictions for robots, logs as much as it can,
+stays in the principal domain, and places debugging headers in the log file.
+
+<pre><b><i>
+httrack "www.website.com/" -O "/tmp/www.website.com" -R5H0Ko0s0zZd %H -V "md5 \$0" "+*.website.com/*"
+</i></b></pre>
+
+<p align=justify> Here's an example of a site where I pulled a set of
+data related to some subject. In this case, I only wanted the
+relevant subdirectory, all external links were to remain the same, a
+verbose listing of URLs was to be printed, and I wanted files near (n)
+and below (D) the original directory. Five retries just makes sure I
+don't miss anything.
+
+<pre><b><i>
+httrack "http://www.somesite.com/~library/thing/thingmain.htm" -O /tmp/thing -R5s0zZvDn
+</i></b></pre>
+
+<p align=justify> This listing is, of course, rather verbose. To reduce the noise,
+you might want to do something more like this:
+
+<pre><b><i>
+httrack "http://www.somesite.com/~library/thing/thingmain.htm" -O /tmp/thing -R5s0zvDn
+</i></b></pre>
+
+<p align=justify> A still quieter version - without any debugging
+information but with a list of files loaded looks like this:
+
+<pre><b><i>
+httrack "http://www.somesite.com/~library/thing/thingmain.htm" -O /tmp/thing -R5s0vDn
+</i></b></pre>
+
+<p align=justify> For the strong silent type, this might be still better:
+
+<pre><b><i>
+httrack "http://www.somesite.com/~library/thing/thingmain.htm" -O /tmp/thing -R5s0qDn
+</i></b></pre>
+
+<hr>
+
+<h2>General questions:</h2>
+
+<p align=justify><b>Q: The install is not working on NT without administrator rights! </b>
+
+<p align=justify> A: That's right. You can, however, install WinHTTrack
+on your own machine, and then copy your <b>WinHTTrack</b> folder from
+your <b>Program Files</b> folder to another machine, in a temporary
+directory (e.g. <b>C:\temp\</b>)
+
+<p align=justify><b>Q: Where can I find French/other languages documentation? </b>
+
+<p align=justify> A: Windows interface is available in several
+languages, but not yet the documentation!
+
+<p align=justify><b>Q: Is HTTrack working on NT/2000? </b>
+
+<p align=justify> A: Yes, it should
+
+<p align=justify><b>Q: What's the difference between HTTrack and WinHTTrack? </b>
+
+<p align=justify> A: WinHTTrack is the Windows release of HTTrack (with
+a graphic shell)
+
+<p align=justify><b>Q: Is HTTrack Mac compatible? </b>
+
+<p align=justify> A: No, because of a lack of time. But sources are
+available
+
+<p align=justify><b>Q: Can HTTrack be compiled on all Un*x? </b>
+
+<p align=justify> A: It should. The <b>Makefile</b> may be modified in
+some cases, however
+
+<p align=justify><b>Q: I use HTTrack for professional purpose. What
+about restrictions/license fee? </b>
+
+<p align=justify> A: There are no restrictions on using HTTrack for
+professional purposes, except if you want to sell a product including
+HTTrack components (parts of the source, or any other component). See
+the <b>license.txt</b> file for more information
+
+<p align=justify><b>Q: Is a DLL/library version available? </b>
+
+<p align=justify> A: Not yet. But, again, sources are available (see
+<b>license.txt</b> for distribution infos)
+
+<p align=justify><b>Q: Is there a X11/KDE shell available for Linux and
+Un*x? </b>
+
+<p align=justify> A: No. Unfortunately, we do not have enough time for
+that - if you want to help us, please write one!
+
+<hr><h2> Troubleshooting:</h2>
+
+<p align=justify><b>Q: Only the first page is caught. What's wrong?</b>
+<br> A: First, check the <b>hts-err.txt</b> error log file - this can
+give you valuable information.
+
+<p align=justify> The problem can be a website that redirects you to
+another site (for example, <b>www.all.net</b> to <b>public.www.all.net</b>) : in
+this case, use filters to accept this site
+
+<p align=justify> This can be, also, a problem in the HTTrack options
+(link depth too low, for example)
+
+<p align=justify><b>Q: With WinHTTrack, sometimes the minimize in system
+tray causes a crash!</b>
+
+<p align=justify> A: This bug sometimes appears in the shell on some
+systems. If you encounter this problem, avoid minimizing the window!
+
+<p align=justify><b>Q: URLs with https:// are not working!</b><br>
+A: HTTrack does not support https (secure socket layer protocol), only
+http protocol
+
+<p align=justify><b>Q: Files are created with strange names, like
+'-1.html'!</b>
+
+<p align=justify> A: Check the build options (you may have selected
+user-defined structure with wrong parameters!)
+
+<p align=justify><b>Q: When capturing real audio links (.ra), I only get
+a shortcut!</b>
+
+<p align=justify> A: Yes. The audio/video realtime streaming capture is
+not yet supported
+
+<p align=justify><b>Q: Using user:password@address is not working!</b>
+
+<p align=justify> A: Again, first check the <b>hts-err.txt</b> error log
+file - this can give you valuable information
+
+<p align=justify> The site may have a different authentication scheme
+(form based authentication, for example)
+
+<p align=justify><b>Q: When I use HTTrack, nothing is mirrored (no
+files) What's happening? </b>
+
+<p align=justify> A: First, be sure that the URL typed is correct.
+Then, check if you need to use a proxy server (see proxy options in
+WinHTTrack or the <b>-P proxy:port</b> option in the command line
+program). The site you want to mirror may only accept certain browsers.
+You can change your &quot;browser identity&quot; with the Browser ID
+option in the OPTION box. Finally, you can have a look at the
+hts-err.txt (and hts-log.txt) file to see what happened.
+
+<p align=justify><b>Q: There are missing files! What's happening? </b>
+
+<p align=justify> A: You may want to capture files that are in a
+different folder, or in another web site. In this case, HTTrack does not
+capture them automatically, you have to ask it to do so. For that, use the
+filters.
+
+<p align=justify> Example: You are downloading
+<b>http://www.all.net/foo/</b> and can not get .jpg images located in
+<b>http://www.all.net/bar/</b> (for example, http://www.all.net/bar/blue.jpg)
+
+<p align=justify> Then, add the filter rule <b>+www.all.net/bar/*.jpg</b> to
+accept all .jpg files from this location
+
+<p align=justify> You can, also, accept all files from the /bar folder
+with <b>+www.all.net/bar/*</b>, or only html files with
+<b>+www.all.net/bar/*.html</b> and so on..
+
+<p align=justify><b>Q: I'm downloading too many files! What can I do?
+</b>
+
+<p align=justify> A: This is often the case when you use too large
+filters, for example <b>+*.html</b>, which asks the engine to catch all
+.html pages (even ones on other sites!). In this case, try to use more
+specific filters, like <b>+www.all.net/specificfolder/*.html</b>
+
+<p align=justify> If you still have too many files, use filters to avoid
+some files. For example, if you have too many files from www.all.net/big/,
+use <b>-www.all.net/big/*</b> to avoid all files from this folder.
+
+<p align=justify><b>Q: File types are sometimes changed! Why? </b>
+
+<p align=justify> A: By default, HTTrack tries to know the type of
+remote files. This is useful when links like
+<b>http://www.all.net/foo.cgi?id=1</b> can be either HTML pages, images or
+anything else. Locally, foo.cgi will not be recognized as an html page,
+or as an image, by your browser. HTTrack has to rename the file as
+foo.html or foo.gif so that it can be viewed.
+
+<p align=justify> Sometimes, however, some data files are seen by the
+remote server as html files, or images : in this case HTTrack is being
+fooled, and renames the file. You can avoid this by disabling the type
+checking in the option panel.
+
+<p align=justify><b>Q: I cannot access several pages (access
+forbidden, or redirect to another location), but I can with my browser,
+what's going on?</b>
+
+<p align=justify> A: You may need cookies! Cookies are specific data
+(for example, your username or password) that are sent to your browser
+once you have logged in certain sites so that you only have to log-in
+once. For example, after having entered your username in a website, you
+can view pages and articles, and the next time you will go to this site,
+you will not have to re-enter your username/password.
+
+<p align=justify> To "merge" your personal cookies to an HTTrack
+project, just copy the cookies.txt file from your Netscape folder (or
+the cookies located into the Temporary Internet Files folder for IE)
+into your project folder (or even the HTTrack folder)
+
+<p align=justify><b>Q: Some pages can't be seen, or are displayed
+with errors! </b>
+
+<p align=justify> A: Some pages may include javascript or java files
+that are not recognized. For example, generated filenames. There may
+be transfer problems, too (broken pipe, etc.). But most mirrors do
+work. We still are working to improve the mirror quality of HTTrack.
+
+<p align=justify><b>Q: Some Java applets do not work properly! </b>
+
+<p align=justify> A: Java applets may not work in some cases, for
+example if HTTrack failed to detect all included classes or files called
+within the class file. Sometimes, Java applets need to be online,
+because remote files are directly caught. Finally, the site structure
+can be incompatible with the class (always try to keep the original site
+structure when you want to get Java classes)
+
+<p align=justify> If there is no way to make some classes work properly,
+you can exclude them with the filters. They will be available, but only
+online.
+
+<p align=justify><b>Q: HTTrack has been idle for a long time without
+transferring. What's happening? </b>
+
+<p align=justify> A: Maybe you try to reach some very slow sites. Try a
+lower TimeOut value (see options, or <b>-Txx</b> option in the command
+line program). Note that you will abandon the entire site (except if
+the option is unchecked) if a timeout happens. You can, with the Shell
+version, skip some slow files, too.
+
+<p align=justify><b>Q: I want to update a site, but it's taking too much
+time! What's happening?</b>
+
+<p align=justify> A: First, HTTrack always tries to minimize the
+download flow by interrogating the server about the file changes. But,
+because HTTrack has to rescan all files from the beginning to rebuild the
+local site structure, it can take some time. Besides, some servers are
+not very smart and always consider that they get newer files, forcing
+HTTrack to reload them, even if no changes have been made!
+
+<p align=justify><b>Q: I am behind a firewall. What can I do? </b>
+
+<p align=justify> A: You need to use a proxy, too. Ask your
+administrator to know the proxy server's name/port. Then, use the proxy
+field in HTTrack or use the <b>-P proxy:port</b> option in the command
+line program.
+
+<p align=justify><b>Q: HTTrack has crashed during a mirror, what's
+happening? </b>
+
+<p align=justify> A: We are trying to avoid bugs and problems so that
+the program can be as reliable as possible. But we can not be
+infallible. If you encounter a bug, please check if you have the latest
+release of HTTrack, and send us an email with a detailed description of
+your problem (OS type, addresses concerned, crash description, and
+everything you deem to be necessary). This may help the other users
+too.
+
+<p align=justify><b>Q: I want to update a mirrored project, but HTTrack
+is retransferring all pages. What's going on? </b>
+
+<p align=justify> A: First, HTTrack always rescans all local pages to
+reconstitute the website structure, and it can take some time. Then, it
+asks the server if the files that are stored locally are up-to-date. On
+most sites, pages are not updated frequently, and the update process is
+fast. But some sites have dynamically-generated pages that are
+considered as "newer" than the local ones.. even if they are
+identical! Unfortunately, there is no possibility to avoid this problem,
+which is strongly linked with the server abilities.
+
+<p align=justify> <hr><h2> Questions concerning a mirror: </h2>
+
+<p align=justify><b>Q: I want to mirror a Web site,
+but there are some files outside the domain, too. How to retrieve them?
+</b>
+
+<p align=justify> A: If you just want to retrieve files that can be
+reached through links, just activate the 'get file near links' option.
+But if you want to retrieve html pages too, you can use either wildcards
+or explicit addresses ; e.g. add <b>www.all.net/*</b> to accept all
+files and pages from www.all.net.
+
+<p align=justify><b>Q: I have forgotten some URLs of files during a long
+mirror.. Should I redo all? </b>
+
+<p align=justify> A: No, if you have kept the 'cache' files (in
+hts-cache), cached files will not be retransferred.
+
+<p align=justify><b>Q: I just want to retrieve all ZIP files or other
+files in a web site/in a page. How do I do it? </b>
+
+<p align=justify> A: You can use different methods. You can use the
+'get files near a link' option if files are in a foreign domain. You
+can use, too, a filter address: adding <b>+*.zip</b> in the URL list (or
+in the filter list) will accept all ZIP files, even if these files are
+outside the address.
+
+<p align=justify> Example : <b>httrack www.all.net/someaddress.html
++*.zip</b> will allow you to retrieve all zip files that are linked on
+the site.
+
+<p align=justify><b>Q: There are ZIP files in a page, but I don't want
+to transfer them. How do I do it? </b>
+
+<p align=justify> A: Just filter them: add <b>-*.zip</b> in the filter
+list.
+
+<p align=justify><b>Q: I don't want to load gif files.. but what may
+happen if I watch the page? </b>
+
+<p align=justify> A: If you have filtered gif files (<b>-*.gif</b>),
+links to gif files will be rebuilt so that your browser can find them on
+the server.
+
+<p align=justify><b>Q: I get all types of files on a web site, but I
+didn't select them on filters! </b>
+
+<p align=justify> A: By default, HTTrack retrieves all types of files on
+authorized links. To avoid that, define filters like
+
+<p align=justify><b><b>-* +&lt;website&gt;/*.html +&lt;website&gt;/*.htm
++&lt;website&gt;/ +*.&lt;type wanted&gt;</b></b>
+
+<p align=justify> Example: <b>httrack www.all.net/index.html -*
++www.all.net/*.htm* +www.all.net/*.gif +www.all.net/*.jpg</b>
+
+<p align=justify><b>Q: When I use filters, I get too many files! </b>
+
+<p align=justify> A: You are using too large a filter, for example
+<b>*.html</b> will get ALL html files identified. If you want to get
+all files on an address, use <b>www.&lt;address&gt;/*.html</b>. There
+are lots of possibilities using filters.
+
+<p align=justify> Example:<b>httrack www.all.net +*.www.all.net/*.htm*</b>
+
+<p align=justify><b>Q: When I use filters, I can't access another
+domain, but I have filtered it! </b>
+
+<p align=justify> A: You may have made a mistake declaring filters, for
+example <b>+www.all.net/* -*all* </b> will not work, because
+-*all* has a higher priority (because it has been declared after
++www.all.net)
+
+<p align=justify><b>Q: Must I add a&nbsp; '+' or '-' in the filter list
+when I want to use filters? </b>
+
+<p align=justify> A: YES. '+' is for accepting links and '-' to avoid
+them. If you forget it, HTTrack will consider that you want to accept a
+filter if there is a wild card in the syntax - e.g. +&lt;filter&gt; is
+identical to &lt;filter&gt; if &lt;filter&gt; contains a wild card (*)
+(else it will be considered as a normal link to mirror)
+
+<p align=justify><b>Q: I want to find file(s) in a web-site. How do I do it?
+</b>
+
+<p align=justify> A: You can use the filters: forbid all files (add a
+<b>-*</b> in the filter list) and accept only html files and the file(s)
+you want to retrieve (BUT do not forget to add
+<b>+&lt;website&gt;*.html</b> in the filter list, or pages will not be
+scanned!). Add the name of files you want with a <b>*/</b> before ; i.e.
+if you want to retrieve file.zip, add <b>*/file.zip</b>.
+
+<p align=justify> Example:<b>httrack www.all.net +www.all.net/*.htm*
++thefileiwant.zip</b>
+
+<p align=justify><b>Q: I want to download ftp files/ftp site. How to
+do? </b>
+
+<p align=justify> A: First, HTTrack is not the best tool to download
+many ftp files. Its ftp engine is basic (even if reget are possible)
+and if your purpose is to download a complete site, use a specific
+client.
+
+<p align=justify> You can download ftp files just by typing the URL,
+such as <b>ftp://ftp.www.all.net/pub/files/file010.zip</b> and list ftp
+directories like <b>ftp://ftp.www.all.net/pub/files/</b> .
+
+<p align=justify> Note: For the filters, use something like
+<b>+ftp://ftp.www.all.net/*</b>
+
+<p align=justify><b>Q: How can I retrieve .asp or .cgi sources instead
+of .html result? </b>
+
+<p align=justify> A: You can't! For security reasons, web servers do not
+allow that.
+
+<p align=justify><b>Q: How can I remove these annoying <b>&lt;!--
+Mirrored from... --&gt;</b> from html files? </b>
+
+<p align=justify> A: Use the footer option (-%F, or see the WinHTTrack
+options)
+
+<p align=justify><b>Q: Do I have to select between ascii/binary transfer
+mode? </b>
+
+<p align=justify> A: No, http files are always transferred as binary
+files. Ftp files, too (even if ascii mode could be selected)
+
+<p align=justify><b>Q: Can HTTrack perform form-based authentication?
+</b>
+
+<p align=justify> A: Yes. See the URL capture abilities (--catchurl for
+command-line release, or in the WinHTTrack interface)
+
+<p align=justify><b>Q: Can I redirect downloads to tar/zip archive? </b>
+
+<p align=justify> A: Yes. See the shell system command option (-V
+option for command-line release)
+
+<p align=justify><b>Q: Can I use username/password authentication on a
+site? </b>
+
+<p align=justify> A: Yes. Use user:password@your_url (example:
+<b>http://foo:bar@www.all.net/private/mybox.html</b>)
+
+<p align=justify><b>Q: Can I use username/password authentication for a
+proxy? </b>
+
+<p align=justify> A: Yes. Use user:password@your_proxy_name as your
+proxy name (example: <b>smith:foo@proxy.mycorp.com</b>)
+
+<p align=justify><b>Q: Can HTTrack generate HP-UX or ISO9660 compatible
+files? </b>
+
+<p align=justify> A: Yes. See the build options (-N, or see the
+WinHTTrack options)
+
+<p align=justify><b>Q: Is there any SOCKS support? </b>
+
+<p align=justify> A: Not yet!
+
+<p align=justify><b>Q: What's this hts-cache directory? Can I remove it?
+</b>
+
+<p align=justify> A: NO if you want to update the site, because this
+directory is used by HTTrack for this purpose. If you remove it,
+options and URLs will not be available for updating the site
+
+<p align=justify><b>Q: Can I start a mirror from my bookmarks? </b>
+
+<p align=justify> A: Yes. Drag&amp;Drop your bookmark.html file to the
+WinHTTrack window (or use file://filename for command-line release) and
+select bookmark mirroring (mirror all links in pages, -Y) or bookmark
+testing (--testlinks)
+
+<p align=justify><b>Q: I am getting a "pipe broken" error and the mirror
+stops, what should I do? </b>
+
+<p align=justify> A: Chances are this is a result of downloading too
+many pages at a time. Remote servers may not allow or be able to handle
+too many sessions, or your system may be unable to provide the necessary
+resources. Try reducing this number - for example using the -c2 option
+for only 2 simultaneous sessions.
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/filters.html b/HelpHtml/filters.html
new file mode 100644
index 0000000..6438dab
--- /dev/null
+++ b/HelpHtml/filters.html
@@ -0,0 +1,261 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+<body>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Filters: Advanced</em></h2>
+
+<br>
+
+See also: The <a href="faq.html#VF1">FAQ</a><br>
+
+<br>
+
+ <i>You have to know that once you have defined
+ start links, the default mode is to mirror these links - i.e. if one of your start pages is
+ www.someweb.com/test/index.html, all links starting with www.someweb.com/test/ will be
+ accepted. But links directly in www.someweb.com/.. will not be accepted, however, because
+ they are in a higher structure. This prevents HTTrack from mirroring the whole site. (All
+ files in structure levels equal or lower than the primary links will be retrieved.)<br>
+ </i>
+ <br>
+ <b>But</b> you may want to download files that are not directly in the subfolders, or on the
+ contrary refuse files of a particular type. That is the purpose of filters.
+ <br>
+
+ <br>
+ To accept a family of links (for example, all links with a specific name or type), you just have to add
+ an authorization filter, like <b><tt>+*.gif</tt></b>. The pattern is a plus (this one: <b><tt>+</tt></b>),
+ followed by a pattern composed of letters and wildcards (this one: <b><tt>*</tt></b>).
+ <br><br>
+ To forbid a family of links, define
+ an exclusion filter, like <b><tt>-*.gif</tt></b>. The pattern is a dash (this one: <b><tt>-</tt></b>),
+ followed by the same kind of pattern as for the authorization filter.
+ <br><br>
+ Example: +*.gif will accept all files ending with .gif<br>
+ Example: -*.gif will refuse all files ending with .gif<br>
+ <br>
+
+ <br>
+ <u>Let's talk a little more about patterns:</u>
+
+ <br>
+ Filters are analyzed by HTTrack from the first filter to the last one. The complete URL
+ name is compared to filters defined by the user or added automatically by HTTrack. <br><br>
+ A filter has a higher priority than the ones before it - hierarchy is important: <br>
+
+ <br>
+ <table BORDER="1" CELLPADDING="2">
+ <tr><td>
+ <tt>+*.gif -image*.gif</tt>
+ </td><td>
+ Will accept all gif files BUT image1.gif,imageblue.gif,imagery.gif and so on
+ </tr>
+ <tr><td>
+ <tt>-image*.gif +*.gif</tt>
+ </td><td>
+ Will accept all gif files, because the second pattern takes priority (because it is defined AFTER the first one)
+ </tr>
+ </table>
+ <br>
+
+ <br>
+ We saw that patterns are composed of letters and wildcards (<b><tt>*</tt></b>), as in */image*.gif
+
+ <p align="JUSTIFY"><br>
+ Special wild cards can be used for specific characters: (*[..])</p>
+ <table BORDER="1" CELLPADDING="2">
+ <tr>
+ <td><tt>*</tt></td>
+ <td>any characters (the most commonly used)</td>
+ </tr>
+ <tr>
+ <td><tt>*[file] or *[name]</tt></td>
+ <td>any filename or name, i.e. not /,? and ; characters</td>
+ </tr>
+ <tr>
+ <td><tt>*[path]</tt></td>
+ <td>any path (and filename), i.e. not ? and ; characters</td>
+ </tr>
+ <tr>
+ <td><tt>*[a,z,e,r,t,y]</tt></td>
+ <td>any letters among a,z,e,r,t,y</td>
+ </tr>
+ <tr>
+ <td><tt>*[a-z]</tt></td>
+ <td>any letters</td>
+ </tr>
+ <tr>
+ <td><tt>*[0-9,a,z,e,r,t,y]</tt></td>
+ <td>any characters among 0..9 and a,z,e,r,t,y</td>
+ </tr>
+ <tr>
+ <td><tt>*[]</tt></td>
+ <td>no characters must be present after</td>
+ </tr>
+ </table>
+
+
+ <p align="JUSTIFY"><br>
+ Here are some examples of filters: (that can be generated automatically using the
+ interface)</p>
+ <table BORDER="1" CELLPADDING="2">
+ <tr>
+ <td><tt>www.thisweb.com* </tt></td>
+ <td>This will refuse/accept this web site (all links located in it will be rejected/accepted)</td>
+ </tr>
+ <tr>
+ <td><tt>*.com/*</tt></td>
+ <td>This will refuse/accept all links that contain .com in them</td>
+ </tr>
+ <tr>
+ <td><tt>*cgi-bin* </tt></td>
+ <td>This will refuse/accept all links that contain cgi-bin in them</td>
+ </tr>
+ <tr>
+ <td><tt>www.*.com/*[path].zip </tt></td>
+ <td>This will refuse/accept all zip files in .com addresses</td>
+ </tr>
+ <tr>
+ <td><tt>*someweb*/*.tar*</tt></td>
+ <td>This will refuse/accept all tar (or tar.gz etc.) files in hosts containing someweb</td>
+ </tr>
+ <tr>
+ <td><tt>*/*somepage*</tt></td>
+ <td>This will refuse/accept all links containing somepage (but not in the address)</td>
+ </tr>
+ <tr>
+ <td><tt>*.html</tt></td>
+ <td>This will refuse/accept all html files. <br>
+ Warning! With this filter you will accept ALL html files, even those in other addresses.
+ (causing a global (!) web mirror..) Use www.someweb.com/*.html to accept all html files from
+ a web.</td>
+ </tr>
+ <tr>
+ <td><tt>*.html*[]</tt></td>
+ <td>Identical to <tt>*.html</tt>, but the link must not have any supplemental characters
+ at the end (links with parameters, like <tt>www.someweb.com/index.html?page=10</tt>, will be
+ refused)</td>
+ </tr>
+ </table>
+
+<br>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/httrack.css b/HelpHtml/httrack.css
new file mode 100644
index 0000000..4f2c01b
--- /dev/null
+++ b/HelpHtml/httrack.css
@@ -0,0 +1,70 @@
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 0.96em/1.3em "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+/*
+ Width for "#siteNavigation td" based on number of buttons
+ Six: 16.6%
+*/
+#siteNavigation td { width: 16.6%; }
+#siteNavigation a {
+ display: block; padding: 2px;
+ text-align: center; font-weight: bold; color: #448; /* font-size: 12px; */ background-color: #99c;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+#pageContent p { margin: 0; margin-bottom: 30px; }
+
+
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+.tableWidth { min-width: 400px; }
+
+
+/* ----------------------------------- */
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+
+
diff --git a/HelpHtml/httrack.man.html b/HelpHtml/httrack.man.html
new file mode 100644
index 0000000..8a6fbc2
--- /dev/null
+++ b/HelpHtml/httrack.man.html
@@ -0,0 +1,2122 @@
+<html>
+<head>
+<meta name="generator" content="groff -Thtml, see www.gnu.org">
+<meta name="Content-Style" content="text/css">
+<title>httrack</title>
+</head>
+<body>
+
+<h1 align=center>httrack</h1>
+<a href="#NAME">NAME</a><br>
+<a href="#SYNOPSIS">SYNOPSIS</a><br>
+<a href="#DESCRIPTION">DESCRIPTION</a><br>
+<a href="#EXAMPLES">EXAMPLES</a><br>
+<a href="#OPTIONS">OPTIONS</a><br>
+<a href="#FILES">FILES</a><br>
+<a href="#ENVIRONMENT">ENVIRONMENT</a><br>
+<a href="#DIAGNOSTICS">DIAGNOSTICS</a><br>
+<a href="#LIMITS">LIMITS</a><br>
+<a href="#BUGS">BUGS</a><br>
+<a href="#COPYRIGHT">COPYRIGHT</a><br>
+<a href="#CRYPTOGRAPHY">CRYPTOGRAPHY</a><br>
+<a href="#AVAILABILITY">AVAILABILITY</a><br>
+<a href="#AUTHOR">AUTHOR</a><br>
+<a href="#SEE ALSO">SEE ALSO</a><br>
+
+<hr>
+<!-- Creator : groff version 1.17.2 -->
+<!-- CreationDate: Tue Sep 17 21:25:28 2002 -->
+<a name="NAME"></a>
+<h2>NAME</h2>
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+httrack - offline browser : copy websites to a local directory</td></table>
+<a name="SYNOPSIS"></a>
+<h2>SYNOPSIS</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack [ url ]... [ -filter ]... [ +filter ]... [</b> ]
+[ <b>-w, --mirror</b> ] [ <b>-W, --mirror-wizard</b> ] [
+<b>-g, --get-files</b> ] [ <b>-i, --continue</b> ] [ <b>-Y,
+--mirrorlinks</b> ] [ <b>-P, --proxy</b> ] [ <b>-%f,
+--httpproxy-ftp[=N]</b> ] [ <b>-rN, --depth[=N]</b> ] [
+<b>-%eN, --ext-depth[=N]</b> ] [ <b>-mN, --max-files[=N]</b>
+] [ <b>-MN, --max-size[=N]</b> ] [ <b>-EN,
+--max-time[=N]</b> ] [ <b>-AN, --max-rate[=N]</b> ] [
+<b>-%cN, --connection-per-second[=N]</b> ] [ <b>-GN,
+--max-pause[=N]</b> ] [ <b>-cN, --sockets[=N]</b> ] [
+<b>-TN, --timeout</b> ] [ <b>-RN, --retries[=N]</b> ] [
+<b>-JN, --min-rate[=N]</b> ] [ <b>-HN,
+--host-control[=N]</b> ] [ <b>-%P,
+--extended-parsing[=N]</b> ] [ <b>-n, --near</b> ] [ <b>-t,
+--test</b> ] [ <b>-%L, --list</b> ] [ <b>-NN,
+--structure[=N]</b> ] [ <b>-LN, --long-names[=N]</b> ] [
+<b>-KN, --keep-links[=N]</b> ] [ <b>-x,
+--replace-external</b> ] [ <b>-%x, --no-passwords</b> ] [
+<b>-%q, --include-query-string</b> ] [ <b>-o,
+--generate-errors</b> ] [ <b>-X, --purge-old[=N]</b> ] [
+<b>-%p, --preserve</b> ] [ <b>-bN, --cookies[=N]</b> ] [
+<b>-u, --check-type[=N]</b> ] [ <b>-j, --parse-java[=N]</b>
+] [ <b>-sN, --robots[=N]</b> ] [ <b>-%h, --http-10</b> ] [
+<b>-%B, --tolerant</b> ] [ <b>-%s, --updatehack</b> ] [
+<b>-%A, --assume</b> ] [ <b>-@iN, --protocol[=N]</b> ] [
+<b>-F, --user-agent</b> ] [ <b>-%F, --footer</b> ] [ <b>-%l,
+--language</b> ] [ <b>-C, --cache[=N]</b> ] [ <b>-k,
+--store-all-in-cache</b> ] [ <b>-%n, --do-not-recatch</b> ]
+[ <b>-%v, --display</b> ] [ <b>-Q, --do-not-log</b> ] [
+<b>-q, --quiet</b> ] [ <b>-z, --extra-log</b> ] [ <b>-Z,
+--debug-log</b> ] [ <b>-v, --verbose</b> ] [ <b>-f,
+--file-log</b> ] [ <b>-f2, --single-log</b> ] [ <b>-I,
+--index</b> ] [ <b>-%I, --search-index</b> ] [ <b>-pN,
+--priority[=N]</b> ] [ <b>-S, --stay-on-same-dir</b> ] [
+<b>-D, --can-go-down</b> ] [ <b>-U, --can-go-up</b> ] [
+<b>-B, --can-go-up-and-down</b> ] [ <b>-a,
+--stay-on-same-address</b> ] [ <b>-d,
+--stay-on-same-domain</b> ] [ <b>-l, --stay-on-same-tld</b>
+] [ <b>-e, --go-everywhere</b> ] [ <b>-%H,
+--debug-headers</b> ] [ <b>-V, --userdef-cmd</b> ] [ <b>-%U,
+--user</b> ] [ <b>-K, --keep-links[=N]</b> ] [</td></table>
+<a name="DESCRIPTION"></a>
+<h2>DESCRIPTION</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack</b> allows you to download a World Wide Web site
+from the Internet to a local directory, building recursively
+all directories, getting HTML, images, and other files from
+the server to your computer. HTTrack arranges the original
+site's relative link-structure. Simply open a page of the
+&quot;mirrored&quot; website in your browser, and you can
+browse the site from link to link, as if you were viewing it
+online. HTTrack can also update an existing mirrored site,
+and resume interrupted downloads.</td></table>
+<a name="EXAMPLES"></a>
+<h2>EXAMPLES</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack www.someweb.com/bob/</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+mirror site www.someweb.com/bob/ and only this
+site</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack www.someweb.com/bob/ www.anothertest.com/mike/
++*.com/*.jpg</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+mirror the two sites together (with shared links) and accept
+any .jpg files on .com sites</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack www.someweb.com/bob/bobby.html +*
+-r6</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+means get all files starting from bobby.html, with 6
+link-depth, and possibility of going everywhere on the
+web</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack www.someweb.com/bob/bobby.html --spider -P
+proxy.myhost.com:8080</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+runs the spider on www.someweb.com/bob/bobby.html using a
+proxy</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack --update</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+updates a mirror in the current folder</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+will bring you to the interactive mode</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>httrack --continue</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+continues a mirror in the current folder</td></table>
+<a name="OPTIONS"></a>
+<h2>OPTIONS</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>General options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-O</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+path for mirror/logfiles+cache (-O path mirror[,path cache
+and logfiles]) (--path &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Action options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-w</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*mirror web sites (--mirror)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-W</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+mirror web sites, semi-automatic (asks questions)
+(--mirror-wizard)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-g</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+just get files (saved in the current directory)
+(--get-files)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-i</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+continue an interrupted mirror using the cache
+(--continue)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-Y</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+mirror ALL links located in the first level pages (mirror
+links) (--mirrorlinks)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Proxy options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-P</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+proxy use (-P proxy:port or -P user:pass@proxy:port)
+(--proxy &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%f</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*use proxy for ftp (f0 don't use)
+(--httpproxy-ftp[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Limits options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-rN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+set the mirror depth to N (* r9999)
+(--depth[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%eN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+set the external links depth to N (* %e0)
+(--ext-depth[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-mN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+maximum file length for a non-html file
+(--max-files[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-mN,N2</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+maximum file length for non html (N) and html
+(N2)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-MN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+maximum overall size that can be uploaded/scanned
+(--max-size[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-EN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+maximum mirror time in seconds (60=1 minute, 3600=1 hour)
+(--max-time[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-AN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+maximum transfer rate in bytes/seconds (1000=1KB/s max)
+(--max-rate[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%cN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+maximum number of connections/seconds (*%c10)
+(--connection-per-second[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-GN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+pause transfer if N bytes reached, and wait until lock file
+is deleted (--max-pause[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Flow control:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-cN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+number of multiple connections (*c8)
+(--sockets[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-TN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+timeout, number of seconds after a non-responding link is
+shut down (--timeout)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-RN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+number of retries, in case of timeout or non-fatal errors
+(*R1) (--retries[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-JN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+traffic jam control, minimum transfer rate (bytes/seconds)
+tolerated for a link (--min-rate[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-HN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+host is abandoned if: 0=never, 1=timeout, 2=slow, 3=timeout
+or slow (--host-control[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Links options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%P</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*extended parsing, attempt to parse all links, even in
+unknown tags or Javascript (%P0 don't use)
+(--extended-parsing[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-n</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+get non-html files near an html file (ex: an image located
+outside) (--near)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-t</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+test all URLs (even forbidden ones) (--test)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%L</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;file&gt; add all URL located in this text file (one URL
+per line) (--list &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Build options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-NN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+structure type (0 *original structure, 1+: see below)
+(--structure[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-or</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+user defined structure (-N
+&quot;%h%p/%n%q.%t&quot;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-LN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+long names (L1 *long names / L0 8-3 conversion / L2 ISO9660
+compatible) (--long-names[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-KN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+keep original links (e.g. http://www.adr/link) (K0 *relative
+link, K absolute links, K4 original links, K3 absolute URI
+links) (--keep-links[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-x</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+replace external html links by error pages
+(--replace-external)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%x</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+do not include any password for external password protected
+websites (%x0 include) (--no-passwords)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%q</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*include query string for local files (useless, for
+information purpose only) (%q0 don't include)
+(--include-query-string)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-o</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*generate output html file in case of error (404..) (o0 don't
+generate) (--generate-errors)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-X</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*purge old files after update (X0 keep delete)
+(--purge-old[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%p</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+preserve html files as is (identical to -K4 -%F &quot;&quot;
+) (--preserve)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Spider options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-bN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+accept cookies in cookies.txt (0=do not accept,* 1=accept)
+(--cookies[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-u</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+check document type if unknown (cgi,asp..) (u0 don't check,
+* u1 check but /, u2 check always)
+(--check-type[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-j</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*parse Java Classes (j0 don't parse)
+(--parse-java[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-sN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+follow robots.txt and meta robots tags
+(0=never,1=sometimes,* 2=always) (--robots[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%h</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+force HTTP/1.0 requests (reduce update features, only for
+old servers or proxies) (--http-10)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%B</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+tolerant requests (accept bogus responses on some servers,
+but not standard!) (--tolerant)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%s</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+update hacks: various hacks to limit re-transfers when
+updating (identical size, bogus response..)
+(--updatehack)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%A</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+assume that a type (cgi,asp..) is always linked with a mime
+type (-%A php3,cgi=text/html;dat,bin=application/x-zip)
+(--assume &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-@iN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6
+only) (--protocol[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Browser ID:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-F</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+user-agent field (-F &quot;user-agent name&quot;)
+(--user-agent &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%F</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+footer string in Html code (-%F &quot;Mirrored [from host %s
+[file %s [at %s]]]&quot;) (--footer
+&lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%l</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+preferred language (-%l &quot;fr, en, jp, *&quot;)
+(--language &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Log, index, cache</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-C</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+create/use a cache for updates and retries (C0 no cache,C1
+cache has priority,* C2 test update before)
+(--cache[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-k</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+store all files in cache (not useful if files on disk)
+(--store-all-in-cache)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%n</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+do not re-download locally erased files
+(--do-not-recatch)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%v</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+display on screen filenames downloaded (in realtime) - * %v1
+short version (--display)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-Q</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+no log - quiet mode (--do-not-log)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-q</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+no questions - quiet mode (--quiet)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-z</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+log - extra infos (--extra-log)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-Z</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+log - debug (--debug-log)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-v</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+log on screen (--verbose)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-f</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*log in files (--file-log)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-f2</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+one single log file (--single-log)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-I</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*make an index (I0 don't make) (--index)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%I</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+make a searchable index for this mirror (* %I0 don't make)
+(--search-index)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Expert options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-pN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+priority mode: (* p3) (--priority[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-p0</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+just scan, don't save anything (for checking
+links)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-p1</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+save only html files</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-p2</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+save only non html files</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-*p3</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+save all files</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-p7</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+get html files before, then treat other files</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-S</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+stay on the same directory (--stay-on-same-dir)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-D</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*can only go down into subdirs (--can-go-down)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-U</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+can only go to upper directories (--can-go-up)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-B</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+can both go up&amp;down into the directory structure
+(--can-go-up-and-down)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-a</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+*stay on the same address
+(--stay-on-same-address)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-d</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+stay on the same principal domain
+(--stay-on-same-domain)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-l</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+stay on the same TLD (eg: .com)
+(--stay-on-same-tld)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-e</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+go everywhere on the web (--go-everywhere)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%H</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+debug HTTP headers in logfile (--debug-headers)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Guru options: (do NOT use if possible)</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#0</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Filter test (-#0 *.gif www.bar.com/foo.gif )
+(--debug-testfilters &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#f</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Always flush log files (--advanced-flushlogs)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#FN</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Maximum number of filters
+(--advanced-maxfilters[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#h</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Version info (--version)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#K</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Scan stdin (debug) (--debug-scanstdin)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#L</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Maximum number of links (-#L1000000)
+(--advanced-maxlinks)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#p</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Display ugly progress information
+(--advanced-progressinfo)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#P</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Catch URL (--catch-url)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#R</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Old FTP routines (debug) (--debug-oldftp)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#T</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Generate transfer ops. log every minute
+(--debug-xfrstats)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#u</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Wait time (--advanced-wait)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#Z</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Generate transfer rate statistics every minute
+(--debug-ratestats)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-#!</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Execute a shell command (-#! &quot;echo hello&quot;) (--exec
+&lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Command-line specific options:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-V</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+execute system command after each file ($0 is the filename:
+-V &quot;rm $0&quot;) (--userdef-cmd
+&lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-%U</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+run the engine with another id when called as root (-%U
+smith) (--user &lt;param&gt;)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Details: Option N</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N0</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Site-structure (default)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N1</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+HTML in web/, images/other files in web/images/</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N2</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+HTML in web/HTML, images/other in web/images</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N3</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+HTML in web/, images/other in web/</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N4</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+HTML in web/, images/other in web/xxx, where xxx is the file
+extension (all gif will be placed onto web/gif, for
+example)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N5</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Images/other in web/xxx and HTML in web/HTML</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N99</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+All files in web/, with random names (gadget !)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N100</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Site-structure, without www.domain.xxx/</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N101</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N1 except that &quot;web&quot; is replaced by
+the site's name</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N102</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N2 except that &quot;web&quot; is replaced by
+the site's name</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N103</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N3 except that &quot;web&quot; is replaced by
+the site's name</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N104</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N4 except that &quot;web&quot; is replaced by
+the site's name</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N105</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N5 except that &quot;web&quot; is replaced by
+the site's name</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N199</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N99 except that &quot;web&quot; is replaced by
+the site's name</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N1001</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N1 except that there is no &quot;web&quot;
+directory</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N1002</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N2 except that there is no &quot;web&quot;
+directory</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N1003</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N3 except that there is no &quot;web&quot;
+directory (option set for g option)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N1004</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N4 except that there is no &quot;web&quot;
+directory</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N1005</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N5 except that there is no &quot;web&quot;
+directory</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-N1099</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Identical to N99 except that there is no &quot;web&quot;
+directory</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Details: User-defined option N</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+%n Name of file without file type (ex: image) %N Name of
+file, including file type (ex: image.gif) %t File type (ex:
+gif) %p Path [without ending /] (ex: /someimages) %h Host
+name (ex: www.someweb.com) %M URL MD5 (128 bits, 32 ascii
+bytes) %Q query string MD5 (128 bits, 32 ascii bytes) %q
+small query string MD5 (16 bits, 4 ascii bytes) %s? Short
+name version (ex: %sN) %[param] param variable in query
+string</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Details: Option K</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-K0</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+foo.cgi?q=45 -&gt; foo4B54.html?q=45 (relative URI,
+default)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-K</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+-&gt; http://www.foobar.com/folder/foo.cgi?q=45 (absolute
+URL) (--keep-links[=N])</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-K4</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+-&gt; foo.cgi?q=45 (original URL)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+-K3</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+-&gt; /folder/foo.cgi?q=45 (absolute URI)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="4%"></td><td width="96%">
+<b>Shortcuts:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--mirror</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;URLs&gt; *make a mirror of site(s)
+(default)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--get</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;URLs&gt; get the files indicated, do not seek other URLs
+(-qg)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--list</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;text file&gt; add all URLs located in this text file
+(-%L)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--mirrorlinks</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;URLs&gt; mirror all links in 1st level pages
+(-Y)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--testlinks</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;URLs&gt; test links in pages (-r1p0C0I0t)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--spider</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;URLs&gt; spider site(s), to test links: reports Errors
+&amp; Warnings (-p0C0I0t)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--testsite</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;URLs&gt; identical to --spider</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--skeleton</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+&lt;URLs&gt; make a mirror, but gets only html files
+(-p1)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--update</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+update a mirror, without confirmation (-iC2)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--continue</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+continue a mirror, without confirmation (-iC1)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--catchurl</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+create a temporary proxy to capture a URL or a form post
+URL</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--clean</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+erase cache &amp; log files</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+--http10</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+force http/1.0 requests (-%h)</td></table>
+<a name="FILES"></a>
+<h2>FILES</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<i>/etc/httrack.conf</i></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+The system wide configuration file.</td></table>
+<a name="ENVIRONMENT"></a>
+<h2>ENVIRONMENT</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+HOME</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Is being used if you defined in /etc/httrack.conf the line
+<i>path ~/websites/#</i></td></table>
+<a name="DIAGNOSTICS"></a>
+<h2>DIAGNOSTICS</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+Errors/Warnings are reported to <i>hts-log.txt</i> by
+default, or to stderr if the <i>-v</i> option was
+specified.</td></table>
+<a name="LIMITS"></a>
+<h2>LIMITS</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+These are the principal limits of HTTrack at the moment.
+Note that most of them are generally shared among all
+offline browsers, due to technological limits.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<small>- Several scripts generating complex filenames may
+not find them (ex:
+img.src='image'+a+Mobj.dst+'.gif')</small></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<small>- Some java classes may not find some files on them
+(class included)</small></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<small>- Cgi-bin links may not work properly in some cases
+(parameters needed). To avoid them: use filters like
+-*cgi-bin*</small></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<small>- Flash files are not yet parsed</small></td></table>
+<a name="BUGS"></a>
+<h2>BUGS</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+Please report bugs to <b>&lt;bugs@httrack.com&gt;.</b>
+Include a complete, self-contained example that will allow
+the bug to be reproduced, and say which version of httrack
+you are using. Do not forget to detail options used, OS
+version, and any other information you deem
+necessary.</td></table>
+<a name="COPYRIGHT"></a>
+<h2>COPYRIGHT</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+Copyright (C) Xavier Roche and other
+contributors</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+This program is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or any later version.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+This program is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied
+warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE. See the GNU General Public License for more
+details.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+You should have received a copy of the GNU General Public
+License along with this program; if not, write to the Free
+Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.</td></table>
+<a name="CRYPTOGRAPHY"></a>
+<h2>CRYPTOGRAPHY</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+PLEASE REMEMBER THAT EXPORT/IMPORT AND/OR USE OF STRONG
+CRYPTOGRAPHY SOFTWARE, PROVIDING CRYPTOGRAPHY HOOKS OR EVEN
+JUST COMMUNICATING TECHNICAL DETAILS ABOUT CRYPTOGRAPHY
+SOFTWARE IS ILLEGAL IN SOME PARTS OF THE WORLD. SO, WHEN YOU
+IMPORT THIS PACKAGE TO YOUR COUNTRY, RE-DISTRIBUTE IT FROM
+THERE OR EVEN JUST EMAIL TECHNICAL SUGGESTIONS OR EVEN
+SOURCE PATCHES TO THE AUTHOR OR OTHER PEOPLE YOU ARE
+STRONGLY ADVISED TO PAY CLOSE ATTENTION TO ANY EXPORT/IMPORT
+AND/OR USE LAWS WHICH APPLY TO YOU. THE AUTHORS ARE NOT
+LIABLE FOR ANY VIOLATIONS YOU MAKE HERE. SO BE CAREFUL, IT
+IS YOUR RESPONSIBILITY.</td></table>
+<a name="AVAILABILITY"></a>
+<h2>AVAILABILITY</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+The most recent released version of httrack can be found at:
+<b>http://www.httrack.com</b></td></table>
+<a name="AUTHOR"></a>
+<h2>AUTHOR</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+Xavier Roche &lt;roche@httrack.com&gt;</td></table>
+<a name="SEE ALSO"></a>
+<h2>SEE ALSO</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+The <b>HTML</b> documentation (available online at
+<b>http://www.httrack.com/HelpHtml/</b> ) contains more
+detailed information. Please also refer to the <b>httrack
+FAQ</b> (available online at
+<b>http://www.httrack.com/HelpHtml/faq.html</b>
+)</td></table>
+<hr>
+</body>
+</html>
diff --git a/HelpHtml/images/bg_rings.gif b/HelpHtml/images/bg_rings.gif
new file mode 100644
index 0000000..da7c5c8
--- /dev/null
+++ b/HelpHtml/images/bg_rings.gif
Binary files differ
diff --git a/HelpHtml/images/header_title_4.gif b/HelpHtml/images/header_title_4.gif
new file mode 100644
index 0000000..93847d3
--- /dev/null
+++ b/HelpHtml/images/header_title_4.gif
Binary files differ
diff --git a/HelpHtml/images/screenshot_01.jpg b/HelpHtml/images/screenshot_01.jpg
new file mode 100644
index 0000000..cbafd82
--- /dev/null
+++ b/HelpHtml/images/screenshot_01.jpg
Binary files differ
diff --git a/HelpHtml/img/addurl1.gif b/HelpHtml/img/addurl1.gif
new file mode 100644
index 0000000..009277c
--- /dev/null
+++ b/HelpHtml/img/addurl1.gif
Binary files differ
diff --git a/HelpHtml/img/addurl2.gif b/HelpHtml/img/addurl2.gif
new file mode 100644
index 0000000..92b1a11
--- /dev/null
+++ b/HelpHtml/img/addurl2.gif
Binary files differ
diff --git a/HelpHtml/img/addurl3.gif b/HelpHtml/img/addurl3.gif
new file mode 100644
index 0000000..6df70f8
--- /dev/null
+++ b/HelpHtml/img/addurl3.gif
Binary files differ
diff --git a/HelpHtml/img/addurl4.gif b/HelpHtml/img/addurl4.gif
new file mode 100644
index 0000000..721e463
--- /dev/null
+++ b/HelpHtml/img/addurl4.gif
Binary files differ
diff --git a/HelpHtml/img/addurl5.gif b/HelpHtml/img/addurl5.gif
new file mode 100644
index 0000000..4f61bc4
--- /dev/null
+++ b/HelpHtml/img/addurl5.gif
Binary files differ
diff --git a/HelpHtml/img/backblue.gif b/HelpHtml/img/backblue.gif
new file mode 100644
index 0000000..99cbbaa
--- /dev/null
+++ b/HelpHtml/img/backblue.gif
Binary files differ
diff --git a/HelpHtml/img/fade.gif b/HelpHtml/img/fade.gif
new file mode 100644
index 0000000..579cc5f
--- /dev/null
+++ b/HelpHtml/img/fade.gif
Binary files differ
diff --git a/HelpHtml/img/httrack.gif b/HelpHtml/img/httrack.gif
new file mode 100644
index 0000000..51da0d2
--- /dev/null
+++ b/HelpHtml/img/httrack.gif
Binary files differ
diff --git a/HelpHtml/img/snap1_a.gif b/HelpHtml/img/snap1_a.gif
new file mode 100644
index 0000000..cd918e0
--- /dev/null
+++ b/HelpHtml/img/snap1_a.gif
Binary files differ
diff --git a/HelpHtml/img/snap1_b.gif b/HelpHtml/img/snap1_b.gif
new file mode 100644
index 0000000..539e7c1
--- /dev/null
+++ b/HelpHtml/img/snap1_b.gif
Binary files differ
diff --git a/HelpHtml/img/snap1_c.gif b/HelpHtml/img/snap1_c.gif
new file mode 100644
index 0000000..496e1cf
--- /dev/null
+++ b/HelpHtml/img/snap1_c.gif
Binary files differ
diff --git a/HelpHtml/img/snap2_a.gif b/HelpHtml/img/snap2_a.gif
new file mode 100644
index 0000000..ddb0367
--- /dev/null
+++ b/HelpHtml/img/snap2_a.gif
Binary files differ
diff --git a/HelpHtml/img/snap2_b.gif b/HelpHtml/img/snap2_b.gif
new file mode 100644
index 0000000..6d6ad07
--- /dev/null
+++ b/HelpHtml/img/snap2_b.gif
Binary files differ
diff --git a/HelpHtml/img/snap3_a.gif b/HelpHtml/img/snap3_a.gif
new file mode 100644
index 0000000..84a32ef
--- /dev/null
+++ b/HelpHtml/img/snap3_a.gif
Binary files differ
diff --git a/HelpHtml/img/snap4_a.gif b/HelpHtml/img/snap4_a.gif
new file mode 100644
index 0000000..18a8df9
--- /dev/null
+++ b/HelpHtml/img/snap4_a.gif
Binary files differ
diff --git a/HelpHtml/img/snap5_a.gif b/HelpHtml/img/snap5_a.gif
new file mode 100644
index 0000000..450fa13
--- /dev/null
+++ b/HelpHtml/img/snap5_a.gif
Binary files differ
diff --git a/HelpHtml/img/snap9.gif b/HelpHtml/img/snap9.gif
new file mode 100644
index 0000000..1277e01
--- /dev/null
+++ b/HelpHtml/img/snap9.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_a.gif b/HelpHtml/img/snap9_a.gif
new file mode 100644
index 0000000..342f219
--- /dev/null
+++ b/HelpHtml/img/snap9_a.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_b.gif b/HelpHtml/img/snap9_b.gif
new file mode 100644
index 0000000..07b9bd4
--- /dev/null
+++ b/HelpHtml/img/snap9_b.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_c.gif b/HelpHtml/img/snap9_c.gif
new file mode 100644
index 0000000..0620351
--- /dev/null
+++ b/HelpHtml/img/snap9_c.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d.gif b/HelpHtml/img/snap9_d.gif
new file mode 100644
index 0000000..120a26f
--- /dev/null
+++ b/HelpHtml/img/snap9_d.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d2.gif b/HelpHtml/img/snap9_d2.gif
new file mode 100644
index 0000000..4970603
--- /dev/null
+++ b/HelpHtml/img/snap9_d2.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d3.gif b/HelpHtml/img/snap9_d3.gif
new file mode 100644
index 0000000..c8f666d
--- /dev/null
+++ b/HelpHtml/img/snap9_d3.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d4.gif b/HelpHtml/img/snap9_d4.gif
new file mode 100644
index 0000000..82034eb
--- /dev/null
+++ b/HelpHtml/img/snap9_d4.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d5.gif b/HelpHtml/img/snap9_d5.gif
new file mode 100644
index 0000000..5e4c4f8
--- /dev/null
+++ b/HelpHtml/img/snap9_d5.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d6.gif b/HelpHtml/img/snap9_d6.gif
new file mode 100644
index 0000000..e300bf3
--- /dev/null
+++ b/HelpHtml/img/snap9_d6.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d7.gif b/HelpHtml/img/snap9_d7.gif
new file mode 100644
index 0000000..67d73bf
--- /dev/null
+++ b/HelpHtml/img/snap9_d7.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_d8.gif b/HelpHtml/img/snap9_d8.gif
new file mode 100644
index 0000000..994a15b
--- /dev/null
+++ b/HelpHtml/img/snap9_d8.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_e.gif b/HelpHtml/img/snap9_e.gif
new file mode 100644
index 0000000..2ac97d1
--- /dev/null
+++ b/HelpHtml/img/snap9_e.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_f.gif b/HelpHtml/img/snap9_f.gif
new file mode 100644
index 0000000..58fced6
--- /dev/null
+++ b/HelpHtml/img/snap9_f.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_g.gif b/HelpHtml/img/snap9_g.gif
new file mode 100644
index 0000000..4b26899
--- /dev/null
+++ b/HelpHtml/img/snap9_g.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_g2.gif b/HelpHtml/img/snap9_g2.gif
new file mode 100644
index 0000000..4dbd71a
--- /dev/null
+++ b/HelpHtml/img/snap9_g2.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_g3.gif b/HelpHtml/img/snap9_g3.gif
new file mode 100644
index 0000000..318f016
--- /dev/null
+++ b/HelpHtml/img/snap9_g3.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_h.gif b/HelpHtml/img/snap9_h.gif
new file mode 100644
index 0000000..8232bdc
--- /dev/null
+++ b/HelpHtml/img/snap9_h.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_i.gif b/HelpHtml/img/snap9_i.gif
new file mode 100644
index 0000000..e671b61
--- /dev/null
+++ b/HelpHtml/img/snap9_i.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_j.gif b/HelpHtml/img/snap9_j.gif
new file mode 100644
index 0000000..c154dca
--- /dev/null
+++ b/HelpHtml/img/snap9_j.gif
Binary files differ
diff --git a/HelpHtml/img/snap9_k.gif b/HelpHtml/img/snap9_k.gif
new file mode 100644
index 0000000..8c5621e
--- /dev/null
+++ b/HelpHtml/img/snap9_k.gif
Binary files differ
diff --git a/HelpHtml/index.html b/HelpHtml/index.html
new file mode 100644
index 0000000..8b3f732
--- /dev/null
+++ b/HelpHtml/index.html
@@ -0,0 +1,153 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Documentation</em></h2>
+
+<ul>
+ <li><a href="overview.html">Overview</a></li>
+ <br>HTTrack overview and features<br><br>
+ <li><a href="../license.txt">License</a></li>
+ <br>The HTTrack license agreement<br><br>
+ <li>How to Use</li>
+ <ul>
+ <li><a href="shelldoc.html">Windows 95/98/NT/2K Shell version</a></li>
+ <br>Windows Shell documentation, with step-by-step example<br>
+ <br>
+ <li><a href="fcguide.html">HTTrack Users Guide By Fred Cohen</a></li>
+ <br>A tutorial that describes all command-line options, for Linux and Windows users<br>
+ <br>
+ <li><a href="dev.html">Developer/Programming</a></li>
+ <br>How to use HTTrack in batch files, indexing or as library<br>
+ </ul>
+ <br>
+ <li><a href="abuse.html">How not to Use</a></li>
+ <br>What you mustn't do<br>
+ <br>
+ <li><a href="faq.html">FAQ &amp; Troubleshooting</a></li>
+ <br>Frequently asked questions about HTTrack, and troubleshooting<br><br>
+ <li><a href="../history.txt">Release Changes</a></li>
+ <br>New features and bug fixes are listed here<br><br>
+ <li><a href="contact.html">About this program</a></li>
+ <br>How to reach us for comments, bug reports and anything else, and information about this project<br>
+</ul>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/options.html b/HelpHtml/options.html
new file mode 100644
index 0000000..7db0516
--- /dev/null
+++ b/HelpHtml/options.html
@@ -0,0 +1,363 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Options</em></h2>
+
+<ul>
+ <li>Filters: <a href="filters.html">how to use them</a></li>
+ <br><small>Here you can find information on filters: how to accept all gif files in a mirror, for example</small>
+ <br><br>
+ <li>List of options</li>
+</ul>
+
+<tt>
+<pre>
+
+w mirror with automatic wizard
+This is the default scanning option, the engine automatically scans links according to the default options, and filters defined. It does not prompt a message when a "foreign" link is reached.
+
+W semi-automatic mirror with help-wizard (asks questions)
+This option lets the engine ask the user if a link must be mirrored or not, when a new web has been found.
+
+g just get files (saved in the current directory)
+This option forces the engine not to scan the files indicated - i.e. the engine only gets the files indicated.
+
+i continue an interrupted mirror using the cache
+This option indicates to the engine that a mirror must be updated or continued.
+
+rN recurse get with limited link depth of N
+This option sets the maximum recurse level. Default is infinite (the engine "knows" that it should not go out of current domain)
+
+a stay on the same address
+This is the default primary scanning option, the engine does not go out of domains without permissions (filters, for example)
+
+d stay on the same principal domain
+This option lets the engine go on all sites that exist on the same principal domain.
+Example: a link located at www.someweb.com that goes to members.someweb.com will be followed.
+
+l stay on the same location (.com, etc.)
+This option lets the engine go on all sites that exist on the same location.
+Example: a link located at www.someweb.com that goes to www.anyotherweb.com will be followed.
+Warning: this is a potentially dangerous option, limit the recurse depth with r option.
+
+e go everywhere on the web
+This option lets the engine go on any sites.
+Example: a link located at www.someweb.com that goes to www.anyotherweb.org will be followed.
+Warning: this is a potentially dangerous option, limit the recurse depth with r option.
+
+n get non-html files 'near' an html file (ex: an image located outside)
+This option lets the engine catch all files that have references on a page, but that exist outside the web site.
+Example: List of ZIP files links on a page.
+
+t test all URLs (even forbidden ones)
+This option lets the engine test all links that are not caught.
+Example: to test broken links in a site
+
+x replace external html links by error pages
+This option tells the engine to rewrite all links not taken into warning pages.
+Example: to browse offline a site, and to warn people that they must be online if they click to external links.
+
+sN follow robots.txt and meta robots tags
+This option sets the way the engine treats "robots.txt" files. This file is often set by webmasters to avoid cgi-bin directories, or other irrelevant pages.
+Values:
+ s0 Do not take robots.txt rules
+ s1 Follow rules, if compatible with internal filters
+ s2 Always follow site's rules
+
+bN accept cookies in cookies.txt
+This option activates or deactivates cookie handling
+ b0 do not accept cookies
+ b1 accept cookies
+
+S stay on the same directory
+This option asks the engine to stay on the same folder level.
+Example: A link in /index.html that points to /sub/other.html will not be followed
+
+D can only go down into subdirs
+This is the default option, the engine can go everywhere on the same directory, or in lower structures
+
+U can only go to upper directories
+This option asks the engine to stay on the same folder level or in upper structures
+
+B can both go up&amp;down into the directory structure
+This option lets the engine go in any directory level
+
+Y mirror ALL links located in the first level pages (mirror links)
+This option is activated for the links typed in the command line
+Example: if you have a list of web sites in www.asitelist.com/index.html, then all these sites will be mirrored
+
+NN name conversion type (0 *original structure 1,2,3 html/data in one directory)
+ N0 Site-structure (default)
+ N1 Html in web/, images/other files in web/images/
+ N2 Html in web/html, images/other in web/images
+ N3 Html in web/, images/other in web/
+ N4 Html in web/, images/other in web/xxx, where xxx is the file extension (all gif will be placed onto web/gif, for example)
+ N5 Images/other in web/xxx and Html in web/html
+
+ N99 All files in web/, with random names (gadget !)
+
+ N100 Site-structure, without www.domain.xxx/
+ N101 Identical to N1 except that "web" is replaced by the site's name
+ N102 Identical to N2 except that "web" is replaced by the site's name
+ N103 Identical to N3 except that "web" is replaced by the site's name
+ N104 Identical to N4 except that "web" is replaced by the site's name
+ N105 Identical to N5 except that "web" is replaced by the site's name
+ N199 Identical to N99 except that "web" is replaced by the site's name
+
+ N1001 Identical to N1 except that there is no "web" directory
+ N1002 Identical to N2 except that there is no "web" directory
+ N1003 Identical to N3 except that there is no "web" directory (option set for g option)
+ N1004 Identical to N4 except that there is no "web" directory
+ N1005 Identical to N5 except that there is no "web" directory
+ N1099 Identical to N99 except that there is no "web" directory
+
+LN long names
+ L0 Filenames and directory names are limited to 8 characters + 3 for extension
+ L1 No restrictions (default)
+
+K keep original links (e.g. http://www.adr/link) (K0 *relative link)
+This option has only been kept for compatibility reasons
+
+pN priority mode:
+ p0 just scan, don't save anything (for checking links)
+ p1 save only html files
+ p2 save only non html files
+ p3 save all files
+ p7 get html files before, then treat other files
+
+cN number of multiple connections (*c8)
+Set the number of multiple simultaneous connections
+
+O path for mirror/logfiles+cache (-O path_mirror[,path_cache_and_logfiles])
+This option defines the path for mirror and log files
+Example: -O "/user/webs","/user/logs"
+
+P proxy use (-P proxy:port or -P user:pass@proxy:port)
+This option defines the proxy used in this mirror
+Example: -P proxy.myhost.com:8080
+
+F user-agent field (-F "user-agent name")
+This option defines the user-agent field
+Example: -F "Mozilla/4.5 (compatible; HTTrack 1.2x; Windows 98)"
+
+mN maximum file length for a non-html file
+This option defines the maximum size for non-html files
+Example: -m100000
+
+mN,N' for non html (N) and html (N')
+This option defines the maximum size for non-html files and html-files
+Example: -m100000,250000
+
+MN maximum overall size that can be uploaded/scanned
+This option defines the maximum amount of bytes that can be downloaded
+Example: -M1000000
+
+EN maximum mirror time in seconds (60=1 minute, 3600=1 hour)
+This option defines the maximum time that the mirror can last
+Example: -E3600
+
+AN maximum transfer rate in bytes/seconds (1000=1kb/s max)
+This option defines the maximum transfer rate
+Example: -A2000
+
+GN pause transfer if N bytes reached, and wait until lock file is deleted
+This option asks the engine to pause every time N bytes have been transferred, and restarts when the lock file "hts-pause.lock" is deleted
+Example: -G20000000
+
+u check document type if unknown (cgi,asp..)
+This option defines the way the engine checks the file type
+ u0 do not check
+ u1 check but /
+ u2 check always
+
+RN number of retries, in case of timeout or non-fatal errors (*R0)
+This option sets the maximum number of tries that can be processed for a file
+
+o *generate output html file in case of error (404..) (o0 don't generate)
+This option defines whether the engine has to generate an html output file or not if an error occurred
+
+TN timeout, number of seconds after a non-responding link is shutdown
+This option defines the timeout
+Example: -T120
+
+JN traffic jam control, minimum transfer rate (bytes/seconds) tolerated for a link
+This option defines the minimum transfer rate
+Example: -J200
+
+HN host is abandoned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow
+This option defines whether the engine has to abandon a host if a timeout/"too slow" error occurred
+
+&amp;P extended parsing, attempt to parse all links (even in unknown tags or Javascript)
+This option activates the extended parsing, which attempts to find links in unknown Html code/javascript
+
+j *parse Java Classes (j0 don't parse)
+This option defines whether the engine has to parse java files or not to catch included files
+
+I *make an index (I0 don't make)
+This option defines whether the engine has to generate an index.html on the top directory
+
+X *delete old files after update (X0 keep delete)
+This option defines whether the engine has to delete locally, after an update, files that have been deleted in the remote mirror, or that have been excluded
+
+C *create/use a cache for updates and retries (C0 no cache)
+This option defines whether the engine has to generate a cache for retries and updates or not
+
+k store all files in cache (not useful if files on disk)
+This option defines whether the engine has to store all files in cache or not
+
+V execute system command after each file ($0 is the filename: -V "rm \$0")
+This option lets the engine execute a command for each file saved on disk
+
+q quiet mode (no questions)
+Do not ask questions (for example, to confirm an option)
+
+Q log quiet mode (no log)
+Do not generate log files
+
+v verbose screen mode
+Log files are printed in the screen
+
+f *log file mode
+Log files are generated into two log files
+
+z extra infos log
+Add more information to log files
+
+Z debug log
+Add debug information to log files
+
+
+--mirror &lt;URLs&gt; *make a mirror of site(s)
+--get &lt;URLs&gt; get the files indicated, do not seek other URLs
+--mirrorlinks &lt;URLs&gt; test links in pages (identical to -Y)
+--testlinks &lt;URLs&gt; test links in pages
+--spider &lt;URLs&gt; spider site(s), to test links (reports Errors &amp; Warnings)
+--update &lt;URLs&gt; update a mirror, without confirmation
+--skeleton &lt;URLs&gt; make a mirror, but gets only html files
+
+--http10 force http/1.0 requests when possible
+
+</pre>
+</tt>
+
+<!-- ==================== Start epilogue ==================== -->
+
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/overview.html b/HelpHtml/overview.html
new file mode 100644
index 0000000..e48add9
--- /dev/null
+++ b/HelpHtml/overview.html
@@ -0,0 +1,156 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Overview</em></h2>
+
+<br>
+
+<h3>
+<center>
+<a href="http://www.httrack.com/" target="_blank">Please visit our website!</a>
+<br>
+</center>
+</h3>
+<br>
+
+<table border="0">
+<tr>
+<td>
+<p align="center"><img src="img/snap4_a.gif" width="325" height="233" alt="WinHTTrack snapshot"> </p>
+</td>
+<td valign="top">
+<p align="justify">
+<big>
+<b>HTTrack</b> is an easy-to-use offline browser utility. It allows you to download a World
+Wide website from the Internet to a local directory, building recursively all directories,
+getting html, images, and other files from the server to your computer. HTTrack arranges
+the original site's relative link-structure. Simply open a page of the "mirrored" website in
+your browser, and you can browse the site from link to link, as if you were viewing it
+online. HTTrack can also update an existing mirrored site, and resume interrupted
+downloads. HTTrack is fully configurable, and has an integrated help system.
+</big>
+</p>
+</td>
+</tr>
+</table>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/shelldoc.html b/HelpHtml/shelldoc.html
new file mode 100644
index 0000000..826a8c8
--- /dev/null
+++ b/HelpHtml/shelldoc.html
@@ -0,0 +1,135 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Windows Shell Documentation</em></h2>
+
+<br>
+
+<ul>
+ <li><a href="step.html">Step-by-step example</a></li>
+ <br><small>Quick start with WinHTTrack</small><br><br>
+ <li><a href="step9.html">Options</a></li>
+ <br><small>List of available options</small><br><br>
+</ul>
+
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/start.html b/HelpHtml/start.html
new file mode 100644
index 0000000..fb66ab3
--- /dev/null
+++ b/HelpHtml/start.html
@@ -0,0 +1,16 @@
+<html>
+
+<head>
+<title>HTTrack documentation</title>
+<script language="JavaScript">
+<!--
+window.open("index.html","WinHTTrackHelp","toolbar=yes,location=no,directories=no,status=no,menubar=no,scrollbars=yes,resizable=yes");
+document.close();
+window.close();
+// -->
+</script>
+
+<body>
+HTTrack documentation
+</body>
+</html>
diff --git a/HelpHtml/step.html b/HelpHtml/step.html
new file mode 100644
index 0000000..41fbe3e
--- /dev/null
+++ b/HelpHtml/step.html
@@ -0,0 +1,139 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>How to start, Step-by-step</em></h2>
+
+<br>
+
+<ul>
+ <li>Step 1: <a href="step1.html">Choose a project name and destination folder</a></li>
+ <li>Step 2: <a href="step2.html">Fill the addresses</a></li>
+ <li>Step 3: <a href="step3.html">Ready to start</a></li>
+ <li>Step 4: <a href="step4.html">Wait!</a></li>
+ <li>Step 5: <a href="step5.html">Check the result</a></li>
+ <li><a href="step9.html">Option panel</a></li>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step1.html b/HelpHtml/step1.html
new file mode 100644
index 0000000..58f9045
--- /dev/null
+++ b/HelpHtml/step1.html
@@ -0,0 +1,154 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Step 1 : Choose a project name and destination folder</em></h2>
+
+<br>
+
+<ol>
+ <li>Change the destination folder if necessary</li>
+ <br><small>It is more convenient to organize all mirrors in one directory, for example <b>My Web Sites</b>
+ <br>If you already have made mirrors using HTTrack, be sure that you have selected the correct folder.</small>
+ <br><br><center><img src="img/snap1_c.gif" border="0"></center>
+<br><br>
+ <li>Select the project name:
+ <ul>
+ <li>Select a new project name</li>
+ <br><small>This name is, for example, the theme of the mirrored sites, for example <b>My Friend's Site</b></small>
+ <br><br><center><img src="img/snap1_a.gif" border="0"></center>
+ <br><br>
+ OR
+ <br><br>
+ <li>Select an existing project for update/retry</li>
+ <br><small>Directly select the existing project name in the popup list</small><br><br>
+ <center><img src="img/snap1_b.gif" border="0"></center>
+ </ul>
+<br><br>
+ <li>Click on the <b>NEXT</b> button</li>
+<br><br>
+ <li><a href="step2.html">Go to the next step</a>...</li>
+</ol>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step2.html b/HelpHtml/step2.html
new file mode 100644
index 0000000..8874762
--- /dev/null
+++ b/HelpHtml/step2.html
@@ -0,0 +1,168 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Step 2 : Fill the addresses</em></h2>
+
+<br>
+
+<ol>
+ <li>Select an action</li>
+ <br><small>The default action is <b>Download web sites</b></small><br><br>
+ <center><img src="img/snap2_a.gif" border="0"></center><br><br>
+
+ <ul>
+ <li>Download web site(s)</li>
+ <br><small>Will transfer the desired sites with default options</small>
+ <li>Download web site(s) + questions</li>
+ <br><small>Will transfer the desired sites with default options, and ask questions if any links are considered as potentially downloadable</small>
+ <li>Get individual files</li>
+ <br><small>Will only get the desired files you specify (for example, ZIP files), but will not spider through HTML files</small>
+ <li>Download all sites in pages (multiple mirror)</li>
+ <br><small>Will download all sites that appear in the site(s) selected. If you drag &amp; drop your bookmark file, this option lets you mirror all your favorite sites</small>
+ <li>Test links in pages (bookmark test)</li>
+ <br><small>Will test all links indicated. Useful to check a bookmark file</small>
+ <li>* Continue interrupted download</li>
+ <br><small>Use this option if a download has been interrupted (user interruption, crash...)</small>
+ <li>* Update existing download</li>
+ <br><small>Use this option to update an existing project. The engine will recheck the complete structure, checking each downloaded file for any updates on the web site</small>
+ </ul>
+
+<br><br>
+ <li>Enter the site's addresses</li>
+ <br><small>You can click on the <b>Add a URL</b> button to add each address, or just type them in the box</small><br><br>
+ <center><img src="img/snap2_b.gif" border="0"></center>
+<br><br>
+ <li>You may define options by clicking on the <b><a href="step9.html">Set options</a></b> button</li>
+ <br><small>You can define filters or download parameters in the option panel</small><br>
+<br><br>
+ <li>You may also add a URL by clicking on the <b><a href="addurl.html">Add a URL</a></b> button</li>
+ <br><small>This option lets you define additional parameters (login/password) for the URL, or capture a complex URL from your browser</small><br>
+<br><br>
+ <li>Click on the <b>NEXT</b> button</li>
+<br><br>
+ <li><a href="step3.html">Go to the next step</a>...</li>
+</ol>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step3.html b/HelpHtml/step3.html
new file mode 100644
index 0000000..2738368
--- /dev/null
+++ b/HelpHtml/step3.html
@@ -0,0 +1,140 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Step 3 : Ready to start</em></h2>
+
+<br>
+
+<ol>
+ <li>If you want, you may connect immediately or delay the mirror</li>
+ <br><small>If you don't select anything, HTTrack will assume that you are already connected to the Internet and that you want to start the mirror action now</small><br><br>
+<br><br>
+ <li>Click on the <b>START</b> button</li>
+ <center><img src="img/snap3_a.gif" border="0"></center>
+<br><br>
+ <li><a href="step4.html">Go to the next step</a>...</li>
+</ol>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step4.html b/HelpHtml/step4.html
new file mode 100644
index 0000000..45041af
--- /dev/null
+++ b/HelpHtml/step4.html
@@ -0,0 +1,139 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Step 4 : Wait!</em></h2>
+
+<br>
+
+<ol>
+ <li>Wait until the mirror is finished</li>
+ <br><small>You can cancel the mirror at any time, or cancel files currently being downloaded for any reason (file too big, for example)</small>
+ <br><small>Options can be changed during the mirror: maximum number of connections, limits...</small><br><br>
+ <center><img src="img/snap4_a.gif" border="0"></center>
+<br><br>
+ <li><a href="step5.html">Go to the next step</a>...</li>
+</ol>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step5.html b/HelpHtml/step5.html
new file mode 100644
index 0000000..549474b
--- /dev/null
+++ b/HelpHtml/step5.html
@@ -0,0 +1,138 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Step 5 : Check the result</em></h2>
+
+<br>
+
+<ol>
+ <li>Check log files</li>
+ <br><small>You may check the error log file, which could contain useful information if errors have occurred</small><br><br>
+ <center><img src="img/snap5_a.gif" border="0"></center>
+ <br>
+ <li>See the <a href="faq.html">troubleshooting</a> page</li>
+</ol>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9.html b/HelpHtml/step9.html
new file mode 100644
index 0000000..caf696c
--- /dev/null
+++ b/HelpHtml/step9.html
@@ -0,0 +1,155 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel</em></h2>
+
+<br>
+
+<ul>
+ <li>Click on one of the option tabs below for more information</li>
+ <br><small>Each option tab is described, including remarks and examples</small>
+</ul>
+
+<br>
+
+<center>
+<map name="options">
+ <AREA SHAPE="RECT" COORDS="658, 6, 677, 21" HREF="index.html">
+ <AREA SHAPE="RECT" COORDS="581, 51, 668, 76" HREF="step9_opt6.html">
+ <AREA SHAPE="RECT" COORDS="488, 50, 581, 75" HREF="step9_opt5.html">
+ <AREA SHAPE="RECT" COORDS="403, 52, 489, 75" HREF="step9_opt1.html">
+ <AREA SHAPE="RECT" COORDS="314, 50, 401, 74" HREF="step9_opt3.html">
+ <AREA SHAPE="RECT" COORDS="193, 50, 310, 75" HREF="step9_opt2.html">
+ <AREA SHAPE="RECT" COORDS="106, 50, 192, 74" HREF="step9_opt4.html">
+ <AREA SHAPE="RECT" COORDS="16, 50, 107, 73" HREF="step9_opt7.html">
+
+ <AREA SHAPE="RECT" COORDS="8, 32, 162, 52" HREF="step9_opt11.html">
+ <AREA SHAPE="RECT" COORDS="178, 32, 332, 52" HREF="step9_opt8.html">
+ <AREA SHAPE="RECT" COORDS="348, 32, 502, 52" HREF="step9_opt9.html">
+ <AREA SHAPE="RECT" COORDS="518, 32, 672, 52" HREF="step9_opt10.html">
+</MAP>
+<img src="img/snap9.gif" border="0" width="680" height="104" usemap="#options">
+</center>
+
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt1.html b/HelpHtml/step9_opt1.html
new file mode 100644
index 0000000..d4ba2f6
--- /dev/null
+++ b/HelpHtml/step9_opt1.html
@@ -0,0 +1,156 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Links</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_a.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Attempt to detect all links</li>
+ <br><small>Asks the engine to try to detect all links in a page, even for unknown tags or unknown javascript code. This can generate bad requests or errors in pages, but may be helpful to catch all desired links
+ <br>Useful, for example, in pages with many javascript tricks
+ </small><br><br>
+<!-- -->
+ <li>Get non-html files related to a link</li>
+ <br><small>This option allows you to catch all file references in captured HTML files, even external ones
+ <br>For example, if an image in an HTML page has its source on another web site, this image will be captured as well.
+ </small><br><br>
+<!-- -->
+ <li>Test validity of all links</li>
+ <br><small>This option forces the engine to test all links in spidered pages, i.e. to check if every link is valid or not by performing a request to the server. If an error occurs, it is reported in the error log file.
+ <br>Useful to test all external links in a website
+ </small><br><br>
+<!-- -->
+ <li>Get HTML files first!</li>
+ <br><small>With this option enabled, the engine will attempt to download all HTML files first, and
+ then download other (images) files. This can speed up the parsing process, by efficiently scanning
+ the HTML structure.
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt10.html b/HelpHtml/step9_opt10.html
new file mode 100644
index 0000000..64f5440
--- /dev/null
+++ b/HelpHtml/step9_opt10.html
@@ -0,0 +1,162 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Expert Options</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_j.gif" border="0"></center>
+ <br><br>
+ <i>Advice: leave these options to default values!</i>
+ <br><br>
+<!-- -->
+ <li>Use a cache for updates</li>
+ <br><small>This option <b><u><font color="red">MUST</font></u></b> be set if you want to update the site later, or if you want to have the opportunity to continue a crashed mirror
+ <br>Disable it only if you want to save a few kilobytes, but, err, again, it is not advised to disable this option!
+ </small><br><br>
+<!-- -->
+ <li>Primary filter (scan mode)</li>
+ <br><small>Which files must be saved?
+ <br>You can choose Html and/or Non-Html, or none (this last option is automatically set for scanning)
+ </small><br><br>
+<!-- -->
+ <li>Travel mode</li>
+ <br><small>Set the default spidering direction
+ <br>The default is to catch all files in the same level and lower levels, which is the most logical
+ </small><br><br>
+<!-- -->
+ <li>Global travel mode</li>
+ <br><small>Set the default global spidering direction
+ <br>The default is to stay on the same address if no specific authorization has been delivered
+ </small><br><br>
+<!-- -->
+ <li>Activate debug mode</li>
+ <br><small>Enables some extra debug information, like headers debugging and some interface information (for debugging purposes only)
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
+
diff --git a/HelpHtml/step9_opt11.html b/HelpHtml/step9_opt11.html
new file mode 100644
index 0000000..46c12fc
--- /dev/null
+++ b/HelpHtml/step9_opt11.html
@@ -0,0 +1,193 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : MIME Types</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_k.gif" border="0"></center>
+ <br><br>
+ <br><br>
+<!-- -->
+ <li>MIME Types</li>
+ <br><small>
+An important new feature for some people. This panel tells the engine that if a link is encountered, with a
+specific type (.cgi, .asp, or .php3 for example), it MUST assume that this link always has the same MIME type, for example
+the "text/html" MIME type.
+This is VERY important to speed up many mirrors.
+Some big HTML files which have many links of unknown type embedded, such as ".asp", cause the engine to test all links, and this
+slows down the parser.
+<br>
+<br>
+In this case, you can tell HTTrack: ".asp pages are in fact HTML pages"
+<br>
+This is possible, using:
+<br>
+<br>
+File type: <tt>asp</tt> MIME identity: <tt>text/html</tt>
+<br>
+<br>
+You can declare multiple definitions, or declare multiple types separated by ",", like in:
+<br>
+File type: <tt>asp,php,php3</tt> MIME identity: <tt>text/html</tt>
+<br>
+<br>
+Most important MIME types are:
+<br>
+<table border="1">
+<tr><td>text/html</td><td>Html files, parsed by HTTrack</td></tr>
+<tr><td>image/gif</td><td>GIF files</td></tr>
+<tr><td>image/jpeg</td><td>Jpeg files</td></tr>
+<tr><td>image/png</td><td>PNG files</td></tr>
+<tr><td>application/x-zip</td><td>.zip files</td></tr>
+<tr><td>application/x-mp3</td><td>.mp3 files</td></tr>
+<tr><td>application/x-foo</td><td>.foo files</td></tr>
+<tr><td>application/octet-stream</td><td>Unknown files</td></tr>
+</table>
+
+<br>
+
+You can rename files on a mirror. If you KNOW that all "dat" files are in fact "zip" files renamed into "dat", you can
+tell httrack:<br>
+
+File type: <tt>dat</tt> MIME identity: <tt>application/x-zip</tt>
+
+<br><br>
+
+You can also "name" a file type, with its original MIME type, if this type is not known by HTTrack. This will avoid a test
+when the link is reached:<br>
+
+File type: <tt>foo</tt> MIME identity: <tt>application/octet-stream</tt>
+
+<br><br>
+
+In this case, HTTrack won't check the type, because it has learned that "foo" is a known type, or MIME type
+"application/octet-stream". Therefore, it will leave the "foo" type untouched.
+
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt2.html b/HelpHtml/step9_opt2.html
new file mode 100644
index 0000000..9afb2d0
--- /dev/null
+++ b/HelpHtml/step9_opt2.html
@@ -0,0 +1,192 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Limits</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_b.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Maximum mirror depth</li>
+ <br><small>Define how deep the engine will seek in the site.
+ A depth of 3 means that you will catch all pages you have indicated, plus all that can be accessed by clicking twice on any link
+ <br>
+ Note: This option is not filled by default, so the depth is infinite. But because the engine will stay on the site you indicated, only the desired sites will be mirrored, and not all the web!
+ </small><br><br>
+<!-- -->
+ <li>Maximum external depth</li>
+ <br><small>Define how deep the engine will seek in external sites, or on addresses that were forbidden.<br>
+ Normally, HTTrack will not go on external sites by default (except if authorized by filters),
+ and will avoid addresses forbidden by filters. You can override this behaviour, and
+ tell the engine to catch N levels of "external" sites.<br>
+ Note: Use this option with great care, as it is overriding all other options (filters and default engine limiter)
+ <br>
+ Note: This option is not filled by default, so the depth is equal to zero.
+ </small><br><br>
+<!-- -->
+ <li>Maximum size of an HTML file</li>
+ <br><small>Define the biggest Html file the engine is allowed to catch.<br>
+ This option allows you to avoid big files if you do not want to download them.
+ </small><br><br>
+<!-- -->
+ <li>Max size of a non-HTML file</li>
+ <br><small>Define the biggest non-html file (image, ZIP file..) the engine is allowed to catch.<br>
+ This option allows you to avoid big files if you do not want to download them.
+ </small><br><br>
+<!-- -->
+ <li>Site size limit</li>
+ <br><small>This option limits the total amount of bytes that can be downloaded in the current mirror
+ </small><br><br>
+<!-- -->
+ <li>Pause after downloading..</li>
+ <br><small>This option lets the engine do a pause every time it has retrieved a specific amount of bytes
+ <br>Useful if you are mirroring a site bigger than the available space: you can then backup and erase the downloaded files during the pause
+ </small><br><br>
+<!-- -->
+ <li>Max time overall</li>
+ <br><small>This option limits the total amount of time that can be spent on the current mirror
+ </small><br><br>
+<!-- -->
+ <li>Max transfer rate</li>
+ <br><small>This option limits the transfer rate on the current mirror
+ <br>Useful if you do not want HTTrack to monopolize the bandwidth!
+ </small><br><br>
+<!-- -->
+ <li>Max connections / seconds</li>
+ <br><small>This option limits the number of connections per second for the current mirror
+ <br>Useful to limit server load.
+ <br>The default is 10, but you can disable it with a value of 0 - THIS IS NOT ADVISED UNLESS YOU KNOW WHAT YOU ARE DOING (risks of server overload)
+ </small><br><br>
+<!-- -->
+ <li>Maximum number of links</li>
+ <br><small>Maximum number of links that can be analyzed, that is, either downloaded, or not downloaded.
+ Do not set too low a limit for that, because once the limit is reached, the engine will stop immediately.
+ <br>Do not set too high a limit either, because it will take some memory.. 100,000 links (default) is generally enough.
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt3.html b/HelpHtml/step9_opt3.html
new file mode 100644
index 0000000..7ca208f
--- /dev/null
+++ b/HelpHtml/step9_opt3.html
@@ -0,0 +1,156 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Flow Control</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_c.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Number of connections</li>
+ <br><small>Define the number of simultaneous connections that can be initiated by the engine.
+ <br>It is recommended to limit this number to 1 or 2 if you are mirroring big files on a site, more on standard sites (8 is recommended, up to 42 if it is supported by the system)
+ </small><br><br>
+<!-- -->
+ <li>TimeOut</li>
+ <br><small>Define how long the engine has to wait if no response is given by a server.
+ <br>120 seconds is recommended (less on fast pipes, more if your connection is sloppy)
+ <br>You can optionally skip all links from a host that has generated a timeout. Warning: if this checkbox is selected, a timeout will eliminate all links from the origin server
+ </small><br><br>
+<!-- -->
+ <li>Retries</li>
+ <br><small>Number of retries if a non-fatal error occurred (timeout, for example)
+ <br>Note that this will not solve fatal errors such as "Not Found" pages and so on!
+ </small><br><br>
+<!-- -->
+ <li>Min Transfer Rate</li>
+ <br><small>Minimum transfer rate tolerated on a site. If the transfer rate is slower than the defined value, then the link is skipped
+ <br>You can optionally skip all links from a host that has generated a "too slow" error. Warning: if this checkbox is selected, a "too slow" error will eliminate all links from the origin server
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt4.html b/HelpHtml/step9_opt4.html
new file mode 100644
index 0000000..b6c666b
--- /dev/null
+++ b/HelpHtml/step9_opt4.html
@@ -0,0 +1,187 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Scan Rules</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_d.gif" border="0"></center>
+ <br><br>
+ <i>
+ Filters (scan rules) are the most important and powerful option that can be used: you can exclude or accept subdirectories, skip certain types of files, and so on.. If you have missing files (images on top level directories, for example) using filters can help you!
+ </i>
+ <br><br>
+<!-- -->
+ <li>Exclude link(s)</li>
+ <br><small>This button lets you add a filter to exclude either a directory, a domain, a certain file type...
+ <br>See <a href="#add">below</a> to find out how to add a filter rule...
+ </small><br><br>
+<!-- -->
+ <li>Include link(s)</li>
+ <br><small>This button lets you add a filter to authorize either a directory, a domain, a certain file type...
+ <br>See <a href="#add">below</a> to find out how to add a filter rule...
+ </small><br><br>
+<!-- -->
+<a name="add"></a>
+ <li>How to add a rule
+ <br><small>(accept or forbid links)</small>
+ <br><br>
+ <ul>
+ <li>Select a rule</li>
+ <center><img src="img/snap9_d2.gif" border="0"></center>
+ <br><br>
+ <li>Then, enter the keyword(s)</li>
+ <center><img src="img/snap9_d3.gif" border="0"></center>
+ <br><br>
+ <li>Click on the ADD button to add the rule</li>
+ <center><img src="img/snap9_d4.gif" border="0"></center>
+ </ul>
+ </li>
+<!-- -->
+ <li>Another example:
+ <br><small>Accept a specific directory name<br>
+ Suppose that you are mirroring a site at http://www.awondefulsite.com/mike/index/index.html but you can not get images located in /images/landscapes/ (for example, the image http://www.awondefulsite.com/images/landscapes/bluewater.jpg has not been retrieved)</small>
+ <br><br>
+ <ul>
+ <li>Select a rule : in this case to identify all items from a specific folder name</li>
+ <center><img src="img/snap9_d5.gif" border="0"></center>
+ <br><br>
+ <li>Then, enter the keyword(s) : in this case it is the directory name (without the starting and ending /)</li>
+ <center><img src="img/snap9_d6.gif" border="0"></center>
+ <br><br>
+ <li>Click on the ADD button to add the rule</li>
+ <center><img src="img/snap9_d7.gif" border="0"></center>
+ <br><br>
+ <li>The rule has been added</li>
+ <center><img src="img/snap9_d8.gif" border="0"></center>
+ </ul>
+ </li>
+<!-- -->
+ <br>
+ <li>See also: <a href="filters.html">advanced filters</a></li>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt5.html b/HelpHtml/step9_opt5.html
new file mode 100644
index 0000000..8aa6a8a
--- /dev/null
+++ b/HelpHtml/step9_opt5.html
@@ -0,0 +1,176 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Build</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_e.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Local Structure Type</li>
+ <br><small>Lets you define the local structure of the site.
+ <br>The default is "site structure": you will get the same folder/files names and structure as the original
+ <br>You can, however, put all images in one single folder, html in another and so on..
+ </small><br><br>
+<!-- -->
+ <li>DOS Names</li>
+ <br><small>Force the engine to generate DOS names (8 characters for the name, 3 for the type)
+ </small><br><br>
+<!-- -->
+ <li>ISO9660 Names</li>
+ <br><small>Force the engine to generate ISO9660-compatible names for storing on media such as CDROM or DVDROM
+ </small><br><br>
+<!-- -->
+ <li>No error pages</li>
+ <br><small>Do not generate error pages (if a 404 error occurred, for example)
+ <br>If a page is missing on the remote site, there will not be any warning on the local site
+ </small><br><br>
+<!-- -->
+ <li>No external pages</li>
+ <br><small>Rewrite all external links (links that need an Internet connection) so that there can be a warning page before ("Warning, you need to be online to go to this link..")
+ <br>Useful if you want to separate the local and online realm
+ </small><br><br>
+<!-- -->
+ <li>Hide passwords</li>
+ <br><small>Do not include username and password for protected sites in the code, when a link will not be caught.
+ This allows the access data to remain private.
+ </small><br><br>
+<!-- -->
+ <li>Hide query strings</li>
+ <br><small>Do not include query strings for local links.
+ Query strings (?foo=45&amp;bar=67) are generally not necessary for local (file://) files, but
+ query strings can be useful to carry additional information (example: page-4.html?index=History).
+ However, some basic browsers may not understand that (wireless browsers, especially), and
+ hiding query strings might be a good idea in this case.
+ </small><br><br>
+<!-- -->
+ <li>Do not purge old files</li>
+ <br><small>Do not purge, after an update, the local files that no longer exist on the remote site, or that have been skipped
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt6.html b/HelpHtml/step9_opt6.html
new file mode 100644
index 0000000..b929c4b
--- /dev/null
+++ b/HelpHtml/step9_opt6.html
@@ -0,0 +1,173 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Spider</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_f.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Accept cookies</li>
+ <br><small>Accept cookies generated by the remote server
+ <br>If you do not accept cookies, some "session-generated" pages will not be retrieved
+ </small><br><br>
+<!-- -->
+ <li>Check document type</li>
+ <br><small>Define when the engine has to check document type
+ <br>The engine must know the document type, to rewrite the file types. For example, if a link called /cgi-bin/gen_image.cgi generates a gif image, the generated file will not be called "gen_image.cgi" but "gen_image.gif"
+ <br>Avoid "never", because the local mirror could be bogus
+ </small><br><br>
+<!-- -->
+ <li>Parse java files</li>
+ <br><small>Must the engine parse .java files (java classes) to seek included filenames?
+ <br>It is checked by default
+ </small><br><br>
+<!-- -->
+ <li>Spider</li>
+ <br><small>Must the engine follow remote robots.txt rules when they exist?
+ <br>The default is "follow"
+ </small><br><br>
+<!-- -->
+ <li>Update hack</li>
+ <br><small>Attempt to limit transfers by wrapping known bogus responses from servers.
+ For example, pages with same size will be considered as "up to date", even if the timestamp seems
+ different. This can be useful for many dynamically generated pages, but this can also cause
+ not-updated pages in rare cases.
+ </small><br><br>
+<!-- -->
+ <li>Tolerant requests</li>
+ <br><small>Tolerate wrong file size, and make requests compliant with old servers
+ <br>It is unchecked by default, because this option can cause files to become bogus
+ </small><br><br>
+<!-- -->
+ <li>Force old HTTP/1.0 requests</li>
+ <br><small>This option forces the engine to use HTTP/1.0 requests, and avoid HEAD requests.
+ <br>Useful for some sites with old server versions, or with many dynamically generated pages.
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt7.html b/HelpHtml/step9_opt7.html
new file mode 100644
index 0000000..911801c
--- /dev/null
+++ b/HelpHtml/step9_opt7.html
@@ -0,0 +1,162 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Proxy</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_g.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Proxy</li>
+ <br><small>You can manually enter the proxy name and port (enter the name in the first field, the port in the second field)
+ </small><br><br>
+<!-- -->
+ <li>Use proxy for FTP transfers</li>
+ <br><small>The engine can use default HTTP proxy for all ftp (ftp://) transfers. Most proxies allow this, and if you are behind
+ a firewall, this option will allow you to easily catch all ftp links. Besides, ftp transfers managed by the proxy are more reliable
+ than the engine's default FTP client.
+ <br>This option is checked by default
+ </small><br><br>
+<!-- -->
+ <li>Configure</li>
+ <br><small>Click on this button to configure the proxy.
+ <br>If the proxy <b>needs</b> authentication you can define the login username/password
+ </small>
+ <br><br>
+ <center><img src="img/snap9_g2.gif" border="0"></center>
+ <br><br>
+ <center><img src="img/snap9_g3.gif" border="0"></center>
+ <br>
+ <br><br>
+<!-- -->
+ <li>Hide password</li>
+ <br><small>Use it if you do not want to display the password (hides the proxy name)
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt8.html b/HelpHtml/step9_opt8.html
new file mode 100644
index 0000000..c241650
--- /dev/null
+++ b/HelpHtml/step9_opt8.html
@@ -0,0 +1,152 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Browser ID</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_h.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Browser "Identity"</li>
+ <br><small>Enter here the name of the engine, as it will be seen by Web-servers
+ <br>For example, entering "Mozilla/4.5 (compatible; MSIE 4.01; Windows 98)" will disguise HTTrack into a standard MSIE4 browser
+ <br>This field is for statistical purpose, and you can enter whatever you want, a browser name that does not exist or even your grandma's name
+ <br>However, beware that <b>several</b> sites may deliver a different content whether the browser is called "Netscape" or "Explorer".. some elitist ones will even refuse to deliver anything depending on the browser name. This case is rare, fortunately.
+ </small><br><br>
+<!-- -->
+ <li>HTML Footer</li>
+ <br><small>Enter here the optional text that will be included as a comment in each HTML file to make archiving easier
+ <br>The string entered is generally an HTML comment (<tt>&lt;!-- HTML comment --&gt;</tt>) with optional %s, which will be transformed into specific string information:
+ <br>%s #1 : host name (for example, www.someweb.com)
+ <br>%s #2 : file name (for example, /index.html)
+ <br>%s #3 : date of the mirror
+ <br><b>Example</b>: <tt>&lt;!-- Page mirrored from %s, file %s. Archive date: %s --&gt;</tt>
+ <br><b>Note</b>: You can select (none), in this case no comments will be added to the pages. However, this is NOT advised as you may want to know in the future where the page has been taken, when/why..
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/HelpHtml/step9_opt9.html b/HelpHtml/step9_opt9.html
new file mode 100644
index 0000000..16a6d77
--- /dev/null
+++ b/HelpHtml/step9_opt9.html
@@ -0,0 +1,167 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>HTTrack Website Copier - Offline Browser</title>
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(images/bg_rings.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+ <tr>
+ <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<h2 align="center"><em>Option panel : Log files, Index, Cache</em></h2>
+
+<br>
+
+<ul>
+ <center><img src="img/snap9_i.gif" border="0"></center>
+ <br><br>
+<!-- -->
+ <li>Force to store all files in cache</li>
+ <br><small>Force to store all files in the cache, even gif files, zip files and so on..
+ <br>Without this option, the engine will only save in cache html files for updating/continue purpose.
+ <br>It can be useful, however, to keep all files in cache if you want in the future to change the site structure
+ <br>Warning! This option will appreciably <b>inflate</b> the cache that will become as big as the mirror itself!
+ </small><br><br>
+<!-- -->
+ <li>Do not re-download locally erased files</li>
+ <br><small>This option prevents HTTrack from re-asking a file that exists locally with null size, or that has been erased by the user
+ <br>(If the user erased the file, this option will create a null-file to prevent the engine from catching the file next time)
+ <br>Useful if you are erasing progressively large files on the local mirror and do not want to reload them!
+ </small><br><br>
+<!-- -->
+ <li>Create Log files</li>
+ <br><small>Create log files where information, errors and warnings about the current mirror will be saved
+ <br>If you <b>do not</b> generate log files, you will not be able to know what errors occurred!
+ <br>It is strongly advised to leave this option checked
+ <br>Note: You can define the debug-level of the log-files. Default is "normal"
+ </small><br><br>
+<!-- -->
+ <li>Make an index</li>
+ <br><small>Generate an index.html on the top of the directory. Very useful.
+ <br>
+ </small><br>
+<!-- -->
+ <li>Make a word database</li>
+ <br><small>Generate an index.txt database on the top of the directory. Very useful for linguistic analysis, this feature
+ will allow you to list all words of all mirrored pages in the current project.<br>
+ With this index file, you will be able to list which words were detected, and where.
+ <br>
+ </small><br><br>
+</ul>
+
+<br><br><br><br>
+<p align="right">Back to <a href="index.html">Home</a></p>
+
+<!-- ==================== Start epilogue ==================== -->
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</td>
+</tr>
+</table>
+
+<table width="76%" height="100%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+ <tr>
+ <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+ </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..f863ec7
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,36 @@
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+* How to install (Linux/Unix) :
+ tar xvfz httrack-xxxx.tar.gz
+ cd httrack-xxxx
+ cd src
+ ./configure
+ make
+ make install
+
+you can also use something like:
+ ./configure --prefix=/usr --etcdir=/etc --libdir=/usr/lib --bindir=/usr/bin --make --install
+
+or for static mode:
+ ./configure --static --prefix=/usr --etcdir=/etc --libdir=/usr/lib --bindir=/usr/bin --make --install
+
+If problems occur, try
+ ./configure --help
+and follow the instructions
+
+* How to install (Windows with installshield) :
+ extract all files from the ZIP file and launch 'setup.exe'
+ after installing, launch WinHTTrack.exe
+
+* How to install (Windows without installshield) :
+ unzip all files in an empty folder
+ copy DLL's (if necessary) from dll/ in your windows dll directory (example: C:\WINNT\system32)
+ launch WinHTTrack.exe
+
+
+See other readme and doc files for more information!
+
+Have fun with HTTrack Website Copier!
+The authors
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c10e939
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+all :
+ @echo "please type in:"
+ @echo "cd src"
+ @echo "./configure"
+ @echo "make"
+ @echo "make install"
+ @echo ""
+ @echo "(see INSTALL file to know how-to-install)"
+
diff --git a/README b/README
new file mode 100644
index 0000000..2f2594f
--- /dev/null
+++ b/README
@@ -0,0 +1,58 @@
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+Welcome to HTTrack Website Copier!
+
+
+Information:
+
+The folder HelpHtml contains the documentation
+You might want to read these files, especially for installing HTTrack:
+ - See INSTALL file for installation information (NO WARRANTY)
+ - See license.txt file for license information
+
+
+Contacting us:
+
+If you want to ask any question, feel free to contact us!
+email: httrack@httrack.com
+
+
+Donations:
+
+HTTrack Website Copier is free software, but if you can, please donate to the Free Software Foundation (FSF) to support free software!
+
+(Europe)
+http://www.fsfeurope.org/help/donate.html
+
+(World)
+http://www.fsf.org/help/donate.html
+
+
+Engine limits:
+
+These are the principal limits of HTTrack at the moment. Note that most of them are generally shared among all offline browsers, due to technological limits.
+
+- Several scripts generating complex filenames may not find them (ex: img.src='image'+a+Mobj.dst+'.gif')
+- Some java classes may not find some files on them (class included)
+- Cgi-bin links may not work properly in some cases (parameters needed). To avoid them: use filters like -*cgi-bin*
+- Flash files are not yet parsed
+
+
+Advanced options:
+
+These options may not be necessary for a normal usage. But they can solve several problems.
+
+- If you need more than 100,000 links: -#L1000000 (1,000,000 links)
+- If you need more than 500 filters: -#F1000 (1,000 filters)
+- If you need transfer rate statistics every minute: -#Z
+- If you need transfer operations statistics every minute: -#T
+- If you want log files to be refreshed after every line: -#f
+
+
+Thank you!
+
+
+Have fun with HTTrack Website Copier!
+The authors
+
diff --git a/gpl-fr.txt b/gpl-fr.txt
new file mode 100644
index 0000000..a211ae8
--- /dev/null
+++ b/gpl-fr.txt
@@ -0,0 +1,197 @@
+GPL
+
+Introduction
+
+This is an unofficial translation of the GNU General Public License into French. It was not published by the Free Software Foundation, and does not legally
+state the distribution terms for software that uses the GNU GPL--only the original English text of the GNU GPL does that. However, we hope that this
+translation will help French speakers understand the GNU GPL better.
+
+Voici (http://www.linux-france.org/article/these/gpl.html) une adaptation non officielle de la Licence Publique Générale du projet GNU. Elle n'a pas été
+publiée par la Free Software Foundation et son contenu n'a aucune portée légale car seule la version anglaise de ce document détaille le mode de distribution
+des logiciels sous GNU GPL. Nous espérons cependant qu'elle permettra aux francophones de mieux comprendre la GPL.
+
+Licence Publique Générale GNU Version 2, Juin 1991
+
+Copyright © Free Software Foundation, Inc.
+59 Temple Place, Suite 330, Boston, MA 02111-1307
+États-Unis, 1989, 1991.
+La copie et la distribution de copies exactes de ce document sont autorisées, mais aucune modification n'est permise.
+
+Préambule
+
+Les licences d'utilisation de la plupart des programmes sont définies pour limiter ou supprimer toute liberté à l'utilisateur. À l'inverse, la Licence Publique
+Générale (General Public License) est destinée à vous garantir la liberté de partager et de modifier les logiciels libres, et de s'assurer que ces logiciels sont
+effectivement accessibles à tout utilisateur.
+
+Cette Licence Publique Générale s'applique à la plupart des programmes de la Free Software Foundation, comme à tout autre programme dont l'auteur l'aura
+décidé (d'autres logiciels de la FSF sont couverts pour leur part par la Licence Publique Générale pour Bibliothèques GNU (LGPL)). Vous pouvez aussi
+appliquer les termes de cette Licence à vos propres programmes, si vous le désirez.
+
+Liberté des logiciels ne signifie pas nécessairement gratuité. Notre Licence est conçue pour vous assurer la liberté de distribuer des copies des programmes,
+gratuitement ou non, de recevoir le code source ou de pouvoir l'obtenir, de modifier les programmes ou d'en utiliser des éléments dans de nouveaux
+programmes libres, en sachant que vous y êtes autorisé.
+
+Afin de garantir ces droits, nous avons dû introduire des restrictions interdisant à quiconque de vous les refuser ou de vous demander d'y renoncer. Ces
+restrictions vous imposent en retour certaines obligations si vous distribuez ou modifiez des copies de programmes protégés par la Licence. En d'autre termes,
+il vous incombera en ce cas de :
+
+ transmettre aux destinataires tous les droits que vous possédez,
+ expédier aux destinataires le code source ou bien tenir celui-ci à leur disposition,
+ leur remettre cette Licence afin qu'ils prennent connaissance de leurs droits.
+
+Nous protégeons vos droits de deux façons : d'abord par le copyright du logiciel, ensuite par la remise de cette Licence qui vous autorise légalement à copier,
+distribuer et/ou modifier le logiciel.
+
+En outre, pour protéger chaque auteur ainsi que la FSF, nous affirmons solennellement que le programme concerné ne fait l'objet d'aucune garantie. Si un tiers
+le modifie puis le redistribue, tous ceux qui en recevront une copie doivent savoir qu'il ne s'agit pas de l'original afin qu'une copie défectueuse n'entache pas
+la réputation de l'auteur du logiciel.
+
+Enfin, tout programme libre est sans cesse menacé par des dépôts de brevets. Nous souhaitons à tout prix éviter que des distributeurs puissent déposer des
+brevets sur les Logiciels Libres pour leur propre compte. Pour éviter cela, nous stipulons bien que tout dépôt éventuel de brevet doit accorder expressément à
+tout un chacun le libre usage du produit.
+
+Les dispositions précises et les conditions de copie, de distribution et de modification de nos logiciels sont les suivantes :
+
+Stipulations et conditions relatives à la copie, la distribution et la modification
+
+
+
+ Article 0
+ La présente Licence s'applique à tout Programme (ou autre travail) où figure une note, placée par le détenteur des droits, stipulant que ledit Programme
+ ou travail peut être distribué selon les termes de la présente Licence. Le terme Programme désigne aussi bien le Programme lui-même que tout travail
+ qui en est dérivé selon la loi, c'est-à-dire tout ouvrage reproduisant le Programme ou une partie de celui-ci, à l'identique ou bien modifié, et/ou traduit
+ dans une autre langue (la traduction est considérée comme une modification). Chaque personne concernée par la Licence Publique Générale sera
+ désignée par le terme Vous.
+
+ Les activités autres que copie, distribution et modification ne sont pas couvertes par la présente Licence et sortent de son cadre. Rien ne restreint
+ l'utilisation du Programme et les données issues de celui-ci ne sont couvertes que si leur contenu constitue un travail basé sur le logiciel
+ (indépendemment du fait d'avoir été réalisé en lançant le Programme). Tout dépend de ce que le Programme est censé produire.
+
+
+ Article 1.
+ Vous pouvez copier et distribuer des copies conformes du code source du Programme, tel que Vous l'avez reçu, sur n'importe quel support, à condition
+ de placer sur chaque copie un copyright approprié et une restriction de garantie, de ne pas modifier ou omettre toutes les stipulations se référant à la
+ présente Licence et à la limitation de garantie, et de fournir avec toute copie du Programme un exemplaire de la Licence.
+
+ Vous pouvez demander une rétribution financière pour la réalisation de la copie et demeurez libre de proposer une garantie assurée par vos soins,
+ moyennant finances.
+
+
+ Article 2.
+ Vous pouvez modifier votre copie ou vos copies du Programme ou partie de celui-ci, ou d'un travail basé sur ce Programme, et copier et distribuer ces
+ modifications selon les termes de l'article 1, à condition de Vous conformer également aux conditions suivantes :
+ a) Ajouter aux fichiers modifiés l'indication très claire des modifications effectuées, ainsi que la date de chaque changement.
+ b) Distribuer sous les termes de la Licence Publique Générale l'ensemble de toute réalisation contenant tout ou partie du Programme, avec ou sans
+ modifications.
+ c) Si le Programme modifié lit des commandes de manière interactive lors de son exécution, faire en sorte qu'il affiche, lors d'une invocation
+ ordinaire, le copyright approprié en indiquant clairement la limitation de garantie (ou la garantie que Vous Vous engagez à fournir Vous-même),
+ qu'il stipule que tout utilisateur peut librement redistribuer le Programme selon les conditions de la Licence Publique Générale GNU, et qu'il montre
+ à tout utilisateur comment lire une copie de celle-ci (exception : si le Programme original est interactif mais n'affiche pas un tel message en temps
+ normal, tout travail dérivé de ce Programme ne sera pas non plus contraint de l'afficher).
+
+ Toutes ces conditions s'appliquent à l'ensemble des modifications. Si des éléments identifiables de ce travail ne sont pas dérivés du Programme et
+ peuvent être raisonnablement considérés comme indépendants, la présente Licence ne s'applique pas à ces éléments lorsque Vous les distribuez seuls.
+ Mais, si Vous distribuez ces mêmes éléments comme partie d'un ensemble cohérent dont le reste est basé sur un Programme soumis à la Licence, ils lui
+ sont également soumis, et la Licence s'étend ainsi à l'ensemble du produit, quel qu'en soit l'auteur.
+
+ Cet article n'a pas pour but de s'approprier ou de contester vos droits sur un travail entièrement réalisé par Vous, mais plutôt d'ouvrir droit à un contrôle
+ de la libre distribution de tout travail dérivé ou collectif basé sur le Programme.
+
+ En outre, toute fusion d'un autre travail, non basé sur le Programme, avec le Programme (ou avec un travail dérivé de ce dernier), effectuée sur un
+ support de stockage ou de distribution, ne fait pas tomber cet autre travail sous le contrôle de la Licence.
+
+
+ Article 3.
+ Vous pouvez copier et distribuer le Programme (ou tout travail dérivé selon les conditions énoncées dans l'article 1) sous forme de code objet ou
+ exécutable, selon les termes des articles 0 et 1, à condition de respecter les clauses suivantes :
+ a) Fournir le code source complet du Programme, sous une forme lisible par un ordinateur et selon les termes des articles 0 et 1, sur un support
+ habituellement utilisé pour l'échange de données ; ou,
+ b) Faire une offre écrite, valable pendant au moins trois ans, prévoyant de donner à tout tiers qui en fera la demande une copie, sous forme lisible
+ par un ordinateur, du code source correspondant, pour un tarif n'excédant pas le coût de la copie, selon les termes des articles 0 et 1, sur un support
+ couramment utilisé pour l'échange de données informatiques ; ou,
+ c) Informer le destinataire de l'endroit où le code source peut être obtenu (cette solution n'est recevable que dans le cas d'une distribution non
+ commerciale, et uniquement si Vous avez reçu le Programme sous forme de code objet ou exécutable avec l'offre prévue à l'alinéa b ci-dessus).
+
+ Le code source d'un travail désigne la forme de cet ouvrage sous laquelle les modifications sont les plus aisées. Sont ainsi désignés la totalité du code
+ source de tous les modules composant un Programme exécutable, de même que tout fichier de définition associé, ainsi que les scripts utilisés pour
+ effectuer la compilation et l'installation du Programme exécutable. Toutefois, l'environnement standard de développement du système d'exploitation mis
+ en oeuvre (source ou binaire) -- compilateurs, bibliothèques, noyau, etc. -- constitue une exception, sauf si ces éléments sont diffusés en même temps que
+ le Programme exécutable.
+
+ Si la distribution de l'exécutable ou du code objet consiste à offrir un accès permettant de copier le Programme depuis un endroit particulier, l'offre d'un
+ accès équivalent pour se procurer le code source au même endroit est considéré comme une distribution de ce code source, même si l'utilisateur choisit
+ de ne pas profiter de cette offre.
+
+
+ Article 4.
+ Vous ne pouvez pas copier, modifier, céder, déposer ou distribuer le Programme d'une autre manière que l'autorise la Licence Publique Générale. Toute
+ tentative de ce type annule immédiatement vos droits d'utilisation du Programme sous cette Licence. Toutefois, les tiers ayant reçu de Vous des copies du
+ Programme ou le droit d'utiliser ces copies continueront à bénéficier de leur droit d'utilisation tant qu'ils respecteront pleinement les conditions de la
+ Licence.
+
+
+ Article 5.
+ Ne l'ayant pas signée, Vous n'êtes pas obligé d'accepter cette Licence. Cependant, rien d'autre ne Vous autorise à modifier ou distribuer le Programme ou
+ quelque travaux dérivés : la loi l'interdit tant que Vous n'acceptez pas les termes de cette Licence. En conséquence, en modifiant ou en distribuant le
+ Programme (ou tout travail basé sur lui), Vous acceptez implicitement tous les termes et conditions de cette Licence.
+
+
+ Article 6.
+ La diffusion d'un Programme (ou de tout travail dérivé) suppose l'envoi simultané d'une licence autorisant la copie, la distribution ou la modification du
+ Programme, aux termes et conditions de la Licence. Vous n'avez pas le droit d'imposer de restrictions supplémentaires aux droits transmis au
+ destinataire. Vous n'êtes pas responsable du respect de la Licence par un tiers.
+
+
+ Article 7.
+ Si, à la suite d'une décision de Justice, d'une plainte en contrefaçon ou pour toute autre raison (liée ou non à la contrefaçon), des conditions Vous sont
+ imposées (que ce soit par ordonnance, accord amiable ou autre) qui se révèlent incompatibles avec les termes de la présente Licence, Vous n'êtes pas
+ pour autant dégagé des obligations liées à celle-ci : si Vous ne pouvez concilier vos obligations légales ou autres avec les conditions de cette Licence,
+ Vous ne devez pas distribuer le Programme.
+
+ Si une partie quelconque de cet article est invalidée ou inapplicable pour quelque raison que ce soit, le reste de l'article continue de s'appliquer et
+ l'intégralité de l'article s'appliquera en toute autre circonstance.
+
+ Le présent article n'a pas pour but de Vous pousser à enfreindre des droits ou des dispositions légales ni en contester la validité ; son seul objectif est de
+ protéger l'intégrité du système de distribution du Logiciel Libre. De nombreuses personnes ont généreusement contribué à la large gamme de Programmes
+ distribuée de cette façon en toute confiance ; il appartient à chaque auteur/donateur de décider de diffuser ses Programmes selon les critères de son
+ choix.
+
+
+ Article 8.
+ Si la distribution et/ou l'utilisation du Programme est limitée dans certains pays par des brevets ou des droits sur des interfaces, le détenteur original des
+ droits qui place le Programme sous la Licence Publique Générale peut ajouter explicitement une clause de limitation géographique excluant ces pays.
+ Dans ce cas, cette clause devient une partie intégrante de la Licence.
+
+
+ Article 9.
+ La Free Software Foundation se réserve le droit de publier périodiquement des mises à jour ou de nouvelles versions de la Licence. Rédigées dans le
+ même esprit que la présente version, elles seront cependant susceptibles d'en modifier certains détails à mesure que de nouveaux problèmes se font jour.
+
+ Chaque version possède un numéro distinct. Si le Programme précise un numéro de version de cette Licence et « toute version ultérieure », Vous avez le
+ choix de suivre les termes et conditions de cette version ou de toute autre version plus récente publiée par la Free Software Foundation. Si le Programme
+ ne spécifie aucun numéro de version, Vous pouvez alors choisir l'une quelconque des versions publiées par la Free Software Foundation.
+
+
+ Article 10.
+ Si Vous désirez incorporer des éléments du Programme dans d'autres Programmes libres dont les conditions de distribution diffèrent, Vous devez écrire
+ à l'auteur pour lui en demander la permission. Pour ce qui est des Programmes directement déposés par la Free Software Foundation, écrivez-nous : une
+ exception est toujours envisageable. Notre décision sera basée sur notre volonté de préserver la liberté de notre Programme ou de ses dérivés et celle de
+ promouvoir le partage et la réutilisation du logiciel en général.
+
+ LIMITATION DE GARANTIE
+
+ Article 11.
+ Parce que l'utilisation de ce Programme est libre et gratuite, aucune garantie n'est fournie, comme le permet la loi. Sauf mention écrite, les détenteurs du
+ copyright et/ou les tiers fournissent le Programme en l'état, sans aucune sorte de garantie explicite ou implicite, y compris les garanties de
+ commercialisation ou d'adaptation dans un but particulier. Vous assumez tous les risques quant à la qualité et aux effets du Programme. Si le Programme
+ est défectueux, Vous assumez le coût de tous les services, corrections ou réparations nécessaires.
+
+
+ Article 12.
+ Sauf lorsqu'explicitement prévu par la Loi ou accepté par écrit, ni le détenteur des droits, ni quiconque autorisé à modifier et/ou redistribuer le
+ Programme comme il est permis ci-dessus ne pourra être tenu pour responsable de tout dommage direct, indirect, secondaire ou accessoire (pertes
+ financières dues au manque à gagner, à l'interruption d'activités ou à la perte de données, etc., découlant de l'utilisation du Programme ou de
+ l'impossibilité d'utiliser celui-ci).
+
+
+ FIN DES TERMES ET CONDITIONS
diff --git a/gpl.txt b/gpl.txt
new file mode 100644
index 0000000..4f92538
--- /dev/null
+++ b/gpl.txt
@@ -0,0 +1,287 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+
+
diff --git a/greetings.txt b/greetings.txt
new file mode 100644
index 0000000..7abc3a7
--- /dev/null
+++ b/greetings.txt
@@ -0,0 +1,78 @@
+Information about this project:
+
+This project has been developed by:
+Xavier Roche (xroche@httrack.com)
+ for the main engine and Windows interface
+ and maintainer for v2.0 and v3.0
+Yann Philippot (yphilippot@lemel.fr)
+ for the java binary .class parser
+David Lawrie (dalawrie@lineone.net)
+Robert Lagadec (rlagadec@yahoo.fr)
+ for checking both English & French translations
+Juan Pablo Barrio Lera (University of León)
+ for Spanish translations
+Rainer Klueting (rainer@klueting.de)
+Bastian Gorke (bastiang@yahoo.com)
+Rudi Ferrari (Wyando@netcologne.de)
+Marcus Gaza (MarcusGaza@t-online.de)
+ for German translations
+Rudi Ferrari (Wyando@netcologne.de)
+ for Dutch translations
+Lukasz Jokiel (Opole University of Technology, Lukasz.Jokiel@po.opole.pl)
+ for Polish translations
+Rui Fernandes (CANTIC, ruiefe@mail.malhatlantica.pt)
+Pedro T. Pinheiro (Universidade Nova de Lisboa-FCT, ptiago@mail.iupi.pt)
+ for Portuguese translations
+Andrei Iliev (iliev@vitaplus.ru)
+ for Russian translations
+Witold Krakowski (wkrakowski@libero.it)
+ for Italian translations
+Jozsef Tamas Herczeg (hdodi@freemail.hu)
+ for Hungarian translation
+Paulo Neto (company@layout.com.br)
+ for Brazilian translation
+Brook Qin (brookqwr@sina.com)
+ for simplified Chinese translation
+Addy Lin (addy1975@pchome.com.tw)
+ for traditional Chinese translation
+Jesper Bramm (bramm@get2net.dk)
+ for Danish translation
+Tõnu Virma
+ for Estonian translation
+Staffan Ström (staffan@fam-strom.org)
+ for Swedish translation
+Mehmet Akif Köeoðlu (mak@ttnet.net.tr)
+ for Turkish translation
+Aleksandar Savic (aleks@macedonia.eu.org)
+ for Macedonian translation
+Takayoshi Nakasikiryo
+ for Japanese translation
+Martin Sereday (sereday@slovanet.sk)
+ for Slovak translation
+Antonín Matějčík (matejcik@volny.cz)
+ for Czech translation
+
+Thanks to:
+Kauler Leto (letok@optusnet.com.au)
+ for the site/logos design
+
+Special Thanks to:
+Patrick Ducrot & Daniel Carré (ENSI of Caen)
+ for their initial support
+Fred Cohen (fc@all.net)
+ for HTTrack user's guide
+
+Greetings to:
+Jean-loup Gailly from gzip.org
+ for the help provided with Zlib
+Eric A. Young (eay@cryptsoft.com)
+ for SSL cryptographic material (OpenSSL)
+
+Russ Freeman from gipsymedia
+ for hints on DLL dynamic load
+Paul DiLascia
+ for helping to fix problems with CHtmlView
+ISMRA/Ensi of Caen
+ for their initial support
+..and all users that are using and supporting HTTrack!
+
diff --git a/history.txt b/history.txt
new file mode 100644
index 0000000..616cbe3
--- /dev/null
+++ b/history.txt
@@ -0,0 +1,430 @@
+
+HTTrack Website Copier release history:
+--------------------------------------
+
+This file lists all changes and fixes that have been made for HTTrack.
+
+To do & Known bugs: (fixed soon)
+- Some interface problems in the Windows release
+
+
+3.20-2
++ New: HTTPS support (SSL)
++ New: ipv6 support
++ New: 'longdesc' added
++ New: new file 'new.txt' generated for transfer status reports
++ New: ISO9660 compatibility option
++ New: empty mirror/update detection improved
++ New: Update hack now recognizes "imported" files
++ New: Option to disable ipv4/ipv6
++ New: Filters now recognize patterns like -https://*
++ Fixed: The engine should be now fully reentrant
++ Fixed: Fixes for alpha and other 64-bit systems
++ Fixed: Files downloaded twice if not found in cache
++ Fixed: ftp problems with 2xx responses
++ Fixed: ftp problems with multiple lines responses
++ Fixed: ftp %20 not escaped anymore
++ Fixed: ftp RETR with quotes problems
++ Fixed: now tolerant to empty header responses
++ Fixed: hts-log closed
++ Fixed: Compressed pages during updates
++ Fixed: Crash when receiving empty compressed pages
++ Fixed: Random crashes in 'spider' mode
++ Fixed: bcopy/bzero not used anymore..
++ Fixed: various code cleanups
++ Fixed: Better UTF8 detection
++ Fixed: External links now work with https and ftp
++ Fixed: Top index.html corrupted or missing
++ Fixed: URL list crashes
++ Fixed: Random crashes with large sites due to bogus naming handler
++ Fixed: Freezes on some robots.txt files
++ Fixed: Compressed files not stored
++ Fixed: SVG fixes
++ Fixed: Raw HTML responses
++ Fixed: 406 error workaround
++ Fixed: Crashes due to binary files with bogus HTML type (not parsed anymore)
++ Fixed: External https and ftp links broken, relative https links broken
++ Fixed: Automatic resizing of filter stack
++ Fixed: Various ampersand (&) elements added
++ Fixed: https with proxy temporary workaround (direct connection)
++ Fixed: "base href" with absolute uris
++ Fixed: stack frame too large on some systems
++ Fixed: random bad requests due to bogus authentication
++ Shell: Several fixes, including registration type problems
++ Shell: "template files not found" fixed
+
+3.16-2
++ Fixed: Zlib v1.4
++ Fixed: Gzipped files now downloaded without problems (HTTP compression bug)
++ Fixed: Ending spaces in URLs now handled correctly
++ Fixed: META-HTTP bug
++ Shell: Type registration done only once
+
+3.15
++ Fixed: Bogus HTTP-referer with protected sites
++ Fixed: Fatal IO/socket error with large sites (handles not closed)
++ Fixed: K4 option now works
++ Fixed: --continue+URL(s) now clears previous URLs
++ Fixed: Parsing bug with 'www.foo.com?query'
++ Shell: 'Soft cancel' documented
++ Shell: 'Kx' options added
+
+3.10
++ Fixed: Broken pipes on Linux version
++ Fixed: Commandline version bug with gzipped files
++ Fixed: Crash when reaching compressed error pages
++ Fixed: Bogus html-escaped characters in query strings
++ Fixed: Files skipped (bogus anticipating system)
++ Fixed: Crash when showing stats (div by zero)
++ Fixed: Problems with URLs/redirects containing spaces or quotes
++ Fixed: Slash added when ~ detected
++ Fixed: Ugly VT terminal
++ New: Faster and cleaner mirror interrupt
+
+3.09
++ Fixed: Several problems with javascript parsing
++ Fixed: Elements after onXXX not parsed
++ New: Source update wrapper
++ New: Style url() and @import parsed
++ Shell: Word database and maximum number of links
++ Shell: Option changes taken in account immediately
++ Shell: Cleaner installer (registry keys)
+
+3.08
++ New: HTTP compression is now supported
++ New: Faster response analysis
++ Fixed: External page in html if cgi
++ Fixed: Mix between CR and CR/LF for comments
++ Fixed: Top index corrupted
++ Shell: Better refresh during parsing
++ Shell: DLL error
+
+3.07
++ Fixed: Random crashes with HTTP redirects
++ New: New rate limiter (should be sharper)
++ New: Code cleaned up, new htscore.c/httrack.c files
+
+3.06
++ Fixed: Redirect to https/mailto now supported
++ New: Top index/top dir for Un*x version
++ New: Sources more modular (.so)
++ New: Quicktime targetX= tags
++ New: HTTP 100 partially supported
+
+3.05
++ Fixed: Non-scannable tag parameters ("id","name",..)
++ Fixed: Java classes not found when using "." as separator
++ Fixed: Java classes not found when missing .class
+
+3.04
++ Fixed: URLs with starting spaces
++ Fixed: bogus URLs when using "base href"
++ Shell: --assume and -%e options included
++ New: Documentation updated a little
+
+3.03
++ New: Parser optimizations, 10 times faster now!
++ New: New --assume option to speed up cgi tests
++ New: Option to avoid Username/password storage for external pages
++ New: Query string kept for local URIs
++ Fixed: RFC2396 compliant URLs accepted (//foo and http:foo)
++ Fixed: foo@foo.com not considered as URL anymore
++ Fixed: Space encoded into %20 in URIs
++ Fixed: "Unable to save file" bug
++ Fixed: Corrupted top index.html
++ Fixed: Cookies disabled with --get
++ Fixed: Cache bug for error pages
+
+3.02
++ Fixed: Pages without title recorded in top index
++ Fixed: Error with Content-type+Content-disposition
++ Fixed: backblue.gif/external.html files not purged anymore
++ Fixed: Encoding problems with files containing %2F or other characters
++ Fixed: Write error reported for HTML files
++ New: hts-stop.lock file to pause the engine
++ New: New install system using InnoSetup
+
+3.01
++ New: HTTP real media files captured
++ Fixed: Bogus statistics
++ Fixed: Minor fixes
+
+3.00
++ New: New interface, with MANY improvements!
++ New: Better parsing (enhanced javascript parsing, sharper HTML parsing)
++ New: Faster and more efficient background download system
++ New: ETag properly handled
++ New: Optional URL list
++ New: Optional config file
++ New: New structure options
++ New: New filters options (size filters)
++ New: Better password site handling
++ New: Traffic control to avoid server overload
++ New: Setuid and Chroot for Unix release
++ New: limited 64-bit handling
++ New: .js files are now parsed
++ New: Single hts-log.txt file, error level
++ New: New top index.html design
++ New: "Update hack" option to prevent unnecessary updates
++ New: Default language sent for mirrors
++ New: Searchable index
++ Fixed: Bogus ftp routines (Linux version)
++ Fixed: Bug that caused to mirror a complete site from a subdir
++ Fixed: Bug that caused restart to be very slow
++ Fixed: Bug that caused loops on several query-string pages (?foo=/)
++ Fixed: Corrupted cache bug
++ Fixed: Random broken links (pages not downloaded)
++ Fixed: Shared links problems
++ Fixed: Bogus URLs with commas (,)
++ Fixed: Bogus / and \ mixed
++ Fixed: Bogus addresses with multiple @
++ Fixed: Bogus links with %2E and %2F
++ Fixed: Bogus empty links
++ Fixed: "Unexpected backing error" bug fixed
++ Fixed: Files with incorrect size no more accepted
++ Fixed: Top index.html created even for untitled pages
++ Fixed: Bogus N100 option (unable to save file)
++ Fixed: Deadlock when using many hosts in URLs
++ Fixed: Password stored internally to avoid access errors
++ Fixed: Fixed /nul DOS limit
++ Fixed: Bogus -* filter (nothing mirrored)
++ Fixed: .shtml now renamed into .html
++ Fixed: Content-disposition without ""
++ Fixed: External html page for /foo links
++ Fixed: Username/password % compliant
++ Fixed: Javascript parser sometimes failed with " and ' mixed
++ Fixed: Some Range: bugs when re-getting complete files
++ Fixed: Range: problems with html files
++ Fixed: HTTP/1.1 407 and 416 messages now handled
++ Fixed: Bogus timestamp
++ Fixed: Null chars in HTML bug
++ Fixed: Error pages cache bug
++ Fixed: Connect error/site moved do not delete everything anymore!
++ Fixed: Bogus garbage ../ in relative URL
++ Shell: New transfer rate estimation
++ Shell: Fixed crash when using verbose wizard
++ Shell: dynamic lang.h for easier translation updates
++ Shell: Fixed some options not passed to the engine
++ Fixed: A lot of minor fixes!
+
+2.2
+Note: 3.00 alpha major bug fixes are included in the 2.2
+
+2.02
++ New: Cache system improved, compatible with all platforms
++ New: Update process improved (accurate date)
++ New: Remote timestamp for files
++ New: ETag (HTTP/1.1) supported
++ Shell: Portuguese interface available
++ Fixed: Bug with links containing commas
++ Fixed: 'file://' bug with proxy
++ New: Engine a little bit faster
++ Shell: Some bugs fixed in the interface
+
+2.01
++ New: ftp through proxy finally supported!
++ New: Sources cleaned up
++ New: Again some new marvelous options
++ New: Speed improved (links caught during parsing, faster "fast update")
++ New: Tool to catch "submit" URL (forms or complex javascript links)
++ Shell: German interface available
++ Shell: Dutch interface available
++ Shell: Polish interface available
++ Fixed: Level 1 bug fixed
++ Fixed: Still some parsing/structure problems
++ Fixed: Referer now sent to server
++ Fixed: Cookies did not work properly
++ Fixed: Problems with redirect pages
++ New: Better javascript parsing
++ Fixed: Problems with URL-parameters (foo.cgi?param=2&choice=1)
++ Fixed: Problems with ftp
++ New: ftp transfers are now in passive mode (firewall compliant)
+
+2.00 -- The First Free Software Release of HTTrack!
++ New: HTTrack sources (command line), now free software, are given
++ Shell: Interface rewritten!
++ New: Documentation rewritten
++ Shell: Drag&Drop abilities
++ Shell: More URL informations
++ Shell: Fixed: Remote access problems
++ Fixed: Loop problems on some sites causing crashes
++ Fixed: URL encoding problems
++ Fixed: Some file access problems for ../
++ Fixed: Some fixes for updating a mirror
++ Shell: Crazy progress bar fixed
++ Fixed: Form action are rewritten so that cgi on form can work from an offline mirror
++ Fixed: Crashes after continuing an "hand-interrupted" mirror
++ Fixed: Bogus files with some servers (chunk bug)
+
+1.30
++ Shell: Interface improved
++ New: robots.txt are followed by default
++ New: Parsing speed improved on big (>10,000 links) sites with an hash table
++ New: Mirror Link mode (mirror all links in a page)
++ New: Cookies are now understood
++ New: No external pages option (replace external html/gif by default files)
++ New: Command line version improved, background on Unix releases
++ Fixed: Problems with javascript parsing
++ Fixed: Username/password not set to lowercase anymore
++ Fixed: Problems with base href
++ New: Links in level 1 html files now patched
++ New: Expurge now deletes unused folders
++ New: Option -V executes shell command for every new file
++ Shell: Primary filter now works
+
+1.24
++ Fixed: Ftp protocol bogus (with login/pass)
++ Fixed: Cache problems (corrupted files)
++ New: Expurge old files for updates
++ New: "Updated" messages for mirror updates
++ Shell: Autodial/hangup option to RAS
++ Fixed: index.html were not created sometimes
++ Shell: Fixed: Random crashes with the interface
++ Shell: Fixed: Filters profile not saved
++ Fixed: Various (and numerous) fixes
+
+1.23
++ Shell: Interface improved
++ Shell: Multiple setups
++ Shell: Redefine options
++ Shell: Continue interrupted mirror improved
+
+1.22
++ Fixed: Parsing up/down did not work well
++ Fixed: Several files not caught, bug fixes
++ Fixed: Problems with classes (1.21)
++ New: Transfer rate can be limited (-A option)
++ Shell: Smooth refresh
++ New: ftp basic protocol a little bit improved
+
+1.21
++ Fixed: Several java classes were not parsed
++ Fixed: Some folders without ending / ignored
++ Fixed: Crashes due to content-type too long
+
+1.20
++ Shell: documentation!
++ Fixed: Some problems with 'host cancel' system after timeouts (crashes)
++ New: Get only end of files if possible (file partially get)
++ New: New cache system (only HTML stored)
++ New: User-defined structure possible
++ New: Also available: french interface
++ Fixed: Random crashes (div by 0/illegal instruction) with null size files
++ New: Limited ftp protocol (files only), e.g. +ftp://* now works
++ Fixed: Some connect problems with several servers or proxies
++ New: New option, save html error report by default
++ Shell: Browse and see log files at the end of a mirror
++ New: Proxy authentication (ex: guest:star@myproxy.com:8080)
++ Shell: Interface improved (especially during mirror)
++ Fixed: Ambiguous files are renamed (asp,cgi->html/gif..)
++ Shell: New test link mode option
++ New: Site authentication (ex: guest:star@www.myweb.com/index.html)
++ Fixed: Minor bugs fixed
++ Shell: See log files during a mirror
++ Fixed: Some problems using CGI (different names now)
++ Fixed: Go down/up/both options and filters
++ Fixed: "Store html first" did not work
++ New: -F option ("Browser ID") disguise HTTrack into a browser
++ New: New filter system
++ Shell: New "Save as default" options
++ Fixed: "Build options" did NOT work properly! (files overwritten or missing)
++ Fixed: User agent ID fixed
++ Shell: Skip options
++ Shell: Better interface control during mirrors
++ Shell: InstallShield and Help files
++ Fixed: Some external links were not filtered sometimes
++ Fixed: Mirror crash at the end
+
+1.16b
++ Shell: Really *stupid* bug fixed causing WinHTTrack to be slooow
++ Fixed: Crash if the first page has no title fixed
++ Fixed: Bogus options like "Just scan" saved empty files
++ Fixed: Forbid all links (*) with manual accept did not work
++ Shell: Filters interface improved
+1.16:
++ New : Java Classes and subclasses are now retrieved!
++ New: Better JavaScripts parsing
++ New: Option: Abandon slowest hosts if timeout/transfer too slow
++ Shell: Interface improved
+
+1.15b
++ Fixed: Some bugs fixed
+1.15:
++ Shell: Interface improved
++ New: Robot improved (some files through javascript are now detected!)
++ New: Improved wild cards (for example, +www.*.com/*.zip)
++ New: 'config' file to configure proxy, path.. only once
+
+1.11
++ New: Wait for specific time (begin transfer at specific hour)
++ New: Time limit option (stops transfer after x seconds)
++ Shell: Interface improved for an easy use
+
+1.10e
++ Fixed: Maps were not correctly managed (stupid bug)
+1.10d:
++ Fixed: Bogus index.html fixed
+1.10c
++ Shell: "Time out" field needed "transfer rate" field
+1.10b
++ Fixed: Better memory management
+1.10
++ New: "Transfer rate out" option added (abandon slowest sites)
++ New: "Deaf" hosts do not freeze HTTrack any more
++ Fixed: Again problems with code/codebase tags
++ New: Broken links detection improved
+
+1.04
++ Fixed:Some links were not correctly read (pages with "codebase" tags)
++ Shell: Interface improved
+
+1.03 (No changes for the command-line robot)
++ Shell: Big bug fixed! (VERY slow transfer rates..)
+
+1.02
++ Fixed: Some java files were not correctly transferred
++ New: Speed has been improved
++ Fixed: Log file more accurate
++ Shell: Interface has been improved
+
+1.01
++ Fixed: Structure check error in some cases
+
+1.00 -- The 1.00, Yeah!
++ New: base and codebase are now scanned
+
+0.998 beta-2
++ Fixed: Multiple name bug (files having the same name in the same directory) with -O option fixed
+
+0.997 beta-2
++ Fixed: Filenames with '%' were not correctly named
++ Fixed: Bug detected in 0.996: several files are not written on disk!!
+
+0.996 beta-2
++ New: -O option (path for mirror and log)
++ New: Unmodified file time/date are not changed during an update
+
+0.99 beta-2
++ New: User-agent field
++ New: Shortcuts (--spider etc.)
++ New: Links not retrieved are now rebuilt absolutely
++ New: The 'g' option (just get files in current directory) has been added
++ New: Primary links analyste has been improved
++ Fixed: "304" bug fixed
+
+0.25 beta-2
++ Fixed: Freeze during several mirrors fixed!
++ New: More 'N' options (filenames type)
+
+0.24 beta-2
++ Fixed: Restart/Update with cache did not work (really not..)
++ Fixed: Wild cards now work properly (e.g. +www.abc.com* does work)
++ New: The 'n' option (get non-html files near a link) has been added!
+
+0.23 beta-2
++ Fixed: The 'M' option (site size) did not work
++ Fixed: Files larger than 65Kb were not correctly written
+
+older beta
++ Many, many bugs fixed
+
diff --git a/httrack-doc.html b/httrack-doc.html
new file mode 100644
index 0000000..4141763
--- /dev/null
+++ b/httrack-doc.html
@@ -0,0 +1,10 @@
+<!doctype html>
+<html lang="en">
+<head> <!-- redirect stub: sends the browser straight to the help index; the link in the body is the fallback -->
+<title>Documentation</title>
+<meta http-equiv="refresh" content="0; url=HelpHtml/index.html">
+</head>
+<body>
+<a href="HelpHtml/index.html">Documentation</a>
+</body>
+</html>
diff --git a/lib/example.c b/lib/example.c
new file mode 100644
index 0000000..33a7cc1
--- /dev/null
+++ b/lib/example.c
@@ -0,0 +1,156 @@
+/*
+ HTTrack library example
+ .c file
+
+ To Build on Windows:
+ - compile everything in src/ BUT htsparse.c, compile example.c
+ - multithreaded
+ - avoid precompiled headers with VC
+
+ To Build on Linux:
+ make lib_linux (or "make lib_netbsd", or "make lib_default" and so on)
+ cp htssystem.h src/htssystem.h
+ make build_httracklib
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "src/httrack-library.h"
+
+#include "example.h"
+
+
+
+/*
+ * Name: main
+ * Description: main() function
+ * Parameters: None
+ * Should return: error status
+*/
+int main(void) {
+  /*
+    Ask for a URL, register every callback with the library, then run one test mirror.
+    Options passed: -r3 and --testscan (no index, no cache, do not store, no log files)
+  */
+  char _argv[][256] = {"httrack_test" , "<URL>" , "-r3" , "--testscan" , "" };
+  char* argv[] = {NULL , NULL , NULL , NULL , NULL};
+  int argc = 0, c;
+  while(strlen(_argv[argc])) {                  /* the empty string ends the option list */
+    argv[argc]=_argv[argc];
+    argc++;
+  }
+  argv[argc]=NULL;
+  printf("HTTrackLib test program\n");
+  printf("Enter URL (example: www.foobar.com/index.html) :");
+  if (scanf("%255s",argv[1]) != 1) return 1;    /* bounded: each _argv row holds 256 bytes; no URL -> error status */
+  printf("Test: 1 depth\n");
+  while((c=getchar()) != '\n' && c != EOF);     /* drop the rest of the URL line so the final wait really waits */
+  hts_init();
+  htswrap_add("init",httrack_wrapper_init);
+  htswrap_add("free",httrack_wrapper_uninit);
+  htswrap_add("start",httrack_wrapper_start);
+  htswrap_add("change-options",httrack_wrapper_chopt);
+  htswrap_add("end",httrack_wrapper_end);
+  htswrap_add("check-html",httrack_wrapper_checkhtml);
+  htswrap_add("loop",httrack_wrapper_loop);
+  htswrap_add("query",httrack_wrapper_query);
+  htswrap_add("query2",httrack_wrapper_query2);
+  htswrap_add("query3",httrack_wrapper_query3);
+  htswrap_add("check-link",httrack_wrapper_check);
+  htswrap_add("pause",httrack_wrapper_pause);
+  htswrap_add("save-file",httrack_wrapper_filesave);
+  htswrap_add("link-detected",httrack_wrapper_linkdetected);
+  htswrap_add("transfer-status",httrack_wrapper_xfrstatus);
+  htswrap_add("save-name",httrack_wrapper_savename);
+
+  /* Then, launch the mirror */
+  hts_main(argc,argv);
+
+  /* Wait for ENTER: read up to the next newline (a "%s" scan skips newlines and would not return) */
+  printf("\nPress ENTER key to exit\n");
+  while((c=getchar()) != '\n' && c != EOF);
+
+  /* That's all! */
+  return 0;
+}
+
+
+/* CALLBACK FUNCTIONS */
+
+/* "init" callback: announces the engine start and, on Windows builds, initializes Winsock 1.1 */
+void __cdecl httrack_wrapper_init(void) {
+  printf("Engine started\n");
+#ifdef _WIN32
+  {
+    WORD wVersionRequested;   /* requested version of the WinSock API */
+    WSADATA wsadata;          /* Windows Sockets API data filled in by WSAStartup */
+    int stat;
+    wVersionRequested = 0x0101;   /* version 1.1 */
+    stat = WSAStartup( wVersionRequested, &wsadata );
+    if (stat != 0) {
+      printf("Winsock not found!\n");
+      return;
+    } else if (LOBYTE(wsadata.wVersion) != 1 || HIBYTE(wsadata.wVersion) != 1) {   /* reject if EITHER byte differs from 1.1 */
+      printf("WINSOCK.DLL does not support version 1.1\n");
+      WSACleanup();
+      return;
+    }
+  }
+#endif
+
+}
+void __cdecl httrack_wrapper_uninit(void) { /* registered in main() as the "free" callback: announces engine exit */
+ printf("Engine exited\n");
+#ifdef _WIN32
+ WSACleanup(); /* Windows builds only: releases Winsock */
+#endif
+}
+int __cdecl httrack_wrapper_start(httrackp* opt) { /* registered in main() as the "start" callback; opt is not used */
+ printf("Start of mirror\n");
+ return 1; /* NOTE(review): 1 presumably lets the engine proceed - confirm against the library */
+}
+int __cdecl httrack_wrapper_chopt(httrackp* opt) { /* "change-options" callback: handled exactly like "start" */
+  return httrack_wrapper_start(opt); /* plain call: a calling-convention keyword is only valid in declarations */
+}
+int __cdecl httrack_wrapper_end(void) { /* registered in main() as the "end" callback: announces the end of the mirror */
+ printf("End of mirror\n");
+ return 1; /* always 1; NOTE(review): meaning of the value is defined by the library - confirm */
+}
+int __cdecl httrack_wrapper_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { /* "check-html" callback; html and len are not used */
+ printf("Parsing html file: http://%s%s\n",url_adresse,url_fichier); /* address and file parts are printed back to back */
+ return 1; /* NOTE(review): 1 presumably means "go on and parse this page" - confirm */
+}
+int __cdecl httrack_wrapper_loop(void* _back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) { /* "loop" callback; every argument is ignored */
+ /* printf("..httrack_wrapper_loop called\n"); */
+ return 1; /* NOTE(review): 1 presumably keeps the mirror running - confirm */
+}
+char* __cdecl httrack_wrapper_query(char* question) { /* "query" callback: the question is ignored */
+ return "N"; /* same answer to every question; returns a string literal, callers must not modify it */
+}
+char* __cdecl httrack_wrapper_query2(char* question) { /* "query2" callback: the question is ignored */
+ return "N"; /* same answer to every question; returns a string literal, callers must not modify it */
+}
+char* __cdecl httrack_wrapper_query3(char* question) { /* "query3" callback: the question is ignored */
+ return ""; /* always answers with an empty string literal */
+}
+int __cdecl httrack_wrapper_check(char* adr,char* fil,int status) { /* "check-link" callback; status is not used */
+ printf("Link status tested: http://%s%s\n",adr,fil);
+ return -1; /* NOTE(review): -1 presumably means "no decision, keep the engine's own verdict" - confirm */
+}
+void __cdecl httrack_wrapper_pause(char* lockfile) { /* "pause" callback: intentionally empty, lockfile is ignored */
+}
+void __cdecl httrack_wrapper_filesave(char* file) { /* "save-file" callback: intentionally empty, file is ignored */
+}
+void __cdecl httrack_wrapper_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { /* "save-name" callback: does nothing as shipped */
+/* strcpy(save, "foo.html"); */ /* disabled example; presumably writing into save overrides the local name - confirm */
+}
+int __cdecl httrack_wrapper_linkdetected(char* link) { /* "link-detected" callback: prints each link it is given */
+ printf("Link detected: %s\n",link);
+ return 1; /* NOTE(review): 1 presumably accepts the link - confirm */
+}
+int __cdecl httrack_wrapper_xfrstatus(void* back) { /* "transfer-status" callback: back is ignored */
+ return 1; /* always 1; NOTE(review): meaning of the value is defined by the library - confirm */
+}
+
diff --git a/lib/example.dsp b/lib/example.dsp
new file mode 100644
index 0000000..4da0cb4
--- /dev/null
+++ b/lib/example.dsp
@@ -0,0 +1,311 @@
+# Microsoft Developer Studio Project File - Name="example" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=example - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "example.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "example.mak" CFG="example - Win32 Debug"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "example - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "example - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "example - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD BASE RSC /l 0x40c /d "NDEBUG"
+# ADD RSC /l 0x40c /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 wsock32.lib /nologo /subsystem:console /machine:I386
+
+!ELSEIF "$(CFG)" == "example - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /GZ /c
+# SUBTRACT CPP /YX
+# ADD BASE RSC /l 0x40c /d "_DEBUG"
+# ADD RSC /l 0x40c /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 wsock32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+
+!ENDIF
+
+# Begin Target
+
+# Name "example - Win32 Release"
+# Name "example - Win32 Debug"
+# Begin Source File
+
+SOURCE=.\example.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\example.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsalias.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsalias.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsback.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsback.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsbase.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsbasenet.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsbauth.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsbauth.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscache.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscache.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscatchurl.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscatchurl.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscore.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscore.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscoremain.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htscoremain.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsdefines.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsfilters.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsfilters.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsftp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsftp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsglobal.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htshash.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htshash.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htshelp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htshelp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsindex.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsindex.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsjava.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsjava.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htslib.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htslib.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsmd5.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsmd5.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsname.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsname.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsnet.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsopt.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsrobots.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsrobots.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsthread.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htsthread.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htstools.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htstools.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htswizard.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htswizard.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htswrap.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\htswrap.h
+# End Source File
+# Begin Source File
+
+SOURCE=".\src\httrack-library.h"
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\httrack.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\httrack.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\md5.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\src\md5.h
+# End Source File
+# End Target
+# End Project
diff --git a/lib/example.dsw b/lib/example.dsw
new file mode 100644
index 0000000..8ddad80
--- /dev/null
+++ b/lib/example.dsw
@@ -0,0 +1,29 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "example"=.\example.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/lib/example.h b/lib/example.h
new file mode 100644
index 0000000..2ea0a67
--- /dev/null
+++ b/lib/example.h
@@ -0,0 +1,27 @@
+/*
+ HTTrack library example
+ .h file
+*/
+
+#ifdef _WIN32   /* same platform macro as example.c and htssystem.h; __cdecl is a compiler keyword here */
+#else
+#define __cdecl /* other platforms: the keyword does not exist, so it expands to nothing */
+#endif
+
+void __cdecl httrack_wrapper_init(void);
+void __cdecl httrack_wrapper_uninit(void);
+int __cdecl httrack_wrapper_start(httrackp* opt);
+int __cdecl httrack_wrapper_chopt(httrackp* opt);
+int __cdecl httrack_wrapper_end(void);
+int __cdecl httrack_wrapper_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
+int __cdecl httrack_wrapper_loop(void* _back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+char* __cdecl httrack_wrapper_query(char* question);
+char* __cdecl httrack_wrapper_query2(char* question);
+char* __cdecl httrack_wrapper_query3(char* question);
+int __cdecl httrack_wrapper_check(char* adr,char* fil,int status);
+void __cdecl httrack_wrapper_pause(char* lockfile);
+void __cdecl httrack_wrapper_filesave(char* file);
+int __cdecl httrack_wrapper_linkdetected(char* link);
+int __cdecl httrack_wrapper_xfrstatus(void* back);
+void __cdecl httrack_wrapper_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+
diff --git a/lib/htssystem.h b/lib/htssystem.h
new file mode 100644
index 0000000..5112ece
--- /dev/null
+++ b/lib/htssystem.h
@@ -0,0 +1,12 @@
+/*
+ HTTrack library example: fixed platform settings (the src/ build generates its own htssystem.h from Makefile.in)
+*/
+
+#define HTS_PLATFORM 1 /* 1 is "Windows" in the list echoed by src/Makefile.in (Solaris 0, Windows 1, AIX 2, Linux 3) */
+#define HTS_ANALYSTE 1 /* NOTE(review): meaning of 1 is not visible here; src/Makefile.in writes 2 for "Extended functions" */
+#define HTS_PLATFORM_NAME "example"
+#ifdef _WIN32
+#define HTS_WIN 1
+#define HTS_USEZLIB 0 /* 0 is the value src/configure writes when zlib is not used */
+#endif
+
diff --git a/lib/readme.txt b/lib/readme.txt
new file mode 100644
index 0000000..658cfdf
--- /dev/null
+++ b/lib/readme.txt
@@ -0,0 +1,35 @@
+HTTrack library example
+-----------------------
+
+Here is an example of how to integrate HTTrack Website Copier into a project
+to use it as a "core engine". Copy the src/ directory and the Makefile.*/configure
+scripts in lib/ to have a working example.
+
+
+Important Notice:
+----------------
+
+These sources are covered by the GNU General Public License (see below)
+(Projects based on these sources must follow the GPL, too)
+
+
+Copyright notice:
+----------------
+
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
diff --git a/license.txt b/license.txt
new file mode 100644
index 0000000..2de9d1a
--- /dev/null
+++ b/license.txt
@@ -0,0 +1,40 @@
+HTTrack Website Copier License Agreement:
+
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private informations on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+- PLEASE REMEMBER THAT EXPORT/IMPORT AND/OR USE OF STRONG CRYPTOGRAPHY
+SOFTWARE, PROVIDING CRYPTOGRAPHY HOOKS OR EVEN JUST COMMUNICATING
+TECHNICAL DETAILS ABOUT CRYPTOGRAPHY SOFTWARE IS ILLEGAL IN SOME
+PARTS OF THE WORLD. SO, WHEN YOU IMPORT THIS PACKAGE TO YOUR
+COUNTRY, RE-DISTRIBUTE IT FROM THERE OR EVEN JUST EMAIL TECHNICAL
+SUGGESTIONS OR EVEN SOURCE PATCHES TO THE AUTHOR OR OTHER PEOPLE
+YOU ARE STRONGLY ADVISED TO PAY CLOSE ATTENTION TO ANY EXPORT/IMPORT
+AND/OR USE LAWS WHICH APPLY TO YOU. THE AUTHORS ARE NOT LIABLE FOR
+ANY VIOLATIONS YOU MAKE HERE. SO BE CAREFUL, IT IS YOUR RESPONSIBILITY.
+
+
+Contacting us / support:
+Please refer to the README file
+
diff --git a/man/httrack.1.gz b/man/httrack.1.gz
new file mode 100644
index 0000000..b56bbac
--- /dev/null
+++ b/man/httrack.1.gz
Binary files differ
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..be898d9
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,10 @@
+all : # placeholder target: src/configure overwrites this Makefile with a copy of Makefile.in
+ @echo "please launch configure before! :"
+ @echo "./configure"
+ @echo ""
+ @echo "then, you can launch:"
+ @echo "make"
+ @echo "make install"
+ @echo ""
+ @echo "(see INSTALL file to know how-to-install)"
+
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000..57bf1d3
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,417 @@
+# HTTrack Website Copier, Offline Browser for Windows and Unix
+# Copyright (C) Xavier Roche and other contributors
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Version: 1.3 (09/2001)
+# Usage: just type 'make'
+
+# Generated by configure
+AUTONAME =
+#__AUTONAME__
+
+
+### Makefile commands
+SHELL = /bin/sh
+MAIN =
+CC = gcc
+CFLAGS = __CFLAGS__
+# threads
+LFLAGS = __LFLAGS__ __LFLAGS2__ __LFLAGS3__
+
+
+BINARIES = htscore.o htsback.o htscache.o\
+ htscatchurl.o htsfilters.o htsftp.o htshash.o\
+ htshelp.o htsjava.o htslib.o htscoremain.o\
+ htsname.o htsrobots.o htstools.o htswizard.o\
+ htsalias.o htsthread.o htsindex.o\
+ htsbauth.o htsmd5.o\
+ htswrap.o md5.o htszlib.o\
+ htsnostatic.o \
+ httrack.o
+
+SOBINARIES = htscore.o htsback.o htscache.o\
+ htscatchurl.o htsfilters.o htsftp.o htshash.o\
+ htshelp.o htsjava.o htslib.o htscoremain.o\
+ htsname.o htsrobots.o htstools.o htswizard.o\
+ htsalias.o htsthread.o htsindex.o\
+ htsbauth.o htsmd5.o htszlib.o\
+ htsnostatic.o \
+ htswrap.o md5.o
+
+MAINBIN = httrack.c
+
+BOUTPUT = httrack
+BOUTPUTSO = libhttrack.so
+DOCS = ../HelpHtml ../templates ../httrack-doc.html ../COPYING ../INSTALL ../README ../*.txt
+HTSSYSTEM = htssystem.h
+BINPATH = __BINPATH__
+ETCPATH = __ETCPATH__
+LIBPATH = __LIBPATH__
+PREFIX = __PREFIX__
+
+## Defines for "library" (program compiled with HTTrack); every core object is taken from src/
+BINARIES_LIB = example.o httracklib.o\
+ src/htscore.o src/htsback.o src/htscache.o\
+ src/htscatchurl.o src/htsfilters.o src/htsftp.o src/htshash.o\
+ src/htshelp.o src/htsjava.o src/htslib.o src/htscoremain.o\
+ src/htsname.o src/htsrobots.o src/htstools.o src/htswizard.o\
+ src/htsalias.o src/htsthread.o src/htsindex.o\
+ src/htsbauth.o src/htsmd5.o src/htszlib.o\
+ src/htsshow.o src/htswrap.o\
+ src/htsnostatic.o \
+ src/md5.o
+BOUTPUT_LIB = example
+HTSSYSTEM_LIB = src/htssystem.h
+
+# in_addr_t problems :
+# In case of problems during compiling,
+# make htss
+# edit htssystem.h and add the following line:
+# #define HTS_DO_NOT_REDEFINE_in_addr_t
+# make manual
+
+
+# Keywords for build types (example: make linux)
+MAKE_LINUX = linux
+MAKE_NETBSD = netbsd
+MAKE_OPENBSD = openbsd
+MAKE_SOLARIS = solaris
+MAKE_AIX = aix
+MAKE_STD = standard
+MAKE_STD2 = standard2
+MAKE_STD3 = standard3
+MAKE_STD4 = standard4
+###
+MAKE_LIB = build_httracklib
+###
+MAKE_AUTO = auto
+###
+
+# First, detect OS Type
+# If your make does not recognize this, change it!
+SHORTUNAME = $(shell uname)
+FULLUNAME = $(shell uname -a)
+
+### Targets:
+
+# Default target, attempt to use uname if necessary
+# if uname is empty, display info message
+all :
+ @if test -n "$(AUTONAME)"; then\
+ $(MAKE) $(AUTONAME);\
+ elif test -n "$(SHORTUNAME)"; then\
+ $(MAKE) $(SHORTUNAME);\
+ else\
+ $(MAKE) help;\
+ fi
+
+# If we cannot detect the OS type, show an informational message
+help :
+ @clear
+ @echo ""
+ @echo "Welcome to HTTrack Website Copier install!"
+ @echo "-----------------------------------------"
+ @echo ""
+ @echo "1. To make HTTrack, just type in:"
+ @echo " make $(MAKE_LINUX)"
+ @echo " or"
+ @echo " make $(MAKE_NETBSD)"
+ @echo " or"
+ @echo " make $(MAKE_OPENBSD)"
+ @echo " or"
+ @echo " make $(MAKE_SOLARIS)"
+ @echo " or"
+ @echo " make $(MAKE_AIX)"
+ @echo " or"
+ @echo " make $(MAKE_STD)"
+ @echo " or (problems with in_addr_t)"
+ @echo " make $(MAKE_STD2)"
+ @echo " or (problems with 64-bit)"
+ @echo " make $(MAKE_STD3)"
+ @echo " or (problems with both in_addr_t and 64-bit)"
+ @echo " make $(MAKE_STD4)"
+ @echo ""
+ @echo "According to your OS type"
+ @echo "(example: type in 'make $(MAKE_LINUX)' if you compile HTTrack with linux)"
+ @echo
+ @echo "Or, if it does not work, you can try "
+ @echo " make htss"
+ @echo " edit htssystem.h (check OS type), and add the following line:"
+ @echo " #define HTS_DO_NOT_REDEFINE_in_addr_t"
+ @echo " make manual"
+ @echo
+ @echo "2. Then, type in 'make install' to copy httrack to $(BINPATH)"
+ @echo " or just use ./httrack to launch the program"
+ @echo ""
+ @echo "3. Build problems, type in:"
+ @echo " make moreinfo"
+ @echo ""
+ @echo "Have fun with HTTrack Website Copier!"
+ @echo ""
+info : help
+
+# Troubleshooter
+moreinfo :
+ @echo "Known problems:"
+ @echo ""
+ @echo "\`in_addr_t' undeclared (first use this function)"
+ @echo "see in_addr_t problems in Makefile"
+ @echo ""
+
+###
+
+## Build Targets (this is the name given by 'uname')
+Linux : $(MAKE_LINUX)
+SunOS : $(MAKE_SOLARIS)
+AIX : $(MAKE_AIX)
+NetBSD : $(MAKE_NETBSD)
+OpenBSD : $(MAKE_OPENBSD)
+
+### Build Targets (standard types)
+default : firstinfo htssystem htssystem_default build_default strip clean lastinfo
+$(MAKE_LINUX) : firstinfo htssystem htssystem_linux build_default strip clean lastinfo
+$(MAKE_SOLARIS) : firstinfo htssystem htssystem_solaris build_solaris strip clean lastinfo
+$(MAKE_AIX) : firstinfo htssystem htssystem_aix build_default strip clean lastinfo
+$(MAKE_NETBSD) : firstinfo htssystem htssystem_netbsd build_default strip clean lastinfo
+$(MAKE_OPENBSD) : firstinfo htssystem htssystem_openbsd build_nopthread strip clean lastinfo
+$(MAKE_STD) : firstinfo htssystem htssystem_default build_default strip clean lastinfo
+$(MAKE_STD2) : firstinfo htssystem htssystem_default2 build_default strip clean lastinfo
+$(MAKE_STD3) : firstinfo htssystem htssystem_default3 build_default strip clean lastinfo
+$(MAKE_STD4) : firstinfo htssystem htssystem_default4 build_default strip clean lastinfo
+### Defines for "library" (program compiled with HTTrack)
+$(MAKE_LIB) : build_lib strip_lib clean_lib lastinfo
+###
+$(MAKE_AUTO) : __MAKEAUTO__
+###
+
+## Defines for OSes
+lib_default : htssystem htssystem_default addhtssystem_lib info_lib
+lib_linux : htssystem htssystem_linux addhtssystem_lib info_lib
+lib_solaris : htssystem htssystem_solaris addhtssystem_lib info_lib
+lib_aix : htssystem htssystem_aix addhtssystem_lib info_lib
+lib_netbsd : htssystem htssystem_netbsd addhtssystem_lib info_lib
+lib_openbsd : htssystem htssystem_openbsd addhtssystem_lib info_lib
+lib_std : htssystem htssystem_default addhtssystem_lib info_lib
+
+
+# manual build
+htss : htssystem htssystem_default
+manual : build_default strip clean lastinfo
+
+# Creates htssystem.h file
+htssystem :
+ @echo "/* HTTrack, Offline Browser for Windows and Unix */" > $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "/* HTTrack system definition */" >> $(HTSSYSTEM)
+ @echo "/* This should be the only file you have to change */" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "/* Fix plateform number to 0 (SunOS) */" >> $(HTSSYSTEM)
+ @echo "/* If it doesn't compile, try another one */" >> $(HTSSYSTEM)
+
+htssystem_solaris :
+ @echo "#define HTS_PLATFORM 0" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_aix :
+ @echo "#define HTS_PLATFORM 2" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_linux :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_netbsd:
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_openbsd:
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_USE_PTHREAD" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_USE_UID" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_nopthread:
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_USE_PTHREAD" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default2 :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default3 :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default4 :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+# Generated by configure
+htssystem_auto :
+ @echo "#define HTS_PLATFORM __PLATFORM__" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEUID__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEINA__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEPTH__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINE64B__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEFTI__" >> $(HTSSYSTEM)
+ @echo "#define HTS_PREFIX \"__DEFINEPRE__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_BINPATH \"__BINPATH__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_ETCPATH \"__ETCPATH__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_LIBPATH \"__LIBPATH__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_USEZLIB __ZLIB__" >> $(HTSSYSTEM)
+ @echo "#define HTS_ALIGN __PTRALIGN__" >> $(HTSSYSTEM)
+ @echo "#define HTS_INET6 __INET6__" >> $(HTSSYSTEM)
+ @echo "#define HTS_USEOPENSSL __SSL__" >> $(HTSSYSTEM)
+
+## Defines for "library" (program compiled with HTTrack)
+addhtssystem_lib :
+ @echo "/* Extended functions */" >> $(HTSSYSTEM)
+ @echo "#define HTS_ANALYSTE 2" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+# Info message before build
+firstinfo :
+ @echo "Building all, please wait"
+ @echo "In case of problems, type in:"
+ @echo "make help"
+ @echo ""
+ @echo "OS TYPE: $(SHORTUNAME)"
+ @echo "Make mode: $(MAKECMDGOALS)"
+ @echo
+
+##
+info_lib :
+ @echo "Please copy htssystem.h to src/htssystem.h by typing:"
+ @echo "cp htssystem.h src/htssystem.h"
+
+### Targets for compiling
+build_solaris : $(BINARIES)
+ $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS) -lnsl -lsocket
+ chmod 755 $(BOUTPUT)
+
+build_nopthread: $(BINARIES) # NOTE(review): NOPCFLAGS/NOPLFLAGS are never assigned in this Makefile, so they expand to nothing unless supplied externally
+ $(CC) $(NOPCFLAGS) $(BINARIES) -o $(BOUTPUT) $(NOPLFLAGS)
+ chmod 755 $(BOUTPUT)
+
+build_default : $(BINARIES)
+ $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS)
+ chmod 755 $(BOUTPUT)
+
+## Defines for "library" (program compiled with HTTrack)
+build_lib : $(BINARIES_LIB)
+ $(CC) $(CFLAGS) $(BINARIES_LIB) -o $(BOUTPUT_LIB) $(LFLAGS)
+ chmod 644 $(BOUTPUT_LIB)
+
+## Auto
+build_auto : build_bin__DYNAMIC__
+
+build_bin : $(BINARIES)
+ $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS)
+ chmod 755 $(BOUTPUT)
+
+build_binso : $(SOBINARIES)
+ $(CC) $(CFLAGS) -shared -Wl,-x,-soname,$(BOUTPUTSO) -o $(BOUTPUTSO) $(SOBINARIES) -lc $(LFLAGS)
+ $(CC) -L. -lhttrack $(MAINBIN) -o $(BOUTPUT)
+ chmod 755 $(BOUTPUT)
+
+##
+# Strip the file so that it is smaller
+strip :
+ strip --strip-all $(BOUTPUT) || strip $(BOUTPUT)
+ __STRIPLIB__
+
+strip_lib :
+ strip --strip-unneeded $(BOUTPUT_LIB)
+
+# Cleaning up..
+clean :
+ rm -f $(BINARIES)
+
+## Defines for "library" (program compiled with HTTrack)
+clean_lib :
+ rm -f $(BINARIES_LIB)
+
+# Bye bye
+lastinfo :
+ @echo "Build successful"
+
+# Installing httrack into the correct folder
+install : __INSTALL__
+uninstall : remove
+remove : __UNINSTALL__
+
+# Install docs
+docinstall :
+ (mkdir -p "$(PREFIX)/doc/httrack" && chmod 755 "$(PREFIX)/doc/httrack") || true
+ cp -fR $(DOCS) "$(PREFIX)/doc/httrack/"
+
+# Uninstall docs
+docremove :
+ rm -rf "$(PREFIX)/doc/httrack"
+
+# Install binaries and conf files
+bininstall :
+ @echo "Copying $(BOUTPUT) to $(BINPATH).."
+ test ! -d "$(BINPATH)" && (mkdir -p "$(BINPATH)" && chmod 755 "$(BINPATH)") || true
+ cp -f $(BOUTPUT) $(BINPATH)
+ chmod 755 $(BINPATH)/$(BOUTPUT)
+
+libremove : # the .1 link is also looked for under $(PREFIX)/lib, where older installs put it
+ rm -f "$(LIBPATH)/$(BOUTPUTSO)"
+ for d in "$(LIBPATH)" "$(PREFIX)/lib"; do test -L "$$d/$(BOUTPUTSO).1" && rm -f "$$d/$(BOUTPUTSO).1"; done; true
+# Install the shared library; its .1 link is relative, so it has to live next to it in $(LIBPATH)
+libinstall :
+ @echo "Copying $(BOUTPUTSO) to $(LIBPATH)/.."
+ test ! -d "$(LIBPATH)" && (mkdir -p "$(LIBPATH)" && chmod 755 "$(LIBPATH)") || true
+ cp -f "$(BOUTPUTSO)" "$(LIBPATH)/"
+ chmod 644 "$(LIBPATH)/$(BOUTPUTSO)"
+ ln -sf "$(BOUTPUTSO)" "$(LIBPATH)/$(BOUTPUTSO).1"
+
+
+# Uninstall binaries
+binremove :
+ rm -f $(BINPATH)/$(BOUTPUT)
+ rm -f $(ETCPATH)/httrack.conf
+
+# Configure program
+config :
+ @./postinst-config
+
+###
+
diff --git a/src/configure b/src/configure
new file mode 100755
index 0000000..7c2d472
--- /dev/null
+++ b/src/configure
@@ -0,0 +1,603 @@
+#!/bin/sh
+# No, this isn't generated by autoconf
+# Some parts are inspired by autoconf (Free Software Foundation), however
+# And the idea is slightly the same
+
+# Usage:
+# './configure' and then 'make' and 'make install', or
+# './configure --make --install'
+
+SHELL=/bin/sh
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ case "$ac_option" in
+ -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) ac_optarg= ;;
+ esac
+
+ case "$ac_option" in
+
+ --pthread | --thread) THREADS=1 ;;
+ --nopthread | --nothread) THREADS=0 ;;
+
+ --zlib | --gzip) ZLIB=1 ;;
+ --nozlib | --nogzip) ZLIB=0 ;;
+
+ --static | --noso) DYNAMIC=0
+ ;;
+ --dynamic | --so) DYNAMIC=1
+ ;;
+
+ --longlong) LONGLONG=1 ;;
+ --nolonglong) LONGLONG=0 ;;
+
+ --inaddrt) NODECLINADDRT=0 ;;
+ --noinaddrt) NODECLINADDRT=1 ;;
+
+ --ipv6) IPV6=1 ;;
+ --noipv6) IPV6=0 ;;
+
+ --ssl) SSL=1 ;;
+ --https) SSL=1 ;;
+ --nossl) SSL=0 ;;
+ --nohttps) SSL=0 ;;
+
+ --useuid) NOUID=0 ;;
+ --nouseuid) NOUID=1 ;;
+
+ --useftime) NOFTIME=0 ;;
+ --nouseftime) NOFTIME=1 ;;
+
+ --system=*) SYSTEMTYPE="$ac_optarg" ;;
+ --system) ac_prev=SYSTEMTYPE ;;
+
+ --debug) OTYPE="-O0 -g3"
+ MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo" ;;
+
+ --make) DOMAKE=1 ;;
+ --install) DOINSTALL=1 ;;
+ --bininstall) DOINSTALL=1
+ NODOCINSTALL=1
+ ;;
+ --docinstall) DOINSTALL=1
+ DOCINSTALL=1
+ ;;
+ --cls)
+ cd ..
+ chmod 'u=rw,go=r' `find ./ -type f`
+ chmod 'u=rwx,go=rx' `find ./ -type d`
+ chmod 'u=rwx,go=rx' ./src/configure
+ chmod 'u=rwx,go=rx' ./src/strip_cr.in
+ chmod 'u=rwx,go=rx' ./src/postinst-config.in
+ cd src
+ strip_cr *.c *.h
+ rm -f ./httrack 2>/dev/null
+ exit
+ ;;
+ -prefix | --prefix | --prefi | --pref | --pre | --pr)
+ ac_prev=PREFIX ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=*)
+ PREFIX="$ac_optarg" ;;
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=BINPATH ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ BINPATH="$ac_optarg" ;;
+ -etcdir | --etcdir | --etcdi | --etcd | --etc | --et)
+ ac_prev=ETCPATH ;;
+ -etcdir=* | --etcdir=* | --etcdi=* | --etcd=* | --etc=* | --et=*)
+ ETCPATH="$ac_optarg" ;;
+ -libdir | --libdir | --libdi | --libd | --lib | --li)
+ ac_prev=LIBPATH ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=* | --lib=* | --li=*)
+ LIBPATH="$ac_optarg" ;;
+
+ --help)
+ cat <<EOF
+--prefix=.. : prefix (/usr)
+--bindir=.. : binary dir (/usr/bin)
+--etcdir=.. : config dir (/usr/etc or /etc)
+--libdir=.. : library dir (/usr/lib)
+--dynamic : do use dynamic (.so) mode
+--static : do use static mode
+--nopthread : do not use threads (pthread.h)
+--pthread : do use threads (pthread.h)
+--noipv6 : do not use ipv6 extensions
+--ipv6 : do use ipv6 extensions
+--nohttps : do not use SSL extensions
+--https : do use SSL extensions
+--nozlib : do not use compression (zlib)
+--zlib : do use compression (zlib)
+--nolonglong : do not use 64-bit int
+--longlong : do use 64-bit int
+--noinaddrt : do not redeclare in_addr_t
+--inaddrt : do redeclare in_addr_t
+--nouseuid : do not use setuid()/setgid()
+--useuid : do use setuid()/setgid()
+--nouseftime : do not use ftime()
+--useftime : do use ftime()
+--system=<type> : override system type (uname) - NOT RECOMMENDED! (types: 'Default','Linux','SunOS','AIX')
+--make : 'make' after configure
+--install : 'make install' after configure
+--bininstall : 'make bininstall' after configure
+--docinstall : 'make docinstall' after configure
+--debug : add debug information (for gdb)
+EOF
+ exit
+ ;;
+
+ *) echo "Unrecognized option: $ac_option" >&2 # diagnostics belong on stderr
+ exit 1 # non-zero status so callers can tell that nothing was configured
+ ;;
+
+ esac
+
+done
+
+echo "Welcome to HTTrack Website Copier!"
+echo "Type in ./configure --help for more details"
+echo "If this script fails, you can enter supplemental options through '--option=value'"
+echo "or enter in manual make, through 'make help'"
+echo ""
+
+if cp -f Makefile.in Makefile; then
+
+SEDEXEC=
+
+# System (OS) type?
+printf "Checking for OS type.. "
+if test -z "$SYSTEMTYPE"; then
+ SYSTEMTYPE="`uname`"
+fi
+case "$SYSTEMTYPE" in
+ SunOS) printf "SunOS/Solaris\n";
+ PLATFORM=0
+ SOLSOCK=1
+ ;;
+ AIX) printf "AIX\n"; PLATFORM=2 ;;
+ *) printf "Linux type\n"; PLATFORM=3 ;;
+esac
+
+WTYPE="-Wall -Wcast-align -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wpointer-arith -Wnested-externs"
+
+if test -z "$OTYPE"; then
+ OTYPE="-O3"
+fi
+if test -z "$MKTYPE"; then
+ MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo"
+fi
+
+# Root group
+if test -z "$ROOTGROUP"; then
+ printf "Checking for 'root' group.."
+ if egrep "^root:" /etc/group >/dev/null; then
+ ROOTGROUP="root"
+ elif egrep "^wheel:" /etc/group >/dev/null; then
+ ROOTGROUP="wheel"
+ fi
+ printf "$ROOTGROUP\n"
+else
+ echo "Overriding ROOTGROUP=$ROOTGROUP"
+fi
+
+
+# Binaries location
+if test -z "$BINPATH"; then
+printf "Checking for bin directory.. "
+if test -n "$PREFIX"; then
+ BINPATH="$PREFIX/bin"
+elif test -d "/usr/bin"; then
+ BINPATH="/usr/bin"
+else
+ BINPATH="/bin"
+fi
+printf "$BINPATH\n"
+else
+ echo "Overriding BINPATH=$BINPATH"
+fi
+
+# shlib?
+if test -d "/usr/shlib"; then
+SYSLIB=/usr/shlib
+else
+SYSLIB=/usr/lib
+fi
+
+# /etc location
+if test -z "$ETCPATH"; then
+printf "Checking for etc directory.. "
+if test -n "$PREFIX"; then
+ ETCPATH="$PREFIX/etc"
+elif test -d "/usr/etc"; then
+ ETCPATH="/usr/etc"
+else
+ ETCPATH="/etc"
+fi
+printf "$ETCPATH\n"
+else
+ echo "Overriding ETCPATH=$ETCPATH"
+fi
+
+# /usr/lib location
+if test -z "$LIBPATH"; then
+printf "Checking for lib directory.. "
+if test -n "$PREFIX"; then
+ LIBPATH="$PREFIX/lib"
+elif test -d "/usr/lib"; then
+ LIBPATH="/usr/lib"
+elif test -d "/usr/local/lib"; then
+ LIBPATH="/usr/local/lib"
+else
+ LIBPATH="/lib"
+fi
+printf "$LIBPATH\n"
+else
+ echo "Overriding LIBPATH=$LIBPATH"
+fi
+
+# Prefix location
+if test -z "$PREFIX"; then
+ printf "Checking for prefix directory.. "
+ PREFIX="/usr"
+ printf "$PREFIX\n"
+else
+ echo "Overriding PREFIX=$PREFIX"
+fi
+
+
+# 64-bit (long long) support causes trouble on some processors
+# because some alignments aren't properly defined, so it is only
+# auto-enabled on tested (i386..i986) hosts; --longlong/--nolonglong override
+if test -z "$LONGLONG"; then
+printf "Checking for long long.. "
+LONGLONG=
+if grep "long long" /usr/include/sys/types.h >/dev/null; then
+if uname -a|egrep ' i[3-9]86 ' >/dev/null; then
+ LONGLONG=1
+fi
+fi
+if test -n "$LONGLONG"; then
+ printf "supported\n"
+else
+ printf "not tested/supported. Use --longlong to override\n"
+fi
+else
+ echo "Overriding LONGLONG=$LONGLONG"
+fi
+
+# IPV6?
+# NOT TESTED FOR OTHER PLATFORMS.. FIXME!
+if test -z "$IPV6"; then
+printf "Checking for ipv6 support.. "
+if test -f "/usr/include/linux/in6.h" -o -f "/usr/local/include/linux/in6.h"; then
+IPV6=1
+else
+IPV6=0
+fi
+if test "$IPV6" -eq 1; then
+printf "supported\n"
+else
+printf "not supported\n"
+fi
+else
+ echo "Overriding IPV6=$IPV6"
+fi
+if test "$IPV6" -eq 1; then
+IPTYPE="-DINET6"
+LIPTYPE=""
+else
+IPTYPE=
+LIPTYPE=
+fi
+
+# HTTPS?
+# NOT TESTED FOR OTHER PLATFORMS.. FIXME!
+if test -z "$SSL"; then
+printf "Checking for SSL support.. "
+if test -f "/usr/include/openssl/ssl.h" -o -f "/usr/local/include/openssl/ssl.h"; then
+SSL=1
+else
+SSL=0
+fi
+if test "$SSL" -eq 1; then
+printf "supported\n"
+else
+printf "not supported\n"
+fi
+else
+ echo "Overriding SSL=$SSL"
+fi
+if test "$SSL" -eq 1; then
+SSTYPE="-lssl -lcrypto"
+else
+SSTYPE=
+fi
+
+# Alignement
+if test -z "$PTRALIGN"; then
+printf "Checking for pointer alignements.. "
+PTRALIGN=
+if test `uname -p` = "alpha" -o `uname -p` = "sparc"; then
+PTRALIGN=8
+else
+PTRALIGN=4
+fi
+fi
+printf "$PTRALIGN\n"
+
+
+# Dynamic (.so) module?
+if test -z "$DYNAMIC"; then
+ DYNAMIC=1
+fi
+printf "Checking for compilation mode: "
+if test "$DYNAMIC" -eq "1"; then
+ echo "dynamic"
+ SOTYPE=-fPIC
+else
+ echo "static"
+ SOTYPE=
+fi
+
+# in_addr_t must not be redeclared when netinet/in.h or sys/types.h already has a typedef for it.
+# Both greps are silenced: only their exit status is used, matches must not leak into the output.
+if test -z "$NODECLINADDRT"; then
+printf "Checking for in_addr_t declaration in in.h.. "
+if grep -E "typedef .* in_addr_t" /usr/include/netinet/in.h >/dev/null || grep -E "typedef .* in_addr_t" /usr/include/sys/types.h >/dev/null; then
+ printf "found, do not redeclare\n"
+ NODECLINADDRT=1
+else
+ printf "not found, declaring\n"
+ NODECLINADDRT=
+fi
+else
+ echo "Overriding NODECLINADDRT=$NODECLINADDRT"
+fi
+
+# Test if we can use zlib (/usr/lib/libz.so)
+# This allows transfers to be sped up using HTTP compression
+if test -z "$ZLIB"; then
+printf "Checking for ${SYSLIB}/libz.so.. "
+if test -f "${SYSLIB}/libz.so"; then
+ printf "found\n"
+ ZLIB=1
+else
+ printf "library not found (too bad), no http compression will be available\n"
+ ZLIB=0
+fi
+else
+ echo "Overriding ZLIB=$ZLIB"
+fi
+
+# pthread.h doesn't exist on some systems
+# This is sad, because it speeds up some useful things, like DNS or ftp
+if test -z "$THREADS"; then
+printf "Checking for /usr/include/pthread.h.. "
+if test -f "/usr/include/pthread.h" -o -f "/usr/local/include/pthread.h"; then
+if test -f "${SYSLIB}/libpthread.so"; then
+ printf "found\n"
+ THREADS=1
+else
+ printf "library not found (too bad), no threads will be available\n"
+ THREADS=
+fi
+else
+ printf "not found, no threads will be available\n"
+ THREADS=
+fi
+else
+ echo "Overriding THREADS=$THREADS"
+fi
+
+# Sometimes, setuid and setgid can't be used (missing pwd.h and unistd.h ?!)
+if test -z "$NOUID"; then
+NOUID=1
+printf "Checking for /usr/include/pwd.h and /usr/include/unistd.h.. "
+if test -f "/usr/include/pwd.h" -o "/usr/local/include/pwd.h"; then
+if test -f "/usr/include/unistd.h" -o -f "/usr/local/include/unistd.h" ; then
+ NOUID=
+fi
+fi
+if test -z "$NOUID"; then
+ printf "found\n"
+else
+ printf "not found, not using setuid() and setgid()\n"
+fi
+else
+ echo "Overriding NOUID=$NOUID"
+fi
+
+# Sometimes, ftime can't be used (missing declaration...)
+if test -z "$NOFTIME"; then
+NOFTIME=1
+printf "Checking for ftime in /usr/include/sys/timeb.h.. "
+if grep "int ftime" /usr/include/sys/timeb.h >/dev/null; then
+ NOFTIME=
+fi
+if test -z "$NOFTIME"; then
+ printf "found\n"
+else
+ printf "not found (too bad), not using ftime()\n"
+fi
+else
+ echo "Overriding NOFTIME=$NOFTIME"
+fi
+
+# Test variables
+if test "$NOUID" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEUID__/HTS_DO_NOT_USE_UID/'"
+fi
+if test "$NOFTIME" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEFTI__/HTS_DO_NOT_USE_FTIME/'"
+fi
+if test "$NODECLINADDRT" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEINA__/HTS_DO_NOT_REDEFINE_in_addr_t/'"
+fi
+if test "$THREADS" = 1; then
+THTYPE="-D_REENTRANT"
+LPTHTYPE="-lpthread"
+else
+THTYPE=
+LPTHTYPE=
+fi
+SEDEXEC="$SEDEXEC | sed -e \"s/__CFLAGS__/$SOTYPE $OTYPE $WTYPE $IPTYPE $THTYPE/g\""
+SEDEXEC="$SEDEXEC | sed -e \"s/__LFLAGS__/$LPTHTYPE $SSTYPE $LIPTYPE/g\""
+if test ! "$THREADS" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEPTH__/HTS_DO_NOT_USE_PTHREAD/'"
+fi
+if test "$ZLIB" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__/-lz/g'"
+else
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__//g'"
+fi
+SEDEXEC="$SEDEXEC | sed -e \"s/__ZLIB__/$ZLIB/\""
+if test "$SOLSOCK" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__/-lnsl -lsocket/g'"
+else
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__//g'"
+fi
+if test ! "$LONGLONG" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINE64B__/HTS_NO_64_BIT/'"
+fi
+if test -n "$PTRALIGN"; then
+ SEDEXEC="$SEDEXEC | sed -e \"s/__PTRALIGN__/$PTRALIGN/g\""
+fi
+if test -n "$IPV6"; then
+ SEDEXEC="$SEDEXEC | sed -e \"s/__INET6__/$IPV6/g\""
+fi
+if test -n "$SSL"; then
+ SEDEXEC="$SEDEXEC | sed -e \"s/__SSL__/$SSL/g\""
+fi
+if test "$DYNAMIC" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__/so/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall libinstall docinstall/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove libremove docremove/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__/strip --strip-unneeded \\\$(BOUTPUTSO)/'"
+else
+ SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__//'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall docinstall/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove docremove/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__//'"
+fi
+
+SEDEXEC="$SEDEXEC | sed -e \"s/__PLATFORM__/$PLATFORM/g\""
+SEDEXEC="$SEDEXEC | sed -e 's/#__AUTONAME__/AUTONAME = auto/'"
+SEDEXEC="$SEDEXEC | sed -e 's/#define __DEFINE.*__//g'"
+
+# Paths
+TMP=`echo $BINPATH | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__BINPATH__/$TMP/g\""
+TMP=`echo $ETCPATH | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__ETCPATH__/$TMP/g\""
+TMP=`echo $LIBPATH | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__LIBPATH__/$TMP/g\""
+TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__PREFIX__/$TMP/g\""
+TMP=`echo $ROOTGROUP | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__ROOTGROUP__/$TMP/g\""
+
+TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__DEFINEPRE__/$TMP/g\""
+SEDEXEC="$SEDEXEC | sed -e 's/__MAKEAUTO__/$MKTYPE/'"
+
+# Search for gmake
+printf "Checking for make.. "
+MAKEPATH=
+if test -f "/usr/bin/gmake"; then
+MAKEPATH=/usr/bin/gmake
+else
+if test -f "/bin/gmake"; then
+MAKEPATH=/bin/gmake
+else
+if test -f "/usr/local/bin/gmake"; then
+MAKEPATH=/usr/local/bin/gmake
+fi
+fi
+fi
+if test -n "$MAKEPATH"; then
+printf "found $MAKEPATH\n"
+else
+MAKEPATH=make
+printf "not found, assume make will work\n"
+fi
+
+# Sed strip_cr
+EXCL='#!'
+printf "Checking for perl.. "
+PERLPATH=
+cp -f strip_cr.in strip_cr
+if test -f "/usr/bin/perl"; then
+PERLPATH=/usr/bin/perl
+else
+if test -f "/bin/perl"; then
+PERLPATH=/bin/perl
+else
+if test -f "/usr/local/bin/perl"; then
+PERLPATH=/usr/local/bin/perl
+fi
+fi
+fi
+if test -n "$PERLPATH"; then
+printf "found $PERLPATH\nEnsuring that *.c/*.h source files don't contains CR (^M).. "
+TMP=`echo $PERLPATH | sed -e 's/\\//\\\\\\//g'`
+cat strip_cr | sed -e "s/__PERL__/${EXCL}${TMP}/" > __tmp; mv __tmp strip_cr
+chmod 755 strip_cr
+./strip_cr *.c *.h
+printf "done\n"
+fi
+
+# Sed postinst-config
+cp -f postinst-config.in postinst-config
+CMD="cat postinst-config $SEDEXEC > __tmp; mv __tmp postinst-config"
+if eval $CMD; then
+chmod 755 postinst-config
+else
+echo "Error while seding postinst-config"
+exit 1
+fi
+
+# Sed all __VARS__
+CMD="cat Makefile $SEDEXEC > __tmp; mv __tmp Makefile"
+echo "Command: $CMD"
+if eval $CMD; then
+
+echo ""
+echo "Makefile created!"
+echo "Type in '$MAKEPATH' to build and '$MAKEPATH install' to install"
+
+if test -n "$DOMAKE"; then
+ echo "Making.."
+ eval $MAKEPATH clean
+ eval $MAKEPATH
+fi
+if test -n "$DOINSTALL"; then
+ if test -n "$NODOCINSTALL"; then
+ echo "Installing binary.."
+ eval $MAKEPATH bininstall
+ elif test -n "$DOCINSTALL"; then
+ echo "Installing docs.."
+ eval $MAKEPATH docinstall
+ else
+ echo "Installing.."
+ eval $MAKEPATH install
+ fi
+fi
+
+else
+ echo "Error while seding Makefile.."
+ exit 1
+fi
+
+else
+ echo "Error copying Makefile.in -> Makefile.. giving up"
+ exit 1
+fi
+
diff --git a/src/gpl.txt b/src/gpl.txt
new file mode 100644
index 0000000..546a71a
--- /dev/null
+++ b/src/gpl.txt
@@ -0,0 +1,287 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+
+
diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h
new file mode 100644
index 0000000..a82b69d
--- /dev/null
+++ b/src/hts-indextmpl.h
@@ -0,0 +1,924 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Index.html templates file */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTTRACK_DEFTMPL
+#define HTTRACK_DEFTMPL
+
+
+/* Index for each project */
+/*
+regen:
+(for i in *; do echo $i; cat $i | sed -e 's/"/\\"/g' | sed -e 's/^\(.*\)$/ "\1"LF\\/'; done) > /tmp/1.txt
+*/
+/*
+ * HTS_INDEX_HEADER: built-in opening part of a project index page, used when
+ * no template file is found (see the note emitted on its first line).
+ * It holds the document head, the inline style sheet, the page banner and the
+ * start of the table listing the locally available sites.
+ * Written as a printf-style format string:
+ *   %s = INFO
+ * Every literal percent sign is doubled (%%).
+ * The <body> element opened here is closed by HTS_INDEX_FOOTER.
+ * LF is the line terminator macro, defined outside this header.
+ */
+#define HTS_INDEX_HEADER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>Local index - HTTrack Website Copier</title>"LF\
+ " %s"LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<body>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ ""LF\
+ "<H1 ALIGN=Center>Index of locally available sites:</H1>"LF\
+ " <TABLE BORDER=\"0\" WIDTH=\"100%%\" CELLSPACING=\"1\" CELLPADDING=\"0\">"LF
+
+/*
+ * HTS_INDEX_BODY: built-in table row for one entry of a project index page,
+ * used when no template file is found (see the note emitted on its first
+ * line). The row holds a bullet and a link.
+ * Written as a printf-style format string, arguments in this order:
+ *   %s = URL   (becomes the HREF of the link)
+ *   %s = TITLE (becomes the link text)
+ * LF is the line terminator macro, defined outside this header.
+ */
+#define HTS_INDEX_BODY \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " <TR>"LF\
+ " <TD BACKGROUND=\"fade.gif\">"LF\
+ " &middot;"LF\
+ " <A HREF=\"%s\">"LF\
+ " %s"LF\
+ " </A> "LF\
+ " </TD>"LF\
+ " </TR>"LF
+
+/*
+ * HTS_INDEX_FOOTER: built-in closing part of a project index page, used when
+ * no template file is found (see the note emitted on its first line).
+ * It ends the entry table, prints the credit line, then closes the layout
+ * tables, <body> and <html>.
+ * Written as a printf-style format string, arguments in this order:
+ *   %s = INFO
+ *   %s = META REFRESH IF ANY
+ * Every literal percent sign is doubled (%%); ampersands in text are written
+ * as &amp;.
+ * LF is the line terminator macro, defined outside this header.
+ */
+#define HTS_INDEX_FOOTER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " </TABLE>"LF\
+ " <BR>"LF\
+ " <BR>"LF\
+ " <BR>"LF\
+ " <H6 ALIGN=\"RIGHT\">"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2002]</I>"LF\
+ " </H6>"LF\
+ " %s"LF\
+ " <!-- Thanks for using HTTrack Website Copier! -->"LF\
+ " %s"LF\
+ ""LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+/* Index for all projects (top index) */
+/*
+ * HTS_TOPINDEX_HEADER: built-in opening part of the top index page (the list
+ * of all projects), used when no template file is found (see the note
+ * emitted on its first line).
+ * It holds the document head, the inline style sheet, the page banner and the
+ * start of the table listing the available projects.
+ * Written as a printf-style format string:
+ *   %s = INFO
+ * Every literal percent sign must be doubled (%%) - exactly doubled: a third
+ * '%' would start an invalid conversion.
+ * The <body> element opened here is closed by HTS_TOPINDEX_FOOTER.
+ * LF is the line terminator macro, defined outside this header.
+ */
+#define HTS_TOPINDEX_HEADER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>List of available projects - HTTrack Website Copier</title>"LF\
+ " %s"LF\
+ ""LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<body>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ ""LF\
+ ""LF\
+ "<h1 ALIGN=Center>Index of locally available projects:</H1>"LF\
+ " <table border=\"0\" width=\"100%%\" cellspacing=\"1\" cellpadding=\"0\">"LF
+
+/*
+ * HTS_TOPINDEX_BODY: built-in table row for one entry of the top index page,
+ * used when no template file is found (see the note emitted on its first
+ * line). The row holds a bullet and a link.
+ * Written as a printf-style format string, arguments in this order:
+ *   %s = URL   ("/index.html" is appended to it to form the HREF)
+ *   %s = TITLE (becomes the link text)
+ * LF is the line terminator macro, defined outside this header.
+ */
+#define HTS_TOPINDEX_BODY \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " <TR>"LF\
+ " <TD BACKGROUND=\"fade.gif\">"LF\
+ " &middot; <A HREF=\"%s/index.html\">%s</A>"LF\
+ " </TD>"LF\
+ " </TR>"LF
+
+/*
+ * HTS_TOPINDEX_FOOTER: built-in closing part of the top index page, used when
+ * no template file is found (see the note emitted on its first line).
+ * It ends the entry table, prints the credit line, then closes the layout
+ * tables, <body> and <html>.
+ * Written as a printf-style format string:
+ *   %s = INFO
+ * Every literal percent sign is doubled (%%); ampersands in text are written
+ * as &amp;, as in HTS_INDEX_FOOTER.
+ * LF is the line terminator macro, defined outside this header.
+ */
+#define HTS_TOPINDEX_FOOTER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " </TABLE>"LF\
+ " <BR>"LF\
+ " <H6 ALIGN=\"RIGHT\">"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2002]</I>"LF\
+ " </H6>"LF\
+ " %s"LF\
+ " <!-- Thanks for using HTTrack Website Copier! -->"LF\
+ ""LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+
+/* Other embedded data (log security notice, "page not retrieved" HTML page; fade and backblue images) */
+
+/*
+ * HTS_LOG_SECURITY_WARNING: plain-text notice saying that hts-log.txt and the
+ * hts-cache folder may contain sensitive information (such as site
+ * usernames/passwords) and should not be shared.
+ * Three LF-terminated lines; the text contains no printf conversions.
+ * NOTE(review): where the notice is written is not visible from this header.
+ */
+#define HTS_LOG_SECURITY_WARNING "note:\tthe hts-log.txt file, and hts-cache folder, may contain sensitive information,"LF\
+ "\tsuch as username/password authentication for websites mirrored in this project"LF\
+ "\tdo not share these files/folders if you want these information to remain private"LF
+
+#define HTS_DATA_UNKNOWN_HTML "<html>"LF\
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>Page not retrieved! - HTTrack Website Copier</title>"LF\
+ " %s"LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ "<h1><strong><u>Oops!...</u></strong></h1>"LF\
+ "<h3>This page has <font color=\"red\"><em>not</em></font> been retrieved by HTTrack Website Copier. </h3>"LF\
+ "<script language=\"Javascript\">"LF\
+ "<!--"LF\
+ " var loc=document.location.toString();"LF\
+ " if (loc) {"LF\
+ " var pos=loc.indexOf('link=');"LF\
+ " if (pos>0) {"LF\
+ " document.write('Clic to the link <b>below</b> to go to the online location!<br><a href=\"'+loc.substring(pos+5)+'\">'+loc.substring(pos+5)+'</a><br>');"LF\
+ " } else"LF\
+ " document.write('(no location defined)');"LF\
+ " }"LF\
+ "// -->"LF\
+ "</script>"LF\
+ "<h6 align=\"right\">Mirror by HTTrack Website Copier</h6>"LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+/* Length associated with the HTML template defined just above.
+   NOTE(review): the value 0 is presumably a "compute it at run time" marker
+   (the template is NUL-terminated text), unlike the *_GIF_LEN constants,
+   which carry explicit byte counts - confirm against the users of this macro. */
+#define HTS_DATA_UNKNOWN_HTML_LEN 0
+
+#define HTS_DATA_ERROR_HTML "<html>"LF\
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>Page not retrieved! - HTTrack Website Copier</title>"LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ "<h1><strong><u>Oops!...</u></strong></h1>"LF\
+ "<h3>This page has <font color=\"red\"><em>not</em></font> been retrieved by HTTrack Website Copier (%s). </h3>"LF\
+ "<script language=\"Javascript\">"LF\
+ "<!--"LF\
+ " var loc=document.location.toString();"LF\
+ " if (loc) {"LF\
+ " var pos=loc.indexOf('link=');"LF\
+ " if (pos>0) {"LF\
+ " document.write('Clic to the link <b>below</b> to go to the online location!<br><a href=\"'+loc.substring(pos+5)+'\">'+loc.substring(pos+5)+'</a><br>');"LF\
+ " } else"LF\
+ " document.write('(no location defined)');"LF\
+ " }"LF\
+ "// -->"LF\
+ "</script>"LF\
+ "<h6 align=\"right\">Mirror by HTTrack Website Copier</h6>"LF\
+ "</body>"LF\
+ "</html>"LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+// Embedded GIF image "unknown" (GIF89a header, 32x32 pixels), stored as a C string literal
+#define HTS_DATA_UNKNOWN_GIF \
+ "\x47\x49\x46\x38\x39\x61\x20\x0\x20\x0\xf7\xff\x0\xc0\xc0\xc0\xff\x0\x0\xfc\x3\x0\xf8\x6\x0\xf6\x9\x0\xf2\xc\x0\xf0\xf\x0\xf0\xe\x0\xed\x11\x0\xec\x13\x0\xeb\x14\x0\xe9\x15\x0\xe8\x18\x0\xe6\x18\x0\xe5\x1a\x0\xe3\x1c\x0\xe2\x1d\x0\xe1\x1e\x0\xdf\x20\x0\xdd\x23\x0\xdd\x22\x0\xdb\x23\x0\xda\x25\x0\xd9\x25\x0\xd8\x27\x0\xd6\x29\x0\xd5\x2a\x0\xd3\x2c\x0\xd2\x2d\x0"\
+ "\xd1\x2d\x0\xd0\x2f\x0\xcf\x30\x0\xce\x31\x0\xcb\x34\x0\xcb\x33\x0\xc8\x36\x0\xc5\x3b\x0\xc2\x3c\x0\xc0\x3f\x0\xbc\x43\x0\xba\x45\x0\xb7\x48\x0\xb4\x4c\x0\xb1\x4e\x0\xad\x51\x0\xaa\x55\x0\xa8\x58\x0\xa4\x5a\x0\xa1\x5e\x0\x9f\x60\x0\x99\x66\x0\x96\x68\x0\x93\x6c\x0\x90\x6e\x0\x8d\x72\x0\x8b\x74\x0\x8a\x75\x0\x88\x78\x0\x85\x79\x0\x82\x7d\x0\x7e\x80\x0\x7d\x82\x0\x79"\
+ "\x86\x0\x77\x88\x0\x73\x8b\x0\x72\x8d\x0\x70\x8e\x0\x6e\x91\x0\x6a\x95\x0\x68\x97\x0\x65\x9a\x0\x63\x9d\x0\x62\x9e\x0\x60\xa0\x0\x5d\xa2\x0\x5c\xa3\x0\x5a\xa5\x0\x57\xa9\x0\x57\xa7\x0\x54\xab\x0\x53\xac\x0\x52\xad\x0\x51\xae\x0\x4f\xb0\x0\x4e\xb1\x0\x4d\xb2\x0\x4c\xb4\x0\x49\xb6\x0\x48\xb8\x0\x46\xba\x0\x45\xbb\x0\x43\xbd\x0\x43\xbc\x0\x40\xbf\x0\x3f\xc0\x0\x3e\xc1"\
+ "\x0\x3d\xc2\x0\x3a\xc5\x0\x39\xc5\x0\x38\xc7\x0\x37\xc8\x0\x35\xca\x0\x34\xcb\x0\x32\xcc\x0\x31\xce\x0\x30\xd0\x0\x30\xce\x0\x2f\xd1\x0\x2e\xd1\x0\x2c\xd2\x0\x2b\xd4\x0\x2a\xd5\x0\x29\xd6\x0\x27\xd8\x0\x26\xda\x0\x26\xd8\x0\x25\xdb\x0\x24\xdc\x0\x21\xde\x0\x20\xdf\x0\x1f\xe1\x0\x1e\xe1\x0\x1c\xe3\x0\x1b\xe5\x0\x19\xe6\x0\x18\xe7\x0\x15\xeb\x0\x15\xea\x0\x14\xec\x0"\
+ "\x12\xed\x0\x10\xef\x0\xf\xf0\x0\xd\xf2\x0\xa\xf5\x0\x9\xf6\x0\x7\xf8\x0\x5\xfa\x0\x3\xfb\x0\x1\xfd\x0\x0\xfe\x2\x0\xfb\x4\x0\xf8\x7\x0\xf6\xa\x0\xf3\xd\x0\xee\x12\x0\xaa\x54\x0\xa5\x5a\x0\xa2\x5d\x0\xa0\x60\x0\x9c\x62\x0\x99\x66\x0\x98\x67\x0\x94\x6b\x0\x92\x6d\x0\x91\x6e\x0\x8f\x70\x0\x8c\x74\x0\x8a\x75\x0\x86\x79\x0\x83\x7c\x0\x81\x7e\x0\x7e\x82\x0"\
+ "\x7b\x83\x0\x79\x87\x0\x76\x8a\x0\x73\x8c\x0\x70\x8f\x0\x6a\x95\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"\
+ "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"\
+ "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x21\xf9\x4\x1\x0\x0\x0\x0\x2c\x0\x0\x0\x0\x20\x0\x20\x0\x40\x8"\
+ "\xff\x0\x1\x8\x1c\x48\xb0\x60\x82\x7\x16\x3a\x8c\x30\x91\x82\xc5\x8b\x82\x10\x23\xa\xa4\x81\x83\xa0\x92\x27\x56\xb6\x88\x51\x23\xb1\xa3\xc0\x38\x78\xfe\x10\x4a\xe4\xb1\xa4\xc9\x93\x1e\xf\x30\x90\x90\x41\xa2\x8e\x1e\x40\x88\x20\x41\x29\xf1\x4b\x99\x36\x75\xf6\xd0\x8c\xe8\x8\xd2\xce\x92\x94\x2e\x6d\xf2\x14\x8a\xd4\xcf\x92\x1\x4\x14\x58\x10\x1\xc3\x87\x82\x32\x6a\xe4\xe0\x81\x72\xc2\x86\x10"\
+ "\x1d\x83\x14\x49\xd2\x84\x8a\x96\xa3\x5\xa3\x5c\xe9\x42\x66\x8d\x1c\xb0\x4\xcf\xbc\xb1\xd3\x67\x10\x5a\x82\x80\xa\x29\x6a\xf4\xb6\xee\xce\x48\x93\x2c\x69\xb2\x4b\x50\x54\xa9\x53\x7c\x25\x6\x18\x60\xa0\x1\x5\xd\x22\x4a\xa0\x58\xe1\x22\xc6\xc\x1b\x34\x3\x10\x40\xe0\xa0\x2\x87\x88\x37\x76\xf8\x10\x82\x52\x1\x84\xb\x1e\x3a\xfe\x18\x62\x64\x9\x14\x94\x20\x48\x9c\x50\xd1\x2\x6\xc4\x23\x4c"\
+ "\xa4\x60\xf1\x12\xd8\xc9\x94\x2c\x60\xcc\xb8\xe1\x5b\x85\x4b\x18\x34\x70\xee\x4\x1e\x93\x66\x4e\x1e\x3f\x81\xd9\xd0\xd1\x13\xc8\x50\x60\x0\x7c\x4\x1d\x5a\xf4\x1c\x0\x22\x46\x8f\xaa\x6b\xdf\xce\xbd\xa3\xa4\x4a\x98\x38\x7d\xb\x7a\x9e\xa9\x13\xa8\x51\xa6\xba\xbf\xd\x8\x0\x3b\xff"
+#define HTS_DATA_UNKNOWN_GIF_LEN 1070
+
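+/* Data below generated from bg_rings.gif with:
+   hexdump bg_rings.gif | cut -c9- - | sed -e 's/\([0-9a-f][0-9a-f]\)\([0-9a-f][0-9a-f]\)/\\x\2 \\x\1/g' | sed -e 's/ //g' | sed -e 's/^\(.*\)$/ \"\1\" \\/'
+   hexdump prints 16-bit words (hence the byte swap in the first sed), so an
+   odd-sized file gets one pad byte: the final \x00 after the GIF trailer \x3b
+   appears to be that padding and is not counted in HTS_DATA_BACK_GIF_LEN. */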
+#define HTS_DATA_BACK_GIF \
+ "\x47\x49\x46\x38\x39\x61\xf5\x01\xc8\x01\xa2\x00\x00\xcc\xcc\xdd" \
+ "\xc7\xc7\xda\xc4\xc4\xd7\xbe\xbe\xd3\xbd\xbd\xd2\xb9\xb9\xd0\xfe" \
+ "\x01\x02\x00\x00\x00\x21\xf9\x04\xfd\x14\x00\x06\x00\x2c\x00\x00" \
+ "\x00\x00\xf5\x01\xc8\x01\x40\x03\xff\x08\xba\xdc\xfe\x30\xca\x49" \
+ "\xab\xbd\x38\xeb\xcd\xbb\xff\x60\x28\x8e\x64\x69\x9e\x68\xaa\xae" \
+ "\x6c\xeb\xbe\x70\x2c\xcf\x74\x6d\xdf\x62\x20\x08\x43\xe1\xff\xc0" \
+ "\xa0\x70\x48\x2c\x1a\x8f\xc8\xa4\xd2\x38\x68\x06\x02\xb8\xa8\x74" \
+ "\x4a\xad\xc2\x74\x84\xa5\x76\xcb\xed\x7a\xbf\xe0\x30\xd8\x69\x2d" \
+ "\x9b\xcf\xe8\x40\x4f\xcc\x6e\xbb\xdf\xf0\xb8\x9c\x4d\x20\x40\xd1" \
+ "\xf8\xbc\x79\x3d\xef\xfb\xff\x42\x4d\x3b\x77\x7a\x12\x4f\x3b\x4d" \
+ "\x80\x6d\x03\x02\x85\x8e\x8f\x0f\x02\x8a\x93\x94\x43\x02\x84\x90" \
+ "\x99\x1f\x58\x95\x05\x8d\x9a\xa0\x24\x92\x9d\xa4\x6e\x03\xa1\xa8" \
+ "\xa1\x3c\x73\x9f\xa9\x8e\x59\xa5\xb1\x5b\xad\xae\xb5\xb6\x20\x02" \
+ "\xb0\x62\xb4\xb7\x28\xba\xb2\xc0\x43\xa7\xbd\xc4\xc5\x36\x7c\x5d" \
+ "\xc3\xc6\x10\xc8\xc1\xce\x98\xcb\xd1\xd2\x67\xa3\x5c\xbc\x8e\xd5" \
+ "\xce\xc0\xd7\xd3\xdd\xde\xa1\x01\xbf\x49\xdc\x33\x01\xda\xc1\xdf" \
+ "\xe9\xea\xe9\xe1\x4b\x76\x29\xd9\xe7\x9d\xeb\xf4\x14\x3a\xcd\xf2" \
+ "\x71\x82\xd0\xc4\xf1\x46\xe4\x13\xc4\xe5\x03\xa4\xac\x5e\x3f\x81" \
+ "\x03\x13\xca\x61\x84\xcd\x88\x04\x7c\x0a\xe7\xf0\x33\xa8\xc7\x5c" \
+ "\xc4\x71\x00\x7b\x71\x1a\x33\x91\xff\xca\x45\x82\x14\xf1\x20\x4c" \
+ "\xd8\x31\xe4\x89\x5c\x4b\x32\x96\x80\xf8\xd1\x94\xc9\x29\x23\xb5" \
+ "\x95\x7c\x89\x47\xcd\x91\x82\x1b\x5a\xfa\x51\x49\x33\x45\x4c\x60" \
+ "\x33\x7b\x2e\x6b\x47\x84\x1c\x4b\x9d\x62\x70\x0a\x85\x47\x72\xa9" \
+ "\xd3\x07\x3d\x18\x1c\x45\x0a\x86\xe7\x53\x0e\x3f\x4b\x05\xbd\xca" \
+ "\x75\x2a\xd5\x2f\x5c\x49\x0c\x54\x1a\xb6\x6c\x83\xaf\x72\xcc\x7a" \
+ "\xf0\x87\x4e\xad\xdb\x07\x59\xd1\x5a\x7b\x9b\x61\x2c\xdd\xbb\x0b" \
+ "\xd8\xca\xad\x8a\x97\x82\x57\x52\x5b\xfb\x3a\xd5\xbb\x17\xac\x60" \
+ "\x66\x76\x0f\xdf\x2d\x0c\x47\xb1\x83\xb8\xa4\x1c\xd3\xb5\xc8\xb8" \
+ "\x8d\x55\xb5\x94\xf3\x49\xa6\x5b\xf9\x4d\x60\xb5\x11\x37\xbb\x25" \
+ "\xdc\x99\x0b\xd9\xbe\x11\x09\x88\x36\x4b\xba\xf4\xac\xcd\xad\xb7" \
+ "\xad\x0e\xeb\xfa\xcd\xe6\xcc\x0a\x3f\xcf\x66\x57\xdb\x8d\x6a\xc9" \
+ "\xb8\x15\x9e\xde\x4d\x2f\x78\x6f\x30\xba\xc3\xc6\x3e\x97\x9c\x78" \
+ "\xb1\xe5\xc7\x97\x34\xef\xfa\xf5\xb7\x73\x75\xd0\xa3\x2b\x19\x8e" \
+ "\xd7\x78\xcb\x77\xd7\xa7\x41\xd6\xbe\x84\x7b\xdf\xbf\x17\xcd\x87" \
+ "\xcf\xe4\x9d\x3c\xdf\xdd\x44\x4b\xab\x5f\x7f\x06\xbd\xfb\x2d\xf3" \
+ "\x05\x67\x97\x3b\x60\x3a\xfd\x17\xff\xe3\xdd\x87\xdf\x7f\xf1\x09" \
+ "\x18\x04\x43\xff\xc1\xb0\x9f\x81\x5a\xe4\x27\x9a\x4d\x0c\xce\x21" \
+ "\xc8\x0e\x97\x3c\x61\xa1\x21\x16\x1e\xc2\x43\x13\x53\xd1\xb4\x60" \
+ "\x84\x5a\x5c\x76\x1d\x4a\x20\x86\x16\x56\x81\x25\xc6\xe1\x1f\x7d" \
+ "\x10\xa6\x38\xc7\x61\x28\xba\xa8\xcf\x8a\x09\x52\xb0\x43\x80\xd1" \
+ "\x5d\xd7\x9e\x8c\x72\x88\x58\xe3\x8f\x4b\xd9\xc7\x63\x1c\x3e\x02" \
+ "\x69\x24\x3d\x1f\x0e\xc9\xca\x91\x4c\xbe\x94\xa4\x92\x94\xf4\xd7" \
+ "\xe4\x94\xd3\x3c\x09\xe5\x39\x64\x50\xa9\x25\x24\x56\x5e\xe9\x1a" \
+ "\x87\x4f\x14\x77\xa3\x61\x5b\x3e\xd2\xa2\x97\x68\xa2\x59\xe6\x34" \
+ "\x67\xa6\xe9\x66\x6d\x6b\x5e\x45\xe2\x9b\x74\x9e\x13\x27\x71\x73" \
+ "\xd6\xa9\x27\x18\x77\xf6\x99\x57\x9e\x7b\x42\xe9\xe7\xa0\x29\x1c" \
+ "\x22\x64\xa0\x7d\x30\x42\x23\xa1\x8c\xaa\x93\xe1\xa3\x90\x46\xfa" \
+ "\x68\xa3\x94\x56\x6a\xe9\x94\x19\x52\xb8\x21\x87\x89\x84\xc1\xe9" \
+ "\x84\x14\x66\x78\xe9\xa8\x43\x01\x8a\xa8\x36\xfb\x90\xea\xa7\x0e" \
+ "\xa7\xb6\x5a\x44\x96\xaa\x52\xd4\xa6\xab\xb4\x02\x43\x40\x91\xb1" \
+ "\xb6\x70\x68\xad\x3e\x28\x3a\xdb\x3d\xbb\x2e\x84\x6b\xac\x3b\x22" \
+ "\x8a\x60\xae\x1c\x6c\x94\xe8\xa2\xff\x23\xba\x3a\x2c\xb2\x36\xac" \
+ "\x62\x0a\xb3\x4b\xe1\x98\x22\xb5\xd0\xda\x22\xed\x7b\x1e\xd2\xe9" \
+ "\x60\xb6\x93\x05\xfb\x03\xb6\x55\x74\x19\x1d\xb9\xe0\xee\xf6\x21" \
+ "\x78\x67\x58\xeb\x1e\xba\xe9\x6e\x39\x2b\x13\x32\x98\xeb\x1a\xbc" \
+ "\xf1\x66\x60\x21\x85\x9f\x0a\xf9\xa9\xa6\x17\x72\x95\xdd\xb7\x86" \
+ "\x5c\x49\x70\xae\xf3\xf2\xda\xeb\xb3\x53\x64\x17\x94\xbd\x9d\x65" \
+ "\x0b\xb1\x70\x8c\x5c\x12\x83\x86\x9d\xca\x71\xab\x19\x05\x46\xe0" \
+ "\xee\x71\x95\xc6\xf8\x11\xc3\xf5\x28\x9b\x52\x14\x1f\xd7\x76\x30" \
+ "\x8b\x2d\x91\x1c\x9e\xa9\x96\x98\x30\x71\x61\xf2\xa6\x96\x6f\x04" \
+ "\x53\x39\x58\xac\x81\x4d\xce\x6c\xdb\xcd\x24\x88\x2c\x95\x92\x2e" \
+ "\xdf\xb5\x73\x64\x40\xd7\x20\xae\xca\x35\x0e\x64\x5d\xd2\x35\xf8" \
+ "\x2c\x57\xd1\x65\xa5\x2c\x21\xd4\x52\x48\x8d\xd6\xca\x6e\x2d\x7d" \
+ "\x35\xd6\x52\x1c\x2d\x20\xd5\x4e\x25\x06\x76\xd6\x06\xc3\x67\xf6" \
+ "\xd9\x51\x88\x3d\xf6\x6c\x56\x7f\xcd\xb6\x14\x5e\xf7\x46\xf6\x4b" \
+ "\x75\x2f\x39\x37\xdd\x68\xe2\x6b\x50\xdc\x7d\xec\x8d\x36\x9a\x77" \
+ "\xcb\x1a\x91\xdf\x82\x6b\x90\x37\xc8\x9b\xa5\x97\x78\xb4\x6f\x22" \
+ "\xbe\xce\x47\x5c\x3f\x0e\x02\xe0\xff\xd1\x3d\x0d\xe3\x77\x96\xc7" \
+ "\xb0\xb8\x76\xa2\x61\x0e\x52\xe7\x2c\x68\xfd\xa5\x68\x9f\x2b\xa2" \
+ "\x39\xe9\xa2\xec\xb9\xfa\x61\xa6\xfb\x21\x79\xe2\x6e\xf3\x58\xf9" \
+ "\x53\xb1\x03\x32\x3b\xd8\xa9\x0b\xb8\x3b\x92\xf2\xfd\x0e\x6e\xee" \
+ "\xe7\x3a\x47\x7c\x29\x85\x67\x2b\x3a\x88\xb7\xab\xd5\xfb\x45\x16" \
+ "\xef\xfd\x3c\x88\xc2\xd3\x34\x7d\x65\xb0\x5a\x2a\xb2\xb3\x4c\x1e" \
+ "\xaf\x70\x10\x4f\x2d\x4f\xf4\x9a\xdb\x7e\xaf\xc5\x64\xe2\xa7\x1d" \
+ "\x32\xcc\x74\xae\x76\x3d\xe1\x73\xeb\xb0\x21\x52\xa9\x02\xe9\xbd" \
+ "\x97\xc9\xb3\x9e\xed\xfb\xa7\xe6\xaf\xff\xa5\xb5\x33\x5f\x79\xfc" \
+ "\xf7\xbf\x3e\xf1\x4f\x80\xaf\xaa\x5e\x01\x59\x94\x3e\x04\xbe\x21" \
+ "\x7b\xeb\xb9\xc7\x0f\x16\x38\x01\xf6\x39\xd0\x5b\xfd\x3a\xe0\x8b" \
+ "\x28\x98\x03\x0d\x5e\xf0\x83\x0e\xe1\x20\x0d\x12\x06\xc2\x12\x26" \
+ "\x41\x84\x99\xb0\xa0\x09\x69\x85\xc2\x75\x48\x70\x85\x43\x6a\x21" \
+ "\x70\x54\x08\x43\x3b\xc9\x90\x54\x18\xf3\x20\xf6\x7c\x75\xc3\x1e" \
+ "\xfa\xf0\x87\x40\x0c\xa2\x10\x87\x48\xc4\x22\x76\x2e\x87\x0c\x02" \
+ "\x55\x98\x8c\x88\xb5\x17\xd6\x10\x09\xf5\x63\x62\x78\x4c\xf6\xc4" \
+ "\x60\x40\x50\x8a\x25\xd3\x61\x15\xff\x65\x51\x07\x05\x62\xb1\x04" \
+ "\x24\xdc\xa2\x9b\x76\xf0\xc5\x7a\x89\x51\x8c\xc7\x2a\x23\x06\xc2" \
+ "\x78\xc6\x06\x81\x6a\x10\x93\x5a\xe3\xa3\x34\xb5\x29\x2d\x52\x62" \
+ "\x63\x5f\xbc\x5f\x9a\xa2\xd8\x13\x43\xd9\x31\x44\x5e\xa4\xd4\xf6" \
+ "\x04\x78\xab\x40\x3a\xc5\x89\x9d\xc0\x23\xd8\x02\x58\xa7\xe8\xe5" \
+ "\x8b\x8a\x7d\x20\x20\x7d\xfe\x58\x1b\x45\x8a\x10\x92\x2e\x89\xd3" \
+ "\x20\xe9\x24\x49\xd2\xb1\xb1\x0b\x9d\x0c\xd2\xa9\x9a\xa7\x46\x05" \
+ "\xd0\x30\x44\xa1\x0b\x94\x21\xd5\x58\xbe\x2f\x84\xd2\x16\x0d\xac" \
+ "\xcd\x2b\x4b\xc9\x81\x53\x1a\x81\x5d\x21\xa1\x24\x52\x56\x49\xcb" \
+ "\x11\xd8\xb2\x28\xe9\xe0\x64\x2f\xa5\xf1\x24\x52\x52\x81\x91\xbe" \
+ "\x1b\x26\x45\x92\x64\x4c\x19\xb8\x89\x97\xca\xd4\x43\xb0\x66\xc9" \
+ "\x01\x5d\x0a\x27\x9a\x7d\x59\x50\x33\x3f\x00\x3f\x6c\xae\x66\x3f" \
+ "\xb8\x5c\x81\x1e\x1d\xe7\xcd\xff\x08\x29\x9c\x22\x18\xa7\x89\x7a" \
+ "\x28\x3f\x6b\x7a\x81\x43\x8e\xc4\x4e\x79\x70\xa1\xbe\xc7\x21\x12" \
+ "\x83\xd0\x44\x41\x76\x8a\x54\xcf\xa4\xfd\xf2\x82\x57\xcc\xc3\x72" \
+ "\xfc\x02\xa5\x6d\x52\x49\x9d\x84\xa4\xa6\x07\x20\x32\x81\x2b\x21" \
+ "\xcc\x9d\xfa\x88\xa7\x23\x80\x35\xff\xad\x72\xf9\x20\x02\xc8\xec" \
+ "\x8d\x41\x23\x58\x18\x89\x86\x04\x93\x49\x90\x92\x45\x95\x94\x4f" \
+ "\xb5\x20\xf4\x0b\x22\x15\xcd\x3f\x0b\x90\xd2\x18\x9c\x54\x1e\x83" \
+ "\xca\x68\x25\x36\x8a\x97\x56\x16\x21\x94\x32\xbd\x97\x26\x29\x37" \
+ "\xaa\x53\xde\x0d\x4a\x25\xed\x09\x44\x91\xf0\xba\xfd\x1d\x61\x58" \
+ "\x43\xb5\xa2\x96\x5e\x4a\xd4\xce\x4d\x45\x25\x39\x2d\x4d\x50\x0d" \
+ "\x12\x55\xbd\x71\x90\x30\xbc\x60\xaa\x33\x8a\x4a\x1f\xad\xde\x74" \
+ "\x88\xc8\x18\xda\x90\x68\x5a\x16\xaf\x12\xa1\x97\x49\x95\xcd\x8f" \
+ "\x12\xa2\xcc\x82\xd6\xc8\xac\x42\x50\xa8\xe0\xd2\x0a\x8c\x04\xd1" \
+ "\x35\x08\x53\xf5\x27\x50\xbb\x3a\x90\x72\x56\x15\x7b\x1c\xcd\x87" \
+ "\x5c\x2d\x77\xd7\x58\xe4\xd5\x18\x85\x9d\x60\x39\x01\xf0\xd7\xca" \
+ "\x84\x67\x6d\xde\x84\x6b\x5d\x9d\x03\x59\x6f\x26\xb6\x14\xce\xb9" \
+ "\x6c\x01\x16\xcb\x00\x2f\xe1\x69\x20\x83\x65\x9d\x97\x0e\x6b\x8b" \
+ "\xa6\x70\x56\x01\xb1\xdc\xda\xaf\xd8\x7a\x5a\x05\x68\x16\x69\x2a" \
+ "\x35\xed\x69\x25\x0b\x0c\xb2\x96\x2c\x21\xb6\x95\x61\x37\x1b\x27" \
+ "\x5b\xce\xd2\x16\x28\xa8\x53\x48\x6b\x15\x90\x26\xd2\xa2\x62\x9d" \
+ "\x9c\x6d\x6c\x69\x42\x6b\x0c\xe4\xff\x2e\xf6\xb7\x6a\x75\x8c\x72" \
+ "\xe1\xc0\x5c\xc2\xba\x49\x34\x17\x31\x2e\xd4\x5e\x6b\xd8\xcd\x70" \
+ "\xf7\xa2\xbe\x7d\xd3\x6d\x3e\xa2\xdd\x9b\x4d\x77\xb9\xb0\x19\xd9" \
+ "\x62\x85\x29\x99\xef\xfa\x80\xab\xac\xa4\x53\x79\x33\xe1\xde\x5e" \
+ "\x79\xb3\x91\xe9\x6d\x49\x6e\x59\x07\x5d\x54\x8d\x57\x27\xf0\x65" \
+ "\x62\x7d\x63\x11\x5b\xa4\x54\x37\x69\xa9\x45\xaf\x77\xbf\x72\xe0" \
+ "\x7c\x05\xca\x7d\x68\x69\x30\xb8\x02\xb5\x5f\x79\xca\x65\xbe\x94" \
+ "\x4a\xf0\xe9\x56\xa3\xe1\x3e\xa0\xf3\x87\x03\xae\xed\x6a\x19\x23" \
+ "\x61\xed\x9d\x2a\xc0\xdd\x51\x70\x0f\x3b\x5c\x1b\x0c\x87\x22\xc4" \
+ "\x50\x94\xe1\x79\xdd\x53\xe1\x7a\xf4\x97\x23\x14\x84\xb1\x33\x4a" \
+ "\x6c\x8c\x1b\x27\xc5\xc5\xeb\xd1\xf1\x33\xa6\xc8\xe2\xda\x02\x99" \
+ "\xc3\xbc\x3a\x72\x2a\x7c\xcc\x0a\x25\xdf\x45\xc8\xda\xe0\x71\x95" \
+ "\xa0\xd4\xc5\x5c\x41\x59\x1b\x28\x5e\xcd\x27\x95\x54\x87\x41\x58" \
+ "\xc1\xc9\x60\x14\xa0\x94\xd3\x71\xe5\x18\xe6\x12\x81\x63\x76\x54" \
+ "\x99\x05\x24\x94\x19\xbb\x28\xcd\x36\x2e\x72\x9d\x04\xf6\xc1\x1a" \
+ "\x4f\x66\xa5\xb5\x1a\x4d\x09\xe1\xdc\x66\x3c\xa7\x29\x9b\x2b\xe4" \
+ "\xf3\x89\x6c\xea\x50\xc7\x30\x19\xff\xb0\x8f\x44\x04\x9b\x7f\x25" \
+ "\x67\x1e\x09\x1a\x48\xfb\xe2\xd7\xa7\x92\x40\x80\x7f\x85\x6a\x89" \
+ "\x71\x3a\xb4\x2c\x87\x6b\xc0\x36\xb2\x14\xcc\x9c\x96\xc6\x9a\x19" \
+ "\xd3\xd2\x50\x1b\x69\xd4\x2a\x36\xf5\x5b\x3d\x7d\x13\x50\xab\x5a" \
+ "\x13\x5b\x66\xf5\xa7\x5f\x4d\x9c\x4d\xca\x9a\x08\x69\xa4\xb5\x60" \
+ "\x6c\x7d\xeb\x90\xba\x5a\xd7\x38\x40\xb5\x9b\x02\x0a\x6c\x93\xf8" \
+ "\xb9\xd7\x49\xe1\xe1\x64\x40\xaa\xd8\x16\x6a\x1a\xd9\x3f\x50\xa2" \
+ "\x8b\x73\xa8\x41\x22\x1e\x1b\xda\xc8\x2e\xe5\xb3\xb1\xed\xa2\xe7" \
+ "\x36\x9a\xdb\xd7\x55\x75\xac\xc1\xad\xb0\x62\x3b\x60\xdc\xe4\x6e" \
+ "\x9f\xb9\x2f\x80\xee\x74\xa7\x68\xdd\xbe\xfc\xb6\xbb\x61\x0a\xef" \
+ "\x16\xec\x60\xde\xa0\xab\x77\xdb\x08\x8d\x6f\xe1\xea\x7b\xa2\xfc" \
+ "\xee\xf7\x24\xfe\x1d\x8d\x7b\x0a\x1c\x3f\x1e\x25\x38\x4d\xda\x79" \
+ "\xc6\x8a\x29\x7c\x55\xf2\xdb\x14\xa9\x2b\x56\xa1\x87\x5b\xfc\xe2" \
+ "\x18\xcf\xb8\xc6\x37\xce\xf1\x8e\x7b\xfc\xe3\x20\x0f\xb9\xc8\x47" \
+ "\x4e\x72\xfd\x69\x48\xe2\xdf\x81\x27\x1c\x4b\x7e\x27\x3f\x62\xd0" \
+ "\x09\xbf\x66\xf9\x14\x5c\xfe\x44\x3e\xca\xdc\x85\x01\x07\xb7\xcd" \
+ "\x6f\x0e\xeb\x6b\x0b\xbc\xcb\x31\xff\xef\x78\xce\x0f\x6e\x8a\x84" \
+ "\xf3\xfc\x04\x6a\x10\xf6\x19\xab\x7c\x74\x0f\xb4\x9b\xe8\x5b\xfd" \
+ "\x70\xd3\x4d\x29\x6f\xa8\x2b\x22\xd7\x23\xf7\xb9\xd5\x0b\x83\x75" \
+ "\x8b\x6b\x7d\xeb\xda\x29\x75\xb1\x95\x0e\xf6\x49\x74\x7d\xb1\x4f" \
+ "\x2f\x3b\x9d\x2c\xd9\xcb\xb4\x6f\xb1\x0e\xb7\xaa\x90\x71\x23\x3d" \
+ "\xbf\xc2\xb0\xbd\x88\xdb\x26\x4f\xc5\x82\xde\x01\x8c\x51\x8e\xef" \
+ "\x35\xe2\xf5\x05\x81\x6e\x34\x83\x47\x77\x81\x79\xe7\x8f\xd1\xe1" \
+ "\x53\xf7\x58\x88\x7d\x6e\x64\xc7\x12\xe0\x87\x92\x74\x52\x3c\x3e" \
+ "\x5f\x91\xdf\xc6\xe4\x3f\xfa\xf5\x06\x6d\xde\x7a\x08\xbc\xfc\xcd" \
+ "\x86\x6e\x99\x58\x25\x5e\xb0\x14\xbc\xf7\x4e\x1a\xe5\x66\x01\x89" \
+ "\x5e\x84\xa4\x7f\xe7\xe7\x6d\x91\x79\x40\x3c\xfa\x52\xb1\x9f\x0b" \
+ "\x93\x5a\xaf\x9d\xdb\x0f\x6f\x7a\xaf\xff\x66\xab\xec\x2c\xc3\xce" \
+ "\x47\x7b\xf6\x55\xa8\xfa\x85\xf5\xed\xf6\x98\x49\x46\xf9\x54\x21" \
+ "\x7e\x7c\xd9\xe0\xfb\x32\xf0\x3e\xd5\x1f\x87\x58\xf5\x51\xa6\x27" \
+ "\xe9\x0f\xb7\xf9\x3f\xd8\xbe\x0c\xa0\xff\x91\x2c\x97\x1c\xfc\xc1" \
+ "\xf7\x46\xed\x4b\x3f\x75\x1b\x59\xcd\xfb\x0d\x5b\x7b\xfb\x37\xd0" \
+ "\x7c\xf1\x83\xe0\xfa\x7b\xb1\x3f\xff\xad\x97\x96\x7e\x50\x90\x3f" \
+ "\x37\xf3\x77\x12\x5e\xa0\x7f\x36\x12\x39\x01\x58\x3a\xc9\x30\x79" \
+ "\xff\x37\x10\xc8\x07\x6f\x49\x42\x80\x8c\xf5\x4c\x07\x78\x03\xc1" \
+ "\x02\x7f\xd5\x54\x5c\x13\x38\x73\xbb\xd2\x7f\x33\x90\x26\x10\xc8" \
+ "\x73\x78\x46\x5a\xf8\x87\x14\xe6\x97\x81\xc1\xf6\x1a\x90\x33\x5a" \
+ "\x26\xc8\x25\x38\xc2\x5c\xeb\xd7\x05\x25\xb8\x82\x65\x70\x28\x0a" \
+ "\xb5\x80\x32\x21\x83\xb5\xb0\x1f\xa1\x64\x83\xce\x80\x83\xc4\x20" \
+ "\x78\x3e\xe0\x3f\x5e\x62\x81\xc8\x82\x44\x8e\x47\x71\x98\x26\x2b" \
+ "\x01\x72\x37\x3c\x78\x78\xa9\x97\x7b\xf4\xa3\x6c\xdd\x20\x24\xcc" \
+ "\xd2\x84\xb2\xd0\x80\x46\x63\x7c\x9d\xb1\x73\xb5\x60\x1f\x52\x17" \
+ "\x10\x57\x82\x85\x83\x66\x85\x24\x78\x77\xa0\x60\x1f\xdf\xf2\x82" \
+ "\x5b\xc0\x36\xac\x52\x43\x4c\xa7\x09\xf6\x51\x01\xa7\x97\x16\x49" \
+ "\x03\x7e\x0e\xc4\x81\x61\x93\x15\xf6\x10\x86\xf9\xa2\x85\x2b\x64" \
+ "\x86\x5f\x56\x04\x14\xc0\x87\x12\x53\x22\x6f\x24\x2a\x15\x90\x29" \
+ "\x92\xf6\x47\x80\x38\x38\x40\x30\x01\x6a\x78\x32\xb9\xe2\x87\xa8" \
+ "\xb2\x78\x37\xc0\x70\x70\x70\x76\x14\xd8\x6c\x91\xe0\x56\xaa\xa2" \
+ "\x74\x52\xe8\x0a\x14\xb5\x0b\xc7\xff\x04\x5f\x85\x76\x29\xa3\xf6" \
+ "\x81\x23\xc0\x6c\xe3\x80\x06\x91\xa8\x04\x62\x88\x58\xfc\x11\x8b" \
+ "\xa2\x20\x2e\x8d\x58\x0e\x50\xa2\x8a\xe9\x30\x87\xba\xb8\x6f\xd6" \
+ "\x82\x87\x28\xf0\x8a\x27\xc4\x28\x30\x06\x8c\x77\xb1\x52\xa1\x35" \
+ "\x82\xd0\x13\x53\x06\x76\x24\xe3\x46\x4d\xc2\x88\x04\xab\xa2\x13" \
+ "\xb4\xa8\x09\xd0\xd1\x49\xb9\xd8\x72\x2d\x51\x8d\xb5\xb0\x65\xc9" \
+ "\x33\x87\x8d\xb1\x26\xca\xe8\x4a\xa6\x97\x15\x5f\x88\x01\x64\xd8" \
+ "\x09\xbd\xd8\x0b\xee\xc5\x8d\x64\x26\x8d\xc9\xb2\x57\x4b\x65\x33" \
+ "\x58\xe3\x15\xcd\x01\x8e\x3f\x43\x25\xe9\xf8\x03\x31\x68\x54\x5f" \
+ "\x55\x01\xd1\xf8\x0f\x54\x32\x8e\x5b\xd0\x8f\x89\xf6\x2a\x0d\x45" \
+ "\x52\x53\xe2\x63\xee\xa8\x67\x44\x80\x4e\x04\xc9\x5a\x4c\xf2\x5a" \
+ "\x44\x48\x29\x47\xc1\x00\xf8\xe8\x06\x4d\xb2\x8f\x05\xd0\x90\x86" \
+ "\x26\x04\x18\xa9\x24\x15\x19\x12\xfe\x36\x44\xd5\x20\x56\xb6\x73" \
+ "\x24\xb8\x85\x45\x98\x10\x90\x08\x09\x24\xfb\x38\x92\x40\xe3\x92" \
+ "\xc0\xf4\x23\x9a\x25\x93\x33\x39\x3e\xab\x96\x0f\x38\x09\x34\x1c" \
+ "\x39\x09\xeb\xf8\x08\x12\xd9\x4b\x3f\x79\x75\x35\xb2\x8f\xd8\x44" \
+ "\x93\xc2\x90\x20\x11\xd9\x8a\xd1\xff\xa4\x94\x43\x90\x20\x48\x99" \
+ "\x94\x82\xc2\x32\xa8\x87\x4d\x19\xd9\x06\x1e\x79\x0c\x7d\xe5\x4d" \
+ "\x4d\x79\x95\x8f\x05\x96\xd8\x44\x65\x81\x25\x0f\x5b\xc9\x24\x45" \
+ "\xa9\x08\x67\x69\x46\x9a\xb1\x58\x50\x19\x57\xe1\x71\x59\x3d\x79" \
+ "\x36\x59\xd9\x06\x61\x29\x0f\x73\x79\x36\x84\x48\x1c\x95\xf5\x94" \
+ "\x9e\xc8\x97\x3c\x39\x5b\x7b\x39\x1b\x7d\x19\x4d\xfd\xb4\x1a\x43" \
+ "\x59\x4e\x75\xc9\x06\x6b\xe9\x02\x9a\xd5\x5a\x5f\x19\x98\xea\xc2" \
+ "\x80\xad\xf5\x96\x42\xd0\x98\x08\x28\x99\xc9\xe5\x59\xbb\x91\x98" \
+ "\xe5\x64\x99\x07\xb2\x1b\x9a\x15\x94\x96\xa2\x82\xc2\xd7\x95\x82" \
+ "\x79\x8a\xd8\x05\x5a\xc3\x35\x84\xb3\x71\x63\xc3\xb5\x98\xd4\x47" \
+ "\x98\xac\x59\x99\xa6\x99\x5f\xf9\x80\x99\x4c\xa2\x26\x88\xe9\x99" \
+ "\x91\xc5\x9b\xb8\x89\x97\xc3\x05\x9a\x20\x19\x5c\x85\x19\x4d\x69" \
+ "\x39\x09\x06\xc9\x15\x0a\x41\x9a\x95\x82\x26\x79\x99\x0a\x91\x89" \
+ "\x04\xce\xd9\x28\xb2\xc9\x06\xd1\x89\x0a\x3e\x56\x9d\x8c\x72\x9d" \
+ "\x49\x51\x60\xb5\xc9\x59\xc4\x19\x9a\x0b\xb6\x92\xa7\x95\x9c\x94" \
+ "\x60\x9c\xc7\x39\x4c\xe8\x39\x70\xe5\xe9\x9b\x86\xf9\x67\xef\x49" \
+ "\x99\x9c\x25\x81\xed\x15\x11\xdc\xff\x39\x28\x63\x14\x9c\xf9\xb0" \
+ "\x9c\x45\x34\x6c\xfc\x99\x9b\xf5\x19\x6e\xd2\x45\x4e\xe5\x64\x80" \
+ "\xcf\x97\x5d\x8b\xd5\x9e\x40\x39\x9f\x4e\xb3\xa0\xe2\x05\x1c\xea" \
+ "\x65\x59\x08\xea\x18\xc9\xa9\x9b\xff\xe1\x9d\x8b\x10\xa0\xa8\xa9" \
+ "\x4c\x1a\xaa\x95\xbc\x75\x11\xfe\xf9\x43\xd3\x89\x9f\x1c\xaa\x99" \
+ "\xc3\x34\x67\x9b\x81\x9e\xd9\x99\x2e\xe3\x89\x6b\x27\x8a\xa2\xda" \
+ "\x56\x27\x18\xba\x12\x3a\xd1\xa2\xd0\x52\xa2\xf4\x98\xa0\x9c\x43" \
+ "\x94\xf8\x25\xa1\xbb\x44\x4b\x1f\xea\x06\x35\xda\x3a\xcd\x58\x4a" \
+ "\x7a\x32\xa2\xa2\x44\x3f\xa5\xf4\xa2\xff\x78\x9f\x54\x51\xa4\x40" \
+ "\xfa\xa3\x50\xca\xa4\x5f\xc4\xa0\x80\xa1\x9e\x41\x2a\x45\x3a\x2a" \
+ "\xa2\x10\x56\x1d\x58\xe4\xa4\x45\xa0\xa4\x4b\x01\x8e\x38\x2a\x48" \
+ "\x14\xa6\x65\xf9\x67\x44\x62\xfa\x92\xb7\x81\xa5\x40\x90\x9f\xf4" \
+ "\x61\x2c\xb3\xb1\x66\x72\xda\x2c\x69\x7a\x9a\x7b\x71\xa6\x7d\x02" \
+ "\xa7\x96\xb7\x1b\x7e\x0a\x04\xe7\xb8\x40\x5d\xda\xa3\x7a\xda\x51" \
+ "\x3e\x34\xa4\x71\x40\xa6\x4f\x21\x6c\xc6\x38\x57\xad\x22\xa5\x26" \
+ "\x50\xa8\xdf\xd9\x42\x81\x4a\x0a\x77\x7a\x03\xde\xc9\xa7\xce\xe8" \
+ "\x2a\x9c\xfa\x62\x1a\x95\x7a\xae\xff\xc2\xa8\x57\x51\x7b\x8f\x3a" \
+ "\x7a\xb4\x22\xa9\xc1\xd8\x7b\xac\xd3\xa6\x4e\x99\x59\xe4\x71\x8b" \
+ "\xdb\x45\x2b\x83\xaa\x18\xaf\x28\xab\xf9\x72\xa9\xb1\x90\xa9\x8e" \
+ "\x78\x1f\x96\x08\x2e\xba\x5a\x0a\xb5\xaa\x18\x94\x08\x3d\xaa\xda" \
+ "\x0b\xc1\x8a\x3c\xe6\xe4\x22\xa7\xea\x27\xae\x2a\x1d\x19\x2a\x92" \
+ "\xbc\x3a\x0d\xcf\xba\x1d\x35\x22\xa6\x5d\x86\x8a\xdf\x33\xad\x55" \
+ "\x50\xad\xbd\xd2\x1f\xc7\x1a\x7f\xdf\x13\xae\x2c\x60\x87\x86\xb8" \
+ "\x0f\x5e\xe4\x77\xe0\xb5\x8b\x02\x44\xae\xe5\xea\xad\x1f\x71\x5b" \
+ "\xe6\x33\xac\xa2\x09\x76\x21\x91\xac\x22\xd6\x3d\xf8\xea\x1e\x78" \
+ "\x13\x7a\x07\x35\x6f\x3d\xa1\xa8\x33\xe5\xae\x17\x03\xaf\xe9\xd9" \
+ "\x66\xfb\x6a\x2b\x04\x2b\x03\xe6\xca\x2b\xa5\xfa\x41\x0b\x4b\x03" \
+ "\xc5\xaa\x6e\x57\x41\xa9\xd8\x27\x2f\x02\x3b\x59\x65\x61\xb0\xa6" \
+ "\x41\x28\xac\xd8\x2a\x6f\x91\xb1\x5a\x41\x29\x50\x08\x9c\x6f\xc1" \
+ "\xb1\xc9\xd0\x53\x22\x2b\x06\xbb\x06\x43\x11\x9b\x06\x63\xa2\x9a" \
+ "\xe7\x01\x43\x9f\xea\x16\xf2\x93\xb0\x4a\xf0\xa6\x35\xc4\xad\x25" \
+ "\x33\x8a\x1b\x06\x9e\x30\xc4\xb3\x4b\x61\x84\xa5\x30\x21\x49\x18" \
+ "\xad\x55\x24\xb4\x3e\x08\x89\x62\xff\xa4\xb4\x4b\xfb\x00\x40\x88" \
+ "\x66\x4f\x5b\x1c\x9e\x56\xb3\x53\x1b\x66\x55\xfb\xb2\x37\xb7\xb2" \
+ "\x09\xe1\xb4\x4f\xcb\xb5\x09\x81\xab\x57\xab\x9d\xd0\xd6\xac\x63" \
+ "\x5b\x13\xdc\x66\xb6\x67\x6b\x7d\x38\x4b\x6a\x5e\x3b\x81\x28\x5b" \
+ "\x09\x6f\x3b\x7f\x60\xfb\x15\x9a\xb8\xb6\x02\x75\x70\x6a\x8b\xb7" \
+ "\x34\x10\xb7\x46\xc6\xb7\x85\x50\xb7\x9d\x51\x48\x80\x7b\x06\x0d" \
+ "\x5b\xb5\x73\x9b\x7d\x6a\x17\x04\x84\x5b\xb8\x51\xe0\xb7\xfa\xb5" \
+ "\xb7\x8e\xdb\x77\x90\x6b\xb7\xbf\xca\x22\x89\xeb\x21\x6d\x5b\x27" \
+ "\x30\xf7\x2b\x84\x26\xaa\x8b\x8b\x9d\xa1\xb8\x70\x86\x27\x88\x28" \
+ "\x24\xb8\xc3\x87\xae\xb7\xd0\x4e\xbd\x93\xa8\xa1\x8b\x2a\x87\x88" \
+ "\x88\xf7\xa7\x88\x75\x44\x60\x41\x34\xb1\xaf\x0b\xb2\x45\x74\xb8" \
+ "\xb9\x7b\x2a\x65\x54\xb9\xbd\xdb\x05\xbd\x84\xbb\xc1\xcb\x23\x14" \
+ "\x5a\xbc\x02\x34\x5b\xc0\x8b\xbc\x9b\x15\x6a\xc4\xcb\xbc\xae\x01" \
+ "\x6c\xcf\x0b\xbd\x72\x51\x6f\xd3\x4b\xbd\xf1\xaa\x70\x25\x8b\xbd" \
+ "\x11\x93\x71\xbc\xcb\xbd\x1d\xca\x71\xdb\x0b\xbe\xce\x15\x72\xd7" \
+ "\x4b\xbe\x74\xc8\x73\x1f\x8b\xbe\x3d\x38\x81\xe3\xcb\xbe\x7e\xb0" \
+ "\xb4\xeb\x0b\xbf\x1b\x74\xb6\x95\x58\x47\xbf\x7f\x30\xb9\x0a\x30" \
+ "\xbf\xf8\xbb\x04\xfa\x1b\x01\xef\xdb\xbf\xcd\xfb\xbf\x89\x78\xbf" \
+ "\x02\x7c\x56\x04\xec\x74\x8d\x47\xbf\x56\xdb\x74\x86\xd2\xbb\xa3" \
+ "\x9b\xc0\x0c\xab\x68\x69\x4b\x46\x12\xdc\x73\x0b\xfc\x3d\x5c\x78" \
+ "\xc1\xdd\x40\xb4\xcc\xb3\x77\x5a\xcb\xc1\xfa\x72\x72\x75\xf4\x47" \
+ "\x9c\x02\x30\x47\x2b\xc2\x2a\xbc\xc2\x2c\xdc\xc2\x2e\x0c\x6f\x09" \
+ "\x00\x00\x3b\x00"
+
+#define HTS_DATA_BACK_GIF_LEN 4243
+
+#define HTS_DATA_FADE_GIF \
+ "\x47\x49\x46\x38\x39\x61\x8\x0\x8\x0\xf7\x0\x0\x0\x0\x0\x0\x0\x33\x0\x0\x66\x0\x0\x99\x0\x0\xcc\x0\x0\xff\x0\x33\x0\x0\x33\x33\x0\x33\x66\x0\x33\x99\x0\x33\xcc\x0\x33\xff\x0\x66\x0\x0\x66\x33\x0\x66\x66\x0\x66\x99\x0\x66\xcc\x0\x66\xff\x0\x99\x0\x0\x99\x33\x0\x99\x66\x0\x99\x99\x0\x99\xcc\x0\x99\xff\x0\xcc\x0\x0\xcc\x33\x0\xcc\x66\x0\xcc\x99\x0\xcc\xcc"\
+ "\x0\xcc\xff\x0\xff\x0\x0\xff\x33\x0\xff\x66\x0\xff\x99\x0\xff\xcc\x0\xff\xff\x33\x0\x0\x33\x0\x33\x33\x0\x66\x33\x0\x99\x33\x0\xcc\x33\x0\xff\x33\x33\x0\x33\x33\x33\x33\x33\x66\x33\x33\x99\x33\x33\xcc\x33\x33\xff\x33\x66\x0\x33\x66\x33\x33\x66\x66\x33\x66\x99\x33\x66\xcc\x33\x66\xff\x33\x99\x0\x33\x99\x33\x33\x99\x66\x33\x99\x99\x33\x99\xcc\x33\x99\xff\x33\xcc\x0\x33\xcc\x33\x33"\
+ "\xcc\x66\x33\xcc\x99\x33\xcc\xcc\x33\xcc\xff\x33\xff\x0\x33\xff\x33\x33\xff\x66\x33\xff\x99\x33\xff\xcc\x33\xff\xff\x66\x0\x0\x66\x0\x33\x66\x0\x66\x66\x0\x99\x66\x0\xcc\x66\x0\xff\x66\x33\x0\x66\x33\x33\x66\x33\x66\x66\x33\x99\x66\x33\xcc\x66\x33\xff\x66\x66\x0\x66\x66\x33\x66\x66\x66\x66\x66\x99\x66\x66\xcc\x66\x66\xff\x66\x99\x0\x66\x99\x33\x66\x99\x66\x66\x99\x99\x66\x99\xcc\x66\x99"\
+ "\xff\x66\xcc\x0\x66\xcc\x33\x66\xcc\x66\x66\xcc\x99\x66\xcc\xcc\x66\xcc\xff\x66\xff\x0\x66\xff\x33\x66\xff\x66\x66\xff\x99\x66\xff\xcc\x66\xff\xff\x99\x0\x0\x99\x0\x33\x99\x0\x66\x99\x0\x99\x99\x0\xcc\x99\x0\xff\x99\x33\x0\x99\x33\x33\x99\x33\x66\x99\x33\x99\x99\x33\xcc\x99\x33\xff\x99\x66\x0\x99\x66\x33\x99\x66\x66\x99\x66\x99\x99\x66\xcc\x99\x66\xff\x99\x99\x0\x99\x99\x33\x99\x99\x66"\
+ "\x99\x99\x99\x99\x99\xcc\x99\x99\xff\x99\xcc\x0\x99\xcc\x33\x99\xcc\x66\x99\xcc\x99\x99\xcc\xcc\x99\xcc\xff\x99\xff\x0\x99\xff\x33\x99\xff\x66\x99\xff\x99\x99\xff\xcc\x99\xff\xff\xcc\x0\x0\xcc\x0\x33\xcc\x0\x66\xcc\x0\x99\xcc\x0\xcc\xcc\x0\xff\xcc\x33\x0\xcc\x33\x33\xcc\x33\x66\xcc\x33\x99\xcc\x33\xcc\xcc\x33\xff\xcc\x66\x0\xcc\x66\x33\xcc\x66\x66\xcc\x66\x99\xcc\x66\xcc\xcc\x66\xff\xcc"\
+ "\x99\x0\xcc\x99\x33\xcc\x99\x66\xcc\x99\x99\xcc\x99\xcc\xcc\x99\xff\xcc\xcc\x0\xcc\xcc\x33\xcc\xcc\x66\xcc\xcc\x99\xcc\xcc\xcc\xcc\xcc\xff\xcc\xff\x0\xcc\xff\x33\xcc\xff\x66\xcc\xff\x99\xcc\xff\xcc\xcc\xff\xff\xff\x0\x0\xff\x0\x33\xff\x0\x66\xff\x0\x99\xff\x0\xcc\xff\x0\xff\xff\x33\x0\xff\x33\x33\xff\x33\x66\xff\x33\x99\xff\x33\xcc\xff\x33\xff\xff\x66\x0\xff\x66\x33\xff\x66\x66\xff\x66"\
+ "\x99\xff\x66\xcc\xff\x66\xff\xff\x99\x0\xff\x99\x33\xff\x99\x66\xff\x99\x99\xff\x99\xcc\xff\x99\xff\xff\xcc\x0\xff\xcc\x33\xff\xcc\x66\xff\xcc\x99\xff\xcc\xcc\xff\xcc\xff\xff\xff\x0\xff\xff\x33\xff\xff\x66\xff\xff\x99\xff\xff\xcc\xff\xff\xff\x21\xe\x9\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"\
+ "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x21\xf9\x4\x1\x0\x0\xd8\x0\x2c\x0\x0\x0\x0\x8\x0\x8\x0\x0\x8"\
+ "\x19\x0\xaf\x61\x13\x48\x10\xdb\xc0\x83\x4\xb\x16\x44\x88\x50\xe1\x41\x86\x9\x21\x1a\x74\x78\x2d\x20\x0\x3b\xff"
+#define HTS_DATA_FADE_GIF_LEN 828
+
+#endif
diff --git a/src/htsalias.c b/src/htsalias.c
new file mode 100644
index 0000000..65a34e3
--- /dev/null
+++ b/src/htsalias.c
@@ -0,0 +1,520 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsalias.c subroutines: */
+/* alias for command-line options and config files */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "htsalias.h"
+#include "htsglobal.h"
+/* Prototypes of helpers called by this file; their definitions are not in this file */
+void linput(FILE* fp,char* s,int max);
+void hts_lowcase(char* s);
+
+/* Evaluates to <a>, or to an empty string literal when <a> is NULL */
+#define _NOT_NULL(a) ( ((a)!=NULL) ? (a) : "" )
+/* True if <c> is a space, CR, LF, TAB or FF.
+   strchr() also matches the terminating NUL of the string it searches, so
+   NUL is rejected explicitly: otherwise a "skip spaces" loop would walk past
+   the end of the string. Note that <c> is evaluated twice. */
+#define is_realspace(c) ( ((c)!='\0') && (strchr(" \x0d\x0a\x09\x0c",(c))!=NULL) )
+
+// COPY OF cmdl_ins in htsmain.c
+// Insert a command in the argc/argv
+// Shifts argv[0..argc-1] up by one slot, makes argv[0] point into <buff> at
+// offset <ptr>, copies <token> there, advances <ptr> past the copied string
+// (terminating NUL included) and increments <argc>.
+// Neither argv nor buff is bounds-checked here.
+// The expansion is a brace block followed by several statements and ends
+// without a semicolon, so it must be used as a complete statement inside braces.
+#define cmdl_ins(token,argc,argv,buff,ptr) \
+ { \
+ int i; \
+ for(i=argc;i>0;i--)\
+ argv[i]=argv[i-1];\
+ } \
+ argv[0]=(buff+ptr); \
+ strcpy(argv[0],token); \
+ ptr += (strlen(argv[0])+1); \
+ argc++
+// END OF COPY OF cmdl_ins in htsmain.c
+
+
+/*
+ Aliases for command-line and config file definitions
+ These definitions can be used:
+ in command line:
+ --sockets=8 --cache=0
+ --sockets 8 --cache off
+ --nocache
+ -c8 -C0
+ in config file:
+ sockets=8 cache=0
+ set sockets 8 cache off
+
+*/
+/*
+ single : no options
+ param : this option allows a number parameter (1, for example) and can be mixed with other options (R1C1c8)
+ param1 : this option must be alone, and needs one distinct parameter (-P <path>)
+ param0 : this option must be alone, but the parameter should be put together (+*.gif)
+*/
+/*
+  Alias table. Each row is { alias name, real option, type, help text }.
+  The table ends with a row whose alias name is empty.
+  optalias_find() and optreal_find() return the FIRST matching row, so when an
+  alias name appears twice ("structure", "mirror", "test", "continue", "list")
+  only the first row is reachable through optalias_find().
+  Every string, terminating NUL included, must fit in 64 bytes.
+*/
+const char hts_optalias[][4][64] = {
+ /* {"","","",""}, */
+ {"path","-O","param1","output path"},
+ {"chroot","-%O","param1","default top path"},
+ {"mirror","-w","single",""},
+ {"mirror-wizard","-W","single",""},
+ {"get-files","-g","single",""},
+ {"quiet","-q","single",""},
+ {"mirrorlinks","-Y","single",""},
+ {"proxy","-P","param1","proxy name:port"},
+ {"httpproxy-ftp","-%f","param",""},
+ {"depth","-r","param",""},{"recurse-levels","-r","param",""},
+ {"ext-depth","-%e","param",""},
+ {"max-files","-m","param",""},
+ {"max-size","-M","param",""},
+ {"max-time","-E","param",""},
+ {"max-rate","-A","param",""},
+ {"max-pause","-G","param",""},
+ {"sockets","-c","param","number of simultaneous connections allowed"},{"socket","-c","param","number of simultaneous connections allowed"},{"connection","-c","param","number of simultaneous connections allowed"},
+ {"connection-per-second","-%c","param","number of connection per second allowed"},
+ /* type was "": the value of --timeout=N / --timeout N was silently dropped */
+ {"timeout","-T","param",""},
+ {"retries","-R","param","number of retries for non-fatal errors"},
+ {"min-rate","-J","param",""},
+ {"host-control","-H","param",""},
+ {"extended-parsing","-%P","param",""},
+ {"near","-n","single",""},
+ {"test","-t","single",""},
+ {"list","-%L","param1",""},
+ {"language","-%l","param1",""}, {"lang","-%l","param1",""},
+ {"structure","-N","param",""}, {"user-structure","-N","param1",""},
+ {"long-names","-L","param",""},
+ {"keep-links","-K","param",""},
+ {"replace-external","-x","single",""},
+ {"no-passwords","-%x","single",""},{"no-password","-%x","single",""},
+ {"include-query-string","-%q","single",""},
+ {"generate-errors","-o","single",""},
+ {"purge-old","-X","param",""},
+ {"cookies","-b","param",""},
+ {"check-type","-u","param",""},
+ {"assume","-%A","param1",""}, {"mimetype","-%A","param1",""},
+ {"parse-java","-j","param",""},
+ {"protocol","-@i","param",""},
+ {"robots","-s","param",""},
+ {"http-10","-%h","single",""},{"http-1.0","-%h","single",""},
+ {"no-compression","-%z","single",""},
+ {"tolerant","-%B","single",""},
+ {"updatehack","-%s","single",""}, {"sizehack","-%s","single",""},
+ {"user-agent","-F","param1","user-agent identity"},
+ {"footer","-%F","param1",""},
+ /* help text was a copy of the "retries" one */
+ {"cache","-C","param","create/use a cache for updates and retries"},
+ {"store-all-in-cache","-k","single",""},
+ {"do-not-recatch","-%n","single",""},
+ {"do-not-log","-Q","single",""},
+ {"extra-log","-z","single",""},
+ {"debug-log","-Z","single",""},
+ {"verbose","-v","single",""},
+ {"file-log","-f","single",""},
+ {"single-log","-f2","single",""},
+ {"index","-I","single",""},
+ {"search-index","-%I","single",""},
+ {"priority","-p","param",""},
+ {"debug-headers","-%H","single",""},
+ {"userdef-cmd","-V","param1",""},
+ {"structure","-N","param1","user-defined structure"},
+ {"usercommand","-V","param1","user-defined command"},
+ {"display","-%v","single","show files transfered and other funny realtime information"},
+ {"dos83","-L0","single",""},
+ {"iso9660","-L2","single",""},
+ /* */
+
+ /* DEPRECATED */
+ {"stay-on-same-dir","-S","single","stay on the same directory - DEPRECATED"},
+ {"can-go-down","-D","single","can only go down into subdirs - DEPRECATED"},
+ {"can-go-up","-U","single","can only go to upper directories- DEPRECATED"},
+ {"can-go-up-and-down","-B","single","can both go up&down into the directory structure - DEPRECATED"},
+ {"stay-on-same-address","-a","single","stay on the same address - DEPRECATED"},
+ {"stay-on-same-domain","-d","single","stay on the same principal domain - DEPRECATED"},
+ {"stay-on-same-tld","-l","single","stay on the same TLD (eg: .com) - DEPRECATED"},
+ {"go-everywhere","-e","single","go everywhere on the web - DEPRECATED"},
+
+ /* Badly documented */
+ {"debug-testfilters","-#0","param1","debug: test filters"},
+ {"advanced-flushlogs","-#f","single",""},
+ {"advanced-maxfilters","-#F","param",""},
+ {"version","-#h","single",""},
+ {"debug-scanstdin","-#K","single",""},
+ {"advanced-maxlinks","-#L","single",""},
+ {"advanced-progressinfo","-#p","single",""},
+ {"catch-url","-#P","single","catch complex URL through proxy"},
+ {"debug-oldftp","-#R","single",""},
+ {"debug-xfrstats","-#T","single",""},
+ {"advanced-wait","-#u","single",""},
+ {"debug-ratestats","-#Z","single",""},
+ {"exec","-#!","param1",""},
+
+ /* STANDARD ALIASES */
+ {"spider","-p0C0I0t","single",""},
+ {"testsite","-p0C0I0t","single",""},
+ {"testlinks","-r1p0C0I0t","single",""}, {"test","-r1p0C0I0t","single",""}, {"bookmark","-r1p0C0I0t","single",""},
+ {"mirror","-w","single",""},
+ {"testscan","-p0C0I0Q","single",""}, {"scan","-p0C0I0Q","single",""}, {"check","-p0C0I0Q","single",""},
+ {"skeleton","-p1","single",""},
+ {"preserve","-%p","single",""},
+ {"get","-qg","single",""},
+ {"update","-iC2","single",""},
+ {"continue","-iC1","single",""}, {"restart","-iC1","single",""},
+ {"continue","-i","single",""}, /* for help alias */
+ {"sucker","-r999","single",""},
+ {"help","-h","single",""}, {"documentation","-h","single",""}, {"doc","-h","single",""},
+ {"wide","-c32","single",""},
+ {"tiny","-c1","single",""},
+ {"ultrawide","-c48","single",""},
+ {"http10","-%h","single",""},
+ {"filelist","-%L","single",""}, {"list","-%L","single",""},
+ /* END OF ALIASES */
+
+ /* Filters */
+ {"allow","+","param0","allow filter"},
+ {"deny","-","param0","deny filter"},
+ /* */
+
+ /* URLs */
+ {"add","","param0","add URLs"},
+ /* */
+
+ /* Specific */
+ /* help text was a copy of the "path" one */
+ {"user","-%U","param1","user id to run the engine with"},
+ /* */
+
+ /* Internal */
+ {"catchurl","--catchurl","single","catch complex URL through proxy"},
+ {"updatehttrack","--updatehttrack","single","update HTTrack Website Copier"},
+ {"clean","--clean","single","clean up log files and cache"},
+ {"tide","--clean","single","clean up log files and cache"},
+ /* */
+
+ {"","","",""}
+};
+
+
+/*
+  Check for alias in command-line
+  argc,argv     as in main()
+  n_arg         argument position
+  return_argc   receives the number of strings stored in return_argv (1 or 2)
+  return_argv   two caller-allocated buffers where to put the result
+  return_error  buffer in case of syntax error (emptied on entry)
+
+  return value: number of arguments treated (0 if error)
+
+  Accepted long forms:  --alias=value   --alias value   --noalias   --alias
+  An exact alias name always wins over the "no" negation prefix, so that
+  aliases such as "no-passwords" or "no-compression" can be reached.
+  Arguments that do not start with "--" are copied as is, except real options
+  of type param1/param0 (e.g. -P <path>) which also take the next argument.
+*/
+int optalias_check(int argc,const char * const * argv,int n_arg,
+                   int* return_argc,char** return_argv,
+                   char* return_error) {
+  return_error[0]='\0';
+  *return_argc=1;
+
+  if ( (argv[n_arg][0]=='-') && (argv[n_arg][1]=='-') ) {
+    char command[1000];
+    char param[1000];
+    const char* name=argv[n_arg]+2;    /* text following the leading "--" */
+    const char* position;
+    int need_param=1;
+    int pos;
+    command[0]=param[0]='\0';
+
+    /* command and param only ever receive pieces of <name> below:
+       this single length check protects all those copies */
+    if (strlen(name) >= sizeof(command)) {
+      strcpy(return_error,"Syntax error:\n\tOption name is too long\n");
+      return 0;
+    }
+
+    /* --sockets=8 */
+    if ( (position=strchr(name,'=')) != NULL ) {
+      /* Copy command */
+      strncat(command,name,(size_t) (position - name));
+      /* Copy parameter */
+      strcpy(param,position+1);
+    }
+    /* --nocache (only when the whole name is not itself an alias) */
+    else if ( (strncmp(name,"no",2)==0) && (optalias_find(name)<0) ) {
+      strcpy(command,name+2);
+      strcpy(param,"0");
+    }
+    /* --sockets 8 */
+    else {
+      /* a leading "wide-" or "tiny-" is simply stripped; the original code
+         prepared an extra "c32"/"c1" option here but never emitted it */
+      if ( (strncmp(name,"wide-",5)==0) || (strncmp(name,"tiny-",5)==0) )
+        strcpy(command,name+5);
+      else
+        strcpy(command,name);
+      need_param=2;
+    }
+
+    /* Now solve the alias */
+    pos=optalias_find(command);
+    if (pos<0) {
+      /* precision bounds the user-supplied text copied into return_error */
+      sprintf(return_error,"Unknown option: %.200s\n",command);
+      return 0;
+    }
+
+    /* Copy real name */
+    strcpy(command,hts_optalias[pos][1]);
+
+    /* With parameters? */
+    if (strncmp(hts_optalias[pos][2],"param",5)==0) {
+      /* Parameter expected in the next argument? */
+      if (need_param == 2) {
+        if ((n_arg+1>=argc) || (argv[n_arg+1][0]=='-')) {   /* no supplemental parameter */
+          /* help text is read from the row found: <command> now holds the
+             real option, which an alias-name lookup would not find */
+          sprintf(return_error,
+            "Syntax error:\n\tOption %s needs to be followed by a parameter: %s <param>\n\t%s\n",
+            command,command,hts_optalias[pos][3]);
+          return 0;
+        }
+        if (strlen(argv[n_arg+1]) >= sizeof(param)) {
+          strcpy(return_error,"Syntax error:\n\tOption parameter is too long\n");
+          return 0;
+        }
+        strcpy(param,argv[n_arg+1]);
+      }
+    } else
+      need_param=1;     /* option without parameter: only one argument used */
+
+    /* Final result */
+
+    /* Must be alone (-P /tmp) */
+    if (strcmp(hts_optalias[pos][2],"param1")==0) {
+      strcpy(return_argv[0],command);
+      strcpy(return_argv[1],param);
+      *return_argc=2;     /* 2 parameters returned */
+    }
+    /* Alone with parameter (+*.gif) */
+    else if (strcmp(hts_optalias[pos][2],"param0")==0) {
+      /* Command */
+      strcpy(return_argv[0],command);
+      strcat(return_argv[0],param);
+    }
+    /* Together (-c8) */
+    else {
+      /* Command */
+      strcpy(return_argv[0],command);
+      /* Parameters accepted */
+      if (strncmp(hts_optalias[pos][2],"param",5)==0) {
+        /* --cache=off or --index=on */
+        if (strcmp(param,"off")==0)
+          strcat(return_argv[0],"0");
+        else if (strcmp(param,"on")==0) {
+          // on is the default
+          // strcat(return_argv[0],"1");
+        } else
+          strcat(return_argv[0],param);
+      }
+      *return_argc=1;     /* 1 parameter returned */
+    }
+    return need_param;
+  }
+
+  /* Check -P <path> */
+  {
+    int pos;
+    if ((pos=optreal_find(argv[n_arg]))>=0) {
+      if ( (strcmp(hts_optalias[pos][2],"param1")==0) || (strcmp(hts_optalias[pos][2],"param0")==0)) {
+        if ((n_arg+1>=argc) || (argv[n_arg+1][0]=='-')) {   /* no supplemental parameter */
+          sprintf(return_error,
+            "Syntax error:\n\tOption %s needs to be followed by a parameter: %s <param>\n\t%s\n",
+            argv[n_arg],argv[n_arg],hts_optalias[pos][3]);
+          return 0;
+        }
+        /* Copy parameters */
+        strcpy(return_argv[0],argv[n_arg]);
+        strcpy(return_argv[1],argv[n_arg+1]);
+        /* And return */
+        *return_argc=2;     /* 2 parameters returned */
+        return 2;           /* 2 parameters used */
+      }
+    }
+  }
+
+  /* Copy and return other unknown option */
+  strcpy(return_argv[0],argv[n_arg]);
+  return 1;
+}
+
+/* Looks <token> up among the alias names (first column of hts_optalias).
+   Returns the row index of the first match, or -1 when <token> is empty
+   or unknown. */
+int optalias_find(const char* token) {
+  int row;
+  if (token[0] == '\0')
+    return -1;
+  /* the table ends with a row whose alias name is empty */
+  for(row=0 ; hts_optalias[row][0][0] != '\0' ; row++) {
+    if (strcmp(token,hts_optalias[row][0]) == 0)
+      return row;
+  }
+  return -1;
+}
+
+/* Looks <token> up among the real options (second column of hts_optalias).
+   Returns the row index of the first match, or -1 when <token> is empty
+   or unknown. */
+int optreal_find(const char* token) {
+  int row;
+  if (token[0] == '\0')
+    return -1;
+  /* the end of the table is detected on the alias name (first column) */
+  for(row=0 ; hts_optalias[row][0][0] != '\0' ; row++) {
+    if (strcmp(token,hts_optalias[row][1]) == 0)
+      return row;
+  }
+  return -1;
+}
+
+/* Accessors to the four columns of row <p> of hts_optalias.
+   <p> is not range-checked. */
+
+/* Real option string (second column) */
+const char* optreal_value(int p) {
+  const char (*row)[64] = hts_optalias[p];
+  return row[1];
+}
+/* Alias name (first column) */
+const char* optalias_value(int p) {
+  const char (*row)[64] = hts_optalias[p];
+  return row[0];
+}
+/* Option type string (third column) */
+const char* opttype_value(int p) {
+  const char (*row)[64] = hts_optalias[p];
+  return row[2];
+}
+/* Help text (fourth column) */
+const char* opthelp_value(int p) {
+  const char (*row)[64] = hts_optalias[p];
+  return row[3];
+}
+
+/* Help text of the alias named <token>: possibly an empty string when no
+   help is available, or NULL when <token> is not a known alias name */
+const char* optalias_help(const char* token) {
+  const int row=optalias_find(token);
+  if (row<0)
+    return NULL;
+  return hts_optalias[row][3];
+}
+
+/* Include a file to the current command line */
+/* example:
+  set sockets 8
+  index on
+  allow *.gif
+  deny ad.*
+*/
+/*
+  Each useful line of <name> is read as "[set] option[=| ]parameter", turned
+  into "--option" plus parameter, resolved by optalias_check() and inserted
+  into argv right after the program name, keeping the order of the file.
+  Lines starting with '#', '/' or ';' and blank lines are skipped. The whole
+  line, parameter included, is lowercased by hts_lowcase().
+  argc/argv    command line to extend; no capacity check is done here
+  x_argvblk    storage receiving the inserted strings, x_ptr its fill offset
+  return value: 1 if the file could be opened, 0 otherwise
+*/
+int optinclude_file(const char* name,
+                    int* argc,char** argv,char* x_argvblk,int* x_ptr) {
+  FILE* fp;
+  fp=fopen(name,"rb");
+  if (fp) {
+    char line[256];
+    int insert_after=1;     /* first, insert after program filename */
+    while(!feof(fp)) {
+      char *a,*b;
+      int result;
+
+      /* read line */
+      linput(fp,line,250);
+      hts_lowcase(line);
+      if (strnotempty(line)) {
+        /* no comment line: # // ; */
+        if (strchr("#/;",line[0])==NULL) {
+          /* right trim, never stepping before the start of the buffer */
+          a=line+strlen(line);
+          while( (a>line) && is_realspace(*(a-1)) ) *(--a) = '\0';
+          /* jump "set " and spaces; every test stops on the terminating NUL */
+          a=line;
+          while( (*a) && is_realspace(*a) ) a++;
+          if ( (strncmp(a,"set",3)==0) && (a[3]) && is_realspace(a[3]) )
+            a+=4;
+          while( (*a) && is_realspace(*a) ) a++;
+
+          /* nothing left (blank line): ignore it silently */
+          if (*a) {
+            /* delete = ("sockets=8") */
+            if ( (b=strchr(a,'=')) )
+              *b=' ';
+
+            /* isolate option and parameter */
+            b=a;
+            while( (*b) && (!is_realspace(*b)) ) b++;
+            if (*b) {
+              *b='\0';
+              b++;
+            }
+            /* drop spaces before the parameter ("sockets = 8", "sockets   8") */
+            while( (*b) && is_realspace(*b) ) b++;
+            /* a is now the option, b the parameter */
+
+            {
+              int return_argc;
+              char return_error[256];
+              char _tmp_argv[4][HTS_CDLMAXSIZE];
+              char* tmp_argv[4];
+              /* slots 0-1: fake command line given to optalias_check,
+                 slots 2-3: result buffers */
+              tmp_argv[0]=_tmp_argv[0]; tmp_argv[1]=_tmp_argv[1]; tmp_argv[2]=_tmp_argv[2]; tmp_argv[3]=_tmp_argv[3];
+              strcpy(tmp_argv[0],"--");
+              strcat(tmp_argv[0],a);
+              strcpy(tmp_argv[1],b);
+
+              result=optalias_check(2,(const char * const *)tmp_argv,0,
+                &return_argc,(tmp_argv+2),
+                return_error);
+              if (!result) {
+                printf("%s\n",return_error);
+              } else {
+                int insert_after_argc;
+                /* Insert parameters BUT so that they can be in the same order */
+                /* temporary argc: Number of parameters after minus insert_after_argc */
+                insert_after_argc=(*argc)-insert_after;
+                cmdl_ins((tmp_argv[2]),insert_after_argc,(argv+insert_after),x_argvblk,(*x_ptr));
+                *argc=insert_after_argc+insert_after;
+                insert_after++;
+                /* Second one */
+                if (return_argc>1) {
+                  insert_after_argc=(*argc)-insert_after;
+                  cmdl_ins((tmp_argv[3]),insert_after_argc,(argv+insert_after),x_argvblk,(*x_ptr));
+                  *argc=insert_after_argc+insert_after;
+                  insert_after++;
+                }
+                /* increment to nbr of used parameters */
+                /* insert_after+=result; */
+              }
+            }
+          }
+        }
+
+      }
+    }
+    fclose(fp);
+    return 1;
+  }
+  return 0;
+}
+
+/* Home directory as given by the HOME environment variable, */
+/* or "." when that variable is not set; example: /home/smith */
+char* hts_gethome(void) {
+  char* result = getenv( "HOME" );
+  if (result == NULL)
+    result = ".";
+  return result;
+}
+
+/* Convert ~/foo into /home/smith/foo */
+/* <str> is rewritten in place when it starts with '~': the '~' is replaced by
+   the value of hts_gethome(). The caller's buffer must be large enough for
+   the expanded path. When the expanded path would not fit in the local
+   scratch buffer, <str> is left unchanged instead of overflowing it. */
+void expand_home(char* str) {
+  if (str[0] == '~') {
+    char tempo[HTS_URLMAXSIZE*2];
+    const char* home=hts_gethome();
+    if (strlen(home)+strlen(str+1) < sizeof(tempo)) {
+      strcpy(tempo,home);
+      strcat(tempo,str+1);
+      strcpy(str,tempo);
+    }
+  }
+}
diff --git a/src/htsalias.h b/src/htsalias.h
new file mode 100644
index 0000000..1c94b19
--- /dev/null
+++ b/src/htsalias.h
@@ -0,0 +1,58 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsalias.h subroutines: */
+/* alias for command-line options and config files */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSALIAS_DEFH
+#define HTSALIAS_DEFH
+
+/* Alias table: rows of { alias name, real option, type, help text },
+   ended by a row whose alias name is empty */
+extern const char hts_optalias[][4][64];
+/* Resolves argv[n_arg] (long alias or real option) into return_argv and sets
+   *return_argc to the number of strings produced; returns the number of
+   arguments consumed, or 0 with a message in return_error */
+int optalias_check(int argc,const char * const * argv,int n_arg,
+ int* return_argc,char** return_argv,
+ char* return_error);
+/* Row index of the alias named <token>, or -1 */
+int optalias_find(const char* token);
+/* Help text of the alias named <token> (may be empty), or NULL if unknown */
+const char* optalias_help(const char* token);
+/* Row index of the real option <token>, or -1 */
+int optreal_find(const char* token);
+/* Inserts the options read from the file <name> into argc/argv;
+   returns 1 if the file could be opened, 0 otherwise */
+int optinclude_file(const char* name,
+ int* argc,char** argv,char* x_argvblk,int* x_ptr);
+/* Column accessors for row <p> of hts_optalias: real option, alias name,
+   type and help text */
+const char* optreal_value(int p);
+const char* optalias_value(int p);
+const char* opttype_value(int p);
+const char* opthelp_value(int p);
+/* Value of the HOME environment variable, or "." if it is not set */
+char* hts_gethome(void);
+/* Replaces, in place, a leading '~' of <str> by the home directory */
+void expand_home(char* str);
+
+#endif
diff --git a/src/htsback.c b/src/htsback.c
new file mode 100644
index 0000000..d99564f
--- /dev/null
+++ b/src/htsback.c
@@ -0,0 +1,2462 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsback.c subroutines: */
+/* backing system (multiple socket download) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htsback.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsthread.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+/* END specific definitions */
+
+//#if HTS_WIN
+#include "htsftp.h"
+#if HTS_USEZLIB
+#include "htszlib.h"
+#endif
+//#endif
+
+#if HTS_WIN
+#ifndef __cplusplus
+// DOS
+#include <process.h> /* _beginthread, _endthread */
+#endif
+#else
+#endif
+
+#undef test_flush
+// When opt->flush is set, flushes opt->log and opt->errlog (each only if non-NULL).
+// Expands to a bare "if" statement and needs a variable named opt in scope;
+// do not use it as the unbraced body of an if that has an else branch.
+#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } }
+
+// ESC [ K : ANSI/VT100 "erase to end of line" control sequence
+#define VT_CLREOL "\33[K"
+
+
+// ---
+// backing routines
+// Returns the index of a link in the backing array, or -1 if not found.
+// Only entries with status>=0 (receiving OR ready) are examined. The first
+// entry matching adr and fil is remembered; if a later entry matches adr and
+// fil and also has the same sav, that later entry is returned at once.
+int back_index(lien_back* back,int back_max,char* adr,char* fil,char* sav) {
+  int found=-1;
+  int n;
+  for(n=0 ; n<back_max ; n++) {
+    if (back[n].status<0)                      // unused slot
+      continue;
+    if (!strfield2(back[n].url_adr,adr))       // other address
+      continue;
+    if (strcmp(back[n].url_fil,fil)!=0)        // other file
+      continue;
+    if (found==-1)          /* first time we meet, store it */
+      found=n;
+    else if (strcmp(back[n].url_sav,sav)==0)   /* oops, check sav too */
+      return n;
+  }
+  return found;
+}
+
+// number of free entries (status == -1) in the backing array
+int back_available(lien_back* back,int back_max) {
+  int free_slots=0;
+  int n=0;
+  while(n<back_max) {
+    if (back[n].status==-1)    /* free */
+      free_slots++;
+    n++;
+  }
+  return free_slots;
+}
+
+// returns an estimate of the size of the html and files held in memory:
+// for every used entry owning a memory block, the larger of r.size and
+// r.totalsize is added
+LLint back_incache(lien_back* back,int back_max) {
+  LLint total=0;
+  int n=0;
+  while(n<back_max) {
+    // only count entries in use whose data block is in memory
+    if ( (back[n].status!=-1) && (back[n].r.adr) )
+      total+=max(back[n].r.size,back[n].r.totalsize);
+    n++;
+  }
+  return total;
+}
+
+// has the link been put in the backing array? (1 if back_index finds it, else 0)
+HTS_INLINE int back_exist(lien_back* back,int back_max,char* adr,char* fil,char* sav) {
+  const int idx=back_index(back,back_max,adr,fil,sav);
+  return (idx>=0);
+}
+
+// number of background sockets: entries whose status is > 0
+int back_nsoc(lien_back* back,int back_max) {
+  int active=0;
+  int n=0;
+  while(n<back_max) {
+    if (back[n].status>0)     // receiving only
+      active++;
+    n++;
+  }
+
+  return active;
+}
+
+// object (link) downloaded or transferred from the cache
+//
+// closes the transfer parameters of entry <p>: uncompresses a compressed
+// body, updates the statistics, writes one line to cache->txt (if open),
+// logs the transfer status, offers the result to the cache (cache_mayadd)
+// and calls the "transfer finished" callback when HTS_ANALYSTE is set.
+// Does something only if the entry is ready (status 0), is not in test mode
+// and has a positive status code.
+// returns 0 if the entry was finalized, -1 otherwise
+int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
+  if (
+    (back[p].status == 0)         // ready
+    &&
+    (!back[p].testmode)           // not test mode
+    &&
+    (back[p].r.statuscode>0)      // not internal error
+    ) {
+    char* state="unknown";
+    /* remembered now: the decompression step clears r.compressed, which made
+       the "Z" flag of the cache->txt line impossible to set */
+    int was_compressed=(back[p].r.compressed != 0);
+
+    /* decompression */
+#if HTS_USEZLIB
+    if (back[p].r.compressed) {
+      if (back[p].r.size > 0) {
+        //if ( (back[p].r.adr) && (back[p].r.size>0) ) {
+        // stats
+        back[p].compressed_size=back[p].r.size;
+        // in memory -> move to disk
+        if (!back[p].r.is_write) {
+          /* tempnam() returns NULL on failure, and otherwise a malloc'ed
+             string that the caller has to free */
+          char* tmpname=tempnam(NULL,"httrz");
+          back[p].tmpfile[0]='\0';
+          if (tmpname!=NULL) {
+            strcpy(back[p].tmpfile,tmpname);
+            free(tmpname);
+          }
+          if (back[p].tmpfile[0]) {
+            back[p].r.out=fopen(back[p].tmpfile,"wb");
+            if (back[p].r.out) {
+              if ((back[p].r.adr) && (back[p].r.size>0)) {
+                if ((INTsys)fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
+                  back[p].r.statuscode=-1;
+                  strcpy(back[p].r.msg,"Write error when decompressing");
+                }
+              } else {
+                /* do not leave the empty temporary file behind */
+                fclose(back[p].r.out);
+                back[p].r.out=NULL;
+                remove(back[p].tmpfile);
+                back[p].tmpfile[0]='\0';
+                back[p].r.statuscode=-1;
+                strcpy(back[p].r.msg,"Empty compressed file");
+              }
+            } else {
+              back[p].tmpfile[0]='\0';
+              back[p].r.statuscode=-1;
+              strcpy(back[p].r.msg,"Open error when decompressing");
+            }
+          } else {
+            /* no temporary file name: the body cannot be uncompressed */
+            back[p].r.statuscode=-1;
+            strcpy(back[p].r.msg,"Open error when decompressing");
+          }
+        }
+        // close output file
+        if (back[p].r.out!=NULL) {
+          fclose(back[p].r.out);
+          back[p].r.out=NULL;
+        }
+        // decompression
+        if (back[p].tmpfile[0] && back[p].url_sav[0]) {
+          LLint size;
+          filecreateempty(back[p].url_sav);      // filenote & co
+          if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
+            back[p].r.size=back[p].r.totalsize=size;
+            // file -> memory
+            if (!back[p].r.is_write) {
+              /* release the compressed block before replacing it */
+              if (back[p].r.adr!=NULL) {
+                freet(back[p].r.adr);
+                back[p].r.adr=NULL;
+              }
+              back[p].r.adr=readfile(back[p].url_sav);
+              if (!back[p].r.adr) {
+                back[p].r.statuscode=-1;
+                strcpy(back[p].r.msg,"Read error when decompressing");
+              }
+              remove(back[p].url_sav);
+            }
+          } else {
+            /* unpacking failed: do not report the object as valid */
+            back[p].r.statuscode=-1;
+            strcpy(back[p].r.msg,"Error when decompressing");
+          }
+          remove(back[p].tmpfile);
+        }
+        // stats
+        HTS_STAT.total_packed+=back[p].compressed_size;
+        HTS_STAT.total_unpacked+=back[p].r.size;
+        HTS_STAT.total_packedfiles++;
+        // unflag
+      }
+    }
+    back[p].r.compressed=0;
+#endif
+
+    /* Stats */
+    if (cache->txt) {
+      char flags[32];
+      char s[256];
+      time_t tt;
+      struct tm* A;
+      tt=time(NULL);
+      A=localtime(&tt);
+      strftime(s,250,"%H:%M:%S",A);
+
+      flags[0]='\0';
+      /* input flags */
+      if (back[p].is_update)
+        strcat(flags, "U");        // update request
+      else
+        strcat(flags, "-");
+      if (back[p].range_req_size)
+        strcat(flags, "R");        // range request
+      else
+        strcat(flags, "-");
+      /* state flags */
+      if (back[p].r.is_file)       // direct to disk
+        strcat(flags, "F");
+      else
+        strcat(flags, "-");
+      /* output flags */
+      if (!back[p].r.notmodified)
+        strcat(flags, "M");        // modified
+      else
+        strcat(flags, "-");
+      if (back[p].r.is_chunk)      // chunked
+        strcat(flags, "C");
+      else
+        strcat(flags, "-");
+      if (was_compressed)
+        strcat(flags, "Z");        // gzip
+      else
+        strcat(flags, "-");
+      fprintf(cache->txt,"%s\t"LLintP"/"LLintP"\t%s\t", s,
+        back[p].r.size, back[p].r.totalsize,
+        flags);
+    }
+    if (back[p].r.statuscode==200) {
+      if (back[p].r.size>=0) {
+        if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) {
+          HTS_STAT.stat_bytes+=back[p].r.size;
+          HTS_STAT.stat_files++;
+        }
+        if ( (!back[p].r.notmodified) && (opt->is_update) ) {
+          HTS_STAT.stat_updated_files++;       // modified page
+          if (opt->log!=NULL) {
+            fspc(opt->log,"info");
+            if (back[p].is_update) {
+              fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+            } else {
+              fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+            }
+            test_flush;
+          }
+          if (cache->txt) {
+            if (back[p].is_update) {
+              state="updated";
+            } else {
+              state="added";
+            }
+          }
+        } else {
+          if ( (opt->debug>0) && (opt->log!=NULL) ) {
+            fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+            test_flush;
+          }
+          if (cache->txt) {
+            if (opt->is_update)
+              state="untouched";
+            else
+              state="added";
+          }
+        }
+      } else {
+        if ( (opt->debug>0) && (opt->log!=NULL) ) {
+          fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
+          test_flush;
+        }
+        if (cache->txt) {
+          state="empty";
+        }
+      }
+    } else {
+      if ( (opt->debug>0) && (opt->log!=NULL) ) {
+        fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
+      }
+      if (cache->txt) {
+        state="error";
+      }
+    }
+    if (cache->txt) {
+      fprintf(cache->txt,
+        "%d\t"
+        "%s ('%s')\t"
+        "%s\t"
+        "%s%s\t"
+        "%s%s\t%s\t"
+        "(from %s%s)"
+        LF,
+        back[p].r.statuscode,
+        state, escape_check_url_addr(back[p].r.msg),
+        escape_check_url_addr(back[p].r.contenttype),
+        ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)),
+        escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav),
+        escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil)
+        );
+      if (opt->flush)
+        fflush(cache->txt);
+    }
+
+    /* Cache */
+    cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+
+    // status finished callback
+#if HTS_ANALYSTE
+    hts_htmlcheck_xfrstatus(&back[p]);
+#endif
+    return 0;
+  }
+  return -1;
+}
+
+
+// delete entry <p> of the backing array: releases its socket, OpenSSL
+// connection (if HTS_USEOPENSSL), memory blocks and file handles, clears the
+// whole structure and marks the slot as free (status -1).
+// Does nothing if p is negative. Always returns 0.
+int back_delete(lien_back* back,int p) {
+ if (p>=0) { // you never know..
+ // Integrity checker
+ #if DEBUG_CHECKINT
+ _CHECKINT(&back[p],"Appel back_delete")
+ #endif
+#if HTS_DEBUG_CLOSESOCK
+ char info[256];
+ sprintf(info,"back_delete: #%d\n",p);
+ DEBUG_W2(info);
+#endif
+
+ // Release all sockets, handles, buffers..
+ if (back[p].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_delete: deletehttp\n");
+#endif
+ deletehttp(&back[p].r);
+ back[p].r.soc=INVALID_SOCKET;
+ }
+
+#if HTS_USEOPENSSL
+ /* Free OpenSSL structures */
+ if (back[p].r.ssl_con) {
+ SSL_shutdown(back[p].r.ssl_con);
+ SSL_free(back[p].r.ssl_con);
+ back[p].r.ssl_con=NULL;
+ }
+ /*
+ if (back[p].r.ssl_soc) {
+ BIO_free_all(back[p].r.ssl_soc);
+ back[p].r.ssl_soc=NULL;
+ }
+ */
+#endif
+
+ if (back[p].r.adr!=NULL) { // a block is still to be freed
+ freet(back[p].r.adr);
+ back[p].r.adr=NULL;
+ }
+ if (back[p].chunk_adr!=NULL) { // a block is still to be freed
+ freet(back[p].chunk_adr);
+ back[p].chunk_adr=NULL;
+ back[p].chunk_size=0;
+ back[p].is_chunk=0;
+ }
+ // if (back[p].r.is_file) { // close input file
+ if (back[p].r.fp!=NULL) {
+ fclose(back[p].r.fp);
+ back[p].r.fp=NULL;
+ }
+ // }
+
+ /* output file */
+ if (back[p].r.out!=NULL) { // close output file
+ fclose(back[p].r.out);
+ back[p].r.out=NULL;
+ }
+
+ if (back[p].r.is_write) { // direct write to disk
+ /* write the "remote" date */
+ if (strnotempty(back[p].url_sav)) // normally set when there is an output file
+ if (strnotempty(back[p].r.lastmodified)) // last-modified exists
+ if (fexist(back[p].url_sav)) // and so does the file
+ set_filetime_rfc822(back[p].url_sav,back[p].r.lastmodified);
+
+ /* run the user command once the file has been loaded */
+ usercommand(0,NULL,back[p].url_sav);
+ back[p].r.is_write=0;
+ }
+
+ // Clean everything
+ memset(&back[p], 0, sizeof(lien_back));
+ // memset zeroed these two fields: restore their "empty" values
+ back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+
+ // Most important: free the slot
+ back[p].status=-1;
+ }
+ return 0;
+}
+
+/* Space left on backing stack: number of entries whose status is -1 */
+int back_stack_available(lien_back* back,int back_max) {
+  int free_slots=0;
+  int idx=0;
+  while( idx < back_max ) {
+    if ( back[idx].status == -1 )
+      free_slots++;
+    idx++;
+  }
+  return free_slots;
+}
+
+// ajouter un lien en backing
+int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr) {
+ int p=0;
+
+ // vérifier cohérence de adr et fil (non vide!)
+ if (strnotempty(adr)==0) {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: adr is empty for back_add"LF);
+ }
+ return -1; // erreur!
+ }
+ if (strnotempty(fil)==0) {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: fil is empty for back_add"LF);
+ }
+ return -1; // erreur!
+ }
+ // FIN vérifier cohérence de adr et fil (non vide!)
+
+ // rechercher emplacement
+ while((p<back_max) && back[p].status!=-1) p++;
+ if (back[p].status==-1) { // ok on a de la place
+ back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur
+
+ // ne sert à rien normalement
+ if (back[p].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_add: deletehttp\n");
+#endif
+ deletehttp(&back[p].r);
+ }
+
+ // effacer r
+ memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+
+ // créer entrée
+ strcpy(back[p].url_adr,adr);
+ strcpy(back[p].url_fil,fil);
+ strcpy(back[p].url_sav,save);
+ back[p].pass2_ptr=pass2_ptr;
+ // copier referer si besoin
+ strcpy(back[p].referer_adr,"");
+ strcpy(back[p].referer_fil,"");
+ if ((referer_adr) && (referer_fil)) { // existe
+ if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) { // non vide
+ if (referer_adr[0]!='!') { // non détruit
+ if (strcmp(referer_adr,"file://")) { // PAS file://
+ if (strcmp(referer_adr,"primary")) { // pas referer 1er lien
+ strcpy(back[p].referer_adr,referer_adr);
+ strcpy(back[p].referer_fil,referer_fil);
+ }
+ }
+ }
+ }
+ }
+ // sav ne sert à rien pour le moment
+ back[p].r.size=0; // rien n'a encore été chargé
+ back[p].r.soc=INVALID_SOCKET; // pas de socket
+ back[p].r.adr=NULL; // pas de bloc de mémoire
+ back[p].r.is_write=0; // à priori stockage en mémoire
+ back[p].maxfile_html=opt->maxfile_html;
+ back[p].maxfile_nonhtml=opt->maxfile_nonhtml;
+ back[p].testmode=test; // mode test?
+ if (!opt->http10) // option "forcer 1.0" désactivée
+ back[p].http11=1; // autoriser http/1.1
+ back[p].head_request=0;
+ if (strcmp(back[p].url_sav,BACK_ADD_TEST)==0) // HEAD
+ back[p].head_request=1;
+ else if (strcmp(back[p].url_sav,BACK_ADD_TEST2)==0) // test en GET
+ back[p].head_request=2; // test en get
+
+
+ /* Stop requested - abort backing */
+ if (opt->state.stop) {
+ back[p].r.statuscode=-1; // fatal
+ strcpy(back[p].r.msg,"mirror stopped by user");
+ back[p].status=0; // terminé
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush;
+ }
+ return 0;
+ }
+
+
+ // tester cache
+ if ((strcmp(adr,"file://")) /* pas fichier */
+ && ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */
+ && ( (strnotempty(save)) || (strcmp(fil,"/robots.txt")==0) ) ) { // si en test on ne doit pas utiliser le cache sinon telescopage avec le 302..
+ //if ((!test) && (strcmp(adr,"file://"))
+ //if ((!test) && (strncmp(adr,"ftp://",6)) && (strcmp(adr,"file://"))
+#if HTS_FAST_CACHE
+ long int hash_pos;
+ int hash_pos_return=0;
+#else
+ char* a=NULL;
+#endif
+#if HTS_FAST_CACHE
+ if (cache->hashtable) {
+#else
+ if (cache->use) {
+#endif
+ char buff[HTS_URLMAXSIZE*4];
+#if HTS_FAST_CACHE
+ strcpy(buff,adr); strcat(buff,fil);
+ hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos);
+#else
+ buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n");
+ a=strstr(cache->use,buff);
+#endif
+
+ // Ok, noté en cache->. mais bien présent dans le cache ou sur disque?
+#if HTS_FAST_CACHE
+ if (hash_pos_return) {
+#else
+ if (a) {
+#endif
+ if (!test) { // non mode test
+#if HTS_FAST_CACHE
+ int pos=hash_pos;
+#else
+ int pos=-1;
+ a+=strlen(buff);
+ sscanf(a,"%d",&pos); // lire position
+#endif
+ if (pos<0) { // pas de mise en cache data, vérifier existence
+ if (fsize(antislash(save)) <= 0) { // fichier existe pas ou est vide!
+#if HTS_FAST_CACHE
+ hash_pos_return=0;
+#else
+ a=NULL;
+#endif
+ // dévalider car non présent sur disque dans structure originale!!!
+ // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour
+ // en être sûr
+ if (opt->norecatch) { // tester norecatch
+ if (!fexist(antislash(save))) { // fichier existe pas mais déclaré: on l'a effacé
+ FILE* fp=fopen(antislash(save),"wb");
+ if (fp) fclose(fp);
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ //
+ } else
+#if HTS_FAST_CACHE
+ hash_pos_return=0;
+#else
+ a=NULL;
+#endif
+
+ // Existe pas en cache, ou bien pas de cache présent
+#if HTS_FAST_CACHE
+ if (hash_pos_return) { // OK existe en cache (et données aussi)!
+#else
+ if (a!=NULL) { // OK existe en cache (et données aussi)!
+#endif
+ if (cache->type==1) { // cache prioritaire (pas de test if-modified..)
+ // dans ce cas on peut également lire des réponses cachées comme 404,302...
+ // lire dans le cache
+ if (!test)
+ back[p].r=cache_read(opt,cache,adr,fil,save);
+ else
+ back[p].r=cache_read(opt,cache,adr,fil,NULL); // charger en tête uniquement du cache
+ if (!back[p].r.location)
+ back[p].r.location=back[p].location_buffer;
+ else { /* recopier */
+ strcpy(back[p].location_buffer,back[p].r.location);
+ back[p].r.location=back[p].location_buffer;
+ }
+
+ /* Interdiction taille par le wizard? --> détruire */
+ if (back[p].r.statuscode != -1) { // pas d'erreur de lecture
+ if (!back_checksize(opt,&back[p],0)) {
+ back[p].status=0; // FINI
+ back[p].r.statuscode=-1;
+ if (!back[p].testmode)
+ strcpy(back[p].r.msg,"Cached file skipped (too big)");
+ else
+ strcpy(back[p].r.msg,"Test: Cached file skipped (too big)");
+ return 0;
+ }
+ }
+
+ if (back[p].r.statuscode != -1) { // pas d'erreur de lecture
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ if (!test) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File immediately loaded from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ } else {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File immediately tested from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+ }
+ back[p].r.notmodified=1; // fichier non modifié
+ back[p].status=0; // OK prêt
+
+ // finalize transfer
+ if (!test) {
+ if (back[p].r.statuscode>0) {
+ back_finalize(opt,cache,back,p);
+ }
+ }
+
+ return 0;
+ } else { // erreur
+ // effacer r
+ memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+ // et continuer (chercher le fichier)
+ }
+
+ } else if (cache->type==2) { // si en cache, demander de tester If-Modified-Since
+ htsblk* r=cache_header(opt,cache,adr,fil);
+
+ /* Interdiction taille par le wizard? */
+ {
+ LLint save_totalsize=back[p].r.totalsize;
+ back[p].r.totalsize=r->totalsize;
+ if (!back_checksize(opt,&back[p],1)) {
+ r=NULL;
+ //
+ back[p].status=0; // FINI
+ deletehttp(&back[p].r); back[p].r.soc=INVALID_SOCKET;
+ if (!back[p].testmode)
+ strcpy(back[p].r.msg,"File too big");
+ else
+ strcpy(back[p].r.msg,"Test: File too big");
+ return 0;
+ }
+ back[p].r.totalsize=save_totalsize;
+ }
+
+ if (r) {
+ if (r->statuscode==200) { // uniquement des 200 (OK)
+ if (strnotempty(r->etag)) { // ETag (RFC2616)
+ /*
+ - If both an entity tag and a Last-Modified value have been
+ provided by the origin server, SHOULD use both validators in
+ cache-conditional requests. This allows both HTTP/1.0 and
+ HTTP/1.1 caches to respond appropriately.
+ */
+ if (strnotempty(r->lastmodified))
+ sprintf(back[p].send_too,"If-None-Match: %s\r\nIf-Modified-Since: %s\r\n",r->etag,r->lastmodified);
+ else
+ sprintf(back[p].send_too,"If-None-Match: %s\r\n",r->etag);
+ }
+ else if (strnotempty(r->lastmodified))
+ sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",r->lastmodified);
+ else if (strnotempty(cache->lastmodified))
+ sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified);
+
+ /* this is an update of a file */
+ if (strnotempty(back[p].send_too))
+ back[p].is_update=1;
+ back[p].r.req.nocompression=1; /* Do not compress when updating! */
+
+ }
+ /* else if (strnotempty(cache->lastmodified))
+ sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified);
+ */
+ }
+#if DEBUGCA
+ printf("..is modified test %s\n",back[p].send_too);
+#endif
+ }
+ // Okay, pas trouvé dans le cache
+ // Et si le fichier existe sur disque?
+ // Pas dans le cache: fichier n'a pas été transféré du tout, donc pas sur disque?
+ } else {
+ if (fexist(save)) { // fichier existe? aghl!
+ LLint sz=fsize(save);
+ // Bon, là il est possible que le fichier ait été partiellement transféré
+ // (s'il l'avait été en totalité il aurait été inscrit dans le cache ET existerait sur disque)
+ // PAS de If-Modified-Since, on a pas connaissance des données à la date du cache
+ // On demande juste les données restantes si le date est valide (206), tout sinon (200)
+ if ((ishtml(save) != 1) && (ishtml(back[p].url_fil)!=1)) { // NON HTML (liens changés!!)
+ if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!)
+ if (strnotempty(cache->lastmodified)) { /* pas de If-.. possible */
+ /*if ( (!opt->http10) && (strnotempty(cache->lastmodified)) ) { */ /* ne pas forcer 1.0 */
+#if DEBUGCA
+ printf("..if unmodified since %s size "LLintP"\n",cache->lastmodified,(LLint)sz);
+#endif
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+
+ /* impossible - don't have etag or date
+ if (strnotempty(back[p].r.etag)) { // ETag (RFC2616)
+ sprintf(back[p].send_too,"If-None-Match: %s\r\n",back[p].r.etag);
+ back[p].http11=1; // En tête 1.1
+ } else if (strnotempty(back[p].r.lastmodified)) {
+ sprintf(back[p].send_too,"If-Unmodified-Since: %s\r\n",back[p].r.lastmodified);
+ back[p].http11=1; // En tête 1.1
+ } else
+ */
+ if (strlen(cache->lastmodified)) {
+ sprintf(back[p].send_too,
+ "If-Unmodified-Since: %s\r\nRange: bytes="LLintP"-\r\n"
+ ,cache->lastmodified,(LLint)sz);
+ back[p].http11=1; // En tête 1.1
+ back[p].range_req_size=sz;
+ back[p].r.req.range_used=1;
+ back[p].r.req.nocompression=1;
+ } else {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+ }
+
+ } else {
+ if ((opt->debug>0) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"warning");
+ /*
+ if (opt->http10)
+ fprintf(opt->errlog,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil);
+ else
+ */
+ fprintf(opt->errlog,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
+ test_flush;
+ }
+ /* Sinon requête normale... */
+ back[p].http11=0;
+ }
+ } else if (opt->norecatch) { // tester norecatch
+ filenote(save,NULL); // ne pas purger tout de même
+ back[p].status=0; // OK prêt
+ back[p].r.statuscode=-1; // erreur
+ strcpy(back[p].r.msg,"Null-size file not recaught");
+ return 0;
+ }
+ } else {
+ if ((opt->debug>0) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"warning");
+ fprintf(opt->errlog,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
+ test_flush;
+ }
+ /* Sinon requête normale... */
+ back[p].http11=0;
+ }
+ }
+ }
+ }
+
+
+ {
+ ///htsblk r; non directement dans la structure-réponse!
+ T_SOC soc;
+
+ // ouvrir liaison, envoyer requète
+ // ne pas traiter ou recevoir l'en tête immédiatement
+ memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+ // recopier proxy
+ memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy));
+ // et user-agent
+ strcpy(back[p].r.req.user_agent,opt->user_agent);
+ strcpy(back[p].r.req.lang_iso,opt->lang_iso);
+ back[p].r.req.user_agent_send=opt->user_agent_send;
+ // et http11
+ back[p].r.req.http11=back[p].http11;
+ back[p].r.req.nocompression=opt->nocompression;
+
+ // mode ftp, court-circuit!
+ if (strfield(back[p].url_adr,"ftp://")) {
+ if (back[p].testmode) {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: forbidden test with ftp link for back_add"LF);
+ }
+ return -1; // erreur pas de test permis
+ }
+ if (!(back[p].r.req.proxy.active && opt->ftp_proxy)) { // connexion directe, gérée en thread
+ back[p].status=1000; // connexion ftp
+#if USE_BEGINTHREAD
+ launch_ftp(&(back[p]));
+#else
+ {
+ char nid[32];
+ sprintf(nid,"htsftp%d-in_progress.lock",p);
+ strcpy(back[p].location_buffer,fconcat(opt->path_log,nid));
+ }
+ launch_ftp(&(back[p]),back[p].location_buffer,opt->exec);
+#endif
+ return 0;
+ }
+ }
+#if HTS_USEOPENSSL
+ else if (strfield(back[p].url_adr,"https://")) { // let's rock
+ back[p].r.ssl = 1;
+ // back[p].r.ssl_soc = NULL;
+ back[p].r.ssl_con = NULL;
+ }
+#endif
+
+#if HTS_XGETHOST
+#if HDEBUG
+ printf("back_solve..\n");
+#endif
+ back[p].status=101; // tentative de résolution du nom de host
+ soc=INVALID_SOCKET; // pas encore ouverte
+ back_solve(&back[p]); // préparer
+ if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns
+#if HDEBUG
+ printf("ok, dns cache ready..\n");
+#endif
+ soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
+ if (soc==INVALID_SOCKET) {
+ back[p].status=0; // fini, erreur
+ }
+ }
+//
+#else
+//
+#if CNXDEBUG
+ printf("XFopen..\n");
+#endif
+
+ if (strnotempty(back[p].send_too)) // envoyer un if-modified-since
+#if HTS_XCONN
+ soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
+#else
+ soc=http_xfopen(0,0,1,back[p].send_too,adr,fil,&(back[p].r));
+#endif
+ else
+#if HTS_XCONN
+ soc=http_xfopen(test,0,0,NULL,adr,fil,&(back[p].r));
+#else
+ soc=http_xfopen(test,0,1,NULL,adr,fil,&(back[p].r));
+#endif
+#endif
+ if (opt->timeout>0) { // gestion du opt->timeout
+ back[p].timeout=opt->timeout;
+ back[p].timeout_refresh=time_local();
+ } else {
+ back[p].timeout=-1; // pas de gestion (default)
+ }
+
+ if (opt->rateout>0) { // gestion d'un taux minimum de transfert toléré
+ back[p].rateout=opt->rateout;
+ back[p].rateout_time=time_local();
+ } else {
+ back[p].rateout=-1; // pas de gestion (default)
+ }
+
+ // Note: on charge les code-page erreurs (erreur 404, etc) dans le cas où cela est
+ // rattrapable (exemple: 301,302 moved xxx -> refresh sur la page!)
+ //if ((back[p].statuscode!=200) || (soc<0)) { // ERREUR HTTP/autre
+
+#if CNXDEBUG
+printf("Xfopen ok, poll..\n");
+#endif
+
+#if HTS_XGETHOST
+ if (soc!=INVALID_SOCKET)
+ if (back[p].status==101) { // pas d'erreur
+ if (!back[p].r.is_file)
+ back[p].status=100; // connexion en cours
+ else
+ back[p].status=1; // fichier
+ }
+
+#else
+ if (soc==INVALID_SOCKET) { // erreur socket
+ back[p].status=0; // FINI
+ //if (back[p].soc!=INVALID_SOCKET) deletehttp(back[p].soc);
+ back[p].r.soc=INVALID_SOCKET;
+ } else {
+ if (!back[p].r.is_file)
+#if HTS_XCONN
+ back[p].status=100; // connexion en cours
+#else
+ back[p].status=99; // chargement en tête en cours
+#endif
+ else
+ back[p].status=1; // chargement fichier
+#if BDEBUG==1
+ printf("..loading header\n");
+#endif
+ }
+#endif
+
+ }
+
+
+ // note: si il y a erreur (404,etc) status=2 (terminé/échec) mais
+ // le lien est considéré comme traité
+ //if (back[p].soc<0) // erreur
+ // return -1;
+
+ return 0;
+ } else {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: no space left in stack for back_add"LF);
+ }
+ return -1; // plus de place
+ }
+}
+
+
+
+#if HTS_XGETHOST
+#if USE_BEGINTHREAD
+// Thread entry point: resolves a host name in the background and records
+// the result in the shared DNS cache list.
+//   iadr_p: heap-allocated host string; an "id:pass" prefix and a ":port"
+//           suffix are skipped/cut. The buffer is freed by this function.
+// If the host is already present in the cache list, nothing is resolved.
+PTHREAD_TYPE Hostlookup(void* iadr_p) {
+  char iadr[256];
+  t_dnscache* cache=_hts_cache();   // cache address (walked to its tail below)
+  t_hostent* hp;
+  int error_found=0;
+
+  // copy the name (after id:pass)
+#if DEBUGDNS
+  printf("resolv in background: %s\n",jump_identification(iadr_p));
+#endif
+  // bounded copy: the caller-supplied string may be longer than iadr[],
+  // an unbounded strcpy() would overflow the stack buffer
+  iadr[0]='\0';
+  strncat(iadr,jump_identification(iadr_p),sizeof(iadr)-1);
+  // cut a possible ":port"
+  {
+    char *a;
+    if ( (a=jump_toport(iadr)) )
+      *a='\0';        // get rid of it
+  }
+  freet(iadr_p);
+
+  // wait until the dns cache is available
+  while(_hts_lockdns(-1));    // wait for release
+  _hts_lockdns(1);            // lock
+  while(cache->n) {
+    if (strcmp(cache->iadr,iadr)==0) {
+      error_found=1;
+    }
+    cache=cache->n;           // move towards the tail
+  }
+  if (strcmp(cache->iadr,iadr)==0) {
+    error_found=1;
+  }
+
+  if (!error_found) {
+    // roughly a copy of hts_gethostbyname, without the return
+    cache->n=(t_dnscache*) calloct(1,sizeof(t_dnscache));
+    if (cache->n!=NULL) {
+      t_fullhostent fullhostent_buffer;
+      strcpy(cache->n->iadr,iadr);
+      cache->n->host_length=0;        /* nothing for the moment */
+      cache->n->n=NULL;
+      _hts_lockdns(0);                // unlock
+
+      /* resolve */
+#if DEBUGDNS
+      printf("gethostbyname() in progress for %s\n",iadr);
+#endif
+      cache->n->host_length=-1;
+      memset(cache->n->host_addr, 0, sizeof(cache->n->host_addr));
+      hp=vxgethostbyname(iadr, &fullhostent_buffer);
+      if (hp!=NULL) {
+        // only store an address that fits in the cache slot; otherwise
+        // host_length keeps the -1 set above
+        // NOTE(review): assumes host_addr is an array member (the memset
+        // above uses sizeof on it) - confirm against t_dnscache
+        if ((size_t) hp->h_length <= sizeof(cache->n->host_addr)) {
+          memcpy(cache->n->host_addr, hp->h_addr, hp->h_length);
+          cache->n->host_length = hp->h_length;
+        }
+      }
+    } else
+      _hts_lockdns(0);                // unlock
+  } else {
+#if DEBUGDNS
+    printf("aborting resolv for %s (found)\n",iadr);
+#endif
+    _hts_lockdns(0);                  // unlock
+  }
+  // end of the hts_gethostbyname copy
+
+#if DEBUGDNS
+  printf("quitting resolv for %s (result: %d)\n",iadr,(cache->n!=NULL)?cache->n->host_length:(-999));
+#endif
+
+  return PTHREAD_RETURN;    /* _endthread implied */
+}
+#endif
+
+// Prepare the resolution of the host of a backing entry (or of its proxy
+// when a proxy is active).
+// Nothing has to be prepared for file:// and ftp:// entries, nor when
+// hts_dnstest() already reports the name as tested.
+void back_solve(lien_back* back) {
+  char* host;
+
+  // local file or ftp link: nothing to prepare
+  if (strfield(back->url_adr,"file://") || strfield(back->url_adr,"ftp://"))
+    return;
+
+  // name to look up: the proxy when active, the link address otherwise
+  if (back->r.req.proxy.active)
+    host=back->r.req.proxy.name;
+  else
+    host=back->url_adr;
+  host = jump_protocol(host);
+
+  // already tested: nothing more to do
+  if (hts_dnstest(host))
+    return;
+
+#if USE_BEGINTHREAD
+  // register the lookup in a thread (same code for Windows and Unix
+  // builds); Hostlookup receives a private copy of the name
+  {
+    char* name_copy = calloct(strlen(host)+2,1);
+    if (name_copy) {
+      strcpy(name_copy,host);
+      _beginthread( Hostlookup , 0, name_copy );
+    }
+  }
+#endif
+  // without thread support nothing is launched here (gethostbyname()
+  // is blocking, the direct call is disabled)
+}
+
+// Tells whether the host of a backing entry could be resolved.
+// Returns 1 at once for file:// and ftp:// entries; otherwise returns the
+// result of hts_dnstest() on the proxy name (proxy active) or on the link
+// address.
+int host_wait(lien_back* back) {
+  // ready: local file (or ftp link)
+  if (strfield(back->url_adr,"file://") || strfield(back->url_adr,"ftp://"))
+    return 1;
+
+  if (back->r.req.proxy.active)
+    return (hts_dnstest(back->r.req.proxy.name));
+
+  return (hts_dnstest(back->url_adr));
+}
+#endif
+
+
+// Remove finished non-html files from the backing stack (anticipation):
+// cleanup non-html files in backing to save backing space
+// and allow faster "save in cache" operation.
+//   opt      : options (debug level and log file are read here)
+//   cache    : not used by this function
+//   back     : backing stack
+//   back_max : number of entries in the backing stack
+// An entry is released only when it is finished (status 0), not in test
+// mode, has a save name, was written directly to disk, is non-empty, got an
+// HTTP 200, is neither hypertext nor possibly-hypertext, and has a pass2_ptr.
+void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
+  int i;
+  for(i=0;i<back_max;i++) {
+    if (back[i].status != 0) continue;                              // not ready
+    if (back[i].testmode) continue;                                 // test mode
+    if (!strnotempty(back[i].url_sav)) continue;                    // no filename
+    if (!back[i].r.is_write) continue;                              // in memory, not on disk
+    if (!(back[i].r.size>0)) continue;                              // empty
+    if (back[i].r.statuscode!=200) continue;                        // not HTTP "OK"
+    if (is_hypertext_mime(back[i].r.contenttype)) continue;         // HTML/hypertext
+    if (may_be_hypertext_mime(back[i].r.contenttype)) continue;     // may be a parseable mime type
+    if (!back[i].pass2_ptr) continue;
+
+    // no finalize/stats/cache update is done here (disabled in the original code)
+    *back[i].pass2_ptr=-1;      // Done!
+
+    // log BEFORE deleting the entry: url_sav belongs to the entry that
+    // back_delete() is about to release
+    if ((opt->debug>0) && (opt->log!=NULL)) {
+      fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush;
+    }
+    back_delete(back,i);        // Delete backing entry
+  }
+}
+
+
+// attente (gestion des buffers des sockets)
+void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart) {
+ int i;
+ T_SOC nfds=INVALID_SOCKET;
+ fd_set fds,fds_c,fds_e; // fds pour lecture, connect (write), et erreur
+ int nsockets; // nbre sockets
+ LLint max_read_bytes; // max bytes read per sockets
+ struct timeval tv;
+ int do_wait=0;
+ int gestion_timeout=0;
+ int busy_recv=0; // pas de données pour le moment
+ int busy_state=0; // pas de connexions
+ int max_loop; // nombre de boucles max à parcourir..
+#if HTS_ANALYSTE
+ int max_loop_chk=0;
+#endif
+
+
+ // max. number of loops
+ max_loop=8;
+
+#if 1
+ // Cleanup the stack to save space!
+ back_clean(opt,cache,back,back_max);
+#endif
+
+ // recevoir tant qu'il y a des données (avec un maximum de max_loop boucles)
+ do_wait=0;
+ gestion_timeout=0;
+ do {
+ int max_c;
+ busy_state=busy_recv=0;
+
+#if 0
+ check_rate(stat_timestart,opt->maxrate); // vérifier taux de transfert
+#endif
+ // inscrire les sockets actuelles, et rechercher l'ID la plus élevée
+ FD_ZERO(&fds);
+ FD_ZERO(&fds_c);
+ FD_ZERO(&fds_e);
+ nsockets=0;
+ max_read_bytes=TAILLE_BUFFER; // maximum bytes that can be read
+ nfds=INVALID_SOCKET;
+
+ max_c=1;
+ for(i=0;i<back_max;i++) {
+
+ // en cas de gestion du connect préemptif
+#if HTS_XCONN
+ if (back[i].status==100) { // connexion
+ do_wait=1;
+
+ // noter socket write
+ FD_SET(back[i].r.soc,&fds_c);
+
+ // noter socket erreur
+ FD_SET(back[i].r.soc,&fds_e);
+
+ // calculer max
+ if (max_c) {
+ max_c=0;
+ nfds=back[i].r.soc;
+ } else if (back[i].r.soc>nfds) {
+ // ID socket la plus élevée
+ nfds=back[i].r.soc;
+ }
+
+ } else
+#endif
+#if HTS_XGETHOST
+ if (back[i].status==101) { // attente
+ // rien à faire..
+ } else
+#endif
+ // poll pour la lecture sur les sockets
+ if ((back[i].status>0) && (back[i].status<100)) { // en réception http
+
+#if BDEBUG==1
+ //printf("....socket in progress: %d\n",back[i].r.soc);
+#endif
+ // non local et non ftp
+ if (!back[i].r.is_file) {
+ //## if (back[i].url_adr[0]!=lOCAL_CHAR) {
+
+ // vérification de sécurité
+ if (back[i].r.soc!=INVALID_SOCKET) { // hey, you never know..
+ do_wait=1;
+
+ // noter socket read
+ FD_SET(back[i].r.soc,&fds);
+
+ // noter socket error
+ FD_SET(back[i].r.soc,&fds_e);
+
+ // incrémenter nombre de sockets
+ nsockets++;
+
+ // calculer max
+ if (max_c) {
+ max_c=0;
+ nfds=back[i].r.soc;
+ } else if (back[i].r.soc>nfds) {
+ // ID socket la plus élevée
+ nfds=back[i].r.soc;
+ }
+ } else {
+ back[i].r.statuscode=-4;
+ if (back[i].status==100)
+ strcpy(back[i].r.msg,"Connect Error");
+ else
+ strcpy(back[i].r.msg,"Receive Error");
+ back[i].status=0; // terminé
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush;
+ }
+ }
+#if WIDE_DEBUG
+ else {
+ DEBUG_W("PANIC!!! Socket is invalid in a poll test!\n");
+ }
+#endif
+
+ }
+
+ }
+ }
+ nfds++;
+
+ if (do_wait) { // attendre
+ // temps d'attente max: 2.5 seconde
+ tv.tv_sec=HTS_SOCK_SEC;
+ tv.tv_usec=HTS_SOCK_MS;
+
+#if BDEBUG==1
+ printf("..select\n");
+#endif
+
+ // poller les sockets-attention au noyau sous Unix..
+#if HTS_WIDE_DEBUG
+ DEBUG_W("select\n");
+#endif
+ select(nfds,&fds,&fds_c,&fds_e,&tv);
+#if HTS_WIDE_DEBUG
+ DEBUG_W("select done\n");
+#endif
+ }
+
+ // maximum data which can be received for a socket, if limited
+ if (nsockets) {
+ if (opt->maxrate>0) {
+ max_read_bytes = ( check_downloadable_bytes(opt->maxrate) / nsockets );
+ }
+ }
+ if (!max_read_bytes)
+ busy_recv=0;
+
+ // recevoir les données arrivées
+ for(i=0;i<back_max;i++) {
+
+ if (back[i].status>0) {
+ if (!back[i].r.is_file) { // not file..
+ if (back[i].r.soc!=INVALID_SOCKET) { // hey, you never know..
+ int err=FD_ISSET(back[i].r.soc,&fds_e);
+ if (err) {
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait: deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-4;
+ if (back[i].status==100)
+ strcpy(back[i].r.msg,"Connect Error");
+ else
+ strcpy(back[i].r.msg,"Receive Error");
+ back[i].status=0; // terminé
+ }
+ }
+ }
+ }
+
+ // ---- FLAG WRITE MIS A UN?: POUR LE CONNECT
+ if (back[i].status==100) { // attendre connect
+ int dispo=0;
+ // vérifier l'existance de timeout-check
+ if (!gestion_timeout)
+ if (back[i].timeout>0)
+ gestion_timeout=1;
+
+ // connecté?
+ dispo=FD_ISSET(back[i].r.soc,&fds_c);
+ if (dispo) { // ok connected!!
+ busy_state=1;
+
+#if HTS_USEOPENSSL
+ /* SSL mode */
+ if (back[i].r.ssl) {
+ // handshake not yet launched
+ if (!back[i].r.ssl_con) {
+ SSL_CTX_set_options(openssl_ctx, SSL_OP_ALL);
+ // new session
+ back[i].r.ssl_con = SSL_new(openssl_ctx);
+ if (back[i].r.ssl_con) {
+ SSL_clear(back[i].r.ssl_con);
+ if (SSL_set_fd(back[i].r.ssl_con, back[i].r.soc) == 1) {
+ SSL_set_connect_state(back[i].r.ssl_con);
+ back[i].status = 102; /* handshake wait */
+ } else
+ back[i].r.statuscode=-6;
+ } else
+ back[i].r.statuscode=-6;
+ }
+ /* Error */
+ if (back[i].r.statuscode == -6) {
+ strcpy(back[i].r.msg, "bad SSL/TLS handshake");
+ deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ back[i].status=0;
+ }
+ }
+
+#endif
+
+#if BDEBUG==1
+ printf("..connect ok on socket %d\n",back[i].r.soc);
+#endif
+
+ if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==100)) {
+ /* limit nb. connections/seconds to avoid server overload */
+ if (opt->maxconn>0) {
+ Sleep(1000/opt->maxconn);
+ }
+
+ if (back[i].timeout>0) { // refresh timeout si besoin est
+ back[i].timeout_refresh=time_local();
+ }
+ if (back[i].rateout>0) { // le taux de transfert de base sur le début de la connexion
+ back[i].rateout_time=time_local();
+ }
+ // envoyer header
+ //if (strcmp(back[i].url_sav,BACK_ADD_TEST)!=0) // vrai get
+ if (!back[i].head_request)
+ http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r);
+ else if (back[i].head_request==2) // test en GET!
+ http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r);
+ else // test!
+ http_sendhead(opt->cookie,1,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r);
+ back[i].status=99; // attendre en tête maintenant
+ }
+ }
+
+ // attente gethostbyname
+ }
+#if HTS_USEOPENSSL
+ else if (back[i].status==102) { // wait for SSL handshake
+ /* SSL mode */
+ if (back[i].r.ssl) {
+ int conn_code;
+ if ((conn_code = SSL_connect(back[i].r.ssl_con)) <= 0) {
+ /* non blocking I/O, will retry */
+ int err_code = SSL_get_error(back[i].r.ssl_con, conn_code);
+ if (
+ (err_code != SSL_ERROR_WANT_READ)
+ &&
+ (err_code != SSL_ERROR_WANT_WRITE)
+ ) {
+ char tmp[256];
+ tmp[0]='\0';
+ ERR_error_string(err_code, tmp);
+ back[i].r.msg[0]='\0';
+ strncat(back[i].r.msg, tmp, sizeof(back[i].r.msg) - 2);
+ if (!strnotempty(back[i].r.msg)) {
+ sprintf(back[i].r.msg, "SSL/TLS error %d", err_code);
+ }
+ deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ back[i].status=0;
+ }
+ } else { /* got it! */
+ back[i].status=100; // back to waitconnect
+ }
+ } else {
+ strcpy(back[i].r.msg, "unexpected SSL/TLS error");
+ deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ back[i].status=0;
+ }
+
+ }
+#endif
+#if HTS_XGETHOST
+ else if (back[i].status==101) { // attendre gethostbyname
+#if DEBUGDNS
+ //printf("status 101 for %s\n",back[i].url_adr);
+#endif
+
+ if (!gestion_timeout)
+ if (back[i].timeout>0)
+ gestion_timeout=1;
+
+ if (host_wait(&back[i])) { // prêt
+ back[i].status=100; // attente connexion
+ if (back[i].timeout>0) { // refresh timeout si besoin est
+ back[i].timeout_refresh=time_local();
+ }
+ if (back[i].rateout>0) { // le taux de transfert de base sur le début de la connexion
+ back[i].rateout_time=time_local();
+ }
+
+ back[i].r.soc=http_xfopen(0,0,0,back[i].send_too,back[i].url_adr,back[i].url_fil,&(back[i].r));
+ if (back[i].r.soc==INVALID_SOCKET) {
+ back[i].status=0; // fini, erreur
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(2): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ if (strnotempty(back[i].r.msg)==0)
+ strcpy(back[i].r.msg,"Unable to resolve host name");
+ }
+ }
+
+
+ // ---- FLAG READ MIS A UN?: POUR LA RECEPTION
+ }
+#endif
+#if USE_BEGINTHREAD
+ // ..rien à faire, c'est magic les threads
+#else
+ else if (back[i].status==1000) { // en réception ftp
+ if (!fexist(back[i].location_buffer)) { // terminé
+ FILE* fp;
+ fp=fopen(fconcat(back[i].location_buffer,".ok"),"rb");
+ if (fp) {
+ int j=0;
+ fscanf(fp,"%d ",&(back[i].r.statuscode));
+ while(!feof(fp)) {
+ int c = fgetc(fp);
+ if (c!=EOF)
+ back[i].r.msg[j++]=c;
+ }
+ back[i].r.msg[j++]='\0';
+ fclose(fp);
+ remove(fconcat(back[i].location_buffer,".ok"));
+ strcpy(fconcat(back[i].location_buffer,".ok"),"");
+ } else {
+ strcpy(back[i].r.msg,"Unknown ftp result, check if file is ok");
+ back[i].r.statuscode=-1;
+ }
+ back[i].status=0;
+ // finalize transfer
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+ }
+ }
+#endif
+ else if ((back[i].status>0) && (back[i].status<1000)) { // en réception http
+ int dispo=0;
+
+ // vérifier l'existance de timeout-check
+ if (!gestion_timeout)
+ if (back[i].timeout>0)
+ gestion_timeout=1;
+
+ // données dispo?
+ //## if (back[i].url_adr[0]!=lOCAL_CHAR)
+ if (!back[i].r.is_file) {
+ dispo=FD_ISSET(back[i].r.soc,&fds);
+ }
+ else
+ dispo=1;
+
+ // Check transfer rate!
+ if (!max_read_bytes)
+ dispo=0; // limit transfer rate
+
+ if (dispo) { // données dispo
+ LLint retour_fread;
+ busy_recv=1; // on récupère encore
+#if BDEBUG==1
+ printf("..data available on socket %d\n",back[i].r.soc);
+#endif
+
+
+ // range size hack old location
+
+#if HTS_DIRECTDISK
+ // Court-circuit:
+ // Peut-on stocker le fichier directement sur disque?
+ // Ahh que ca serait vachement mieux et que ahh que la mémoire vous dit merci!
+ if (back[i].status) {
+ if (back[i].r.is_write==0) { // mode mémoire
+ if (back[i].r.adr==NULL) { // rien n'a été écrit
+ if (!back[i].testmode) { // pas mode test
+ if (strnotempty(back[i].url_sav)) {
+ if (strcmp(back[i].url_fil,"/robots.txt")) {
+ if (back[i].r.statuscode==200) { // 'OK'
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (opt->getmode&2) { // on peut ecrire des non html
+ back[i].r.is_write=1; // écrire
+ if (back[i].r.compressed
+ &&
+ /* .gz are *NOT* depacked!! */
+ (strfield(get_ext(back[i].url_sav),"gz") == 0)
+ ) {
+ back[i].tmpfile[0]='\0';
+ strcpy(back[i].tmpfile,tempnam(NULL,"httrZ"));
+ if (back[i].tmpfile[0])
+ back[i].r.out=fopen(back[i].tmpfile,"wb");
+ } else {
+ back[i].r.compressed=0;
+ back[i].r.out=filecreate(back[i].url_sav);
+ }
+#if HDEBUG
+ printf("direct-disk: %s\n",back[i].url_sav);
+#endif
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File received from net to disk: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+
+ if (back[i].r.out==NULL) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"error");
+ fprintf(opt->errlog,"Unable to save file %s"LF,back[i].url_sav);
+ test_flush;
+ }
+ back[i].r.is_write=0; // erreur, abandonner
+#if HDEBUG
+ printf("..error!\n");
+#endif
+ }
+#if HTS_WIN==0
+ else chmod(back[i].url_sav,HTS_ACCESS_FILE);
+#endif
+ } else { // on coupe tout!
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ back[i].status=0; // terminé
+ if (!back[i].testmode)
+ back[i].r.statuscode=-10; // EUHH CANCEL
+ else
+ back[i].r.statuscode=-10; // "TEST OK"
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(3): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+#endif
+
+ // réception de données depuis socket ou fichier
+ if (back[i].status) {
+ if (back[i].status==99) // recevoir par bloc de lignes
+ retour_fread=http_xfread1(&(back[i].r),0);
+ else if (back[i].status==98) { // recevoir longueur chunk en hexa caractère par caractère
+ // backuper pour lire dans le buffer chunk
+ htsblk r;
+ memcpy(&r, &(back[i].r), sizeof(htsblk));
+ back[i].r.is_write=0; // mémoire
+ back[i].r.adr=back[i].chunk_adr; // adresse
+ back[i].r.size=back[i].chunk_size; // taille taille chunk
+ back[i].r.totalsize=-1; // total inconnu
+ back[i].r.out=NULL;
+ back[i].r.is_file=0;
+ //
+ // ligne par ligne
+ retour_fread=http_xfread1(&(back[i].r),-1);
+ // modifier et restaurer
+ back[i].chunk_adr=back[i].r.adr; // adresse
+ back[i].chunk_size=back[i].r.size; // taille taille chunk
+ memcpy(&(back[i].r), &r, sizeof(htsblk)); // restaurer véritable r
+ }
+ else if (back[i].is_chunk) { // attention chunk, limiter taille à lire
+#if CHUNKDEBUG==1
+ printf("read %d bytes\n",(int)min(back[i].r.totalsize-back[i].r.size,max_read_bytes));
+#endif
+ retour_fread=(int) http_xfread1(&(back[i].r),(int) min(back[i].r.totalsize-back[i].r.size,max_read_bytes));
+ } else
+ retour_fread=(int) http_xfread1(&(back[i].r),(int) max_read_bytes);
+ // retour_fread=http_fread1(&(back[i].r));
+ } else
+ retour_fread=-1; // interruption ou annulation interne (peut ne pas être une erreur)
+
+ // Si réception chunk, tester si on est pas à la fin!
+ if (back[i].status==1) {
+ if (back[i].is_chunk) { // attendre prochain chunk
+ if (back[i].r.size==back[i].r.totalsize) { // fin chunk!
+ //printf("chunk end at %d\n",back[i].r.size);
+ back[i].status=98; // prochain chunk
+ if (back[i].chunk_adr!=NULL) { freet(back[i].chunk_adr); back[i].chunk_adr=NULL; } back[i].chunk_size=0;
+ retour_fread=0; // pas d'erreur
+#if CHUNKDEBUG==1
+ printf("waiting for next chunk header (soc %d)..\n",back[i].r.soc);
+#endif
+ }
+ }
+ }
+
+ if (retour_fread < 0) { // erreur réception
+ back[i].status=0; // terminé
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(4): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+#if CHUNKDEBUG==1
+ if (back[i].is_chunk)
+ printf("must be the last chunk for %s (connection closed) - %d/%d\n",back[i].url_fil,back[i].r.size,back[i].r.totalsize);
+#endif
+ //if ((back[i].r.statuscode==-1) && (strnotempty(back[i].r.msg)==0)) {
+ if ((back[i].r.statuscode<0) && (strnotempty(back[i].r.msg)==0)) {
+#if HDEBUG
+ printf("error interruped: %s\n",back[i].r.adr);
+#endif
+ if (back[i].r.size>0)
+ strcat(back[i].r.msg,"Interrupted transfer");
+ else
+ strcat(back[i].r.msg,"No data (connection closed)");
+ back[i].r.statuscode=-4;
+ }
+
+ // finalize transfer
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+
+ if (back[i].r.totalsize>0) { // tester totalsize
+ //if ((back[i].r.totalsize>0) && (back[i].status==99)) { // tester totalsize
+ if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
+ if (!opt->tolerant) {
+ //#if HTS_CL_IS_FATAL
+ if (back[i].r.adr) freet(back[i].r.adr); back[i].r.adr=NULL;
+ if (back[i].r.size<back[i].r.totalsize)
+ back[i].r.statuscode=-4; // recatch
+ sprintf(back[i].r.msg,"Incorrect length ("LLintP" Bytes, "LLintP" expected)",back[i].r.size,back[i].r.totalsize);
+ } else {
+ //#else
+ // Un warning suffira..
+ if (cache->errlog!=NULL) {
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
+ //#endif
+ }
+ }
+ }
+#if BDEBUG==1
+ printf("transfer ok\n");
+#endif
+ } else if (retour_fread > 0) { // pas d'erreur de réception et data
+ if (back[i].timeout>0) { // refresh timeout si besoin est
+ back[i].timeout_refresh=time_local();
+ }
+
+ // Traitement des en têtes chunks ou en têtes
+ if (back[i].status==98) { // réception taille chunk en hexa ( après les en têtes, peut ne pas
+ if (back[i].chunk_size>=2) {
+ int chunk_size=-1;
+ // être présent)
+ if (back[i].chunk_adr[back[i].chunk_size-1]==10) { // LF, fin ligne chunk
+ char chunk_data[64];
+ if (back[i].chunk_size<32) { // pas trop gros
+ back[i].chunk_adr[ back[i].chunk_size-1]='\0'; // octet nul
+ strcpy(chunk_data,""); // hex number
+ strcat(chunk_data,back[i].chunk_adr);
+#if CHUNKDEBUG==1
+ printf("chunk received and read: %s\n",chunk_data);
+#endif
+ if (back[i].r.totalsize<0)
+ back[i].r.totalsize=0; // initialiser à 0
+ if (sscanf(chunk_data,"%x",&chunk_size) == 1) {
+ back[i].r.totalsize+=chunk_size; // noter taille
+ back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1);
+ if (!back[i].r.adr) {
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
+ }
+#if CHUNKDEBUG==1
+ printf("chunk length: %d - next total "LLintP":\n",chunk_size,back[i].r.totalsize);
+#endif
+ } else
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil);
+ }
+ } else {
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,back[i].chunk_size,back[i].url_adr,back[i].url_fil);
+ }
+ }
+
+ // ok, continuer sur le body
+
+ // si chunk non nul continuer (ou commencer)
+ if (chunk_size>0) {
+ back[i].status=1; // continuer body
+#if CHUNKDEBUG==1
+ printf("waiting for body (chunk)\n");
+#endif
+ } else { // chunk nul, c'est la fin
+#if CHUNKDEBUG==1
+ printf("chunk end, total: %d\n",back[i].r.size);
+#endif
+ back[i].status=0; // fin
+ // finalize transfer
+ back_finalize(opt,cache,back,i);
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(5): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+
+ /* Tester totalsize en fin de chunk */
+ if ((back[i].r.totalsize>0)) { // tester totalsize
+ if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
+#if HTS_CL_IS_FATAL
+ if (back[i].r.adr) { freet(back[i].r.adr); back[i].r.adr=NULL; }
+ back[i].r.statuscode=-1;
+ strcpy(back[i].r.msg,"Incorrect length");
+#else
+ // Un warning suffira..
+ if (cache->errlog!=NULL) {
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
+#endif
+ }
+ }
+
+
+ }
+ }
+
+ // effacer buffer (chunk en tete)
+ if (back[i].chunk_adr!=NULL) {
+ freet(back[i].chunk_adr);
+ back[i].chunk_adr=NULL;
+ back[i].chunk_size=0;
+ }
+
+ } // chunk LF?
+ } // taille buffer chunk>2
+ //
+ } else if (back[i].status==99) { // en têtes (avant le chunk si il est présent)
+ //
+ if (back[i].r.size>=2) {
+ // double LF
+ if (
+ ((back[i].r.adr[back[i].r.size-1]==10) && (back[i].r.adr[back[i].r.size-2]==10))
+ ||
+ (back[i].r.adr[0] == '<') /* bogus server */
+ ) {
+ char rcvd[2048];
+ int ptr=0;
+ int noFreebuff=0;
+
+#if BDEBUG==1
+ printf("..ok, header received\n");
+#endif
+
+ /* Hack for zero-length headers */
+ if (back[i].r.adr[0] != '<') {
+
+ // ----------------------------------------
+ // traiter en-tête!
+ // status-line à récupérer
+ ptr+=binput(back[i].r.adr+ptr,rcvd,2000);
+ if (strnotempty(rcvd)==0)
+ ptr+=binput(back[i].r.adr+ptr,rcvd,2000); // "certains serveurs buggés envoient un \n au début" (RFC)
+
+ // traiter status-line
+ treatfirstline(&back[i].r,rcvd);
+
+#if HDEBUG
+ printf("(Buffer) Status-Code=%d\n",back[i].r.statuscode);
+#endif
+ if (_DEBUG_HEAD) {
+ if (ioinfo) {
+ fprintf(ioinfo,"response for %s%s:\r\ncode=%d\r\n",jump_identification(back[i].url_adr),back[i].url_fil,back[i].r.statuscode);
+ fprintfio(ioinfo,back[i].r.adr,">>> ");
+ fprintf(ioinfo,"\r\n");
+ fflush(ioinfo);
+ } // en-tête
+ }
+
+ // header // ** !attention! HTTP/0.9 non supporté
+ do {
+ ptr+=binput(back[i].r.adr+ptr,rcvd,2000);
+#if HDEBUG
+ printf("(buffer)>%s\n",rcvd);
+#endif
+ /*
+ if (_DEBUG_HEAD) {
+ if (ioinfo) {
+ fprintf(ioinfo,"(buffer)>%s\r\n",rcvd);
+ fflush(ioinfo);
+ }
+ }
+ */
+
+ if (strnotempty(rcvd))
+ treathead(opt->cookie,back[i].url_adr,back[i].url_fil,&back[i].r,rcvd); // traiter
+
+ // parfois les serveurs buggés renvoient un content-range avec un 200
+ if (back[i].r.statuscode==200) // 'OK'
+ if (strfield(rcvd,"content-range:")) // Avec un content-range: relisez les RFC..
+ back[i].r.statuscode=206; // FORCER A 206 !!!!!
+
+ } while(strnotempty(rcvd));
+ // ----------------------------------------
+
+ // libérer mémoire -- après! --
+ if (back[i].r.adr!=NULL) { freet(back[i].r.adr); back[i].r.adr=NULL; }
+ } else {
+ // assume text/html, OK
+ treatfirstline(&back[i].r, back[i].r.adr);
+ noFreebuff=1;
+ }
+
+
+
+ /*
+ Status code and header-response hacks
+ */
+
+
+ // Check response : 203 == 200
+ if (back[i].r.statuscode==203) { // 'Non-Authoritative Information'
+ back[i].r.statuscode=200; // forcer "OK"
+ } else if (back[i].r.statuscode == 100) {
+ back[i].status=99;
+ back[i].r.size=0;
+ back[i].r.totalsize=0;
+ back[i].chunk_size=0;
+ back[i].r.statuscode=-1;
+ back[i].r.msg[0]='\0';
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Status 100 detected for %s%s, continuing headers"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ continue;
+ }
+
+ /*
+ Solve "false" 416 problems
+ */
+ if (back[i].r.statuscode==416) { // 'Requested Range Not Satisfiable'
+ // Example:
+ // Range: bytes=2830-
+ // ->
+ // Content-Range: bytes */2830
+ if (back[i].range_req_size == back[i].r.crange) {
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // READY
+ back[i].r.size=back[i].r.totalsize=back[i].range_req_size;
+ filenote(back[i].url_sav,NULL);
+ back[i].r.statuscode=304; // NOT MODIFIED
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+
+ // transform 406 into 200 ; we'll catch embedded links inside the choice page
+ if (back[i].r.statuscode==406) { // 'Not Acceptable'
+ back[i].r.statuscode=200;
+ }
+
+ // Various hacks to limit re-transfers when updating a mirror
+ // Force update if same size detected
+ if (opt->sizehack) {
+ // We already have the file
+ // and ask the remote server for an update
+ // Some servers, especially dynamic pages severs, always
+ // answer that the page has been modified since last visit
+ // And answer with a 200 (OK) response, and the same page
+ // If the size is the same, and the option has been set, we assume
+ // that the file is identical - and therefore let's break the connection
+ if (back[i].is_update) { // mise à jour
+ if (back[i].r.statuscode==200) { // 'OK'
+ htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL); // lire entrée cache
+ if (r.statuscode == 200) { // OK pas d'erreur cache
+ LLint len1,len2;
+ len1=r.totalsize;
+ len2=back[i].r.totalsize;
+ if (r.size>0)
+ len1=r.size;
+ if (len1>0) {
+ if (len1 == len2) { // tailles identiques
+ back[i].r.statuscode=304; // forcer NOT MODIFIED
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+ } else {
+ if (opt->errlog!=NULL) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File seems complete (same size), but there was a cache read error: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ if (r.adr) {
+ freet(r.adr);
+ }
+ }
+ }
+ }
+
+ // Various hacks to limit re-transfers when updating a mirror
+ // Detect already downloaded file (with another browser, for example)
+ if (opt->sizehack) {
+ if (!back[i].is_update) { // mise à jour
+ if (back[i].r.statuscode==200) { // 'OK'
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML
+ if (strnotempty(back[i].url_sav)) { // target found
+ int size = fsize(back[i].url_sav); // target size
+ if (size >= 0) {
+ if (back[i].r.totalsize == size) { // same size!
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // READY
+ back[i].r.size=back[i].r.totalsize;
+ filenote(back[i].url_sav,NULL);
+ back[i].r.statuscode=304; // NOT MODIFIED
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Various hacks to limit re-transfers when updating a mirror
+ // Detect bad range: header
+ if (opt->sizehack) {
+ // We have request for a partial file (with a 'Range: NNN-' header)
+ // and received a complete file notification (200), with 'Content-length: NNN'
+ // it might be possible that we had the complete file
+ // this is the case in *most* cases, so break the connection
+ if (back[i].r.is_write==0) { // mode mémoire
+ if (back[i].r.adr==NULL) { // rien n'a été écrit
+ if (!back[i].testmode) { // pas mode test
+ if (strnotempty(back[i].url_sav)) {
+ if (strcmp(back[i].url_fil,"/robots.txt")) {
+ if (back[i].r.statuscode==200) { // 'OK'
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (back[i].r.statuscode==200) { // "OK"
+ if (back[i].range_req_size>0) { // but Range: requested
+ if (back[i].range_req_size == back[i].r.totalsize) { // And same size
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(skip_range): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // READY
+ back[i].r.size=back[i].r.totalsize;
+ filenote(back[i].url_sav,NULL);
+ back[i].r.statuscode=304; // NOT MODIFIED
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+ }
+
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // END - Various hacks to limit re-transfers when updating a mirror
+
+ /*
+ End of status code and header-response hacks
+ */
+
+
+
+ /* Interdiction taille par le wizard? */
+ if (back[i].r.soc!=INVALID_SOCKET) {
+ if (!back_checksize(opt,&back[i],1)) {
+ back[i].status=0; // FINI
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ if (!back[i].testmode)
+ strcpy(back[i].r.msg,"File too big");
+ else
+ strcpy(back[i].r.msg,"Test: File too big");
+ }
+ }
+
+ /* sinon, continuer */
+ /* if (back[i].r.soc!=INVALID_SOCKET) { // ok récupérer body? */
+ // head: terminé
+ if (back[i].head_request) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Tested file: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(head request): deletehttp\n");
+#endif
+ // Couper connexion
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // terminé
+ }
+ // traiter une éventuelle erreur 304 (cache à jour utilisable)
+ else if (back[i].r.statuscode==304) { // document à jour dans le cache
+ // lire dans le cache
+ // ** NOTE: pas de vérif de la taille ici!!
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(file is not modified): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].r=cache_read(opt,cache,back[i].url_adr,back[i].url_fil,back[i].url_sav);
+ if (!back[i].r.location)
+ back[i].r.location=back[i].location_buffer;
+ else { /* recopier */
+ strcpy(back[i].location_buffer,back[i].r.location);
+ back[i].r.location=back[i].location_buffer;
+ }
+
+ // hack:
+ // In case of 'if-unmodified-since' hack, a 304 status can be sent
+ // then, force 'ok' status
+ if (back[i].r.statuscode == -1) {
+ if (fexist(back[i].url_sav)) {
+ back[i].r.statuscode=200; // OK
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+
+ // Status is okay?
+ if (back[i].r.statuscode!=-1) { // pas d'erreur de lecture
+ back[i].status=0; // OK prêt
+ back[i].r.notmodified=1; // NON modifié!
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File loaded after test from cache: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+
+ // finalize
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+
+#if DEBUGCA
+ printf("..document à jour après requète: %s%s\n",back[i].url_adr,back[i].url_fil);
+#endif
+
+ //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
+ } else { // erreur
+ back[i].status=0; // terminé
+ //printf("erreur cache\n");
+
+ }
+
+ } else if ((back[i].r.statuscode==301)
+ || (back[i].r.statuscode==302)
+ || (back[i].r.statuscode==303)
+ || (back[i].r.statuscode==307)
+ || (back[i].r.statuscode==412)
+ || (back[i].r.statuscode==416)
+ ) { // Ne pas prendre le html, erreurs connues et gérées
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(301,302,303,307,412,416..): deletehttp\n");
+#endif
+ // Couper connexion
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // terminé
+ // finalize
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+ } else { // il faut aller le chercher
+
+ // effacer buffer (requète)
+ if (!noFreebuff) {
+ if (back[i].r.adr!=NULL) {
+ freet(back[i].r.adr);
+ back[i].r.adr=NULL;
+ }
+ back[i].r.size=0;
+ }
+
+ // traiter 206 (partial content)
+ // xxc SI CHUNK VERIFIER QUE CA MARCHE??
+ if (back[i].r.statuscode==206) { // on nous envoie un morceau (la fin) coz une partie sur disque!
+ LLint sz=fsize(back[i].url_sav);
+#if HDEBUG
+ printf("partial content: "LLintP" on disk..\n",(LLint)sz);
+#endif
+ if (sz>=0) {
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir**
+ filenote(back[i].url_sav,NULL); // noter fichier comme connu
+ back[i].r.out=fopen(fconv(back[i].url_sav),"ab"); // append
+ if (back[i].r.out) {
+ back[i].r.is_write=1; // écrire
+ back[i].r.size=sz; // déja écrit
+ back[i].r.statuscode=200; // Forcer 'OK'
+ if (back[i].r.totalsize>0)
+ back[i].r.totalsize+=sz; // plus en fait
+ fseek(back[i].r.out,0,SEEK_END); // à la fin
+#if HDEBUG
+ printf("continue interrupted file\n");
+#endif
+ } else { // On est dans la m**
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not open partial file");
+ }
+ }
+ } else { // mémoire
+ FILE* fp=fopen(fconv(back[i].url_sav),"rb");
+ if (fp) {
+ LLint alloc_mem=sz + 1;
+ if (back[i].r.totalsize>0)
+ alloc_mem+=back[i].r.totalsize; // AJOUTER RESTANT!
+ if ( (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) {
+ back[i].r.size=sz;
+ if (back[i].r.totalsize>0)
+ back[i].r.totalsize+=sz; // plus en fait
+ if (((int) fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) {
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not read partial file");
+ } else {
+ back[i].r.statuscode=200; // Forcer 'OK'
+#if HDEBUG
+ printf("continue in mem interrupted file\n");
+#endif
+ }
+ } else {
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"No memory for partial file");
+ }
+ fclose(fp);
+ } else { // Argh..
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not open partial file");
+ }
+ }
+ } else { // Non trouvé??
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not find partial file");
+ }
+ // Erreur?
+ if (back[i].status==0) {
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(206 solve problems): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ //back[i].r.statuscode=206; ????????
+ back[i].r.statuscode=-5;
+ if (strnotempty(back[i].r.msg))
+ strcpy(back[i].r.msg,"Error attempting to solve status 206 (partial file)");
+ }
+ }
+
+ if (back[i].status!=0) { // non terminé (erreur)
+ if (!back[i].testmode) { // fichier normal
+
+ if (!back[i].r.is_chunk) { // pas de chunk
+ //if (back[i].r.http11!=2) { // pas de chunk
+ back[i].is_chunk=0;
+ back[i].status=1; // start body
+ } else {
+#if CHUNKDEBUG==1
+ printf("chunk encoding detected %s..\n",back[i].url_fil);
+#endif
+ back[i].is_chunk=1;
+ back[i].chunk_adr=NULL;
+ back[i].chunk_size=0;
+ back[i].status=98; // start body wait chunk
+ }
+ if (back[i].rateout>0) {
+ back[i].rateout_time=time_local(); // refresh pour transfer rate
+ }
+#if HDEBUG
+ printf("(buffer) start body!\n");
+#endif
+ } else { // mode test, ne pas passer en 1!!
+ back[i].status=0; // READY
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(test ok): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ if (back[i].r.statuscode==200) {
+ strcpy(back[i].r.msg,"Test: OK");
+ back[i].r.statuscode=-10; // test réussi
+ }
+ else { // test a échoué, on ne change rien sauf que l'erreur est à titre indicatif
+ char tempo[1000];
+ strcpy(tempo,back[i].r.msg);
+ strcpy(back[i].r.msg,"Test: ");
+ strcat(back[i].r.msg,tempo);
+ }
+
+ }
+ }
+
+ }
+
+ /*}*/
+
+ } // si LF
+ } // r.size>2
+ } // si == 99
+
+ } // si pas d'erreurs
+#if BDEBUG==1
+ printf("bytes overall: %d\n",back[i].r.size);
+#endif
+ } // données dispo
+
+ // en cas d'erreur cl, supprimer éventuel fichier sur disque
+#if HTS_REMOVE_BAD_FILES
+ if (back[i].status<0) {
+ if (!back[i].testmode) { // pas en test
+ remove(back[i].url_sav); // éliminer fichier (endommagé)
+ //printf("&& %s\n",back[i].url_sav);
+ }
+ }
+#endif
+
+ /* funny log for commandline users */
+ //if (!opt->quiet) {
+ // petite animation
+ if (opt->verbosedisplay==1) {
+ if (back[i].status==0) {
+ if (back[i].r.statuscode==200)
+ printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size);
+ else
+ printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size,back[i].r.statuscode);
+ fflush(stdout);
+ }
+ }
+ //}
+
+
+ } // status>0
+ } // for
+
+ // vérifier timeouts
+ if (gestion_timeout) {
+ TStamp act;
+ act=time_local(); // temps en secondes
+ for(i=0;i<back_max;i++) {
+ if (back[i].status>0) { // réception/connexion/..
+ if (back[i].timeout>0) {
+ //printf("time check %d\n",((int) (act-back[i].timeout_refresh))-back[i].timeout);
+ if (((int) (act-back[i].timeout_refresh))>=back[i].timeout) {
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(timeout): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-2;
+ if (back[i].status==100)
+ strcpy(back[i].r.msg,"Connect Time Out");
+ else if (back[i].status==101)
+ strcpy(back[i].r.msg,"DNS Time Out");
+ else
+ strcpy(back[i].r.msg,"Receive Time Out");
+ back[i].status=0; // terminé
+ } else if ((back[i].rateout>0) && (back[i].status<99)) {
+ if (((int) (act-back[i].rateout_time))>=HTS_WATCHRATE) { // checker au bout de 15s
+ if ( (int) ((back[i].r.size)/(act-back[i].rateout_time)) < back[i].rateout ) { // trop lent
+ back[i].status=0; // terminé
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(rateout): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-3;
+ strcpy(back[i].r.msg,"Transfer Rate Too Low");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ max_loop--;
+#if HTS_ANALYSTE
+ max_loop_chk++;
+#endif
+ } while((busy_state) && (busy_recv) && (max_loop>0));
+#if HTS_ANALYSTE
+ if ((!busy_recv) && (!busy_state)) {
+ if (max_loop_chk>=1) {
+ Sleep(10); // un tite pause pour éviter les lag..
+ }
+ }
+#endif
+}
+
+/* Tell whether the transfer described by eback is still within the allowed size limits.
+   check_only_totalsize!=0: test only r.totalsize; otherwise test the larger of
+   r.totalsize and r.size. A negative size to test is never refused.
+   Returns 1 when the size is acceptable, 0 when it is forbidden. */
+int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) {
+  LLint size_to_test=eback->r.totalsize;
+  if (!check_only_totalsize)
+    size_to_test=max(eback->r.totalsize,eback->r.size);
+  if (size_to_test>=0) {
+    /* Size forbidden by the wizard? Pass the size really being tested (in 1024-byte units,
+       rounded up), not only r.totalsize, so that r.size also counts when it is the larger one */
+    if (hts_testlinksize(opt,eback->url_adr,eback->url_fil,(size_to_test+1023)/1024)==-1)
+      return 0;     /* forbidden */
+
+    /* check the regular size limits (html and non html) */
+    if (istoobig(size_to_test,eback->maxfile_html,eback->maxfile_nonhtml,eback->r.contenttype))
+      return 0;     /* forbidden */
+  }
+  return 1;
+}
+
+
+// Returns nb plus r.size of every slot whose status is strictly between 0 and 99
+LLint back_transfered(LLint nb,lien_back* back,int back_max) {
+  int slot;
+  for(slot=0;slot<back_max;slot++) {
+    const int st=back[slot].status;
+    if (st>0 && st<99) nb+=back[slot].r.size;   // add the bytes of this pending slot
+  }
+  return nb;
+}
+
+// Write to fp the line built by back_infostr() for slot i, followed by LF; nothing if status<0.
+// j: bit mask, 1 = show slots in progress, 2 = show finished slots, 3 = show everything
+void back_info(lien_back* back,int i,int j,FILE* fp) {
+  if (back[i].status>=0) {
+    char s[3*1024];   /* room for status word + two URL fragments of up to 1024 bytes + sizes + LF (256 overflowed on long URLs) */
+    s[0]='\0';
+    back_infostr(back,i,j,s);
+    strcat(s,LF);
+    fprintf(fp,"%s",s);
+  }
+}
+
+// Append to s (an existing NUL-terminated string, which must be large enough) a summary of slot i:
+// status word, then "address/file" in double quotes, then received and total sizes; nothing if status<0.
+// j is a bit mask: 1 = report slots in progress, 2 = report finished slots (3 = both).
+void back_infostr(lien_back* back,int i,int j,char* s) {
+  if (back[i].status>=0) {
+    int aff=0;                  // set when the URL and sizes must follow the status word
+    if (j & 1) {
+      if (back[i].status==100) {
+        strcat(s,"CONNECT ");
+      } else if (back[i].status==99) {
+        strcat(s,"INFOS ");
+        aff=1;
+      } else if (back[i].status==98) {
+        strcat(s,"INFOSC");     // infos chunk
+        aff=1;
+      }
+      else if (back[i].status>0) {
+#if HTS_ANALYSTE==2
+        strcat(s,"WAIT ");
+#else
+        strcat(s,"RECEIVE ");
+#endif
+        aff=1;
+      }
+    }
+    if (j & 2) {
+      if (back[i].status==0) {
+        switch (back[i].r.statuscode) {
+        case 200:
+          strcat(s,"READY ");
+          aff=1;
+          break;
+#if HTS_ANALYSTE==2
+        default:
+          strcat(s,"ERROR ");
+          break;
+#else
+        case -1:
+          strcat(s,"ERROR ");
+          aff=1;
+          break;
+        case -2:
+          strcat(s,"TIMEOUT ");
+          aff=1;
+          break;
+        case -3:
+          strcat(s,"TOOSLOW ");
+          aff=1;
+          break;
+        case 400:
+          strcat(s,"BADREQUEST ");
+          aff=1;
+          break;
+        case 401: case 403:
+          strcat(s,"FORBIDDEN ");
+          aff=1;
+          break;
+        case 404:
+          strcat(s,"NOT FOUND ");
+          aff=1;
+          break;
+        case 500:
+          strcat(s,"SERVERROR ");
+          aff=1;
+          break;
+        default:
+          {
+            char s2[256];
+            sprintf(s2,"ERROR(%d)",back[i].r.statuscode);
+            strcat(s,s2);
+          }
+          aff=1;
+#endif
+        }
+      }
+    }
+
+    if (aff) {
+      char s2[1024];
+      sprintf(s2,"\"%s",back[i].url_adr); strcat(s,s2);
+
+      if (back[i].url_fil[0]!='/') strcat(s,"/");
+      sprintf(s2,"%s\" ",back[i].url_fil); strcat(s,s2);
+      // format the sizes into s2, not s: writing them to s would erase the status word and address
+      sprintf(s2,LLintP" "LLintP" ",back[i].r.size,back[i].r.totalsize); strcat(s,s2);
+    }
+  }
+}
+
+// -- backing --
+
+#undef test_flush
diff --git a/src/htsback.h b/src/htsback.h
new file mode 100644
index 0000000..af5fe6c
--- /dev/null
+++ b/src/htsback.h
@@ -0,0 +1,75 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* backing system (multiple socket download) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSBACK_DEFH
+#define HTSBACK_DEFH
+
+#include "htsglobal.h"
+#include "htsbasenet.h"
+#include "htscore.h"
+
+// backing: table of lien_back slots used for multiple socket downloads
+#define BACK_ADD_TEST "(dummy)"
+#define BACK_ADD_TEST2 "(dummy2)"
+int back_index(lien_back* back,int back_max,char* adr,char* fil,char* sav);
+int back_available(lien_back* back,int back_max);
+LLint back_incache(lien_back* back,int back_max);
+HTS_INLINE int back_exist(lien_back* back,int back_max,char* adr,char* fil,char* sav);
+int back_nsoc(lien_back* back,int back_max);
+int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr);
+int back_stack_available(lien_back* back,int back_max);
+void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max);
+void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart);
+int back_delete(lien_back* back,int p);
+int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p);
+void back_info(lien_back* back,int i,int j,FILE* fp); // writes the back_infostr() line of slot i, plus LF, to fp
+void back_infostr(lien_back* back,int i,int j,char* s); // appends the status line of slot i to s (j: 1 in progress, 2 finished, 3 both)
+LLint back_transfered(LLint add,lien_back* back,int back_max); // add + r.size of every slot with 0<status<99
+// hostback (only declared when HTS_XGETHOST is enabled)
+#if HTS_XGETHOST
+void back_solve(lien_back* back);
+int host_wait(lien_back* back);
+#endif
+int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize); // 1 if the size is acceptable, 0 if forbidden
+
+#if HTS_XGETHOST
+#if USE_BEGINTHREAD
+PTHREAD_TYPE Hostlookup(void* iadr_p);
+#endif
+#endif
+
+#endif
diff --git a/src/htsbase.h b/src/htsbase.h
new file mode 100644
index 0000000..3e83471
--- /dev/null
+++ b/src/htsbase.h
@@ -0,0 +1,136 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Basic definitions */
+/* Used in .c files for basic (malloc() ..) definitions */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_BASICH
+#define HTS_BASICH
+
+#include "htsglobal.h"
+
+// size_t and mode_t (needed by the prototypes below)
+#include <stdio.h>
+#if HTS_WIN
+#else
+#include <fcntl.h>
+#endif
+
+#if HTS_WIN
+#else
+ #define min(a,b) ((a)>(b)?(b):(a))
+ #define max(a,b) ((a)>(b)?(a):(b))
+#endif
+
+// hichar: map 'a'..'z' to upper case; streql: test two chars for equality, case insensitive (arguments are evaluated several times)
+#define hichar(a) ((((a)>='a') && ((a)<='z')) ? ((a)-('a'-'A')) : (a))
+#define streql(a,b) (hichar(a)==hichar(b))
+
+// is this MIME type a hypertext one (text/html), html/js-style or other script/text type?
+#define HTS_HYPERTEXT_DEFAULT_MIME "text/html"
+#define is_hypertext_mime(a) \
+ ( (strfield2((a),"text/html")!=0)\
+ || (strfield2((a),"application/x-javascript")!=0) \
+ || (strfield2((a),"text/css")!=0) \
+ || (strfield2((a),"image/svg+xml")!=0) \
+ || (strfield2((a),"image/svg-xml")!=0) \
+ /*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\
+ )
+
+#define may_be_hypertext_mime(a) \
+ (\
+ (strfield2((a),"audio/x-pn-realaudio")!=0) \
+ )
+
+
+// upper-case letter test ('A'..'Z')
+#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') )
+
+// optional conversion of / to backslash (a function under HTS_WIN, an identity macro otherwise)
+#if HTS_WIN
+char* antislash(char* s);
+#else
+#define antislash(A) (A)
+#endif
+
+
+// prototypes of C library functions, declared here depending on HTS_PLATFORM and HTS_WIN
+#if HTS_PLATFORM!=3
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if HTS_PLATFORM!=2
+#if HTS_PLATFORM!=1
+ int open (const char *, int, ...);
+#endif
+ //int read (int,const char*,int);
+ //int write (int,char*,int);
+#endif
+#if HTS_PLATFORM!=1
+ int close (int);
+ void* calloc (size_t,size_t);
+ void* malloc (size_t);
+ void* realloc (void*,size_t);
+ void free (void*);
+#endif
+#if HTS_WIN
+#else
+ int mkdir (const char*,mode_t);
+#endif
+#ifdef __cplusplus
+}
+#endif
+#endif
+
+
+// malloc() tracing: with HTS_TRACE_MALLOC the *t() wrappers call hts_malloc/hts_free/hts_realloc, otherwise the plain C library functions
+#if HTS_TRACE_MALLOC
+#define malloct(A) hts_malloc(A,0)
+#define calloct(A,B) hts_malloc(A,B)
+#define freet(A) hts_free(A)
+#define realloct(A,B) hts_realloc(A,B)
+void hts_freeall();
+void* hts_malloc (size_t,size_t);
+void hts_free (void*);
+void* hts_realloc (void*,size_t);
+#else
+#define malloct(A) malloc(A)
+#define calloct(A,B) calloc(A,B)
+#define freet(A) free(A)
+#define realloct(A,B) realloc(A,B)
+#endif
+
+
+#endif
+
diff --git a/src/htsbasenet.h b/src/htsbasenet.h
new file mode 100644
index 0000000..d63a2e7
--- /dev/null
+++ b/src/htsbasenet.h
@@ -0,0 +1,86 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Basic net definitions */
+/* Used in .c and .h files that needs T_SOC and so */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_DEFBASENETH
+#define HTS_DEFBASENETH
+
+/* Socket basics shared by the network code:
+   - T_SOC          socket handle type (SOCKET on Windows, int elsewhere)
+   - t_hostent      resolver result type
+   - INVALID_SOCKET "no socket" value (supplied here on non-Windows builds)
+   - HTS_USESCOPEID defined on non-Windows builds, undefined for Windows IPv6 */
+#if HTS_WIN
+
+#if HTS_INET6==0
+ #include <winsock.h>
+#else
+#undef HTS_USESCOPEID
+/* may already be defined by the build system: do not redefine it */
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <tpipv6.h>
+#endif
+ typedef SOCKET T_SOC;
+ typedef struct hostent FAR t_hostent;
+
+#else
+#define HTS_USESCOPEID
+ /* parenthesized so that the macro stays one operand wherever it expands */
+ #define INVALID_SOCKET (-1)
+ typedef int T_SOC;
+ typedef struct hostent t_hostent;
+#endif
+
+#if HTS_USEOPENSSL
+/*
+ OpenSSL crypto routines by Eric Young (eay@cryptsoft.com)
+ Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ All rights reserved
+*/
+#ifndef HTS_OPENSSL_H_INCLUDED
+#define HTS_OPENSSL_H_INCLUDED
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include <openssl/ssl.h>
+#include <openssl/crypto.h>
+#include <openssl/err.h>
+//#include <openssl/bio.h>
+#ifdef __cplusplus
+ }
+#endif
+/* OpenSSL structure */
+extern SSL_CTX *openssl_ctx;
+
+#endif
+#endif
+
+#endif
diff --git a/src/htsbauth.c b/src/htsbauth.c
new file mode 100644
index 0000000..a1506c1
--- /dev/null
+++ b/src/htsbauth.c
@@ -0,0 +1,401 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* basic authentication: password storage */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include "htsbauth.h"
+
+/* specific definitions */
+#include "htsglobal.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "htsnostatic.h"
+
+/* END specific definitions */
+
+// Cookie handling.
+// Add a cookie to the store, keeping the records ordered by decreasing path
+// length (the longest path has priority). Any existing cookie matching
+// (cook_name, domain, path) is removed first.
+//   cookie     : cookie store (data buffer bounded by max_len)
+//   cook_name  : cookie name  (at most 256 characters)
+//   cook_value : cookie value (at most 1024 characters)
+//   domain     : cookie domain (at most 256 characters)
+//   path       : cookie path   (at most 256 characters)
+// Returns 0 on success, a non-zero value (-1) on error: NULL argument, field
+// too long, field containing a TAB or LF, or store full.
+int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path) {
+  char* a;
+  char* insert;
+  char cook[16384];
+  if ((cookie==NULL) || (cook_name==NULL) || (cook_value==NULL) || (domain==NULL) || (path==NULL))
+    return -1;
+  // remove a possible duplicate first
+  cookie_del(cookie,cook_name,domain,path);
+  if ((int)strlen(cook_value)>1024) return -1;   // too long
+  if ((int)strlen(cook_name)>256) return -1;     // too long
+  if ((int)strlen(domain)>256) return -1;        // too long
+  if ((int)strlen(path)>256) return -1;          // too long
+  // TAB separates the fields and LF the records: a field holding one of
+  // them would corrupt the store (shifted fields or a forged extra record)
+  if ( (strpbrk(cook_name,"\t\n")!=NULL)
+    || (strpbrk(cook_value,"\t\n")!=NULL)
+    || (strpbrk(domain,"\t\n")!=NULL)
+    || (strpbrk(path,"\t\n")!=NULL) )
+    return -1;
+  if ((int)(
+    strlen(cookie->data)
+    +strlen(cook_value)
+    +strlen(cook_name)
+    +strlen(domain)
+    +strlen(path)
+    +256
+    ) > cookie->max_len) return -1;              // no room left
+
+  // look for the insertion point: skip every record whose path is at least
+  // as long as the new one, stop at the first shorter one
+  a=cookie->data;
+  insert=a;
+  while (*a) {
+    if ( strlen(cookie_get(a,2)) < strlen(path) )
+      a=cookie->data+strlen(cookie->data);       // stop: jump to the end
+    else {
+      a=cookie_nextfield(a);                     // next record
+      insert=a;                                  // insert here
+    }
+  }
+  // build the record (Netscape cookies.txt layout); the length checks above
+  // guarantee that it fits in cook[]
+  strcpy(cook,domain);
+  strcat(cook,"\t");
+  strcat(cook,"TRUE");
+  strcat(cook,"\t");
+  strcat(cook,path);
+  strcat(cook,"\t");
+  strcat(cook,"FALSE");
+  strcat(cook,"\t");
+  strcat(cook,"1999999999");
+  strcat(cook,"\t");
+  strcat(cook,cook_name);
+  strcat(cook,"\t");
+  strcat(cook,cook_value);
+  strcat(cook,"\n");
+  if (!( ((int) strlen(cookie->data) + (int) strlen(cook)) < cookie->max_len)) return -1;  // no room left
+  cookie_insert(insert,cook);
+#if DEBUG_COOK
+  printf("add_new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",cook_name,cook_value,domain,path);
+  //printf(">>>cook: %s<<<\n",cookie->data);
+#endif
+  return 0;
+}
+
+// Remove the cookie matching (cook_name, domain, path) from the store, if
+// there is one. Always returns 0, whether or not a record was removed.
+int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path) {
+  char* entry=cookie_find(cookie->data,cook_name,domain,path);
+  if (entry!=NULL) {
+    char* following=cookie_nextfield(entry);
+    cookie_delete(entry,(int) (following - entry));
+#if DEBUG_COOK
+    printf("deleted old cookie: %s %s %s\n",cook_name,domain,path);
+#endif
+  }
+  return 0;
+}
+
+// Search a cookie starting at position s (for example s=cookie.data).
+// Returns a pointer to the matching record line, or NULL if none matches.
+// A record matches when its name equals cook_name (an empty cook_name
+// accepts any name), its domain is a suffix of 'domain' and its path is a
+// prefix of 'path'. Record layout:
+// .doubleclick.net TRUE / FALSE 1999999999 id A
+char* cookie_find(char* s,char* cook_name,char* domain,char* path) {
+  char* line;
+  for(line=s; *line; line=cookie_nextfield(line)) {
+    char* field;
+    // name filter (skipped when no name was given)
+    if (strnotempty(cook_name)) {
+      if (strcmp(cookie_get(line,5),cook_name)!=0)
+        continue;
+    }
+    // domain of the record: must be a suffix of the requested domain
+    field=cookie_get(line,0);
+    if ((int) strlen(field) > (int) strlen(domain))
+      continue;
+    if (strcmp(field,domain+strlen(domain)-strlen(field))!=0)
+      continue;
+    // path of the record: must be a prefix of the requested path
+    field=cookie_get(line,2);
+    if ((int) strlen(field) > (int) strlen(path))
+      continue;
+    if (strncmp(path,field,strlen(field))==0)
+      return line;
+  }
+  return NULL;
+}
+
+// Return a pointer to the next record: the first character after the next
+// run of '\n', or the terminating '\0' when there is no further line.
+char* cookie_nextfield(char* a) {
+  char* eol=strchr(a,'\n');
+  char* next=(eol!=NULL) ? (eol+1) : (a+strlen(a));
+  while(*next=='\n')
+    next++;
+  return next;
+}
+
+// Copy field number 'param' of a cookies.txt line into dest (capacity 'size').
+// Returns 1 on success, 0 when the field does not fit (dest is left untouched).
+static int cookie_load_field(char* dest,size_t size,char* line,int param) {
+  char* field=cookie_get(line,param);
+  if (strlen(field) >= size)
+    return 0;
+  strcpy(dest,field);
+  return 1;
+}
+
+// Load cookies.txt (file 'name' in directory 'fpath') into the store.
+// On Windows, the *@*.txt files found in fpath (copied IE cookies) are merged
+// first, and each merged file is deleted.
+// Lines starting with '#', empty lines and lines holding a field too large
+// for the store are skipped.
+// Returns 0 when cookies.txt could be opened, a non-zero value (-1) otherwise.
+int cookie_load(t_cookie* cookie,char* fpath,char* name) {
+  cookie->data[0]='\0';
+
+  // First merge the possible IE cookies
+#if HTS_WIN
+  // pth[] only has room for MAX_PATH characters plus the search pattern
+  if (strlen(fpath) < MAX_PATH) {
+    WIN32_FIND_DATA find;
+    HANDLE h;
+    char pth[MAX_PATH + 32];
+    strcpy(pth,fpath);
+    strcat(pth,"*@*.txt");
+    h = FindFirstFile(pth,&find);
+    if (h != INVALID_HANDLE_VALUE) {
+      do {
+        if ( (!(find.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ))
+          && (!(find.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM )) ) {
+          FILE* fp=fopen(fconcat(fpath,find.cFileName),"rb");
+          if (fp) {
+            char cook_name[256];
+            char cook_value[1000];
+            char domainpathpath[512];
+            // sized after the 500-character source line; cookie_add()
+            // rejects anything longer than 256 characters anyway
+            char domain[1024];      // cookie domain (.netscape.com)
+            char path[1024];        // path (/)
+            int cookie_merged=0;
+            linput(fp,cook_name,250);
+            if (!feof(fp)) {
+              linput(fp,cook_value,250);
+              if ( (!feof(fp)) && (strnotempty(cook_value)) ) {
+                linput(fp,domainpathpath,500);
+                if (strnotempty(domainpathpath)) {
+                  if (ident_url_absolute(domainpathpath,domain,path)>=0) {
+                    cookie_add(cookie,cook_name,cook_value,domain,path);
+                    cookie_merged=1;
+                  }
+                }
+              }
+            }
+            fclose(fp);
+            if (cookie_merged)
+              remove(fconcat(fpath,find.cFileName));
+          }  // if fp
+        }
+      } while(FindNextFile(h,&find));
+      FindClose(h);
+    }
+  }
+#endif
+
+  // Then, cookies.txt
+  {
+    FILE* fp = fopen(fconcat(fpath,name),"rb");
+    if (fp) {
+      char line[8192];
+      while( (!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) {
+        rawlinput(fp,line,8100);
+        if ( (strnotempty(line)) && (strlen(line)<8000) && (line[0]!='#') ) {
+          // 260 = the 256 characters accepted by cookie_add() + terminator
+          char domain[260];        // cookie domain (.netscape.com)
+          char path[260];          // path (/)
+          char cook_name[260];     // cookie name (MYCOOK)
+          char cook_value[8192];   // value (ID=toto,S=1234)
+          // a field may be as long as the whole line: never copy it blindly
+          if ( cookie_load_field(domain,sizeof(domain),line,0)            // host
+            && cookie_load_field(path,sizeof(path),line,2)                // path
+            && cookie_load_field(cook_name,sizeof(cook_name),line,5)      // name
+            && cookie_load_field(cook_value,sizeof(cook_value),line,6) )  // value
+          {
+#if DEBUG_COOK
+            printf("%s\n",line);
+#endif
+            cookie_add(cookie,cook_name,cook_value,domain,path);
+          }
+        }
+      }
+      fclose(fp);
+      return 0;
+    }
+  }
+  return -1;
+}
+
+// Write the cookie store to the file 'name' (Netscape cookies.txt layout).
+// Nothing is written when the store is empty.
+// Returns 0 on success (or empty store), -1 when the file cannot be created.
+int cookie_save(t_cookie* cookie,char* name) {
+  char line[8192];
+  char* cursor;
+  FILE* fp;
+
+  if (strnotempty(cookie->data)==0)
+    return 0;                 // empty store: nothing to do
+
+  fp = fopen(fconv(name),"wb");
+  if (fp==NULL)
+    return -1;
+
+  fprintf(fp,"# HTTrack Website Copier Cookie File"LF"# This file format is compatible with Netscape cookies"LF);
+  // copy the records one by one; the loop ends after the first empty line
+  cursor=cookie->data;
+  do {
+    cursor+=binput(cursor,line,8000);
+    fprintf(fp,"%s"LF,line);
+  } while(strnotempty(line));
+  fclose(fp);
+  return 0;
+}
+
+// Insert the string 'ins' in front of 's', in place.
+// The buffer holding 's' must have room for strlen(ins) more characters;
+// the caller is responsible for that check. NULL arguments and an empty
+// 'ins' leave 's' untouched.
+void cookie_insert(char* s,char* ins) {
+  size_t ins_len;
+  if ((s==NULL) || (ins==NULL))
+    return;
+  ins_len=strlen(ins);
+  if (ins_len==0)
+    return;
+  // shift the tail (terminator included) to the right, then drop 'ins' in
+  // the gap; memmove() handles the overlap, so no temporary copy is needed
+  // and the insertion can no longer be lost on an allocation failure
+  memmove(s+ins_len,s,strlen(s)+1);
+  memcpy(s,ins,ins_len);
+}
+// Remove the first 'pos' characters of 's', in place (the rest of the string
+// is moved to the front). pos must not exceed strlen(s); a NULL 's' or a
+// pos <= 0 is a no-op.
+void cookie_delete(char* s,int pos) {
+  if ((s==NULL) || (pos<=0))
+    return;
+  // overlapping move, terminator included: no temporary copy is needed, so
+  // the deletion can no longer be skipped on an allocation failure
+  memmove(s,s+pos,strlen(s+pos)+1);
+}
+
+// Return field number 'param' (0-based, TAB-separated) of the first line of
+// cookie_base; leading '\n' characters are skipped.
+// ex: cookie_get("this is<tab>an<tab>example",1) returns "an"
+// The result lives in a buffer reserved through NOSTATIC_RESERVE and is
+// overwritten by the next call; a field longer than the buffer is truncated.
+// An empty string is returned when the line has no such field.
+char* cookie_get(char* cookie_base,int param) {
+  char* buffer;
+  char* limit;
+  int i;
+  NOSTATIC_RESERVE(buffer, char, 8192);
+
+  while(*cookie_base=='\n') cookie_base++;
+  // end of the current line (or end of string)
+  limit = strchr(cookie_base,'\n');
+  if (!limit) limit=cookie_base+strlen(cookie_base);
+  // skip 'param' tabs
+  for(i=0;(i<param) && (cookie_base!=NULL);i++) {
+    cookie_base=strchr(cookie_base,'\t');    // next tab
+    if (cookie_base) cookie_base++;
+  }
+  // the field must start inside the current line
+  if ((cookie_base!=NULL) && (cookie_base < limit)) {
+    char* a = cookie_base;
+    size_t len;
+    while( (*a) && (*a!='\t') && (*a!='\n')) a++;
+    len=(size_t) (a - cookie_base);
+    if (len > 8192-1)                        // never write past the buffer
+      len=8192-1;
+    buffer[0]='\0';
+    strncat(buffer,cookie_base,len);
+    return buffer;
+  }
+  return "";
+}
+// fin cookies
+
+
+
+// -- basic auth --
+
+/* Declare a directory as having its own authentication.
+   The prefix computed by bauth_prefix(adr,fil) is appended to the list kept
+   in 'cookie', together with 'auth', unless bauth_check() already finds an
+   entry for it.
+   Returns 1 when an entry was added, 0 otherwise (NULL argument, entry
+   already known, prefix or auth too long for the entry, or out of memory). */
+int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) {
+  if ((cookie!=NULL) && (adr!=NULL) && (fil!=NULL) && (auth!=NULL)) {
+    if (!bauth_check(cookie,adr,fil)) {    // not declared yet
+      bauth_chain* chain=&cookie->auth;
+      char* prefix=bauth_prefix(adr,fil);
+      /* both strings are copied into fixed-size fields: refuse anything
+         that would not fit instead of overflowing them */
+      if ( (strlen(auth) >= sizeof(chain->auth))
+        || (strlen(prefix) >= sizeof(chain->prefix)) )
+        return 0;
+      /* go to the end of the list */
+      while(chain->next)
+        chain=chain->next;
+      chain->next=(bauth_chain*) calloc(1,sizeof(bauth_chain));
+      if (chain->next) {
+        chain=chain->next;
+        chain->next=NULL;
+        strcpy(chain->auth,auth);
+        strcpy(chain->prefix,prefix);
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* Look up the authentication declared for adr+fil.
+   Returns the auth string of the first list entry whose (non-empty) prefix
+   is a prefix of bauth_prefix(adr,fil), or NULL when there is none or when
+   cookie is NULL. */
+char* bauth_check(t_cookie* cookie,char* adr,char* fil) {
+  bauth_chain* node;
+  char* prefix;
+  if (cookie==NULL)
+    return NULL;
+  prefix=bauth_prefix(adr,fil);
+  for(node=&cookie->auth; node!=NULL; node=node->next) {
+    if ( (strnotempty(node->prefix))
+      && (strncmp(prefix,node->prefix,strlen(node->prefix))==0) )
+      return node->auth;
+  }
+  return NULL;
+}
+
+/* Build the authentication prefix of adr+fil: jump_identification(adr)
+   followed by fil, cut at the first '?', then cut just after the last '/'
+   (when there is one). The result lives in a buffer reserved through
+   NOSTATIC_RESERVE. */
+char* bauth_prefix(char* adr,char* fil) {
+  char* prefix;
+  char* cut;
+  NOSTATIC_RESERVE(prefix, char, HTS_URLMAXSIZE*2);
+  strcpy(prefix,jump_identification(adr));
+  strcat(prefix,fil);
+  /* drop the query string */
+  cut=strchr(prefix,'?');
+  if (cut!=NULL)
+    *cut='\0';
+  /* keep the directory part only */
+  cut=strrchr(prefix,'/');
+  if (cut!=NULL)
+    cut[1]='\0';
+  return prefix;
+}
diff --git a/src/htsbauth.h b/src/htsbauth.h
new file mode 100644
index 0000000..d361d83
--- /dev/null
+++ b/src/htsbauth.h
@@ -0,0 +1,74 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* basic authentication: password storage */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSBAUTH_DEFH
+#define HTSBAUTH_DEFH
+
+// one element of the linked list of basic-authentication entries
+typedef struct bauth_chain {
+ char prefix[1024]; /* www.foo.com/secure/ */
+ char auth[1024]; /* base-64 encoded user:pass */
+ struct bauth_chain* next; /* next element */
+} bauth_chain;
+
+
+// storage for the cookies and the authentication entries
+typedef struct {
+ int max_len; /* upper bound checked against strlen(data) when a cookie is added */
+ char data[32768]; /* cookie records: one per line, TAB-separated fields */
+ bauth_chain auth; /* first element of the authentication list */
+} t_cookie;
+
+// cookies
+int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path);
+int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path);
+int cookie_load(t_cookie* cookie,char* path,char* name);
+int cookie_save(t_cookie* cookie,char* name);
+void cookie_insert(char* s,char* ins);
+void cookie_delete(char* s,int pos);
+char* cookie_get(char* cookie_base,int param);
+char* cookie_find(char* s,char* cook_name,char* domain,char* path);
+char* cookie_nextfield(char* a);
+
+// basic auth
+int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth);
+char* bauth_check(t_cookie* cookie,char* adr,char* fil);
+char* bauth_prefix(char* adr,char* fil);
+
+
+#endif
diff --git a/src/htscache.c b/src/htscache.c
new file mode 100644
index 0000000..da8791e
--- /dev/null
+++ b/src/htscache.c
@@ -0,0 +1,881 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* cache system (index and stores files in cache) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htscache.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsbasenet.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "htsnostatic.h"
+/* END specific definitions */
+
+#undef test_flush /* local redefinition: flushes opt->log and opt->errlog when opt->flush is set; expands to a bare if statement and expects a variable named opt in scope */
+#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); }
+
+// cache storage routines
+
+/*
+ VERSION 1.0 :
+ -----------
+
+.ndx file
+ file with data
+ <string>(date/time) [ <string>(hostname+filename) (datfile_position_ascii) ] * number_of_links
+ file without data
+ <string>(date/time) [ <string>(hostname+filename) (-datfile_position_ascii) ] * number_of_links
+
+.dat file
+ [ file ] *
+with
+ file= (with data)
+ [ bytes ] * sizeof(htsblk header) [ bytes ] * n(length of file given in htsblk header)
+ file= (without data)
+ [ bytes ] * sizeof(htsblk header)
+with
+ <string>(name) = <length in ascii>+<lf>+<data>
+
+
+ VERSION 1.1/1.2 :
+ ---------------
+
+.ndx file
+ file with data
+ <string>("CACHE-1.1") <string>(date/time) [ <string>(hostname+filename) (datfile_position_ascii) ] * number_of_links
+ file without data
+ <string>("CACHE-1.1") <string>(date/time) [ <string>(hostname+filename) (-datfile_position_ascii) ] * number_of_links
+
+.dat file
+ <string>("CACHE-1.1") [ [Header_1.1] [bytes] * n(length of file given in header) ] *
+with
+ Header_1.1=
+ <int>(statuscode)
+ <int>(size)
+ <string>(msg)
+ <string>(contenttype)
+ <string>(last-modified)
+ <string>(Etag)
+ [<string>"SD" <string>(supplemental data)]
+ [<string>"SD" <string>(supplemental data)]
+ ...
+ <string>"HTS" (end of header)
+ <int>(number of bytes of data) (0 if no data written)
+*/
+
+// New: when the type is not text/html, only the size is stored
+
+
+/* Store the response *r in the cache when it qualifies.
+   This is the only place where elements (whole file or header) are added to
+   the cache. A response is stored when caching is enabled, the cache data
+   file is open, the status code is one of those listed below, the response
+   is not a local file (r->is_file) and there is either a save name or the
+   file is /robots.txt. */
+void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save) {
+  if ((opt->debug>0) && (opt->log!=NULL)) {
+    fspc(opt->log,"debug"); fprintf(opt->log,"File checked by cache: %s"LF,url_adr);
+  }
+
+  // cache disabled, or no data file to write to
+  if (!opt->cache)
+    return;
+  if (cache->dat==NULL)
+    return;
+
+  // "ok" files are stored, and so are the following answers
+  switch(r->statuscode) {
+  case 200:     /* standard response */
+  case 204:     /* no content */
+  case 301:     /* moved perm */
+  case 302:     /* moved temp */
+  case 303:     /* moved temp */
+  case 307:     /* moved temp */
+  case 401:     /* authorization */
+  case 403:     /* unauthorized */
+  case 404:     /* not found */
+  case 410:     /* gone */
+    break;
+  default:      /* anything else is not stored */
+    return;
+  }
+
+  // do not store generated pages
+  if (r->is_file)
+    return;
+
+  // store files (and robots.txt)
+  if ( (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0))
+    cache_add(*r,url_adr,url_fil,url_save,cache->ndx,cache->dat,opt->all_in_cache);
+}
+
+
+/* Append one entry to the cache.
+ * The response header (status code, size, msg, content type, last-modified,
+ * etag, location, content-disposition, then the "HTS" end marker) is written
+ * to cache_dat, followed by a byte count and, when the body is cached, the
+ * body itself (taken from r.adr, or copied from the url_save file when
+ * r.is_write is set). The body is cached for hypertext MIME types, when
+ * all_in_cache is set, and for /robots.txt.
+ * On success an index record "url_adr\nurl_fil\n" followed by the ASCII
+ * position in cache_dat is written to cache_ndx; the position is negated
+ * when no body was stored.
+ * Nothing is written when url_save is empty (except for /robots.txt) or
+ * when r.size <= 0. */
+void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache) {
+ int pos;
+ char s[256];
+ char buff[HTS_URLMAXSIZE*4];
+ int ok=1;
+ int dataincache=0; // is the body stored in the cache?
+ /*char digest[32+2];*/
+ /*digest[0]='\0';*/
+
+ // empty url_save?
+ if ( (strnotempty(url_save)==0) ) {
+ if (strcmp(url_fil,"/robots.txt")==0) // robots.txt
+ dataincache=1;
+ else
+ return; // error (except for robots.txt)
+ }
+
+ if (r.size <= 0) // size <= 0
+ return; // rejected
+
+ // Store the *data* in the cache?
+ if (is_hypertext_mime(r.contenttype)) // html: cache the data and
+ dataincache=1; // not only the header
+ else if (all_in_cache)
+ dataincache=1; // force everything into the cache
+
+ /* compute md5? */
+ /*
+ if (is_hypertext_mime(r.contenttype)) { // html, calcul MD5
+ if (r.adr) {
+ domd5mem(r.adr,r.size,digest,1);
+ }
+ }*/
+
+ // Position
+ fflush(cache_dat); fflush(cache_ndx);
+ pos=ftell(cache_dat); // NOTE(review): ftell() returns a long and -1 on failure; neither case is handled here
+ // write seek pointer, address, file
+ if (dataincache) // body stored: positive position
+ sprintf(s,"%d\n",pos); // written as text (this avoids \0 bytes in the index)
+ else
+ sprintf(s,"%d\n",-pos); // header only: negated position, also written as text
+
+ // data
+ // write header data, then file data
+ /*if (!dataincache) { // patcher
+ r.size=-r.size; // négatif
+ }*/
+
+ // Header construction: ok is set only if every write succeeded
+ ok=0;
+ if (cache_wint(cache_dat,r.statuscode)!=-1) // statuscode
+ if (cache_wLLint(cache_dat,r.size)!=-1) // size
+ if (cache_wstr(cache_dat,r.msg)!=-1) // msg
+ if (cache_wstr(cache_dat,r.contenttype)!=-1) // contenttype
+ if (cache_wstr(cache_dat,r.lastmodified)!=-1) // last-modified
+ if (cache_wstr(cache_dat,r.etag)!=-1) // Etag
+ if (cache_wstr(cache_dat,(r.location!=NULL)?r.location:"")!=-1) // 'location' for moved responses
+ if (cache_wstr(cache_dat,r.cdispo)!=-1) // Content-disposition
+ if (cache_wstr(cache_dat,"HTS")!=-1) // end of header
+ ok=1; /* ok */
+ // End of header construction
+
+ /*if ((int) fwrite((char*) &r,1,sizeof(htsblk),cache_dat) == sizeof(htsblk)) {*/
+ if (ok) {
+ if (dataincache) { // store the body?
+ if (!r.adr) { /* no data in memory (sometimes happens with a 301) */
+ if (cache_wLLint(cache_dat,0)==-1) /* 0 bytes */
+ ok=0;
+ } else if (r.is_write==0) { // body in memory: direct copy
+ if (cache_wLLint(cache_dat,r.size)!=-1) {
+ if (r.size>0) { // size>0
+ if ((INTsys) fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size)
+ ok=0;
+ } else // size=0, write nothing
+ ok=0;
+ } else
+ ok=0;
+ } else { // body on disk: copy the file into the cache
+ FILE* fp;
+ // Copy the file..
+ LLint file_size=fsize(fconv(url_save));
+ if (file_size>=0) {
+ if (cache_wLLint(cache_dat,file_size)!=-1) {
+ fp=fopen(fconv(url_save),"rb");
+ if (fp!=NULL) {
+ char buff[32768];
+ int nl;
+ do {
+ nl=fread(buff,1,32768,fp);
+ if (nl>0) {
+ if ((INTsys) fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // error
+ nl=-1;
+ ok=0;
+ }
+ }
+ } while(nl>0);
+ fclose(fp);
+ } else ok=0;
+ } else ok=0;
+ } else ok=0;
+ }
+ } else {
+ if (cache_wLLint(cache_dat,0)==-1) /* 0 bytes */
+ ok=0;
+ }
+ } else ok=0;
+ /*if (!dataincache) { // dépatcher
+ r.size=-r.size;
+ }*/
+
+ // index
+ // address + newline + file + newline
+ if (ok) {
+ buff[0]='\0'; strcat(buff,url_adr); strcat(buff,"\n"); strcat(buff,url_fil); strcat(buff,"\n");
+ cache_wstr(cache_ndx,buff);
+ fwrite(s,1,strlen(s),cache_ndx);
+ } // if ok==0, data may have been written to cache_dat for nothing; it is simply left without an index record
+
+ // flush now so that the cache survives a crash
+ fflush(cache_dat); fflush(cache_ndx);
+}
+
+
+/* Read the entry stored for adr+fil in the previous cache (cache->olddat).
+ * Returns an htsblk by value; on any failure statuscode is -1 and msg holds
+ * the reason. The header fields are decoded in every case; the body is only
+ * loaded when save is non-NULL and the entry is not header-only, either in
+ * memory (r.adr) or, under HTS_DIRECTDISK for non-hypertext 200 responses,
+ * straight into the file named by save. */
+// if save==NULL, the entry is only tested (header only)
+htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save) {
+#if HTS_FAST_CACHE
+ long int hash_pos;
+ int hash_pos_return;
+#else
+ char* a;
+#endif
+ char buff[HTS_URLMAXSIZE*2];
+ char location[HTS_URLMAXSIZE*2];
+ htsblk r;
+ int ok=0;
+ int header_only=0;
+
+ memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; strcpy(location,""); r.location=location; // NOTE(review): r.location points at the local array 'location', which no longer exists once r is returned - confirm that callers never dereference it
+#if HTS_FAST_CACHE
+ strcpy(buff,adr); strcat(buff,fil);
+ hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos);
+#else
+ buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n");
+ if (cache->use)
+ a=strstr(cache->use,buff);
+ else
+ a=NULL; // force the error path
+#endif
+
+ // on success
+#if HTS_FAST_CACHE
+ if (hash_pos_return) {
+#else
+ if (a!=NULL) { // OK, present in the cache!
+#endif
+ int pos;
+#if DEBUGCA
+ fprintf(stdout,"..cache: %s%s at ",adr,fil);
+#endif
+
+#if HTS_FAST_CACHE
+ pos=hash_pos; /* simply */
+#else
+ a+=strlen(buff);
+ sscanf(a,"%d",&pos); // read the position
+#endif
+#if DEBUGCA
+ printf("%d\n",pos);
+#endif
+
+ fflush(cache->olddat);
+ if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) {
+ /* Import cache 1.0 */
+ if (cache->version==0) {
+ OLD_htsblk old_r;
+ if (fread((char*) &old_r,1,sizeof(old_r),cache->olddat)==sizeof(old_r)) { // read everything (statuscode etc. included)
+ r.statuscode=old_r.statuscode;
+ r.size=old_r.size; // file size
+ strcpy(r.msg,old_r.msg);
+ strcpy(r.contenttype,old_r.contenttype);
+ ok=1; /* import ok */
+ }
+ /* */
+ /* Cache 1.1 */
+ } else {
+ char check[256];
+ LLint size_read;
+ check[0]='\0';
+ //
+ cache_rint(cache->olddat,&r.statuscode);
+ cache_rLLint(cache->olddat,&r.size);
+ cache_rstr(cache->olddat,r.msg);
+ cache_rstr(cache->olddat,r.contenttype);
+ cache_rstr(cache->olddat,r.lastmodified);
+ cache_rstr(cache->olddat,r.etag);
+ cache_rstr(cache->olddat,r.location);
+ if (cache->version >= 2)
+ cache_rstr(cache->olddat,r.cdispo);
+ //
+ cache_rstr(cache->olddat,check); // NOTE(review): check[] is 256 bytes; whether cache_rstr() bounds what it stores is not visible here
+ if (strcmp(check,"HTS")==0) { /* integrity OK */
+ ok=1;
+ }
+ cache_rLLint(cache->olddat,&size_read); /* read the stored byte count to be sure of the declared size (overrides r.size) */
+ if (size_read>0) { /* if written here */
+ r.size=size_read;
+ } else { /* no data directly in the cache: is the file present? */
+ if (r.statuscode!=200)
+ header_only=1; /* only the header here! */
+ }
+ }
+
+ /* Fill in some fields */
+ r.totalsize=r.size;
+
+ // the header (statuscode included) has been read
+ /*if (fread((char*) &r,1,sizeof(htsblk),cache->olddat)==sizeof(htsblk)) { // lire tout (y compris statuscode etc)*/
+ if (ok) {
+ // safety
+ r.adr=NULL;
+ r.out=NULL;
+ ////r.location=NULL; no, set for 301 or 302 responses
+ r.fp=NULL;
+
+ if ( (r.statuscode>=0) && (r.statuscode<=999)
+ && (r.notmodified>=0) && (r.notmodified<=9) ) { // small integrity check
+ if ((save) && (!header_only) ) { /* do not read the header only */
+ //int to_file=0;
+
+ r.adr=NULL; r.soc=INVALID_SOCKET;
+ // // r.location=NULL;
+
+#if HTS_DIRECTDISK
+ // Shortcut:
+ // Can the file be stored directly on disk?
+ if ((r.statuscode==200) && (!is_hypertext_mime(r.contenttype)) && (strnotempty(save))) { // not HTML: write to disk directly
+ int ok=0;
+
+ r.is_write=1; // write to disk
+ if (fexist(antislash(save))) { // a file already exists
+ //if (fsize(antislash(save))==r.size) { // same size -- NO, never mind (size may be badly declared)
+ ok=1; // nothing more to do
+ filenote(save,NULL); // note it as known
+ //}
+ }
+
+ if ((pos<0) && (!ok)) { // No data in the cache and file not found: error!
+ if (opt->norecatch) {
+ filecreateempty(save);
+ //
+ r.statuscode=-1;
+ strcpy(r.msg,"File deleted by user not recaught");
+ ok=1; // do not fetch it again (and no error)
+ } else {
+ r.statuscode=-1;
+ strcpy(r.msg,"Previous cache file not found");
+ ok=1; // do not fetch it
+ }
+ }
+
+ if (!ok) {
+ r.out=filecreate(save);
+#if HDEBUG
+ printf("direct-disk: %s\n",save);
+#endif
+ if (r.out!=NULL) {
+ char buff[32768+4];
+ LLint nl;
+ LLint size;
+ size=r.size;
+ do {
+ nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat);
+ if (nl>0) {
+ size-=nl;
+ if ((INTsys) fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // error
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Read To Disk");
+ }
+ }
+ } while((nl>0) && (size>0) && (r.statuscode!=-1));
+
+ fclose(r.out);
+ r.out=NULL;
+#if HTS_WIN==0
+ chmod(save,HTS_ACCESS_FILE);
+#endif
+ usercommand(0,NULL,antislash(save));
+ } else {
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Write Error : Unable to Create File");
+ //printf("%s\n",save);
+ }
+ }
+
+ } else
+#endif
+ { // read into memory
+
+ if (pos<0) { // No data in the cache, odd since this is html!!!
+ r.statuscode=-1;
+ strcpy(r.msg,"Previous cache file not found (2)");
+ } else {
+ // read the file (in one go)
+ r.adr=(char*) malloct((INTsys)r.size+4);
+ if (r.adr!=NULL) {
+ if ((INTsys) fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // error
+ freet(r.adr);
+ r.adr=NULL;
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Read Data");
+ } else
+ *(r.adr+r.size)='\0';
+ //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
+ } else { // error
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Memory Error");
+ }
+ }
+ }
+ } // if save==NULL, load nothing (header only)
+ } else {
+#if DEBUGCA
+ printf("Cache Read Error : Bad Data");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Bad Data");
+ }
+ } else { // error
+#if DEBUGCA
+ printf("Cache Read Error : Read Header");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Read Header");
+ }
+ } else {
+#if DEBUGCA
+ printf("Cache Read Error : Seek Failed");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Seek Failed");
+ }
+ } else {
+#if DEBUGCA
+ printf("File Cache Not Found");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"File Cache Not Found");
+ }
+ return r;
+}
+
+/* Write a data block keyed by (str1,str2) in the cache.
+   The block (length, then 'len' bytes of outbuff) is appended to cache_dat
+   and an index record "str1\nstr2\n" followed by the ASCII position of the
+   block is appended to cache_ndx.
+   Returns 1 on success, 0 if failed: missing file or key, negative length,
+   key too long for the index buffer, or any write error. */
+int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len) {
+  char buff[HTS_URLMAXSIZE*4];
+  char s[256];
+  long pos;
+
+  /* both files are written below: refuse to start without them */
+  if ((cache_dat==NULL) || (cache_ndx==NULL))
+    return 0;
+  if ((str1==NULL) || (str2==NULL) || (len<0))
+    return 0;
+  if ((outbuff==NULL) && (len>0))
+    return 0;
+  /* the key, its two separators and the terminator must fit in buff[] */
+  if (strlen(str1)+strlen(str2)+3 > sizeof(buff))
+    return 0;
+
+  fflush(cache_dat); fflush(cache_ndx);
+  pos=ftell(cache_dat);
+  if (pos<0)                       /* position unknown: nothing can be indexed */
+    return 0;
+
+  /* first write data */
+  if (cache_wint(cache_dat,len)==-1)          // length
+    return 0;
+  if ((INTsys) fwrite(outbuff,1,(INTsys)len,cache_dat) != (INTsys) len)  // data
+    return 0;
+
+  /* then write index */
+  sprintf(s,"%ld\n",pos);
+  buff[0]='\0'; strcat(buff,str1); strcat(buff,"\n"); strcat(buff,str2); strcat(buff,"\n");
+  if (cache_wstr(cache_ndx,buff)==-1)
+    return 0;
+  if (fwrite(s,1,strlen(s),cache_ndx) != strlen(s))
+    return 0;
+
+  fflush(cache_dat); fflush(cache_ndx);
+  return 1;
+}
+
+/* Read the data block keyed by (str1,str2) from the previous cache.
+   On success *inbuff receives a buffer allocated with malloct() (to be
+   released with freet() by the caller) holding *inlen bytes followed by a
+   terminating '\0', and 1 is returned.
+   On failure (fast cache disabled, unknown key, key too long, read or
+   memory error) *inbuff is NULL, *inlen is 0 and 0 is returned. */
+int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* inlen) {
+#if HTS_FAST_CACHE
+  if ((cache->hashtable) && (cache->olddat!=NULL)) {
+    char buff[HTS_URLMAXSIZE*4];
+    long int pos=0;
+    /* the key is the plain concatenation of both strings */
+    if (strlen(str1)+strlen(str2) < sizeof(buff)) {
+      strcpy(buff,str1); strcat(buff,str2);
+      if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) {
+        if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) {
+          int len=0;               /* stays 0 if the length cannot be read */
+          cache_rint(cache->olddat,&len);
+          if (len>0) {
+            char* mem_buff=(char*)malloct(len+4);    /* Plus byte 0 */
+            if (mem_buff) {
+              if ((int)fread(mem_buff,1,len,cache->olddat)==len) {  // read the whole block
+                mem_buff[len]='\0';  /* the spare bytes are there for this */
+                *inbuff=mem_buff;
+                *inlen=len;
+                return 1;
+              }
+              freet(mem_buff);
+            }
+          }
+        }
+      }
+    }
+  }
+#endif
+  *inbuff=NULL;
+  *inlen=0;
+  return 0;
+}
+
+// renvoyer uniquement en tête, ou NULL si erreur
+htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil) {
+ htsblk* r;
+ NOSTATIC_RESERVE(r, htsblk, 1);
+ *r=cache_read(opt,cache,adr,fil,NULL); // test uniquement
+ if (r->statuscode != -1)
+ return r;
+ else
+ return NULL;
+}
+
+
+// Cache initialisation: create the new cache, rename the former one, load its index..
+// Does nothing unless opt->cache is set. Steps, in order:
+//  1. create the hts-cache folder under opt->path_log
+//  2. rotate new.dat/new.ndx to old.dat/old.ndx (or drop a lone leftover new.* file)
+//  3. load old.ndx, check its version line, and (HTS_FAST_CACHE) fill cache->hashtable
+//  4. create fresh new.dat/new.ndx with a "CACHE-1.2" header, then rotate and
+//     reopen new.lst and new.txt
+// NOTE(review): several calls pass two fconcat() results to the same function
+// (rename); this is only safe if fconcat() does not reuse one buffer - confirm.
+void cache_init(cache_back* cache,httrackp* opt) {
+ // ---
+ // cache in use: rename the former cache (if any) and load its index
+ if (opt->cache) {
+#if DEBUGCA
+ printf("cache init: ");
+#endif
+#if HTS_WIN
+ mkdir(fconcat(opt->path_log,"hts-cache"));
+#else
+ mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER);
+#endif
+ if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // a previous cache already exists.. rename it
+#if DEBUGCA
+ printf("work with former cache\n");
+#endif
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/old.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/old.ndx"));
+
+ rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat"));
+ rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx"));
+ } else { // one (or both) of the cache files is missing: erase the other one, if any
+#if DEBUGCA
+ printf("new cache\n");
+#endif
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/new.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/new.ndx"));
+ }
+
+ // load the index of the previous cache
+ if ((fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))) { // previous cache
+ if ((fsize(fconcat(opt->path_log,"hts-cache/old.dat"))>=0) && (fsize(fconcat(opt->path_log,"hts-cache/old.ndx"))>0)) {
+ FILE* oldndx=NULL;
+#if DEBUGCA
+ printf("..load cache\n");
+#endif
+ cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/old.dat"),"rb");
+ oldndx=fopen(fconcat(opt->path_log,"hts-cache/old.ndx"),"rb");
+ // both must be openable
+ if ((cache->olddat==NULL) && (oldndx!=NULL)) {
+ fclose(oldndx);
+ oldndx=NULL;
+ }
+ if ((cache->olddat!=NULL) && (oldndx==NULL)) {
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ }
+ // read index
+ if (oldndx!=NULL) {
+ int buffl;
+ // the handle only served as an "openable" probe; the index is re-read whole by readfile()
+ fclose(oldndx); oldndx=NULL;
+ // read ndx, and lastmodified
+ buffl=fsize(fconcat(opt->path_log,"hts-cache/old.ndx"));
+ cache->use=readfile(fconcat(opt->path_log,"hts-cache/old.ndx"));
+ if (cache->use!=NULL) {
+ // NOTE(review): cache_brstr() copies as many bytes as the index file announces;
+ // nothing here limits that to sizeof(firstline)
+ char firstline[256];
+ char* a=cache->use;
+ a+=cache_brstr(a,firstline);
+ if (strncmp(firstline,"CACHE-",6)==0) { // New cache version
+ if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x
+ cache->version=(int)(firstline[8]-'0'); // cache 1.x
+ if (cache->version <= 2) {
+ a+=cache_brstr(a,firstline);
+ strcpy(cache->lastmodified,firstline);
+ } else {
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version);
+ fflush(opt->errlog);
+ }
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ freet(cache->use);
+ cache->use=NULL;
+ }
+ } else { // not supported
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline);
+ fflush(opt->errlog);
+ }
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ freet(cache->use);
+ cache->use=NULL;
+ }
+ /* */
+ } else { // Old cache version
+ /* */
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF);
+ fflush(opt->log);
+ }
+ cache->version=0; // cache 1.0
+ strcpy(cache->lastmodified,firstline);
+ }
+ // NOTE(review): also set when the cache was just rejected as unsupported - confirm this is intended
+ opt->is_update=1; // flag as an update
+
+ /* Create hash table for the cache (MUCH FASTER!) */
+#if HTS_FAST_CACHE
+ if (cache->use) {
+ char line[HTS_URLMAXSIZE*2];
+ char linepos[256];
+ int pos;
+ while ( (a!=NULL) && (a < (cache->use+buffl) ) ) {
+ a=strchr(a+1,'\n'); /* start of line */
+ if (a) {
+ a++;
+ /* read "host/file" */
+ a+=binput(a,line,HTS_URLMAXSIZE);
+ a+=binput(a,line+strlen(line),HTS_URLMAXSIZE);
+ /* read position */
+ a+=binput(a,linepos,200);
+ /* NOTE(review): pos keeps its previous (or indeterminate) value if linepos is not a number */
+ sscanf(linepos,"%d",&pos);
+ inthash_add((inthash)cache->hashtable,line,pos);
+ }
+ }
+ /* Not needed anymore! */
+ freet(cache->use);
+ cache->use=NULL;
+ }
+#endif
+ }
+ }
+ } // cache size>0
+ } // previous cache exists
+
+#if DEBUGCA
+ printf("..create cache\n");
+#endif
+ // open the current caches
+ cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
+ cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
+ // both must be openable
+ if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
+ fclose(cache->ndx);
+ cache->ndx=NULL;
+ }
+ if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
+ fclose(cache->dat);
+ cache->dat=NULL;
+ }
+
+ if (cache->ndx!=NULL) {
+ char s[256];
+
+ cache_wstr(cache->dat,"CACHE-1.2");
+ fflush(cache->dat);
+ cache_wstr(cache->ndx,"CACHE-1.2");
+ fflush(cache->ndx);
+ //
+ time_gmt_rfc822(s); // current GMT date and time for If-Modified-Since..
+ cache_wstr(cache->ndx,s);
+ fflush(cache->ndx); // a little fflush, just in case
+
+ // remove old.lst
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
+ remove(fconcat(opt->path_log,"hts-cache/old.lst"));
+ // rename
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
+ rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
+ // open
+ cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
+ {
+ filecreate_params tmp;
+ strcpy(tmp.path,opt->path_html); // path
+ tmp.lst=cache->lst; // lst file (NULL if the fopen above failed)
+ filenote("",&tmp); // initialise filecreate
+ }
+
+ // remove old.txt
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
+ remove(fconcat(opt->path_log,"hts-cache/old.txt"));
+ // rename
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
+ rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
+ // open
+ cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
+ if (cache->txt) {
+ fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
+ fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ }
+
+ // test
+ // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
+ }
+
+ }
+
+}
+
+
+
+
+// Read a whole file into a freshly allocated buffer (binary-safe: may hold \0)
+// One extra '\0' byte is appended after the data.
+// Returns NULL when fsize() reports no data, or on open, allocation or read failure.
+char* readfile(char* fil) {
+  FILE* fp;
+  char* data;
+  int size=fsize(fil);
+  if (size<=0)              // missing or empty
+    return NULL;
+  fp=fopen(fconv(fil),"rb");
+  if (fp==NULL)             // cannot be opened
+    return NULL;
+  data=(char*) malloct(size+1);
+  if (data!=NULL) {
+    if ((int) fread(data,1,size,fp)==size) {
+      data[size]='\0';
+    } else {                // short read: damaged file?
+      freet(data);
+      data=NULL;
+    }
+  }
+  fclose(fp);
+  return data;
+}
+
+// Like readfile(), with two fallbacks:
+// - if fil does not exist, it is looked up under hts_rootdir(NULL) instead
+// - if nothing could be read, a malloct()'ed copy of defaultdata is returned
+// Returns NULL only if that last allocation fails.
+char* readfile_or(char* fil,char* defaultdata) {
+  char* path=fexist(fil) ? fil : fconcat(hts_rootdir(NULL),fil);
+  char* content=readfile(path);
+  if (content==NULL) {
+    content=malloct(strlen(defaultdata)+2);
+    if (content!=NULL)
+      strcpy(content,defaultdata);
+  }
+  return content;
+}
+
+// Write a string to a file as "<decimal length>\n" followed by the raw bytes
+// -1: error, otherwise 0
+int cache_wstr(FILE* fp,char* s) {
+  char header[256+4];
+  size_t header_len;
+  int len=strlen(s);
+  sprintf(header,"%d\n",len);
+  header_len=strlen(header);
+  if (fwrite(header,1,header_len,fp) != header_len)
+    return -1;
+  // an empty string is stored as its length line only
+  if ((len>0) && ((int) fwrite(s,1,len,fp) != len))
+    return -1;
+  return 0;
+}
+/*
+ * Read back a string stored by cache_wstr(): a decimal length line, then that
+ * many raw bytes. The result is always '\0'-terminated; an unparsable or
+ * negative length yields an empty string, and a short read is terminated
+ * after the bytes actually read.
+ * The length comes from the file and is not checked against the size of s:
+ * the caller must provide a large enough buffer.
+ */
+void cache_rstr(FILE* fp,char* s) {
+  int i=0;
+  size_t got=0;
+  char buff[256+4];
+  linput(fp,buff,256);
+  /* sscanf leaves i untouched on failure: never index s with garbage */
+  if ((sscanf(buff,"%d",&i) != 1) || (i<0))
+    i=0;
+  if (i>0)
+    got=fread(s,1,i,fp);
+  s[got]='\0';
+}
+/*
+ * In-memory counterpart of cache_rstr(): reads a length line at adr with
+ * binput(), then copies that many bytes into s and terminates it.
+ * Returns the number of bytes consumed from adr (length line as counted by
+ * binput(), plus the string bytes). An unparsable or negative length is
+ * treated as an empty string.
+ * The length is not checked against the size of s: the caller must provide
+ * a large enough buffer.
+ */
+int cache_brstr(char* adr,char* s) {
+  int i=0;
+  int off;
+  char buff[256+4];
+  off=binput(adr,buff,256);
+  adr+=off;
+  /* sscanf leaves i untouched on failure: never index s with garbage */
+  if ((sscanf(buff,"%d",&i) != 1) || (i<0))
+    i=0;
+  if (i>0)
+    strncpy(s,adr,i);
+  *(s+i)='\0';
+  off+=i;
+  return off;
+}
+/*
+ * Same contract as cache_brstr(): reads a length line at adr with binput(),
+ * copies that many bytes into s, terminates it, and returns the number of
+ * bytes consumed from adr. An unparsable or negative length is treated as an
+ * empty string.
+ * The length is not checked against the size of s.
+ */
+int cache_quickbrstr(char* adr,char* s) {
+  int i=0;
+  int off;
+  char buff[256+4];
+  off=binput(adr,buff,256);
+  adr+=off;
+  /* sscanf leaves i untouched on failure: never index s with garbage */
+  if ((sscanf(buff,"%d",&i) != 1) || (i<0))
+    i=0;
+  if (i>0)
+    strncpy(s,adr,i);
+  *(s+i)='\0';
+  off+=i;
+  return off;
+}
+/* same as cache_brstr(), but the string is parsed as an int */
+/* returns what cache_brstr() returned; *i is only written if parsing succeeds */
+int cache_brint(char* adr,int* i) {
+  char digits[256];
+  int consumed=cache_brstr(adr,digits);
+  if (consumed==-1)
+    return consumed;
+  sscanf(digits,"%d",i);
+  return consumed;
+}
+/* read an int stored as a cache_wstr() decimal string; *i is left as is if parsing fails */
+void cache_rint(FILE* fp,int* i) {
+  char text[256];
+  cache_rstr(fp,text);
+  sscanf(text,"%d",i);
+}
+/* write an int as a cache_wstr() decimal string; returns cache_wstr()'s result */
+int cache_wint(FILE* fp,int i) {
+  char text[256];
+  sprintf(text,"%d",i);
+  return cache_wstr(fp,text);
+}
+/* LLint variant of cache_rint(): the stored string is parsed with the LLintP format */
+void cache_rLLint(FILE* fp,LLint* i) {
+  char text[256];
+  cache_rstr(fp,text);
+  sscanf(text,LLintP,i);
+}
+/* LLint variant of cache_wint(): the value is formatted with LLintP, then written by cache_wstr() */
+int cache_wLLint(FILE* fp,LLint i) {
+  char text[256];
+  sprintf(text,LLintP,i);
+  return cache_wstr(fp,text);
+}
+// -- cache --
diff --git a/src/htscache.h b/src/htscache.h
new file mode 100644
index 0000000..08069d1
--- /dev/null
+++ b/src/htscache.h
@@ -0,0 +1,64 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* cache system (index and stores files in cache) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSCACHE_DEFH
+#define HTSCACHE_DEFH
+
+#include "htscore.h"
+
+// cache
+// entry-level API: add entries, read them back, initialise the cache files
+void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save);
+void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache);
+htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save);
+// header only; NULL on error
+htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil);
+void cache_init(cache_back* cache,httrackp* opt);
+
+// raw records keyed by (str1,str2): 1 on success, 0 on failure
+int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len);
+int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* len);
+
+// length-prefixed strings and integers
+// w*: write to a file (-1 on error, otherwise 0), r*: read from a file,
+// b*: read from a memory buffer (returns the number of bytes consumed)
+int cache_wstr(FILE* fp,char* s);
+void cache_rstr(FILE* fp,char* s);
+int cache_brstr(char* adr,char* s);
+int cache_quickbrstr(char* adr,char* s);
+int cache_brint(char* adr,int* i);
+void cache_rint(FILE* fp,int* i);
+int cache_wint(FILE* fp,int i);
+void cache_rLLint(FILE* fp,LLint* i);
+int cache_wLLint(FILE* fp,LLint i);
+#endif
diff --git a/src/htscatchurl.c b/src/htscatchurl.c
new file mode 100644
index 0000000..c119677
--- /dev/null
+++ b/src/htscatchurl.c
@@ -0,0 +1,296 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: URL catch .c */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// URL catcher source file (.c)
+
+/* specific definitions */
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#if HTS_WIN
+#else
+#include <arpa/inet.h>
+#endif
+/* END specific definitions */
+
+/* global definitions */
+#include "htsglobal.h"
+
+/* htslib */
+/*#include "htslib.h"*/
+
+/* catch url */
+#include "htscatchurl.h"
+
+
+// URL Link catcher
+
+// 0- Init the URL catcher with standard port
+
+// catch_url_init(&port,&return_host);
+T_SOC catch_url_init_std(int* port_prox,char* adr_prox) {
+  /* candidate ports, tried in order; the negative entry ends the list */
+  int candidates[]={8080,3128,80,81,82,8081,3129,31337,0,-1};
+  int idx=0;
+  T_SOC soc;
+  for(;;) {
+    /* catch_url_init() may rewrite the port it was given */
+    soc=catch_url_init(&candidates[idx],adr_prox);
+    *port_prox=candidates[idx];
+    idx++;
+    if ((soc != INVALID_SOCKET) || (candidates[idx]<0))
+      break;
+  }
+  return soc;
+}
+
+
+// 1- Init the URL catcher
+
+// catch_url_init(&port,&return_host);
+/*
+ * Create a listening socket bound to this host's own address.
+ * port: in: port to bind (catch_url_init_std() also passes 0);
+ *       out: port reported by getsockname() for the bound socket
+ * adr:  out: textual address of the bound socket (written through
+ *       SOCaddr_inetntoa() with a size of 128)
+ * Returns the listening socket, or INVALID_SOCKET on any failure (the socket
+ * is closed in that case).
+ */
+T_SOC catch_url_init(int* port,char* adr) {
+  T_SOC soc = INVALID_SOCKET;
+  char h_loc[256+2];
+
+  /*
+#ifdef _WIN32
+  {
+    WORD wVersionRequested;
+    WSADATA wsadata;
+    int stat;
+    wVersionRequested = 0x0101;
+    stat = WSAStartup( wVersionRequested, &wsadata );
+    if (stat != 0) {
+      return INVALID_SOCKET;
+    } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) {
+      WSACleanup();
+      return INVALID_SOCKET;
+    }
+  }
+#endif
+  */
+
+  if (gethostname(h_loc,256)==0) {    // host name
+    SOCaddr server;
+    int server_size=sizeof(server);
+    t_hostent* hp_loc;
+    t_fullhostent buffer;
+
+    // clear structure
+    memset(&server, 0, sizeof(server));
+
+    if ( (hp_loc=vxgethostbyname(h_loc, &buffer)) ) {  // our host
+
+      // copy address
+      SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length);
+
+      if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) {
+        int listening=0;
+        SOCaddr_initport(server, *port);
+        if ( bind(soc,(struct sockaddr*) &server,server_size) == 0 ) {
+          SOCaddr server2;
+          int len;
+          len=sizeof(server2);
+          // clear structure
+          memset(&server2, 0, sizeof(server2));
+          if (getsockname(soc,(struct sockaddr*) &server2,&len) == 0) {
+            // read the port from the address getsockname() filled in, not from
+            // the one we asked for: the two differ when port 0 was requested
+            *port=ntohs(SOCaddr_sinport(server2));
+            if (listen(soc,10)>=0) {    // backlog of 10 is arbitrary
+              SOCaddr_inetntoa(adr, 128, server2, len);
+              listening=1;
+            }
+          }
+        }
+        // single cleanup path for bind/getsockname/listen failures
+        if (!listening) {
+#ifdef _WIN32
+          closesocket(soc);
+#else
+          close(soc);
+#endif
+          soc=INVALID_SOCKET;
+        }
+      }
+    }
+  }
+  return soc;
+}
+
+// 2 - Wait for URL
+
+// catch_url
+// Accepts one connection on the listening socket soc, reads the request line
+// and headers, answers with CATCH_RESPONSE and closes the connection.
+// returns 0 if error, 1 if a request was captured
+// url: buffer where URL must be stored - or ip:port in case of failure
+// method: receives the request method, converted to upper case
+// data: 32Kb; receives the rebuilt request (request line, headers, and up to
+//       32000 bytes of body). Writes are capped at 32Kb: anything that does
+//       not fit is dropped.
+// NOTE(review): url and method are filled from a request line of up to 999
+// characters; their sizes are not known here - callers must size them accordingly.
+int catch_url(T_SOC soc,char* url,char* method,char* data) {
+  enum { data_size=32768 };     // documented size of the data buffer
+  int retour=0;
+
+  // connection (accept)
+  if (soc != INVALID_SOCKET) {
+    T_SOC soc2;
+    struct sockaddr dummyaddr;
+    int dummylen = sizeof(struct sockaddr);
+    // NOTE(review): retries forever if accept() keeps failing
+    while ( (soc2=accept(soc,&dummyaddr,&dummylen)) == INVALID_SOCKET);
+    /*
+#ifdef _WIN32
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+    */
+    // from here on soc is the accepted connection; the listening socket is left open
+    soc = soc2;
+    /* INFOS */
+    {
+      SOCaddr server2;
+      int len;
+      len=sizeof(server2);
+      // clear structure
+      memset(&server2, 0, sizeof(server2));
+      if (getpeername(soc,(struct sockaddr*) &server2,&len) == 0) {
+        char dot[256+2];
+        SOCaddr_inetntoa(dot, 256, server2, sizeof(server2));
+        sprintf(url,"%s:%d", dot, htons(SOCaddr_sinport(server2)));
+      }
+    }
+    /* INFOS */
+
+    // reception
+    if (soc != INVALID_SOCKET) {
+      char line[1000];
+      char protocol[256];
+      line[0]=protocol[0]='\0';
+      //
+      socinput(soc,line,1000);
+      if (strnotempty(line)) {
+        // protocol is bounded to its buffer: the line comes from the network
+        if (sscanf(line,"%s %s %255s",method,url,protocol) == 3) {
+          char url_adr[HTS_URLMAXSIZE*2];
+          char url_fil[HTS_URLMAXSIZE*2];
+          // method in upper case
+          int i,r=0;
+          url_adr[0]=url_fil[0]='\0';
+          //
+          for(i=0;i<(int) strlen(method);i++) {
+            if ((method[i]>='a') && (method[i]<='z'))
+              method[i]-=('a'-'A');
+          }
+          // link address
+          if (ident_url_absolute(url,url_adr,url_fil)>=0) {
+            // header processing
+            char loc[HTS_URLMAXSIZE*2];
+            htsblk blkretour;
+            int pos;
+            int full=0;   // set once a header line did not fit in data
+            memset(&blkretour, 0, sizeof(htsblk));    // clear
+            blkretour.location=loc;    // if not NULL, will hold the real address in case of moved xx
+            // read remaining headers
+            sprintf(data,"%s %s %s\r\n",method,url_fil,protocol);
+            pos=(int) strlen(data);
+            while(strnotempty(line)) {
+              int l;
+              socinput(soc,line,1000);
+              treathead(NULL,NULL,NULL,&blkretour,line);  // process
+              l=(int) strlen(line);
+              if (!full && (pos+l+2 < data_size)) {
+                memcpy(data+pos,line,l);
+                memcpy(data+pos+l,"\r\n",3);      // CRLF plus terminator
+                pos+=l+2;
+              } else
+                full=1;
+            }
+            // the final header CR/LF is not needed: it was already added for the empty line just above
+            if (blkretour.totalsize>0) {
+              int len=(int)min(blkretour.totalsize,32000);
+              // never read past the end of data (one byte is kept for the terminator)
+              if (len > data_size-1-pos)
+                len=data_size-1-pos;
+              // copy the rest (POST body, if any)
+              while((len>0) && ((r=recv(soc,(char*) data+pos,len,0))>0) ) {
+                pos+=r;
+                len-=r;
+                data[pos]='\0';      // terminate with NUL
+              }
+            }
+            // send page (sent as is: it must not go through a printf format)
+            send(soc,CATCH_RESPONSE,strlen(CATCH_RESPONSE),0);
+            // OK!
+            retour=1;
+          }
+        }
+      } // otherwise error
+    }
+  }
+  if (soc != INVALID_SOCKET) {
+#ifdef _WIN32
+    closesocket(soc);
+    /*
+    WSACleanup();
+    */
+#else
+    close(soc);
+#endif
+  }
+  return retour;
+}
+
+
+
+// Read one line from a socket
+// s receives at most max-1 characters plus the terminating '\0'.
+// Reading stops at LF, on recv() failure, or when s is full; CR, TAB and
+// form feed characters are skipped.
+// Nothing is written if max<=0.
+void socinput(T_SOC soc,char* s,int max) {
+  int j=0;
+  int done=0;
+  if (max<=0)
+    return;
+  // the bound is checked before each read, so a buffer of size 1 is never overrun
+  while(!done && (j<(max-1))) {
+    unsigned char b;
+    if (recv(soc,(char*) &b,1,0)==1) {
+      switch(b) {
+        case 10: done=1; break;               // LF: end of line
+        case 13: case 9: case 12: break;      // skip CR, TAB, FF
+        default: s[j++]=(char) b; break;
+      }
+    } else
+      done=1;                                 // connection closed or error
+  }
+  s[j]='\0';
+}
+
diff --git a/src/htscatchurl.h b/src/htscatchurl.h
new file mode 100644
index 0000000..77036fd
--- /dev/null
+++ b/src/htscatchurl.h
@@ -0,0 +1,76 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: URL catch .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// URL catcher header file (.h)
+
+#ifndef HTS_CATCHURL_DEFH
+#define HTS_CATCHURL_DEFH
+
+#include "htsbasenet.h"
+
+// Functions
+// read one line from a socket into s (at most max bytes, terminator included)
+void socinput(T_SOC soc,char* s,int max);
+// open the catcher's listening socket, trying a list of standard ports
+T_SOC catch_url_init_std(int* port_prox,char* adr_prox);
+// open the catcher's listening socket on *port; adr receives the bound address
+T_SOC catch_url_init(int* port,char* adr);
+// wait for one request on soc; returns 0 if error
+int catch_url(T_SOC soc,char* url,char* method,char* data);
+
+// Complete HTTP response (status line, headers and HTML page) sent back to the
+// browser once its request has been captured.
+// The last line carries no trailing backslash, so the macro ends here and
+// cannot swallow whatever line is placed after it.
+#define CATCH_RESPONSE \
+  "HTTP/1.0 200 OK\r\n"\
+  "Content-type: text/html\r\n"\
+  "\r\n"\
+  "<!-- Generated by HTTrack Website Copier -->\r\n"\
+  "<HTML><HEAD>\r\n"\
+  "<TITLE>Link caught!</TITLE>\r\n"\
+  "<SCRIPT LANGUAGE=\"Javascript\">\r\n"\
+  "<!--\r\n"\
+  "function back() {\r\n"\
+  "  history.go(-1);\r\n"\
+  "}\r\n"\
+  "// -->\r\n"\
+  "</SCRIPT>\r\n"\
+  "</HEAD>\r\n"\
+  "<BODY>\r\n"\
+  "<H2>Link captured into HTTrack Website Copier, you can now restore your proxy preferences!</H2>\r\n"\
+  "<BR><BR>\r\n"\
+  "<H3><A HREF=\"javascript:back();\">Clic here to go back</A></H3>\r\n"\
+  "</BODY></HTML>"\
+  "<!-- Generated by HTTrack Website Copier -->\r\n"\
+  "\r\n"
+
+#endif
+
+
+
diff --git a/src/htsconfig.h b/src/htsconfig.h
new file mode 100644
index 0000000..665c9df
--- /dev/null
+++ b/src/htsconfig.h
@@ -0,0 +1,133 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Global engine definition file */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Full set of engine (robot) parameters
+
+#ifndef HTTRACK_GLOBAL_ENGINE_DEFH
+#define HTTRACK_GLOBAL_ENGINE_DEFH
+
+// ------------------------------------------------------------
+// ROBOT definitions
+
+// mirrors accessible to other users (0/1)
+#define HTS_ACCESS 1
+
+// socket poll time: 1/10s
+// NOTE(review): 100000 equals 1/10s only if the unit is microseconds, despite the _MS name - confirm
+#define HTS_SOCK_SEC 0
+#define HTS_SOCK_MS 100000
+
+// default name
+#define DEFAULT_HTML "index.html"
+
+// default name for / in ftp
+#define DEFAULT_FTP "index.txt"
+
+// default extension for files that have none
+#define DEFAULT_EXT ".html"
+#define DEFAULT_EXT_SHORT ".htm"
+//#define DEFAULT_EXT ".txt"
+//#define DEFAULT_EXT_SHORT ".txt"
+
+// avoid /nul, /con..
+#define HTS_OVERRIDE_DOS_FOLDERS 1
+
+// indexing (keyword)
+#define HTS_MAKE_KEYWORD_INDEX 1
+
+// stdin polling allowed? (0/1)
+#define HTS_POLL 1
+
+// check links without extension (0/1) [to be avoided, very slow]
+#define HTS_CHECK_STRANGEDIR 0
+
+// a trailing slash means html by default (example/ is always an html)
+#define HTS_SLASH_ISHTML 1
+
+// remove index if an identical directory exists
+#define HTS_REMOVE_ANNOYING_INDEX 1
+
+// direct write to disk possible (0/1)
+#define HTS_DIRECTDISK 1
+
+// maintain a hash table?
+#define HTS_HASH 1
+
+// fast cache (build hash table)
+#define HTS_FAST_CACHE 1
+
+// '>' may be considered as a comment closing tag (<!-- > is valid)
+#define GT_ENDS_COMMENT 1
+
+// always adds a '/' at the end if a '~' is encountered (/~smith -> /~smith/)
+#define HTS_TILDE_SLASH 0
+
+// always transform a '//' into a single '/'
+#define HTS_STRIP_DOUBLE_SLASH 0
+
+// case-sensitive for folders and files (0/1)
+// [normally 1, but causes problems (malformed url for example) and is not very useful..
+// ..and not much respected]
+#define HTS_CASSE 0
+
+// Must a file whose size differs from its content-length be cancelled?
+// SEE opt.tolerant and opt.http10
+// #define HTS_CL_IS_FATAL 0
+
+// an error removes the file on disk
+// (not set because of retry)
+#define HTS_REMOVE_BAD_FILES 0
+
+// in case of a Range: xx- giving a Content-length: xx
+// then skip the file, considered as transferred
+// #define HTS_SKIP_FULL_RANGE 1
+
+// max number of filters the user can set
+// #define HTS_FILTERSMAX 10000
+#define HTS_FILTERSINC 1000
+
+// non-blocking connect? (poll on write)
+#define HTS_XCONN 1
+
+// non-blocking gethostbyname? (multithread handling)
+#define HTS_XGETHOST 1
+
+// after how many seconds should the transfer rate be examined?
+#define HTS_WATCHRATE 15
+
+// ------------------------------------------------------------
+//
+
+#endif
diff --git a/src/htscore.c b/src/htscore.c
new file mode 100644
index 0000000..1b9db7a
--- /dev/null
+++ b/src/htscore.c
@@ -0,0 +1,4158 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Main source */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+/* File defs */
+#include "htscore.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsbauth.h"
+#include "htsmd5.h"
+#include "htsindex.h"
+
+// htswrap_add
+#include "htswrap.h"
+/* END specific definitions */
+
+
+/* HTML parsing */
+/* Global state only compiled in when HTS_ANALYSTE is set */
+#if HTS_ANALYSTE
+
+/* one global per t_hts_htmlcheck_* type */
+/* NOTE(review): presumably callback pointers installed by the host application - confirm against htscore.h */
+t_hts_htmlcheck_init hts_htmlcheck_init;
+t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
+t_hts_htmlcheck_start hts_htmlcheck_start;
+t_hts_htmlcheck_end hts_htmlcheck_end;
+t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+t_hts_htmlcheck hts_htmlcheck;
+t_hts_htmlcheck_query hts_htmlcheck_query;
+t_hts_htmlcheck_query2 hts_htmlcheck_query2;
+t_hts_htmlcheck_query3 hts_htmlcheck_query3;
+t_hts_htmlcheck_loop hts_htmlcheck_loop;
+t_hts_htmlcheck_check hts_htmlcheck_check;
+t_hts_htmlcheck_pause hts_htmlcheck_pause;
+t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
+t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
+t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
+t_hts_htmlcheck_savename hts_htmlcheck_savename;
+
+char _hts_errmsg[1100]="";
+int _hts_in_html_parsing=0;
+int _hts_in_html_done=0; // % done
+int _hts_in_html_poll=0; // parsing
+int _hts_setpause=0;
+//httrackp* _hts_setopt=NULL;
+char** _hts_addurl=NULL;
+
+//
+int _hts_cancel=0;
+#endif
+
+
+
+int exit_xh; /* quick exit (fatal error or interrupt) */
+
+/* debug */
+/* buffers and counters that only exist in the matching debug builds */
+#if DEBUG_SHOWTYPES
+char REG[32768]="\n";
+#endif
+#if NSDEBUG
+int nsocDEBUG=0;
+#endif
+
+//
+// terminal escape sequences: clear screen, and move the cursor to (X,Y)
+// (X and Y must be string literals: they are pasted into the sequence)
+#define _CLRSCR printf("\33[m\33[2J");
+#define _GOTOXY(X,Y) printf("\33[" X ";" Y "f");
+
+#if DEBUG_CHECKINT
+ // _CHECKINT_FAIL: print the message and exit(1)
+ // _CHECKINT: integrity check on *obj_ptr - fails if the pointer is NULL, or
+ // if the first or the last byte of the object is not zero
+ // NOTE(review): this presumes the checked structures keep zero guard bytes at both ends - confirm
+ #define _CHECKINT_FAIL(a) printf("\n%s\n",a); fflush(stdout); exit(1);
+ #define _CHECKINT(obj_ptr,message) \
+ if (obj_ptr) {\
+ if (( * ((char*) (obj_ptr)) != 0) || ( * ((char*) (((char*) (obj_ptr)) + sizeof(*(obj_ptr))-1)) != 0)) {\
+ char msg[1100];\
+ if (( * ((char*) (obj_ptr)) != 0) && ( * ((char*) (((char*) (obj_ptr)) + sizeof(*(obj_ptr))-1)) != 0))\
+ sprintf(msg,"* PANIC: Integrity error (structure crushed) in: %s",message);\
+ else if ( * ((char*) (obj_ptr)) != 0)\
+ sprintf(msg,"* PANIC: Integrity error (start of structure) in: %s",message);\
+ else\
+ sprintf(msg,"* PANIC: Integrity error (end of structure) in: %s",message);\
+ _CHECKINT_FAIL(msg);\
+ }\
+ } else {\
+ char msg[1100];\
+ sprintf(msg,"* PANIC: NULL pointer in: %s",message);\
+ _CHECKINT_FAIL(msg);\
+ }
+#endif
+
+#if DEBUG_HASH
+ // longest hash chain?
+ int longest_hash[3]={0,0,0},hashnumber=0;
+#endif
+
+// request for interaction with the shell
+#if HTS_ANALYSTE
+char HTbuff[2048];
+#endif
+
+
+
+// Start of httpmirror, auxiliary routines
+
+// version 1 for httpmirror
+// flush if the file has to be read progressively
+#define test_flush if (opt.flush) { fflush(opt.log); fflush(opt.errlog); }
+
+// shortcuts to lighten the syntax: fields of the current link, liens[ptr]
+#define urladr (liens[ptr]->adr)
+#define urlfil (liens[ptr]->fil)
+#define savename (liens[ptr]->sav)
+//#define level (liens[ptr]->depth)
+
+// in case we must quickly leave xhttpmirror (out of memory, etc)
+// note: go from liens_max.. down to 0.. otherwise memory violation: the following links
+// no longer belong to us.. argh! [that one was tough]
+#if HTS_ANALYSTE
+#define HTMLCHECK_UNINIT { \
+if ( (opt.debug>0) && (opt.log!=NULL) ) { \
+fspc(opt.log,"info"); fprintf(opt.log,"engine: end"LF); \
+} \
+hts_htmlcheck_end(); \
+}
+#else
+ #define HTMLCHECK_UNINIT
+#endif
+
+// XH_extuninit: release everything the mirror routine owns, using the local
+// names of the function the macro is expanded in (liens, filters, back,
+// robots, cache, opt, ...):
+// - link records: only entries with firstblock==1 are freed, as the others
+//   point inside a block that was allocated once
+// - filters, the back array, the robots data
+// - the cache buffer and every cache/statistics/index file handle
+// - cookies are saved to cookies.txt when opt.accept_cookie is set
+// - the cache hash table and the three template buffers
+// XH_uninit: same, and also frees r.adr
+#define XH_extuninit { \
+ int i; \
+ HTMLCHECK_UNINIT \
+ if (liens!=NULL) { \
+ for(i=lien_max-1;i>=0;i--) { \
+ if (liens[i]) { \
+ if (liens[i]->firstblock==1) { \
+ freet(liens[i]); \
+ liens[i]=NULL; \
+ } \
+ } \
+ } \
+ freet(liens); \
+ liens=NULL; \
+ } \
+ if (filters && filters[0]) { \
+ freet(filters[0]); filters[0]=NULL; \
+ } \
+ if (filters) { \
+ freet(filters); filters=NULL; \
+ } \
+ if (back) { \
+ int i; \
+ for(i=0;i<back_max;i++) { \
+ back_delete(back,i); \
+ } \
+ freet(back); back=NULL; \
+ } \
+ checkrobots_free(&robots);\
+ if (cache.use) { freet(cache.use); cache.use=NULL; } \
+ if (cache.dat) { fclose(cache.dat); cache.dat=NULL; } \
+ if (cache.ndx) { fclose(cache.ndx); cache.ndx=NULL; } \
+ if (cache.olddat) { fclose(cache.olddat); cache.olddat=NULL; } \
+ if (cache.lst) { fclose(cache.lst); cache.lst=NULL; } \
+ if (cache.txt) { fclose(cache.txt); cache.txt=NULL; } \
+ if (opt.log) fflush(opt.log); \
+ if (opt.errlog) fflush(opt.errlog);\
+ if (makestat_fp) { fclose(makestat_fp); makestat_fp=NULL; } \
+ if (maketrack_fp){ fclose(maketrack_fp); maketrack_fp=NULL; } \
+ if (opt.accept_cookie) cookie_save(opt.cookie,fconcat(opt.path_log,"cookies.txt")); \
+ if (makeindex_fp) { fclose(makeindex_fp); makeindex_fp=NULL; } \
+ if (cache_hashtable) { inthash_delete(&cache_hashtable); } \
+ if (template_header) { freet(template_header); template_header=NULL; } \
+ if (template_body) { freet(template_body); template_body=NULL; } \
+ if (template_footer) { freet(template_footer); template_footer=NULL; } \
+ structcheck_init(-1); \
+}
+#define XH_uninit XH_extuninit if (r.adr) { freet(r.adr); r.adr=NULL; }
+
+// Recording a link:
+// the needed size is computed: size of the 3 strings to store (size forced even, plus 2 safety bytes)
+// then we check there is enough room left in the buffer - otherwise another one is allocated
+// finally we write at the current buffer address, which is advanced; the available size is then decreased accordingly
+// codebase: if not empty and a .class is stored, it is noted as the primary path for classes
+// FA,FS: former_adr and former_fil, original link
+#define REALLOC_SIZE 8192
+// with HTS_HASH the saved-name length is not recorded: the macro expands to nothing
+#if HTS_HASH
+#define liens_record_sav_len(A)
+#else
+#define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav)
+#endif
+
+#define liens_record(A,F,S,FA,FF) { \
+int notecode=0; \
+int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\
+ adr_len=strlen(A),\
+ fil_len=strlen(F),\
+ sav_len=strlen(S),\
+ cod_len=0,\
+ former_adr_len=strlen(FA),\
+ former_fil_len=strlen(FF); \
+if (former_adr_len>0) {\
+ former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+ former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+} else former_adr_len=former_fil_len=0;\
+if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; \
+cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
+adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \
+lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
+lien_size=add_tab_alloc; \
+if (lien_buffer!=NULL) { \
+liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+liens[lien_tot]->firstblock=1; \
+} \
+} else { \
+liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+liens[lien_tot]->firstblock=0; \
+} \
+if (liens[lien_tot]!=NULL) { \
+liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
+liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
+liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
+liens[lien_tot]->cod=NULL; \
+if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpy(liens[lien_tot]->cod,codebase); } \
+if (former_adr_len>0) {\
+liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
+liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
+strcpy(liens[lien_tot]->former_adr,FA); \
+strcpy(liens[lien_tot]->former_fil,FF); \
+}\
+strcpy(liens[lien_tot]->adr,A); \
+strcpy(liens[lien_tot]->fil,F); \
+strcpy(liens[lien_tot]->sav,S); \
+liens_record_sav_len(liens[lien_tot]); \
+hash_write(&hash,lien_tot); \
+} \
+}
+
+/* - abandonné (simplifie) -
+// Ajouter à un lien EXISTANT deux champs former_adr et former_fil pour indiquer le nom d'un fichier avant un "move"
+// NOTE: si un alloc est fait ici il n'y aura pas de freet() à la fin, tant pis (firstbloc)
+#define liens_add_former(index,A,F) { \
+int adr_len=strlen(A),fil_len=strlen(F); \
+adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; \
+if ((int) lien_size < (int) (adr_len+fil_len)) { \
+lien_buffer=(char*) calloct(add_tab_alloc,1); \
+lien_size=add_tab_alloc; \
+} \
+if (lien_buffer!=NULL) { \
+if (liens[lien_tot]!=NULL) { \
+liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
+liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
+strcpy(liens[lien_tot]->former_adr,A); \
+strcpy(liens[lien_tot]->former_fil,F); \
+} \
+} \
+}
+*/
+
+#if 0 /* disabled variant: the HT_ADD_* macros write straight to the output file through fp */
+#define HT_ADD_ADR { \
+ fwrite(lastsaved,1,((int) (adr - lastsaved)),fp); \
+ lastsaved=adr; }
+#define HT_ADD(A) fwrite(A,1,(int) strlen(A),fp);
+#define HT_ADD_START
+#define HT_ADD_END if (fp) { fclose(fp); fp=NULL; }
+#define HT_ADD_FOP { \
+ fp=filecreate(savename); \
+ if (fp==NULL) { \
+ if (opt.errlog) { \
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to create %s for %s%s"LF,savename,urladr,urlfil); \
+ test_flush; \
+ } \
+ freet(r.adr); r.adr=NULL; \
+ error=1; \
+ } \
+ }
+#else /* active variant: the HT_ADD_* macros accumulate the page in the ht_buff memory buffer */
+// optimized version, which makes it possible to leave unmodified html files untouched (update)
+// HT_ADD_CHK(A): grow ht_buff when A more bytes plus the terminating NUL would not fit, then advance ht_len by A.
+// Prints a panic message, runs XH_uninit and exits if the buffer cannot be grown.
+#define HT_ADD_CHK(A) if (ht_size <= ((int) (A)+ht_len+1)) { \
+ ht_size=(A)+ht_len+REALLOC_SIZE; \
+ ht_buff=(char*) realloct(ht_buff,ht_size); \
+ if (!ht_buff) { \
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
+ XH_uninit; \
+ exit(1); \
+ } \
+ } \
+ ht_len+=A;
+/*
+(Optimized)
+#define HT_ADD_ADR { int i,j=ht_len; HT_ADD_CHK(((int) adr)- ((int) lastsaved)) \
+ for(i=0;i<((int) adr)- ((int) lastsaved);i++) \
+ ht_buff[j+i]=lastsaved[i]; \
+ ht_buff[j+((int) adr)- ((int) lastsaved)]='\0'; \
+ lastsaved=adr; }
+*/
+// HT_ADD_ADR: append the source bytes between lastsaved and adr to ht_buff, then move lastsaved up to adr.
+// Only active when bit 0 of opt.getmode is set and ptr>0.
+#define HT_ADD_ADR \
+ if ((ptr>0) && (opt.getmode & 1)) { \
+ int ht_span_=(int) (adr - lastsaved); \
+ int ht_prev_=ht_len; \
+ HT_ADD_CHK(ht_span_) \
+ memcpy(&ht_buff[ht_prev_], lastsaved, ht_span_); \
+ ht_buff[ht_prev_+ht_span_]='\0'; \
+ lastsaved=adr; \
+ }
+/*
+(Optimized)
+#define HT_ADD(A) { HT_ADD_CHK(strlen(A)) strcat(ht_buff,A); }
+*/
+// HT_ADD(A): append the NUL-terminated string A to ht_buff (nothing is done for an empty string).
+// Only active when bit 0 of opt.getmode is set and ptr>0.
+#define HT_ADD(A) \
+ if ((ptr>0) && (opt.getmode & 1)) { \
+ int ht_span_=strlen(A); \
+ int ht_prev_=ht_len; \
+ if (ht_span_!=0) { \
+ HT_ADD_CHK(ht_span_) \
+ memcpy(&ht_buff[ht_prev_], A, ht_span_); \
+ ht_buff[ht_prev_+ht_span_]='\0'; \
+ } }
+// HT_ADD_START: declare the output buffer state (ht_size, ht_len, ht_buff) in the enclosing scope and,
+// when bit 0 of opt.getmode is set and ptr>0, allocate an initially empty buffer of 5/4 of r.size plus REALLOC_SIZE bytes.
+// Prints a panic message, runs XH_uninit and exits if the allocation fails.
+#define HT_ADD_START \
+ int ht_size=(int)(r.size*5)/4+REALLOC_SIZE; \
+ int ht_len=0; \
+ char* ht_buff=NULL; \
+ if ((ptr>0) && (opt.getmode & 1)) { \
+ ht_buff=(char*) malloct(ht_size); \
+ if (!ht_buff) { \
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
+ XH_uninit; \
+ exit(1); \
+ } \
+ *ht_buff='\0'; \
+ }
+// HT_ADD_END: write the page assembled in ht_buff to savename, then release ht_buff.
+// The MD5 digest of the buffer is computed first. If the file already on disk has the same size and the
+// digest stored in the cache under "//[HTML-MD5]//" matches, the file is not rewritten and filenote() is
+// called instead. Otherwise the file is (re)created, its time is set from r.lastmodified when available,
+// and usercommand() is called for it. The new digest is written to the cache when cache.ndx is open.
+#define HT_ADD_END { \
+ int ok=0;\
+ if (ht_buff) { \
+ int file_len=(int) strlen(ht_buff);\
+ char digest[32+2];\
+ digest[0]='\0';\
+ domd5mem(ht_buff,file_len,digest,1);\
+ if (fsize(antislash(savename))==file_len) { \
+ /* start from a known state in case cache_readdata() leaves its outputs untouched on a miss */ \
+ int mlen=0;\
+ char* mbuff=NULL;\
+ cache_readdata(&cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\
+ /* terminate the stored digest only when there really is a buffer to write into */ \
+ /* NOTE(review): mbuff is not released here; who owns it after cache_readdata() is not visible from this section - confirm */ \
+ if ((mbuff!=NULL) && (mlen>0)) mbuff[mlen]='\0';\
+ if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\
+ ok=1;\
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {\
+ fspc(opt.log,"debug"); fprintf(opt.log,"File not re-written (md5): %s"LF,savename);\
+ test_flush;\
+ }\
+ } else {\
+ ok=0;\
+ } \
+ }\
+ if (!ok) { \
+ fp=filecreate(savename); \
+ if (fp) { \
+ if (file_len>0) {\
+ if ((int)fwrite(ht_buff,1,file_len,fp) != file_len) { \
+ if (opt.errlog) { \
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to write HTML file %s"LF,savename);\
+ test_flush;\
+ }\
+ }\
+ }\
+ fclose(fp); fp=NULL; \
+ if (strnotempty(r.lastmodified)) \
+ set_filetime_rfc822(savename,r.lastmodified); \
+ usercommand(0,NULL,antislash(savename)); \
+ } else {\
+ if (opt.errlog) { \
+ fspc(opt.errlog,"error");\
+ fprintf(opt.errlog,"Unable to save file %s"LF,savename);\
+ test_flush;\
+ }\
+ }\
+ } else {\
+ filenote(savename,NULL); \
+ }\
+ if (cache.ndx)\
+ cache_writedata(cache.ndx,cache.dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
+ } \
+ freet(ht_buff); ht_buff=NULL; \
+ }
+#define HT_ADD_FOP /* expands to nothing in this variant: the output file is only created later, by HT_ADD_END */
+#endif
+
+// make room at the head of the filter list: every entry is moved up one slot, then filters[0] is emptied
+// so that a new element can be stored there; filptr is incremented and capped at filter_max
+#define HT_INSERT_FILTERS0 {\
+ int dst_=filptr;\
+ while(dst_>0) {\
+ strcpy(filters[dst_],filters[dst_-1]);\
+ dst_--;\
+ }\
+ strcpy(filters[0],"");\
+ filptr++;\
+ filptr=minimum(filptr,filter_max);\
+}
+
+// HT_INDEX_END: finish the generated index file, once.
+// When the index file is still open, the footer template is written (with an automatic refresh to
+// makeindex_firstlink when the index holds exactly one link), the file is closed and usercommand() is
+// called for index.html under opt.path_html. makeindex_done is then set so that later uses do nothing.
+#define HT_INDEX_END do { \
+if (!makeindex_done) { \
+if (makeindex_fp) { \
+ /* sized for a full makeindex_firstlink (char[HTS_URLMAXSIZE*2]) plus the meta markup around it; */ \
+ /* a fixed 1024-byte buffer could be overrun by sprintf() with a long first link */ \
+ char tempo[HTS_URLMAXSIZE*2+256]; \
+ if (makeindex_links == 1) { \
+ sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
+ } else \
+ tempo[0]='\0'; \
+ /* NOTE(review): template_footer is used as the printf format and may come from a template file - it must contain exactly the two expected %s */ \
+ fprintf(makeindex_fp,template_footer, \
+ "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
+ tempo \
+ ); \
+ fflush(makeindex_fp); \
+ fclose(makeindex_fp); /* not to be forgotten, otherwise expect a sleepless night */ \
+ makeindex_fp=NULL; \
+ usercommand(0,NULL,fconcat(opt.path_html,"index.html")); \
+} \
+} \
+makeindex_done=1; /* ok, done */ \
+} while(0)
+
+
+
+
+// Début de httpmirror, robot
+// url1 peut être multiple
+int httpmirror(char* url1,httrackp* ptropt) {
+ httrackp opt = *ptropt; // structure d'options
+ char* primary=NULL; // première page, contenant les liens à scanner
+ int lien_tot=0; // nombre de liens pour le moment
+ lien_url** liens=NULL; // les pointeurs sur les liens
+ hash_struct hash; // système de hachage, accélère la recherche dans les liens
+ t_cookie cookie; // gestion des cookies
+ int lien_max=0;
+ int lien_size=0; // octets restants dans buffer liens dispo
+ char* lien_buffer=NULL; // buffer liens actuel
+ int add_tab_alloc=256000; // +256K de liens à chaque fois
+ //char* tab_alloc=NULL;
+ int ptr; // pointeur actuel sur les liens
+ //
+ int numero_passe=0; // deux passes pour html puis images
+ int back_max=0; // fichiers qui peuvent être en local
+ lien_back* back=NULL; // backing en local
+ htsblk r; // retour de certaines fonctions
+ TStamp lastime=0; // pour affichage infos de tmp en tmp
+ // pour les stats, nombre de fichiers & octets écrits
+ LLint stat_fragment=0; // pour la fragmentation
+ //TStamp istat_timestart; // départ pour calcul instantanné
+ //
+ TStamp last_info_shell=0;
+ int info_shell=0;
+ // filtres
+ char** filters = NULL;
+ //int filter_max=0;
+ int filptr=0;
+ //
+ int makeindex_done=0; // lorsque l'index sera fait
+ FILE* makeindex_fp=NULL;
+ int makeindex_links=0;
+ char makeindex_firstlink[HTS_URLMAXSIZE*2];
+ // statistiques (mode #Z)
+ FILE* makestat_fp=NULL; // fichier de stats taux transfert
+ FILE* maketrack_fp=NULL; // idem pour le tracking
+ TStamp makestat_time=0; // attente (secondes)
+ LLint makestat_total=0; // repère du nombre d'octets transférés depuis denrière stat
+ int makestat_lnk=0; // idem, pour le nombre de liens
+ //
+ char codebase[HTS_URLMAXSIZE*2]; // base pour applet java
+ char base[HTS_URLMAXSIZE*2]; // base pour les autres fichiers
+ //
+ cache_back cache;
+ robots_wizard robots; // gestion robots.txt
+ inthash cache_hashtable=NULL;
+ int cache_hash_size=0;
+ //
+ char *template_header=NULL,*template_body=NULL,*template_footer=NULL;
+ //
+ codebase[0]='\0'; base[0]='\0';
+ //
+ cookie.auth.next=NULL;
+ cookie.auth.auth[0]=cookie.auth.prefix[0]='\0';
+ //
+
+ // noter heure actuelle de départ en secondes
+ memset(&HTS_STAT, 0, sizeof(HTS_STAT));
+ HTS_STAT.stat_timestart=time_local();
+ //istat_timestart=stat_timestart;
+ HTS_STAT.istat_timestart[0]=HTS_STAT.istat_timestart[1]=mtime_local();
+ /* reset stats */
+ HTS_STAT.HTS_TOTAL_RECV=0;
+ HTS_STAT.istat_bytes[0]=HTS_STAT.istat_bytes[1]=0;
+ if (opt.aff_progress)
+ lastime=HTS_STAT.stat_timestart;
+ if (opt.shell) {
+ last_info_shell=HTS_STAT.stat_timestart;
+ }
+ if ((opt.makestat) || (opt.maketrack)){
+ makestat_time=HTS_STAT.stat_timestart;
+ }
+ // initialiser compteur erreurs
+ fspc(NULL,NULL);
+
+ // initialiser cookie
+ if (opt.accept_cookie) {
+ opt.cookie=&cookie;
+ cookie.max_len=30000; // max len
+ strcpy(cookie.data,"");
+ // Charger cookies.txt par défaut ou cookies.txt du miroir
+ if (fexist(fconcat(opt.path_log,"cookies.txt")))
+ cookie_load(opt.cookie,opt.path_log,"cookies.txt");
+ else if (fexist("cookies.txt"))
+ cookie_load(opt.cookie,"","cookies.txt");
+ } else
+ opt.cookie=NULL;
+
+ // initialiser exit_xh
+ exit_xh=0; // sortir prématurément (var globale)
+
+ // initialiser usercommand
+ usercommand(opt.sys_com_exec,opt.sys_com,"");
+
+ // initialiser structcheck
+ structcheck_init(1);
+
+ // initialiser tableau options accessible par d'autres fonctions (signal)
+ hts_declareoptbuffer(&opt);
+
+ // initialiser verif_backblue
+ verif_backblue(NULL);
+ verif_external(0,0);
+ verif_external(1,0);
+
+ // et templates html
+ template_header=readfile_or(fconcat(opt.path_bin,"templates/index-header.html"),HTS_INDEX_HEADER);
+ template_body=readfile_or(fconcat(opt.path_bin,"templates/index-body.html"),HTS_INDEX_BODY);
+ template_footer=readfile_or(fconcat(opt.path_bin,"templates/index-footer.html"),HTS_INDEX_FOOTER);
+
+ // initialiser mimedefs
+ get_userhttptype(1,opt.mimedefs,NULL);
+
+ // Initialiser indexation
+ if (opt.kindex)
+ index_init(opt.path_html);
+
+ // effacer bloc cache
+ memset(&cache, 0, sizeof(cache_back));
+ cache.type=opt.cache; // cache?
+ cache.errlog=opt.errlog; // err log?
+ cache.ptr_ant=cache.ptr_last=0; // pointeur pour anticiper
+
+ // initialiser hash cache
+ if (!cache_hash_size)
+ cache_hash_size=HTS_HASH_SIZE;
+ cache_hashtable=inthash_new(cache_hash_size);
+ if (cache_hashtable==NULL) {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ filters[0]=NULL; back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+ cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */
+
+ // initialiser cache DNS
+ _hts_lockdns(-999);
+
+ // robots.txt
+ strcpy(robots.adr,"!"); // dummy
+ robots.token[0]='\0';
+ robots.next=NULL; // suivant
+ opt.robotsptr = &robots;
+
+ // effacer filters
+ opt.maxfilter = maximum(opt.maxfilter, 128);
+ if (filters_init(&filters, opt.maxfilter, 0) == 0) {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+ opt.filters.filters=&filters;
+ //
+ opt.filters.filptr=&filptr;
+ //opt.filters.filter_max=&filter_max;
+
+ // tableau de pointeurs sur les liens
+ lien_max=maximum(opt.maxlink,32);
+ liens=(lien_url**) malloct(lien_max*sizeof(lien_url*)); // tableau de pointeurs sur les liens
+ if (liens==NULL) {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ //XH_uninit;
+ return 0;
+ } else {
+ int i;
+ for(i=0;i<lien_max;i++) {
+ liens[i]=NULL;
+ }
+ }
+ // initialiser ptr et lien_tot
+ ptr=0;
+ lien_tot=0;
+#if HTS_HASH
+ // initialiser hachage
+ {
+ int i;
+ for(i=0;i<HTS_HASH_SIZE;i++)
+ hash.hash[0][i]=hash.hash[1][i]=hash.hash[2][i] = -1; // pas d'entrées
+ hash.liens = liens;
+ hash.max_lien=0;
+ }
+#endif
+
+
+ // copier adresse(s) dans liste des adresses
+ {
+ char *a=url1;
+ int primary_len=8192;
+ if (strnotempty(opt.filelist)) {
+ primary_len+=max(0,fsize(opt.filelist)*2);
+ }
+ primary_len+=strlen(url1)*2;
+
+ // création de la première page, qui contient les liens de base à scanner
+ // c'est plus propre et plus logique que d'entrer à la main les liens dans la pile
+ // on bénéficie ainsi des vérifications et des tests du robot pour les liens "primaires"
+ primary=(char*) malloct(primary_len);
+ if (primary) {
+ primary[0]='\0';
+ } else {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+
+ while(*a) {
+ int i;
+ int joker=0;
+
+ // vérifier qu'il n'y a pas de * dans l'url
+ if (*a=='+')
+ joker=1;
+ else if (*a=='-')
+ joker=1;
+ /* NON, certaines URL ont des * (!)
+ else {
+ int i=0;
+ while((a[i]!=0) && (a[i]!=' ')) if (a[i++]=='*') joker=1;
+ }
+ */
+
+ if (joker) { // joker ou filters
+ //char* p;
+ char tempo[HTS_URLMAXSIZE*2];
+ int type; int plus=0;
+
+ // noter joker (dans b)
+ if (*a=='+') { // champ +
+ type=1; plus=1; a++;
+ } else if (*a=='-') { // champ forbidden[]
+ type=0; a++;
+ } else { // champ + avec joker sans doute
+ type=1;
+ }
+
+ // recopier prochaine chaine (+ ou -)
+ i=0;
+ while((*a!=0) && (*a!=' ')) { tempo[i++]=*a; a++; }
+ tempo[i++]='\0';
+ while(*a==' ') { a++; }
+
+ // sauter les + sans rien après..
+ if (strnotempty(tempo)) {
+ if ((plus==0) && (type==1)) { // implicite: *www.edf.fr par exemple
+ if (tempo[strlen(tempo)-1]!='*') {
+ strcat(tempo,"*"); // ajouter un *
+ }
+ }
+ if (type)
+ strcpy(filters[filptr],"+");
+ else
+ strcpy(filters[filptr],"-");
+ /*
+ if (strfield(tempo,"http://"))
+ strcat(filters[filptr],tempo+7); // ignorer http://
+ else if (strfield(tempo,"ftp://"))
+ strcat(filters[filptr],tempo+6); // ignorer ftp://
+ else
+ */
+ strcat(filters[filptr],tempo);
+ filptr++;
+
+ /* sanity check */
+ if (filptr + 1 >= opt.maxfilter) {
+ opt.maxfilter += HTS_FILTERSINC;
+ if (filters_init(&filters, opt.maxfilter, HTS_FILTERSINC) == 0) {
+ printf("PANIC! : Too many filters : >%d [%d]\n",filptr,__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,LF"Too many filters, giving up..(>%d)"LF,filptr);
+ fprintf(opt.errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
+ test_flush;
+ }
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+ //opt.filters.filters=filters;
+ }
+
+ }
+
+ } else { // adresse normale
+ char url[HTS_URLMAXSIZE*2];
+ // prochaine adresse
+ i=0;
+ while((*a!=0) && (*a!=' ')) { url[i++]=*a; a++; }
+ while(*a==' ') { a++; }
+ url[i++]='\0';
+
+ //strcat(primary,"<PRIMARY=\"");
+ if (strstr(url,":/")==NULL)
+ strcat(primary,"http://");
+ strcat(primary,url);
+ //strcat(primary,"\">");
+ strcat(primary,"\n");
+ }
+ } // while
+
+ /* load URL file list */
+ /* OPTIMIZED for fast load */
+ if (strnotempty(opt.filelist)) {
+ char* filelist_buff=NULL;
+ int filelist_sz=fsize(opt.filelist);
+ if (filelist_sz>0) {
+ FILE* fp=fopen(opt.filelist,"rb");
+ if (fp) {
+ filelist_buff=malloct(filelist_sz + 2);
+ if (filelist_buff) {
+ if ((int)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) {
+ freet(filelist_buff);
+ filelist_buff=NULL;
+ } else {
+ *(filelist_buff + filelist_sz) = '\0';
+ }
+ }
+ fclose(fp);
+ }
+ }
+
+ if (filelist_buff) {
+ int filelist_ptr=0;
+ int n=0;
+ char line[HTS_URLMAXSIZE*2];
+ char* primary_ptr = primary + strlen(primary);
+ while( filelist_ptr < filelist_sz ) {
+ int count=binput(filelist_buff+filelist_ptr,line,HTS_URLMAXSIZE);
+ filelist_ptr+=count;
+ if (count && line[0]) {
+ n++;
+ if (strstr(line,":/")==NULL) {
+ strcpy(primary_ptr, "http://");
+ primary_ptr += strlen(primary_ptr);
+ }
+ strcpy(primary_ptr, line);
+ primary_ptr += strlen(primary_ptr);
+ strcpy(primary_ptr, "\n");
+ primary_ptr += 1;
+ }
+ }
+ // fclose(fp);
+ if (opt.log!=NULL) {
+ fspc(opt.log,"info"); fprintf(opt.log,"%d links added from %s"LF,n,opt.filelist); test_flush;
+ }
+
+ // Free buffer
+ freet(filelist_buff);
+ } else {
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Could not include URL list: %s"LF,opt.filelist); test_flush;
+ }
+ }
+ }
+
+
+ // lien primaire
+ liens_record("primary","/primary","primary.html","","");
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ liens[lien_tot]->testmode=0; // pas mode test
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->depth=opt.depth+1; // lien de priorité maximale
+ liens[lien_tot]->pass2=0; // 1ère passe
+ liens[lien_tot]->retry=opt.retry; // lien de priorité maximale
+ liens[lien_tot]->premier=lien_tot; // premier lien, objet-père=objet
+ liens[lien_tot]->precedent=lien_tot; // lien précédent
+ lien_tot++;
+
+ // Initialiser cache
+ cache_init(&cache,&opt);
+ }
+
+#if BDEBUG==3
+ {
+ int i;
+ for(i=0;i<lien_tot;i++) {
+ printf("%d>%s%s as %s\n",i,liens[i]->adr,liens[i]->fil,liens[i]->sav);
+ }
+ for(i=0;i<filptr;i++) {
+ printf("%d>filters=%s\n",i,filters[i]);
+ }
+ }
+#endif
+
+ // backing
+ //soc_max=opt.maxsoc;
+ if (opt.maxsoc>0) {
+#if BDEBUG==2
+ _CLRSCR;
+#endif
+ // Nombre de fichiers HTML pouvant être présents en mémoire de manière simultannée
+ // On prévoit large: les fichiers HTML ne prennent que peu de place en mémoire, et les
+ // fichiers non html sont sauvés en direct sur disque.
+ // --> 1024 entrées + 32 entrées par socket en supplément
+ back_max=opt.maxsoc*32+1024;
+ //back_max=opt.maxsoc*8+32;
+ back=(lien_back*) calloct((back_max+1),sizeof(lien_back));
+ if (back==NULL) {
+ if (opt.errlog)
+ fprintf(opt.errlog,"Not enough memory, can not allocate %d bytes"LF,(int)((opt.maxsoc+1)*sizeof(lien_back)));
+ return 0;
+ } else { // copier buffer-location & effacer
+ int i;
+ for(i=0;i<back_max;i++){
+ back[i].r.location=back[i].location_buffer;
+ back[i].status=-1;
+ back[i].r.soc=INVALID_SOCKET;
+ }
+ }
+ }
+
+
+ // flush
+ test_flush;
+
+ // statistiques
+ if (opt.makestat) {
+ makestat_fp=fopen(fconcat(opt.path_log,"hts-stats.txt"),"wb");
+ if (makestat_fp != NULL) {
+ fprintf(makestat_fp,"HTTrack statistics report, every minutes"LF LF);
+ }
+ }
+
+ // tracking -- débuggage
+ if (opt.maketrack) {
+ maketrack_fp=fopen(fconcat(opt.path_log,"hts-track.txt"),"wb");
+ if (maketrack_fp != NULL) {
+ fprintf(maketrack_fp,"HTTrack tracking report, every minutes"LF LF);
+ }
+ }
+
+ // on n'a pas de liens!! (exemple: httrack www.* impossible sans départ..)
+ if (lien_tot<=0) {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Error! You MUST specify at least one complete URL, and not only wildcards!"LF);
+ }
+ }
+
+
+ // attendre une certaine heure..
+ if (opt.waittime>0) {
+ int rollover=0;
+ int ok=0;
+ {
+ TStamp tl=0;
+ time_t tt;
+ struct tm* A;
+ tt=time(NULL);
+ A=localtime(&tt);
+ tl+=A->tm_sec;
+ tl+=A->tm_min*60;
+ tl+=A->tm_hour*60*60;
+ if (tl>opt.waittime) // attendre minuit
+ rollover=1;
+ }
+
+ // attendre..
+ do {
+ TStamp tl=0;
+ time_t tt;
+ struct tm* A;
+ tt=time(NULL);
+ A=localtime(&tt);
+ tl+=A->tm_sec;
+ tl+=A->tm_min*60;
+ tl+=A->tm_hour*60*60;
+
+ if (rollover) {
+ if (tl<=opt.waittime)
+ rollover=0; // attendre heure
+ } else {
+ if (tl>opt.waittime)
+ ok=1; // ok!
+ }
+
+#if HTS_ANALYSTE
+ {
+ int r;
+ if (rollover)
+ r=hts_htmlcheck_loop(back,back_max,0,0,lien_tot,(int) (opt.waittime-tl+24*3600),NULL);
+ else
+ r=hts_htmlcheck_loop(back,back_max,0,0,lien_tot,(int) (opt.waittime-tl),NULL);
+ if (!r) {
+ exit_xh=1; // exit requested
+ ok=1;
+ } else
+ Sleep(100);
+ }
+#endif
+ } while(!ok);
+
+ // note: recopie de plus haut
+ // noter heure actuelle de départ en secondes
+ HTS_STAT.stat_timestart=time_local();
+ if (opt.aff_progress)
+ lastime=HTS_STAT.stat_timestart;
+ if (opt.shell) {
+ last_info_shell=HTS_STAT.stat_timestart;
+ }
+ if ((opt.makestat) || (opt.maketrack)){
+ makestat_time=HTS_STAT.stat_timestart;
+ }
+
+
+ }
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: start"LF);
+ }
+#if HTS_ANALYSTE
+ if (!hts_htmlcheck_start(&opt)) {
+ XH_extuninit;
+ return 1;
+ }
+#endif
+
+
+ // ------------------------------------------------------------
+
+ // ------------------------------------------------------------
+ // Boucle générale de parcours des liens
+ // ------------------------------------------------------------
+ do {
+ int error=0; // si error alors sauter
+ int store_errpage=0; // c'est une erreur mais on enregistre le html
+ char loc[HTS_URLMAXSIZE*2]; // adresse de relocation
+
+ // Ici on charge le fichier (html, gif..) en mémoire
+ // Les HTMLs sont traités (si leur priorité est suffisante)
+
+ // effacer r
+ memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET;
+ r.location=loc; // en cas d'erreur 3xx (moved)
+ // recopier proxy
+ memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy));
+ // et user-agent
+ strcpy(r.req.user_agent,opt.user_agent);
+ r.req.user_agent_send=opt.user_agent_send;
+
+ if (!error) {
+
+ // Skip empty/invalid/done in background
+ if (liens[ptr]) {
+ while ( (liens[ptr]) && (
+ ( ((urladr != NULL)?(urladr):(" "))[0]=='!') ||
+ ( ((urlfil != NULL)?(urlfil):(" "))[0]=='\0') ||
+ ( (liens[ptr]->pass2 == -1) )
+ )
+ ) { // sauter si lien annulé (ou fil vide)
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link #%d seems ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" ")));
+ test_flush;
+ }
+ ptr++;
+ }
+ }
+ if (liens[ptr]) { // on a qq chose à récupérer?
+
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Wait get: %s%s"LF,urladr,urlfil);
+ test_flush;
+#if DEBUG_ROBOTS
+ if (strcmp(urlfil,"/robots.txt") == 0) {
+ printf("robots.txt detected\n");
+ }
+#endif
+ }
+ // ------------------------------------------------------------
+ // DEBUT --RECUPERATION LIEN---
+ if (ptr==0) { // premier lien à parcourir: lien primaire construit avant
+ r.adr=primary; primary=NULL;
+ r.statuscode=200;
+ r.size=strlen(r.adr);
+ r.soc=INVALID_SOCKET;
+ strcpy(r.contenttype,"text/html");
+ /*} else if (opt.maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing)
+ // charger le fichier en mémoire tout bêtement
+ r=xhttpget(urladr,urlfil);
+ //
+ */
+ } else { // backing, multiples sockets
+ //
+ int b;
+ int n;
+
+#if BDEBUG==1
+ printf("\nBack test..\n");
+#endif
+
+ // pause/lock files
+ {
+ int do_pause=0;
+
+ // user pause lockfile : create hts-paused.lock --> HTTrack will be paused
+ if (fexist(fconcat(opt.path_log,"hts-stop.lock"))) {
+ // remove lockfile
+ remove(fconcat(opt.path_log,"hts-stop.lock"));
+ if (!fexist(fconcat(opt.path_log,"hts-stop.lock"))) {
+ do_pause=1;
+ }
+ }
+
+ // after receving N bytes, pause
+ if (opt.fragment>0) {
+ if ((HTS_STAT.stat_bytes-stat_fragment) > opt.fragment) {
+ do_pause=1;
+ }
+ }
+
+ // pause?
+ if (do_pause) {
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: pause requested.."LF);
+ }
+ while (back_nsoc(back,back_max)>0) { // attendre fin des transferts
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+ Sleep(200);
+#if HTS_ANALYSTE
+ {
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ b=0;
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ }
+#endif
+ }
+ // On désalloue le buffer d'enregistrement des chemins créée, au cas où pendant la pause
+ // l'utilisateur ferait un rm -r après avoir effectué un tar
+ structcheck_init(1);
+ {
+ FILE* fp = fopen(fconcat(opt.path_log,"hts-paused.lock"),"wb");
+ if (fp) {
+ fspc(fp,"info"); // dater
+ fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror..."LF""LF"",HTS_STAT.stat_bytes);
+ fclose(fp);
+ }
+ }
+ stat_fragment=HTS_STAT.stat_bytes;
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: pause: %s"LF,fconcat(opt.path_log,"hts-paused.lock"));
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_pause(fconcat(opt.path_log,"hts-paused.lock"));
+#else
+ while (fexist(fconcat(opt.path_log,"hts-paused.lock"))) {
+ //back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); inutile!! (plus de sockets actives)
+ Sleep(1000);
+ }
+#endif
+ }
+ //
+ }
+ // end of pause/lock files
+
+#if HTS_ANALYSTE
+ // changement dans les préférences
+/*
+ if (_hts_setopt) {
+ copy_htsopt(_hts_setopt,&opt); // copier au besoin
+ _hts_setopt=NULL; // effacer callback
+ }
+*/
+ if (_hts_addurl) {
+ char add_adr[HTS_URLMAXSIZE*2];
+ char add_fil[HTS_URLMAXSIZE*2];
+ while(*_hts_addurl) {
+ char add_url[HTS_URLMAXSIZE*2];
+ add_adr[0]=add_fil[0]=add_url[0]='\0';
+ if (!link_has_authority(*_hts_addurl))
+ strcpy(add_url,"http://"); // ajouter http://
+ strcat(add_url,*_hts_addurl);
+ if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
+ // ----Ajout----
+ // noter NOUVEAU lien
+ char add_sav[HTS_URLMAXSIZE*2];
+ // calculer lien et éventuellement modifier addresse/fichier
+ if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) {
+ if (hash_read(&hash,add_sav,"",0)<0) { // n'existe pas déja
+ // enregistrer lien (MACRO)
+ liens_record(add_adr,add_fil,add_sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ liens[lien_tot]->testmode=0; // mode test?
+ liens[lien_tot]->link_import=0; // mode normal
+ liens[lien_tot]->depth=opt.depth;
+ liens[lien_tot]->pass2=max(0,numero_passe);
+ liens[lien_tot]->retry=opt.retry;
+ liens[lien_tot]->premier=lien_tot;
+ liens[lien_tot]->precedent=lien_tot;
+ lien_tot++;
+ //
+ if ((opt.debug>0) && (opt.log!=NULL)) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush;
+ }
+ //
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil);
+ test_flush;
+ }
+ }
+
+ }
+ } else {
+ if (opt.errlog) {
+ fspc(opt.errlog,"error");
+ fprintf(opt.errlog,"Error during URL decoding for %s"LF,add_url);
+ test_flush;
+ }
+ }
+ // ----Fin Ajout----
+ _hts_addurl++; // suivante
+ }
+ _hts_addurl=NULL; // libérer _hts_addurl
+ }
+ // si une pause a été demandée
+ if (_hts_setpause) {
+ // index du lien actuel
+ int b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0) b=0; // forcer pour les stats
+ while(_hts_setpause) { // on fait la pause..
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ if (back_nsoc(back,back_max)==0)
+ Sleep(250); // tite pause
+ }
+ }
+#endif
+
+ // si le fichier n'est pas en backing, le mettre..
+ if (!back_exist(back,back_max,urladr,urlfil,savename)) {
+#if BDEBUG==1
+ printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil);
+#endif
+ if (back_add(back,back_max,&opt,&cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {
+ printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__);
+#if BDEBUG==1
+ printf("error while crash adding\n");
+#endif
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+
+ }
+ }
+
+#if BDEBUG==1
+ printf("test number of socks\n");
+#endif
+
+ // ajouter autant de socket qu'on peut ajouter
+ n=opt.maxsoc-back_nsoc(back,back_max);
+#if BDEBUG==1
+ printf("%d sockets available for backing\n",n);
+#endif
+
+#if HTS_ANALYSTE
+ if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter
+#else
+ if (n>0) { // si sockets libre
+#endif
+ // remplir autant que l'on peut le cache (backing)
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+ }
+
+ // index du lien actuel
+/*
+ b=back_index(back,back_max,urladr,urlfil,savename);
+
+ if (b>=0)
+*/
+ {
+ // ------------------------------------------------------------
+ // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE
+ do {
+
+ // index du lien actuel
+ b=back_index(back,back_max,urladr,urlfil,savename);
+#if BDEBUG==1
+ printf("back index %d, waiting\n",b);
+#endif
+ // Continue to the loop if link still present
+ if (b<0)
+ continue;
+
+ // Receive data
+ if (back[b].status>0)
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+
+ // Continue to the loop if link still present
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // And fill the backing stack
+ if (back[b].status>0)
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+
+ // Continue to the loop if link still present
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // autres occupations de HTTrack: statistiques, boucle d'attente, etc.
+ if ((opt.makestat) || (opt.maketrack)) {
+ TStamp l=time_local();
+ if ((int) (l-makestat_time) >= 60) {
+ if (makestat_fp != NULL) {
+ fspc(makestat_fp,"info");
+ fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-makestat_total)/(l-makestat_time)), HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-makestat_lnk,(int) lien_tot);
+ fflush(makestat_fp);
+ makestat_total=HTS_STAT.HTS_TOTAL_RECV;
+ makestat_lnk=lien_tot;
+ }
+ if (maketrack_fp!=NULL) {
+ int i;
+ fspc(maketrack_fp,"info"); fprintf(maketrack_fp,LF);
+ for(i=0;i<back_max;i++) {
+ back_info(back,i,3,maketrack_fp);
+ }
+ fprintf(maketrack_fp,LF);
+
+ }
+ makestat_time=l;
+ }
+ }
+#if HTS_ANALYSTE
+ {
+ int i;
+ {
+ char* s=hts_cancel_file("");
+ if (strnotempty(s)) { // fichier à canceller
+ for(i=0;i<back_max;i++) {
+ if ((back[i].status>0)) {
+ if (strcmp(back[i].url_sav,s)==0) { // ok trouvé
+ if (back[i].status != 1000) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("user cancel: deletehttp\n");
+#endif
+ if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-1;
+ strcpy(back[i].r.msg,"Cancelled by User");
+ back[i].status=0; // terminé
+ } else // cancel ftp.. flag à 1
+ back[i].stop_ftp = 1;
+ }
+ }
+ }
+ s[0]='\0';
+ }
+ }
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ }
+
+#endif
+#if HTS_POLL
+ if ((opt.shell) || (opt.keyboard) || (opt.verbosedisplay) || (!opt.quiet)) {
+ TStamp tl;
+ info_shell=1;
+
+ /* Toggle with ENTER */
+ if (!opt.quiet) {
+ if (check_stdin()) {
+ char com[256];
+ linput(stdin,com,200);
+ if (opt.verbosedisplay==2)
+ opt.verbosedisplay=1;
+ else
+ opt.verbosedisplay=2;
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: change-options"LF);
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_chopt(&opt);
+#endif
+ }
+ }
+
+ /*
+ ..useless..
+ while (check_stdin()) { // données disponibles
+ char com[256];
+ com[0]='\0';
+
+ if (!rcvd) rcvd=1;
+ linput(stdin,com,256);
+
+ if (strnotempty(com)) {
+ if (strlen(com)<=2) {
+ switch(*com) {
+ case '?': { // Status?
+ if (back[b].status>0) printf("WAIT\n");
+ else printf("READY\n");
+ }
+ break;
+ case 'f': { // Fichier en attente?
+ if (back[b].status>0) printf("WAIT %s\n",back[b].url_fil);
+ else printf("READY %s\n",back[b].url_fil);
+ }
+ break;
+ case 'A': case 'F': { // filters
+ int i;
+ for(i=0;i<filptr;i++) {
+ printf("%s ",filters[i]);
+ }
+ printf("\n");
+ }
+ break;
+ case '#': { // Afficher statistique sur le nombre de liens, etc
+ switch(*(com+1)) {
+ case 'l': printf("%d\n",lien_tot); break; // nombre de liens enregistrés
+ case 's': printf("%d\n",back_nsoc(back,back_max)); break; // nombre de sockets
+ case 'r': printf("%d\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); break; // taux de transfert
+ }
+ }
+ break;
+ case 'K': if (*(com+1)=='!') { // Kill
+ XH_uninit;
+ return -1;
+ }
+ break;
+ case 'X': if (*(com+1)=='!') { // exit
+ exit_xh=1;
+ }
+ break;
+ case 'I': if (*(com+1)=='+') info_shell=1; else info_shell=0;
+ break;
+ }
+ io_flush;
+ } else if (*com=='@') {
+ printf("%s\n",com+1);
+ io_flush;
+ }
+ }
+
+ } // while
+ */
+ tl=time_local();
+
+ // générer un message d'infos sur l'état actuel
+ /* Shell mode: at most once per second, dump the current engine state
+    (elapsed time, byte/link counters, per-slot information). */
+ if (opt.shell) {
+   if ((tl-last_info_shell)>0) { // at most once per second
+     FILE* fp=stdout;
+     int a=0;   // set when the report goes to the "hts-isalive" answer file
+     last_info_shell=tl;
+     if (fexist(fconcat(opt.path_log,"hts-autopsy"))) { // debugging aid: "is the robot alive?" marker file
+       remove(fconcat(opt.path_log,"hts-autopsy"));
+       fp=fopen(fconcat(opt.path_log,"hts-isalive"),"wb");
+       if (fp!=NULL) {
+         a=1;
+       } else {
+         // The answer file could not be created: fall back to stdout instead of
+         // handing a NULL stream to fprintf()/fclose() below.
+         fp=stdout;
+       }
+     }
+     if ((info_shell) || a) {
+       int i,j;
+       TStamp elapsed=tl-HTS_STAT.stat_timestart;
+
+       fprintf(fp,"TIME %d"LF,(int) elapsed);
+       fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
+       // No rate can be computed before the clock has advanced: report 0 rather
+       // than dividing by zero.
+       fprintf(fp,"RATE %d"LF,(elapsed>0) ? (int) (HTS_STAT.HTS_TOTAL_RECV/elapsed) : 0);
+       fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
+       fprintf(fp,"LINK %d"LF,lien_tot);
+       {
+         // Sum the sizes of all slots that currently hold a buffer
+         LLint mem=0;
+         for(i=0;i<back_max;i++)
+           if (back[i].r.adr!=NULL)
+             mem+=back[i].r.size;
+         fprintf(fp,"INMEM "LLintP""LF,mem);
+       }
+       for(j=0;j<2;j++) { // two passes: back_info() modes 1 and 2 (ready / wait per the original note)
+         for(i=0;i<back_max;i++) {
+           // NOTE(review): written to stdout even when fp is the "hts-isalive" file;
+           // the original author already questioned this target - confirm before changing.
+           back_info(back,i,j+1,stdout);
+         }
+       }
+       fprintf(fp,LF);
+       if (a)
+         fclose(fp);
+       io_flush;
+     }
+   }
+ } // if shell
+
+ } // si shell ou keyboard (option)
+ //
+#endif
+ } while((b>=0) && (back[max(b,0)].status>0));
+
+
+ // If link not found on the stack, it's because it has already been downloaded
+ // in background
+ // Then, skip it and go to the next one
+ if (b<0) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
+ test_flush;
+ }
+
+ // prochain lien
+ // ptr++;
+
+ // Jump to 'continue'
+ // This is one of the very very rare cases where goto
+ // is acceptable
+ // A supplemental flag and if( ) { } would be really messy
+ goto jump_if_done;
+ }
+
+
+#if HTS_ANALYSTE==2
+#else
+ //if (!opt.quiet) { // petite animation
+ if (!opt.verbosedisplay) {
+ if (!opt.quiet) {
+ static int roll=0; /* static: ok */
+ roll=(roll+1)%4;
+ printf("%c\x0d",("/-\\|")[roll]);
+ fflush(stdout);
+ }
+ } else if (opt.verbosedisplay==1) {
+ if (back[b].r.statuscode==200)
+ printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size);
+ else
+ printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size,back[b].r.statuscode);
+ fflush(stdout);
+ }
+ //}
+#endif
+ // ------------------------------------------------------------
+ // Vérificateur d'intégrité
+#if DEBUG_CHECKINT
+ _CHECKINT(&back[b],"Retour de back_wait, après le while")
+ {
+ int i;
+ for(i=0;i<back_max;i++) {
+ char si[256];
+ sprintf(si,"Test global après back_wait, index %d",i);
+ _CHECKINT(&back[i],si)
+ }
+ }
+#endif
+
+ // copier structure réponse htsblk
+ memcpy(&r, &(back[b].r), sizeof(htsblk));
+ r.location=loc; // ne PAS copier location!! adresse, pas de buffer
+ if (back[b].r.location)
+ strcpy(r.location,back[b].r.location);
+ back[b].r.adr=NULL; // ne pas faire de desalloc ensuite
+
+ // libérer emplacement backing
+ back_delete(back,b);
+
+ // progression
+ if (opt.aff_progress) {
+ TStamp tl=time_local();
+ if ((tl-HTS_STAT.stat_timestart)>0) {
+ char s[32];
+ int i=0;
+ lastime=tl;
+ _CLRSCR; _GOTOXY("1","1");
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,99)) { // **
+ if (back[i].status>=0) { // loading..
+ s[0]='\0';
+ if (strlen(back[i].url_fil)>16)
+ strcat(s,back[i].url_fil+strlen(back[i].url_fil)-16);
+ else
+ strncat(s,back[i].url_fil,16);
+ printf("%s : ",s);
+
+ printf("[");
+ if (back[i].r.totalsize>0) {
+ int p;
+ int j;
+ p=(int)((back[i].r.size*10)/back[i].r.totalsize);
+ p=minimum(10,p);
+ for(j=0;j<p;j++) printf("*");
+ for(j=0;j<(10-p);j++) printf("-");
+ } else {
+ printf(LLintP,back[i].r.size);
+ }
+ printf("]");
+
+ //} else if (back[i].status==0) {
+ // strcpy(s,"ENDED");
+ }
+ printf("\n");
+ i++;
+ }
+ io_flush;
+ }
+ }
+
+ // débug graphique
+#if BDEBUG==2
+ {
+ char s[12];
+ int i=0;
+ _GOTOXY(1,1);
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,160)) {
+ if (back[i].status>0) {
+ sprintf(s,"%d",back[i].r.size);
+ } else if (back[i].status==0) {
+ strcpy(s,"ENDED");
+ } else
+ strcpy(s," - ");
+ while(strlen(s)<8) strcat(s," ");
+ printf("%s",s); io_flush;
+ i++;
+ }
+ }
+#endif
+
+
+#if BDEBUG==1
+ printf("statuscode=%d with %s / msg=%s\n",r.statuscode,r.contenttype,r.msg);
+#endif
+
+ }
+ /*else {
+#if BDEBUG==1
+ printf("back index error\n");
+#endif
+ }
+ */
+
+ }
+ // FIN --RECUPERATION LIEN---
+ // ------------------------------------------------------------
+
+
+
+ } else { // lien vide..
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush;
+ error=1;
+ }
+ } // test si url existe (non vide!)
+
+
+
+ // ---tester taille a posteriori---
+ // tester r.adr
+ if (!error) {
+ // erreur, pas de fichier chargé:
+ if ((!r.adr) && (r.is_write==0)
+ && (r.statuscode!=301)
+ && (r.statuscode!=302)
+ && (r.statuscode!=303)
+ && (r.statuscode!=307)
+ && (r.statuscode!=412)
+ && (r.statuscode!=416)
+ ) {
+ // error=1;
+
+ // peut être que le fichier était trop gros?
+ if ((istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype))
+ || (istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype))) {
+ error=0;
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Big file cancelled according to user's preferences: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ // // // error=1; // ne pas traiter la suite -- euhh si finalement..
+ }
+ }
+ // ---fin tester taille a posteriori---
+
+
+ // --------------------
+ // BOGUS MIME TYPE HACK
+ // Check if we have a bogus MIME type
+ // example:
+ // Content-type="text/html"
+ // and
+ // Content-disposition="foo.jpg"
+ // --------------------
+ if (!error) {
+ if (r.statuscode == 200) { // OK (ou 304 en backing)
+ if (r.adr) { // Written file
+ if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */
+ ) {
+ if (strnotempty(r.cdispo)) { // Content-disposition set!
+ if (ishtml(savename) == 0) { // Non HTML!!
+ // patch it!
+ strcpy(r.contenttype,"application/octet-stream");
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // ------------------------------------
+ // BOGUS MIME TYPE HACK II (the revenge)
+ // Check if we have a bogus MIME type
+ if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. */
+ ) {
+ if ((r.adr) && (r.size)) {
+ unsigned int map[256];
+ int i;
+ unsigned int nspec = 0;
+ map_characters((unsigned char*)r.adr, (unsigned int)r.size, (unsigned int*)map);
+ for(i = 1 ; i < 32 ; i++) { // null chars ignored..
+ if (!is_realspace(i)
+ && i != 27 /* Damn you ISO2022-xx! */
+ ) {
+ nspec += map[i];
+ }
+ }
+ if ((nspec > r.size / 100) && (nspec > 10)) { // too many special characters
+ strcpy(r.contenttype,"application/octet-stream");
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ // --------------------
+ // REAL MEDIA HACK
+ // Check if we have to load locally the file
+ // --------------------
+ /* If a small, possibly-hypertext document has no in-memory copy (r.adr is
+    NULL), load it back from savename so that it can be parsed; the on-disk
+    copy is treated as temporary and removed once loaded. */
+ if (!error) {
+   if (r.statuscode == 200) { // OK (or 304 handled by the backing layer)
+     if (r.adr==NULL) { // no buffer: presumably the body went straight to disk
+       if (may_be_hypertext_mime(r.contenttype)) { // may have to be parsed
+         LLint sz;
+         sz=fsize(savename);
+         if (sz>0) { // the file exists and is not empty
+           if (sz < 1024) { // small file --> load it for parsing
+             FILE* fp=fopen(savename,"rb");
+             if (fp) {
+               r.adr=malloct((int)sz + 2);
+               if (r.adr) {
+                 if (fread(r.adr,(int)sz,1,fp) == 1) {
+                   // The two spare bytes allocated above leave room for a terminator
+                   r.adr[sz]='\0';
+                   r.size=sz;
+                   fclose(fp);
+                   fp=NULL;
+                   // remove (temporary) file!
+                   remove(savename);
+                 } else {
+                   // Short or failed read: do not expose an uninitialised buffer
+                   // and keep the file on disk untouched.
+                   freet(r.adr);
+                   r.adr=NULL;
+                 }
+               }
+               if (fp)
+                 fclose(fp);
+             }
+           }
+         }
+       }
+     }
+   }
+ }
+ // EN OF REAL MEDIA HACK
+
+
+ // ---stockage en cache---
+ // stocker dans le cache?
+ /*
+ if (!error) {
+ if (ptr>0) {
+ if (liens[ptr]) {
+ cache_mayadd(&opt,&cache,&r,urladr,urlfil,savename);
+ } else
+ error=1;
+ }
+ }
+ */
+ // ---fin stockage en cache---
+
+
+
+ // DEBUT rattrapage des 301,302,307..
+ // ------------------------------------------------------------
+ if (!error) {
+ ////////{
+ // on a chargé un fichier en plus
+ // if (!error) stat_loaded+=r.size;
+
+ // ------------------------------------------------------------
+ // Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing
+ // ------------------------------------------------------------
+ if ( (r.statuscode==301)
+ || (r.statuscode==302)
+ || (r.statuscode==303)
+ || (r.statuscode==307)
+ ) {
+ //if (r.adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi]
+ //int i=0;
+ char *rn=NULL;
+ // char* p;
+
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ //if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"%s for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+
+
+ {
+ char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
+ int get_it=0; // ne pas prendre le fichier à la même adresse par défaut
+ int reponse=0;
+ mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
+ //
+
+ strcpy(mov_url,r.location);
+
+ // url qque -> adresse+fichier
+ if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {
+ int set_prio_to=0; // pas de priotité fixéd par wizard
+
+ //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
+ // c'est (en gros) la même URL..
+ // si c'est un problème de casse dans le host c'est que le serveur est buggé
+ // ("RFC says.." : host name IS case insensitive)
+ if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
+ // on tourne en rond
+ if (strcmp(mov_fil,urlfil)==0) {
+ error=1;
+ get_it=-1; // ne rien faire
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Can not bear crazy server (%s) for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+ } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois
+ get_it=1;
+ }
+ } else { // adresse différente
+ if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible)
+ // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash)
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ // accepté?
+ if (hts_acceptlink(&opt,ptr,lien_tot,liens,
+ mov_adr,mov_fil,
+ &filters,&filptr,opt.maxfilter,
+ &robots,
+ &set_prio_to,
+ NULL) != 1) { /* nouvelle adresse non refusée ? */
+ get_it=1;
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ }
+ } /* sinon traité normalement */
+ }
+
+ //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
+ if (get_it==1) {
+ // court-circuiter le reste du traitement
+ // et reculer pour mieux sauter
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
+ test_flush;
+ }
+ // canceller lien actuel
+ error=1;
+ strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
+#if HTS_HASH
+#else
+ liens[ptr]->sav_len=-1; // taille invalide
+#endif
+ // noter NOUVEAU lien
+ {
+ char mov_sav[HTS_URLMAXSIZE*2];
+ // calculer lien et éventuellement modifier addresse/fichier
+ if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) {
+ if (hash_read(&hash,mov_sav,"",0)<0) { // n'existe pas déja
+ // enregistrer lien (MACRO) avec SAV IDENTIQUE
+ liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
+ //liens_record(mov_adr,mov_fil,mov_sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ // mode test?
+ liens[lien_tot]->testmode=liens[ptr]->testmode;
+ liens[lien_tot]->link_import=0; // mode normal
+ if (!set_prio_to)
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ else
+ liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page)
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry;
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=liens[ptr]->precedent;
+ lien_tot++;
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
+ test_flush;
+ }
+ }
+
+ }
+ }
+
+ //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
+
+ // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML
+ // sous DOS ca marche pas très bien... mais comme je suis génial url_savename()
+ // est à même de régler ce problème
+ }
+ } // ident_url_xx
+
+ if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie)
+ rn=(char*) calloct(8192,1);
+ if (rn!=NULL) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
+ test_flush;
+ }
+ escape_uri(mov_url);
+ // On prépare une page qui sautera immédiatement sur la bonne URL
+ // Le scanner re-changera, ensuite, cette URL, pour la mirrorer!
+ strcpy(rn,"<HTML>"CRLF);
+ strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcat(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
+ strcat(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
+ strcat(rn,mov_url); // URL
+ strcat(rn,"\">"CRLF);
+ strcat(rn,"<A HREF=\"");
+ strcat(rn,mov_url);
+ strcat(rn,"\">");
+ strcat(rn,"<B>Click here...</B></A>"CRLF);
+ strcat(rn,"</BODY>"CRLF);
+ strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcat(rn,"</HTML>"CRLF);
+
+ // changer la page
+ if (r.adr) { freet(r.adr); r.adr=NULL; }
+ r.adr=rn;
+ r.size=strlen(r.adr);
+ strcpy(r.contenttype,"text/html");
+ }
+ } // get_it==0
+
+ } // bloc
+ // erreur HTTP (ex: 404, not found)
+ } else if (
+ (r.statuscode==412)
+ || (r.statuscode==416)
+ ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
+ if (fexist(liens[ptr]->sav)) {
+ remove(liens[ptr]->sav); // Eliminer
+ if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..)
+#if HDEBUG
+ printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
+#endif
+ if ( (opt.debug>1) && (opt.errlog!=NULL) ) {
+ //if (opt.errlog) {
+ fspc(opt.errlog,"debug"); fprintf(opt.errlog,"Partial file reget (%s) for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+ // enregistrer le MEME lien (MACRO)
+ liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test?
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry;
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=ptr;
+ lien_tot++;
+ //
+ // canceller lien actuel
+ error=1;
+ strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
+#if HTS_HASH
+#else
+ liens[ptr]->sav_len=-1; // taille invalide
+#endif
+ //
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Can not remove old file %s"LF,urlfil);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+ }
+ } else if (r.statuscode!=200) {
+ int can_retry=0;
+
+ // cas où l'on peut reessayer
+ // -2=timeout -3=rateout (interne à httrack)
+ switch(r.statuscode) {
+ //case -1: can_retry=1; break;
+ case -2: if (opt.hostcontrol) { // timeout et retry épuisés
+ if ((opt.hostcontrol & 1) && (liens[ptr]->retry<=0)) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr));
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ } else can_retry=1;
+ } else can_retry=1;
+ break;
+ case -3: if ((opt.hostcontrol) && (liens[ptr]->retry<=0)) { // too slow
+ if (opt.hostcontrol & 2) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr));
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ } else can_retry=1;
+ } else can_retry=1;
+ break;
+ case -4: // connect closed
+ can_retry=1;
+ break;
+ case -5: // other (non fatal) error
+ can_retry=1;
+ break;
+ case -6: // bad SSL handskake
+ can_retry=1;
+ break;
+ case 408: case 409: case 500: case 502: case 504: can_retry=1;
+ break;
+ }
+
+ if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0)
+ if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible)
+ if (opt.errlog) {
+ if ((opt.retry>0) && (can_retry)){
+ fspc(opt.errlog,"error");
+ fprintf(opt.errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r.msg,r.statuscode,opt.retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ } else {
+ if (r.statuscode==-10) { // test OK
+ if ((opt.debug>0) && (opt.errlog!=NULL)) {
+ fspc(opt.errlog,"info");
+ fprintf(opt.errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ }
+ } else {
+ if (strcmp(urlfil,"/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut
+ fspc(opt.errlog,"error");
+ fprintf(opt.errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r.msg,r.statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ } else {
+ if (opt.debug>1) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"No robots.txt rules at %s"LF,urladr);
+ test_flush;
+ }
+ }
+ }
+ }
+ test_flush;
+ }
+
+ // NO error in trop level
+ // due to the "no connection -> previous restored" hack
+ // This prevent the engine from wiping all data if the website has been deleted (or moved)
+ // since last time (which is quite annoying)
+ if (liens[ptr]->precedent != 0) {
+ // ici on teste si on doit enregistrer la page tout de même
+ if (opt.errpage) {
+ store_errpage=1;
+ }
+ } else {
+ if (strcmp(urlfil,"/robots.txt") != 0) {
+ /*
+ This is an error caused by a link entered by the user
+ That is, link(s) entered by user are invalid (404, 500, connect error, proxy error..)
+ If all links entered are invalid, the session failed and we will attempt to restore
+ the previous one
+ Example: Try to update a website which has been deleted remotely: this may delete
+ the website locally, which is really not desired (especially if the website disappeared!)
+ With this hack, the engine won't wipe local files (how clever)
+ */
+ HTS_STAT.stat_errors_front++;
+ }
+ }
+
+ } else { // retry!!
+ if (opt.debug>0 && opt.errlog != NULL) { // on fera un alert si le retry échoue
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r.statuscode,r.msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ test_flush;
+ }
+ // redemander fichier
+ liens_record(urladr,urlfil,savename,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test?
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry-1; // moins 1 retry!
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=liens[ptr]->precedent;
+ lien_tot++;
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fspc(opt.errlog,"panic");
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ }
+ } else {
+ if (opt.errlog) {
+ if (opt.debug>1) {
+ fspc(opt.errlog,"info");
+ fprintf(opt.errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil);
+ }
+ }
+ }
+ if (!store_errpage) {
+ if (r.adr) { freet(r.adr); r.adr=NULL; } // désalloc
+ error=1; // erreur!
+ }
+ }
+ // FIN rattrapage des 301,302,307..
+ // ------------------------------------------------------------
+
+
+
+ } // if !error
+ } // if !error
+
+ if (!error) {
+#if DEBUG_SHOWTYPES
+ if (strstr(REG,r.contenttype)==NULL) {
+ strcat(REG,r.contenttype);
+ strcat(REG,"\n");
+ printf("%s\n",r.contenttype);
+ io_flush;
+ }
+#endif
+
+
+ // ------------------------------------------------------
+ // ok, fichier chargé localement
+ // ------------------------------------------------------
+
+ // Vérificateur d'intégrité
+ #if DEBUG_CHECKINT
+ {
+ int i;
+ for(i=0;i<back_max;i++) {
+ char si[256];
+ sprintf(si,"Test global après back_wait, index %d",i);
+ _CHECKINT(&back[i],si)
+ }
+ }
+ #endif
+
+
+ /* info: updated */
+ /*
+ if (ptr>0) {
+ // "mis à jour"
+ if ((!r.notmodified) && (opt.is_update) && (!store_errpage)) { // page modifiée
+ if (strnotempty(savename)) {
+ HTS_STAT.stat_updated_files++;
+ if (opt.log!=NULL) {
+ //if ((opt.debug>0) && (opt.log!=NULL)) {
+ fspc(opt.log,"info"); fprintf(opt.log,"File updated: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ } else {
+ if (!store_errpage) {
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"File recorded: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+ */
+
+ // ------------------------------------------------------
+ // traitement (parsing)
+ // ------------------------------------------------------
+
+ // traiter
+ if (
+ ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */
+ )
+ && (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */
+ && (r.adr!=NULL) /* HTML Data exists */
+ && (r.size>0) /* And not empty */
+ && (!store_errpage) /* Not an html error page */
+ && (savename[0]!='\0') /* Output filename exists */
+ ) { // ne traiter que le html si autorisé
+ // -- -- -- --
+ // Parsing HTML
+ if (!error) {
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil);
+ }
+ {
+ // I'll have to segment this part
+#include "htsparse.c"
+ }
+ }
+ // Fin parsing HTML
+ // -- -- -- --
+
+
+ } // si text/html
+ // -- -- --
+ else { // sauver fichier quelconque
+ // -- -- --
+ // sauver fichier
+
+
+ /* En cas d'erreur, vérifier que fichier d'erreur existe */
+ if (strnotempty(savename) == 0) { // chemin de sauvegarde existant
+ if (strcmp(urlfil,"/robots.txt")==0) { // pas robots.txt
+ if (store_errpage) { // c'est une page d'erreur
+ int create_html_warning=0;
+ int create_gif_warning=0;
+ switch (ishtml(urlfil)) { /* pas fichier html */
+ case 0: /* non html */
+ {
+ char buff[256];
+ guess_httptype(buff,urlfil);
+ if (strcmp(buff,"image/gif")==0)
+ create_gif_warning=1;
+ }
+ break;
+ case 1: /* html */
+ if (!r.adr) {
+ }
+ break;
+ default: /* don't know.. */
+ break;
+ }
+ /* Créer message d'erreur ? */
+ if (create_html_warning) {
+ char* adr=(char*)malloct(strlen(HTS_DATA_ERROR_HTML)+1100);
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Creating HTML warning file (%s)"LF,r.msg);
+ test_flush;
+ }
+ if (adr) {
+ if (r.adr) {
+ freet(r.adr);
+ r.adr=NULL;
+ }
+ sprintf(adr,HTS_DATA_ERROR_HTML,r.msg);
+ r.adr=adr;
+ }
+ } else if (create_gif_warning) {
+ char* adr=(char*)malloct(HTS_DATA_UNKNOWN_GIF_LEN);
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Creating GIF dummy file (%s)"LF,r.msg);
+ test_flush;
+ }
+ if (r.adr) {
+ freet(r.adr);
+ r.adr=NULL;
+ }
+ memcpy(adr, HTS_DATA_UNKNOWN_GIF, HTS_DATA_UNKNOWN_GIF_LEN);
+ r.adr=adr;
+ }
+ }
+ }
+ }
+
+ if (strnotempty(savename) == 0) { // pas de chemin de sauvegarde
+ /* robots.txt handling: collect the "Disallow:" paths that apply to us
+    (sections for "*" or, with priority, for "httrack") and register them
+    through checkrobots_set(). buff holds one path per line for the rule
+    engine; infobuff holds the same paths comma-separated for the logs. */
+ if (strcmp(urlfil,"/robots.txt")==0) { // robots.txt
+   if (r.adr) {
+     int bptr=0;
+     char line[1024];
+     char buff[8192];
+     char infobuff[8192];
+     int record=0;   // 0: ignore section, 1: "*" section, 2: explicit "httrack" section (locked)
+     line[0]='\0'; buff[0]='\0'; infobuff[0]='\0';
+     //
+#if DEBUG_ROBOTS
+     printf("robots.txt dump:\n%s\n",r.adr);
+#endif
+     do {
+       bptr+=binput(r.adr+bptr, line, sizeof(line) - 2);
+       if (strfield(line,"user-agent:")) {
+         char* a;
+         a=line+11;
+         while(*a==' ') a++;   // skip space(s)
+         if (*a == '*') {
+           if (record != 2)
+             record=1;   // applies to us
+         } else if (strfield(a,"httrack")) {
+           buff[0]='\0';   // start recording again: explicit rules replace the generic ones
+           infobuff[0]='\0';
+           record=2;   // locked
+#if DEBUG_ROBOTS
+           printf("explicit disallow for httrack\n");
+#endif
+         }
+         else record=0;
+       } else if (record) {
+         if (strfield(line,"disallow:")) {
+           char* a;
+           a=strchr(line,'#');   // strip trailing comment
+           if (a) *a='\0';
+           while((line[strlen(line)-1]==' ')
+             || (line[strlen(line)-1]==10)
+             || (line[strlen(line)-1]==13))
+             line[strlen(line)-1]='\0';   // strip trailing blanks / line ends
+           a=line+9;
+           while((*a==' ') || (*a==10) || (*a==13))
+             a++;   // skip leading blanks
+           if (strnotempty(a)) {
+             if (strcmp(a,"/") != 0) { /* ignoring disallow: / */
+               if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) {
+                 strcat(buff,a);
+                 strcat(buff,"\n");
+                 // infobuff grows faster than buff (", " versus "\n" per entry), so it
+                 // needs its own bound: the data comes from a remote server and many
+                 // short rules would otherwise overflow this stack buffer.
+                 if ( (strlen(infobuff) + strlen(a) + 8) < sizeof(infobuff)) {
+                   if (strnotempty(infobuff)) strcat(infobuff,", ");
+                   strcat(infobuff,a);
+                 }
+               }
+             } else {
+               if (opt.errlog!=NULL) {
+                 fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: %s robots.txt rules are too restrictive, ignoring /"LF,urladr);
+                 test_flush;
+               }
+             }
+           }
+         }
+       }
+     } while( (bptr<r.size) && (strlen(buff) < (sizeof(buff) - 32) ) );
+     if (strnotempty(buff)) {
+       checkrobots_set(&robots,urladr,buff);
+       if (opt.log!=NULL) {
+         if (opt.log != opt.errlog) {   // avoid logging the same note twice into one file
+           fspc(opt.log,"info"); fprintf(opt.log,"Note: robots.txt forbidden links for %s are: %s"LF,urladr,infobuff);
+           test_flush;
+         }
+       }
+       if (opt.errlog!=NULL) {
+         fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: due to %s remote robots.txt rules, links begining with these path will be forbidden: %s (see in the options to disable this)"LF,urladr,infobuff);
+         test_flush;
+       }
+     }
+   }
+ }
+ } else if (r.is_write) { // déja sauvé sur disque
+ /*
+ if (!ishttperror(r.statuscode))
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r.size;
+ */
+ //printf("ok......\n");
+ } else {
+ // Si on doit sauver une page HTML sans la scanner, cela signifie que le niveau de
+ // récursion nous en empêche
+ // Dans ce cas on met un fichier indiquant ce fait
+ // Si par la suite on doit retraiter ce fichier avec un niveau de récursion plus
+ // fort, on supprimera le readme, et on scannera le fichier html!
+ // note: sauté si store_errpage (càd si page d'erreur, non à scanner!)
+ if ( (is_hypertext_mime(r.contenttype)) && (!store_errpage) && (r.size>0)) { // c'est du html!!
+ char tempo[HTS_URLMAXSIZE*2];
+ FILE* fp;
+ tempo[0]='\0';
+ strcpy(tempo,savename);
+ strcat(tempo,".readme");
+
+#if HTS_DOSNAME
+ // remplacer / par des slash arrière
+ {
+ int i=0;
+ while(tempo[i]) {
+ if (tempo[i]=='/')
+ tempo[i]='\\';
+ i++;
+ }
+ }
+ // a partir d'ici le slash devient antislash
+#endif
+
+ if ((fp=fopen(tempo,"wb"))!=NULL) {
+ fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION""CRLF""CRLF);
+ fprintf(fp,"The file %s has not been scanned by HTS"CRLF,savename);
+ fprintf(fp,"Some links contained in it may be unreachable locally."CRLF);
+ fprintf(fp,"If you want to get these files, you have to set an upper recurse level, ");
+ fprintf(fp,"and to rescan the URL."CRLF);
+ fclose(fp);
+#if HTS_WIN==0
+ chmod(tempo,HTS_ACCESS_FILE);
+#endif
+ usercommand(0,NULL,antislash(tempo));
+ }
+
+
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning: store %s without scan: %s"LF,r.contenttype,savename);
+ test_flush;
+ }
+ } else {
+ if ((opt.getmode & 2)!=0) { // ok autorisé
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Store %s: %s"LF,r.contenttype,savename);
+ test_flush;
+ }
+ } else { // lien non autorisé! (ex: cgi-bin en html)
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"non-html file ignored after upload at %s : %s"LF,urladr,urlfil);
+ test_flush;
+ }
+ freet(r.adr); r.adr=NULL;
+ }
+ }
+
+ //printf("extern=%s\n",r.contenttype);
+
+ // ATTENTION C'EST ICI QU'ON SAUVE LE FICHIER!!
+ if (r.adr) {
+ if (filesave(r.adr,(int)r.size,savename)!=0) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s"LF,savename);
+ test_flush;
+ }
+ } else {
+ /*
+ if (!ishttperror(r.statuscode))
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r.size;
+ */
+ }
+ }
+
+ }
+
+
+ /* Parsing of other media types (java, ram..) */
+ /*
+ if (strfield2(r.contenttype,"audio/x-pn-realaudio")) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): parsing %s"LF,savename); test_flush;
+ }
+ if (fexist(savename)) { // ok, existe bien!
+ FILE* fp=fopen(savename,"r+b");
+ if (fp) {
+ if (!fseek(fp,0,SEEK_SET)) {
+ char line[HTS_URLMAXSIZE*2];
+ linput(fp,line,HTS_URLMAXSIZE);
+ if (strnotempty(line)) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): detected %s"LF,line); test_flush;
+ }
+ }
+ }
+ fclose(fp);
+ }
+ }
+ } else */
+ if (opt.parsejava) {
+ if (strlen(savename)>6) { // fichier.class
+ if (strfield(savename+strlen(savename)-6,".class")) { // ok c'est une classe
+ if (fexist(savename)) { // ok, existe bien!
+ char err_msg[1100];
+ int r;
+ err_msg[0]='\0';
+
+ //##char* buffer;
+ // JavaParsing f34R!
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing %s"LF,savename); test_flush;
+ }
+
+ //##buffer=(char*) malloct(32768);
+ //##if (buffer) {
+ //
+ //##strcpy(buffer,"$BUFFER$");
+ //##hts_add_file(buffer); // déclarer buffer
+ while(hts_add_file(NULL,-1) >= 0); // clear chain
+
+ r=hts_parse_java(savename,(char*) &err_msg); // parsing
+ if (!r) { // error
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to parse java file %s : %s"LF,savename,err_msg);
+ test_flush;
+ }
+ } else { // ok
+ char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2],save[HTS_URLMAXSIZE*2]; // nom du fichier à sauver dans la boucle
+ char codebase[HTS_URLMAXSIZE*2]; // codebase classe java
+ char lien[HTS_URLMAXSIZE*2];
+ //##char* a;
+ int file_position;
+ int pass_fix,prio_fix;
+ codebase[0]='\0';
+ //
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing finished, now copying links.."LF); test_flush;
+ }
+ // recopie de "creer le lien"
+ //
+
+ // adr = c'est la même
+ // fil et save: save2 et fil2
+ prio_fix=maximum(liens[ptr]->depth-1,0);
+ pass_fix=max(liens[ptr]->pass2,numero_passe);
+ if (liens[ptr]->cod) strcpy(codebase,liens[ptr]->cod); // codebase valable pour tt les classes descendantes
+ if (strnotempty(codebase)==0) { // pas de codebase, construire
+ char* a;
+ strcpy(codebase,liens[ptr]->fil);
+ a=codebase+strlen(codebase)-1;
+ while((*a) && (*a!='/') && ( a > codebase)) a--;
+ if (*a=='/')
+ *(a+1)='\0'; // couper
+ } else { // couper http:// éventuel
+ if (strfield(codebase,"http://")) {
+ char tempo[HTS_URLMAXSIZE*2];
+ char* a=codebase+7;
+ a=strchr(a,'/'); // après host
+ if (a) { // ** msg erreur et vérifier?
+ strcpy(tempo,a);
+ strcpy(codebase,tempo); // couper host
+ } else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Unexpected strstr error in base %s"LF,codebase);
+ test_flush;
+ }
+ }
+ }
+ }
+ //##a=buffer;
+ //##strcat(buffer,"&"); // fin du buffer
+ if (!((int) strlen(codebase)<HTS_URLMAXSIZE)) { // trop long
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Codebase too long, parsing skipped (%s)"LF,codebase);
+ test_flush;
+ }
+ //##a=NULL;
+ while(hts_add_file(NULL,-1) >= 0); // clear chain
+ }
+ while ( (file_position=hts_add_file(lien,-1)) >= 0 ) {
+ int dejafait=0;
+ /* //##
+ char* b;
+
+ // prochain fichier à noter!
+ lien[0]='\0';
+ b=strchr(a,'&'); // marqueur de fin de chaine (voir hts_add_file)
+ if (b) {
+ if ( ( ((int) b-(int) a) + strlen(codebase)) < HTS_URLMAXSIZE)
+ strncat(lien,a,(int) b-(int) a); // nom du fichier
+ else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Error: Java-Parser generated link that exceeds %d bytes"LF,HTS_URLMAXSIZE);
+ test_flush;
+ }
+ }
+ } else a=NULL;
+
+ if (strnotempty(lien)==0) a=NULL; // fin
+ if (a)
+ a=b+1;
+ */
+
+ if (strnotempty(lien)) {
+
+ // calculer les chemins et noms de sauvegarde
+ if (ident_url_relatif(lien,urladr,codebase,adr,fil)>=0) { // reformage selon chemin
+ int r;
+
+ // patcher opt pour garder structure originale!! (on ne patche pas les noms dans la classe java!)
+ //##if (!strstr(lien,"://")) { // PAS tester les http://.. inutile (on ne va pas patcher le binaire :-( )
+ if (1) {
+ char tempo[HTS_URLMAXSIZE*2];
+ int a,b;
+ tempo[0]='\0';
+ a=opt.savename_type;
+ b=opt.savename_83;
+ opt.savename_type=0;
+ opt.savename_83=0;
+ // note: adr,fil peuvent être patchés
+ r=url_savename(adr,fil,save,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe);
+ opt.savename_type=a;
+ opt.savename_83=b;
+ if (r != -1) {
+ if (savename) {
+ if (lienrelatif(tempo,save,savename)==0) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
+ test_flush;
+ }
+ //
+ // xxc xxc xxc xxc TODO java:
+ // rebuild the java class with patched strings...
+ //
+ if (strlen(tempo)<=strlen(lien)) {
+ FILE* fp=fopen(savename,"r+b");
+ if (fp) {
+ if (!fseek(fp,file_position,SEEK_SET)) {
+ //unsigned short int string_length=strlen(tempo);
+ //fwrite(&valint,sizeof(string_length),1,fp);
+ // xxc xxc ARGH! SI la taille est <, décaler le code ?!
+ } else {
+ if (opt.log!=NULL) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to patch: %s"LF,savename);
+ test_flush;
+ }
+ }
+ fclose(fp);
+ } else {
+ if (opt.log!=NULL) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to open: %s"LF,savename);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt.log!=NULL) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): link too long, unable to write it: %s"LF,tempo);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): file not caught: %s"LF,lien); test_flush;
+ }
+ r=-1;
+ }
+ //
+ if (r != -1) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush;
+ }
+
+ // modifié par rapport à l'autre version (cf prio_fix notamment et save2)
+
+ // vérifier que le lien n'a pas déja été noté
+ // si c'est le cas, alors il faut s'assurer que la priorité associée
+ // au fichier est la plus grande des deux priorités
+ //
+ // On part de la fin et on essaye de se presser (économise temps machine)
+#if HTS_HASH
+ {
+ int i=hash_read(&hash,save,"",0); // lecture type 0 (sav)
+ if (i>=0) {
+ liens[i]->depth=maximum(liens[i]->depth,prio_fix);
+ dejafait=1;
+ }
+ }
+#else
+ {
+ int l;
+ int i;
+ l=strlen(save);
+ for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
+ if (liens[i]->sav_len==l) { // même taille de chaîne
+ if (strcmp(liens[i]->sav,save)==0) { // existe déja
+ liens[i]->depth=maximum(liens[i]->depth,prio_fix);
+ dejafait=1;
+ }
+ }
+ }
+ }
+#endif
+
+
+ if (!dejafait) {
+ //
+ // >>>> CREER LE LIEN JAVA <<<<
+
+ // enregistrer fichier de java (MACRO)
+ liens_record(adr,fil,save,"","");
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ // if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_extuninit; // désallocation mémoire & buffers
+ return 0;
+ }
+
+ // mode test?
+ liens[lien_tot]->testmode=0; // pas mode test
+
+ liens[lien_tot]->link_import=0; // pas mode import
+
+ // écrire autres paramètres de la structure-lien
+ //if (meme_adresse)
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ //else // sinon l'objet père est le précédent lui même
+ // liens[lien_tot]->premier=ptr;
+
+ liens[lien_tot]->precedent=ptr;
+ // noter la priorité
+ liens[lien_tot]->depth=prio_fix;
+ liens[lien_tot]->pass2=max(pass_fix,numero_passe);
+ liens[lien_tot]->retry=opt.retry;
+
+ //strcpy(liens[lien_tot]->adr,adr);
+ //strcpy(liens[lien_tot]->fil,fil);
+ //strcpy(liens[lien_tot]->sav,save);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
+ test_flush;
+ }
+
+ lien_tot++; // UN LIEN DE PLUS
+ }
+ }
+ }
+
+ }
+ }
+
+ }
+ //##// effacer buffer temporaire
+ //##if (buffer) freet(buffer); buffer=NULL;
+ //##} // if buffer
+ } // if exist
+ } // if .class
+ } // if strlen-savename
+ } // if opt.parsejava
+
+
+
+ } // text/html ou autre
+
+ } // if !error
+
+
+jump_if_done:
+ // libérer les liens
+ if (r.adr) { freet(r.adr); r.adr=NULL; } // libérer la mémoire!
+
+ // prochain lien
+ ptr++;
+
+ // faut-il sauter le(s) lien(s) suivant(s)? (fichiers images à passer après les html)
+ if (opt.getmode & 4) { // sauver les non html après
+ // sauter les fichiers selon la passe
+ if (!numero_passe) {
+ while((ptr<lien_tot)?( liens[ptr]->pass2):0) ptr++;
+ } else {
+ while((ptr<lien_tot)?( ! liens[ptr]->pass2):0) ptr++;
+ }
+ if (ptr>=lien_tot) { // fin de boucle
+ if (!numero_passe) { // première boucle
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fprintf(opt.log,LF"Now getting non-html files..."LF);
+ test_flush;
+ }
+ numero_passe=1; // seconde boucle
+ ptr=0;
+ // prochain pass2
+ while((ptr<lien_tot)?(!liens[ptr]->pass2):0) ptr++;
+
+ //printf("first link==%d\n");
+
+ }
+ }
+ }
+
+ // a-t-on dépassé le quota?
+ if ((opt.maxsite>0) && (HTS_STAT.stat_bytes>=opt.maxsite)) {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,opt.maxsite);
+ test_flush;
+ }
+ ptr=lien_tot;
+ } else if ((opt.maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt.maxtime)) {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"More than %d seconds passed.. giving up"LF,opt.maxtime);
+ test_flush;
+ }
+ ptr=lien_tot;
+ } else if (exit_xh) { // sortir
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ ptr=lien_tot;
+ }
+ } while(ptr<lien_tot);
+ //
+ //
+ //
+
+ /*
+ Ensure the index is being closed
+ */
+ HT_INDEX_END;
+
+ /*
+ updating-a-remotely-deteted-website hack
+ no much data transfered, no data saved
+ <no files successfulyl saved>
+ we assume that something was bad (no connection)
+ just backup old cache and restore everything
+ */
+ if (
+ (HTS_STAT.stat_files <= 0)
+ &&
+ (HTS_STAT.HTS_TOTAL_RECV < 32768) /* should be fine */
+ ) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"No data seems to have been transfered during this session! : restoring previous one!"LF);
+ test_flush;
+ }
+ XH_uninit;
+ if ( (fexist(fconcat(opt.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt.path_log,"hts-cache/old.ndx"))) ) {
+ remove(fconcat(opt.path_log,"hts-cache/new.dat"));
+ remove(fconcat(opt.path_log,"hts-cache/new.ndx"));
+ remove(fconcat(opt.path_log,"hts-cache/new.lst"));
+ remove(fconcat(opt.path_log,"hts-cache/new.txt"));
+ rename(fconcat(opt.path_log,"hts-cache/old.dat"),fconcat(opt.path_log,"hts-cache/new.dat"));
+ rename(fconcat(opt.path_log,"hts-cache/old.ndx"),fconcat(opt.path_log,"hts-cache/new.ndx"));
+ rename(fconcat(opt.path_log,"hts-cache/old.lst"),fconcat(opt.path_log,"hts-cache/new.lst"));
+ rename(fconcat(opt.path_log,"hts-cache/old.txt"),fconcat(opt.path_log,"hts-cache/new.txt"));
+ }
+ exit_xh=2; /* interrupted (no connection detected) */
+ return 1;
+ }
+
+ // info text
+ if (cache.txt) {
+ fclose(cache.txt); cache.txt=NULL;
+ }
+
+ // purger!
+ if (cache.lst) {
+ fclose(cache.lst); cache.lst=NULL;
+ if (opt.delete_old) {
+ FILE *old_lst,*new_lst;
+ //
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=3;
+#endif
+ //
+ old_lst=fopen(fconcat(opt.path_log,"hts-cache/old.lst"),"rb");
+ if (old_lst) {
+ LLint sz=fsize(fconcat(opt.path_log,"hts-cache/new.lst"));
+ new_lst=fopen(fconcat(opt.path_log,"hts-cache/new.lst"),"rb");
+ if ((new_lst) && (sz>0)) {
+ char* adr=(char*) malloct((INTsys)sz);
+ if (adr) {
+ if ((int) fread(adr,1,(INTsys)sz,new_lst) == sz) {
+ char line[1100];
+ int purge=0;
+ while(!feof(old_lst)) {
+ linput(old_lst,line,1000);
+ if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau?
+ char file[HTS_URLMAXSIZE*2];
+ strcpy(file,opt.path_html);
+ strcat(file,line+1);
+ file[strlen(file)-1]='\0';
+ if (fexist(file)) { // toujours sur disque: virer
+ if (opt.log) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Purging %s"LF,file);
+ }
+ remove(file); purge=1;
+ }
+ }
+ }
+ {
+ fseek(old_lst,0,SEEK_SET);
+ while(!feof(old_lst)) {
+ linput(old_lst,line,1000);
+ while(strnotempty(line) && (line[strlen(line)-1]!='/') && (line[strlen(line)-1]!='\\')) {
+ line[strlen(line)-1]='\0';
+ }
+ if (strnotempty(line))
+ line[strlen(line)-1]='\0';
+ if (strnotempty(line))
+ if (!strstr(adr,line)) { // non trouvé?
+ char file[HTS_URLMAXSIZE*2];
+ strcpy(file,opt.path_html);
+ strcat(file,line+1);
+ while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait)
+ purge=1;
+ if (opt.log) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Purging directory %s/"LF,file);
+ while(strnotempty(file) && (file[strlen(file)-1]!='/') && (file[strlen(file)-1]!='\\')) {
+ file[strlen(file)-1]='\0';
+ }
+ if (strnotempty(file))
+ file[strlen(file)-1]='\0';
+ }
+ }
+ }
+ }
+ }
+ //
+ if (!purge) {
+ if (opt.log) {
+ fprintf(opt.log,"No files purged"LF);
+ }
+ }
+ }
+ freet(adr);
+ }
+ fclose(new_lst);
+ }
+ fclose(old_lst);
+ }
+ //
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=0;
+#endif
+ }
+ }
+ // fin purge!
+
+ // Indexation
+ if (opt.kindex)
+ index_finish(opt.path_html,opt.kindex);
+
+ // afficher résumé dans log
+ if (opt.log!=NULL) {
+ int error = fspc(NULL,"error");
+ int warning = fspc(NULL,"warning");
+ int info = fspc(NULL,"info");
+ char htstime[256];
+ // int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart));
+ int n=(int) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart)));
+
+ sec2str(htstime,time_local()-HTS_STAT.stat_timestart);
+ //fprintf(opt.log,LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n);
+ fprintf(opt.log,LF"HTTrack mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]",htstime,(int)lien_tot-1,(int)HTS_STAT.stat_files,(int)HTS_STAT.stat_bytes,(int)HTS_STAT.HTS_TOTAL_RECV,(int)n);
+ if (HTS_STAT.total_packed) {
+ int packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked);
+ fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,packed_ratio);
+ }
+ fprintf(opt.log,LF);
+ if (error)
+ fprintf(opt.log,"(%d errors, %d warnings, %d messages)"LF,error,warning,info);
+ else
+ fprintf(opt.log,"(No errors, %d warnings, %d messages)"LF,warning,info);
+ test_flush;
+ }
+#if DEBUG_HASH
+ // noter les collisions
+ {
+ int i;
+ int empty1=0,empty2=0,empty3=0;
+ for(i=0;i<HTS_HASH_SIZE;i++) {
+ if (hash.hash[0][i] == -1)
+ empty1++;
+ if (hash.hash[1][i] == -1)
+ empty2++;
+ if (hash.hash[2][i] == -1)
+ empty3++;
+ }
+ printf("\n");
+ printf("Debug info: Hash-table report\n");
+ printf("Number of files entered: %d\n",hashnumber);
+ printf("Table size: %d\n",HTS_HASH_SIZE);
+ printf("\n");
+ printf("Longest chain sav: %d, empty: %d\n",longest_hash[0],empty1);
+ printf("Longest chain adr,fil: %d, empty: %d\n",longest_hash[1],empty2);
+ printf("Longest chain former_adr/fil: %d, empty: %d\n",longest_hash[2],empty3);
+ printf("\n");
+ }
+#endif
+ // fin afficher résumé dans log
+
+ // désallocation mémoire & buffers
+
+ XH_uninit
+
+ return 1; // OK
+}
+// version 2, used by the rest of this file (opt is accessed through a pointer here)
+// flush both log streams when opt->flush is set, so that the log files can be read progressively
+#undef test_flush
+#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); }
+
+
+// Estimate the current transfer rate.
+// A little bit complex, but not too much:
+/*
+  .. : idle
+  ^  : event
+
+  ----|----|----|----|----|----|----|----|---->
+      1    2    3    4    5    6    7    8    9  time (seconds)
+  ----|----|----|----|----|----|----|----|---->
+  ^........^.........^.........^.........^....   timer 0
+  ----^.........^.........^.........^.........   timer 1
+      0    1    0    1    0    1    0            timer N sets its statistics
+           *         *         *         *       timer 0 resync timer 1
+
+  Therefore, every second, the transfer rate is refreshed using a
+  2-second measurement window.
+
+  Returns 1 when one of the two timers refreshed HTS_STAT.rate (or at least
+  reached its 2000-tick limit), 0 otherwise.
+  The timestamps come from mtime_local(); the /1000 scaling suggests
+  milliseconds - TODO confirm.
+*/
+int engine_stats(void) {
+#if 0
+  static FILE* debug_fp=NULL;    /* ok */
+  if (!debug_fp)
+    debug_fp=fopen("esstat.txt","wb");
+#endif
+  /* reset the counters: every field of the chain, nbk included, is set to zero */
+  HTS_STAT.stat_nsocket=HTS_STAT.stat_errors=HTS_STAT.nbk=0;
+  HTS_STAT.nb=0;
+  /* no estimate until more than 2048 bytes have been received */
+  if (HTS_STAT.HTS_TOTAL_RECV>2048) {
+    TStamp cdif=mtime_local();
+    int i;
+
+    for(i=0;i<2;i++) {
+      if ( (cdif - HTS_STAT.istat_timestart[i]) >= 2000) {
+        TStamp dif;
+#if 0
+fprintf(debug_fp,"set timer %d\n",i); fflush(debug_fp);
+#endif
+        dif=cdif - HTS_STAT.istat_timestart[i];
+        if ((TStamp)(dif/1000)>0) {
+          /* bytes received since this timer was last restarted */
+          LLint byt=(HTS_STAT.HTS_TOTAL_RECV - HTS_STAT.istat_bytes[i]);
+          HTS_STAT.rate=(LLint)((TStamp) ((TStamp)byt/(dif/1000)));
+          HTS_STAT.istat_idlasttimer=i;      // this timer recently sets the stats
+          //
+          /* restart the measurement window of this timer */
+          HTS_STAT.istat_bytes[i]=HTS_STAT.HTS_TOTAL_RECV;
+          HTS_STAT.istat_timestart[i]=cdif;
+        }
+        return 1;     /* refreshed */
+      }
+    }
+
+    // resynchronization between timer 0 (master) and 1 (slave)
+    // timer #0 resync timer #1 when reaching 1 second limit
+    if (HTS_STAT.istat_reference01 != HTS_STAT.istat_timestart[0]) {
+      if ( (cdif - HTS_STAT.istat_timestart[0]) >= 1000) {
+#if 0
+fprintf(debug_fp,"resync timer 1\n"); fflush(debug_fp);
+#endif
+        HTS_STAT.istat_bytes[1]=HTS_STAT.HTS_TOTAL_RECV;
+        HTS_STAT.istat_timestart[1]=cdif;
+        /* remember which start of timer 0 this resync was done for */
+        HTS_STAT.istat_reference01=HTS_STAT.istat_timestart[0];
+      }
+    }
+
+  }
+  return 0;
+}
+
+
+// Ban a host (too slow, etc.): register a "-host/*" filter, shut down the
+// background transfers that use this host and cancel its links.
+//
+//   opt         engine options (maxfilter, debug, flush, log, errlog)
+//   liens       link table (lien_tot entries); the adr of each matching
+//               entry is overwritten with "!"
+//   ptr         not used by this function
+//   back        background transfer slots (back_max entries)
+//   filters     filter table, *filptr being the number of filters in use
+//   filter_max  capacity of the filter table as known by the caller
+//   host        name of the host to ban
+//
+// NOTE(review): filters is received by value; when filters_init() has to
+// move the table, the caller's own pointer is not updated - confirm callers.
+void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char** filters,int filter_max,int* filptr,char* host) {
+  //int l;
+  int i;
+
+  if (host[0]=='!')
+    return;    // error.. already cancelled.. strange.. should not happen
+
+  /* sanity check: grow the filter table when it is about to be full */
+  if (*filptr + 1 >= opt->maxfilter) {
+    opt->maxfilter += HTS_FILTERSINC;
+    if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) {
+      printf("PANIC! : Too many filters : >%d [%d]\n",*filptr,__LINE__);
+      if (opt->errlog) {
+        fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*filptr);
+        fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
+        fflush(opt->errlog);
+      }
+      abort();
+    }
+    //opt->filters.filters=&filters;
+  }
+
+  // forbid the host
+  if (*filptr < filter_max) {
+    /* a filter slot holds HTS_URLMAXSIZE*2 bytes (layout set up by
+       filters_init); "-" + host + "/*" + terminating NUL must fit in it */
+    if (strlen(host) + 4 <= (size_t) (HTS_URLMAXSIZE*2)) {
+      strcpy(filters[*filptr],"-");
+      strcat(filters[*filptr],host);
+      strcat(filters[*filptr],"/*");    // host/ * forbidden
+      (*filptr)++; *filptr=minimum(*filptr,filter_max);
+    } else if (opt->errlog) {
+      fprintf(opt->errlog,"Host name too long, unable to register the ban filter"LF);
+      test_flush;
+    }
+  }
+
+  // oops
+  if (strlen(host)<=1) {    // err?? length <= 1
+    if (strcmp(host,"file://")) {
+      //## if (host[0]!=lOCAL_CHAR) {  // not local
+      if (opt->log!=NULL) {
+        fprintf(opt->log,"PANIC! HostCancel detected memory leaks [char %d]"LF,host[0]); test_flush;
+      }
+      return;  // give up
+    }
+  }
+
+  // shut down the connections to this host
+  for(i=0;i<back_max;i++) {
+    if (back[i].status>=0)    // receiving OR ready
+      if (strfield2(back[i].url_adr,host)) {
+#if HTS_DEBUG_CLOSESOCK
+        DEBUG_W("host control: deletehttp\n");
+#endif
+        back[i].status=0;  // finished
+        if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
+        back[i].r.soc=INVALID_SOCKET;
+        back[i].r.statuscode=-2;    // timeout (no matter if it is a traffic jam)
+        strcpy(back[i].r.msg,"Link Cancelled by host control");
+
+        if ((opt->debug>1) && (opt->log!=NULL)) {
+          fprintf(opt->log,"Shutdown: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+        }
+      }
+  }
+
+  // cancel the links of this host
+  //l=strlen(host);
+  for(i=0;i<lien_tot;i++) {
+    //if (liens[i]->adr_len==l) {    // same string length
+    // Bounded length computation (never scans more than 1020 bytes)
+    if (liens[i]) {
+      if (liens[i]->adr) {
+        int l = 0;
+        while((liens[i]->adr[l]) && (l<1020)) l++;
+        if ((l > 0) && (l<1020)) {   // safety
+          if (strfield2(jump_identification(liens[i]->adr),host)) {    // host
+            if ((opt->debug>1) && (opt->log!=NULL)) {
+              fprintf(opt->log,"Cancel: %s%s"LF,liens[i]->adr,liens[i]->fil); test_flush;
+            }
+            strcpy(liens[i]->adr,"!");    // cancel (invalidates the hash)
+#if HTS_HASH
+#else
+            liens[i]->sav_len=-1;         // invalid length
+#endif
+            // the hash entry is not removed: if the link is met again, sav is re-checked
+          }
+        } else {
+          /* empty or unterminated address: dump it for diagnosis */
+          if (opt->log!=NULL) {
+            char dmp[1040];
+            dmp[0]='\0';
+            strncat(dmp,liens[i]->adr,1024);
+            fprintf(opt->log,"WARNING! HostCancel detected memory leaks [len %d at %d]"LF,l,i); test_flush;
+            fprintf(opt->log,"dump 1024 bytes (address %p): "LF"%s"LF,liens[i]->adr,dmp); test_flush;
+          }
+        }
+      } else {
+        if (opt->log!=NULL) {
+          fprintf(opt->log,"WARNING! HostCancel detected memory leaks [adr at %d]"LF,i); test_flush;
+        }
+      }
+    } else {
+      if (opt->log!=NULL) {
+        fprintf(opt->log,"WARNING! HostCancel detected memory leaks [null at %d]"LF,i); test_flush;
+      }
+    }
+    //}
+  }
+}
+
+
+/* Manage the buffer holding the list of already checked folders */
+/*  0  : return the buffer, allocating it (65536 bytes, content "#") if needed */
+/*  1  : init (free the current buffer, then allocate a fresh one) */
+/* -1  : off (free the current buffer, NULL is returned) */
+/* >=2 : ensure that the buffer is at least 'init' bytes large */
+/* Returns the current buffer address (it may change after a growth), */
+/* or NULL if no buffer is available */
+char* structcheck_init(int init) {
+  char** structcheck_buff;
+  int* structcheck_buff_size;
+  NOSTATIC_RESERVE(structcheck_buff, char*, 1);
+  NOSTATIC_RESERVE(structcheck_buff_size, int, 1);
+  if (init < 2) {
+    if (init) {    /* 1 or -1: release the current buffer */
+      if (*structcheck_buff)
+        freet(*structcheck_buff);
+      *structcheck_buff=NULL;
+    }
+    if (init != -1) {
+      if (*structcheck_buff==NULL) {
+        *structcheck_buff_size = 65536;
+        *structcheck_buff=(char*) malloct(*structcheck_buff_size);    // freed by xh_xx
+        if (*structcheck_buff)
+          strcpy(*structcheck_buff,"#");
+      }
+    }
+  } else {        /* Ensure enough room */
+    if (*structcheck_buff_size < init) {
+      int had_buffer = (*structcheck_buff != NULL);
+      int wanted = init + 65536;
+      /* grow through a temporary: on failure the old block remains valid */
+      char* grown=(char*) realloct(*structcheck_buff, wanted);
+      if (grown != NULL) {
+        *structcheck_buff = grown;
+        *structcheck_buff_size = wanted;
+        if (!had_buffer)
+          strcpy(grown,"#");    /* brand new block: start with an empty list */
+      } else if (had_buffer) {  /* Reset :( */
+        /* keep the existing block (neither leaked nor moved) and empty the list */
+        strcpy(*structcheck_buff,"#");
+      } else {                  /* Reset :( */
+        *structcheck_buff_size = 65536;
+        *structcheck_buff=(char*) malloct(*structcheck_buff_size);    // freed by xh_xx
+        if (*structcheck_buff)
+          strcpy(*structcheck_buff,"#");
+      }
+    }
+  }
+  return *structcheck_buff;
+}
+
+/* Allocate or enlarge the filter table */
+/* ptrfilters : address of the table pointer (NULL table: first allocation) */
+/* maxfilter  : requested number of filters (at least 128 are reserved) */
+/* filterinc  : 0 to clear every slot, otherwise only the slots starting at */
+/*              (capacity - filterinc) are cleared */
+/* The table is an array of pointers into one data block (filters[0]), each */
+/* slot being HTS_URLMAXSIZE*2 bytes large */
+/* Returns the capacity, or 0 on allocation failure (*ptrfilters is then NULL) */
+int filters_init(char*** ptrfilters, int maxfilter, int filterinc) {
+  char** filters = *ptrfilters;
+  int filter_max=maximum(maxfilter, 128);
+  size_t index_size = sizeof(char*) * (filter_max+2);
+  size_t data_size = sizeof(char) * (filter_max+2) * (HTS_URLMAXSIZE*2);
+
+  /* pointer array */
+  if (filters == NULL) {
+    filters=(char**) malloct(index_size);
+    if (filters != NULL)
+      memset(filters, 0, index_size);  // filters[0] == 0
+  } else {
+    char** grown=(char**) realloct(filters, index_size);
+    if (grown == NULL) {
+      /* the old array is still allocated: release it and its data block */
+      if (filters[0] != NULL) {
+        freet(filters[0]);
+      }
+      freet(filters);
+    }
+    filters=grown;
+  }
+
+  /* data block shared by all the slots */
+  if (filters) {
+    if (filters[0] == NULL) {
+      filters[0]=(char*) malloct(data_size);
+      if (filters[0] != NULL)
+        memset(filters[0], 0, data_size);
+    } else {
+      char* grown=(char*) realloct(filters[0], data_size);
+      if (grown == NULL) {
+        /* the old data block is still allocated: release it */
+        freet(filters[0]);
+      }
+      filters[0]=grown;
+    }
+    if (filters[0] == NULL) {
+      freet(filters);
+      filters = NULL;
+    }
+  }
+
+  if (filters != NULL) {
+    int i;
+    int from;
+    if (filterinc == 0)
+      from = 0;
+    else
+      from = filter_max - filterinc;
+    if (from < 0)      /* never clear before the first slot */
+      from = 0;
+    /* the data block may have moved: rebuild every slot pointer */
+    for(i=0 ; i<=filter_max ; i++) {    // PLUS ONE (safety)
+      filters[i]=filters[0]+i*(HTS_URLMAXSIZE*2);
+    }
+    for(i=from ; i<=filter_max ; i++) {    // PLUS ONE (safety)
+      filters[i][0]='\0';    // clear
+    }
+  }
+  *ptrfilters = filters;
+  return (filters != NULL) ? filter_max : 0;
+}
+
+// Check that the folder tree leading to the path s exists, creating the
+// missing folders. Folders already handled are remembered in the buffer
+// managed by structcheck_init(), as a list of "#name#" entries.
+// Returns 0 (also when s is empty or longer than HTS_URLMAXSIZE, in which
+// case nothing is done), or -1 if the list buffer is not available.
+// Errors reported by mkdir() are ignored.
+int structcheck(char* s) {
+  // walk through the folder(s) of the path
+  char *a=s;
+  char nom[HTS_URLMAXSIZE*2];
+  char *b;
+  char* structcheck_buff=NULL;
+  if (strnotempty(s)==0) return 0;
+  if (strlen(s)>HTS_URLMAXSIZE) return 0;
+
+  // Get buffer address
+  structcheck_buff=structcheck_init(0);
+  if (!structcheck_buff)
+    return -1;
+
+  if (strlen(structcheck_buff) > 65000) {
+    strcpy(structcheck_buff,"#");    // reset the list.. crude
+  }
+
+  b=nom;
+  do {
+    /* copy the next path component into nom (nom accumulates the path so far) */
+    if (*a) *b++=*a++;
+    while((*a!='/') && (*a!='\0')) *b++=*a++;
+    *b='\0';    // terminate without moving b, so that the loop can go on
+    if (*a=='/') {    // a '/' follows: nom is a folder
+      if (strnotempty(nom)) {
+        char tempo[HTS_URLMAXSIZE*2];
+
+        strcpy(tempo,"#"); strcat(tempo,nom); strcat(tempo,"#");
+        if (strstr(structcheck_buff,tempo)==NULL) {    // not created yet
+
+          /* Check room; the buffer may be moved by the growth, so the
+             returned address has to replace the previous one */
+          structcheck_buff=structcheck_init((int) (strlen(structcheck_buff) + strlen(nom) + 8192));
+          if (!structcheck_buff)
+            return -1;
+
+          strcat(structcheck_buff,"#"); strcat(structcheck_buff,nom); strcat(structcheck_buff,"#");  // add to the list
+
+#if HTS_WIN
+          if (mkdir(fconv(nom))!=0)
+#else
+          if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0)
+#endif
+          {
+#if HTS_REMOVE_ANNOYING_INDEX
+            // might be a filename with same name than this folder
+            // then, remove it to allow folder creation
+            // it happens when servers give a folder index while
+            // requesting / page
+            // -> if the file can be opened (not a folder) then rename it
+            FILE* fp=fopen(fconv(nom),"ab");
+            if (fp) {
+              fclose(fp);
+              rename(fconv(nom),fconcat(fconv(nom),".txt"));
+            }
+            // if it fails, that's too bad
+#if HTS_WIN
+            mkdir(fconv(nom));
+#else
+            mkdir(fconv(nom),HTS_ACCESS_FOLDER);
+#endif
+#endif
+            // If the folder already exists an error is returned.. never mind
+          }
+#if HTS_WIN==0
+          chmod(fconv(nom),HTS_ACCESS_FOLDER);
+#endif
+        }
+      }
+      *b++=*a++;    // slash
+    }
+  } while(*a);
+  return 0;
+}
+
+
+// Save len bytes from adr into the file s (created through filecreate).
+// The user command hook is called once the file has been closed.
+// Returns 0 on success, -1 if the file could not be created or if the
+// number of bytes written differs from len.
+int filesave(char* adr,int len,char* s) {
+  int written=0;
+  FILE* out=filecreate(s);
+
+  if (out==NULL)
+    return -1;
+
+  // write the data (nothing to write when len <= 0)
+  if (len>0)
+    written=(int) fwrite(adr,1,len,out);
+  fclose(out);
+  usercommand(0,NULL,antislash(s));
+
+  return (written==len) ? 0 : -1;
+}
+
+
+// Create (truncate) a file for writing, given a Un*x-style path.
+// The name is recorded through filenote(), the folders leading to the file
+// are created if necessary (structcheck), and with HTS_DOSNAME the slashes
+// are turned into backslashes before opening.
+// Returns the stream opened in "wb" mode, or NULL on failure (name too long
+// for the local buffer, folder check failed or fopen error).
+FILE* filecreate(char* s) {
+  char fname[HTS_URLMAXSIZE*2];
+  FILE* fp;
+  fname[0]='\0';
+
+  /* refuse names that would not fit in fname */
+  if (strlen(s) >= sizeof(fname))
+    return NULL;
+
+  // record the name in the list file
+  filenote(s,NULL);
+
+  // if (*s=='/') strcpy(fname,s+1); else strcpy(fname,s);    // no leading / (root!!) // ** YES!!! because of -O <path>
+  strcpy(fname,s);
+
+#if HTS_DOSNAME
+  // replace / by backslashes
+  {
+    int i=0;
+    while(fname[i]) {
+      if (fname[i]=='/')
+        fname[i]='\\';
+      i++;
+    }
+  }
+  // from now on the slash is a backslash
+#endif
+
+  // build the path if needed
+  if (structcheck(s)!=0) {
+    return NULL;
+  }
+
+  // open
+  fp=fopen(fname,"wb");
+#if HTS_WIN==0
+  if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE);
+#endif
+
+  return fp;
+}
+
+// Create an empty file through filecreate() (filenote & co) and close it
+// at once. Returns 1 if the file was created, 0 otherwise.
+int filecreateempty(char* filename) {
+  FILE* created=filecreate(filename);
+
+  if (created==NULL)
+    return 0;
+  fclose(created);
+  return 1;
+}
+
+// Record file names in the list file
+typedef struct {
+  FILE* lst;                       // list file (nothing is recorded while NULL)
+  char path[HTS_URLMAXSIZE*2];     // leading path removed from the recorded names
+} filenote_strc;
+// With params set: store the list file and the base path, return 0.
+// Otherwise: write s as a "[name]" line into the list file (if there is
+// one), after removing the base path when s begins with it; return 1.
+int filenote(char* s,filecreate_params* params) {
+  filenote_strc* strc;
+  NOSTATIC_RESERVE(strc, filenote_strc, 1);
+
+  // setup call: remember where and how to record
+  if (params != NULL) {
+    //filecreate_params* p = (filecreate_params*) params;
+    strcpy(strc->path,params->path);
+    strc->lst=params->lst;
+    return 0;
+  }
+
+  if (strc->lst != NULL) {
+    char entry[HTS_URLMAXSIZE*2];
+    strcpy(entry,fslash(s));
+    // cut the base path?
+    if (strnotempty(strc->path)
+        && (strncmp(fslash(strc->path),entry,strlen(strc->path))==0)) {
+      strcpy(entry,s+strlen(strc->path));
+    }
+    fprintf(strc->lst,"[%s]"LF,entry);
+    fflush(strc->lst);
+  }
+  return 1;
+}
+
+// Run the user command after a file has been saved
+typedef struct {
+  int exe;            // non-zero when a command is registered
+  char cmd[2048];     // registered command line
+} usercommand_strc;
+// _exe non-zero: register _cmd as the user command. An empty, missing or
+// too long command (it has to fit in 2048 bytes, NUL included) leaves the
+// feature disabled.
+// In every case, when file is not empty: notify hts_htmlcheck_filesave
+// (HTS_ANALYSTE builds, if set) and run the registered command on file.
+HTS_INLINE void usercommand(int _exe,char* _cmd,char* file) {
+  usercommand_strc* strc;
+  NOSTATIC_RESERVE(strc, usercommand_strc, 1);
+
+  if (_exe) {
+    /* only keep a command that fits in the buffer */
+    if ((_cmd != NULL) && (strlen(_cmd) < sizeof(strc->cmd)))
+      strcpy(strc->cmd,_cmd);
+    else
+      strc->cmd[0]='\0';
+    if (strnotempty(strc->cmd))
+      strc->exe=_exe;
+    else
+      strc->exe=0;
+  }
+
+#if HTS_ANALYSTE
+  if (hts_htmlcheck_filesave)
+    if (strnotempty(file))
+      hts_htmlcheck_filesave(file);
+#endif
+
+  if (strc->exe) {
+    if (strnotempty(file)) {
+      if (strnotempty(strc->cmd)) {
+        usercommand_exe(strc->cmd,file);
+      }
+    }
+  }
+}
+// Build the command line from cmd, every "$0" being replaced by file, and
+// run it with system(). The command is not run at all if the expanded line
+// does not fit in the local 8192-byte buffer.
+// NOTE(review): file is inserted verbatim, without any shell quoting; if
+// file names can come from remote URLs this allows shell injection unless
+// the user command quotes $0 itself - confirm and document for users.
+void usercommand_exe(char* cmd,char* file) {
+  char temp[8192];
+  size_t used=0;                       // bytes already stored in temp
+  size_t file_len=strlen(file);
+  int i;
+  temp[0]='\0';
+  //
+  for(i=0;cmd[i]!='\0';i++) {
+    if ((cmd[i]=='$') && (cmd[i+1]=='0')) {
+      if (used + file_len >= sizeof(temp))
+        return;                        // expanded command too long
+      memcpy(temp+used,file,file_len);
+      used+=file_len;
+      i++;                             // skip the '0' of "$0"
+    } else {
+      if (used + 1 >= sizeof(temp))
+        return;                        // expanded command too long
+      temp[used++]=cmd[i];
+    }
+  }
+  temp[used]='\0';
+  system(temp);
+}
+
+// Log line prefix and message counters
+typedef struct {
+  int error;       // number of "error" prefixes written
+  int warning;     // number of "warning" prefixes written
+  int info;        // number of "info" prefixes written
+} fspc_strc;
+// fp set: write the prefix of a log line (local time, then the type with
+// its first character passed through hichar) and count the message.
+// fp NULL and type NULL: reset the three counters.
+// fp NULL and type set: return the counter of this type.
+// Returns 0 in every other case.
+HTS_INLINE int fspc(FILE* fp,char* type) {
+  fspc_strc* strc;
+  NOSTATIC_RESERVE(strc, fspc_strc, 1);    // log..
+
+  // writing mode
+  if (fp) {
+    char stamp[256];
+    time_t now=time(NULL);
+    struct tm* local=localtime(&now);
+    strftime(stamp,250,"%H:%M:%S",local);
+    if (strnotempty(type))
+      fprintf(fp,"%s\t%c%s: \t",stamp,hichar(*type),type+1);
+    else
+      fprintf(fp,"%s\t \t",stamp);
+    // count the message
+    if (strcmp(type,"warning")==0)
+      strc->warning++;
+    else if (strcmp(type,"error")==0)
+      strc->error++;
+    else if (strcmp(type,"info")==0)
+      strc->info++;
+    return 0;
+  }
+
+  // reset mode
+  if (!type) {
+    strc->error=strc->warning=strc->info=0;
+    return 0;
+  }
+
+  // query mode
+  if (strcmp(type,"warning")==0)
+    return strc->warning;
+  if (strcmp(type,"error")==0)
+    return strc->error;
+  if (strcmp(type,"info")==0)
+    return strc->info;
+  return 0;
+}
+
+
+// check the transfer rate (disabled: excluded by #if 0, and the body itself is commented out)
+#if 0
+void check_rate(TStamp stat_timestart,int maxrate) {
+ // check the transfer rate (not too high?)
+ /*
+ if (maxrate>0) {
+ int r = (int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-stat_timestart)); // current transfer rate
+ HTS_STAT.HTS_TOTAL_RECV_STATE=0;
+ if (r>maxrate) { // rate > allowed rate
+ int taux = (int) (((TStamp) (r - maxrate) * 100) / (TStamp) maxrate);
+ if (taux<15)
+ HTS_STAT.HTS_TOTAL_RECV_STATE=1; // slow down a little (<15% above the limit)
+ else if (taux<50)
+ HTS_STAT.HTS_TOTAL_RECV_STATE=2; // a lot (<50% above the limit)
+ else
+ HTS_STAT.HTS_TOTAL_RECV_STATE=3; // enormously (>50% above the limit)
+ }
+ }
+ */
+}
+#endif
+
+// ---
+// sous routines liées au moteur et au backing
+
+// Count the links located after index ptr that are already done
+// (non-NULL entries whose pass2 field is -1)
+int backlinks_done(lien_url** liens,int lien_tot,int ptr) {
+  int done=0;
+  int k=ptr+1;
+  // links done and stored in cache
+  while(k<lien_tot) {
+    if ((liens[k]!=NULL) && (liens[k]->pass2 == -1))
+      done++;
+    k++;
+  }
+  return done;
+}
+
+// Fill the backing stack, but only while no stop was requested and the amount
+// reported by back_incache() is below opt->maxcache.
+// Returns back_fill()'s result, or -1 when nothing was attempted.
+HTS_INLINE int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) {
+  if (opt->state.stop)
+    return -1;                 /* no more room */
+  if (back_incache(back,back_max)>=opt->maxcache)     // too much in memory
+    return -1;                 /* no more room */
+  return back_fill(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+}
+
+// fill the backing stack
+// Schedules links located after index ptr through back_add(), up to the number
+// of free sockets, resuming the scan from cache->ptr_ant when possible.
+// Only links belonging to the current pass (numero_passe) are scheduled.
+// Always returns 0.
+int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) {
+ int n;
+
+ // add as many sockets as we are allowed to
+ n=opt->maxsoc-back_nsoc(back,back_max);
+
+ // make sure enough room remains for the tests afterwards (in theory, 1 remaining free entry would be enough)
+ n=min( n, back_available(back,back_max) - 8 );
+
+ // no space left on backing stack - do not back anymore
+ if (back_stack_available(back,back_max) <= 2)
+ n=0;
+
+ if (n>0) {
+ int p;
+
+ if (ptr<cache->ptr_last) { /* restart (2 scans: first html, then non html) */
+ cache->ptr_ant=0;
+ }
+
+ p=ptr+1;
+ /* already scanned up to ptr_ant */
+ if (p<cache->ptr_ant)
+ p=cache->ptr_ant;
+ while( (p<lien_tot) && (n>0) ) {
+ //while((p<lien_tot) && (n>0) && (p < ptr+opt->maxcache_anticipate)) {
+ int ok=1;
+
+ // do not put the file in backing if it has to be processed later
+ // NOTE(review): liens[p] is dereferenced without a NULL check here, whereas backlinks_done() checks it - confirm entries cannot be NULL
+ if (liens[p]->pass2) { // 2nd pass
+ if (numero_passe!=1)
+ ok=0;
+ } else {
+ if (numero_passe!=0)
+ ok=0;
+ }
+
+ // note: once a backing entry is finished, it stays in memory until
+ // ptr reaches it
+ if (ok) {
+ if (!back_exist(back,back_max,liens[p]->adr,liens[p]->fil,liens[p]->sav)) {
+ if (back_add(back,back_max,opt,cache,liens[p]->adr,liens[p]->fil,liens[p]->sav,liens[liens[p]->precedent]->adr,liens[liens[p]->precedent]->fil,liens[p]->testmode,&liens[p]->pass2)==-1) {
+ if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: unable to add more links through back_add for back_fill"LF);
+ test_flush;
+ }
+#if BDEBUG==1
+ printf("error while adding\n");
+#endif
+ n=0; // leave the loop
+ } else {
+ n--;
+#if BDEBUG==1
+ printf("backing: %s%s\n",liens[p]->adr,liens[p]->fil);
+#endif
+ }
+ }
+ }
+ p++;
+ } // while
+ /* save the position of the last look-ahead */
+ cache->ptr_ant=p;
+ cache->ptr_last=ptr;
+ }
+ return 0;
+}
+// ---
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// routines de détournement de SIGHUP & co (Unix)
+//
+// Remember the options structure passed in (when non-NULL) and return the one
+// currently remembered; call with NULL to only query it.
+httrackp* hts_declareoptbuffer(httrackp* optdecl) {
+  static httrackp* opt=NULL;   /* OK */
+  opt = (optdecl!=NULL) ? optdecl : opt;
+  return opt;
+}
+//
+// Signal handler: ask the engine to finish, then quit.
+// Sets exit_xh and re-arms the signal with sig_term, so that a second
+// occurrence of the same signal terminates the program.
+// NOTE(review): fprintf() is not async-signal-safe.
+void sig_finish( int code ) { // finish and quit
+ signal(code,sig_term); // quit if received again
+ exit_xh=1;
+ fprintf(stderr,"\nExit requested to engine (signal %d)\n",code);
+}
+// Signal handler: print a message on stderr and exit immediately with status 0.
+// NOTE(review): fprintf() and exit() are not async-signal-safe.
+void sig_term( int code ) { // quit abruptly
+ fprintf(stderr,"\nProgram terminated (signal %d)\n",code);
+ exit(0);
+}
+#if HTS_WIN
+// Signal handler (Windows build): ask the user what to do.
+//  Q (also Y/O) : exit immediately
+//  I            : request an engine stop through opt->state.stop
+//  anything else: cancel
+// The signal is bound to sig_term while the question is pending, then
+// re-armed with sig_ask.
+void sig_ask( int code ) {                // ask
+  char s[256];
+  signal(code,sig_term);                  // quit if received again
+  printf("\nQuit program/Interrupt/Cancel? (Q/I/C) ");
+  fflush(stdout);
+  // bounded read; an empty answer (EOF, read error) is treated as "cancel"
+  if (scanf("%255s",s) != 1)
+    s[0]='\0';
+  if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q'))
+    exit(0);      // quit
+  else if ( (s[0]=='i') || (s[0]=='I') ) {
+    httrackp* opt=hts_declareoptbuffer(NULL);
+    if (opt) {
+      // ask for stop
+      opt->state.stop=1;
+    }
+  }
+  signal(code,sig_ask);  // re-arm the signal
+}
+#else
+// Signal handler: rebind the signal to sig_ignore and move the program to the
+// background through sig_doback(0).
+void sig_back( int code ) { // ignore and move to background
+ signal(code,sig_ignore);
+ sig_doback(0);
+}
+// Signal handler (non-Windows build): ask the user what to do.
+//  Q (also Y/O) : exit immediately
+//  B (also A)   : move to background (sig_doback(0))
+//  L            : move to background, blind (sig_doback(1))
+//  I            : request an engine stop through opt->state.stop
+//  anything else: cancel
+// The signal is bound to sig_term while the question is pending; it is
+// re-armed with sig_ask only in the "interrupt" and "cancel" cases.
+void sig_ask( int code ) {                // ask
+  char s[256];
+  signal(code,sig_term);                  // quit if received again
+  printf("\nQuit program/Interrupt/Background/bLind background/Cancel? (Q/I/B/L/C) ");
+  fflush(stdout);
+  // bounded read; an empty answer (EOF, read error) is treated as "cancel"
+  if (scanf("%255s",s) != 1)
+    s[0]='\0';
+  if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q'))
+    exit(0);      // quit
+  else if ( (s[0]=='b') || (s[0]=='B') || (s[0]=='a') || (s[0]=='A') )
+    sig_doback(0);  // background
+  else if ( (s[0]=='l') || (s[0]=='L') )
+    sig_doback(1);  // background (blind)
+  else if ( (s[0]=='i') || (s[0]=='I') ) {
+    httrackp* opt=hts_declareoptbuffer(NULL);
+    if (opt) {
+      // ask for stop
+      opt->state.stop=1;
+    }
+    signal(code,sig_ask);  // re-arm the signal
+  }
+  else {
+    printf("cancel..\n");
+    signal(code,sig_ask);  // re-arm the signal
+  }
+}
+// Signal handler that deliberately does nothing.
+void sig_ignore( int code ) { // ignore the signal
+}
+// Signal handler: terminate through sig_term() unless sig_ignore_flag(-1)
+// reports a non-zero value.
+void sig_brpipe( int code ) {
+  if (sig_ignore_flag(-1))
+    return;
+  sig_term(code);
+}
+// Move the program to the background.
+//  blind : when zero, stdout/stderr are redirected to "hts-nohup.out";
+//          otherwise (or if that file cannot be opened) to /dev/null
+// The process then forks: the parent exits after a short pause and the child
+// carries on.
+// The descriptors are redirected with dup2() so that each one lands on the
+// intended number. The previous close()/dup() sequence attached the log to
+// descriptors 0 and 1 and left descriptor 2 closed, so the next file or socket
+// opened by the program silently became stderr.
+void sig_doback(int blind) {              // move to background
+  int out=-1;
+  int in;
+  //
+  printf("\nMoving to background to complete the mirror...\n"); fflush(stdout);
+
+  {
+    httrackp* opt=hts_declareoptbuffer(NULL);
+    if (opt) {
+      // suppress logging and asking lousy questions
+      opt->quiet=1;
+      opt->verbosedisplay=0;
+    }
+  }
+
+  if (!blind)
+    out = open("hts-nohup.out",O_CREAT|O_WRONLY,S_IRUSR|S_IWUSR);
+  if (out == -1)
+    out = open("/dev/null",O_WRONLY,S_IRUSR|S_IWUSR);
+
+  // stdin: detach from the terminal, but keep descriptor 0 occupied
+  in = open("/dev/null",O_RDONLY);
+  if (in == -1) {
+    close(0);
+  } else if (in != 0) {
+    dup2(in,0);
+    close(in);
+  }
+
+  // stdout and stderr: both go to the log (or to /dev/null)
+  if (out != -1) {
+    dup2(out,1);
+    dup2(out,2);
+    if (out > 2)
+      close(out);              // the duplicates are enough
+  } else {
+    close(1);
+    close(2);
+  }
+  //
+  switch (fork()) {
+  case 0:
+    break;
+  case -1:
+    fprintf(stderr,"Error: can not fork process\n");
+    break;
+  default:            // parent
+    usleep(100000);   // pause 1/10s "A microsecond is .000001s"
+    _exit(0);
+    break;
+  }
+}
+#endif
+// fin routines de détournement de SIGHUP & co
+
+// Poll stdin.. si besoin
+#if HTS_POLL
+// Read the characters currently available on stdin.
+//  s   : destination buffer, always NUL-terminated when max > 0
+//  max : size of s; at most max-1 characters are stored
+// Returns the number of characters stored. Reading stops at end of file
+// instead of storing the EOF marker as if it were a character.
+int read_stdin(char* s,int max) {
+  int i=0;
+  if (max <= 0)               // no room, not even for the terminator
+    return 0;
+  while((check_stdin()) && (i<(max-1)) ) {
+    int c=fgetc(stdin);
+    if (c == EOF)
+      break;
+    s[i++]=(char) c;
+  }
+  s[i]='\0';
+  return i;
+}
+#ifdef _WIN32
+// Win32 variant: returns the result of _kbhit().
+HTS_INLINE int check_stdin(void) {
+ return (_kbhit());
+}
+#else
+// Non-blocking test: is descriptor s ready for reading?
+// Returns non-zero when select() reports it readable, 0 otherwise.
+// A select() failure also yields 0: on error the descriptor set is left as it
+// was filled in, so testing it would wrongly report "readable".
+HTS_INLINE int check_flot(T_SOC s) {
+  fd_set fds;
+  struct timeval tv;
+  FD_ZERO(&fds);
+  FD_SET((T_SOC) s,&fds);
+  tv.tv_sec=0;                // zero timeout: poll only
+  tv.tv_usec=0;
+  if (select(s+1,&fds,NULL,NULL,&tv) <= 0)
+    return 0;
+  return FD_ISSET(s,&fds);
+}
+// Non-Win32 variant: flush stdout and stdin, then report (1 or 0) whether
+// descriptor 0 is readable according to check_flot().
+HTS_INLINE int check_stdin(void) {
+  fflush(stdout);
+  fflush(stdin);
+  return check_flot(0) ? 1 : 0;
+}
+#endif
+#endif
+
+// Attente de touche
+#if HTS_ANALYSTE
+// Ask for confirmation through the hts_htmlcheck_query2 callback (HTbuff is
+// passed as the question). Returns 0 when the answer matches "N", "NO" or
+// "NON", 1 in every other case (including no answer at all).
+int ask_continue(void) {
+  char* answer=hts_htmlcheck_query2(HTbuff);
+  if (answer==NULL)
+    return 1;
+  if (strnotempty(answer)) {
+    if ((strfield2(answer,"N")) || (strfield2(answer,"NO")) || (strfield2(answer,"NON")))
+      return 0;
+  }
+  return 1;
+}
+#else
+// Ask for confirmation on the console. Returns 0 when the line read matches
+// "N", "NO" or "NON", 1 in every other case (including an empty line).
+int ask_continue(void) {
+  char answer[12];
+  int go_on=1;
+  answer[0]='\0';
+  printf("Press <Y><Enter> to confirm, <N><Enter> to abort\n");
+  io_flush; linput(stdin,answer,4);
+  if (strnotempty(answer)) {
+    if ((strfield2(answer,"N")) || (strfield2(answer,"NO")) || (strfield2(answer,"NON")))
+      go_on=0;
+  }
+  return go_on;
+}
+#endif
+
+// Number of decimal digits of n (1 for any n below 10, negatives included)
+int nombre_digit(int n) {
+  int digits;
+  for(digits=1; n>=10; n/=10)
+    digits++;
+  return digits;
+}
+
+
+// Returns the address of the end of the first token in p, i.e. the first
+// space located outside double quotes.
+// Returns NULL if the string is a single token (no such space).
+// ex: "test" "test2" returns the address of the space
+//  flag : when non-zero, the string may contain the escapes \\ and \" ;
+//         inside quotes they are unescaped IN PLACE (the string is patched)
+//         and the escaped character is not interpreted
+// The unescaping shifts the tail of the string with memmove(), so it works
+// for strings of any length (no intermediate fixed-size buffer).
+char* next_token(char* p,int flag) {
+  int quote=0;
+  for(;;p++) {
+    if (flag && (*p=='\\')) {      // skip \x or \"
+      if (quote && ((p[1]=='\\') || (p[1]=='"'))) {
+        // drop the backslash: p now holds the escaped character, which the
+        // loop increment then steps over
+        memmove(p,p+1,strlen(p+1)+1);
+      }
+    }
+    else if (*p=='"') {            // (opening or closing) quote
+      quote=!quote;
+    }
+    else if (*p==' ') {
+      if (!quote)
+        return p;
+    }
+    else if (*p=='\0') {
+      return NULL;
+    }
+  }
+}
+
+// routines annexes
+#if HTS_ANALYSTE
+// cancel a file (mark it as to be cancelled)
+// !!NOT THREAD SAFE!!
+// Stores s in a static buffer when s is non-empty and the buffer is currently
+// empty, and returns that buffer in all cases. The copy is bounded by the
+// buffer size (longer names are truncated).
+char* hts_cancel_file(char * s) {
+  static char sav[HTS_URLMAXSIZE*2]="";
+  if (s[0]!='\0')
+    if (sav[0]=='\0')
+      strncat(sav,s,sizeof(sav)-1);    // sav is empty here: bounded copy
+  return sav;
+}
+// Set _hts_cancel to 2, but only while _hts_in_html_parsing equals 2
+// (the value hts_is_testing() reports as "test").
+void hts_cancel_test(void) {
+ if (_hts_in_html_parsing==2)
+ _hts_cancel=2;
+}
+// Set _hts_cancel to 1 whenever _hts_in_html_parsing is non-zero.
+void hts_cancel_parsing(void) {
+ if (_hts_in_html_parsing)
+ _hts_cancel=1;
+}
+#endif
+// for(_i=0;(_i<back_max) && (index<NStatsBuffer);_i++) {
+// i=(back_index+_i)%back_max; // commencer par le "premier" (l'actuel)
+// if (back[i].status>=0) { // signifie "lien actif"
+
+
+/*
+hts_add_file, add/get elements in the add chain for java parsing
+if file_position >= 0
+  push 'file/file_position'
+  return 1 (return 0 if exists)
+else
+  pop file -> 'file'
+  return 'file_position'
+else if empty/error
+  return -1;
+
+A pushed name must be non-NULL and must fit in the chain element (less than
+1024 characters including the terminator), otherwise -1 is returned and
+nothing is stored. When popping, 'file' may be NULL (the name is then simply
+dropped); otherwise it must be large enough to receive a stored name.
+*/
+typedef struct addfile_chain {
+  char name[1024];
+  int pos;
+  struct addfile_chain* next;
+} addfile_chain;
+typedef addfile_chain* addfile_chain_ptr;
+int hts_add_file(char* file,int file_position) {
+  addfile_chain** chain;
+  NOSTATIC_RESERVE(chain, addfile_chain_ptr, 1);
+
+  if (file_position>=0) {         /* copy file to the chain */
+    struct addfile_chain** current;
+    /* refuse what cannot be stored safely (sizeof does not evaluate *chain) */
+    if ( (file==NULL) || (strlen(file) >= sizeof((*chain)->name)) )
+      return -1;
+    current=chain;                /* start from */
+    while(*current) {
+      if (strcmp((*current)->name,file)==0)
+        return 0;                 /* already exists */
+      current=&( (*current)->next );      /* 'next' address */
+    }
+    *current=calloct(1,sizeof(addfile_chain));
+    if (*current) {
+      (*current)->next=NULL;
+      strcpy((*current)->name,file);      /* length checked above */
+      (*current)->pos=file_position;
+      return 1;
+    } else {
+      printf("PANIC! Too many Java files during parsing [1]\n");
+      return -1;
+    }
+  } else {      /* copy last element in file and delete it */
+    if (file)
+      file[0]='\0';
+    if (*chain) {
+      struct addfile_chain** current;
+      int pos=-1;
+      current=chain;              /* start from */
+      while( (*current)->next ) {
+        current=&( (*current)->next );    /* 'next' address */
+      }
+      if (file)
+        strcpy(file,(*current)->name);
+      pos=(*current)->pos;
+      freet(*current);
+      *current=NULL;
+      return pos;
+    }
+    return -1;    /* no more elements */
+  }
+}
+
+#if HTS_ANALYSTE
+// Currently parsing an html file? Answer: % done (at least 1), or 0 if not.
+// flag>=0 : also request a refresh (sets _hts_in_html_poll)
+int hts_is_parsing(int flag) {
+  if (!_hts_in_html_parsing)
+    return 0;                         // not parsing
+  if (flag>=0)
+    _hts_in_html_poll=1;              // a refresh would be nice
+  return max(_hts_in_html_done,1);    // % done
+}
+// Engine state derived from _hts_in_html_parsing - 0: no, 1: test, 2: purge
+int hts_is_testing(void) {
+  switch(_hts_in_html_parsing) {
+  case 2:
+    return 1;
+  case 3:
+    return 2;
+  default:
+    return 0;
+  }
+}
+// error message? Returns the global _hts_errmsg buffer.
+char* hts_errmsg(void) {
+ return _hts_errmsg;
+}
+// transfer pause mode: a value p>=0 is stored in _hts_setpause, a negative
+// one only queries it; the current setting is returned either way
+int hts_setpause(int p) {
+  if (!(p<0))
+    _hts_setpause=p;
+  return _hts_setpause;
+}
+// ask for termination: raises state.stop in the declared options structure,
+// if any. 'force' is not used. Always returns 0.
+int hts_request_stop(int force) {
+  httrackp* engine_opt=hts_declareoptbuffer(NULL);
+  if (engine_opt!=NULL)
+    engine_opt->state.stop=1;
+  return 0;
+}
+// adjust, while running, the parameters that can be adjusted:
+// copies set_opt into the declared engine options through copy_htsopt()
+// when both exist. Always returns 0.
+int hts_setopt(httrackp* set_opt) {
+  httrackp* engine_opt;
+  if (set_opt==NULL)
+    return 0;
+  engine_opt=hts_declareoptbuffer(NULL);
+  if (engine_opt!=NULL) {
+    //_hts_setopt=opt;
+    copy_htsopt(set_opt,engine_opt);
+  }
+  return 0;
+}
+// add URLs: a non-NULL url replaces _hts_addurl.
+// Returns 1 if _hts_addurl is set afterwards, 0 otherwise.
+int hts_addurl(char** url) {
+  if (url!=NULL)
+    _hts_addurl=url;
+  return (NULL!=_hts_addurl);
+}
+// clear _hts_addurl; the returned test is made right after the reset
+int hts_resetaddurl(void) {
+  _hts_addurl=NULL;
+  return (NULL!=_hts_addurl);
+}
+// copier nouveaux paramètres si besoin
+int copy_htsopt(httrackp* from,httrackp* to) {
+ if (from->maxsite > -1)
+ to->maxsite = from->maxsite;
+
+ if (from->maxfile_nonhtml > -1)
+ to->maxfile_nonhtml = from->maxfile_nonhtml;
+
+ if (from->maxfile_html > -1)
+ to->maxfile_html = from->maxfile_html;
+
+ if (from->maxsoc > 0)
+ to->maxsoc = from->maxsoc;
+
+ if (from->nearlink > -1)
+ to->nearlink = from->nearlink;
+
+ if (from->timeout > -1)
+ to->timeout = from->timeout;
+
+ if (from->rateout > -1)
+ to->rateout = from->rateout;
+
+ if (from->maxtime > -1)
+ to->maxtime = from->maxtime;
+
+ if (from->maxrate > -1)
+ to->maxrate = from->maxrate;
+
+ if (strnotempty(from->user_agent))
+ strcpy(to->user_agent , from->user_agent);
+
+ if (from->retry > -1)
+ to->retry = from->retry;
+
+ if (from->hostcontrol > -1)
+ to->hostcontrol = from->hostcontrol;
+
+ if (from->errpage > -1)
+ to->errpage = from->errpage;
+
+ if (from->parseall > -1)
+ to->parseall = from->parseall;
+
+
+ // test all: bit 8 de travel
+ if (from->travel > -1) {
+ if (from->travel & 256)
+ to->travel|=256;
+ else
+ to->travel&=255;
+ }
+
+
+ return 0;
+}
+
+#endif
+//
+
+
+
+
+
+// internal copyright message
+// The function only assigns string literals to a local pointer, one after
+// the other, and does nothing with them.
+// NOTE(review): presumably meant to embed the banner in the binary - confirm
+void voidf(void) {
+ char* a;
+ a=""CRLF""CRLF;
+ a="+-----------------------------------------------+"CRLF;
+ a="|HyperTextTRACKer, Offline Browser Utility |"CRLF;
+ a="| HTTrack Website Copier |"CRLF;
+ a="|Code: Windows Interface Xavier Roche |"CRLF;
+ a="| HTS/HTTrack Xavier Roche |"CRLF;
+ a="| .class Parser Yann Philippot |"CRLF;
+ a="| |"CRLF;
+ a="|Tested on: Windows95,98,NT,2K |"CRLF;
+ a="| Linux PC |"CRLF;
+ a="| Sun-Solaris 5.6 |"CRLF;
+ a="| AIX 4 |"CRLF;
+ a="| |"CRLF;
+ a="|Copyright (C) Xavier Roche and other |"CRLF;
+ a="|contributors |"CRLF;
+ a="| |"CRLF;
+ a="|Use this program at your own risks! |"CRLF;
+ a="+-----------------------------------------------+"CRLF;
+ a=""CRLF;
+}
+
+
+// HTTrack Website Copier Copyright (C) Xavier Roche and other contributors
+//
+
diff --git a/src/htscore.h b/src/htscore.h
new file mode 100644
index 0000000..a50aac8
--- /dev/null
+++ b/src/htscore.h
@@ -0,0 +1,363 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Main file .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Fichier librairie .h
+#ifndef HTTRACK_DEFH
+#define HTTRACK_DEFH
+
+
+#include "htsglobal.h"
+
+/* specific definitions */
+#include "htsbase.h"
+// Includes & définitions
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef _WIN32
+#include <conio.h>
+#include <signal.h>
+#include <direct.h>
+#else
+#include <signal.h>
+#include <unistd.h>
+#endif
+/* END specific definitions */
+
+
+// Include htslib.h for all types
+#include "htslib.h"
+
+#include "htsopt.h"
+
+// structure of a link
+typedef struct {
+ char firstblock; // flag 1=first malloc
+ char link_import; // link imported following a "moved" - do not apply the usual up/down rules
+ short int depth; // allowed depth for the link ; >0 strong 0=weak
+ short int pass2; // process after the others, second pass. if == -1, link processed in background
+ int premier; // index of the first link that gave rise to the other links of the domain
+ int precedent; // index of the link that gave rise to this very link
+ //int moved; // pointer to moved
+ short int retry; // number of remaining retries
+ short int testmode; // test mode only, just send a head!
+ char* adr; // address
+ char* fil; // remote file name
+ char* sav; // name to save on disk (with optional path)
+ char* cod; // optional codebase path if java class
+ char* former_adr; // initial address (before a possible moved), may be null
+ char* former_fil; // initial remote file name (before a possible moved), may be null
+ // for optimization:
+#if HTS_HASH
+ int hash_next[3]; // next link with the same hash value
+#else
+ int sav_len; // length of sav
+#endif
+} lien_url;
+
+// loading of files in the 'background'
+typedef struct {
+#if DEBUG_CHECKINT
+ char magic;
+#endif
+ char url_adr[HTS_URLMAXSIZE*2]; // address
+ char url_fil[HTS_URLMAXSIZE*2]; // remote file name
+ char url_sav[HTS_URLMAXSIZE*2]; // name to save on disk (with optional path)
+ char referer_adr[HTS_URLMAXSIZE*2]; // host address of the referer page
+ char referer_fil[HTS_URLMAXSIZE*2]; // file of the referer page
+ char location_buffer[HTS_URLMAXSIZE*2]; // "location" in case of "moved" (302,..)
+ char tmpfile[HTS_URLMAXSIZE*2]; // name used to save temporarily (compressed)
+ char send_too[1024]; // data to send along with the header
+ int status; // status (-1=unused, 0: ready, >0: operation in progress)
+ int testmode; // test mode
+ int timeout; // handle timeouts? (!=0 : number of seconds)
+ TStamp timeout_refresh; // if so, refresh time
+ int rateout; // timeout refresh? (!=0 : minimum tolerated rate in bytes/s)
+ TStamp rateout_time; // if so, start date
+ LLint maxfile_nonhtml; // max size of a non html file
+ LLint maxfile_html; // same for an html file
+ htsblk r; // htsblk structure of each background object
+ short int is_update; // update mode
+ int head_request; // HEAD request?
+ LLint range_req_size; // range used
+ //
+ int http11; // the header must be signed HTTP/1.1 and not HTTP/1.0
+ int is_chunk; // chunk?
+ char* chunk_adr; // address of the chunk being loaded
+ LLint chunk_size; // size of the chunk being loaded
+ LLint compressed_size; // compressed size (stats only)
+ //
+ short int* pass2_ptr; // pointer to liens[ptr]->pass2
+ //
+ char info[256]; // optional status for ftp
+ int stop_ftp; // stop flag for ftp
+#if DEBUG_CHECKINT
+ char magic2;
+#endif
+} lien_back;
+
+// cache
+typedef struct {
+ int version; // 0 or 1
+ /* */
+ int type;
+ FILE *dat,*ndx,*olddat;
+ char *use; // list of adr+fil
+ FILE *lst; // list of files for the "purge"
+ FILE *txt; // list of files (info)
+ char lastmodified[256];
+ // HASH
+ void* hashtable;
+ // optional log files
+ FILE* log;
+ FILE* errlog;
+ // variables
+ int ptr_ant; // index used for look-ahead (where the last look-ahead scan stopped)
+ int ptr_last; // index used for look-ahead (ptr value at the last look-ahead scan)
+} cache_back;
+
+// hash tables over the link array
+typedef struct {
+ lien_url** liens; // pointer to the links
+ int max_lien; // largest index encountered
+ int hash[3][HTS_HASH_SIZE]; // tables for sav/adr-fil/former_adr-former_fil
+} hash_struct;
+
+#if HTS_HASH
+#else
+#define hash_write(A,B)
+#endif
+
+// parameters handed to filenote() (see its prototype below)
+typedef struct {
+ FILE* lst; // NOTE(review): presumably the list file receiving the saved file names - confirm in filenote()
+ char path[HTS_URLMAXSIZE*2]; // NOTE(review): presumably the path prefix stripped from noted names - confirm in filenote()
+} filecreate_params;
+
+// Fonctions
+
+// INCLUDES .H PARTIES DE CODE HTTRACK
+
+// routine main
+#include "htscoremain.h"
+
+// divers outils pour httrack.c
+#include "htstools.h"
+
+// aide pour la version en ligne de commande
+#include "htshelp.h"
+
+// génération du nom de fichier à sauver
+#include "htsname.h"
+
+// gestion ftp
+#include "htsftp.h"
+
+// routine parser java
+#include "htsjava.h"
+
+// gestion interception d'URL
+#include "htscatchurl.h"
+
+// gestion robots.txt
+#include "htsrobots.h"
+
+// routines d'acceptation de liens
+#include "htswizard.h"
+
+// routines de regexp
+#include "htsfilters.h"
+
+// gestion backing
+#include "htsback.h"
+
+// gestion cache
+#include "htscache.h"
+
+// gestion hashage
+#include "htshash.h"
+
+// gestion réentrance
+#include "htsnostatic.h"
+
+// infos console
+#if HTS_ANALYSTE_CONSOLE
+#include "httrack.h"
+#endif
+
+#include "htsdefines.h"
+
+#include "hts-indextmpl.h"
+
+// INCLUDES .H PARTIES DE CODE HTTRACK
+
+//
+
+/*
+typedef void (* t_hts_htmlcheck_init)(void);
+typedef void (* t_hts_htmlcheck_uninit)(void);
+typedef int (* t_hts_htmlcheck_start)(httrackp* opt);
+typedef int (* t_hts_htmlcheck_end)(void);
+typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt);
+typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
+typedef char* (* t_hts_htmlcheck_query)(char* question);
+typedef char* (* t_hts_htmlcheck_query2)(char* question);
+typedef char* (* t_hts_htmlcheck_query3)(char* question);
+typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,LLint stat_bytes,LLint stat_bytes_recv,int stat_time,int stat_nsocket, LLint stat_written, int stat_updated, int stat_errors, int irate, int nbk );
+typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
+typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
+*/
+
+// interaction requests with the shell (wrapper interface, HTS_ANALYSTE builds only)
+#if HTS_ANALYSTE
+//char HTbuff[1024];
+/*
+extern t_hts_htmlcheck_init hts_htmlcheck_init;
+extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
+extern t_hts_htmlcheck_start hts_htmlcheck_start;
+extern t_hts_htmlcheck_end hts_htmlcheck_end;
+extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+extern t_hts_htmlcheck hts_htmlcheck;
+extern t_hts_htmlcheck_query hts_htmlcheck_query;
+extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
+extern t_hts_htmlcheck_query3 hts_htmlcheck_query3;
+extern t_hts_htmlcheck_loop hts_htmlcheck_loop;
+extern t_hts_htmlcheck_check hts_htmlcheck_check;
+extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
+*/
+//
+int hts_is_parsing(int flag);
+int hts_is_testing(void);
+int hts_setopt(httrackp* opt);
+int hts_addurl(char** url);
+int hts_resetaddurl(void);
+int copy_htsopt(httrackp* from,httrackp* to);
+char* hts_errmsg(void);
+int hts_setpause(int); // pause transfer
+int hts_request_stop(int force);
+//
+char* hts_cancel_file(char * s);
+void hts_cancel_test(void);
+void hts_cancel_parsing(void);
+//
+// Global variables
+extern int _hts_in_html_parsing;
+extern int _hts_in_html_done; // % done
+extern int _hts_in_html_poll; // parsing
+extern char _hts_errmsg[1100];
+extern int _hts_setpause;
+//extern httrackp* _hts_setopt;
+extern char** _hts_addurl;
+extern int _hts_cancel;
+#endif
+
+
+
+//
+
+
+//int httpmirror(char* url,int level,httrackp opt);
+int httpmirror(char* url1,httrackp* opt);
+int filesave(char* adr,int len,char* s);
+int engine_stats(void);
+void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char** filters,int filter_max,int* filptr,char* host);
+FILE* filecreate(char* s);
+int filecreateempty(char* filename);
+int filenote(char* s,filecreate_params* params);
+HTS_INLINE void usercommand(int exe,char* cmd,char* file);
+void usercommand_exe(char* cmd,char* file);
+char* structcheck_init(int init);
+int filters_init(char*** ptrfilters, int maxfilter, int filterinc);
+int structcheck(char* s);
+HTS_INLINE int fspc(FILE* fp,char* type);
+char* next_token(char* p,int flag);
+//
+char* readfile(char* fil);
+char* readfile_or(char* fil,char* defaultdata);
+#if 0
+void check_rate(TStamp stat_timestart,int maxrate);
+#endif
+
+// links
+int liens_record(char* adr,char* fil,char* save,char* former_adr,char* former_fil,char* codebase);
+
+
+// backing, external routines
+int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot);
+int backlinks_done(lien_url** liens,int lien_tot,int ptr);
+int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot);
+
+// cancel file
+// (same three declarations as in the HTS_ANALYSTE section earlier in this header)
+#if HTS_ANALYSTE
+char* hts_cancel_file(char * s);
+void hts_cancel_test(void);
+void hts_cancel_parsing(void);
+#endif
+
+int ask_continue(void);
+int nombre_digit(int n);
+
+// Java
+int hts_add_file(char* file,int file_position);
+
+// Polling
+#if HTS_POLL
+HTS_INLINE int check_flot(T_SOC s);
+HTS_INLINE int check_stdin(void);
+int read_stdin(char* s,int max);
+#endif
+
+httrackp* hts_declareoptbuffer(httrackp* optdecl);
+void sig_finish( int code ); // finish and quit
+void sig_term( int code ); // quit
+#if HTS_WIN
+void sig_ask( int code ); // ask
+#else
+void sig_back( int code ); // ignore and move to background
+void sig_ask( int code ); // ask
+void sig_ignore( int code ); // ignore the signal
+void sig_brpipe( int code ); // treat if necessary
+void sig_doback(int); // move to background
+#endif
+
+// Void
+void voidf(void);
+
+#define HTS_TOPINDEX "TOP_INDEX_HTTRACK"
+
+#endif
+
+
diff --git a/src/htscoremain.c b/src/htscoremain.c
new file mode 100644
index 0000000..a03635f
--- /dev/null
+++ b/src/htscoremain.c
@@ -0,0 +1,2001 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* main routine (first called) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htscoremain.h"
+
+#include "htsglobal.h"
+#include "htscore.h"
+#include "htsdefines.h"
+#include "htsalias.h"
+#include "htswrap.h"
+#include <ctype.h>
+#if HTS_WIN
+#else
+#ifndef HTS_DO_NOT_USE_UID
+/* setuid */
+#include <pwd.h>
+#include <unistd.h>
+#endif
+#endif
+
+extern int exit_xh; // sortir prématurément
+
+/* Resolver */
+extern int IPV6_resolver;
+
+
+// Append token as argv[argc]: its text is copied into buff at offset ptr (no bounds checks; arguments are evaluated several times)
+#define cmdl_add(token,argc,argv,buff,ptr) do { \
+  (argv)[argc]=((buff)+(ptr)); \
+  strcpy((argv)[argc],token); \
+  (ptr) += (strlen((argv)[argc])+2); /* skip the text, its NUL and one spare byte */ \
+  (argc)++; } while(0) /* do/while(0): expands to a single statement, safe in an unbraced if/else */
+
+// Insert token as argv[0]: shifts argv[0..argc-1] up one slot and copies the text into buff at offset ptr (no bounds checks)
+#define cmdl_ins(token,argc,argv,buff,ptr) do { \
+  int cmdl_ins_i_; /* private name: cannot shadow a caller variable used in the arguments */ \
+  /* make room at slot 0 by moving every existing entry one position up */ \
+  for(cmdl_ins_i_=(argc);cmdl_ins_i_>0;cmdl_ins_i_--) \
+    (argv)[cmdl_ins_i_]=(argv)[cmdl_ins_i_-1]; \
+  (argv)[0]=((buff)+(ptr)); \
+  strcpy((argv)[0],token); \
+  (ptr) += (strlen((argv)[0])+2); /* skip the text, its NUL and one spare byte */ \
+  (argc)++; \
+} while(0) /* do/while(0): expands to a single statement, safe in an unbraced if/else */
+
+#define htsmain_free() do { if (url) free(url); } while(0) /* release main()'s url buffer, if it was allocated */
+
+// Main: reads the parameters and calls the robot
+#if HTS_ANALYSTE
+int hts_main(int argc, char **argv) {
+#else
+int main(int argc, char **argv) {
+#endif
+ char* x_argv[999]; // Patch pour argv et argc: en cas de récupération de ligne de commande
+ char* x_argvblk=NULL; // (reprise ou update)
+ int x_ptr=0; // offset
+ /*
+ char* x_argv2[999]; // Patch pour config
+ char* x_argvblk2=NULL;
+ */
+ //
+ int argv_url=-1; // ==0 : utiliser cache et doit.log
+ char* argv_firsturl=NULL; // utilisé pour nommage par défaut
+ char* url = NULL; // URLS séparées par un espace
+ //char url[65536]; // URLS séparées par un espace
+ // the parametres
+ httrackp httrack;
+ int httrack_logmode=3; // ONE log file
+ int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer)
+#if HTS_WIN
+#if HTS_ANALYSTE!=2
+ WORD wVersionRequested; /* requested version WinSock API */
+ WSADATA wsadata; /* Windows Sockets API data */
+#endif
+#else
+#ifndef HTS_DO_NOT_USE_UID
+ int switch_uid=-1,switch_gid=-1; /* setuid/setgid */
+#endif
+ int switch_chroot=0; /* chroot ? */
+#endif
+ //
+ url = malloc(65536);
+ if (url == NULL) {
+ HTS_PANIC_PRINTF("* memory exhausted");
+ htsmain_free();
+ return -1;
+ }
+ url[0]='\0';
+ //
+
+#if HTS_ANALYSTE
+ // custom wrappers
+ hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init");
+ hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free");
+ hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start");
+ hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end");
+ hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
+ hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
+ hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
+ hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2");
+ hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3");
+ hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop");
+ hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link");
+ hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause");
+ hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
+ hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
+ hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
+ hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name");
+#endif
+
+ // options par défaut
+ memset(&httrack, 0, sizeof(httrackp));
+ httrack.wizard=2; // wizard automatique
+ httrack.quiet=0; // questions
+ //
+ httrack.travel=0; // même adresse
+ httrack.depth=9999; // mirror total par défaut
+ httrack.extdepth=0; // mais pas à l'extérieur
+ httrack.seeker=1; // down
+ httrack.urlmode=2; // relatif par défaut
+ httrack.debug=0; // pas de débug en plus
+ httrack.getmode=3; // linear scan
+ httrack.maxsite=-1; // taille max site (aucune)
+ httrack.maxfile_nonhtml=-1; // taille max fichier non html
+ httrack.maxfile_html=-1; // idem pour html
+ httrack.maxsoc=8; // nbre socket max
+ httrack.fragment=-1; // pas de fragmentation
+ httrack.nearlink=0; // ne pas prendre les liens non-html "adjacents"
+ httrack.makeindex=1; // faire un index
+ httrack.kindex=0; // index 'keyword'
+ httrack.delete_old=1; // effacer anciens fichiers
+ httrack.makestat=0; // pas de fichier de stats
+ httrack.maketrack=0; // ni de tracking
+ httrack.timeout=120; // timeout par défaut (2 minutes)
+ httrack.cache=1; // cache prioritaire
+ httrack.shell=0; // pas de shell par defaut
+ httrack.proxy.active=0; // pas de proxy
+ httrack.user_agent_send=1; // envoyer un user-agent
+ strcpy(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
+ httrack.savename_83=0; // noms longs par défaut
+ httrack.savename_type=0; // avec structure originale
+ httrack.parsejava=1; // parser classes
+ httrack.hostcontrol=0; // PAS de control host pour timeout et traffic jammer
+ httrack.retry=2; // 2 retry par défaut
+ httrack.errpage=1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.)
+ httrack.check_type=1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html
+ httrack.all_in_cache=0; // ne pas tout stocker en cache
+ httrack.robots=2; // traiter les robots.txt
+ httrack.external=0; // liens externes normaux
+ httrack.passprivacy=0; // mots de passe dans les fichiers
+ httrack.includequery=1; // include query-string par défaut
+ httrack.mirror_first_page=0; // pas mode mirror links
+ httrack.accept_cookie=1; // gérer les cookies
+ httrack.cookie=NULL;
+ httrack.http10=0; // laisser http/1.1
+ httrack.nocompression=0; // pas de compression
+ httrack.tolerant=0; // ne pas accepter content-length incorrect
+ httrack.parseall=1; // tout parser (tags inconnus, par exemple)
+ httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur
+ httrack.verbosedisplay=0; // pas d'animation texte
+ strcpy(httrack.footer,HTS_DEFAULT_FOOTER);
+ httrack.ftp_proxy=1; // proxy http pour ftp
+ strcpy(httrack.filelist,"");
+ strcpy(httrack.lang_iso,"en, *");
+ strcpy(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT)
+ //
+ httrack.log=stdout;
+ httrack.errlog=stderr;
+ httrack.flush=1; // flush sur les fichiers log
+ httrack.aff_progress=0;
+ httrack.keyboard=0;
+ //
+ strcpy(httrack.path_html,"");
+ strcpy(httrack.path_log,"");
+ strcpy(httrack.path_bin,"");
+ //
+ httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb)
+ httrack.maxfilter=200; // 200 filtres max par défaut
+ httrack.maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT --
+ //httrack.maxcache_anticipate=256; // maximum de liens à anticiper
+ httrack.maxtime=-1; // temps max en secondes
+ httrack.maxrate=-1; // pas de taux maxi
+ httrack.maxconn=10; // nombre connexions/s
+ httrack.waittime=-1; // wait until.. hh*3600+mm*60+ss
+ //
+ httrack.exec=argv[0];
+ httrack.is_update=0; // not an update (yet)
+ httrack.dir_topindex=0; // do not built top index (yet)
+ //
+ httrack.state.stop=0; // stopper
+ //
+ _DEBUG_HEAD=0; // pas de debuggage en têtes
+
+#if HTS_WIN
+#if HTS_ANALYSTE!=2
+ {
+ int stat;
+ wVersionRequested = 0x0101;
+ stat = WSAStartup( wVersionRequested, &wsadata );
+ if (stat != 0) {
+ HTS_PANIC_PRINTF("Winsock not found!\n");
+ htsmain_free();
+ return -1;
+ } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) {
+ HTS_PANIC_PRINTF("WINSOCK.DLL does not support version 1.1\n");
+ WSACleanup();
+ htsmain_free();
+ return -1;
+ }
+ }
+#endif
+#endif
+
+ /* Init root dir */
+ hts_rootdir(argv[0]);
+
+#if HTS_WIN
+#else
+ /* Terminal is a tty, may ask questions and display funny information */
+ if (isatty(1)) {
+ httrack.quiet=0;
+ httrack.verbosedisplay=1;
+ }
+ /* Not a tty, no stdin input or funny output! */
+ else {
+ httrack.quiet=1;
+ httrack.verbosedisplay=0;
+ }
+#endif
+
+ /* First test: if -#R then only launch ftp */
+ if (argc > 2) {
+ if (strcmp(argv[1],"-#R")==0) {
+ if (argc==6) {
+ lien_back r;
+ char* path;
+ FILE* fp;
+ strcpy(r.url_adr,argv[2]);
+ strcpy(r.url_fil,argv[3]);
+ strcpy(r.url_sav,argv[4]);
+ path=argv[5];
+ r.status=1000;
+ run_launch_ftp(&r);
+ fp=fopen(fconv(path),"wb");
+ if (fp) {
+ fprintf(fp,"%d %s",r.r.statuscode,r.r.msg);
+ fclose(fp); fp=NULL;
+ rename(fconv(path),fconcat(path,".ok"));
+ } else remove(fconv(path));
+ } else {
+ printf("htsftp error, wrong parameter number (%d)\n",argc);
+ }
+ exit(0); // pas _exit()
+ }
+ }
+
+ // ok, non ftp, continuer
+
+
+ // Binary program path?
+#ifndef HTS_HTTRACKDIR
+ {
+ char* path=fslash(argv[0]);
+ char* a;
+ if ((a=strrchr(path,'/'))) {
+ httrack.path_bin[0]='\0';
+ strncat(httrack.path_bin,argv[0],(int) a - (int) path);
+ }
+ }
+#else
+ strcpy(httrack.path_bin,HTS_HTTRACKDIR);
+#endif
+
+
+ /* filter CR, LF, TAB.. */
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ char* a;
+ while( (a=strchr(argv[na],'\x0d')) ) *a=' ';
+ while( (a=strchr(argv[na],'\x0a')) ) *a=' ';
+ while( (a=strchr(argv[na],9)) ) *a=' ';
+ /* equivalent to "empty parameter" */
+ if ((strcmp(argv[na],HTS_NOPARAM)==0) || (strcmp(argv[na],HTS_NOPARAM2)==0)) // (none)
+ strcpy(argv[na],"\"\"");
+ if (strncmp(argv[na],"-&",2)==0)
+ argv[na][1]='%';
+ }
+ }
+
+
+
+ /* create x_argvblk buffer for transformed command line */
+ {
+ int current_size=0;
+ int size;
+ int na;
+ for(na=0;na<argc;na++)
+ current_size += (strlen(argv[na]) + 1);
+ if ((size=fsize("config"))>0)
+ current_size += size;
+ x_argvblk=(char*) malloct(current_size+32768);
+ if (x_argvblk == NULL) {
+ HTS_PANIC_PRINTF("Error, not enough memory");
+ htsmain_free();
+ return -1;
+ }
+ x_argvblk[0]='\0';
+ x_ptr=0;
+ }
+
+ /* Create new argc/argv, replace alias, count URLs, treat -h, -q, -i */
+ {
+ char _tmp_argv[2][HTS_CDLMAXSIZE];
+ char* tmp_argv[2];
+ char tmp_error[HTS_CDLMAXSIZE];
+ int tmp_argc;
+ int x_argc=0;
+ int na;
+ tmp_argv[0]=_tmp_argv[0];
+ tmp_argv[1]=_tmp_argv[1];
+ //
+ argv_url=0; /* pour comptage */
+ //
+ cmdl_add(argv[0],x_argc,x_argv,x_argvblk,x_ptr);
+ na=1; /* commencer après nom_prg */
+ while(na<argc) {
+ int result=1;
+ tmp_argv[0][0]=tmp_argv[1][0]='\0';
+
+ /* Vérifier argv[] non vide */
+ if (strnotempty(argv[na])) {
+
+ /* Vérifier Commande (alias) */
+ result=optalias_check(argc,(const char * const *)argv,na,
+ &tmp_argc,(char**)tmp_argv,tmp_error);
+ if (!result) {
+ HTS_PANIC_PRINTF(tmp_error);
+ htsmain_free();
+ return -1;
+ }
+
+ /* Copier */
+ cmdl_add(tmp_argv[0],x_argc,x_argv,x_argvblk,x_ptr);
+ if (tmp_argc > 1) {
+ cmdl_add(tmp_argv[1],x_argc,x_argv,x_argvblk,x_ptr);
+ }
+
+ /* Compter URLs et détecter -i,-q.. */
+ if (tmp_argc == 1) { /* pas -P & co */
+ if (!cmdl_opt(tmp_argv[0])) { /* pas -c0 & co */
+ if (argv_url<0) argv_url=0; // -1==force -> 1=one url already detected, wipe all previous options
+ //if (argv_url>=0) {
+ argv_url++;
+ if (!argv_firsturl)
+ argv_firsturl=x_argv[x_argc-1];
+ //}
+ } else {
+ if (strcmp(tmp_argv[0],"-h")==0) {
+ help(argv[0],!httrack.quiet);
+ htsmain_free();
+ return 0;
+ } else {
+ if (strncmp(tmp_argv[0],"--",2)) { /* pas */
+ if ((strchr(tmp_argv[0],'q')!=NULL))
+ httrack.quiet=1; // ne pas poser de questions! (nohup par exemple)
+ if ((strchr(tmp_argv[0],'i')!=NULL)) { // doit.log!
+ argv_url=-1; /* forcer */
+ httrack.quiet=1;
+ }
+ }
+ }
+ }
+ } else if (tmp_argc == 2) {
+ if ((strcmp(tmp_argv[0],"-%L")==0)) { // liste d'URLs
+ if (argv_url<0) argv_url=0; // -1==force -> 1=one url already detected, wipe all previous options
+ //if (argv_url>=0)
+ argv_url++; /* forcer */
+ }
+ }
+ }
+
+ na+=result;
+ }
+ if (argv_url<0)
+ argv_url=0;
+
+ /* Nouveaux argc et argv */
+ argv=x_argv;
+ argc=x_argc;
+ }
+
+
+
+
+ // Ici on ajoute les arguments de config
+/*
+ if (fexist("config")) { // configuration
+ x_argvblk2=(char*) calloct(32768,1);
+
+ if (x_argvblk2!=NULL) {
+ FILE* fp;
+ int x_argc2;
+
+ //strcpy(x_argvblk2,"httrack ");
+ fp=fopen("config","rb");
+ if (fp) {
+ linput(fp,x_argvblk2+strlen(x_argvblk2),32000);
+ fclose(fp); fp=NULL;
+
+ // calculer arguments selon derniers arguments
+ x_argv2[0]=argv[0];
+ x_argc2=1;
+ {
+ char* p=x_argvblk2;
+ do {
+ x_argv2[x_argc2++]=p;
+ p=strchr(p,' ');
+ if (p) {
+ *p=0; // octet nul (tableau)
+ p++;
+ }
+ } while(p!=NULL);
+ }
+ // recopier arguments actuels (pointeurs uniquement)
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ x_argv2[x_argc2++]=argv[na];
+ }
+ }
+ argc=x_argc2; // nouvel argc
+ argv=x_argv2; // nouvel argv
+ }
+ }
+ }
+*/
+
+
+ // Option O and includerc
+ {
+ int loops=0;
+ while (loops<2) {
+ char* com;
+ int na;
+
+ for(na=1;na<argc;na++) {
+
+ if (argv[na][0]=='"') {
+ char tempo[HTS_CDLMAXSIZE];
+ strcpy(tempo,argv[na]+1);
+ if (tempo[strlen(tempo)-1]!='"') {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"Missing quote in %s",argv[na]);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ tempo[strlen(tempo)-1]='\0';
+ strcpy(argv[na],tempo);
+ }
+
+ if (cmdl_opt(argv[na])) { // option
+ com=argv[na]+1;
+
+ while(*com) {
+ switch(*com) {
+ case 'O': // output path
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option O needs to be followed by a blank space, and a path (or path,path)");
+ printf("Example: -O /binary/\n");
+ printf("Example: -O /binary/,/log/\n");
+ htsmain_free();
+ return -1;
+ } else {
+ char* a;
+ na++;
+ strcpy(httrack.path_html,"");
+ strcpy(httrack.path_log,"");
+ a=strstr(argv[na],"\",\""); // rechercher en premier, au cas ou -O "c:\pipo,test","c:\test"
+ if (!a)
+ a=strchr(argv[na],','); // 2 path
+ else
+ a++; // position ,
+ if (a) {
+ strncat(httrack.path_html,argv[na],(int) (a-argv[na]));
+ strcat(httrack.path_log,a+1);
+ } else {
+ strcpy(httrack.path_log,argv[na]);
+ strcpy(httrack.path_html,argv[na]);
+ }
+ // Eliminer les cas comme -O "C:\mirror\"
+ if (httrack.path_log[0]=='"') { // Guillemets
+ char tmp[256];
+ strcpy(tmp,httrack.path_log+1);
+ if (tmp[strlen(tmp)-1]=='"')
+ tmp[strlen(tmp)-1]='\0';
+ strcpy(httrack.path_log,tmp);
+ }
+ if (httrack.path_html[0]=='"') {
+ char tmp[256];
+ strcpy(tmp,httrack.path_html+1);
+ if (tmp[strlen(tmp)-1]=='"')
+ tmp[strlen(tmp)-1]='\0';
+ strcpy(httrack.path_html,tmp);
+ }
+ check_path(httrack.path_log,argv_firsturl);
+ if (check_path(httrack.path_html,argv_firsturl)) {
+ httrack.dir_topindex=1; // rebuilt top index
+ }
+
+ //printf("-->%s\n%s\n",httrack.path_html,httrack.path_log);
+
+ }
+ break;
+ } // switch
+ com++;
+ } // while
+
+ } // arg
+
+ } // for
+
+ /* if doit.log exists, or if new URL(s) defined,
+ then DO NOT load standard config files */
+ /* (config files are added in doit.log) */
+#if DEBUG_STEPS
+ printf("Loading httrackrc/doit.log\n");
+#endif
+ /* recreate a doit.log (no old doit.log or new URLs (and parameters)) */
+ if ((strnotempty(httrack.path_log)) || (strnotempty(httrack.path_html)))
+ loops++; // do not loop once again and do not include rc file (O option exists)
+ else {
+ if ( (!fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) || (argv_url>0) ) {
+ if (!optinclude_file(fconcat(httrack.path_log,HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr))
+ if (!optinclude_file(HTS_HTTRACKRC,&argc,argv,x_argvblk,&x_ptr)) {
+ if (!optinclude_file(fconcat(hts_gethome(),"/"HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) {
+#ifdef HTS_HTTRACKCNF
+ optinclude_file(HTS_HTTRACKCNF,&argc,argv,x_argvblk,&x_ptr);
+#endif
+ }
+ }
+ } else
+ loops++; // do not loop once again
+ }
+
+ loops++;
+ } // while
+
+ } // traiter -O
+
+
+
+ /* load doit.log and insert in current command line */
+ if ( fexist(fconcat(httrack.path_log,"hts-cache/doit.log")) && (argv_url<=0) ) {
+ FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb");
+ if (fp) {
+ int insert_after=1; /* insérer après nom au début */
+ //
+ char buff[8192];
+ char *p,*lastp;
+ linput(fp,buff,8000);
+ fclose(fp); fp=NULL;
+ p=buff;
+ do {
+ int insert_after_argc;
+ // read next
+ lastp=p;
+ if (p) {
+ p=next_token(p,1);
+ if (p) {
+ *p=0; // null
+ p++;
+ }
+ }
+
+ /* Insert parameters BUT so that they can be in the same order */
+ if (lastp) {
+ if (strnotempty(lastp)) {
+ insert_after_argc=argc-insert_after;
+ cmdl_ins(lastp,insert_after_argc,(argv+insert_after),x_argvblk,x_ptr);
+ argc=insert_after_argc+insert_after;
+ insert_after++;
+ }
+ }
+ } while(lastp!=NULL);
+ //fclose(fp);
+ }
+ }
+
+
+ // Existence d'un cache - pas de new mais un old.. renommer
+#if DEBUG_STEPS
+ printf("Checking cache\n");
+#endif
+ if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) {
+ if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) {
+ remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
+ remove(fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ //remove(fconcat(httrack.path_log,"hts-cache/new.lst"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ //rename(fconcat(httrack.path_log,"hts-cache/old.lst"),fconcat(httrack.path_log,"hts-cache/new.lst"));
+ }
+ }
+
+ /* Interrupted mirror detected */
+ if (!httrack.quiet) {
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) {
+ /* Old cache */
+ if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) {
+ if (httrack.log != NULL) {
+ fprintf(httrack.log,"Warning!\n");
+ fprintf(httrack.log,"An aborted mirror has been detected!\nThe current temporary cache is required for any update operation and only contains data downloaded during the last aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files\n");
+ fprintf(httrack.log,"Please restart HTTrack with --continue (-iC1) option to override this message!\n");
+ }
+ exit(0);
+ }
+ }
+ }
+
+ // remplacer "macros" comme --spider
+ // permet de lancer httrack sans a avoir à se rappeler de syntaxes comme p0C0I0Qc32 ..
+#if DEBUG_STEPS
+ printf("Checking last macros\n");
+#endif
+ {
+ int i;
+ for(i=0;i<argc;i++) {
+#if DEBUG_STEPS
+ printf("Checking #%d:\n",argv[i]);
+ printf("%s\n",argv[i]);
+#endif
+ if (argv[i][0]=='-') {
+ if (argv[i][1]=='-') { // --xxx
+ if ((strfield2(argv[i]+2,"clean")) || (strfield2(argv[i]+2,"tide"))) { // nettoyer
+ strcpy(argv[i]+1,"");
+ if (fexist(fconcat(httrack.path_log,"hts-log.txt")))
+ remove(fconcat(httrack.path_log,"hts-log.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-err.txt")))
+ remove(fconcat(httrack.path_log,"hts-err.txt"));
+ if (fexist(fconcat(httrack.path_html,"index.html")))
+ remove(fconcat(httrack.path_html,"index.html"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.dat"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.ndx"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.lst")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.lst"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.lst")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.lst"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.txt")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.txt")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log")))
+ remove(fconcat(httrack.path_log,"hts-cache/doit.log"));
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock")))
+ remove(fconcat(httrack.path_log,"hts-in_progress.lock"));
+ rmdir(fconcat(httrack.path_log,"hts-cache"));
+ //
+ } else if (strfield2(argv[i]+2,"catchurl")) { // capture d'URL via proxy temporaire!
+ argv_url=1; // forcer a passer les parametres
+ strcpy(argv[i]+1,"#P");
+ //
+ } else if (strfield2(argv[i]+2,"updatehttrack")) {
+#ifdef _WIN32
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"%s not available in this version",argv[i]);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+#else
+#if 0
+ char _args[8][256];
+ char *args[8];
+
+ printf("Cheking for updates...\n");
+ strcpy(_args[0],argv[0]);
+ strcpy(_args[1],"--get");
+ sprintf(_args[2],HTS_UPDATE_WEBSITE,HTS_PLATFORM,"");
+ strcpy(_args[3],"--quickinfo");
+ args[0]=_args[0];
+ args[1]=_args[1];
+ args[2]=_args[2];
+ args[3]=_args[3];
+ args[4]=NULL;
+ if (execvp(args[0],args)==-1) {
+ }
+#endif
+#endif
+ }
+ //
+ else {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"%s not recognized",argv[i]);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+
+ }
+ }
+ }
+ }
+
+ // Compter urls/jokers
+ /*
+ if (argv_url<=0) {
+ int na;
+ argv_url=0;
+ for(na=1;na<argc;na++) {
+ if ( (strcmp(argv[na],"-P")==0) || (strcmp(argv[na],"-N")==0) || (strcmp(argv[na],"-F")==0) || (strcmp(argv[na],"-O")==0) || (strcmp(argv[na],"-V")==0) ) {
+ na++; // sauter nom de proxy
+ } else if (!cmdl_opt(argv[na])) {
+ argv_url++; // un de plus
+ } else if (strcmp(argv[na],"-h")==0) {
+ help(argv[0],!httrack.quiet);
+ htsmain_free();
+ return 0;
+ } else {
+ if ((strchr(argv[na],'q')!=NULL))
+ httrack.quiet=1; // ne pas poser de questions! (nohup par exemple)
+ if ((strchr(argv[na],'i')!=NULL)) { // doit.log!
+ argv_url=0;
+ na=argc;
+ }
+ }
+ }
+ }
+ */
+
+ // Ici on ajoute les arguments qui ont été appelés avant au cas où on récupère une session
+ // Exemple: httrack www.truc.fr -L0 puis ^C puis httrack sans URL : ajouter URL précédente
+ /*
+ if (argv_url==0) {
+ //if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent
+
+ x_argvblk=(char*) calloct(32768,1);
+
+ if (x_argvblk!=NULL) {
+ FILE* fp;
+ int x_argc;
+
+ //strcpy(x_argvblk,"httrack ");
+ fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb");
+ if (fp) {
+ linput(fp,x_argvblk+strlen(x_argvblk),8192);
+ fclose(fp); fp=NULL;
+ }
+
+ // calculer arguments selon derniers arguments
+ x_argv[0]=argv[0];
+ x_argc=1;
+ {
+ char* p=x_argvblk;
+ do {
+ x_argv[x_argc++]=p;
+ //p=strstr(p," ");
+ // exemple de chaine: "echo \"test\"" c:\a "\$0"
+ p=next_token(p,1); // prochain token
+ if (p) {
+ *p=0; // octet nul (tableau)
+ p++;
+ }
+ } while(p!=NULL);
+ }
+ // recopier arguments actuels (pointeurs uniquement)
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ if (strcmp(argv[na],"-O") != 0) // SAUF le path!
+ x_argv[x_argc++]=argv[na];
+ else
+ na++;
+ }
+ }
+ argc=x_argc; // nouvel argc
+ argv=x_argv; // nouvel argv
+ }
+
+
+ }
+ //}
+ }
+ */
+
+ // Vérifier quiet
+ /*
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ if (!cmdl_opt(argv[na])) {
+ if ((strcmp(argv[na],"-P")==0) || (strcmp(argv[na],"-N")==0) || (strcmp(argv[na],"-F")==0) || (strcmp(argv[na],"-O")==0) || (strcmp(argv[na],"-V")==0))
+ na++; // sauter nom de proxy
+ } else {
+ if ((strchr(argv[na],'q')!=NULL) || (strchr(argv[na],'i')!=NULL))
+ httrack.quiet=1; // ne pas poser de questions! (nohup par exemple)
+ }
+ }
+ }
+ */
+
+ // Pas d'URL
+#if DEBUG_STEPS
+ printf("Checking URLs\n");
+#endif
+ if (argv_url==0) {
+ // Présence d'un cache, que faire?..
+ if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent
+ if (x_argvblk!=NULL) {
+ int m;
+ // établir mode - mode cache: 1 (cache valide) 2 (cache à vérifier)
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // cache prioritaire
+ m=1;
+ recuperer=1;
+ } else {
+ m=2;
+ }
+ httrack.cache=m;
+
+ if (httrack.quiet==0) { // sinon on continue automatiquement
+ HT_REQUEST_START;
+ HT_PRINT("A cache (hts-cache/) has been found in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(LF);
+ if (m==1) {
+ HT_PRINT("That means that a transfer has been aborted"LF);
+ HT_PRINT("OK to Continue ");
+ } else {
+ HT_PRINT("That means you can update faster the remote site(s)"LF);
+ HT_PRINT("OK to Update ");
+ }
+ HT_PRINT("httrack "); HT_PRINT(x_argvblk); HT_PRINT("?"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ }
+
+ } else {
+ HTS_PANIC_PRINTF("Error, not enough memory");
+ htsmain_free();
+ return -1;
+ }
+ } else { // log existe pas
+ HTS_PANIC_PRINTF("A cache has been found, but no command line");
+ printf("Please launch httrack with proper parameters to reuse the cache\n");
+ htsmain_free();
+ return -1;
+ }
+
+ } else { // aucune URL définie et pas de cache
+#if HTS_ANALYSTE!=2
+ if (httrack.quiet) {
+#endif
+ help(argv[0],!httrack.quiet);
+ htsmain_free();
+ return -1;
+#if HTS_ANALYSTE!=2
+ } else {
+ help_wizard(&httrack);
+ htsmain_free();
+ return -1;
+ }
+#endif
+ htsmain_free();
+ return 0;
+ }
+ } else { // plus de 2 paramètres
+ // un fichier log existe?
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // fichier lock?
+ //char s[32];
+
+ httrack.cache=1; // cache prioritaire
+ if (httrack.quiet==0) {
+ if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ HT_REQUEST_START;
+ HT_PRINT("There is a lock-file in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(LF"That means that a mirror has not been terminated"LF);
+ HT_PRINT("Be sure you call httrack with proper parameters"LF);
+ HT_PRINT("(The cache allows you to restart faster the transfer)"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ }
+ }
+ } else if (fexist(fconcat(httrack.path_html,"index.html"))) {
+ //char s[32];
+ httrack.cache=2; // cache vient après test de validité
+ if (httrack.quiet==0) {
+ if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ HT_REQUEST_START;
+ HT_PRINT("There is an index.html and a hts-cache folder in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(LF"A site may have been mirrored here, that could mean that you want to update it"LF);
+ HT_PRINT("Be sure parameters are ok"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ } else {
+ HT_REQUEST_START;
+ HT_PRINT("There is an index.html in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(" but no cache"LF);
+ HT_PRINT("There is an index.html in the directory, but no cache"LF);
+ HT_PRINT("A site may have been mirrored here, and erased.."LF);
+ HT_PRINT("Be sure parameters are ok"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ }
+ }
+ }
+ }
+
+
+ // Treat parameters
+ // Traiter les paramètres
+#if DEBUG_STEPS
+ printf("Analyze parameters\n");
+#endif
+ {
+ char* com;
+ int na;
+
+ for(na=1;na<argc;na++) {
+
+ if (argv[na][0]=='"') {
+ char tempo[HTS_CDLMAXSIZE];
+ strcpy(tempo,argv[na]+1);
+ if (tempo[strlen(tempo)-1]!='"') {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"Missing quote in %s",argv[na]);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ tempo[strlen(tempo)-1]='\0';
+ strcpy(argv[na],tempo);
+ }
+
+ if (cmdl_opt(argv[na])) { // option
+ com=argv[na]+1;
+
+ while(*com) {
+ switch(*com) {
+ case ' ': case 9: case '-': case '\0': break;
+ //
+ case 'h':
+ help(argv[0],0);
+ htsmain_free();
+ return 0; // déja fait normalement
+ //
+ case 'g': // récupérer un (ou plusieurs) fichiers isolés
+ httrack.wizard=2; // le wizard on peut plus s'en passer..
+ //httrack.wizard=0; // pas de wizard
+ httrack.cache=0; // ni de cache
+ httrack.makeindex=0; // ni d'index
+ httrack_logmode=1; // erreurs à l'écran
+ httrack.savename_type=1003; // mettre dans le répertoire courant
+ httrack.depth=0; // ne pas explorer la page
+ httrack.accept_cookie=0; // pas de cookies
+ break;
+ case 'w': httrack.wizard=2; // wizard 'soft' (ne pose pas de questions)
+ httrack.travel=0;
+ httrack.seeker=1;
+ break;
+ case 'W': httrack.wizard=1; // Wizard-Help (pose des questions)
+ httrack.travel=0;
+ httrack.seeker=1;
+ break;
+ case 'r': // n'est plus le recurse get bestial mais wizard itou!
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.depth);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.depth=3;
+ break;
+/*
+ case 'r': httrack.wizard=0;
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.depth);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.depth=3;
+ break;
+*/
+ //
+ // note: les tests httrack.depth sont pour éviter de faire
+ // un miroir du web (:-O) accidentelement ;-)
+ case 'a': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=0+(httrack.travel&256); break;
+ case 'd': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=1+(httrack.travel&256); break;
+ case 'l': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=2+(httrack.travel&256); break;
+ case 'e': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=7+(httrack.travel&256); break;
+ case 't': httrack.travel|=256; break;
+ case 'n': httrack.nearlink=1; break;
+ case 'x': httrack.external=1; break;
+ //
+ case 'U': httrack.seeker=2; break;
+ case 'D': httrack.seeker=1; break;
+ case 'S': httrack.seeker=0; break;
+ case 'B': httrack.seeker=3; break;
+ //
+ case 'Y': httrack.mirror_first_page=1; break;
+ //
+ case 'q': case 'i': httrack.quiet=1; break;
+ //
+ case 'Q': httrack_logmode=0; break;
+ case 'v': httrack_logmode=1; break;
+ case 'f': httrack_logmode=2; if (*(com+1)=='2') httrack_logmode=3; while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ //case 'A': httrack.urlmode=1; break;
+ //case 'R': httrack.urlmode=2; break;
+ case 'K': httrack.urlmode=0;
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.urlmode);
+ if (httrack.urlmode == 0) { // in fact K0 ==> K2
+ // and K ==> K0
+ httrack.urlmode=2;
+ }
+ while(isdigit((unsigned char)*(com+1))) com++;
+ }
+ //if (*(com+1)=='0') { httrack.urlmode=2; com++; } break;
+ //
+ case 'c':
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.maxsoc);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ httrack.maxsoc=max(httrack.maxsoc,1); // FORCER A 1
+ } else httrack.maxsoc=8;
+
+ break;
+ //
+ case 'p': sscanf(com+1,"%d",&httrack.getmode); while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ case 'G': sscanf(com+1,LLintP,&httrack.fragment); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'M': sscanf(com+1,LLintP,&httrack.maxsite); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'm': sscanf(com+1,LLintP,&httrack.maxfile_nonhtml); while(isdigit((unsigned char)*(com+1))) com++;
+ if (*(com+1)==',') {
+ com++;
+ sscanf(com+1,LLintP,&httrack.maxfile_html); while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.maxfile_html=-1;
+ break;
+ //
+ case 'T': sscanf(com+1,"%d",&httrack.timeout); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'J': sscanf(com+1,"%d",&httrack.rateout); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'R': sscanf(com+1,"%d",&httrack.retry); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'E': sscanf(com+1,"%d",&httrack.maxtime); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'H': sscanf(com+1,"%d",&httrack.hostcontrol); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'A': sscanf(com+1,"%d",&httrack.maxrate); while(isdigit((unsigned char)*(com+1))) com++; break;
+
+ case 'j': httrack.parsejava=1; if (*(com+1)=='0') { httrack.parsejava=0; com++; } break;
+ //
+ case 'I': httrack.makeindex=1; if (*(com+1)=='0') { httrack.makeindex=0; com++; } break;
+ //
+ case 'X': httrack.delete_old=1; if (*(com+1)=='0') { httrack.delete_old=0; com++; } break;
+ //
+ case 'b': sscanf(com+1,"%d",&httrack.accept_cookie); while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ case 'N':
+ if (strcmp(argv[na],"-N")==0) { // Tout seul
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) { // erreur
+ HTS_PANIC_PRINTF("Option N needs a number, or needs to be followed by a blank space, and a string");
+ printf("Example: -N4\n");
+ htsmain_free();
+ return -1;
+ } else {
+ na++;
+ if (strlen(argv[na])>=127) {
+ HTS_PANIC_PRINTF("Userdef structure string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.savename_userdef,argv[na]);
+ if (strnotempty(httrack.savename_userdef))
+ httrack.savename_type = -1; // userdef!
+ else
+ httrack.savename_type = 0; // -N "" : par défaut
+ }
+ } else {
+ sscanf(com+1,"%d",&httrack.savename_type); while(isdigit((unsigned char)*(com+1))) com++;
+ }
+ break;
+ case 'L':
+ {
+ sscanf(com+1,"%d",&httrack.savename_83);
+ switch(httrack.savename_83) {
+ case 0:
+ httrack.savename_83=1;
+ break;
+ case 1:
+ httrack.savename_83=0;
+ break;
+ default:
+ httrack.savename_83=2;
+ break;
+ }
+ while(isdigit((unsigned char)*(com+1))) com++;
+ }
+ break;
+ case 's':
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.robots);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.robots=1;
+#if DEBUG_ROBOTS
+ printf("robots.txt mode set to %d\n",httrack.robots);
+#endif
+ break;
+ case 'o': sscanf(com+1,"%d",&httrack.errpage); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'u': sscanf(com+1,"%d",&httrack.check_type); while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ case 'C':
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.cache);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.cache=1;
+ break;
+ case 'k': httrack.all_in_cache=1; break;
+ //
+ case 'z': httrack.debug=1; break; // petit debug
+ case 'Z': httrack.debug=2; break; // GROS debug
+ //
+ case '&': case '%': { // deuxième jeu d'options
+ com++;
+ switch(*com) {
+ case 'x': httrack.passprivacy=1; if (*(com+1)=='0') { httrack.passprivacy=0; com++; } break; // No passwords in html files
+ case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files
+ case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; }
+ break; // Keyword Index
+ case 'c': sscanf(com+1,"%d",&httrack.maxconn); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'e': sscanf(com+1,"%d",&httrack.extdepth); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'B': httrack.tolerant=1; if (*(com+1)=='0') { httrack.tolerant=0; com++; } break; // HTTP/1.0 notamment
+ case 'h': httrack.http10=1; if (*(com+1)=='0') { httrack.http10=0; com++; } break; // HTTP/1.0
+ case 'z': httrack.nocompression=1; if (*(com+1)=='0') { httrack.nocompression=0; com++; } break; // pas de compression
+ case 'f': httrack.ftp_proxy=1; if (*(com+1)=='0') { httrack.ftp_proxy=0; com++; } break; // proxy http pour ftp
+ case 'P': httrack.parseall=1; if (*(com+1)=='0') { httrack.parseall=0; com++; } break; // tout parser
+ case 'n': httrack.norecatch=1; if (*(com+1)=='0') { httrack.norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement
+ case 's': httrack.sizehack=1; if (*(com+1)=='0') { httrack.sizehack=0; com++; } break; // hack sur content-length
+ case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break;
+
+ // preserve: no footer, original links
+ case 'p':
+ httrack.footer[0]='\0';
+ httrack.urlmode=4;
+ break;
+ case 'L': // URL list
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %L needs to be followed by a blank space, and a text filename");
+ printf("Example: -%%L \"mylist.txt\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=254) {
+ HTS_PANIC_PRINTF("File list string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.filelist,argv[na]);
+ }
+ break;
+ case 'A': // assume
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %A needs to be followed by a blank space, and a filesystemtype=mimetype/mimesubtype parameters");
+ printf("Example: -%%A php3=text/html,asp=text/html\n");
+ htsmain_free();
+ return -1;
+ } else{
+ char* a;
+ na++;
+ if ( (strlen(argv[na]) + strlen(httrack.mimedefs) + 4) >= sizeof(httrack.mimedefs)) {
+ HTS_PANIC_PRINTF("Mime definition string too long");
+ htsmain_free();
+ return -1;
+ }
+ // --assume standard
+ if (strcmp(argv[na],"standard") == 0) {
+ strcpy(httrack.mimedefs,"\n");
+ strcat(httrack.mimedefs,HTS_ASSUME_STANDARD);
+ strcat(httrack.mimedefs,"\n");
+ } else {
+ strcat(httrack.mimedefs,argv[na]);
+ strcat(httrack.mimedefs,"\n");
+ }
+ a=httrack.mimedefs;
+ while(*a) {
+ switch(*a) {
+ case ',': case ' ': case '\r': case ';': case '\t':
+ *a='\n';
+ break;
+ }
+ a++;
+ }
+ }
+ break;
+ //
+ case 'l':
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %l needs to be followed by a blank space, and an ISO language code");
+ printf("Example: -%%l \"en\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=62) {
+ HTS_PANIC_PRINTF("Lang list string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.lang_iso,argv[na]);
+ }
+ break;
+ //
+ case 'F': // footer id
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %F needs to be followed by a blank space, and a footer string");
+ printf("Example: -%%F \"<!-- Mirrored from %%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %%s -->\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=254) {
+ HTS_PANIC_PRINTF("Footer string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.footer,argv[na]);
+ }
+ break;
+ case 'H': // debug headers
+ _DEBUG_HEAD=1;
+ break;
+ case 'O':
+#if HTS_WIN
+ printf("Warning option -%%O has no effect in this system (chroot)\n");
+#else
+ switch_chroot=1;
+#endif
+ break;
+ case 'U': // setuid
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %U needs to be followed by a blank space, and a username");
+ printf("Example: -%%U smith\n");
+ htsmain_free();
+ return -1;
+ } else {
+ na++;
+#if HTS_WIN
+ printf("Warning option -%%U has no effect on this system (setuid)\n");
+#else
+#ifndef HTS_DO_NOT_USE_UID
+ /* Change the user id and gid */
+ {
+ struct passwd* userdef=getpwnam((const char*)argv[na]);
+ if (userdef) { /* we'll have to switch the user id */
+ switch_gid=userdef->pw_gid;
+ switch_uid=userdef->pw_uid;
+ }
+ }
+#else
+ printf("Warning option -%%U has no effect with this compiled version (setuid)\n");
+#endif
+#endif
+ }
+ break;
+
+ default: {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"invalid option %%%c\n",*com);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ break;
+
+ }
+ }
+ break;
+ //
+ case '@': { // troisième jeu d'options
+ com++;
+ switch(*com) {
+ case 'i':
+#if HTS_INET6==0
+ printf("Warning, option @i has no effect (v6 routines not compiled)\n");
+#else
+ {
+ int res=0;
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&res); while(isdigit((unsigned char)*(com+1))) com++;
+ }
+ switch(res) {
+ case 1:
+ case 4:
+ IPV6_resolver=1;
+ break;
+ case 2:
+ case 6:
+ IPV6_resolver=2;
+ break;
+ case 0:
+ IPV6_resolver=0;
+ break;
+ default:
+ printf("Unknown flag @i%d\n", res);
+ htsmain_free();
+ return -1;
+ break;
+ }
+ }
+#endif
+ break;
+
+ default: {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"invalid option %%%c\n",*com);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ break;
+
+ //case 's': httrack.sslengine=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.sslengine); while(isdigit((unsigned char)*(com+1))) com++; } break;
+ }
+ }
+ break;
+
+ //
+ case '#': { // non documenté (appel de l'interface)
+ com++;
+ switch(*com) {
+ case 'f': httrack.flush=1; break;
+ case 'h':
+ printf("HTTrack version "HTTRACK_VERSION"\n");
+ exit(1);
+ break;
+ case 'p': httrack.aff_progress=1; break;
+ case 'S': httrack.shell=1; break; // stdin sur un shell
+ case 'K': httrack.keyboard=1; break; // vérifier stdin
+ //
+ case 'L': sscanf(com+1,"%d",&httrack.maxlink); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'F': sscanf(com+1,"%d",&httrack.maxfilter); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'Z': httrack.makestat=1; break;
+ case 'T': httrack.maketrack=1; break;
+ case 'u': sscanf(com+1,"%d",&httrack.waittime); while(isdigit((unsigned char)*(com+1))) com++; break;
+
+ case 'R': // ohh ftp, catch->ftpget
+ HTS_PANIC_PRINTF("Unexpected internal error with -#R command");
+ htsmain_free();
+ return -1;
+ break;
+ case 'P': { // catchurl
+ help_catchurl(httrack.path_log);
+ htsmain_free();
+ return 0;
+ }
+ break;
+
+ case '0': /* test #0 : filters */
+ if (na+2>=argc) {
+ HTS_PANIC_PRINTF("Option #0 needs to be followed by a filter string and a string");
+ printf("Example: '-#0' '*.gif' 'foo.gif'\n");
+ htsmain_free();
+ return -1;
+ } else {
+ if (strjoker(argv[na+2],argv[na+1],NULL,NULL))
+ printf("%s does match %s\n",argv[na+2],argv[na+1]);
+ else
+ printf("%s does NOT match %s\n",argv[na+2],argv[na+1]);
+ htsmain_free();
+ return 0;
+ }
+ break;
+ case '!':
+ if (na+1>=argc) {
+ HTS_PANIC_PRINTF("Option #! needs to be followed by a commandline");
+ printf("Example: '-#!' 'echo hello'\n");
+ htsmain_free();
+ return -1;
+ } else {
+ system(argv[na+1]);
+ }
+ break;
+
+ default: printf("Internal option %c not recognized\n",*com); break;
+ }
+ }
+ break;
+ case 'O': // output path
+ na++; // sauter, déja traité
+ break;
+ case 'P': // proxy
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option P needs to be followed by a blank space, and a proxy proxy:port or user:id@proxy:port");
+ printf("Example: -P proxy.myhost.com:8080\n");
+ htsmain_free();
+ return -1;
+ } else {
+ char* a;
+ na++;
+ httrack.proxy.active=1;
+ // Rechercher MAIS en partant de la fin à cause de user:pass@proxy:port
+ a = argv[na] + strlen(argv[na]) -1;
+ // a=strstr(argv[na],":"); // port
+ while( (a > argv[na]) && (*a != ':') && (*a != '@') ) a--;
+ if (*a == ':') { // un port est présent, <proxy>:port
+ sscanf(a+1,"%d",&httrack.proxy.port);
+ httrack.proxy.name[0]='\0';
+ strncat(httrack.proxy.name,argv[na],(int) (a - argv[na]));
+ } else { // <proxy>
+ httrack.proxy.port=8080;
+ strcpy(httrack.proxy.name,argv[na]);
+ }
+ }
+ break;
+ case 'F': // user-agent field
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option F needs to be followed by a blank space, and a user-agent name");
+ printf("Example: -F \"my_user_agent/1.0\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=126) {
+ HTS_PANIC_PRINTF("User-agent length too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.user_agent,argv[na]);
+ if (strnotempty(httrack.user_agent))
+ httrack.user_agent_send=1;
+ else
+ httrack.user_agent_send=0; // -F "" désactive l'option
+ }
+ break;
+ //
+ case 'V': // execute command
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option V needs to be followed by a system-command string");
+ printf("Example: -V \"tar uvf some.tar \\$0\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=2048) {
+ HTS_PANIC_PRINTF("System-command length too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.sys_com,argv[na]);
+ if (strnotempty(httrack.sys_com))
+ httrack.sys_com_exec=1;
+ else
+ httrack.sys_com_exec=0; // -V "" désactive l'option
+ }
+ break;
+ //
+ default: {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"invalid option %c\n",*com);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ break;
+ } // switch
+ com++;
+ } // while
+
+ } else { // URL/filters
+ char tempo[1024];
+ if (strnotempty(url)) strcat(url," "); // espace de séparation
+ strcpy(tempo,unescape_http_unharm(argv[na],1));
+ escape_spc_url(tempo);
+ strcat(url,tempo);
+ } // if argv=- etc.
+
+ } // for
+ }
+
+#if BDEBUG==3
+ printf("URLs/filters=%s\n",url);
+#endif
+
+#if DEBUG_STEPS
+ printf("Analyzing parameters done\n");
+#endif
+
+
+#if HTS_WIN
+#else
+#ifndef HTS_DO_NOT_USE_UID
+ /* Chroot - xxc */
+ if (switch_chroot) {
+ uid_t userid=getuid();
+ //struct passwd* userdef=getpwuid(userid);
+ //if (userdef) {
+ if (!userid) {
+ //if (strcmp(userdef->pw_name,"root")==0) {
+ char rpath[1024];
+ //printf("html=%s log=%s\n",httrack.path_html,httrack.path_log); // xxc
+ if ((httrack.path_html[0]) && (httrack.path_log[0])) {
+ char *a=httrack.path_html,*b=httrack.path_log,*c=NULL,*d=NULL;
+ c=a; d=b;
+ while ((*a) && (*a == *b)) {
+ if (*a=='/') { c=a; d=b; }
+ a++;
+ b++;
+ }
+
+ rpath[0]='\0';
+ if (c != httrack.path_html) {
+ if (httrack.path_html[0]!='/')
+ strcat(rpath,"./");
+ strncat(rpath,httrack.path_html,(int) (c - httrack.path_html));
+ }
+ {
+ char tmp[1024];
+ strcpy(tmp,c); strcpy(httrack.path_html,tmp);
+ strcpy(tmp,d); strcpy(httrack.path_log,tmp);
+ }
+ } else {
+ strcpy(rpath,"./");
+ strcpy(httrack.path_html,"/");
+ strcpy(httrack.path_log,"/");
+ }
+ if (rpath[0]) {
+ printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,httrack.path_html,httrack.path_log);
+ if (chroot(rpath)) {
+ printf("ERROR! Can not chroot to %s!\n",rpath);
+ exit(0);
+ }
+ if (chdir("/")) { /* new root */
+ printf("ERROR! Can not chdir to %s!\n",rpath);
+ exit(0);
+ }
+ } else
+ printf("WARNING: chroot not possible with these paths\n");
+ }
+ //}
+ }
+
+ /* Setuid */
+ if (switch_uid>=0) {
+ printf("[setting user/group to %d/%d]\n",switch_uid,switch_gid);
+ if (setgid(switch_gid))
+ printf("WARNING! Can not setgid to %d!\n",switch_gid);
+ if (setuid(switch_uid))
+ printf("WARNING! Can not setuid to %d!\n",switch_uid);
+ }
+
+ /* Final check */
+ {
+ uid_t userid=getuid();
+ if (!userid) { /* running as r00t */
+ printf("WARNING! You are running this program as root!\n");
+ printf("It might be a good idea to use the -%%U option to change the userid:\n");
+ printf("Example: -%%U smith\n\n");
+ }
+ }
+#endif
+#endif
+
+ //printf("WARNING! This is *only* a beta-release of HTTrack\n");
+ io_flush;
+
+#if DEBUG_STEPS
+ printf("Cache & log settings\n");
+#endif
+
+ // on utilise le cache..
+ // en cas de présence des deux versions, garder la version la plus avancée,
+ // cad la version contenant le plus de fichiers
+ if (httrack.cache) {
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // problemes..
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) {
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) {
+ // switcher si new<32Ko et old>65Ko (tailles arbitraires) ?
+ // ce cas est peut être une erreur ou un crash d'un miroir ancien, prendre
+ // alors l'ancien cache
+ if (fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))<32768) {
+ if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat"))>65536) {
+ if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat")) > fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))) {
+ remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
+ remove(fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ //} else { // ne rien faire
+ // remove("hts-cache/old.dat");
+ // remove("hts-cache/old.ndx");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Débuggage des en têtes
+ if (_DEBUG_HEAD) {
+ ioinfo=fopen(fconcat(httrack.path_log,"hts-ioinfo.txt"),"wb");
+ }
+
+ {
+ char n_lock[256];
+ // on peut pas avoir un affichage ET un fichier log
+ // ca sera pour la version 2
+ if (httrack_logmode==1) {
+ httrack.log=stdout;
+ httrack.errlog=stderr;
+ } else if (httrack_logmode>=2) {
+ // deux fichiers log
+ structcheck(httrack.path_log);
+ if (fexist(fconcat(httrack.path_log,"hts-log.txt")))
+ remove(fconcat(httrack.path_log,"hts-log.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-err.txt")))
+ remove(fconcat(httrack.path_log,"hts-err.txt"));
+
+ httrack.log=fopen(fconcat(httrack.path_log,"hts-log.txt"),"w");
+ if (httrack_logmode==2)
+ httrack.errlog=fopen(fconcat(httrack.path_log,"hts-err.txt"),"w");
+ else
+ httrack.errlog=httrack.log;
+ if (httrack.log==NULL) {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-log.txt"));
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ } else if (httrack.errlog==NULL) {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-err.txt"));
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+
+ } else {
+ httrack.log=NULL;
+ httrack.errlog=NULL;
+ }
+
+ // un petit lock-file pour indiquer un miroir en cours, ainsi qu'un éventuel fichier log
+ {
+ FILE* fp=NULL;
+ //int n=0;
+ char t[256];
+ time_local_rfc822(t); // faut bien que ca serve quelque part l'heure RFC1945 arf'
+
+ /* readme for information purpose */
+ {
+ FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/readme.txt"),"wb");
+ if (fp) {
+ fprintf(fp,"What's in this folder?"LF);
+ fprintf(fp,""LF);
+ fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION""LF);
+ fprintf(fp,"and is used for updating this website."LF);
+ fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF);
+ fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF);
+ fprintf(fp,"(you can safely delete old.dat, old.ndx and old.lst files, however)"LF);
+ fprintf(fp,""LF);
+ fprintf(fp,HTS_LOG_SECURITY_WARNING);
+ fclose(fp);
+ }
+ }
+
+ sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"));
+ //sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n);
+ /*do {
+ if (!n)
+ sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n);
+ else
+ sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress%d.lock"),n);
+ n++;
+ } while((fexist(n_lock)) && httrack.quiet);
+ if (fexist(n_lock)) {
+ if (!recuperer) {
+ remove(n_lock);
+ }
+ }*/
+
+ // vérifier existence de la structure
+ structcheck(httrack.path_html);
+ structcheck(httrack.path_log);
+
+ // reprise/update
+ if (httrack.cache) {
+ FILE* fp;
+ int i;
+#if HTS_WIN
+ mkdir(fconcat(httrack.path_log,"hts-cache"));
+#else
+ mkdir(fconcat(httrack.path_log,"hts-cache"),HTS_PROTECT_FOLDER);
+#endif
+ fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"wb");
+ if (fp) {
+ for(i=0+1;i<argc;i++) {
+ if ( ((strchr(argv[i],' ')!=NULL) || (strchr(argv[i],'"')!=NULL) || (strchr(argv[i],'\\')!=NULL)) && (argv[i][0]!='"') ) {
+ int j;
+ fprintf(fp,"\"");
+ for(j=0;j<(int) strlen(argv[i]);j++) {
+ if (argv[i][j]==34)
+ fprintf(fp,"\\\"");
+ else if (argv[i][j]=='\\')
+ fprintf(fp,"\\\\");
+ else
+ fprintf(fp,"%c",argv[i][j]);
+ }
+ fprintf(fp,"\"");
+ } else if (strnotempty(argv[i])==0) { // ""
+ fprintf(fp,"\"\"");
+ } else { // non critique
+ fprintf(fp,"%s",argv[i]);
+ }
+ if (i<argc-1)
+ fprintf(fp," ");
+ }
+ fprintf(fp,LF);
+ fprintf(fp,"File generated automatically on %s, do NOT edit"LF,t);
+ fprintf(fp,LF);
+ fprintf(fp,"To update a mirror, just launch httrack without any parameters"LF);
+ fprintf(fp,"The existing cache will be used (and modified)"LF);
+ fprintf(fp,"To have other options, retype all parameters and launch HTTrack"LF);
+ fprintf(fp,"To continue an interrupted mirror, just launch httrack without any parameters"LF);
+ fprintf(fp,LF);
+ fclose(fp); fp=NULL;
+ //} else if (httrack.debug>1) {
+ // printf("! FileOpen error, \"%s\"\n",strerror(errno));
+ }
+ }
+
+ // petit message dans le lock
+ if ( (fp=fopen(n_lock,"wb"))!=NULL) {
+ int i;
+ fprintf(fp,"Mirror in progress since %s .. please wait!"LF,t);
+ for(i=0;i<argc;i++) {
+ if (strchr(argv[i],' ')==NULL)
+ fprintf(fp,"%s ",argv[i]);
+ else // entre ""
+ fprintf(fp,"\"%s\" ",argv[i]);
+ }
+ fprintf(fp,LF);
+ fprintf(fp, "To pause the engine: create an empty file named 'hts-stop.lock'"LF);
+ fclose(fp); fp=NULL;
+ }
+
+ // fichier log
+ if (httrack.log) {
+ int i;
+ fprintf(httrack.log,"HTTrack"HTTRACK_VERSION" launched on %s at %s"LF,t,url);
+ fprintf(httrack.log,"(");
+ for(i=0;i<argc;i++) {
+ if ((strchr(argv[i],' ')==NULL) || (strchr(argv[i],'\"')))
+ fprintf(httrack.log,"%s ",argv[i]);
+ else // entre "" (si espace(s) et pas déja de ")
+ fprintf(httrack.log,"\"%s\" ",argv[i]);
+ }
+ fprintf(httrack.log,")"LF);
+ fprintf(httrack.log,LF);
+ fprintf(httrack.log,"Information, Warnings and Errors reported for this mirror:"LF);
+ fprintf(httrack.log,HTS_LOG_SECURITY_WARNING );
+ fprintf(httrack.log,LF);
+ }
+
+ if (httrack_logmode) {
+ printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS""LF,t);
+ if (httrack.wizard==0) {
+ printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,httrack.depth,httrack.maxsoc,httrack.travel,httrack.seeker,httrack_logmode,httrack.urlmode,httrack.getmode);
+ } else { // the magic wizard
+ printf("mirroring %s with the wizard help..\n",url);
+ }
+ }
+ }
+
+ io_flush;
+
+ /* Info for wrappers */
+ if ( (httrack.debug>0) && (httrack.log!=NULL) ) {
+ fspc(httrack.log,"info"); fprintf(httrack.log,"engine: init"LF);
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_init();
+#endif
+
+ // détourner SIGHUP etc.
+#if HTS_WIN
+ signal( SIGINT , sig_ask ); // ^C
+ signal( SIGTERM , sig_finish ); // kill <process>
+#else
+ signal( SIGHUP , sig_back ); // close window
+ signal( SIGTSTP , sig_back ); // ^Z
+ signal( SIGTERM , sig_finish ); // kill <process>
+ signal( SIGINT , sig_ask ); // ^C
+ signal( SIGPIPE , sig_brpipe ); // broken pipe (write into non-opened socket)
+/*
+deprecated - see SIGCHLD
+#ifndef HTS_DO_NOT_SIGCLD
+ signal( SIGCLD , sig_ignore ); // child change status
+#endif
+*/
+ signal( SIGCHLD , sig_ignore ); // child change status
+#endif
+#if DEBUG_STEPS
+ printf("Launching the mirror\n");
+#endif
+
+
+ // Lancement du miroir
+ // ------------------------------------------------------------
+ if (httpmirror(url, &httrack)==0) {
+ printf("Error during operation (see log file), site has not been successfully mirrored\n");
+ } else {
+ if (httrack.shell) {
+ HTT_REQUEST_START;
+ HT_PRINT("TRANSFER DONE"LF);
+ HTT_REQUEST_END
+ } else {
+ printf("Done.\n");
+ }
+ }
+ // ------------------------------------------------------------
+
+ //
+ // Build top index
+ if (httrack.dir_topindex) {
+ char rpath[1024*2];
+ char* a;
+ strcpy(rpath,httrack.path_html);
+ if (rpath[0]) {
+ if (rpath[strlen(rpath)-1]=='/')
+ rpath[strlen(rpath)-1]='\0';
+ }
+ a=strrchr(rpath,'/');
+ if (a) {
+ *a='\0';
+ hts_buildtopindex(rpath,httrack.path_bin);
+ if (httrack.log) {
+ fspc(httrack.log,"info"); fprintf(httrack.log,"Top index rebuilt (done)"LF);
+ }
+ }
+ }
+
+ if (exit_xh ==1) {
+ if (httrack.log) {
+ fprintf(httrack.log,"* * MIRROR ABORTED! * *\nThe current temporary cache is required for any update operation and only contains data downloaded during the present aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files]\n");
+ }
+ }
+
+ /* Info for wrappers */
+ if ( (httrack.debug>0) && (httrack.log!=NULL) ) {
+ fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF);
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_uninit();
+#endif
+
+ if (httrack_logmode!=1) {
+ if (httrack.errlog == httrack.log) httrack.errlog=NULL;
+ if (httrack.log) { fclose(httrack.log); httrack.log=NULL; }
+ if (httrack.errlog) { fclose(httrack.errlog); httrack.errlog=NULL; }
+ }
+
+ // Débuggage des en têtes
+ if (_DEBUG_HEAD) {
+ if (ioinfo) {
+ fclose(ioinfo);
+ }
+ }
+
+ // supprimer lock
+ remove(n_lock);
+ }
+
+ if (x_argvblk)
+ freet(x_argvblk);
+
+#if HTS_WIN
+#if HTS_ANALYSTE!=2
+// WSACleanup(); // ** non en cas de thread tjs présent!..
+#endif
+#endif
+#if HTS_TRACE_MALLOC
+ hts_freeall();
+#endif
+
+ printf("Thanks for using HTTrack!\n");
+ io_flush;
+ htsmain_free();
+ return 0; // OK
+}
+
+
+// main() subroutines
+
+// Check and normalize a directory path, in place.
+//
+// Steps, in order:
+//  - expand_home(s) is applied first (e.g. ~/mywebsites/# -> /home/foo/mywebsites/#)
+//  - every backslash is converted to a forward slash
+//  - one trailing '/' is removed
+//  - if the path then ends with '#', that '#' is replaced by defaultname
+//    (/home/foo/mywebsites/# -> /home/foo/mywebsites/wonderfulsite); when
+//    defaultname is NULL or empty the whole path is cleared instead
+//  - a non-empty result gets a trailing '/' appended
+//
+// s:           path buffer, modified in place. Its capacity is not known here,
+//              so the caller must provide room for the expanded result.
+// defaultname: name substituted for a trailing '#'; may be NULL. When it is
+//              used, it is truncated in place at its first '#'.
+// returns:     1 if a trailing '#' was processed (expanded or cleared), else 0.
+int check_path(char* s,char* defaultname) {
+  size_t len;
+  size_t k;
+  int return_value=0;
+
+  // Replace name: ~/mywebsites/# -> /home/foo/mywebsites/#
+  expand_home(s);
+
+  // convert backslashes to slashes; the length is taken once rather than
+  // being recomputed on every iteration
+  len=strlen(s);
+  for(k=0;k<len;k++)
+    if (s[k]=='\\')
+      s[k]='/';
+
+  // remove ending /
+  if (strnotempty(s)) {
+    len=strlen(s);
+    if (s[len-1]=='/')
+      s[len-1]='\0';
+  }
+
+  // Replace name: /home/foo/mywebsites/# -> /home/foo/mywebsites/wonderfulsite
+  if (strnotempty(s)) {
+    len=strlen(s);
+    if (s[len-1]=='#') {
+      if (strnotempty((defaultname?defaultname:""))) {
+        char tempo[HTS_URLMAXSIZE*2];
+        size_t prefix=len-1;                // path without its trailing '#'
+        char* a=strchr(defaultname,'#');    // we never know..
+        if (a) *a='\0';
+        // both copies are bounded by the size of tempo so that an oversized
+        // path or default name is truncated instead of overflowing the stack
+        if (prefix>sizeof(tempo)-1)
+          prefix=sizeof(tempo)-1;
+        tempo[0]='\0';
+        strncat(tempo,s,prefix);
+        strncat(tempo,defaultname,sizeof(tempo)-1-strlen(tempo));
+        strcpy(s,tempo);
+      } else
+        s[0]='\0';         // Clear path (no name/default url given)
+      return_value=1;      // expanded
+    }
+  }
+
+  // ending /: add a slash at the end if there is none
+  if (strnotempty(s)) {
+    len=strlen(s);
+    if (s[len-1]!='/')
+      strcat(s,"/");
+  }
+
+  return return_value;
+}
+
+// Decide whether a command-line argument is an option.
+//
+// An argument is an option when it starts with '-' and contains none of
+// '.', '/' or '*'. Those three characters mark it as a filter instead:
+//  - '.' : most likely a host such as -www.foo.com
+//          (note: a bare -www is not recognized as a filter)
+//  - '/' : a path such as -*cgi-bin/
+//  - '*' : a wildcard
+//
+// s:       the argument to test
+// returns: 1 if s is an option, 0 otherwise
+int cmdl_opt(char* s) {
+  // no leading dash: cannot be an option
+  if (s[0]!='-')
+    return 0;
+
+  // a single scan for any of the characters that denote a filter
+  if (strpbrk(s,"./*")!=NULL)
+    return 0;
+
+  return 1;
+}
+
diff --git a/src/htscoremain.h b/src/htscoremain.h
new file mode 100644
index 0000000..0775492
--- /dev/null
+++ b/src/htscoremain.h
@@ -0,0 +1,62 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* main routine (first called) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSMAINHSR_DEFH
+#define HTSMAINHSR_DEFH
+
+// --assume standard
+// MIME definitions installed by the main routine when the assume option
+// (-%A) is given the keyword "standard" instead of an explicit list.
+// Same "ext,ext,...=type/subtype" syntax as a user-supplied definition;
+// presumably maps every listed extension to text/html.
+#define HTS_ASSUME_STANDARD \
+ "php2,php3,php4,php,cgi,asp,jsp,pl,cfm=text/html"
+
+#include "htsglobal.h"
+
+// Main routine: reads the command-line parameters and calls the robot
+// (the mirroring engine). It is exported as hts_main() instead of main()
+// when HTS_ANALYSTE is set.
+#if HTS_ANALYSTE
+int hts_main(int argc, char **argv);
+#else
+int main(int argc, char **argv);
+#endif
+
+// Returns 1 if s is an option: it starts with '-' and contains none of
+// '.', '/' or '*'. Returns 0 otherwise (URL or filter).
+int cmdl_opt(char* s);
+// Normalizes the path s in place (home expansion, backslashes to slashes,
+// trailing '#' replaced by defaultname, trailing '/' ensured).
+// Returns 1 if a trailing '#' was processed, 0 otherwise.
+int check_path(char* s,char* defaultname);
+
+
+
+
+#endif
diff --git a/src/htsdefines.h b/src/htsdefines.h
new file mode 100644
index 0000000..223fae1
--- /dev/null
+++ b/src/htsdefines.h
@@ -0,0 +1,100 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Some defines for httrack.c and others */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Library header file (.h)
+#ifndef HTS_DEFINES_DEFH
+#define HTS_DEFINES_DEFH
+
+// Function-pointer types for the hts_htmlcheck_* hooks declared below.
+// The types httrackp, lien_back and hts_stat_struct are not declared in this
+// header and must already be known where it is included.
+typedef void (* t_hts_htmlcheck_init)(void);
+typedef void (* t_hts_htmlcheck_uninit)(void);
+typedef int (* t_hts_htmlcheck_start)(httrackp* opt);
+typedef int (* t_hts_htmlcheck_end)(void);
+typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt);
+typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
+typedef char* (* t_hts_htmlcheck_query)(char* question);
+typedef char* (* t_hts_htmlcheck_query2)(char* question);
+typedef char* (* t_hts_htmlcheck_query3)(char* question);
+typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);
+typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
+typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
+typedef void (* t_hts_htmlcheck_filesave)(char* file);
+typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
+typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back);
+typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+
+// Requests for interaction with the shell
+// Only declared when HTS_ANALYSTE is set: the output buffer HTbuff and one
+// hook variable per function-pointer type above. The definitions live
+// elsewhere (not visible in this header).
+#if HTS_ANALYSTE
+extern char HTbuff[2048];
+extern t_hts_htmlcheck_init hts_htmlcheck_init;
+extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
+extern t_hts_htmlcheck_start hts_htmlcheck_start;
+extern t_hts_htmlcheck_end hts_htmlcheck_end;
+extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+extern t_hts_htmlcheck hts_htmlcheck;
+extern t_hts_htmlcheck_query hts_htmlcheck_query;
+extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
+extern t_hts_htmlcheck_query3 hts_htmlcheck_query3;
+extern t_hts_htmlcheck_loop hts_htmlcheck_loop;
+extern t_hts_htmlcheck_check hts_htmlcheck_check;
+extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
+extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
+extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
+extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
+extern t_hts_htmlcheck_savename hts_htmlcheck_savename;
+#endif
+
+// Output macros, in two flavours.
+// HTS_ANALYSTE==2: HT_PRINT appends its argument to HTbuff, the *_REQUEST_START
+// macros reset HTbuff to an empty string, and HTS_PANIC_PRINTF copies the
+// message into _hts_errmsg (not declared in this header).
+// Otherwise: HT_PRINT and HTS_PANIC_PRINTF write to stdout with printf, and
+// the request markers expand to nothing (their bodies are commented out).
+// NOTE(review): several of these macros already end with ';', so they are not
+// safe as the sole statement of an unbraced if/else.
+// NOTE(review): in the second flavour HTS_REQUEST_START / HTS_REQUEST_END
+// refer to a variable named opt that must exist where the macro is expanded.
+#if HTS_ANALYSTE==2
+#define HT_PRINT(A) strcat(HTbuff,A);
+#define HT_REQUEST_START HTbuff[0]='\0';
+#define HT_REQUEST_END
+#define HTT_REQUEST_START HTbuff[0]='\0';
+#define HTT_REQUEST_END
+#define HTS_REQUEST_START HTbuff[0]='\0';
+#define HTS_REQUEST_END
+#define HTS_PANIC_PRINTF(S) strcpy(_hts_errmsg,S);
+#else
+#define HT_PRINT(A) printf("%s",A);
+#define HT_REQUEST_START /*printf("§\n");*/
+#define HT_REQUEST_END /*printf("§\n");*/
+#define HTT_REQUEST_START /*if (httrack.shell) printf("§\n");*/
+#define HTT_REQUEST_END /*if (httrack.shell) printf("§\n");*/
+#define HTS_REQUEST_START if (opt->shell) { HT_REQUEST_START }
+#define HTS_REQUEST_END if (opt->shell) { HT_REQUEST_END }
+#define HTS_PANIC_PRINTF(S) printf("%s\n",S);
+#endif
+
+#endif
+
diff --git a/src/htsfilters.c b/src/htsfilters.c
new file mode 100644
index 0000000..ed0dee4
--- /dev/null
+++ b/src/htsfilters.c
@@ -0,0 +1,316 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* filters ("regexp") */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+// *.gif match all gif files
+// *[file]/*[file].exe match all exe files with one folder structure
+// *[A-Z,a-z,0-9,/,?] match letters, nums, / and ?
+// *[A-Z,a-z,0-9,/,?]
+
+// *[>10,<100].gif match all gif files larger than 10KB and smaller than 100KB
+// *[file,>10,<100].gif FORBIDDEN: you must not mix size test and pattern test
+
+#include "htsfilters.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+/* END specific definitions */
+
+// From an array of filters such as {"+*.toto","-*.zip","+*.tata"}, tells whether nom is allowed: returns 1 (allowed),
+// -1 (forbidden) or 0 (no filter matched). The last matching filter wins and, if depth is not NULL, *depth gets its index.
+// size (optional, may be NULL): size to check; on return *size holds the limit written by the last matching size test.
+// example: +-*.gif*[<5] == remove GIF if <5KB
+int fa_strjoker(char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth) {
+  int verdict = 0;        // unknown so far
+  int i;
+  LLint sizelimit=0;
+  if (size)
+    sizelimit=*size;
+  for(i=0;i<nfil;i++) {
+    LLint sz=0;           // private copy, so that a size test never overwrites the caller's *size directly
+    if (size)
+      sz=*size;
+    if (strjoker(nom,filters[i] + 1,size ? &sz : NULL,size_flag)) {   // matched; without a size, strjoker gets NULL and size tests fail
+      if (size)
+        if (sz != *size)
+          sizelimit=sz;
+      if (filters[i][0]=='+')
+        verdict = 1;      // allowed
+      else
+        verdict = -1;     // forbidden
+      if (depth)
+        *depth=i;
+    }
+  }
+  if (size)
+    *size=sizelimit;
+  return verdict;
+}
+
+
+// wildcard super-comparator (tm)
+// compares chaine against joker (the pattern, with wildcards inside), case insensitive [see CI]
+// returns a non-NULL address inside chaine when the pattern matches, NULL otherwise
+// (original note: *[..]toto.. returns the address of toto in the string)
+// accepts odd patterns such as www.*.*/ * / * truc*.*
+// this algorithm is 'somewhat' recursive but does not use too much time
+// * = any character
+// --?-- : specific to HTTrack and to '?' (a '?' right after the matched part is accepted)
+HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag) {
+  int err=0;
+  if (strnotempty(joker)==0) {       // end of the joker string
+    if (strnotempty(chaine)==0)      // end of chaine as well: ok
+      return chaine;
+    else if (chaine[0]=='?')
+      return chaine;                 // --?-- for things like index.html?Choix=2
+    else
+      return NULL;                   // not found
+  }
+
+  // we progress by following the 'words' contained in joker
+  // a word is either a * or any other sequence of characters
+
+  if (strcmp(joker,"*")==0) {        // ok, nothing after
+    return chaine;
+  }
+
+  // 1st case: * wildcards or multiple wildcards *[..]
+  if (joker[0]=='*') {     // compare wildcard+rest (*toto/..)
+    int jmp;               // number of joker characters to skip to reach the next word
+    int cut = 0;           // forbid any extra character
+    char pass[256];
+    char LEFT='[',RIGHT=']';
+    int unique=0;
+
+    switch(joker[1]) {
+    case '[':
+      LEFT='[';
+      RIGHT=']';
+      unique=0;
+      break;
+    case '(':
+      LEFT='(';
+      RIGHT=')';
+      unique=1;
+      break;
+    }
+
+    if ((joker[1]==LEFT) && (joker[2]!=LEFT)) {    // multijoker (tm)
+      int i;
+      for(i=0;i<256;i++) pass[i]=0;
+
+      // reserved names
+      if ((strfield(joker+2,"file")) || (strfield(joker+2,"name"))) {
+        for(i=0;i<256;i++) pass[i]=1;
+        pass[(int) '?'] = 0;
+        //pass[(int) ';'] = 0;
+        pass[(int) '/'] = 0;
+        i=2;
+        { int len=(int) strlen(joker);
+          while ((joker[i]!=RIGHT) && (joker[i]) && (i<len)) i++;
+        }
+      } else if (strfield(joker+2,"path")) {
+        for(i=0;i<256;i++) pass[i]=1;
+        pass[(int) '?'] = 0;
+        //pass[(int) ';'] = 0;
+        i=2;
+        { int len=(int) strlen(joker);
+          while ((joker[i]!=RIGHT) && (joker[i]) && (i<len)) i++;
+        }
+      } else if (strfield(joker+2,"param")) {
+        if (chaine[0]=='?') {      // there is a parameter right here
+          for(i=0;i<256;i++) pass[i]=1;
+        }      // otherwise it is a synonym of 'nothing'
+        i=2;
+        { int len=(int) strlen(joker);
+          while ((joker[i]!=RIGHT) && (joker[i]) && (i<len)) i++;
+        }
+      } else {
+        // decodes directives such as *[A-Z,âêîôû,0-9]
+        i=2;
+        if (joker[i] == RIGHT) {       // *[] means "nothing more after this"
+          cut = 1;                     // any extra character is forbidden
+        } else {
+          int len=(int) strlen(joker);
+          while ((joker[i]!=RIGHT) && (joker[i]) && (i<len)) {
+            if ( (joker[i]=='<') || (joker[i]=='>') ) {      // *[<10]
+              int lsize=0;
+              int lverdict;
+              i++;
+              if (sscanf(joker+i,"%d",&lsize) == 1) {
+                if (size) {
+                  if (*size>=0) {
+                    if (size_flag)
+                      *size_flag=1;     /* a size test was used */
+                    if (joker[i-1]=='<')
+                      lverdict=(*size<lsize);
+                    else
+                      lverdict=(*size>lsize);
+                    if (!lverdict) {
+                      return NULL;      // does not match
+                    } else {
+                      *size=lsize;
+                      return chaine;    // ok
+                    }
+                  } else
+                    return NULL;        // does not match
+                } else
+                  return NULL;          // does not match (test impossible)
+                // jump
+                while(isdigit((unsigned char)joker[i])) i++;
+              }
+            }
+            else if (joker[i+1]=='-') {       // 2 chars, e.g. *[A-Z]
+              if ((int) (unsigned char) joker[i+2]>(int) (unsigned char) joker[i]) {
+                int j;
+                for(j=(int) (unsigned char) joker[i];j<=(int) (unsigned char) joker[i+2];j++)
+                  pass[j]=1;
+
+              } else err=1;
+              i+=(joker[i+2]) ? 3 : 2;        // truncated range "x-": stop on the NUL instead of stepping past it
+            } else {     // 1 char, e.g. *[ ]
+              pass[(int) (unsigned char) joker[i]]=1;
+              i++;
+            }
+            if ((joker[i]==',') || (joker[i]==';')) i++;
+          }
+        }
+      }
+      // amount to skip in joker
+      jmp=i;
+      if (joker[i]) jmp++;
+
+      //
+    } else {     // allow everything
+      //
+      int i;
+      for(i=0;i<256;i++) pass[i]=1;    // allow everything
+      jmp=1;
+      if (joker[2]==LEFT) jmp=3;       // allows searching for *<opening bracket>
+    }
+
+    {
+      int i,max;
+      char* adr;
+
+      // the string must end exactly here
+      if (cut) {
+        if (strnotempty(chaine))
+          return NULL;       // lost
+        else
+          return chaine;     // ok
+      }
+
+      // comparison loop, this is the expensive part..
+      // the pass[256] table flags the allowed characters
+
+      // try without consuming anything (this is ()* and not ()+)
+      if (!unique) {
+        if ( (adr=strjoker(chaine,joker+jmp,size,size_flag)) ) {
+          return adr;
+        }
+      }
+
+      // try
+      i=0;
+      if (!unique)
+        max=strlen(chaine);
+      else      /* *(a) only match a (not aaaaa) */
+        max=1;
+      while(i<(int) max) {
+        if (pass[(int) (unsigned char) chaine[i]]) {     // allowed character
+          if ( (adr=strjoker(chaine+i+1,joker+jmp,size,size_flag)) ) {
+            return adr;
+          }
+          i++;
+        } else i=max+2;     // leave the loop (max+2 is the 'rejected' sentinel)
+      }
+
+      // try the empty remainder
+      if (i!=max+2)      // everything before was accepted
+        if ( (adr=strjoker(chaine+max,joker+jmp,size,size_flag)) )
+          return adr;
+
+      return NULL;       // lost
+    }
+
+  } else {      // compare word+rest (toto*..)
+    if (strnotempty(chaine)) {
+      int jmp=0,ok=1;
+
+      // compare the start of joker with the start of chaine
+      while((joker[jmp]!='*') && (joker[jmp]) && (ok)) {
+        // CI : replace streql by a != comparison
+        if (!streql(chaine[jmp],joker[jmp])) {
+          ok=0;      // quit
+        }
+        jmp++;
+      }
+
+      // comparison ok?
+      if (ok) {
+        // go on with the rest of the comparison.
+        if (strjoker(chaine+jmp,joker+jmp,size,size_flag))
+          return chaine;     // return the 1st letter
+      }
+
+    }      // strlen(a)
+    return NULL;
+  }      // * or word
+
+  return NULL;
+}
+
+// multiple search
+// example: searching a text for strcpy(*[A-Z,a-z],"*[0-9]"); looks for the first occurrence
+// of a strcpy on a variable whose name is made of letters and which copies a string of digits
+// WARNING!! Avoid wildcards at the start of joker, or beware of the CPU time!
+char* strjokerfind(char* chaine,char* joker) {
+  char* cursor;
+  for(cursor=chaine; *cursor; cursor++) {     // try every start offset, left to right
+    char* hit=strjoker(cursor,joker,NULL,NULL);
+    if (hit != NULL) {                        // ok, found
+      return hit;
+    }
+  }
+  return NULL;
+}
diff --git a/src/htsfilters.h b/src/htsfilters.h
new file mode 100644
index 0000000..168d330
--- /dev/null
+++ b/src/htsfilters.h
@@ -0,0 +1,49 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* filters ("regexp") */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSFILT_DEFH
+#define HTSFILT_DEFH
+
+#include "htsbase.h"
+
+int fa_strjoker(char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth); // 1: allowed, -1: forbidden, 0: no filter matched
+HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag); // non-NULL pointer into chaine if it matches joker, else NULL
+char* strjokerfind(char* chaine,char* joker); // result of strjoker for the first offset of chaine that matches, else NULL
+
+#endif
diff --git a/src/htsftp.c b/src/htsftp.c
new file mode 100644
index 0000000..5fbe895
--- /dev/null
+++ b/src/htsftp.c
@@ -0,0 +1,1135 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: basic FTP protocol manager */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// FTP protocol handling
+// Version .05 (01/2000)
+
+#include "htsftp.h"
+
+#include "htsglobal.h"
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsthread.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HTS_WIN
+#else
+//inet_ntoa
+#include <arpa/inet.h>
+#endif
+
+#if HTS_WIN
+#ifndef __cplusplus
+// DOS
+#include <process.h> /* _beginthread, _endthread */
+#endif
+#endif
+
+// passive-mode ftp
+// #if HTS_INET6==0
+#define FTP_PASV 1
+// #else
+// no passive mode for v6
+// #define FTP_PASV 0
+// #endif
+
+#define FTP_DEBUG 0
+//#define FORK_DEBUG 0
+
+#define FTP_STATUS_READY 1001
+
+#if USE_BEGINTHREAD
+/*
+#ifdef __cplusplus
+// C++ -> Shell
+UINT back_launch_ftp( LPVOID pP ) {
+ lien_back* back=(lien_back*) pP;
+ if (back == NULL) {
+ //back->status=FTP_STATUS_READY; // fini
+ //back->r.statuscode=-1;
+ return -1;
+ }
+
+ // lancer ftp
+ run_launch_ftp(back);
+ // prêt
+ back->status=0;
+
+ return 0; // thread completed successfully
+}
+#else
+*/
+// Thread entry point: runs the ftp transfer for the lien_back passed as pP,
+// bracketed by hts_init()/hts_uninit(), then sets its status field to 0.
+// A NULL argument does nothing (it is only reported when FTP_DEBUG is enabled).
+PTHREAD_TYPE back_launch_ftp( void* pP ) {
+  lien_back* const job=(lien_back*) pP;
+  if (job != NULL) {
+    /* Initialize */
+    hts_init();
+
+    // run ftp
+#if FTP_DEBUG
+    printf("[Launching main ftp routine]\n");
+#endif
+    run_launch_ftp(job);
+    // ready
+    job->status=0;
+
+    /* Uninitialize */
+    hts_uninit();
+  } else {
+#if FTP_DEBUG
+    printf("[ftp error: no args]\n");
+#endif
+  }
+  return PTHREAD_RETURN;
+}
+/*#endif*/
+// launch in the background: starts back_launch_ftp(back) in a new thread
+void launch_ftp(lien_back* back) {
+/*
+#ifdef __cplusplus
+ // C++ -> Shell
+ AfxBeginThread(back_launch_ftp,(LPVOID) back);
+#else
+*/
+ // DOS
+#if FTP_DEBUG
+ printf("[Launching main ftp thread]\n");
+#endif
+ _beginthread(back_launch_ftp, 0, (void*) back); // NOTE(review): the return value is not checked
+/*#endif*/
+}
+
+#else
+// Unix without pthread: runs the transfer synchronously, then sets back->status to 0; always returns 0
+int back_launch_ftp(lien_back* back) {
+ // run ftp
+ run_launch_ftp(back);
+ // ready
+ back->status=0;
+ return 0;
+}
+// Fork variant: creates (truncates) the file 'path', then forks a child that
+// execs 'exec' with the arguments -#R <url_adr> <url_fil> <url_sav> <path>.
+// If execvp() fails, the child writes "-1 unable to launch ..." into 'path' and
+// renames it to concat(path,".ok"); if 'path' cannot be reopened it is removed.
+// Nothing is launched when 'path' cannot be created.
+void launch_ftp(lien_back* back,char* path,char* exec) {
+  FILE* fp = fopen(fconv(path),"wb");
+  if (fp) {
+    // argv for the child
+    char *args[8];
+    fclose(fp); fp=NULL;
+
+    // The entries point straight at the caller's strings: there is no
+    // fixed-size copy that a long URL or path could overflow, and the
+    // strings stay valid in the child until execvp() replaces its image.
+    args[0]=exec;
+    args[1]="-#R";
+    args[2]=back->url_adr;
+    args[3]=back->url_fil;
+    args[4]=back->url_sav;
+    args[5]=path;
+    args[6]=NULL;
+    switch (fork()) {    // note: vfork misbehaves badly
+    case -1: printf("Can not vfork() process\n"); break;
+    case 0:
+      if (execvp(args[0],args)==-1) {
+        fp=fopen(fconv(path),"wb");
+        if (fp) {
+          fprintf(fp,"-1 unable to launch %s",args[0]);
+          fclose(fp); fp=NULL;
+          rename(path,concat(path,".ok"));
+        } else remove(path);
+      }
+      _exit(0);    // 'clean' exit
+      break;
+    default:    // parent
+      // nothing to do..
+      break;
+    }
+  }
+}
+#endif
+
+// stopping ftp: _T_SOC_close closes a socket and marks it invalid (braced, so it is ONE statement under an unbraced 'if')
+#ifdef _WIN32
+#define _T_SOC_close(soc) { closesocket(soc); soc=INVALID_SOCKET; }
+#else
+#define _T_SOC_close(soc) { close(soc); soc=INVALID_SOCKET; }
+#endif
+#define _HALT_FTP { \
+  if ( soc_ctl != INVALID_SOCKET ) _T_SOC_close(soc_ctl); \
+  if ( soc_servdat != INVALID_SOCKET ) _T_SOC_close(soc_servdat); \
+  if ( soc_dat != INVALID_SOCKET ) _T_SOC_close(soc_dat); \
+}
+#define _CHECK_HALT_FTP \
+  if (stop_ftp(back)) { \
+    _HALT_FTP \
+    return 0; \
+  }
+
+// the actual transfer routine, run once the thread/fork wrappers have started; always returns 0 (outcome: back->status, back->r.statuscode, back->r.msg)
+int run_launch_ftp(lien_back* back) {
+  char user[256]="anonymous";
+  char pass[256]="user@";
+  char line_retr[2048];
+  int port=21;
+#if FTP_PASV
+  int port_pasv=0;
+#endif
+  char adr_ip[1024];
+  char *adr,*real_adr;
+  char* ftp_filename="";
+  int timeout = 300;                    // timeout
+  int timeout_onfly=8;                  // extra wait for the final reply
+  int transfer_list=0;                  // directory
+  int rest_understood=0;                // rest command understood
+  t_fullhostent fullhostent_buffer;     // buffer for the resolver
+  //
+  T_SOC soc_ctl=INVALID_SOCKET;
+  T_SOC soc_servdat=INVALID_SOCKET;
+  T_SOC soc_dat=INVALID_SOCKET;
+  //
+  SOCaddr server_data;
+  int server_data_size=sizeof(server_data);
+  //
+  line_retr[0]=adr_ip[0]='\0';
+
+  timeout=300;
+
+  // reset
+  strcpy(back->r.msg,"");
+  back->r.statuscode=0;
+  back->r.size=0;
+
+  // fetch user and pass if present, and skip user:id@ in adr
+  real_adr = strchr(back->url_adr,':');
+  if (real_adr) real_adr++;
+  else real_adr=back->url_adr;
+  while(*real_adr=='/') real_adr++;     // skip /
+  if ( (adr = jump_identification(real_adr)) != real_adr) {    // user
+    int i=-1;
+    pass[0]='\0';
+    do {
+      i++;
+      user[i]=real_adr[i];
+    } while( (real_adr[i]!=':') && (real_adr[i]) && (i < (int) sizeof(user)-1) );    // never write past user[]
+    user[i]='\0';
+    if (real_adr[i]==':') {     // pass
+      int j=-1;
+      i++;      // yes, the : is skipped too
+      do {
+        j++;
+        pass[j]=real_adr[i+j];
+      } while( ((&real_adr[i+j+1]) < adr) && (real_adr[i+j]) && (j < (int) sizeof(pass)-1) );    // never write past pass[]
+      pass[j]='\0';
+    }
+  }
+
+  // Compute RETR <name>
+  {
+    char* a;
+    a=back->url_fil + strlen(back->url_fil)-1;
+    while( (a > back->url_fil) && (*a!='/')) a--;
+    if (*a == '/') {      // ok, found
+      a++;      // skip /
+      ftp_filename=a;
+      if (strnotempty(a)) {
+        char* ua=unescape_http(a);
+        if (
+          (strchr(ua, ' '))
+          ||
+          (strchr(ua, '\"'))
+          ||
+          (strchr(ua, '\''))
+          ) {
+          sprintf(line_retr,"RETR \"%s\"",ua);
+        } else {      /* Regular one */
+          sprintf(line_retr,"RETR %s",ua);
+        }
+      } else {
+        transfer_list=1;
+        sprintf(line_retr,"LIST -A");
+      }
+    } else {
+      strcpy(back->r.msg,"Unexpected PORT error");
+      back->status=FTP_STATUS_READY;      // done
+      back->r.statuscode=-1;
+    }
+  }
+
+#if FTP_DEBUG
+  printf("Connecting to %s...\n",adr);
+#endif
+
+  // connection
+  {
+    SOCaddr server;
+    int server_size=sizeof(server);
+    t_hostent* hp;
+    char * a;
+    char _adr[256];
+    _adr[0]='\0';
+    //T_SOC soc_ctl;
+    // clear the structure
+    memset(&server, 0, sizeof(server));
+
+    // port
+    a=strchr(adr,':');      // port
+    if (a) {
+      sscanf(a+1,"%d",&port);
+      strncat(_adr,adr,((size_t) (a - adr) < sizeof(_adr)-1) ? (size_t) (a - adr) : sizeof(_adr)-1);    // bounded by _adr[]
+    } else
+      strncat(_adr,adr,sizeof(_adr)-1);      // bounded by _adr[] (an over-long name is truncated and will not resolve)
+
+    // get the resolved address
+    strcpy(back->info,"host name");
+    hp = hts_gethostbyname(_adr, &fullhostent_buffer);
+    if (hp == NULL) {
+      strcpy(back->r.msg,"Unable to get server's address");
+      back->status=FTP_STATUS_READY;      // done
+      back->r.statuscode=-5;
+      _HALT_FTP
+      return 0;
+    }
+    _CHECK_HALT_FTP;
+
+    // copy the address
+    SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+    // copy the address for the data connection
+    SOCaddr_copyaddr(server_data, server_data_size, hp->h_addr_list[0], hp->h_length);
+    // memcpy(&server.sin_addr, hp->h_addr, hp->h_length);
+
+    // create an internet stream socket (the control connection)
+    soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+    if (soc_ctl==INVALID_SOCKET) {
+      strcpy(back->r.msg,"Unable to create a socket");
+      back->status=FTP_STATUS_READY;      // done
+      back->r.statuscode=-1;
+      _HALT_FTP
+      return 0;
+    }
+
+    SOCaddr_initport(server, port);
+    // server.sin_port = htons((unsigned short int) port);
+
+    // connect (blocking, we are in a thread)
+    strcpy(back->info,"connect");
+
+#if HTS_WIN
+    if (connect(soc_ctl, (const struct sockaddr FAR *)&server, server_size) != 0) {
+#else
+    if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) {
+#endif
+      strcpy(back->r.msg,"Unable to connect to the server");
+      back->status=FTP_STATUS_READY;      // done
+      back->r.statuscode=-1;
+      _HALT_FTP
+      return 0;
+#if HTS_WIN
+    }
+#else
+    }
+#endif
+    _CHECK_HALT_FTP;
+
+    {
+      char line[1024];
+      // send the login
+
+      // --USER--
+      get_ftp_line(soc_ctl,line,timeout);      // greeting
+      _CHECK_HALT_FTP;
+
+      if (line[0]=='2') {      // ok, connected
+        strcpy(back->info,"login: user");
+        sprintf(line,"USER %s",user);
+        send_line(soc_ctl,line);
+        get_ftp_line(soc_ctl,line,timeout);
+        _CHECK_HALT_FTP;
+        if ((line[0]=='3') || (line[0]=='2')) {
+          // --PASS--
+          strcpy(back->info,"login: pass");
+          sprintf(line,"PASS %s",pass);
+          send_line(soc_ctl,line);
+          get_ftp_line(soc_ctl,line,timeout);
+          _CHECK_HALT_FTP;
+          if (line[0]=='2') {      // ok
+            // --CWD--
+            char* a;
+            a=back->url_fil + strlen(back->url_fil)-1;
+            while( (a > back->url_fil) && (*a!='/')) a--;
+            if (*a == '/') {      // ok, found
+              char target[1024];
+              target[0]='\0';
+              strncat(target,back->url_fil,(int) (a - back->url_fil));
+              if (strnotempty(target)==0)
+                strcat(target,"/");
+              strcpy(back->info,"cwd");
+              sprintf(line,"CWD %s",target);
+              send_line(soc_ctl,line);
+              get_ftp_line(soc_ctl,line,timeout);
+              _CHECK_HALT_FTP;
+              if (line[0]=='2') {
+                send_line(soc_ctl,"TYPE I");
+                get_ftp_line(soc_ctl,line,timeout);
+                _CHECK_HALT_FTP;
+                if (line[0]=='2') {
+                  // ok..
+                } else {
+                  strcpy(back->r.msg,"TYPE I error");
+                  back->status=FTP_STATUS_READY;      // done
+                  back->r.statuscode=-1;
+                }
+              } else {
+                sprintf(back->r.msg,"CWD error: %s",linejmp(line));
+                back->status=FTP_STATUS_READY;      // done
+                back->r.statuscode=-1;
+              }      // otherwise we are ready
+            } else {
+              strcpy(back->r.msg,"Unexpected ftp error");
+              back->status=FTP_STATUS_READY;      // done
+              back->r.statuscode=-1;
+            }
+
+          } else {
+            sprintf(back->r.msg,"Bad password: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;      // done
+            back->r.statuscode=-1;
+          }
+        } else {
+          sprintf(back->r.msg,"Bad user name: %s",linejmp(line));
+          back->status=FTP_STATUS_READY;      // done
+          back->r.statuscode=-1;
+        }
+      } else {
+        sprintf(back->r.msg,"Connection refused: %s",linejmp(line));
+        back->status=FTP_STATUS_READY;      // done
+        back->r.statuscode=-1;
+      }
+
+      // ok, if we are ready: set up the data channel and ask for the data
+      if (back->r.statuscode != -1) {
+
+
+        //
+        // Pre-REST
+        //
+#if FTP_PASV
+        if (SOCaddr_getproto(server, server_size) == '1') {
+          strcpy(back->info,"pasv");
+          sprintf(line,"PASV");
+          send_line(soc_ctl,line);
+          get_ftp_line(soc_ctl,line,timeout);
+        } else {      /* ipv6 */
+          line[0]='\0';
+        }
+        _CHECK_HALT_FTP;
+        if (line[0]=='2') {
+          char *a,*b,*c;
+          a=strchr(line,'(');      // example: 227 Entering Passive Mode (123,45,67,89,177,27)
+          if (a) {
+
+            // -- parse the IP address and the port --
+            a++;
+            b=strchr(a,',');
+            if (b) b=strchr(b+1,',');
+            if (b) b=strchr(b+1,',');
+            if (b) b=strchr(b+1,',');
+            c=a; while( (c=strchr(c,',')) ) *c='.';      // replace , with .
+            if (b) *b='\0';
+            //
+            strcpy(adr_ip,a);      // copy the ip address
+            //
+            if (b) {
+              a=b+1;      // start of the port
+              b=strchr(a,'.');
+              if (b) {
+                int n1,n2;
+                //
+                *b='\0';
+                b++;
+                c=strchr(b,')');
+                if (c) {
+                  *c='\0';
+                  if ( (sscanf(a,"%d",&n1)==1) && (sscanf(b,"%d",&n2)==1) && (strlen(adr_ip)<=16)) {
+                    port_pasv=n2+(n1<<8);
+                  }
+                } else {
+                  deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
+                }      // otherwise we are ready
+              }
+            }
+            // -- end of IP address and port parsing --
+          } else {
+            sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;      // done
+            back->r.statuscode=-1;
+          }      // otherwise we are ready
+        } else {
+          /*
+           * try epsv (ipv6) *
+           */
+          strcpy(back->info,"pasv");
+          sprintf(line,"EPSV");
+          send_line(soc_ctl,line);
+          get_ftp_line(soc_ctl,line,timeout);
+          _CHECK_HALT_FTP;
+          if (line[0]=='2') {      /* got it */
+            char *a;
+            a=strchr(line,'(');      // example: 229 Entering Extended Passive Mode (|||6446|)
+            if (
+              (a != NULL)
+              &&
+              (*a == '(')
+              && (*(a+1))
+              && (*(a+1) == *(a+2)) && (*(a+1) == *(a+3))
+              && (isdigit((unsigned char) *(a+4)))
+              && (*(a+5))
+              ) {
+              unsigned int n1 = 0;
+              if (sscanf(a+4,"%u",&n1)==1) {      // %u: n1 is unsigned
+                if ((n1 < 65535) && (n1 > 0)) {
+                  port_pasv=n1;
+                }
+              }
+            } else {
+              sprintf(back->r.msg,"EPSV incorrect: %s",linejmp(line));
+              back->status=FTP_STATUS_READY;      // done
+              back->r.statuscode=-1;
+            }
+          } else {
+            sprintf(back->r.msg,"PASV/EPSV error: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;      // done
+            back->r.statuscode=-1;
+          }      // otherwise we are ready
+        }
+#else
+        // nothing to do beforehand
+#endif
+
+#if FTP_PASV
+        if (port_pasv) {
+#endif
+          // SIZE
+          if (back->r.statuscode != -1) {
+            if (!transfer_list) {
+              char* ua=unescape_http(ftp_filename);
+              if (
+                (strchr(ua, ' '))
+                ||
+                (strchr(ua, '\"'))
+                ||
+                (strchr(ua, '\''))
+                ) {
+                sprintf(line,"SIZE \"%s\"", ua);
+              } else {
+                sprintf(line,"SIZE %s", ua);
+              }
+
+              // SIZE?
+              strcpy(back->info,"size");
+              send_line(soc_ctl,line);
+              get_ftp_line(soc_ctl,line,timeout);
+              _CHECK_HALT_FTP;
+              if (line[0]=='2') {      // SIZE understood, ONLY THEN test REST (otherwise do not: cf. the problem of txt.gz files decompressed on the fly)
+                // REST?
+                if (fexist(back->url_sav) && (transfer_list==0)) {
+                  strcpy(back->info,"rest");
+                  sprintf(line,"REST "LLintP,(LLint)fsize(back->url_sav));
+                  send_line(soc_ctl,line);
+                  get_ftp_line(soc_ctl,line,timeout);
+                  _CHECK_HALT_FTP;
+                  if ((line[0]=='3') || (line[0]=='2')) {      // ok
+                    rest_understood=1;
+                  }      // otherwise too bad
+                }
+              }      // otherwise too bad
+            }
+          }
+#if FTP_PASV
+        }
+#endif
+
+        //
+        // Post-REST
+        //
+#if FTP_PASV
+        // Ok, connect
+        if (port_pasv) {
+          SOCaddr server;
+          int server_size=sizeof(server);
+          t_hostent* hp;
+          // clear the structure
+          memset(&server, 0, sizeof(server));
+
+          // infos
+          strcpy(back->info,"resolv");
+
+          // resolve
+          if (adr_ip[0]) {
+            hp = hts_gethostbyname(adr_ip, &fullhostent_buffer);
+            if (hp) {
+              SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+            } else {
+              server_size=0;
+            }
+          } else {
+            memcpy(&server, &server_data, sizeof(server_data));
+            server_size=server_data_size;
+          }
+
+          // infos
+          strcpy(back->info,"cnxdata");
+#if FTP_DEBUG
+          printf("Data: Connecting to %s:%d...\n", adr_ip, port_pasv);
+#endif
+          if (server_size > 0) {
+            // socket
+            soc_dat=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+            if (soc_dat != INVALID_SOCKET) {
+              // structure: connection to the internet domain, on the passive port
+              SOCaddr_initport(server, port_pasv);
+              // server.sin_port = htons((unsigned short int) port_pasv);
+#if HTS_WIN
+              if (connect(soc_dat, (const struct sockaddr FAR *)&server, server_size) == 0) {
+#else
+              if (connect(soc_dat, (struct sockaddr *)&server, server_size) != -1) {
+#endif
+                strcpy(back->info,"retr");
+                // line_retr is sent as is: it is larger than line[], copying it there could overflow
+                send_line(soc_ctl,line_retr);
+                get_ftp_line(soc_ctl,line,timeout);
+                _CHECK_HALT_FTP;
+                if (line[0]=='1') {
+                  // OK
+                } else {
+                  deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
+                  //
+                  sprintf(back->r.msg,"RETR command errror: %s",linejmp(line));
+                  back->status=FTP_STATUS_READY;      // done
+                  back->r.statuscode=-1;
+                }      // otherwise we are ready
+              } else {
+#if FTP_DEBUG
+                printf("Data: unable to connect\n");
+#endif
+                deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
+                //
+                strcpy(back->r.msg,"Unable to connect");
+                back->status=FTP_STATUS_READY;      // done
+                back->r.statuscode=-1;
+              }      // otherwise we are ready
+            } else {
+              strcpy(back->r.msg,"Unable to create a socket");
+              back->status=FTP_STATUS_READY;      // done
+              back->r.statuscode=-1;
+            }      // otherwise we are ready
+          } else {
+            sprintf(back->r.msg,"Unable to resolve IP %s",adr_ip);
+            back->status=FTP_STATUS_READY;      // done
+            back->r.statuscode=-1;
+          }      // otherwise we are ready
+        } else {
+          sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line));
+          back->status=FTP_STATUS_READY;      // done
+          back->r.statuscode=-1;
+        }      // otherwise we are ready
+#else
+        //T_SOC soc_servdat;
+        strcpy(back->info,"listening");
+        if ( (soc_servdat = get_datasocket(line)) != INVALID_SOCKET) {
+          _CHECK_HALT_FTP;
+          send_line(soc_ctl,line);      // send the PORT/EPRT command prepared by get_datasocket
+          get_ftp_line(soc_ctl,line,timeout);
+          _CHECK_HALT_FTP;
+          if (line[0]=='2') {      // ok
+            strcpy(back->info,"retr");
+            // line_retr is sent as is: it is larger than line[], copying it there could overflow
+            send_line(soc_ctl,line_retr);
+            get_ftp_line(soc_ctl,line,timeout);
+            _CHECK_HALT_FTP;
+            if (line[0]=='1') {
+              //T_SOC soc_dat;
+              struct sockaddr dummyaddr;
+              int dummylen = sizeof(struct sockaddr);
+              if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) {
+                strcpy(back->r.msg,"Unable to accept connection");
+                back->status=FTP_STATUS_READY;      // done
+                back->r.statuscode=-1;
+              }
+            } else {
+              sprintf(back->r.msg,"RETR command errror: %s",linejmp(line));
+              back->status=FTP_STATUS_READY;      // done
+              back->r.statuscode=-1;
+            }
+          } else {
+            sprintf(back->r.msg,"PORT command error: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;      // done
+            back->r.statuscode=-1;
+          }
+#if HTS_WIN
+          closesocket(soc_servdat); soc_servdat=INVALID_SOCKET;
+#else
+          close(soc_servdat); soc_servdat=INVALID_SOCKET;      // reset, so that a later _HALT_FTP does not close it twice
+#endif
+        } else {
+          strcpy(back->r.msg,"Unable to listen to a port");
+          back->status=FTP_STATUS_READY;      // done
+          back->r.statuscode=-1;
+        }
+#endif
+
+        //
+        // Ok, connection initiated
+        //
+        if (soc_dat != INVALID_SOCKET) {
+          if (rest_understood) {         // REST sent and understood
+            filenote(back->url_sav,NULL);
+            back->r.fp = fopen(fconv(back->url_sav),"ab");
+          } else
+            back->r.fp = filecreate(back->url_sav);
+          strcpy(back->info,"receiving");
+          if (back->r.fp != NULL) {
+            char buff[1024];
+            int len=1;
+            int read_len=1024;
+            //HTS_TOTAL_RECV_CHECK(read_len);     // Reduce if needed when too much data was received
+
+            while( (len>0) && (!stop_ftp(back)) ) {
+              // wait for data
+              len=1;      // no error so far
+              switch(wait_socket_receive(soc_dat,timeout)) {
+              case -1:
+                strcpy(back->r.msg,"Read error");
+                back->status=FTP_STATUS_READY;      // done
+                back->r.statuscode=-1;
+                len=0;      // end
+                break;
+              case 0:
+                sprintf(back->r.msg,"Time out (%d)",timeout);
+                back->status=FTP_STATUS_READY;      // done
+                back->r.statuscode=-1;
+                len=0;      // end
+                break;
+              }
+
+              // receive
+              if (len) {
+                len=recv(soc_dat,buff,read_len,0);
+                if (len>0) {
+                  back->r.size+=len;
+                  HTS_STAT.HTS_TOTAL_RECV+=len;
+                  if (back->r.fp) {
+                    if ((int) fwrite(buff,1,len,back->r.fp) != len) {
+                      strcpy(back->r.msg,"Write error");
+                      back->status=FTP_STATUS_READY;      // done
+                      back->r.statuscode=-1;
+                      len=0;      // error
+                    }
+                  } else {
+                    strcpy(back->r.msg,"Unexpected write error");
+                    back->status=FTP_STATUS_READY;      // done
+                    back->r.statuscode=-1;
+                  }
+                } else {      // Error or finished
+                  //strcpy(back->r.msg,"Read error");
+                  back->status=FTP_STATUS_READY;      // done
+                  back->r.statuscode=0;
+                }
+                read_len=1024;
+                //HTS_TOTAL_RECV_CHECK(read_len);     // Reduce if needed when too much data was received
+              }
+            }
+            if (back->r.fp) {
+              fclose(back->r.fp);
+              back->r.fp=NULL;
+            }
+          } else {
+            strcpy(back->r.msg,"Unable to write file");
+            back->status=FTP_STATUS_READY;      // done
+            back->r.statuscode=-1;
+          }
+#if HTS_WIN
+          closesocket(soc_dat); soc_dat=INVALID_SOCKET;
+#else
+          close(soc_dat); soc_dat=INVALID_SOCKET;      // reset, so that a later _HALT_FTP does not close it twice
+#endif
+
+          // 226 Transfer complete?
+          if (back->r.statuscode != -1) {
+            if (wait_socket_receive(soc_ctl,timeout_onfly)>0) {
+              // fetch the 226 transfer complete reply
+              get_ftp_line(soc_ctl,line,timeout);
+              if (line[0]=='2') {      // OK
+                strcpy(back->r.msg,"OK");
+                back->status=FTP_STATUS_READY;      // done
+                back->r.statuscode=200;
+              } else {
+                sprintf(back->r.msg,"RETR incorrect: %s",linejmp(line));
+                back->status=FTP_STATUS_READY;      // done
+                back->r.statuscode=-1;
+              }
+            } else {
+              strcpy(back->r.msg,"Read error");
+              back->status=FTP_STATUS_READY;      // done
+              back->r.statuscode=-1;
+            }
+          }
+
+        }
+
+
+
+      }
+
+
+    }
+
+    _CHECK_HALT_FTP;
+    strcpy(back->info,"quit");
+    send_line(soc_ctl,"QUIT");      // bye bye
+    get_ftp_line(soc_ctl,NULL,timeout);
+#if HTS_WIN
+    closesocket(soc_ctl);
+#else
+    close(soc_ctl);
+#endif
+  }
+
+  if (back->r.statuscode!=-1) {
+    back->r.statuscode=200;
+    strcpy(back->r.msg,"OK");
+  }
+  back->status=FTP_STATUS_READY;      // done
+  return 0;
+}
+
+
+
+// opens a listening port on the local host; to_send receives the matching PORT (or EPRT) command and stays empty on failure; returns the socket or INVALID_SOCKET
+T_SOC get_datasocket(char* to_send) {
+ T_SOC soc = INVALID_SOCKET;
+ char h_loc[256+2];
+
+ to_send[0]='\0';
+ if (gethostname(h_loc,256)==0) { // host name
+ SOCaddr server;
+ int server_size=sizeof(server);
+ t_hostent* hp_loc;
+ t_fullhostent buffer;
+
+ // clear the structure
+ memset(&server, 0, sizeof(server));
+
+ if ( (hp_loc=vxgethostbyname(h_loc, &buffer)) ) { // our own host
+
+ // copy the address
+ SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length);
+
+ if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) {
+
+ if ( bind(soc,(struct sockaddr*) &server, server_size) == 0 ) {
+ SOCaddr server2;
+ int len;
+ len=sizeof(server2);
+ // clear the structure
+ memset(&server2, 0, sizeof(server2));
+ if (getsockname(soc,(struct sockaddr*) &server2, &len) == 0) {
+ // *port=ntohs(server.sin_port); // fetch the port
+ if (listen(soc,10)>=0) { // backlog of 10, picked arbitrarily
+#if HTS_INET6==0
+ unsigned short int a,n1,n2;
+ // compute the two port bytes
+ a = SOCaddr_sinport(server2);
+ n1 = (a & 0xff);
+ n2 = ((a>>8) & 0xff);
+ {
+ char dots[256+2];
+ char dot[256+2];
+ char* a;
+ SOCaddr_inetntoa(dot, 256, server2, sizeof(server2));
+ //
+ dots[0]='\0';
+ strncat(dots, dot, 128);
+ while( (a=strchr(dots,'.')) ) *a=','; // commas!
+ while( (a=strchr(dots,':')) ) *a=','; // commas!
+ sprintf(to_send,"PORT %s,%d,%d",dots,n1,n2);
+ }
+#else
+ /*
+ EPRT |1|132.235.1.2|6275|
+ EPRT |2|1080::8:800:200C:417A|5282|
+ */
+ {
+ char dot[256+2];
+ SOCaddr_inetntoa(dot, 256, server2, len);
+ sprintf(to_send,"EPRT |%c|%s|%d|", SOCaddr_getproto(server2, len), dot, SOCaddr_sinport(server2));
+ }
+#endif
+
+ } else {
+#if HTS_WIN
+ closesocket(soc);
+#else
+ close(soc);
+#endif
+ soc=INVALID_SOCKET;
+ }
+
+
+ } else {
+#if HTS_WIN
+ closesocket(soc);
+#else
+ close(soc);
+#endif
+ soc=INVALID_SOCKET;
+ }
+
+
+ } else {
+#if HTS_WIN
+ closesocket(soc);
+#else
+ close(soc);
+#endif
+ soc=INVALID_SOCKET;
+ }
+ }
+ }
+ }
+
+
+ return soc;
+}
+
+// Debug trace file, only present when FTP_DEBUG is set; opened lazily as
+// "toto.txt" by send_line() and get_ftp_line().
+#if FTP_DEBUG
+FILE* dd=NULL;
+#endif
+
+// Send/receive routines
+// send_line: send one FTP command (data followed by CR LF) on the control
+// socket soc.
+//   data: command text, without line terminator
+// Returns 1 if the whole line was sent, 0 on error: command too long for the
+// internal buffer, socket not reported writable, or short/failed send().
+int send_line(T_SOC soc,char* data) {
+  char line[1024];
+  if (_DEBUG_HEAD) {
+    if (ioinfo) {
+      fprintf(ioinfo,"---> %s\x0d\x0a",data);
+      fflush(ioinfo);
+    }
+  }
+#if FTP_DEBUG
+  if (dd == NULL) dd = fopen("toto.txt","w");
+  if (dd != NULL) {    // fopen() may have failed
+    fprintf(dd,"---> %s\x0d\x0a",data); fflush(dd);
+  }
+  printf("---> %s",data); fflush(stdout);
+#endif
+  // line[] must hold the text plus CR, LF and the terminating NUL
+  if (strlen(data) + 3 > sizeof(line))
+    return 0;   // refuse instead of overflowing the stack buffer
+  sprintf(line,"%s\x0d\x0a",data);
+  if (check_socket_connect(soc) != 1) {
+#if FTP_DEBUG
+    printf("!SOC WRITE ERROR\n");
+#endif
+    return 0;   // error, not connected any more!
+  }
+#if FTP_DEBUG
+  {
+    int r = (send(soc,line,strlen(line),0) == (int) strlen(line));
+    printf("%s\x0d\x0a",data); fflush(stdout);
+    return r;
+  }
+#else
+  return (send(soc,line,strlen(line),0) == (int) strlen(line));
+#endif
+}
+
+// get_ftp_line: read one FTP reply from soc, one byte at a time.
+// A line whose 4th character is '-' opens a multi-line reply; such lines, and
+// the following lines that do not start with a digit, are skipped so that only
+// the final line is kept.
+//   line:    receives the reply line (may be NULL to discard it)
+//   timeout: passed to wait_socket_receive() before each byte is read
+// Returns non-zero when a non-empty reply line was read, 0 on read error or
+// timeout (line then holds a "500 *..." pseudo reply).
+// Reply lines longer than the internal 1024-byte buffer are truncated.
+// NOTE(review): line is filled with strcpy; presumably callers provide at
+// least 1024 bytes -- confirm.
+int get_ftp_line(T_SOC soc,char* line,int timeout) {
+  char data[1024];
+  int i,ok,multiline;
+#if FTP_DEBUG
+  if (dd == NULL) dd = fopen("toto.txt","w");
+#endif
+
+  data[0]='\0';
+  i=ok=multiline=0; data[3]='\0';
+  do {
+    char b;
+
+    // check for data
+    switch(wait_socket_receive(soc,timeout)) {
+    case -1:    // read error
+      if (line) strcpy(line,"500 *read error");
+      return 0;
+      break;
+    case 0:
+      if (line) sprintf(line,"500 *read timeout (%d)",timeout);
+      return 0;
+      break;
+    }
+
+    //HTS_TOTAL_RECV_CHECK(dummy);     // throttle if too much data was received
+    switch(recv(soc,&b,1,0)) {
+      //case 0: break;    // nothing yet --> error (we were told data was there)!
+    case 1:
+      HTS_STAT.HTS_TOTAL_RECV+=1;    // account for incoming traffic
+      // the line length is controlled by the server: never write past data[],
+      // and keep one byte for the terminating NUL
+      if ((b!=10) && (b!=13) && (i < (int) sizeof(data) - 1))
+        data[i++]=b;
+      break;
+    default:
+      if (line) strcpy(line,"500 *read error");
+      return 0; // error
+      break;
+    }
+    if ( ((b==13) || (b==10)) && (i>0) ){ // CR/LF
+      if (
+        (data[3] == '-')
+        ||
+        ((multiline) && (!isdigit((unsigned char)data[0])))
+        )
+      {
+        // intermediate line of a multi-line reply: drop it and keep reading
+        data[3]='\0';
+        i=0;
+        multiline=1;
+      }
+      else
+        ok=1;    // final line, leave the loop
+    }
+  } while(!ok);
+  data[i++]='\0';
+
+  if (_DEBUG_HEAD) {
+    if (ioinfo) {
+      fprintf(ioinfo,"<--- %s\x0d\x0a",data);
+      fflush(ioinfo);
+    }
+  }
+#if FTP_DEBUG
+  if (dd != NULL) {    // fopen() may have failed
+    fprintf(dd,"<--- %s\n",data); fflush(dd);
+  }
+  printf("<--- %s\n",data);
+#endif
+  if (line) strcpy(line,data);
+  return (strnotempty(data));
+}
+
+// Skip the leading "NNN " reply code of an FTP reply line.
+// Returns line+4 when line is longer than 4 characters, line itself otherwise.
+char* linejmp(char* line) {
+  return (strlen(line) > 4) ? (line + 4) : line;
+}
+
+// Test a socket for incoming data, without blocking (zero timeout):
+//  0 : no data
+//  1 : data detected
+// -1 : error (exceptional condition on the socket, or select() itself failed)
+int check_socket(T_SOC soc) {
+  fd_set fds,fds_e;            // poll structures
+  struct timeval tv;           // structure for select
+  FD_ZERO(&fds);
+  FD_ZERO(&fds_e);
+  // socket read
+  FD_SET(soc,&fds);
+  // socket error
+  FD_SET(soc,&fds_e);
+  tv.tv_sec=0;
+  tv.tv_usec=0;
+  // poll! the sets are only meaningful when select() succeeds
+  if (select(soc + 1,&fds,NULL,&fds_e,&tv) < 0)
+    return -1;
+  if (FD_ISSET(soc,&fds_e)) {  // error detected
+    return -1;
+  } else if (FD_ISSET(soc,&fds)) {
+    return 1;
+  }
+  return 0;
+}
+// Check whether a socket is connected, i.e. reported writable, without
+// blocking (zero timeout):
+//  0 : not writable yet
+//  1 : writable
+// -1 : error (exceptional condition on the socket, or select() itself failed)
+int check_socket_connect(T_SOC soc) {
+  fd_set fds,fds_e;            // poll structures
+  struct timeval tv;           // structure for select
+  FD_ZERO(&fds);
+  FD_ZERO(&fds_e);
+  // socket write
+  FD_SET(soc,&fds);
+  // socket error
+  FD_SET(soc,&fds_e);
+  tv.tv_sec=0;
+  tv.tv_usec=0;
+  // poll! the sets are only meaningful when select() succeeds
+  if (select(soc + 1,NULL,&fds,&fds_e,&tv) < 0)
+    return -1;
+  if (FD_ISSET(soc,&fds_e)) {  // error detected
+    return -1;
+  } else if (FD_ISSET(soc,&fds)) {
+    return 1;
+  }
+  return 0;
+}
+// Wait for data on soc: polls check_socket(), calling Sleep(100) between two
+// polls, until it reports something or the elapsed time_local() difference
+// reaches timeout.
+// Returns the last check_socket() result (1 data, -1 error, 0 nothing in time).
+int wait_socket_receive(T_SOC soc,int timeout) {
+  TStamp start=time_local();
+  int state;
+#if FTP_DEBUG
+  printf("\x0dWaiting for data "); fflush(stdout);
+#endif
+  for (;;) {
+    state = check_socket(soc);
+    if (state)
+      break;                   // data or error
+    if ( ((int) ((TStamp) (time_local()-start))) >= timeout )
+      break;                   // waited long enough
+    Sleep(100);
+#if FTP_DEBUG
+    printf("."); fflush(stdout);
+#endif
+  }
+#if FTP_DEBUG
+  printf("\x0dreturn: %d\x0d",state); fflush(stdout);
+#endif
+  return state;
+}
+
+
+// Has a cancel request been received?
+// When back->stop_ftp is set, marks the transfer as finished with an error
+// ("Cancelled by User", statuscode -1) and returns 1; returns 0 otherwise.
+int stop_ftp(lien_back* back) {
+  if (!back->stop_ftp)
+    return 0;
+  back->r.statuscode=-1;
+  back->status=FTP_STATUS_READY;    // done
+  strcpy(back->r.msg,"Cancelled by User");
+  return 1;
+}
+
+
+
+
diff --git a/src/htsftp.h b/src/htsftp.h
new file mode 100644
index 0000000..e24f1f3
--- /dev/null
+++ b/src/htsftp.h
@@ -0,0 +1,68 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: basic FTP protocol manager .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSFTP_DEFH
+#define HTSFTP_DEFH
+
+#include "htsbase.h"
+#include "htsbasenet.h"
+#include "htsthread.h"
+
+// lien_back
+#include "htscore.h"
+
+// FTP transfer entry points; their signatures depend on whether a thread API
+// is available (USE_BEGINTHREAD)
+#if USE_BEGINTHREAD
+void launch_ftp(lien_back* back);
+PTHREAD_TYPE back_launch_ftp( void* pP );
+#else
+void launch_ftp(lien_back* back,char* path,char* exec);
+int back_launch_ftp(lien_back* back);
+#endif
+
+int run_launch_ftp(lien_back* back);
+// send data + CR LF on soc; 0 = error
+int send_line(T_SOC soc,char* data);
+// read one reply line (final line of a multi-line reply) into line; 0 = error/timeout
+int get_ftp_line(T_SOC soc,char* line,int timeout);
+// open a listening data socket and build the PORT/EPRT command in to_send
+T_SOC get_datasocket(char* to_send);
+// 1 if back->stop_ftp is set (back is then flagged as cancelled), else 0
+int stop_ftp(lien_back* back);
+// skip the "NNN " prefix of a reply line
+char* linejmp(char* line);
+// non-blocking poll for readable data: 0 none, 1 data, -1 error
+int check_socket(T_SOC soc);
+// non-blocking poll for writability: 0 no, 1 yes, -1 error
+int check_socket_connect(T_SOC soc);
+// poll check_socket() until data, error or timeout; returns its last result
+int wait_socket_receive(T_SOC soc,int timeout);
+
+
+#endif
+
diff --git a/src/htsglobal.h b/src/htsglobal.h
new file mode 100644
index 0000000..ce54d3d
--- /dev/null
+++ b/src/htsglobal.h
@@ -0,0 +1,332 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Global #define file */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Fichier réunissant l'ensemble des defines
+
+#ifndef HTTRACK_GLOBAL_DEFH
+#define HTTRACK_GLOBAL_DEFH
+
+// Version strings. HTTRACK_VERSION and HTTRACK_VERSIONID are embedded in
+// HTS_UPDATE_WEBSITE, HTTRACK_AFF_VERSION in HTS_DEFAULT_FOOTER (defined below).
+#define HTTRACK_VERSION "3.20-2"
+#define HTTRACK_VERSIONID "3.20.02"
+#define HTTRACK_AFF_VERSION "3.x"
+//#define HTTRACK_AFF_WARNING "This is a RELEASE CANDIDATE version of WinHTTrack Website Copier 3.0\nPlease report us any bug or problem"
+
+
+
+// Définition plate-forme
+#include "htssystem.h"
+#include "htsconfig.h"
+
+// Windows sockets or Unix sockets: HTS_WIN is 1 when HTS_PLATFORM is 1
+#if HTS_PLATFORM==1
+#define HTS_WIN 1
+#else
+#define HTS_WIN 0
+#endif
+
+// DOS compatibility (follows HTS_WIN)
+#if HTS_WIN
+#define HTS_DOSNAME 1
+#else
+#define HTS_DOSNAME 0
+#endif
+
+// use zlib? (forced on when _WINDOWS is defined and HTS_USEZLIB is not set)
+#if HTS_USEZLIB
+#else
+#ifdef _WINDOWS
+#define HTS_USEZLIB 1
+#endif
+#endif
+
+// IPv6 support is off unless defined beforehand
+#ifndef HTS_INET6
+#define HTS_INET6 0
+#endif
+
+// use openssl? (on unless defined beforehand)
+#ifndef HTS_USEOPENSSL
+#define HTS_USEOPENSSL 1
+#endif
+
+// __cdecl expands to nothing outside Windows
+#if HTS_WIN
+#else
+#define __cdecl
+#endif
+
+/*
+#if HTS_XGETHOST
+#if HTS_PLATFORM==1
+#ifndef __cplusplus
+#undef HTS_XGMETHOD
+#undef HTS_XGETHOST
+#endif
+#endif
+#else
+#undef HTS_XGMETHOD
+#undef HTS_XGETHOST
+#endif
+*/
+
+
+// Outside Windows, HTS_ANALYSTE and HTS_ANALYSTE_CONSOLE are forced to 1 when
+// HTS_ANALYSTE is not already set to a non-zero value
+#if HTS_ANALYSTE
+#else
+#if HTS_WIN
+#else
+#undef HTS_ANALYSTE
+// Analyste
+#define HTS_ANALYSTE 1
+#define HTS_ANALYSTE_CONSOLE 1
+#endif
+#endif
+
+
+/* rc file */
+#if HTS_WIN
+#define HTS_HTTRACKRC "httrackrc"
+#else
+
+/* installation paths, overridable from the build command line */
+#ifndef HTS_ETCPATH
+#define HTS_ETCPATH "/etc"
+#endif
+#ifndef HTS_BINPATH
+#define HTS_BINPATH "/usr/bin"
+#endif
+#ifndef HTS_LIBPATH
+#define HTS_LIBPATH "/usr/lib"
+#endif
+#ifndef HTS_PREFIX
+#define HTS_PREFIX "/usr"
+#endif
+
+/* rc file name, configuration file and documentation directory */
+#define HTS_HTTRACKRC ".httrackrc"
+#define HTS_HTTRACKCNF HTS_ETCPATH"/httrack.conf"
+#define HTS_HTTRACKDIR HTS_PREFIX"/doc/httrack/"
+
+#endif
+
+/* Hash table handling: number of buckets */
+#define HTS_HASH_SIZE 20147
+/* Maximum size of a URL */
+#define HTS_URLMAXSIZE 512
+/* Maximum size of a command line (>=HTS_URLMAXSIZE*2) */
+#define HTS_CDLMAXSIZE 1024
+/* Copyright (C) Xavier Roche and other contributors */
+#define HTTRACK_AFF_AUTHORS "[XR&CO'2002]"
+/* printf-style templates: footer comment and update-check URL */
+#define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %s -->"
+#define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s"
+
+/* line terminators: CRLF is always CR LF, LF is CR LF on Windows only */
+#define H_CRLF "\x0d\x0a"
+#define CRLF "\x0d\x0a"
+#if HTS_WIN
+#define LF "\x0d\x0a"
+#else
+#define LF "\x0a"
+#endif
+
+/* stands for "empty parameter", for example -F (none) */
+#define HTS_NOPARAM "(none)"
+#define HTS_NOPARAM2 "\"(none)\""
+
+/* maximum and minimum (arguments are evaluated twice) */
+#define maximum(A,B) ( (A) > (B) ? (A) : (B) )
+#define minimum(A,B) ( (A) < (B) ? (A) : (B) )
+
+/* is the string non-empty? (1 or 0) */
+#define strnotempty(A) (((A)[0]!='\0') ? 1 : 0)
+
+/* inline optimization when possible (C++ only) */
+#ifdef __cplusplus
+#define HTS_INLINE inline
+#else
+#define HTS_INLINE
+#endif
+
+/* 64-bit integers are used unless HTS_NO_64_BIT is defined */
+#ifdef HTS_NO_64_BIT
+#define HTS_LONGLONG 0
+#else
+#define HTS_LONGLONG 1
+#endif
+
+// long long int? (or int)
+// (and int cast for system functions like malloc() )
+// LLint: large integer type, TStamp: time stamp type, INTsys: plain int,
+// LLintP: printf format matching LLint
+#if HTS_LONGLONG
+ #if HTS_WIN
+ typedef __int64 LLint;
+ typedef __int64 TStamp;
+ typedef int INTsys;
+ #define LLintP "%I64d"
+ #else
+ #if HTS_PLATFORM==0
+ typedef long long int LLint;
+ typedef long long int TStamp;
+ typedef int INTsys;
+ #define LLintP "%lld"
+ #else
+ typedef long long int LLint;
+ typedef long long int TStamp;
+ typedef int INTsys;
+ #define LLintP "%Ld"
+ #endif
+ #endif
+#else
+ // no 64-bit integer: LLint falls back to int and TStamp to double
+ typedef int LLint;
+ typedef int INTsys;
+ typedef double TStamp;
+ #define LLintP "%d"
+#endif
+
+/* Alignment */
+#ifndef HTS_ALIGN
+#define HTS_ALIGN 4
+#endif
+
+/* IPV4, IPV6 and various unified structures */
+#define HTS_MAXADDRLEN 64
+
+#if HTS_WIN
+#else
+#define __cdecl
+#endif
+
+/* mode for mkdir AND chmod (file access) */
+/* NOTE(review): HTS_ACCESS is tested here, but its default value (1) is only
+   defined further down; unless an earlier header already defines it, the
+   restrictive #else modes are selected -- confirm */
+#define HTS_PROTECT_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
+#if HTS_ACCESS
+#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)
+#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
+#else
+#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR)
+#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
+#endif
+
+/* check that the preprocessor variables have been declared */
+#ifndef HTS_DOSNAME
+#error | HTS_DOSNAME Has not been defined.
+#error | Set it to 1 if you are under DOS, 0 under Unix.
+#error | Example: place this line in you source, before includes:
+#error | #define HTS_DOSNAME 0
+#error
+#error
+#endif
+#ifndef HTS_ACCESS
+/* By default, access for all users */
+#define HTS_ACCESS 1
+#endif
+
+/* fflush on stdout */
+/* NOTE(review): this also calls fflush(stdin); flushing an input stream is
+   undefined behaviour in ISO C -- confirm it is intended */
+#define io_flush { fflush(stdout); fflush(stdin); }
+
+
+
+/* HTSLib */
+
+// DNS cache, speeds up address resolution
+#define HTS_DNSCACHE 1
+
+// ID of a local pseudo-socket used for file://
+#define LOCAL_SOCKET_ID -500000
+
+// size of each buffer (10 sockets 650 KB)
+#define TAILLE_BUFFER 65535
+
+#if HTS_WIN
+#else
+// use pthreads.h (unless HTS_DO_NOT_USE_PTHREAD is defined)
+#ifdef HTS_DO_NOT_USE_PTHREAD
+#define USE_PTHREAD 0
+#else
+#define USE_PTHREAD 1
+#endif
+#endif
+
+// USE_BEGINTHREAD: 1 on Windows or when pthreads are used, 0 otherwise
+#if HTS_WIN
+#define USE_BEGINTHREAD 1
+#else
+#if USE_PTHREAD
+#define USE_BEGINTHREAD 1
+#else
+/* sh*t.. */
+#define USE_BEGINTHREAD 0
+#endif
+#endif
+
+/* ------------------------------------------------------------ */
+/* Debugging */
+/* (all switches below are compiled out: set to 0) */
+/* ------------------------------------------------------------ */
+
+// types debugging
+#define DEBUG_SHOWTYPES 0
+// backing debug
+#define BDEBUG 0
+// chunk receive
+#define CHUNKDEBUG 0
+// realloc links debug
+#define MDEBUG 0
+// cache debug
+#define DEBUGCA 0
+// DNS debug
+#define DEBUGDNS 0
+// savename debug
+#define DEBUG_SAVENAME 0
+// debug robots
+#define DEBUG_ROBOTS 0
+// debug hash
+#define DEBUG_HASH 0
+// integrity check
+#define DEBUG_CHECKINT 0
+// nbr sockets debug
+#define NSDEBUG 0
+// trace mallocs
+#define HTS_TRACE_MALLOC 0
+
+// HTSLib debugging
+#define HDEBUG 0
+// connection monitoring
+#define CNXDEBUG 0
+// cookies debugging
+#define DEBUG_COOK 0
+// heavy debugging..
+#define HTS_WIDE_DEBUG 0
+// debugging of deletehttp and friends
+#define HTS_DEBUG_CLOSESOCK 0
+// memory tracing debug
+#define MEMDEBUG 0
+
+// htsmain
+#define DEBUG_STEPS 0
+
+#endif
+
diff --git a/src/htshash.c b/src/htshash.c
new file mode 100644
index 0000000..b02f2ba
--- /dev/null
+++ b/src/htshash.c
@@ -0,0 +1,453 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* hash table system (fast index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htshash.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* END specific definitions */
+
+// GESTION DES TABLES DE HACHAGE
+// Méthode à 2 clés (adr+fil), 2e cle facultative
+// hash[no_enregistrement][pos]->hash est un index dans le tableau général liens
+// #define HTS_HASH_SIZE 8191 (premier si possible!)
+// type: numero enregistrement - 0 est case insensitive (sav) 1 (adr+fil) 2 (former_adr+former_fil)
+#if HTS_HASH
+// Look up a link in hash table number `type` from the keys nom1/nom2.
+//   type 0: saved name (sav), compared case-insensitively against nom1
+//   type 1: adr+fil
+//   type 2: former_adr+former_fil (links without former_adr never match)
+// Returns the index of the link in hash->liens, or -1 if not found.
+int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
+  unsigned int key;
+  int idx;
+  // raw search key (not reduced yet); table 0 is keyed on the lower-cased name
+  if (type)
+    key = hash_cle(nom1,nom2);
+  else
+    key = hash_cle(convtolower(nom1),nom2);
+  // head of the bucket chain; a negative value means the key was never entered
+  idx = hash->hash[type][(int) (key%HTS_HASH_SIZE)];
+  if (idx < 0) {
+#if DEBUG_HASH==2
+    printf("hash: not found %s%s\n",nom1,nom2);
+#endif
+    return -1;
+  }
+  // walk the chain of links sharing this bucket
+  while (idx >= 0) {
+    int found = 0;
+    int next;
+    if (type == 0) {
+      if (strfield2(nom1,hash->liens[idx]->sav))
+        found = 1;
+    } else if (type == 1) {
+      if ((strcmp(nom1,jump_identification(hash->liens[idx]->adr))==0) && (strcmp(nom2,hash->liens[idx]->fil)==0))
+        found = 1;
+    } else if (type == 2) {
+      if (hash->liens[idx]->former_adr)
+        if ((strcmp(nom1,jump_identification(hash->liens[idx]->former_adr))==0) && (strcmp(nom2,hash->liens[idx]->former_fil)==0))
+          found = 1;
+    }
+    if (found) {
+#if DEBUG_HASH==2
+      printf("hash: found shortcut at %d\n",idx);
+#endif
+      return idx;
+    }
+    // next element; an element pointing to itself ends the walk (loop guard,
+    // should not happen)
+    next = hash->liens[idx]->hash_next[type];
+    idx = (next == idx) ? -1 : next;
+  }
+#if DEBUG_HASH==1
+  printf("hash: not found after test %s%s\n",nom1,nom2);
+#endif
+  return -1;
+}
+
+// Register link number lpos in the hash tables: table 0 (lower-cased sav),
+// table 1 (adr+fil) and, when former_adr is set, table 2
+// (former_adr+former_fil). The link is appended at the end of each bucket
+// chain. Nothing is registered when hash->liens[lpos] is NULL (with
+// DEBUG_HASH enabled this is reported and the program exits).
+void hash_write(hash_struct* hash,int lpos) {
+  unsigned int cle;
+  int pos;
+  int* ptr;
+  //
+  if (hash->liens[lpos]) {    // you never know..
+    hash->max_lien = max(hash->max_lien,lpos);
+#if DEBUG_HASH
+    hashnumber=hash->max_lien;
+#endif
+    // this element becomes the end of each chain (-1)
+    hash->liens[lpos]->hash_next[0]=hash->liens[lpos]->hash_next[1]=hash->liens[lpos]->hash_next[2]=-1;
+    //
+    cle = hash_cle(convtolower(hash->liens[lpos]->sav),"");    // CASE INSENSITIVE
+    pos = (int) (cle%HTS_HASH_SIZE);
+    ptr = hash_calc_chaine(hash,0,pos);    // slot at the end of the chain
+    *ptr = lpos;                           // link the new element there
+#if DEBUG_HASH==3
+    printf("[%d",pos);
+#endif
+    //
+    cle = hash_cle(jump_identification(hash->liens[lpos]->adr),hash->liens[lpos]->fil);
+    pos = (int) (cle%HTS_HASH_SIZE);
+    ptr = hash_calc_chaine(hash,1,pos);    // slot at the end of the chain
+    *ptr = lpos;                           // link the new element there
+#if DEBUG_HASH==3
+    printf(",%d",pos);
+#endif
+    //
+    if (hash->liens[lpos]->former_adr) {   // is there a former_adr?
+      cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),hash->liens[lpos]->former_fil);
+      pos = (int) (cle%HTS_HASH_SIZE);
+      ptr = hash_calc_chaine(hash,2,pos);  // slot at the end of the chain
+      *ptr = lpos;                         // link the new element there
+#if DEBUG_HASH==3
+      printf(",%d",pos);
+#endif
+    }
+#if DEBUG_HASH==3
+    printf("] "); fflush(stdout);
+#endif
+  }
+  // debug guard; the original tested the misspelled symbol DEBUT_HASH and
+  // therefore could never be compiled in
+#if DEBUG_HASH
+  else {
+    printf("* hash_write=0!!\n");
+    exit(1);
+  }
+#endif
+  //
+}
+
+// Key computation: sum of md5sum32() of both names.
+// The result is not reduced; callers take it modulo HTS_HASH_SIZE.
+// (there is no universal hashing formula, this one seems acceptable..)
+unsigned long int hash_cle(char* nom1,char* nom2) {
+ /* former hand-written formula, kept for reference:
+ unsigned int sum=0;
+ int i=0;
+ while(*nom1) {
+ sum += 1;
+ sum += (unsigned int) *(nom1);
+ sum *= (unsigned int) *(nom1++);
+ sum += (unsigned int) i;
+ i++;
+ }
+ while(*nom2) {
+ sum += 1;
+ sum += (unsigned int) *(nom2);
+ sum *= (unsigned int) *(nom2++);
+ sum += (unsigned int) i;
+ i++;
+ }
+ */
+ return md5sum32(nom1)
+ +md5sum32(nom2);
+}
+
+// Find where a new element must be linked in bucket pos of table `type`.
+// Returns the address of the slot holding -1 at the end of the chain: the
+// bucket head itself when the bucket is empty, otherwise the hash_next[type]
+// field of the last element.
+int* hash_calc_chaine(hash_struct* hash,int type,int pos) {
+  int cur;
+#if DEBUG_HASH
+  int count=0;
+#endif
+  // empty bucket: the new element becomes the first one
+  if (hash->hash[type][pos] == -1)
+    return &(hash->hash[type][pos]);
+  // follow the links down to the last element
+  for (cur=hash->hash[type][pos]; hash->liens[cur]->hash_next[type] != -1; cur=hash->liens[cur]->hash_next[type]) {
+#if DEBUG_HASH
+    count++;
+#endif
+  }
+#if DEBUG_HASH
+  count++;
+  longest_hash[type]=max(longest_hash[type],count);
+#endif
+  return &(hash->liens[cur]->hash_next[type]);
+}
+#endif
+// FIN GESTION DES TABLES DE HACHAGE
+
+
+
+
+
+
+
+
+
+
+
+
+// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
+
+// Hash key of a name: its md5sum32() value (not reduced to the table size).
+unsigned long int inthash_key(char* value) {
+  unsigned long int key = md5sum32(value);
+  return key;
+}
+
+// Set the value stored under name, adding the entry when it does not exist.
+// Returns 1 if a new entry was added, 0 if an existing one was overwritten.
+int inthash_write(inthash hashtable,char* name,long int value) {
+  int bucket = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* node;
+  for (node=hashtable->hash[bucket]; node != NULL; node=node->next) {
+    if (strcmp(node->name,name) != 0)
+      continue;
+    // existing entry: overwrite
+    node->value.intg=value;
+    return 0;
+  }
+  // Not found, add it!
+  inthash_add(hashtable,name,value);
+  return 1;
+}
+
+// Increment the value stored under name; when the entry does not exist yet it
+// is created with the value 0.
+// Returns 1 if the entry was created, 0 if it was incremented.
+int inthash_inc(inthash hashtable,char* name) {
+  long int value=0;
+  int created;
+  if (inthash_read(hashtable,name,&value)) {
+    created=0;
+    value++;
+  } else {                 /* create new value */
+    created=1;
+    value=0;
+  }
+  inthash_write(hashtable,name,value);
+  return created;
+}
+
+
+// Append a new entry (name, value) at the end of its bucket chain.
+// Does not check for duplicate entry. The name is copied into the same
+// allocation as the chain element; nothing is added if the allocation fails.
+void inthash_add(inthash hashtable,char* name,long int value) {
+  int bucket = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** slot;
+  inthash_chain* node;
+
+  // find the NULL link that ends the chain
+  for (slot=&hashtable->hash[bucket]; *slot != NULL; slot=&((*slot)->next))
+    ;
+  node=(inthash_chain*)calloc(1, sizeof(inthash_chain) + strlen(name)+2);
+  *slot=node;
+  if (node == NULL)
+    return;
+  // the key string lives right after the structure
+  node->name=((char*)node) + sizeof(inthash_chain);
+  node->next=NULL;
+  strcpy(node->name,name);
+  node->value.intg=value;
+}
+
+// Append a new entry for name whose value is a zero-filled memory block of
+// blksize bytes. Element, key string and block share a single allocation:
+//   [inthash_chain][name + 2 spare bytes][block of blksize bytes]
+// Does not check for duplicate entry.
+// Returns the address of the block (also stored as the entry's value), or
+// NULL if the allocation failed.
+// The address is stored through the union's ptr member rather than being
+// converted to unsigned long, so it is not truncated where long is narrower
+// than a pointer.
+// NOTE(review): the block starts right after the name, so its alignment
+// depends on the name length -- confirm callers only store byte data.
+void* inthash_addblk(inthash hashtable,char* name,int blksize) {
+  int pos = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** h=&hashtable->hash[pos];
+  size_t namesize=strlen(name)+2;
+
+  while (*h)
+    h=&((*h)->next);
+  *h=(inthash_chain*)calloc(1,
+    sizeof(inthash_chain)
+    +
+    namesize
+    +
+    blksize
+    );
+  if (*h) {
+    char* base=(char*)(*h);
+    (*h)->name = base + sizeof(inthash_chain);
+    (*h)->next=NULL;
+    strcpy((*h)->name,name);
+    (*h)->value.ptr = base + sizeof(inthash_chain) + namesize;
+    return (*h)->value.ptr;
+  }
+  return NULL;
+}
+
+// Read the value stored under name.
+// Returns 1 and fills *value when the entry exists, 0 otherwise (*value is
+// then left untouched).
+int inthash_read(inthash hashtable,char* name,long int* value) {
+  int bucket = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* node;
+  for (node=hashtable->hash[bucket]; node != NULL; node=node->next) {
+    if (strcmp(node->name,name)==0) {
+      *value=node->value.intg;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Mark every bucket of the table as empty (NULL chain head).
+void inthash_init(inthash hashtable) {
+  unsigned int i=0;
+  while (i < hashtable->hash_size) {
+    hashtable->hash[i]=NULL;
+    i++;
+  }
+}
+
+// Free a whole bucket chain, last element first (recursive).
+// When free_handler is not NULL, it is called on every non-zero value before
+// the element itself is released.
+void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) {
+  if (hash == NULL)
+    return;
+  // release the rest of the chain before this element
+  inthash_delchain(hash->next,free_handler);
+  if (free_handler != NULL) {    // value is a malloc() block, delete it!
+    if (hash->value.intg)
+      free_handler((void*)hash->value.intg);
+    hash->value.intg=0;
+  }
+  free(hash);
+}
+
+// Default value release handler: free() the block unless it is NULL.
+void inthash_default_free_handler(void* value) {
+  if (value == NULL)
+    return;
+  free(value);
+}
+
+// --
+
+// Create a hash table with `size` buckets.
+// Returns NULL if the table structure could not be allocated. If only the
+// bucket array allocation fails, the structure is still returned with a NULL
+// hash member and hash_size 0 (see inthash_created()).
+inthash inthash_new(int size) {
+  inthash hashtable=(inthash)calloc(1,sizeof(struct_inthash));
+  if (hashtable == NULL)
+    return hashtable;
+  hashtable->hash_size=0;
+  hashtable->flag_valueismalloc=0;
+  hashtable->hash=(inthash_chain**)calloc(size,sizeof(inthash_chain*));
+  if (hashtable->hash != NULL) {
+    hashtable->hash_size=size;
+    inthash_init(hashtable);
+  }
+  return hashtable;
+}
+
+// Was the table successfully created? 1 when both the structure and its
+// bucket array exist, 0 otherwise.
+int inthash_created(inthash hashtable) {
+  return (hashtable != NULL && hashtable->hash != NULL) ? 1 : 0;
+}
+
+// Record whether the stored values are allocated blocks; inthash_delete()
+// reads this flag to decide whether a release handler applies.
+void inthash_value_is_malloc(inthash hashtable,int flag) {
+  hashtable->flag_valueismalloc=(unsigned short) flag;
+}
+
+// Install the handler stored in the table for releasing values
+// (kept in hashtable->free_handler).
+void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler) {
+  t_inthash_freehandler handler = free_handler;
+  hashtable->free_handler = handler;
+}
+
+// Delete a hash table: every chain element, the bucket array and the table
+// structure itself; *hashtable is reset to NULL.
+// Values are released only when the table was flagged with
+// inthash_value_is_malloc(): with the handler installed through
+// inthash_value_set_free_handler() if any, with free() otherwise.
+// Does nothing when hashtable or *hashtable is NULL.
+void inthash_delete(inthash* hashtable) {
+  if (hashtable == NULL || *hashtable == NULL)
+    return;
+  if ((*hashtable)->hash) {
+    unsigned int i;
+    t_inthash_freehandler free_handler=NULL;
+    if ( (*hashtable)->flag_valueismalloc ) {
+      if ( (*hashtable)->free_handler )
+        free_handler=(*hashtable)->free_handler;
+      else
+        free_handler=inthash_default_free_handler;
+    }
+    for(i=0;i<(*hashtable)->hash_size;i++) {
+      // pass the handler selected above (the original passed the raw
+      // free_handler member, so the default handler was never applied)
+      inthash_delchain((*hashtable)->hash[i],free_handler);
+      (*hashtable)->hash[i]=NULL;
+    }
+    // the bucket array was allocated by inthash_new(): release it as well
+    free((*hashtable)->hash);
+    (*hashtable)->hash=NULL;
+  }
+  free(*hashtable);
+  *hashtable=NULL;
+}
+
+
diff --git a/src/htshash.h b/src/htshash.h
new file mode 100644
index 0000000..9a54710
--- /dev/null
+++ b/src/htshash.h
@@ -0,0 +1,104 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* hash table system (fast index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSHASH_DEFH
+#define HTSHASH_DEFH
+
+#include "htscore.h"
+
+// hash tables (indexes into the global link array)
+// hash_read: index of the link matching nom1/nom2 in table `type`, or -1
+int hash_read(hash_struct* hash,char* nom1,char* nom2,int type);
+// hash_write: register link number lpos in the tables
+void hash_write(hash_struct* hash,int lpos);
+// hash_calc_chaine: address of the free (-1) slot ending the chain of bucket pos
+int* hash_calc_chaine(hash_struct* hash,int type,int pos);
+// hash_cle: raw key computed from both names
+unsigned long int hash_cle(char* nom1,char* nom2);
+
+
+
+
+// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
+
+// simple hash table for other routines
+// one element of a bucket chain (singly linked list)
+typedef struct inthash_chain {
+ char* name; /* key (name) */
+ union {
+ unsigned long int intg; /* integer value */
+ void* ptr; /* ptr value */
+ } value;
+ struct inthash_chain* next; /* next element */
+} inthash_chain;
+
+// structure behind inthash
+// t_inthash_freehandler: callback used to release a stored value
+typedef void (* t_inthash_freehandler)(void* value);
+typedef struct {
+ inthash_chain** hash; /* bucket array (chain heads) */
+ t_inthash_freehandler free_handler; /* value release handler, may be NULL */
+ unsigned int hash_size; /* number of buckets */
+ unsigned short flag_valueismalloc; /* non-zero: values are allocated blocks */
+} struct_inthash;
+
+// main inthash type
+typedef struct_inthash* inthash;
+
+// subfunctions
+unsigned long int inthash_key(char* value); /* Raw hash key of a name */
+void inthash_init(inthash hashtable); /* Reset every bucket to NULL */
+void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler); /* Free a bucket chain (and its values if a handler is given) */
+void inthash_default_free_handler(void* value); /* free() wrapper ignoring NULL */
+
+// main functions:
+
+
+/* Hash functions: */
+inthash inthash_new(int size); /* Create a new hash table */
+int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */
+void inthash_delete(inthash* hashtable); /* Delete an hash table */
+void inthash_value_is_malloc(inthash hashtable,int flag); /* Is the 'value' member a value that needs to be free()'ed ? */
+void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */
+ t_inthash_freehandler free_handler);
+/* */
+int inthash_read(inthash hashtable,char* name,long int* value); /* Read entry from the hash table */
+/* */
+void inthash_add(inthash hashtable,char* name,long int value); /* Add entry in the hash table */
+void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */
+int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table */
+int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table */
+/* End of hash functions: */
+
+
+#endif
diff --git a/src/htshelp.c b/src/htshelp.c
new file mode 100644
index 0000000..3d743fe
--- /dev/null
+++ b/src/htshelp.c
@@ -0,0 +1,622 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* command-line help system */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htshelp.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htscoremain.h"
+#include "htscatchurl.h"
+#include "htslib.h"
+#include "htsalias.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HTS_WIN
+#else
+#include <unistd.h>
+#endif
+/* END specific definitions */
+
+/*
+  Print one line of help text on stdout, with optional paging.
+
+  msg == NULL : end of a listing, the pager state is reset
+  msg == "1"  : control message, enables paging for the following lines
+  otherwise   : the line is printed; lines shaped like an option entry
+                (leading space, short option token) are looked up with
+                optreal_find() and, when an alias exists, printed together
+                with their long "--alias" form
+
+  When paging is enabled the output stops after each batch of 21 lines and
+  waits for a line on stdin; answering "q" silences the rest of the listing.
+*/
+void infomsg(char* msg) {
+  /* The pager state must survive from one call to the next (the function is
+     called once per line), hence the static storage. */
+  static int l=0;      /* lines printed since the last pause */
+  static int m=0;      /* page counter shown after each pause */
+  static int more=0;   /* non-zero: paging enabled */
+  static int quit=0;   /* non-zero: user asked to stop the listing */
+  int done=0;          /* non-zero: line already printed with its alias */
+
+  /* End of listing: forget everything so that the next listing starts clean */
+  if (msg==NULL) {
+    l=m=more=quit=0;
+    return;
+  }
+  if (quit)
+    return;
+
+  /* Control message: enable paging */
+  if (strlen(msg)==1 && msg[0]=='1') {
+    more=1;
+    return;
+  }
+
+  /* display the alias? */
+  if (((int)strlen(msg)) > 4) {
+    if (msg[0]==' ' && msg[2]!=' ' && ((msg[3]==' ') || (msg[4]==' '))) {
+      char cmd[32]="-";
+      int p=0;
+      /* skip the leading blanks of the message */
+      while(msg[p]==' ') p++;
+      /* cmd already holds "-": 30 characters and the terminator remain */
+      sscanf(msg+p,"%30s",cmd+strlen(cmd));
+      /* clears cN -> c */
+      if ((p=strlen(cmd))>2)
+        if (cmd[p-1]=='N')
+          cmd[p-1]='\0';
+      /* finds alias (if any) */
+      p=optreal_find(cmd);
+      if (p>=0) {
+        /* finds type of parameter: number,param,param concatenated,single cmd */
+        if (strcmp(opttype_value(p),"param") == 0)
+          printf("%s (--%s[=N])\n",msg,optalias_value(p));
+        else if (strcmp(opttype_value(p),"param1") == 0)
+          printf("%s (--%s <param>)\n",msg,optalias_value(p));
+        else if (strcmp(opttype_value(p),"param0") == 0)
+          printf("%s (--%s<param>)\n",msg,optalias_value(p));
+        else
+          printf("%s (--%s)\n",msg,optalias_value(p));
+        done=1;
+      }
+    }
+  }
+
+  /* otherwise: plain line */
+  if (!done)
+    printf("%s\n",msg);
+
+  /* pause once a page is full */
+  l++;
+  if (l>20) {
+    l=0;
+    if (more) {
+      char s[4];
+      printf("\nMORE.. q to quit\n");
+      linput(stdin,s,4);
+      if (strcmp(s,"q")==0)
+        quit=1;
+      else
+        printf("Page %d\n\n",++m);
+    }
+  }
+}
+/* Append 'src' to the NUL-terminated string held in 'dst' (capacity 'dstsize'
+   bytes, terminator included). Characters that do not fit are dropped
+   instead of being written past the end of the buffer. */
+static void help_wizard_append(char* dst,size_t dstsize,const char* src) {
+  size_t used=strlen(dst);
+  if (src != NULL && used+1 < dstsize)
+    strncat(dst,src,dstsize-used-1);
+}
+
+/*
+  Interactive command-line wizard.
+  Asks on stdin for a project name, a base path, URLs, an action, a proxy,
+  wildcards and additional options, builds the matching httrack command
+  line, and (after confirmation) splits it into an argv[] array that is
+  handed to hts_main()/main().
+  'opt' is not used.
+  Note: answering anything but y/Y to the final confirmation calls exit(0).
+*/
+void help_wizard(httrackp* opt) {
+  /* capacities (bytes / entries) of the work buffers */
+  const size_t str_size=1024;
+  const size_t stropt_size=2048;
+  const size_t stropt2_size=2048;
+  const int argv_max=256;
+  char* urls = (char*) malloc(HTS_URLMAXSIZE*2);
+  char* mainpath = (char*) malloc(256);
+  char* projname = (char*) malloc(256);
+  char* stropt = (char*) malloc(stropt_size);       // options
+  char* stropt2 = (char*) malloc(stropt2_size);     // long options
+  char* strwild = (char*) malloc(2048);             // wildcards
+  char* cmd = (char*) malloc(4096);
+  char* str = (char*) malloc(str_size);
+  char** argv = (char**) malloc(argv_max * sizeof(char*));
+  //
+  char* a;
+  //
+  (void) opt;
+  if (urls == NULL || mainpath == NULL || projname == NULL || stropt == NULL
+    || stropt2 == NULL || strwild == NULL || cmd == NULL || str == NULL || argv == NULL) {
+    fprintf(stderr, "* memory exhausted in %s, line %d\n", __FILE__, __LINE__);
+    goto cleanup;   /* release whatever could be allocated */
+  }
+  urls[0] = mainpath[0] = projname[0] = stropt[0] = stropt2[0] = strwild[0] = cmd[0] = str[0] = '\0';
+  //
+  strcpy(stropt,"-");
+  //
+
+  printf("\n");
+  printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"\n");
+  printf("Copyright (C) Xavier Roche and other contributors\n");
+#ifdef _WIN32
+  printf("Note: You are running the commandline version,\n");
+  printf("run 'WinHTTrack.exe' to get the GUI version.\n");
+#endif
+#ifdef HTTRACK_AFF_WARNING
+  printf("NOTE: "HTTRACK_AFF_WARNING"\n");
+#endif
+#ifdef HTS_PLATFORM_NAME
+#if USE_BEGINTHREAD
+  printf("[compiled: "HTS_PLATFORM_NAME" - MT]\n");
+#else
+  printf("[compiled: "HTS_PLATFORM_NAME"]\n");
+#endif
+#endif
+  printf("To see the option list, enter a blank line or try httrack --help\n");
+  //
+  // Project name (a blank answer displays the help and asks again)
+  while(strnotempty(projname)==0) {
+    printf("\n");
+    printf("Enter project name :");
+    fflush(stdout);
+    linput(stdin,projname,250);
+    if (strnotempty(projname)==0)
+      help("httrack",1);
+  }
+  //
+  // Path
+  if (strnotempty(hts_gethome()))
+    printf("\nBase path (return=%s/websites/) :",hts_gethome());
+  else
+    printf("\nBase path (return=current directory) :");
+  linput(stdin,str,250);
+  if (!strnotempty(str)) {
+    help_wizard_append(str,str_size,hts_gethome());
+    help_wizard_append(str,str_size,"/websites/");
+  }
+  if (strnotempty(str))
+    if ((str[strlen(str)-1]!='/') && (str[strlen(str)-1]!='\\'))
+      help_wizard_append(str,str_size,"/");
+  help_wizard_append(stropt2,stropt2_size,"-O \"");
+  help_wizard_append(stropt2,stropt2_size,str);
+  help_wizard_append(stropt2,stropt2_size,projname);
+  help_wizard_append(stropt2,stropt2_size,"\" ");
+  // Create a first-level index.html if not already done
+  make_empty_index(str);
+  //
+  printf("\n");
+  printf("Enter URLs (separated by commas or blank spaces) :");
+  fflush(stdout);
+  linput(stdin,urls,250);
+  if (strnotempty(urls)) {
+    int argc;
+    int g=0;   /* non-zero while inside a double-quoted argument */
+    int i=0;
+
+    while( (a=strchr(urls,',')) ) *a=' ';
+    while( (a=strchr(urls,'\t')) ) *a=' ';
+
+    // Action
+    printf("\nAction:\n");
+    switch(help_query("Mirror Web Site(s)|Mirror Web Site(s) with Wizard|Just Get Files Indicated|Mirror ALL links in URLs (Multiple Mirror)|Test Links In URLs (Bookmark Test)|Update/Continue a Mirror",1)) {
+    case 1: break;
+    case 2: help_wizard_append(stropt,stropt_size,"W"); break;
+    case 3: help_wizard_append(stropt2,stropt2_size,"--get "); break;
+    case 4: help_wizard_append(stropt2,stropt2_size,"--mirrorlinks "); break;
+    case 5: help_wizard_append(stropt2,stropt2_size,"--testlinks "); break;
+    case 6: help_wizard_append(stropt2,stropt2_size,"--update "); break;
+    case 0: goto cleanup;   /* quit: the buffers still have to be released */
+    }
+
+    // Proxy
+    printf("\nProxy (return=none) :");
+    linput(stdin,str,250);
+    if (strnotempty(str)) {
+      while( (a=strchr(str,' ')) ) *a=':';      // port
+      if (!strchr(jump_identification(str),':')) {
+        char str2[256];
+        printf("\nProxy port (return=8080) :");
+        linput(stdin,str2,250);
+        help_wizard_append(str,str_size,":");
+        if (strnotempty(str2)==0)
+          help_wizard_append(str,str_size,"8080");
+        else
+          help_wizard_append(str,str_size,str2);
+      }
+      help_wizard_append(stropt2,stropt2_size,"-P ");
+      help_wizard_append(stropt2,stropt2_size,str);
+      help_wizard_append(stropt2,stropt2_size," ");
+    }
+
+    // Display
+    help_wizard_append(stropt2,stropt2_size," -%v ");
+
+    // Wildcards
+    printf("\nYou can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip\n");
+    printf("Wildcards (return=none) :");
+    linput(stdin,strwild,250);
+
+    // Options (typing "help" displays the option list and asks again)
+    do {
+      printf("\nYou can define additional options, such as recurse level (-r<number>), separed by blank spaces\n");
+      printf("To see the option list, type help\n");
+      printf("Additional options (return=none) :");
+      linput(stdin,str,250);
+      if (strfield2(str,"help")) {
+        help("httrack",2);
+      } else if (strnotempty(str)) {
+        help_wizard_append(stropt2,stropt2_size,str);
+        help_wizard_append(stropt2,stropt2_size," ");
+      }
+    } while(strfield2(str,"help"));
+
+    //
+    printf("\n");
+    if (strlen(stropt)==1)
+      stropt[0]='\0';    // no short option selected
+    /* urls and strwild hold at most 250 characters each, stropt2 less than
+       2048: the result always fits in the 4096 bytes of cmd */
+    sprintf(cmd,"%s %s %s %s",urls,stropt,stropt2,strwild);
+    printf("---> Wizard command line: httrack %s\n\n",cmd);
+    printf("Ready to launch the mirror? (Y/n) :");
+    fflush(stdout);
+    linput(stdin,str,250);
+    if (strnotempty(str)) {
+      if (!((str[0]=='y') || (str[0]=='Y')))
+        exit(0);
+    }
+    printf("\n");
+
+    // split the command line into arguments (blanks inside double quotes do not split)
+    argv[0]="winhttrack";
+    argv[1]=cmd;
+    argc=2;
+    while(cmd[i]) {
+      if (cmd[i]=='\"') g=!g;
+      /* never store more pointers than argv can hold; what is left simply
+         stays attached to the last argument */
+      if (cmd[i]==' ' && !g && argc < argv_max) {
+        cmd[i]='\0';
+        argv[argc++]=cmd+i+1;
+      }
+      i++;
+    }
+#if HTS_ANALYSTE
+    hts_main(argc,argv);
+#else
+    main(argc,argv);
+#endif
+  }
+
+cleanup:
+  /* Free buffers (free(NULL) is a no-op, so a partial allocation is fine) */
+  free(urls);
+  free(mainpath);
+  free(projname);
+  free(stropt);
+  free(stropt2);
+  free(strwild);
+  free(cmd);
+  free(str);
+  free(argv);
+}
+/*
+  Display a numbered menu and read the user's choice on stdin.
+  list: menu labels separated by '|' (numbered from 1, in order)
+  def:  choice returned when the user just presses return; its entry is
+        flagged with "(enter)"
+  Returns the number typed by the user (0 is displayed as "Quit"), or 'def'
+  on an empty answer. The question is repeated until the answer is empty or
+  starts with a number.
+*/
+int help_query(char* list,int def) {
+  char s[256];
+  char* a;
+  int opt;
+  int n=1;
+  a=list;
+  while(strnotempty(a)) {
+    char str[256];
+    char* b = strchr(a,'|');
+    /* the last label has no trailing '|': it runs up to the end of the list */
+    size_t len = b ? (size_t) (b - a) : strlen(a);
+    if (len > sizeof(str) - 1)    /* never write past str[] */
+      len = sizeof(str) - 1;
+    str[0]='\0';
+    strncat(str,a,len);
+    if (n==def)
+      printf("(enter)\t%d\t%s\n",n++,str);
+    else
+      printf("\t%d\t%s\n",n++,str);
+    a = b ? b+1 : a+strlen(a);
+  }
+  printf("\t0\tQuit");
+  do {
+    printf("\n: ");
+    fflush(stdout);
+    linput(stdin,s,250);
+  } while ((strnotempty(s)!=0) && (sscanf(s,"%d",&opt)!=1));
+  if (strnotempty(s))
+    return opt;
+  else
+    return def;
+}
+
+/*
+  URL capture.
+  Sets up a temporary proxy through catch_url_init_std(), prints the address
+  and port the browser has to be configured with, then hands the socket to
+  catch_url(). On success the captured data is written to the first unused
+  "<dest_path>hts-post<N>" file and the resulting URL is printed.
+*/
+void help_catchurl(char* dest_path) {
+  char adr_prox[HTS_URLMAXSIZE*2];
+  int port_prox;
+  T_SOC soc=catch_url_init_std(&port_prox,adr_prox);
+
+  if (soc==INVALID_SOCKET) {
+    printf("Unable to create a temporary proxy (no remaining port)\n");
+    return;
+  }
+
+  {
+    char url[HTS_URLMAXSIZE*2];
+    char method[32];
+    char data[32768];
+    url[0]='\0';
+    method[0]='\0';
+    data[0]='\0';
+
+    printf("Okay, temporary proxy installed.\nSet your browser's preferences to:\n\n");
+    printf("\tProxy's address: \t%s\n\tProxy's port: \t%d\n",adr_prox,port_prox);
+
+    if (!catch_url(soc,url,method,data)) {
+      printf("Unable to analyse the URL\n");
+    } else {
+      char dest[HTS_URLMAXSIZE*2];
+      char finalurl[HTS_URLMAXSIZE*2];
+      FILE* fp;
+      int i;
+
+      /* first hts-post<N> name that does not exist yet */
+      for(i=0 ; ; i++) {
+        sprintf(dest,"%s%s%d",dest_path,"hts-post",i);
+        if (!fexist(dest))
+          break;
+      }
+
+      /* save the captured data */
+      fp=fopen(dest,"wb");
+      if (fp) {
+        fwrite(data,strlen(data),1,fp);
+        fclose(fp);
+      }
+
+      /* build the URL */
+      escape_check_url(dest);
+      sprintf(finalurl,"%s"POSTTOK"file:%s",url,dest);
+      printf("\nThe URL is: \"%s\"\n",finalurl);
+      printf("You can capture it through: httrack \"%s\"\n",finalurl);
+    }
+
+#ifdef _WIN32
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+  }
+}
+
+// Create an empty index.html in directory 'str' -- currently a no-op: the whole body is compiled out by '#if 0'
+void make_empty_index(char* str) {
+#if 0
+ if (!fexist(fconcat(str,"index.html"))) {
+ FILE* fp=fopen(fconcat(str,"index.html"),"wb");
+ if (fp) {
+ fprintf(fp,"<!-- "HTS_TOPINDEX" -->"CRLF);
+ fprintf(fp,"<HTML><BODY>Index is empty!<BR>(File used to index all HTTrack projects)</BODY></HTML>"CRLF);
+ fclose(fp);
+ }
+ }
+#endif
+}
+
+/*
+  Command-line help: sends the whole option list to infomsg(), line by line.
+  app:  program name shown in the usage line ("httrack" is used if NULL)
+  more: non-zero sends the "1" control message to infomsg() before the
+        listing; the value 2 additionally skips the version/usage header
+  The listing is terminated by infomsg(NULL).
+*/
+void help(char* app,int more) {
+  infomsg("");
+  if (more)
+    infomsg("1");
+  if (more != 2) {
+    char info[2048];
+    infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")");
+#ifdef HTTRACK_AFF_WARNING
+    infomsg("NOTE: "HTTRACK_AFF_WARNING);
+#endif
+    /* the precision keeps an oversized program name from overflowing info[] */
+    sprintf(info,"\tusage: %.1024s <URLs> [-option] [+<FILTERs>] [-<FILTERs>]",app ? app : "httrack");
+    infomsg(info);
+    infomsg("\twith options listed below: (* is the default value)");
+    infomsg("");
+  }
+  infomsg("General options:");
+  infomsg(" O path for mirror/logfiles+cache (-O path_mirror[,path_cache_and_logfiles])");
+#ifndef HTS_WIN
+  infomsg(" %O chroot path to, must be r00t (-%O root_path)");
+#endif
+  infomsg("");
+  infomsg("Action options:");
+  infomsg(" w *mirror web sites");
+  infomsg(" W mirror web sites, semi-automatic (asks questions)");
+  infomsg(" g just get files (saved in the current directory)");
+  infomsg(" i continue an interrupted mirror using the cache");
+  infomsg(" Y mirror ALL links located in the first level pages (mirror links)");
+  infomsg("");
+  infomsg("Proxy options:");
+  infomsg(" P proxy use (-P proxy:port or -P user:pass@proxy:port)");
+  infomsg(" %f *use proxy for ftp (f0 don't use)");
+  infomsg("");
+  infomsg("Limits options:");
+  infomsg(" rN set the mirror depth to N (* r9999)");
+  infomsg(" %eN set the external links depth to N (* %e0)");
+  infomsg(" mN maximum file length for a non-html file");
+  infomsg(" mN,N2 maximum file length for non html (N) and html (N2)");
+  infomsg(" MN maximum overall size that can be uploaded/scanned");
+  infomsg(" EN maximum mirror time in seconds (60=1 minute, 3600=1 hour)");
+  infomsg(" AN maximum transfer rate in bytes/seconds (1000=1KB/s max)");
+  infomsg(" %cN maximum number of connections/seconds (*%c10)");
+  infomsg(" GN pause transfer if N bytes reached, and wait until lock file is deleted");
+  infomsg("");
+  infomsg("Flow control:");
+  infomsg(" cN number of multiple connections (*c8)");
+  infomsg(" TN timeout, number of seconds after a non-responding link is shutdown");
+  infomsg(" RN number of retries, in case of timeout or non-fatal errors (*R1)");
+  infomsg(" JN traffic jam control, minimum transfert rate (bytes/seconds) tolerated for a link");
+  infomsg(" HN host is abandonned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow");
+  infomsg("");
+  infomsg("Links options:");
+  infomsg(" %P *extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use)");
+  infomsg(" n get non-html files 'near' an html file (ex: an image located outside)");
+  infomsg(" t test all URLs (even forbidden ones)");
+  infomsg(" %L <file> add all URL located in this text file (one URL per line)");
+  infomsg("");
+  infomsg("Build options:");
+  infomsg(" NN structure type (0 *original structure, 1+: see below)");
+  infomsg(" or user defined structure (-N \"%h%p/%n%q.%t\")");
+  infomsg(" LN long names (L1 *long names / L0 8-3 conversion / L2 ISO9660 compatible)");
+  infomsg(" KN keep original links (e.g. http://www.adr/link) (K0 *relative link, K absolute links, K4 original links, K3 absolute URI links)");
+  infomsg(" x replace external html links by error pages");
+  infomsg(" %x do not include any password for external password protected websites (%x0 include)");
+  infomsg(" %q *include query string for local files (useless, for information purpose only) (%q0 don't include)");
+  infomsg(" o *generate output html file in case of error (404..) (o0 don't generate)");
+  infomsg(" X *purge old files after update (X0 keep delete)");
+  infomsg(" %p preserve html files 'as is' (identical to '-K4 -%F \"\"')");
+  infomsg("");
+  infomsg("Spider options:");
+  infomsg(" bN accept cookies in cookies.txt (0=do not accept,* 1=accept)");
+  infomsg(" u check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always)");
+  infomsg(" j *parse Java Classes (j0 don't parse)");
+  infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)");
+  infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)");
+  infomsg(" %B tolerant requests (accept bogus responses on some servers, but not standard!)");
+  infomsg(" %s update hacks: various hacks to limit re-transfers when updating (identical size, bogus response..)");
+  infomsg(" %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)");
+  infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD);
+  infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)");
+  infomsg("");
+  infomsg("Browser ID:");
+  infomsg(" F user-agent field (-F \"user-agent name\")");
+  infomsg(" %F footer string in Html code (-%F \"Mirrored [from host %s [file %s [at %s]]]\"");
+  infomsg(" %l preffered language (-%l \"fr, en, jp, *\"");
+  infomsg("");
+  infomsg("Log, index, cache");
+  infomsg(" C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before)");
+  infomsg(" k store all files in cache (not useful if files on disk)");
+  infomsg(" %n do not re-download locally erased files");
+  infomsg(" %v display on screen filenames downloaded (in realtime) - * %v1 short version");
+  infomsg(" Q no log - quiet mode");
+  infomsg(" q no questions - quiet mode");
+  infomsg(" z log - extra infos");
+  infomsg(" Z log - debug");
+  infomsg(" v log on screen");
+  infomsg(" f *log in files");
+  infomsg(" f2 one single log file");
+  infomsg(" I *make an index (I0 don't make)");
+  infomsg(" %I make an searchable index for this mirror (* %I0 don't make)");
+  infomsg("");
+  infomsg("Expert options:");
+  infomsg(" pN priority mode: (* p3)");
+  infomsg(" p0 just scan, don't save anything (for checking links)");
+  infomsg(" p1 save only html files");
+  infomsg(" p2 save only non html files");
+  infomsg(" *p3 save all files");
+  infomsg(" p7 get html files before, then treat other files");
+  infomsg(" S stay on the same directory");
+  infomsg(" D *can only go down into subdirs");
+  infomsg(" U can only go to upper directories");
+  infomsg(" B can both go up&down into the directory structure");
+  infomsg(" a *stay on the same address");
+  infomsg(" d stay on the same principal domain");
+  infomsg(" l stay on the same TLD (eg: .com)");
+  infomsg(" e go everywhere on the web");
+  infomsg(" %H debug HTTP headers in logfile");
+  infomsg("");
+  infomsg("Guru options: (do NOT use if possible)");
+  infomsg(" #0 Filter test (-#0 '*.gif' 'www.bar.com/foo.gif')");
+  infomsg(" #f Always flush log files");
+  infomsg(" #FN Maximum number of filters");
+  infomsg(" #h Version info");
+  infomsg(" #K Scan stdin (debug)");
+  infomsg(" #L Maximum number of links (-#L1000000)");
+  infomsg(" #p Display ugly progress information");
+  infomsg(" #P Catch URL");
+  infomsg(" #R Old FTP routines (debug)");
+  infomsg(" #T Generate transfer ops. log every minutes");
+  infomsg(" #u Wait time");
+  infomsg(" #Z Generate transfer rate statictics every minutes");
+  infomsg(" #! Execute a shell command (-#! \"echo hello\")");
+  infomsg("");
+  infomsg("Command-line specific options:");
+  infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")");
+  infomsg(" %U run the engine with another id when called as root (-%U smith)");
+  /* infomsg(" %O do a chroot before setuid"); */
+  infomsg("");
+  infomsg("Details: Option N");
+  infomsg(" N0 Site-structure (default)");
+  infomsg(" N1 HTML in web/, images/other files in web/images/");
+  infomsg(" N2 HTML in web/HTML, images/other in web/images");
+  infomsg(" N3 HTML in web/, images/other in web/");
+  infomsg(" N4 HTML in web/, images/other in web/xxx, where xxx is the file extension (all gif will be placed onto web/gif, for example)");
+  infomsg(" N5 Images/other in web/xxx and HTML in web/HTML");
+  infomsg(" N99 All files in web/, with random names (gadget !)");
+  infomsg(" N100 Site-structure, without www.domain.xxx/");
+  infomsg(" N101 Identical to N1 exept that \"web\" is replaced by the site's name");
+  infomsg(" N102 Identical to N2 exept that \"web\" is replaced by the site's name");
+  infomsg(" N103 Identical to N3 exept that \"web\" is replaced by the site's name");
+  infomsg(" N104 Identical to N4 exept that \"web\" is replaced by the site's name");
+  infomsg(" N105 Identical to N5 exept that \"web\" is replaced by the site's name");
+  infomsg(" N199 Identical to N99 exept that \"web\" is replaced by the site's name");
+  infomsg(" N1001 Identical to N1 exept that there is no \"web\" directory");
+  infomsg(" N1002 Identical to N2 exept that there is no \"web\" directory");
+  infomsg(" N1003 Identical to N3 exept that there is no \"web\" directory (option set for g option)");
+  infomsg(" N1004 Identical to N4 exept that there is no \"web\" directory");
+  infomsg(" N1005 Identical to N5 exept that there is no \"web\" directory");
+  infomsg(" N1099 Identical to N99 exept that there is no \"web\" directory");
+  infomsg("Details: User-defined option N");
+  infomsg(" '%n' Name of file without file type (ex: image)");
+  infomsg(" '%N' Name of file, including file type (ex: image.gif)");
+  infomsg(" '%t' File type (ex: gif)");
+  infomsg(" '%p' Path [without ending /] (ex: /someimages)");
+  infomsg(" '%h' Host name (ex: www.someweb.com)");
+  infomsg(" '%M' URL MD5 (128 bits, 32 ascii bytes)");
+  infomsg(" '%Q' query string MD5 (128 bits, 32 ascii bytes)");
+  infomsg(" '%q' small query string MD5 (16 bits, 4 ascii bytes)");
+  infomsg(" '%s?' Short name version (ex: %sN)");
+  infomsg(" '%[param]' param variable in query string");
+  infomsg("");
+  infomsg("Details: Option K");
+  infomsg(" K0 foo.cgi?q=45 -> foo4B54.html?q=45 (relative URI, default)");
+  infomsg(" K -> http://www.foobar.com/folder/foo.cgi?q=45 (absolute URL)");
+  infomsg(" K4 -> foo.cgi?q=45 (original URL)");
+  infomsg(" K3 -> /folder/foo.cgi?q=45 (absolute URI)");
+  infomsg("");
+  infomsg("Shortcuts:");
+  infomsg("--mirror <URLs> *make a mirror of site(s) (default)");
+  infomsg("--get <URLs> get the files indicated, do not seek other URLs (-qg)");
+  infomsg("--list <text file> add all URL located in this text file (-%L)");
+  infomsg("--mirrorlinks <URLs> mirror all links in 1st level pages (-Y)");
+  infomsg("--testlinks <URLs> test links in pages (-r1p0C0I0t)");
+  infomsg("--spider <URLs> spider site(s), to test links: reports Errors & Warnings (-p0C0I0t)");
+  infomsg("--testsite <URLs> identical to --spider");
+  infomsg("--skeleton <URLs> make a mirror, but gets only html files (-p1)");
+  infomsg("--update update a mirror, without confirmation (-iC2)");
+  infomsg("--continue continue a mirror, without confirmation (-iC1)");
+  infomsg("");
+  infomsg("--catchurl create a temporary proxy to capture an URL or a form post URL");
+  infomsg("--clean erase cache & log files");
+  infomsg("");
+  infomsg("--http10 force http/1.0 requests (-%h)");
+  infomsg("");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/");
+  infomsg("means: mirror site www.someweb.com/bob/ and only this site");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/ www.anothertest.com/mike/ +*.com/*.jpg");
+  infomsg("means: mirror the two sites together (with shared links) and accept any .jpg files on .com sites");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/bobby.html +* -r6");
+  infomsg("means get all files starting from bobby.html, with 6 link-depth, and possibility of going everywhere on the web");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/bobby.html --spider -P proxy.myhost.com:8080");
+  infomsg("runs the spider on www.someweb.com/bob/bobby.html using a proxy");
+  infomsg("");
+  infomsg("example: httrack --update");
+  infomsg("updates a mirror in the current folder");
+  infomsg("");
+  infomsg("example: httrack");
+  infomsg("will bring you to the interactive mode");
+  infomsg("");
+  infomsg("example: httrack --continue");
+  infomsg("continues a mirror in the current folder");
+  infomsg("");
+  infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")");
+  infomsg("Copyright (C) Xavier Roche and other contributors");
+#ifdef HTS_PLATFORM_NAME
+  infomsg("[compiled: "HTS_PLATFORM_NAME"]");
+#endif
+  infomsg(NULL);
+
+// infomsg(" R *relative links (e.g ../link)\n");
+// infomsg(" A absolute links (e.g /www.adr/link)\n");
+}
+
+
diff --git a/src/htshelp.h b/src/htshelp.h
new file mode 100644
index 0000000..924a526
--- /dev/null
+++ b/src/htshelp.h
@@ -0,0 +1,53 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* command-line help system */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSHELP_DEFH
+#define HTSHELP_DEFH
+
+#include "htsglobal.h"
+#include "htscore.h"
+
+void infomsg(char* msg); /* print one help line; "1" and NULL are control messages */
+void help(char* app,int more); /* print the whole command-line help through infomsg() */
+void make_empty_index(char* str); /* body is compiled out in htshelp.c: does nothing */
+void help_wizard(httrackp* opt); /* interactive wizard: builds a command line, then runs it */
+int help_query(char* list,int def); /* numbered menu from a '|'-separated list; returns the choice, or def on an empty answer */
+void help_catchurl(char* dest_path); /* temporary proxy used to capture an URL / form post */
+
+#endif
diff --git a/src/htsindex.c b/src/htsindex.c
new file mode 100644
index 0000000..5a66724
--- /dev/null
+++ b/src/htsindex.c
@@ -0,0 +1,483 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsindex.c */
+/* keyword indexing system (search index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "htsindex.h"
+#include "htsglobal.h"
+#include "htslib.h"
+
+#if HTS_MAKE_KEYWORD_INDEX
+#include "htshash.h"
+
+
+/* Keyword Indexer Parameters */
+
+// Maximum length for a keyword
+#define KEYW_LEN 50
+// Minimum length for a keyword - MUST NOT BE ZERO!
+#define KEYW_MIN_LEN 3
+// Characters accepted inside a keyword - MUST NOT BE EMPTY AND MUST NOT CONTAIN THE SPACE (32) CHARACTER!
+#define KEYW_ACCEPT "abcdefghijklmnopqrstuvwxyz0123456789-_."
+// Case/accent folding (A -> a, and so on) to avoid case problems in indexing.
+// The character at position N of KEYW_TRANSCODE_FROM is replaced by the
+// character at position N of KEYW_TRANSCODE_TO.
+// This can be a generic table, containing characters that are in fact not accepted by KEYW_ACCEPT
+// BOTH TABLES MUST HAVE THE SAME NUMBER OF CHARACTERS!
+// (the lookup is done byte by byte: this file has to stay in a single-byte encoding)
+#define KEYW_TRANSCODE_FROM (\
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+  "àâä" \
+  "ÀÂÄ" \
+  "éèêë" \
+  "ÉÈÊË" \
+  "ìîï" \
+  "ÌÎÏ" \
+  "òôö" \
+  "ÒÔÖ" \
+  "ùûü" \
+  "ÙÛÜ" \
+  "ÿ" \
+  )
+#define KEYW_TRANSCODE_TO ( \
+  "abcdefghijklmnopqrstuvwxyz" \
+  "aaa" \
+  "aaa" \
+  "eeee" \
+  "eeee" \
+  "iii" \
+  "iii" \
+  "ooo" \
+  "ooo" \
+  "uuu" \
+  "uuu" \
+  "y" \
+  )
+// These (accepted) characters will be ignored at the beginning of a keyword
+#define KEYW_IGNORE_BEG "-_."
+// These (accepted) characters will be stripped if at the end of a keyword
+#define KEYW_STRIP_END "-_."
+// Words beginning with these (accepted) characters will be ignored
+#define KEYW_NOT_BEG "0123456789"
+// Treat these characters as space characters - MUST NOT BE EMPTY!
+#define KEYW_SPACE " ',;:!?\"\x0d\x0a\x09\x0c"
+// Common words (the,for..) detector
+// If a word represents more than KEYW_USELESS1K (per thousand) of total words, then ignore it
+// 5 (0.5%)
+#define KEYW_USELESS1K 5
+// If a word is present in more than KEYW_USELESS1KPG (per thousand) pages, then ignore it
+// 800 (80%)
+#define KEYW_USELESS1KPG 800
+// The hit count of a word is subtracted from this number for sorting purpose
+// leave it as it is here if you don't REALLY know what you are doing
+#define KEYW_SORT_MAXCOUNT 999999999
+
+/* End of Keyword Indexer Parameters */
+
+int strcpos(char* adr,char c); /* used by index_keyword() as "position of c in adr, negative if absent" -- defined outside this view, confirm */
+int mystrcmp(const void* _e1,const void* _e2); /* comparison callback handed to qsort() by index_finish() */
+
+// Global variables
+int hts_index_init=1; /* non-zero until index_keyword() has removed the old index.txt / sindex.html */
+int hts_primindex_size=0; /* number of lines written to fp_tmpproject */
+FILE* fp_tmpproject=NULL; /* temporary file collecting the "word count file" lines of all pages */
+int hts_primindex_words=0; /* number of keyword occurrences stored so far */
+
+#endif
+
+/*
+  Init index: clears the global counters, re-arms the "first page" flag and
+  opens the temporary file receiving the index entries.
+  indexpath is not used here.
+  Does nothing when HTS_MAKE_KEYWORD_INDEX is disabled.
+*/
+void index_init(const char* indexpath) {
+#if HTS_MAKE_KEYWORD_INDEX
+  /* counters */
+  hts_primindex_words=0;
+  hts_primindex_size=0;
+  /* old index files are removed by the first index_keyword() call */
+  hts_index_init=1;
+  /* entries of all pages go there until index_finish() */
+  fp_tmpproject=tmpfile();
+#endif
+}
+
+
+/*
+  Indexing system: extracts the keywords of one page.
+  A little bit dirty, (quick'n dirty, in fact)
+  But should be okay on most cases
+  Tags, comments and scripts are skipped.
+
+  html_data,size: page contents and its length
+  mime:           MIME type; text/html and image/svg+xml (or svg-xml) are
+                  parsed as markup, application/x-javascript and text/css
+                  start in "script" mode, anything else is rejected
+  filename:       name written in the index (the indexpath prefix is cut)
+  indexpath:      directory of index.txt / sindex.html
+
+  For each distinct keyword one "word count filename" line is appended to
+  fp_tmpproject, where count is KEYW_SORT_MAXCOUNT minus the hit count.
+
+  Returns 0 if a parameter is missing, the MIME type is not handled or the
+  temporary resources cannot be created; 1 otherwise (including when
+  HTS_MAKE_KEYWORD_INDEX is disabled).
+*/
+int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) {
+#if HTS_MAKE_KEYWORD_INDEX
+  int intag=0,inscript=0,incomment=0;
+  char keyword[KEYW_LEN+32];
+  int i=0;
+  //
+  int WordIndexSize=1024;
+  inthash WordIndexHash=NULL;
+  FILE *tmpfp=NULL;
+  //
+
+  // Check parameters
+  if (!html_data)
+    return 0;
+  if (!size)
+    return 0;
+  if (!mime)
+    return 0;
+  if (!filename)
+    return 0;
+
+  // Init ? (first page: remove the index files of a previous run)
+  if (hts_index_init) {
+    remove(concat(indexpath,"index.txt"));
+    remove(concat(indexpath,"sindex.html"));
+    hts_index_init=0;
+  }
+
+  // Check MIME type
+  if (strfield2(mime,"text/html")) {
+    inscript=0;
+  }
+  // FIXME - temporary fix for image/svg+xml (svg)
+  // "IN XML" (html like, in fact :) )
+  else if (
+    (strfield2(mime,"image/svg+xml"))
+    ||
+    (strfield2(mime,"image/svg-xml"))
+    ) {
+    inscript=0;
+  }
+  else if (
+    (strfield2(mime,"application/x-javascript"))
+    || (strfield2(mime,"text/css"))
+    ) {
+    inscript=1;
+  } else
+    return 0;
+
+  // Temporary file (list of the distinct keywords of this page)
+  tmpfp = tmpfile();
+  if (!tmpfp)
+    return 0;
+
+  // Create hash structure (keyword -> hit count for this page)
+  WordIndexHash=inthash_new(WordIndexSize);
+  if (!WordIndexHash) {
+    fclose(tmpfp);     /* do not leak the temporary file */
+    return 0;
+  }
+
+  // Start indexing this page
+  keyword[0]='\0';
+  while(i<size) {
+    if (strfield(html_data + i , "<script")) {
+      inscript=1;
+    }
+    else if (strfield(html_data + i , "<!--")) {
+      incomment=1;
+    }
+    else if (strfield(html_data + i , "</script")) {
+      if (!incomment)
+        inscript=0;
+    }
+    else if (strfield(html_data + i , "-->")) {
+      incomment=0;
+    }
+    else if (html_data[i]=='<') {
+      if (!inscript)
+        intag=1;
+    }
+    else if (html_data[i]=='>') {
+      intag=0;
+    }
+    else {
+      // Okay, parse keywords
+      if ( (!inscript) && (!incomment) && (!intag) ) {
+        char cchar=html_data[i];
+        int pos;
+        int len=strlen(keyword);
+
+        // Replace (ignore case, and so on..)
+        if ((pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0)
+          cchar=KEYW_TRANSCODE_TO[pos];
+
+        if (strchr(KEYW_ACCEPT,cchar)) {
+          /* Ignore some characters at beginning */
+          if ((len>0) || (!strchr(KEYW_IGNORE_BEG,cchar))) {
+            keyword[len++]=cchar;
+            keyword[len]='\0';
+          }
+        } else if ( (strchr(KEYW_SPACE,cchar)) || (!cchar) ) {
+          /* End of word */
+
+          /* Avoid these words */
+          if (len>0) {
+            if (strchr(KEYW_NOT_BEG,keyword[0])) {
+              keyword[(len=0)]='\0';
+            }
+          }
+
+          /* Strip ending . and so */
+          {
+            int ok=0;
+            while((len=strlen(keyword)) && (!ok)) {
+              if (strchr(KEYW_STRIP_END,keyword[len-1])) {   /* strip it */
+                keyword[len-1]='\0';
+              } else
+                ok=1;
+            }
+          }
+
+          /* Store it ? */
+          if (len >= KEYW_MIN_LEN ) {
+            hts_primindex_words++;
+            if (inthash_inc(WordIndexHash,keyword)) {    /* added new */
+              fprintf(tmpfp,"%s\n",keyword);
+            }
+          }
+          keyword[(len=0)]='\0';
+        } else    /* Invalid character: the current word is dropped */
+          keyword[(len=0)]='\0';
+
+        /* Too long: drop the word */
+        if (len>KEYW_LEN) {
+          keyword[(len=0)]='\0';
+        }
+      }
+
+    }
+
+    i++;
+  }
+
+  // Reset temp file
+  fseek(tmpfp,0,SEEK_SET);
+
+  // Process indexing for this page
+  {
+    if (fp_tmpproject) {
+      while(!feof(tmpfp)) {
+        char line[KEYW_LEN + 32];
+        linput(tmpfp,line,KEYW_LEN + 2);
+        if (strnotempty(line)) {
+          long int e=0;    /* same type as the inthash_read() output parameter */
+          if (inthash_read(WordIndexHash,line,&e)) {
+            char savelst[HTS_URLMAXSIZE*2];
+            e++;  /* 0 means "once" */
+
+            if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0)  // cut the index path prefix
+              strcpy(savelst,filename+strlen(indexpath));
+            else
+              strcpy(savelst,filename);
+
+            // Add entry for this file and word
+            fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst);
+            hts_primindex_size++;
+          }
+        }
+      }
+    }
+  }
+
+  // Delete temp file
+  fclose(tmpfp);
+  tmpfp=NULL;
+
+  // Clear hash table
+  inthash_delete(&WordIndexHash);
+#endif
+  return 1;
+}
+
+/*
+  Sort index!
+
+  Reads back the temporary word index (fp_tmpproject), sorts its records and
+  writes the final index file; fp_tmpproject is always closed and reset to
+  NULL before returning.
+
+  indexpath  path prefix; "index.txt" or "sindex.html" is appended to it
+  mode       1 = plain text (index.txt); any other value writes sindex.html,
+             and the HTML letter bar / table markup is emitted for mode 2
+
+  Each temporary record is a "word sortkey location" line, where sortkey is
+  (KEYW_SORT_MAXCOUNT - hits).
+
+  NOTE(review): the per-word summary / "ignored" handling only runs when the
+  next word starts, so the very last word never gets it; and rewinding with
+  fseek() for an ignored word can leave stale bytes at the end of the file
+  because nothing truncates it. Both look unintended - confirm before
+  changing the output format.
+*/
+void index_finish(const char* indexpath,int mode) {
+#if HTS_MAKE_KEYWORD_INDEX
+  char** tab;
+  char* blk;
+  int size;
+
+  /* Only query the size of a stream that actually exists */
+  size = (fp_tmpproject != NULL) ? fpsize(fp_tmpproject) : 0;
+  if (size>0) {
+    tab=(char**)malloct(sizeof(char*) * (hts_primindex_size+2) );
+    if (tab) {
+      blk = malloct(size+4);
+      if (blk) {
+        fseek(fp_tmpproject,0,SEEK_SET);
+        if ((int)fread(blk,1,size,fp_tmpproject) == size) {
+          char *a=blk,*b;
+          int index=0;
+          int i;
+          FILE* fp;
+
+          /* fread() does not terminate the data: add the terminator that
+             strchr() below relies on (size+4 bytes were allocated) */
+          blk[size]='\0';
+
+          /* Split the block into records, one per line */
+          while( (b=strchr(a,'\n')) && (index < hts_primindex_size) ) {
+            tab[index++]=a;
+            *b='\0';
+            a=b+1;
+          }
+
+          /* Sort it! (plain strcmp() ordering of the whole record lines) */
+          qsort(tab,index,sizeof(char*),mystrcmp);
+
+          /* The temporary file is no longer needed */
+          fclose(fp_tmpproject);
+          fp_tmpproject=NULL;
+
+          /* Write new file */
+          if (mode == 1)      /* TEXT */
+            fp=fopen(concat(indexpath,"index.txt"),"wb");
+          else                /* HTML */
+            fp=fopen(concat(indexpath,"sindex.html"),"wb");
+          if (fp) {
+            char current_word[KEYW_LEN + 32];
+            char word[KEYW_LEN + 32];
+            int hit;
+            int total_hit=0;    /* hits accumulated for current_word */
+            int total_line=0;   /* records accumulated for current_word */
+            long last_pos=0;    /* file offset just after the current word header */
+            char word0='\0';    /* first letter of the previous word */
+            current_word[0]='\0';
+
+            if (mode == 2) {    /* HTML: letter bar, then table header */
+              for(i=0;i<index;i++) {
+                if (word0 != tab[i][0]) {
+                  word0 = tab[i][0];
+                  fprintf(fp," <a href=\"#%c\">%c</a>\r\n",word0,word0);
+                }
+              }
+              word0='\0';
+              fprintf(fp,"<br><br>\r\n");
+              fprintf(fp,"<table width=\"100%%\" border=\"0\">\r\n<tr>\r\n<td>word</td>\r\n<td>location\r\n");
+            }
+
+            for(i=0;i<index;i++) {
+              if (sscanf(tab[i],"%s %d",word,&hit) == 2) {
+                /* The location is what follows the second space */
+                char* sep=strchr(tab[i],' ');
+                if (sep) sep=strchr(sep+1,' ');
+                if (sep) {
+                  const char* location=sep+1;
+
+                  hit=KEYW_SORT_MAXCOUNT-hit;   /* sort key back to hit count */
+                  if (strcmp(word,current_word)) {      /* New word */
+                    if (total_hit) {
+                      /* Close the previous word */
+                      if (mode == 1)      /* TEXT */
+                        fprintf(fp,"\t=%d\r\n",total_hit);
+                      if (
+                        ( ((total_hit*1000 ) / hts_primindex_words) >= KEYW_USELESS1K )
+                        ||
+                        ( ((total_line*1000) / index              ) >= KEYW_USELESS1KPG )
+                        ) {
+                        /* Too frequent: overwrite its location list */
+                        fseek(fp,last_pos,SEEK_SET);
+                        if (mode == 1)      /* TEXT */
+                          fprintf(fp,"\tignored (%d)\r\n",((total_hit*1000)/hts_primindex_words));
+                        else
+                          fprintf(fp,"(ignored) [%d hits]<br>\r\n",total_hit);
+                      }
+                      else {
+                        if (mode == 1)      /* TEXT */
+                          fprintf(fp,"\t(%d)\r\n",((total_hit*1000)/hts_primindex_words));
+                      }
+                    }
+                    /* Header of the new word */
+                    if (mode == 1)      /* TEXT */
+                      fprintf(fp,"%s\r\n",word);
+                    else {              /* HTML */
+                      fprintf(fp,"</td></tr>\r\n");
+                      if (word0 != word[0]) {
+                        word0 = word[0];
+                        fprintf(fp,"<th>%c</th>\r\n",word0);
+                        fprintf(fp,"<a name=\"%c\"></a>\r\n",word0);
+                      }
+                      fprintf(fp,"<tr>\r\n<td>%s</td>\r\n<td>\r\n",word);
+                    }
+                    fflush(fp); last_pos=ftell(fp);
+                    strcpy(current_word,word);
+                    total_hit=total_line=0;
+                  }
+                  total_hit+=hit;
+                  total_line++;
+                  if (mode == 1)      /* TEXT */
+                    fprintf(fp,"\t%d %s\r\n",hit,location);
+                  else                /* HTML */
+                    fprintf(fp,"<a href=\"%s\">%s</a> [%d hits]<br>\r\n",location,location,hit);
+                }
+              }
+            }
+            if (mode == 2)      /* HTML */
+              fprintf(fp,"</td></tr>\r\n</table>\r\n");
+            fclose(fp);
+          }
+
+        }
+        freet(blk);
+      }
+      freet(tab);
+    }
+  }
+  if (fp_tmpproject)
+    fclose(fp_tmpproject);
+  fp_tmpproject=NULL;
+#endif
+}
+
+
+/* Subroutines */
+
+#if HTS_MAKE_KEYWORD_INDEX
+int strcpos(char* adr,char c) {
+  /* Offset of the first occurrence of c in adr, or -1 when c is absent.
+     As with strchr(), searching for '\0' finds the terminator itself. */
+  const char* const hit=strchr(adr,c);
+  return (hit != NULL) ? (int)(hit-adr) : -1;
+}
+
+int mystrcmp(const void* _e1,const void* _e2) {
+  /* qsort() comparator: each argument points to a char* array element */
+  const char* const lhs=*(char* const*)_e1;
+  const char* const rhs=*(char* const*)_e2;
+  return strcmp(lhs,rhs);
+}
+#endif
+
diff --git a/src/htsindex.h b/src/htsindex.h
new file mode 100644
index 0000000..40a189b
--- /dev/null
+++ b/src/htsindex.h
@@ -0,0 +1,48 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsindex.h */
+/* keyword indexing system (search index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSKINDEX_DEFH
+#define HTSKINDEX_DEFH
+
+#include "htsglobal.h"
+
+int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath);
+void index_init(const char* indexpath);
+void index_finish(const char* indexpath,int mode);
+
+#endif
diff --git a/src/htsjava.c b/src/htsjava.c
new file mode 100644
index 0000000..bb29692
--- /dev/null
+++ b/src/htsjava.c
@@ -0,0 +1,395 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Java classes parser */
+/* Author: Yann Philippot */
+/* ------------------------------------------------------------ */
+
+
+/* Version: Oct/2000 */
+/* Fixed: problems with class structure (10/2000) */
+
+// htsjava.c - Java class file parser
+
+#include "stdio.h"
+#include "htssystem.h"
+#include "htscore.h"
+#include "htsjava.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "htsnostatic.h"
+
+//#include <math.h>
+
+#ifndef HTS_LITTLE_ENDIAN
+#define REVERSE_ENDIAN 1
+#else
+#define REVERSE_ENDIAN 0
+#endif
+
+/* big/little endian swap; every use of the argument is parenthesized so that expressions such as (x | y) can be passed */
+#define hts_swap16(A) ( (((A) & 0xFF)<<8) | (((A) & 0xFF00)>>8) )
+#define hts_swap32(A) ( (( (hts_swap16(A)) & 0xFFFF)<<16) | (( (hts_swap16((A)>>16)) & 0xFFFF)) )
+
+
+// ** HTS_xx prefix, otherwise not accepted by VC++
+#define HTS_CLASS 7
+#define HTS_FIELDREF 9
+#define HTS_METHODREF 10
+#define HTS_STRING 8
+#define HTS_INTEGER 3
+#define HTS_FLOAT 4
+#define HTS_LONG 5
+#define HTS_DOUBLE 6
+#define HTS_INTERFACE 11
+#define HTS_NAMEANDTYPE 12
+#define HTS_ASCIZ 1
+#define HTS_UNICODE 2
+
+#define JAVADEBUG 0
+
+/*
+  Parse a compiled Java class file and register the files it refers to.
+
+  file     path of the .class file (opened through fconv())
+  err_msg  receives an error message when 0 is returned
+
+  Returns 1 on success, 0 on error.
+
+  The constant pool is read entry by entry with readtable(); afterwards the
+  name of every class entry that passes the filters below is registered as
+  "<name>.class" through hts_add_file().
+*/
+int hts_parse_java(char *file,char* err_msg)
+{
+  FILE *fpout;
+  JAVA_HEADER header;
+  RESP_STRUCT *tab;
+  /* On-disk header: magic (4), minor (2), major (2), constant pool count (2) */
+  unsigned char raw[10];
+
+#if JAVADEBUG
+  printf("fopen\n");
+#endif
+  if ((fpout = fopen(fconv(file), "r+b")) == NULL)
+  {
+    sprintf(err_msg,"Unable to open file %s",file);
+    return 0;   // error
+  }
+
+#if JAVADEBUG
+  printf("fread\n");
+#endif
+  if (fread(raw,1,sizeof(raw),fpout) != sizeof(raw)) {   // incomplete header
+    fclose(fpout);
+    sprintf(err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file));
+    return 0;
+  }
+
+#if JAVADEBUG
+  printf("header\n");
+#endif
+  // Decode the big-endian fields byte by byte: the result depends neither on
+  // the host byte order nor on the size/padding of the JAVA_HEADER members
+  header.magic = ((unsigned long)raw[0]<<24) | ((unsigned long)raw[1]<<16)
+               | ((unsigned long)raw[2]<<8)  |  (unsigned long)raw[3];
+  header.minor = (unsigned short)((raw[4]<<8) | raw[5]);
+  header.major = (unsigned short)((raw[6]<<8) | raw[7]);
+  header.count = (unsigned short)((raw[8]<<8) | raw[9]);
+
+  // check the header
+  if(header.magic!=0xCAFEBABE) {
+    sprintf(err_msg,"non java file");
+    fclose(fpout);
+    return 0;
+  }
+
+  tab =(RESP_STRUCT*)calloct(header.count,sizeof(RESP_STRUCT));
+  if (!tab) {
+    // report the size that was actually requested
+    sprintf(err_msg,"Unable to alloc %d bytes",(int)(header.count*sizeof(RESP_STRUCT)));
+    fclose(fpout);
+    return 0;   // error
+  }
+
+#if JAVADEBUG
+  printf("calchead\n");
+#endif
+  {
+    int i;
+
+    // Entry 0 is not stored in the file; indices start at 1
+    for (i = 1; i < header.count; i++) {
+      int err=0;
+      tab[i]=readtable(fpout,tab[i],&err,err_msg);
+      if (err) {    // readtable reported a fatal error
+        if (strnotempty(err_msg)==0)
+          strcpy(err_msg,"Internal readtable error");
+        freet(tab);
+        fclose(fpout);
+        return 0;
+      }
+      // long and double entries take two slots
+      if ((tab[i].type == HTS_LONG) || (tab[i].type == HTS_DOUBLE))
+        i++;
+    }
+  }
+
+#if JAVADEBUG
+  printf("addfiles\n");
+#endif
+  {
+    unsigned int Class;
+    unsigned int SClass;
+    int i;
+    readshort(fpout);             // access flags: read to advance, not used
+    Class = readshort(fpout);
+    SClass = readshort(fpout);
+
+    for (i = 1; i < header.count; i++) {
+      unsigned int name_index;
+
+      if (tab[i].type != HTS_CLASS)
+        continue;
+
+      name_index=tab[i].index1;
+      if (name_index >= header.count)
+        break;                    // out-of-range index: stop scanning
+
+      // NOTE(review): this compares the entry's *name* index with the
+      // this/super class entry indices read above; kept as in the original
+      // code - confirm whether the entry index i was intended instead.
+      if ((name_index!=SClass) && (name_index!=Class) && (tab[name_index].name[0]!='[')) {
+        if(!strstr(tab[name_index].name,"java/")) {
+          // room for the longest possible name plus ".class"
+          char tempo[sizeof(tab[0].name) + 8];
+          tempo[0]='\0';
+
+          sprintf(tempo,"%s.class",tab[name_index].name);
+#if JAVADEBUG
+          printf("add %s\n",tempo);
+#endif
+          if (tab[name_index].file_position >= 0)
+            hts_add_file(tempo,tab[name_index].file_position);
+        }
+      }
+    }
+  }
+
+#if JAVADEBUG
+  printf("end\n");
+#endif
+  freet(tab);
+  fclose(fpout);
+  return 1;
+}
+
+
+
+
+// Read one constant pool entry from fp.
+//
+// fp:      class file, positioned on the tag byte of the entry
+// trans:   entry to fill in (passed and returned by value)
+// error:   set to 0, or to a non-zero value on fatal error
+// err_msg: receives a message when *error is set
+//
+// String entries (HTS_ASCIZ / HTS_UNICODE) are copied into trans.name and,
+// when tris() classifies them as a file reference, registered through
+// hts_add_file() together with the file offset of their length field.
+// A string too long for the buffers is skipped and its type is set to -1.
+RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
+{
+  unsigned short int length;
+  int j;
+  *error = 0;                 // no error so far
+  trans.file_position=-1;
+  trans.type = (int)(unsigned char)fgetc(fp);
+  switch (trans.type) {
+  case HTS_CLASS:
+    strcpy(trans.name,"Class");
+    trans.index1 = readshort(fp);
+    break;
+
+  case HTS_FIELDREF:
+    strcpy(trans.name,"Field Reference");
+    trans.index1 = readshort(fp);
+    readshort(fp);
+    break;
+
+  case HTS_METHODREF:
+    strcpy(trans.name,"Method Reference");
+    trans.index1 = readshort(fp);
+    readshort(fp);
+    break;
+
+  case HTS_INTERFACE:
+    strcpy(trans.name,"Interface Method Reference");
+    trans.index1 =readshort(fp);
+    readshort(fp);
+    break;
+
+  case HTS_NAMEANDTYPE:
+    strcpy(trans.name,"Name and Type");
+    trans.index1 = readshort(fp);
+    readshort(fp);
+    break;
+
+  case HTS_STRING:            // CONSTANT_String
+    strcpy(trans.name,"String");
+    trans.index1 = readshort(fp);
+    break;
+
+  case HTS_INTEGER:
+    strcpy(trans.name,"Integer");
+    for(j=0;j<4;j++) fgetc(fp);
+    break;
+
+  case HTS_FLOAT:
+    strcpy(trans.name,"Float");
+    for(j=0;j<4;j++) fgetc(fp);
+    break;
+
+  case HTS_LONG:
+    strcpy(trans.name,"Long");
+    for(j=0;j<8;j++) fgetc(fp);
+    break;
+
+  case HTS_DOUBLE:
+    strcpy(trans.name,"Double");
+    for(j=0;j<8;j++) fgetc(fp);
+    break;
+
+  case HTS_ASCIZ:
+  case HTS_UNICODE:
+
+    if (trans.type == HTS_ASCIZ)
+      strcpy(trans.name,"HTS_ASCIZ");
+    else
+      strcpy(trans.name,"HTS_UNICODE");
+
+    {
+      char buffer[1024];
+      char *p;
+      int kind;
+
+      p=&buffer[0];
+
+      trans.file_position=ftell(fp);
+      length = readshort(fp);
+      // The string must fit (with its terminator) in both destination
+      // buffers, whatever the value of HTS_URLMAXSIZE is
+      if ( (length<HTS_URLMAXSIZE)
+        && (length<sizeof(buffer))
+        && (length<sizeof(trans.name)) ) {
+        while (length > 0) {
+          *p++ =(char)fgetc(fp);
+          length--;
+        }
+        *p='\0';
+
+        // Classify once: 1 = plain file reference, 2 = "[L...;" class name
+        kind=tris(buffer);
+        if (kind==1) hts_add_file(buffer,trans.file_position);
+        else if (kind==2) hts_add_file(printname(buffer),trans.file_position);
+
+        strcpy(trans.name,buffer);
+      } else {                // oversized string: skip its bytes
+        while ( (length > 0) && (!feof(fp))) {
+          fgetc(fp);
+          length--;
+        }
+        if (!feof(fp)) {
+          trans.type=-1;      // skipped, but the file can still be read
+        } else {
+          sprintf(err_msg,"Internal stucture error (ASCII)");
+          *error = 1;
+        }
+        return(trans);
+      }
+    }
+    break;
+
+  default:
+    // unknown entry type: stop everything
+    sprintf(err_msg,"Internal structure unknown (type %d)",trans.type);
+    *error = 1;
+    return(trans);
+  }
+  return(trans);
+}
+
+
+// Read a 16-bit big-endian unsigned value from fp.
+// The two bytes are combined explicitly, so the result does not depend on
+// the host byte order. Returns 0 when fewer than two bytes could be read
+// (the previous code returned an uninitialized value in that case).
+unsigned short int readshort(FILE *fp)
+{
+  unsigned char raw[2];
+
+  if (fread(raw,1,sizeof(raw),fp) != sizeof(raw))
+    return 0;
+  return (unsigned short int)((raw[0]<<8) | raw[1]);
+}
+
+// Classify a constant pool string.
+// Returns 2 for a "[L..." name that does not contain "java/",
+// 1 for a string that looks like a file reference, 0 otherwise.
+int tris(char * buffer)
+{
+  static const char* const media_ext[] = { ".gif", ".jpg", ".jpeg", ".au" };
+  char type[256];
+  unsigned int k;
+
+  // Java class name
+  if (buffer[0]=='[' && buffer[1]=='L' && strstr(buffer,"java/")==NULL)
+    return 2;
+
+  // Well-known media extensions, anywhere in the string
+  for (k = 0; k < sizeof(media_ext)/sizeof(media_ext[0]); k++) {
+    if (strstr(buffer,media_ext[k]) != NULL)
+      return 1;
+  }
+
+  // Other files: recognized type, or dynamic extension (asp,cgi...)
+  type[0]='\0';
+  get_httptype(type,buffer,0);
+  if (strnotempty(type) || is_dyntype(get_ext(buffer)))
+    return 1;
+
+  return 0;
+}
+
+
+// Build a class file name from a "[L<name>;" string.
+//
+// Returns "" when name does not start with '['. Otherwise the first two
+// characters are skipped and the text up to the first ';' is returned with
+// ".class" appended ('/' separators are kept as they are).
+// Without any ';' the historical result is kept: the text after the prefix
+// minus its last character, and no ".class" suffix.
+//
+// Unlike the previous version this never reads past the terminator of name,
+// never writes before the start of the result buffer (names shorter than 3
+// characters give an empty result) and never writes past its 1024 bytes.
+char * printname(char name[1024])
+{
+  char* rname;
+  const char* src;
+  size_t len;
+  size_t n;
+  int has_end;
+  NOSTATIC_RESERVE(rname, char, 1024);
+  rname[0]='\0';
+
+  if (name[0]!='[') return "";
+
+  len=strlen(name);
+  if (len < 3)
+    return rname;             // nothing after the 2-character prefix
+
+  src=name+2;
+  n=strcspn(src,";");         // characters before the first ';'
+  has_end=(src[n]==';');
+  if (!has_end)
+    n=len-3;                  // historical behaviour: drop the last character
+
+  // keep room for ".class" and the terminator
+  if (n > 1024 - sizeof(".class"))
+    n = 1024 - sizeof(".class");
+
+  memcpy(rname,src,n);
+  rname[n]='\0';
+  if (has_end)
+    strcat(rname,".class");
+  return (rname);
+}
diff --git a/src/htsjava.h b/src/htsjava.h
new file mode 100644
index 0000000..66a75a5
--- /dev/null
+++ b/src/htsjava.h
@@ -0,0 +1,69 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Java classes parser .h */
+/* Author: Yann Philippot */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSJAVA_DEFH
+#define HTSJAVA_DEFH
+
+/* LLint fsize(char* s); */
+int fsize(char* s);
+
+typedef struct { /* start of a class file, as read by hts_parse_java() */
+ unsigned long int magic; /* compared with 0xCAFEBABE; NOTE(review): unsigned long is 8 bytes on LP64 platforms, so this struct does not match the 10 bytes read from the file there */
+ unsigned short int minor; /* read, but not used by the parser */
+ unsigned short int major; /* read, but not used by the parser */
+ unsigned short int count; /* bound of the constant pool table (entries 1..count-1 are read) */
+} JAVA_HEADER;
+
+typedef struct { /* one constant pool entry, as filled in by readtable() */
+ int file_position; /* ftell() offset of the length field of a string entry, -1 for other entries */
+ // fields below are set from the entry's tag and payload
+ unsigned int index1; /* first 16-bit index of class/reference/string entries */
+ unsigned int type; /* tag byte of the entry (HTS_xx); set to -1 for a skipped oversized string */
+ char name[1024]; /* string contents for string entries, a descriptive label otherwise */
+} RESP_STRUCT;
+
+
+int hts_parse_java(char *file,char* err_msg);
+RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5);
+//unsigned int swap(long int nomber,int digit);
+RESP_STRUCT readtable(FILE *fp,RESP_STRUCT,int*,char*);
+unsigned short int readshort(FILE *fp);
+int tris(char*);
+char * printname(char [1024]);
+
+
+#endif
diff --git a/src/htslib.c b/src/htslib.c
new file mode 100644
index 0000000..e4e6006
--- /dev/null
+++ b/src/htslib.c
@@ -0,0 +1,4279 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Subroutines */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Library source file (.c)
+
+#include "htslib.h"
+#include "htsbauth.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsbauth.h"
+#include "htsthread.h"
+#include "htsnostatic.h"
+#include "htswrap.h"
+#include <stdio.h>
+#if HTS_WIN
+#include <direct.h>
+#else
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/timeb.h>
+#include <fcntl.h>
+// pour utimbuf
+#if HTS_WIN
+#include <sys/utime.h>
+#else
+#if HTS_PLATFORM!=3
+#include <utime.h>
+#else
+#include <utime.h>
+#endif
+#endif
+/* END specific definitions */
+
+
+
+// Control debugging (debug trace output)
+#if HTS_DEBUG_CLOSESOCK
+#define _HTS_WIDE 1
+#endif
+#if HTS_WIDE_DEBUG
+#define _HTS_WIDE 1
+#endif
+#if _HTS_WIDE
+FILE* DEBUG_fp=NULL;
+#define DEBUG_W(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,":>"A); fflush(DEBUG_fp); }
+#define DEBUG_W2(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,A); fflush(DEBUG_fp); }
+#endif
+
+/* global variables */
+int _DEBUG_HEAD;
+FILE* ioinfo;
+
+#if HTS_USEOPENSSL
+ SSL_CTX *openssl_ctx = NULL;
+#endif
+int IPV6_resolver = 0;
+
+
+/* additional detection (presumably attribute names whose value is scanned for links - confirm against the parser) */
+const char hts_detect[][32] = {
+ "archive",
+ "background",
+ "data", // OBJECT
+ "dynsrc",
+ "lowsrc",
+ "profile", // META element
+ "src",
+ "swurl",
+ "url",
+ "usemap",
+ "longdesc", // accessibility
+ "xlink:href", // xml/svg tag
+ "" /* empty entry: last element of the list */
+};
+
+/* detect by beginning of the name */
+const char hts_detectbeg[][32] = {
+ "hotspot", /* hotspot1=..,hotspot2=.. */
+ "" /* empty entry: last element of the list */
+};
+
+/* do not detect links inside these */
+const char hts_nodetect[][32] = {
+ "accept-charset",
+ "accesskey",
+ "action",
+ "align",
+ "alt",
+ "axes",
+ "axis",
+ "char",
+ "charset",
+ "cite",
+ "class",
+ "classid",
+ "code",
+ "color",
+ "datetime",
+ "dir",
+ "enctype",
+ "face",
+ "height",
+ "id",
+ "lang",
+ "language",
+ "media",
+ "method",
+ "name",
+ "prompt",
+ "scheme",
+ "size",
+ "style",
+ "target",
+ "title",
+ "type",
+ "valign",
+ "version",
+ "width",
+ "" /* empty entry: last element of the list */
+};
+
+
+/* detection of inline javascript snippets */
+/* ALSO USED: detection based on the name: onXXX="<tag>" where XXX starts with upper case letter */
+const char hts_detect_js[][32] = {
+ "onAbort",
+ "onBlur",
+ "onChange",
+ "onClick",
+ "onDblClick",
+ "onDragDrop",
+ "onError",
+ "onFocus",
+ "onKeyDown",
+ "onKeyPress",
+ "onKeyUp",
+ "onLoad",
+ "onMouseDown",
+ "onMouseMove",
+ "onMouseOut",
+ "onMouseOver",
+ "onMouseUp",
+ "onMove",
+ "onReset",
+ "onResize",
+ "onSelect",
+ "onSubmit",
+ "onUnload",
+ "" /* empty entry: last element of the list */
+};
+
+/* detection of "...URL=<url>" */
+const char hts_detectURL[][32] = {
+ "content",
+ "" /* empty entry: last element of the list */
+};
+
+/* tags where the URL must be rewritten but not captured */
+const char hts_detectandleave[][32] = {
+ "action",
+ "" /* empty entry: last element of the list */
+};
+
+/* do not rename when these types are returned (often unknown types) */
+const char hts_mime_keep[][32] = {
+ "application/octet-stream",
+ "text/plain",
+ "" /* empty entry: last element of the list */
+};
+
+/* no known MIME type, but known extension */
+const char hts_ext_dynamic[][32] = {
+ "php3",
+ "php",
+ "php4",
+ "php2",
+ "cgi",
+ "asp",
+ "jsp",
+ "pl",
+ /*"exe",*/
+ "cfm",
+ "" /* empty entry: last element of the list */
+};
+
+/* MIME types: { MIME type, file extension } pairs; the table ends with an
+   empty pair.
+   Corrected entries: IGES extension "igs" (was "isg"); troff "me"/"ms"
+   macros each mapped to their own type (x-troff-me was paired with "ms");
+   portable bitmap is "pbm", "pgm" being the portable graymap; the second,
+   identical "shar" row was dropped. */
+const char hts_mime[][2][32] = {
+  {"application/acad","dwg"},
+  {"application/arj","arj"},
+  {"application/clariscad","ccad"},
+  {"application/drafting","drw"},
+  {"application/dxf","dxf"},
+  {"application/excel","xls"},
+  {"application/i-deas","unv"},
+  {"application/iges","igs"},
+  {"application/iges","iges"},
+  {"application/mac-binhex40","hqx"},
+  {"application/mac-compactpro","cpt"},
+  {"application/msword","doc"},
+  {"application/msword","w6w"},
+  {"application/msword","word"},
+  {"application/mswrite","wri"},
+  /*{"application/octet-stream","dms"},*/
+  /*{"application/octet-stream","lzh"},*/
+  /*{"application/octet-stream","lha"},*/
+  /*{"application/octet-stream","bin"},*/
+  {"application/oda","oda"},
+  {"application/pdf","pdf"},
+  {"application/postscript","ps"},
+  {"application/postscript","ai"},
+  {"application/postscript","eps"},
+  {"application/powerpoint","ppt"},
+  {"application/pro_eng","prt"},
+  {"application/pro_eng","part"},
+  {"application/rtf","rtf"},
+  {"application/set","set"},
+  {"application/sla","stl"},
+  {"application/smil","smi"},
+  {"application/smil","smil"},
+  {"application/smil","sml"},
+  {"application/solids","sol"},
+  {"application/STEP","stp"},
+  {"application/STEP","step"},
+  {"application/vda","vda"},
+  {"application/x-authorware-map","aam"},
+  {"application/x-authorware-seg","aas"},
+  {"application/x-authorware-bin","aab"},
+  {"application/x-cocoa","cco"},
+  {"application/x-csh","csh"},
+  {"application/x-director","dir"},
+  {"application/x-director","dcr"},
+  {"application/x-director","dxr"},
+  {"application/x-mif","mif"},
+  {"application/x-dvi","dvi"},
+  {"application/x-gzip","gz"},
+  {"application/x-gzip","gzip"},
+  {"application/x-hdf","hdf"},
+  {"application/x-javascript","js"},
+  {"application/x-koan","skp"},
+  {"application/x-koan","skd"},
+  {"application/x-koan","skt"},
+  {"application/x-koan","skm"},
+  {"application/x-latex","latex"},
+  {"application/x-netcdf","nc"},
+  {"application/x-netcdf","cdf"},
+  /* {"application/x-sh","sh"}, */
+  /* {"application/x-csh","csh"}, */
+  /* {"application/x-ksh","ksh"}, */
+  {"application/x-shar","shar"},
+  {"application/x-stuffit","sit"},
+  {"application/x-tcl","tcl"},
+  {"application/x-tex","tex"},
+  {"application/x-texinfo","texinfo"},
+  {"application/x-texinfo","texi"},
+  {"application/x-troff","t"},
+  {"application/x-troff","tr"},
+  {"application/x-troff","roff"},
+  {"application/x-troff-man","man"},
+  {"application/x-troff-me","me"},
+  {"application/x-troff-ms","ms"},
+  {"application/x-wais-source","src"},
+  {"application/zip","zip"},
+  {"application/x-zip-compressed","zip"},
+  {"application/x-bcpio","bcpio"},
+  {"application/x-cdlink","vcd"},
+  {"application/x-cpio","cpio"},
+  {"application/x-gtar","tgz"},
+  {"application/x-gtar","gtar"},
+  {"application/x-shockwave-flash","swf"},
+  {"application/x-sv4cpio","sv4cpio"},
+  {"application/x-sv4crc","sv4crc"},
+  {"application/x-tar","tar"},
+  {"application/x-ustar","ustar"},
+  {"application/x-winhelp","hlp"},
+  {"audio/midi","mid"},
+  {"audio/midi","midi"},
+  {"audio/midi","kar"},
+  {"audio/mpeg","mp3"},
+  {"audio/mpeg","mpga"},
+  {"audio/mpeg","mp2"},
+  {"audio/basic","au"},
+  {"audio/basic","snd"},
+  {"audio/x-aiff","aif"},
+  {"audio/x-aiff","aiff"},
+  {"audio/x-aiff","aifc"},
+  {"audio/x-pn-realaudio","rm"},
+  {"audio/x-pn-realaudio","ram"},
+  {"audio/x-pn-realaudio","ra"},
+  {"audio/x-pn-realaudio-plugin","rpm"},
+  {"audio/x-wav","wav"},
+  {"chemical/x-pdb","pdb"},
+  {"chemical/x-pdb","xyz"},
+  {"drawing/x-dwf","dwf"},
+  {"image/gif","gif"},
+  {"image/ief","ief"},
+  {"image/jpeg","jpg"},
+  {"image/jpeg","jpe"},
+  {"image/jpeg","jpeg"},
+  {"image/pict","pict"},
+  {"image/png","png"},
+  {"image/tiff","tiff"},
+  {"image/tiff","tif"},
+  {"image/svg+xml","svg"},
+  {"image/svg-xml","svg"},
+  {"image/x-cmu-raster","ras"},
+  {"image/x-freehand","fh4"},
+  {"image/x-freehand","fh7"},
+  {"image/x-freehand","fh5"},
+  {"image/x-freehand","fhc"},
+  {"image/x-freehand","fh"},
+  {"image/x-portable-anymap","pnm"},
+  {"image/x-portable-bitmap","pbm"},
+  {"image/x-portable-graymap","pgm"},
+  {"image/x-portable-pixmap","ppm"},
+  {"image/x-rgb","rgb"},
+  {"image/x-xbitmap","xbm"},
+  {"image/x-xpixmap","xpm"},
+  {"image/x-xwindowdump","xwd"},
+  {"model/mesh","msh"},
+  {"model/mesh","mesh"},
+  {"model/mesh","silo"},
+  {"multipart/x-zip","zip"},
+  {"multipart/x-gzip","gzip"},
+  {"text/css","css"},
+  {"text/html","html"},
+  {"text/html","htm"},
+  {"text/plain","txt"},
+  {"text/plain","g"},
+  {"text/plain","h"},
+  {"text/plain","c"},
+  {"text/plain","cc"},
+  {"text/plain","hh"},
+  {"text/plain","m"},
+  {"text/plain","f90"},
+  {"text/richtext","rtx"},
+  {"text/tab-separated-values","tsv"},
+  {"text/x-setext","etx"},
+  {"text/x-sgml","sgml"},
+  {"text/x-sgml","sgm"},
+  {"text/xml","xml"},
+  {"text/xml","dtd"},
+  {"video/mpeg","mpeg"},
+  {"video/mpeg","mpg"},
+  {"video/mpeg","mpe"},
+  {"video/quicktime","qt"},
+  {"video/quicktime","mov"},
+  {"video/x-msvideo","avi"},
+  {"video/x-sgi-movie","movie"},
+  {"x-conference/x-cooltalk","ice"},
+  /*{"application/x-httpd-cgi","cgi"},*/
+  {"x-world/x-vrml","wrl"},
+
+  {"*","class"},
+
+  {"",""}};
+
+
+// Reserved (RFC2396)
+#define CHAR_RESERVED(c) ( strchr(";/?:@&=+$,",(unsigned char)(c)) != 0 )
+// Delimiters (RFC2396)
+#define CHAR_DELIM(c) ( strchr("<>#%\"",(unsigned char)(c)) != 0 )
+// Unwise (RFC2396)
+#define CHAR_UNWISE(c) ( strchr("{}|\\^[]`",(unsigned char)(c)) != 0 )
+// Special (escape chars) (RFC2396 + >127 )
+#define CHAR_LOW(c) ( ((unsigned char)(c) <= 31) )
+#define CHAR_HIG(c) ( ((unsigned char)(c) >= 127) )
+#define CHAR_SPECIAL(c) ( CHAR_LOW(c) || CHAR_HIG(c) )
+// We try to avoid them and encode them instead
+#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 )
+
+
+// conversion éventuelle / vers antislash
+#if HTS_WIN
+// Return a copy of s in which every '/' is replaced by '\'.
+// The copy lives in the buffer reserved by NOSTATIC_RESERVE (requested as
+// HTS_URLMAXSIZE*2 chars); a longer input is truncated instead of
+// overflowing that buffer.
+char* antislash(char* s) {
+  char* buff;
+  char* a;
+  NOSTATIC_RESERVE(buff, char, HTS_URLMAXSIZE*2);
+
+  // bounded copy: strncat() always terminates the destination
+  buff[0]='\0';
+  strncat(buff,s,HTS_URLMAXSIZE*2 - 1);
+
+  // single pass over the copy (no rescan from the start for each slash)
+  for(a=buff; *a!='\0'; a++) {
+    if (*a=='/')
+      *a='\\';
+  }
+  return buff;
+}
+#endif
+
+
+
+// Fetch a file through HTTP.
+//
+// The URL is split into address and path with ident_url_absolute() and the
+// transfer itself is delegated to xhttpget(), whose result is returned.
+// When the URL cannot be split, a zeroed htsblk is returned with
+// statuscode set to -1 and msg set to "Error invalid URL".
+htsblk httpget(char* url) {
+  char adr[HTS_URLMAXSIZE*2];   // address part
+  char fil[HTS_URLMAXSIZE*2];   // path part
+
+  if (ident_url_absolute(url,adr,fil) != -1)
+    return xhttpget(adr,fil);
+  else {
+    // predefined error result
+    htsblk failed;
+    memset(&failed, 0, sizeof(htsblk));
+    failed.adr=NULL;
+    failed.size=0;
+    failed.statuscode=-1;
+    strcpy(failed.msg,"Error invalid URL");
+    return failed;
+  }
+}
+
+// Open an HTTP connection, send a GET request and receive the header.
+// Returns the socket given back by http_xfopen().
+int http_fopen(char* adr,char* fil,htsblk* retour) {
+  const int mode=0;           // GET
+  const int treat=1;          // process the response header
+  const int waitconnect=1;    // wait for connect()
+
+  return http_xfopen(mode,treat,waitconnect,NULL,adr,fil,retour);
+}
+
+// open an HTTP connection and send a request; returns the socket (INVALID_SOCKET on failure, LOCAL_SOCKET_ID for a local file)
+// mode: 0 GET 1 HEAD [2 POST]
+// treat: process the response header?
+// waitconnect: wait for connect()
+// note: retour carries the proxy parameters (retour->req.proxy) and receives the result
+int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour) {
+ //htsblk retour;
+ //int bufl=TAILLE_BUFFER; // 8KB buffer
+ T_SOC soc=INVALID_SOCKET;
+ //char *p,*q;
+
+ // predefined result: error
+ if (retour) {
+ retour->adr=NULL;
+ retour->size=0;
+ retour->msg[0]='\0';
+ retour->statuscode=-5; // presumed non-fatal error
+ }
+
+#if HDEBUG
+ printf("adr=%s\nfichier=%s\n",adr,fil);
+#endif
+
+ // open the connection
+#if HDEBUG
+ printf("Création d'une socket sur %s\n",adr);
+#endif
+
+#if CNXDEBUG
+ printf("..newhttp\n");
+#endif
+
+ /* connection */
+ if (retour) {
+ if ( (!(retour->req.proxy.active))
+ ||
+ (
+ (strcmp(adr,"file://")==0)
+ ||
+ (strncmp(adr,"https://", 8)==0)
+ )
+ ) { /* no proxy, or proxy not usable here */
+ soc=newhttp(adr,retour,-1,waitconnect);
+ } else {
+ soc=newhttp(retour->req.proxy.name,retour,retour->req.proxy.port,waitconnect); // connect to the proxy instead
+ }
+ } else {
+ soc=newhttp(adr,NULL,-1,waitconnect);
+ }
+
+ // store the socket in retour
+ if (retour) retour->soc=soc;
+
+ /* Check for errors */
+ if (soc == INVALID_SOCKET) {
+ if (retour) {
+ if (retour->msg) {
+ if (!strnotempty(retour->msg)) {
+ strcpy(retour->msg,"Connect error");
+ }
+ }
+ }
+ }
+
+ // --------------------
+ // short-circuit (also bypasses the proxy..)
+ // LOCAL_SOCKET_ID is a local pseudo-socket
+ if (soc==LOCAL_SOCKET_ID) {
+ retour->is_file=1; // local file; NOTE(review): retour is dereferenced in this branch without the NULL check used above - confirm callers never pass NULL here
+ if (mode==0) { // GET
+
+ // Handle the file:///C|... form: if the path does not exist but the path without its first character does, drop that character (fil is modified in place)
+ if (!fexist(fconv(unescape_http(fil))))
+ if (fexist(fconv(unescape_http(fil+1)))) {
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(tempo,fil+1);
+ strcpy(fil,tempo);
+ }
+
+ // Open
+ retour->totalsize=fsize(fconv(unescape_http(fil))); // file size
+ retour->msg[0]='\0';
+ soc=INVALID_SOCKET;
+ if (retour->totalsize<0)
+ strcpy(retour->msg,"Unable to open file");
+ else if (retour->totalsize==0)
+ strcpy(retour->msg,"File empty");
+ else {
+ // Note: a FILE* is used (cleaner)
+ //soc=open(fil,O_RDONLY,0); // read-only!
+ retour->fp=fopen(fconv(unescape_http(fil)),"rb"); // open
+ if (retour->fp==NULL)
+ soc=INVALID_SOCKET;
+ else
+ soc=LOCAL_SOCKET_ID;
+ }
+ retour->soc=soc;
+ if (soc!=INVALID_SOCKET) {
+ retour->statuscode=200; // OK
+ strcpy(retour->msg,"OK");
+ guess_httptype(retour->contenttype,fil);
+ } else if (strnotempty(retour->msg)==0)
+ strcpy(retour->msg,"Unable to open file");
+ return soc; // return it
+ } else { // HEAD or POST: not allowed on a local file
+ strcpy(retour->msg,"Unexpected Head/Post local request");
+ soc=INVALID_SOCKET; // error
+ retour->soc=soc;
+ return soc;
+ }
+ }
+ // --------------------
+
+ if (soc!=INVALID_SOCKET) {
+ char rcvd[1100];
+ rcvd[0]='\0';
+#if HDEBUG
+ printf("Ok, connexion réussie, id=%d\n",soc);
+#endif
+
+ // connected?
+ if (waitconnect) {
+ http_sendhead(NULL,mode,xsend,adr,fil,NULL,NULL,retour);
+ }
+
+ if (soc!=INVALID_SOCKET) {
+
+#if HDEBUG
+ printf("Attente de la réponse:\n");
+#endif
+
+ // for GET (receiving a file), receive the header first,
+ // and then the body
+ // for POST nothing is received here; a standard receive is done
+ // afterwards to get the header
+ if ((treat) && (waitconnect)) { // process (wait for!) the header
+ // Receive the status line and the header (RFC1945)
+
+ // read the status line
+ finput(soc,rcvd,1024);
+ if (strnotempty(rcvd)==0)
+ finput(soc,rcvd,1024); // "some buggy servers send a \n first" (RFC)
+
+ // parse the status line
+ treatfirstline(retour,rcvd);
+
+#if HDEBUG
+ printf("Status-Code=%d\n",retour->statuscode);
+#endif
+
+ // header
+
+ // header // ** warning: HTTP/0.9 is not supported
+ do {
+ finput(soc,rcvd,1024);
+#if HDEBUG
+ printf(">%s\n",rcvd);
+#endif
+ if (strnotempty(rcvd))
+ treathead(NULL,NULL,NULL,retour,rcvd); // process
+
+ } while(strnotempty(rcvd));
+
+ //rcvsize=-1; // force: unknown transfer size
+
+ //if (retour)
+ // retour->totalsize=rcvsize;
+
+ } else { // for GET, the header will be received LATER
+ //rcvsize=-1; // the header size is not known
+ if (retour)
+ retour->totalsize=-1;
+ }
+
+ }
+
+ }
+
+ return soc;
+}
+
+
+/*
+ * Build the HTTP request for adr/fil in a local 8 KB buffer and send it with
+ * sendc() on the connection described by retour.
+ *
+ * cookie        cookie store; when non-NULL, matching cookies are added as a
+ *               "Cookie:" line. It is also handed to bauth_add()/bauth_check()
+ *               (http_xfopen() in this file passes NULL).
+ * mode          0: GET, or POST when fil carries a POSTTOK":" tag;
+ *               otherwise: HEAD (sent as GET when req.http11 is not set)
+ * xsend         optional extra header lines, appended verbatim (may be NULL)
+ * adr, fil      address part and path part of the requested URL
+ * referer_adr, referer_fil
+ *               address/path of the referring page (may be NULL or empty)
+ * retour        request block: req.* options are read; msg and soc are
+ *               updated when the send fails
+ *
+ * Always returns 0. A failed send is reported through retour->soc being set
+ * to INVALID_SOCKET and retour->msg to "Broken pipe".
+ *
+ * NOTE(review): the header is assembled with unchecked strcat() calls into
+ * buff[8192]; only the cookie section limits its own growth.
+ */
+int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) {
+  char buff[8192];
+  int direct_url=0;         // 1: adr is sent untouched (e.g. ftp:// through an http proxy)
+  char* search_tag=NULL;    // position of the POSTTOK tag inside fil, if any
+  buff[0]='\0';
+
+  // Undocumented feature: POSTTOK":" and POSTTOK"file:" tags inside fil
+  //  - POSTTOK":"      what follows the tag is sent as the body of a POST
+  //    example: http://www.someweb.com/test.cgi?foo>post:posteddata=10&foo=5
+  //  - POSTTOK"file:"  the named local file holds a raw request (request line
+  //                    first) which is sent in place of a generated one
+  //    example: http://www.someweb.com/test.cgi?foo>postfile:post0.txt
+  search_tag=strstr(fil,POSTTOK":");
+  if (!search_tag) {
+    search_tag=strstr(fil,POSTTOK"file:");
+    if ((search_tag) && (mode==0)) {    // postfile, GET mode only
+      FILE* fp=fopen(unescape_http(search_tag+strlen(POSTTOK)+5),"rb");
+      if (fp) {
+        char line[1100];
+        char protocol[256],url[HTS_URLMAXSIZE*2],method[256];
+        linput(fp,line,1000);
+        // method and protocol are limited to the size of their buffers
+        if (sscanf(line,"%255s %s %255s",method,url,protocol) == 3) {
+          size_t used;
+          size_t got;
+          // an absolute URI is required when going through a proxy
+          if (retour->req.proxy.active)
+            sprintf(buff,"%s http://%s%s %s\r\n",method,adr,url,protocol);
+          else
+            sprintf(buff,"%s %s %s\r\n",method,url,protocol);
+          // Read the rest of the file raw. Bytes are counted one by one so
+          // that a short file still reports its length, and the buffer is
+          // terminated explicitly: fread() does not add a NUL and buff is
+          // later handled as a C string.
+          used=strlen(buff);
+          if (used < 8000) {
+            got=fread(buff+used,1,8000-used,fp);
+            buff[used+got]='\0';
+          }
+        }
+        fclose(fp);
+      }
+    }
+  }
+  // end of postfile handling
+
+  if (strnotempty(buff)==0) {    // no raw request loaded: generate one
+    // Request method
+    if ((search_tag) && (mode==0)) {
+      strcat(buff,"POST ");
+    } else if (mode==0) {    // GET
+      strcat(buff,"GET ");
+    } else {
+      if (!retour->req.http11)    // forced HTTP/1.0
+        strcat(buff,"GET ");      // some (cgi) servers misbehave with HEAD
+      else
+        strcat(buff,"HEAD ");
+    }
+
+    // Through a proxy an absolute URI is needed: prepend http://host
+    if (retour->req.proxy.active) {
+      if (!link_has_authority(adr)) {    // default: http
+#if HDEBUG
+        printf("Proxy Use: for %s%s proxy %s port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port);
+#endif
+        strcat(buff,"http://");
+        strcat(buff,jump_identification(adr));
+      } else {    // e.g. ftp:// through an http proxy
+#if HDEBUG
+        printf("Proxy Use for ftp: for %s%s proxy %s port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port);
+#endif
+        direct_url=1;    // do not analyse user/pass
+        strcat(buff,adr);
+      }
+    }
+
+    // File name: a leading slash is mandatory, otherwise expect a 400
+    if (*fil!='/') strcat(buff,"/");
+    {
+      char tempo[HTS_URLMAXSIZE*2];
+      tempo[0]='\0';
+      if (search_tag)    // drop the POSTTOK tag and what follows it
+        strncat(tempo,fil,(int) (search_tag - fil));
+      else
+        strcpy(tempo,fil);
+      escape_check_url(tempo);
+      strcat(buff,tempo);    // escaped form
+    }
+
+    // Protocol version
+    if (!retour->req.http11) {
+      strcat(buff," HTTP/1.0\x0d\x0a");
+    } else {
+      strcat(buff," HTTP/1.1\x0d\x0a");
+    }
+
+    /* supplemental data */
+    if (xsend) strcat(buff,xsend);
+
+    // Proxy authentication, taken from user:pass@ in the proxy name
+    if (retour->req.proxy.active) {
+      if (link_has_authorization(retour->req.proxy.name)) {
+        char* a=jump_identification(retour->req.proxy.name);
+        char* astart=jump_protocol(retour->req.proxy.name);
+        char autorisation[1100];
+        char user_pass[256];
+        int idlen=(int) (a - astart) - 1;    // length of "user:pass" without the '@'
+        autorisation[0]=user_pass[0]='\0';
+        // never copy more than user_pass can hold
+        if (idlen > (int) sizeof(user_pass) - 1)
+          idlen=(int) sizeof(user_pass) - 1;
+        if (idlen > 0)
+          strncat(user_pass,astart,idlen);
+        strcpy(user_pass,unescape_http(user_pass));
+        code64(user_pass,autorisation);
+        strcat(buff,"Proxy-Authorization: Basic ");
+        strcat(buff,autorisation);
+        strcat(buff,H_CRLF);
+#if HDEBUG
+        printf("Proxy-Authenticate, %s (code: %s)\n",user_pass,autorisation);
+#endif
+      }
+    }
+
+    // Referer: only when both parts are present and non-empty
+    if ((referer_adr) && (referer_fil)) {
+      if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) {
+        if (
+          (strcmp(referer_adr,"file://") != 0)
+          &&
+          (  /* no https referer to http urls */
+               (strncmp(referer_adr, "https://", 8) != 0)  /* referer is not https */
+            ||
+               (strncmp(adr, "https://", 8) == 0)          /* or referer AND addresses are https */
+          )
+          ) {    // not file://
+          strcat(buff,"Referer: ");
+          strcat(buff,"http://");
+          strcat(buff,jump_identification(referer_adr));
+          strcat(buff,referer_fil);
+          strcat(buff,H_CRLF);
+        }
+      }
+    }
+
+    // POST: announce the size of the body appended at the very end
+    if (mode==0) {
+      if (search_tag) {
+        char clen[256];
+        sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(search_tag+strlen(POSTTOK)+1))));
+        strcat(buff,clen);
+      }
+    }
+
+    // Cookies: at most 8 of them, and stop once about 2 KB have been added
+    if (cookie) {
+      char* b=cookie->data;
+      int cook=0;
+      int max_cookies=8;
+      int max_size=2048;
+      max_size+=strlen(buff);
+      do {
+        b=cookie_find(b,"",jump_identification(adr),fil);    // next matching cookie
+        if (b) {
+          max_cookies--;
+          if (!cook) {
+            strcat(buff,"Cookie: ");
+            strcat(buff,"$Version=1; ");
+            cook=1;
+          } else
+            strcat(buff,"; ");
+          strcat(buff,cookie_get(b,5));
+          strcat(buff,"=");
+          strcat(buff,cookie_get(b,6));
+          strcat(buff,"; $Path=");
+          strcat(buff,cookie_get(b,2));
+          b=cookie_nextfield(b);
+        }
+      } while( (b) && (max_cookies>0) && ((int)strlen(buff)<max_size));
+      if (cook) {    // at least one cookie was written: close the line
+        strcat(buff,H_CRLF);
+#if DEBUG_COOK
+        printf("Header:\n%s\n",buff);
+#endif
+      }
+    }
+
+    // No keep-alive: the connection is closed after each request
+    strcat(buff,"Connection: close"H_CRLF);
+
+    {
+      char* real_adr=jump_identification(adr);
+      if (!direct_url) {    // not for ftp:// and the like
+        strcat(buff,"Host: "); strcat(buff,real_adr); strcat(buff,H_CRLF);
+      }
+
+      // User-Agent requested?
+      if (retour->req.user_agent_send) {
+        // appended piece by piece: no fixed-size intermediate buffer to overflow
+        strcat(buff,"User-Agent: "); strcat(buff,retour->req.user_agent); strcat(buff,H_CRLF);
+
+        // for picky servers
+        strcat(buff,"Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/svg+xml, */*"H_CRLF);
+        if (strnotempty(retour->req.lang_iso)) {
+          strcat(buff,"Accept-Language: "); strcat(buff,retour->req.lang_iso); strcat(buff,H_CRLF);
+        }
+        strcat(buff,"Accept-Charset: iso-8859-1, *"H_CRLF);
+        if (retour->req.http11) {
+#if HTS_USEZLIB
+          if ((!retour->req.range_used) && (!retour->req.nocompression))
+            strcat(buff,"Accept-Encoding: gzip, deflate, compress, identity"H_CRLF);
+          else
+            strcat(buff,"Accept-Encoding: identity"H_CRLF);    /* no compression */
+#else
+          strcat(buff,"Accept-Encoding: identity"H_CRLF);    /* no compression */
+#endif
+        }
+      } else {
+        strcat(buff,"Accept: */*"H_CRLF);    // bare minimum
+      }
+
+      /* Authentication */
+      {
+        char autorisation[1100];
+        char* stored;
+        autorisation[0]='\0';
+        if (link_has_authorization(adr)) {    // user:pass@ given in the address
+          char* a=jump_identification(adr);
+          char* astart=jump_protocol(adr);
+          if (!direct_url) {    // not for ftp:// and the like
+            char user_pass[256];
+            int idlen=(int) (a - astart) - 1;    // length of "user:pass" without the '@'
+            user_pass[0]='\0';
+            // never copy more than user_pass can hold
+            if (idlen > (int) sizeof(user_pass) - 1)
+              idlen=(int) sizeof(user_pass) - 1;
+            if (idlen > 0)
+              strncat(user_pass,astart,idlen);
+            strcpy(user_pass,unescape_http(user_pass));
+            code64(user_pass,autorisation);
+            if (strcmp(fil,"/robots.txt"))    /* not for robots.txt */
+              bauth_add(cookie,astart,fil,autorisation);
+          }
+        } else if ( (stored=bauth_check(cookie,real_adr,fil)) )
+          strcpy(autorisation,stored);
+        /* Anything to send? */
+        if (strnotempty(autorisation)) {
+          strcat(buff,"Authorization: Basic ");
+          strcat(buff,autorisation);
+          strcat(buff,H_CRLF);
+        }
+      }
+
+    }
+
+    // CRLF closing the header
+    strcat(buff,H_CRLF);
+
+    // POST body, if any
+    if (search_tag)
+      if (mode==0)
+        strcat(buff,unescape_http(search_tag+strlen(POSTTOK)+1));
+  }
+
+#if HDEBUG
+#endif
+  // Optional trace of the request that is about to be sent
+  if (_DEBUG_HEAD) {
+    if (ioinfo) {
+      fprintf(ioinfo,"request for %s%s:\r\n",jump_identification(adr),fil);
+      fprintfio(ioinfo,buff,"<<< ");
+      fprintf(ioinfo,"\r\n");
+      fflush(ioinfo);
+    }
+  }
+
+  // Send
+  if (sendc(retour, buff)<0) {    // error, broken socket?
+    deletesoc_r(retour);          // close it anyway
+
+    strcpy(retour->msg,"Broken pipe");
+    retour->soc=INVALID_SOCKET;
+  }
+
+  return 0;
+}
+
+
+
+
+/*
+ * Parse the first line of a response (example: "HTTP/1.0 200 OK") and fill
+ * retour->statuscode, retour->msg and, for accepted answers,
+ * retour->contenttype with the default MIME type.
+ *
+ * - "HTTP/x <digits> <text>": the code is read; <text> becomes the message
+ *   when its length is between 2 and 63, otherwise infostatuscode() supplies
+ *   one
+ * - "HTTP/" without a numeric code: statuscode -1
+ * - a line that is neither empty, nor "HTTP/", nor starting with '<':
+ *   statuscode -1
+ * - anything else (empty line, blank line, line starting with '<'): assumed
+ *   to come from a broken server and treated as a 200
+ */
+void treatfirstline(htsblk* retour,char* rcvd) {
+  char* a=rcvd;
+
+  if (*a != '\0') {
+    // some broken servers send "HTTP/1.0\n200 OK" or " HTTP/1.0 200 OK"
+    while ((*a==' ') || (*a==10) || (*a==13) || (*a==9)) a++;
+
+    if (strfield(a, "HTTP/")) {
+      // step over the "HTTP/1.x" token, then over the blanks after it
+      while ((*a!=' ') && (*a!='\0') && (*a!=10) && (*a!=13) && (*a!=9)) a++;
+      while ((*a==' ') || (*a==10) || (*a==13) || (*a==9)) a++;
+
+      if ((*a>='0') && (*a<='9')) {
+        size_t len;
+        sscanf(a,"%d",&(retour->statuscode));
+        // step over the code, then over the blanks after it
+        while ((*a!=' ') && (*a!='\0') && (*a!=10) && (*a!=13) && (*a!=9)) a++;
+        while ((*a==' ') || (*a==10) || (*a==13) || (*a==9)) a++;
+        len=strlen(a);
+        if ((len > 1) && (len < 64))    // usable reason phrase
+          strcpy(retour->msg,a);
+        else
+          infostatuscode(retour->msg,retour->statuscode);
+        // default MIME type
+        strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+        return;
+      }
+
+      // nothing after "HTTP/x", or no numeric code
+      retour->statuscode=-1;
+      strcpy(retour->msg,"Unknown response structure");
+      return;
+    }
+
+    if ((*a != '<') && (strnotempty(a))) {
+      retour->statuscode=-1;
+      strcpy(retour->msg,"Unknown response structure, no HTTP/ response given");
+      return;
+    }
+  }
+
+  /* This is dirty .. */
+  retour->statuscode=200;
+  strcpy(retour->msg, "Unknown, assuming junky server");
+  strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+}
+
+/*
+ * Parse one response header line and store what it carries in retour.
+ *
+ * cookie   cookie store; Set-Cookie lines are only handled when non-NULL
+ * adr      address of the request, used as default cookie domain (may be NULL)
+ * fil      not used by this function
+ * retour   block receiving totalsize, cdispo, lastmodified, etag, is_chunk,
+ *          contenttype, crange, contentencoding, compressed and location
+ * rcvd     the header line; Content-type and Content-Encoding lines are cut
+ *          in place at the first ';'
+ *
+ * Fields are recognised with strfield(), whose non-zero result is used as the
+ * offset of the value inside rcvd. Over-long values are ignored.
+ *
+ * NOTE(review): several branches dereference retour without the NULL check
+ * used by others; the callers visible in this file always pass a valid block.
+ */
+void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
+  int p;
+  if ((p=strfield(rcvd,"Content-length:"))!=0) {
+#if HDEBUG
+    printf("ok, Content-length: détecté\n");
+#endif
+    sscanf(rcvd+p,LLintP,&(retour->totalsize));
+  }
+  else if ((p=strfield(rcvd,"Content-Disposition:"))!=0) {
+    while(*(rcvd+p)==' ') p++;    // skip spaces
+    if ((int) strlen(rcvd+p)<250) {    // not too long?
+      char tmp[256];
+      char *a=NULL,*b=NULL;
+      strcpy(tmp,rcvd+p);
+      a=strstr(tmp,"filename=");
+      if (a) {
+        char *c=NULL;
+        a+=strlen("filename=");
+        while(is_space(*a)) a++;
+        while((c=strchr(a,'/')))    /* skip all / (see RFC2616) */
+          a=c+1;
+        // cut what is_space() matches at the end of the name
+        b=a+strlen(a)-1;
+        while(is_space(*b)) b--;
+        b++;
+        *b='\0';
+        if ((int) strlen(a) < 200) {    // not too long?
+          strcpy(retour->cdispo,a);
+        }
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Last-Modified:"))!=0) {
+    while(*(rcvd+p)==' ') p++;    // skip spaces
+    if ((int) strlen(rcvd+p)<64) {    // not too long?
+      strcpy(retour->lastmodified,rcvd+p);
+    }
+  }
+  else if ((p=strfield(rcvd,"Date:"))!=0) {
+    if (strnotempty(retour->lastmodified)==0) {    /* no last-modified yet */
+      while(*(rcvd+p)==' ') p++;    // skip spaces
+      if ((int) strlen(rcvd+p)<64) {    // not too long?
+        strcpy(retour->lastmodified,rcvd+p);
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Etag:"))!=0) {    /* Etag */
+    if (retour) {
+      while(*(rcvd+p)==' ') p++;    // skip spaces
+      if ((int) strlen(rcvd+p)<64)    // not too long?
+        strcpy(retour->etag,rcvd+p);
+      else    // error: ignore
+        retour->etag[0]='\0';
+    }
+  }
+  else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) {    // chunked
+    retour->is_chunk=1;
+#if HDEBUG
+    printf("ok, Transfer-Encoding: détecté\n");
+#endif
+  }
+  else if ((p=strfield(rcvd,"Content-type:"))!=0) {
+    if (retour) {
+      char tempo[1100];
+      // drop parameters such as "; charset=foo"
+      {
+        char* a=strchr(rcvd+p,';');
+        if (a) *a='\0';
+      }
+      // an empty value leaves the current content type untouched instead of
+      // reading an uninitialised buffer
+      tempo[0]='\0';
+      if (sscanf(rcvd+p,"%1099s",tempo) == 1) {
+        if (strlen(tempo)<64)    // not too long
+          strcpy(retour->contenttype,tempo);
+        else
+          strcpy(retour->contenttype,"application/octet-stream-unknown");    // error
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Content-Range:"))!=0) {
+    char* a=strstr(rcvd+p,"*/");
+    if (a) {
+      if (sscanf(a+2,LLintP,&retour->crange) != 1) {
+        retour->crange=0;
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Content-Encoding:"))!=0) {
+    if (retour) {
+      char tempo[1100];
+      {
+        char* a=strchr(rcvd+p,';');
+        if (a) *a='\0';
+      }
+      // tempo stays empty when the value is missing
+      tempo[0]='\0';
+      sscanf(rcvd+p,"%1099s",tempo);
+      if (strlen(tempo)<64)    // not too long
+        strcpy(retour->contentencoding,tempo);
+      else
+        retour->contentencoding[0]='\0';    // error
+#if HTS_USEZLIB
+      /* Check known encodings */
+      if (retour->contentencoding[0]) {
+        if (
+          (strfield2(retour->contentencoding, "gzip"))
+          || (strfield2(retour->contentencoding, "x-gzip"))
+          /*
+          || (strfield2(retour->contentencoding, "compress"))
+          || (strfield2(retour->contentencoding, "x-compress"))
+          */
+          || (strfield2(retour->contentencoding, "deflate"))
+          || (strfield2(retour->contentencoding, "x-deflate"))
+          ) {
+          retour->compressed=1;
+        }
+      }
+#endif
+    }
+  }
+  else if ((p=strfield(rcvd,"Location:"))!=0) {
+    if (retour) {
+      if (retour->location) {
+        while(*(rcvd+p)==' ') p++;    // skip spaces
+        if ((int) strlen(rcvd+p)<HTS_URLMAXSIZE)    // not too long?
+          strcpy(retour->location,rcvd+p);
+        else    // error: ignore
+          retour->location[0]='\0';
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Connection: Keep-Alive"))!=0) {
+    // no keep-alive here: the connection is closed anyway
+  }
+  else if ((p=strfield(rcvd,"Keep-Alive:"))!=0) {    // keep-alive parameters
+    // nothing to do
+  }
+  else if ( ((p=strfield(rcvd,"Set-Cookie:"))!=0) && (cookie) ) {    // a cookie
+    char* a = rcvd+p;          // read position
+    char domain[256];          // cookie domain (.netscape.com)
+    char path[256];            // path (/)
+    char cook_name[256];       // cookie name (MYCOOK)
+    char cook_value[8192];     // value (ID=toto,S=1234)
+#if DEBUG_COOK
+    printf("set-cookie detected\n");
+#endif
+    while(*a) {
+      char *token_st,*token_end;
+      char *value_st,*value_end;
+      char name[256];
+      char value[8192];
+      int next=0;
+      name[0]=value[0]='\0';
+
+      // defaults for the cookie being read: domain is the request host
+      // (bounded copy, empty when adr is NULL), path is "/"
+      domain[0]='\0';
+      if (adr)
+        strncat(domain,jump_identification(adr),sizeof(domain) - 1);
+      strcpy(path,"/");
+      strcpy(cook_name,"");
+      strcpy(cook_value,"");
+      // loop until the next cookie or the end of the line
+      do {
+        char* start_loop=a;
+        while(is_space(*a)) a++;
+        token_st=a;    // start of the token
+        while((!is_space(*a)) && (*a) && (*a!=';') && (*a!='=')) a++;
+        token_end=a;
+        while(is_space(*a)) a++;
+        if (*a=='=') {    // name=value
+          a++;
+          while(is_space(*a)) a++;
+          value_st=a;
+          while( (*a!=';') && (*a)) a++;    // up to the next ;
+          value_end=a;
+          // both parts must be non-empty and fit name[] / value[]
+          if ( (((int) (token_end - token_st))<200) && (((int) (value_end - value_st))<8000)
+            && (((int) (token_end - token_st))>0) && (((int) (value_end - value_st))>0) ) {
+            name[0]='\0';
+            value[0]='\0';
+            strncat(name,token_st,(int) (token_end - token_st));
+            strncat(value,value_st,(int) (value_end - value_st));
+#if DEBUG_COOK
+            printf("detected cookie-av: name=\"%s\" value=\"%s\"\n",name,value);
+#endif
+            if (strfield2(name,"domain")) {
+              // value may hold up to 8000 bytes: only take what fits, an
+              // over-long attribute keeps the default
+              if (strlen(value) < sizeof(domain))
+                strcpy(domain,value);
+            }
+            else if (strfield2(name,"path")) {
+              // same limit as for domain
+              if (strlen(value) < sizeof(path))
+                strcpy(path,value);
+            }
+            else if ( (strfield2(name,"max-age"))
+              || (strfield2(name,"expires"))
+              || (strfield2(name,"version"))
+              || (strfield2(name,"comment"))
+              || (strfield2(name,"secure")) ) {
+              // ignored
+            }
+            else {
+              if (strnotempty(cook_name)==0) {    // first pair: cookie name and value
+                strcpy(cook_name,name);
+                strcpy(cook_value,value);
+              } else {    // a second pair starts the next cookie
+                a=start_loop;    // parsing restarts from here
+                next=1;          // store the current one first
+              }
+            }
+          }
+        }
+        if (!next) {
+          while((*a!=';') && (*a)) a++;    // next attribute
+          while(*a==';') a++;              // skip ;
+        }
+      } while((*a) && (!next));
+      if (strnotempty(cook_name)) {    // something to store?
+#if DEBUG_COOK
+        printf("new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",cook_name,cook_value,domain,path);
+#endif
+        cookie_add(cookie,cook_name,cook_value,domain,path);
+      }
+    }
+  }
+}
+
+
+/*
+ * Write the standard reason phrase of an HTTP status code into msg.
+ * For a code that is not listed, msg is left untouched unless it is empty,
+ * in which case it is set to "Unknown error".
+ */
+void infostatuscode(char* msg,int statuscode) {
+  // HTTP status codes and their reason phrases
+  static const struct {
+    int code;
+    const char* text;
+  } reasons[] = {
+    { 100, "Continue" },
+    { 101, "Switching Protocols" },
+    { 200, "OK" },
+    { 201, "Created" },
+    { 202, "Accepted" },
+    { 203, "Non-Authoritative Information" },
+    { 204, "No Content" },
+    { 205, "Reset Content" },
+    { 206, "Partial Content" },
+    { 300, "Multiple Choices" },
+    { 301, "Moved Permanently" },
+    { 302, "Moved Temporarily" },
+    { 303, "See Other" },
+    { 304, "Not Modified" },
+    { 305, "Use Proxy" },
+    { 306, "Undefined 306 error" },
+    { 307, "Temporary Redirect" },
+    { 400, "Bad Request" },
+    { 401, "Unauthorized" },
+    { 402, "Payment Required" },
+    { 403, "Forbidden" },
+    { 404, "Not Found" },
+    { 405, "Method Not Allowed" },
+    { 406, "Not Acceptable" },
+    { 407, "Proxy Authentication Required" },
+    { 408, "Request Time-out" },
+    { 409, "Conflict" },
+    { 410, "Gone" },
+    { 411, "Length Required" },
+    { 412, "Precondition Failed" },
+    { 413, "Request Entity Too Large" },
+    { 414, "Request-URI Too Large" },
+    { 415, "Unsupported Media Type" },
+    { 416, "Requested Range Not Satisfiable" },
+    { 417, "Expectation Failed" },
+    { 500, "Internal Server Error" },
+    { 501, "Not Implemented" },
+    { 502, "Bad Gateway" },
+    { 503, "Service Unavailable" },
+    { 504, "Gateway Time-out" },
+    { 505, "HTTP Version Not Supported" }
+  };
+  const int count=(int) (sizeof(reasons)/sizeof(reasons[0]));
+  int i;
+
+  for(i=0;i<count;i++) {
+    if (reasons[i].code==statuscode) {
+      strcpy(msg,reasons[i].text);
+      return;
+    }
+  }
+
+  // unlisted code: keep a message that is already there
+  if (strnotempty(msg)==0)
+    strcpy(msg,"Unknown error");
+}
+
+
+/*
+ * Fetch adr+fil: open it with http_fopen(), read it with http_fread(), close
+ * the connection and return the resulting block by value.
+ * When the open fails the zero-initialised block, as left by http_fopen(),
+ * is returned.
+ */
+htsblk xhttpget(char* adr,char* fil) {
+  htsblk retour;
+  T_SOC soc;
+
+  memset(&retour, 0, sizeof(htsblk));
+  soc=http_fopen(adr,fil,&retour);
+  if (soc==INVALID_SOCKET)
+    return retour;
+
+  http_fread(soc,&retour);
+#if HTS_DEBUG_CLOSESOCK
+  DEBUG_W("xhttpget: deletehttp\n");
+#endif
+  // close whatever is still open and mark the block as disconnected
+  if (retour.soc!=INVALID_SOCKET)
+    deletehttp(&retour);
+  retour.soc=INVALID_SOCKET;
+  return retour;
+}
+
+/*
+ * Variation of xhttpget() that only receives a header (HEAD request).
+ * The connection is opened with http_xfopen(1,0,1,...), i.e. without header
+ * processing at open time, and the header is then received by http_fread().
+ * The returned block is expected to carry the header bytes in its adr field.
+ */
+htsblk http_gethead(char* adr,char* fil) {
+  htsblk retour;
+  T_SOC soc;
+
+  memset(&retour, 0, sizeof(htsblk));
+  soc=http_xfopen(1,0,1,NULL,adr,fil,&retour);    // HEAD, no header processing
+  if (soc==INVALID_SOCKET)
+    return retour;
+
+  http_fread(soc,&retour);    // receive the header
+#if HTS_DEBUG_CLOSESOCK
+  DEBUG_W("http_gethead: deletehttp\n");
+#endif
+  // close whatever is still open and mark the block as disconnected
+  if (retour.soc!=INVALID_SOCKET)
+    deletehttp(&retour);
+  retour.soc=INVALID_SOCKET;
+  return retour;
+}
+// Yes, this looks very much like xhttpget(): the difference is the open call (http_xfopen() in HEAD mode instead of http_fopen()).
+
+
+/*
+ * Read everything available on an open socket into retour.
+ * For a GET the header has already been handled and only the data remains;
+ * for a HEAD the header itself is what gets read here.
+ *
+ * soc      socket to read from; stored in retour->soc
+ * retour   block receiving the data; nothing is done when it is NULL
+ *
+ * Blocks are read with http_fread1() until it returns -1. When no buffer was
+ * produced and no message is set yet, retour->msg becomes "Unable to read".
+ */
+void http_fread(T_SOC soc,htsblk* retour) {
+  // without a block there is nowhere to store the data
+  if (retour==NULL)
+    return;
+
+  retour->soc=soc;
+  if (soc==INVALID_SOCKET)
+    return;
+
+  // read block after block until the end (or an error)
+  while(http_fread1(retour)!=-1);
+
+  if (retour->adr==NULL) {    // nothing was read: error
+    if (strnotempty(retour->msg)==0)
+      strcpy(retour->msg,"Unable to read");
+    return;
+  }
+
+#if HDEBUG
+  printf("Ok, données reçues\n");
+#endif
+}
+
+/*
+ * Tell whether data is available for reading on r->soc, without waiting
+ * (select() with a zero timeout).
+ * Returns 1 when the socket is flagged readable, 0 otherwise or when the
+ * block has no valid socket. The result of select() itself is not examined.
+ */
+int check_readinput(htsblk* r) {
+  fd_set fds;           // set holding the single socket to poll
+  struct timeval tv;    // zero timeout: immediate answer
+
+  if (r->soc == INVALID_SOCKET)
+    return 0;
+
+  FD_ZERO(&fds);
+  FD_SET(r->soc,&fds);
+  tv.tv_sec=0;
+  tv.tv_usec=0;
+  select(r->soc + 1,&fds,NULL,NULL,&tv);
+  return FD_ISSET(r->soc,&fds) ? 1 : 0;
+}
+
+/*
+ * Read one block from a socket (or a file) through http_xfread1(), using the
+ * default block size TAILLE_BUFFER.
+ * Returns whatever http_xfread1() returns:
+ *   >=0 : number of bytes read
+ *   <0  : end of data or error
+ */
+HTS_INLINE LLint http_fread1(htsblk* r) {
+  const int block_size=TAILLE_BUFFER;    // largest amount read in one call
+  return http_xfread1(r,block_size);
+}
+
+/*
+ * Read one block through hts_read(), with a selectable behaviour:
+ *
+ *   bufl > 0    read up to bufl bytes of data
+ *               - r->is_write not set: data is appended to r->adr, which is
+ *                 allocated once when r->totalsize is known (>0) and grown
+ *                 block by block otherwise
+ *               - r->is_write set: data is written to the file r->out
+ *   bufl == -2  only reserve the 8192-byte header buffer (returns 0 when it
+ *               was allocated, -1 when r->adr was already set)
+ *   bufl == 0   header mode: read byte by byte into the 8192-byte buffer,
+ *               dropping CR, until two LF in a row (or a leading LF)
+ *   other <0    header mode, line by line: stop at every LF
+ *
+ * The "+ 1" in the allocations is for the NUL byte kept after the data.
+ *
+ * Returns the number of bytes read, or -1 at the end of the data or on
+ * error. When built with HTS_USEOPENSSL and r->ssl is set, the raw count is
+ * returned, so 0 is possible and means "no relevant data".
+ */
+LLint http_xfread1(htsblk* r,int bufl) {
+  int nl=-1;
+
+  if (bufl>0) {
+    if (!r->is_write) {    // store in memory
+      if (r->totalsize>0) {    // size known: one allocation for the whole body
+        if (r->adr==NULL) {
+          r->adr=(char*) malloct((INTsys) r->totalsize + 1);
+          r->size=0;
+        }
+        if (r->adr!=NULL) {
+          // read what is still missing
+          nl = hts_read(r,r->adr + ((int) r->size),(int) (r->totalsize-r->size) );    /* NO 32 bit overlow possible here (no 4GB html!) */
+          if (nl >= 0) r->size+=nl;
+
+          if ((nl < 0) || (r->size >= r->totalsize))
+            nl=-1;    // done
+
+          r->adr[r->size]='\0';    // NUL at the end, in case HTML is processed
+        }
+
+      } else {    // size unknown: grow the buffer by bufl before each read
+        if (r->adr==NULL) {
+#if HDEBUG
+          printf("..alloc xfread\n");
+#endif
+          r->adr=(char*) malloct(bufl + 1);
+          r->size=0;
+        }
+        else {
+          char* grown;
+#if HDEBUG
+          printf("..realloc xfread1\n");
+#endif
+          // Assumes realloct() behaves like realloc(): on failure the old
+          // block is still allocated, so it is released here instead of
+          // being lost when r->adr is overwritten.
+          grown=(char*) realloct(r->adr,(int)r->size+bufl + 1);
+          if (grown==NULL) {
+            freet(r->adr);
+          }
+          r->adr=grown;
+        }
+
+        if (r->adr!=NULL) {
+          nl = hts_read(r,r->adr+(int)r->size,bufl);
+          if (nl>0) {
+            // give back the unused part; a failed shrink simply keeps the
+            // larger block, which is still valid
+            char* fitted=(char*) realloct(r->adr,(int)r->size+nl + 1);
+            if (fitted!=NULL)
+              r->adr=fitted;
+            r->size+=nl;
+            r->adr[r->size]='\0';
+
+          }    // otherwise: finished
+#if HDEBUG
+          else if (nl < 0)
+            printf("..end read (%d)\n", nl);
+#endif
+        }
+#if HDEBUG
+        else printf("..-> error\n");
+#endif
+      }
+
+      // no buffer means error
+      if (r->adr==NULL) nl=-1;
+
+    } else {    // store on disk
+      char* buff;
+      buff=(char*) malloct(bufl);
+      if (buff!=NULL) {
+        nl = hts_read(r,buff,bufl);
+        if (nl > 0) {
+          r->size+=nl;
+          if ((int) fwrite(buff,1,nl,r->out)!=nl) {
+            r->statuscode=-1;
+            strcpy(r->msg,"Write error on disk");
+            nl=-1;
+          }
+        }
+
+        if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize)))
+          nl=-1;    // done
+
+        // release the temporary block
+        freet(buff);
+      } else
+        nl=-1;
+
+      if ((nl < 0) && (r->out!=NULL)) {
+        fflush(r->out);
+      }
+
+    }    // disk or memory storage
+
+  } else if (bufl == -2) {    // only reserve the header buffer
+    if (r->adr==NULL) {
+      r->adr=(char*) malloct(8192);
+      r->size=0;
+      return 0;
+    }
+    return -1;
+  } else {    // header reception, byte by byte
+    int count=256;        // at most 256 rounds per call
+    int tot_nl=0;         // bytes read during this call
+    int lf_detected=0;    // an LF was seen and no other character since
+    int at_begining=1;    // nothing stored yet during this call
+    do {
+      nl=-1;
+      count--;
+      if (r->adr==NULL) {
+        r->adr=(char*) malloct(8192);
+        r->size=0;
+      }
+      if (r->adr!=NULL) {
+        if (r->size < 8190) {    // room left in the 8192-byte buffer
+          nl = hts_read(r,r->adr+r->size,1);
+          if (nl>0) {
+            char c=*(r->adr+r->size);
+            // stop when:
+            //  an LF follows another LF, or
+            //  the LF is the first character read, or
+            //  line mode is requested (bufl<0)
+            if (c == 10) {
+              if (lf_detected || (at_begining) || (bufl<0))
+                count=-1;
+              lf_detected=1;
+            }
+            if (c != 13) {    // CR characters are dropped
+              if (c != 10) {
+                // any other character starts a new line
+                lf_detected=0;
+              }
+              (r->size)++;
+              at_begining=0;
+            }
+            *(r->adr+r->size)='\0';    // keep the buffer NUL-terminated
+          }
+        }
+      }
+      if (nl >= 0) {
+        tot_nl+=nl;
+        if (!check_readinput(r))    // nothing more waiting: stop here
+          count=-1;
+      }
+    } while((nl >= 0) && (count>0));
+    nl = tot_nl;
+  }
+  // nl == 0 may mean "no relevant data", for example is using cache or ssl
+#if HTS_USEOPENSSL
+  if (r->ssl)
+    return nl;
+  else
+#endif
+  return ((nl > 0) ? nl : -1);    // ==0 is fatal if direct read
+}
+
+
+/*
+ * Test an address with http_test() and follow "moved" answers (301, 302, 303
+ * and 307): after each redirect adr and fil are recomputed from loc with
+ * ident_url_absolute() and the test is run again, at most 8 times ("RFC
+ * says" 5 loops; 3 more are tolerated here).
+ *
+ * Returns the block of the last test, whose statuscode is 200 or an error
+ * code (404 = not found, etc). loc is filled in by http_test().
+ */
+htsblk http_location(char* adr,char* fil,char* loc) {
+  htsblk retour;
+  int retry=0;
+
+  for(;;) {
+    int code;
+    retour=http_test(adr,fil,loc);
+    code=retour.statuscode;
+
+    // anything but a redirect ends the search
+    if ((code!=301) && (code!=302) && (code!=303) && (code!=307))
+      break;
+    // recompute adr and fil; give up when the new location is unusable
+    if (ident_url_absolute(loc,adr,fil)==-1)
+      break;
+    // one more attempt, within the limit
+    retry++;
+    if (retry>=5+3)
+      break;
+  }
+  return retour;
+}
+
+
+/*
+ * Test a URL (validity, header, size) with a HEAD request.
+ *
+ * adr, fil   address part and path part of the URL
+ * loc        receives the target of a "moved" answer (set to "" first)
+ *
+ * Returns a block whose statuscode is the code sent by the server (200,
+ * 404, ...), or -2 with the message "Timeout While Testing" when the header
+ * could not be received within 30 seconds. The header buffer is released
+ * before returning, so the block's adr field is NULL in every case.
+ */
+htsblk http_test(char* adr,char* fil,char* loc) {
+  T_SOC soc;
+  htsblk retour;
+  TStamp tl;
+  int timeout=30;    // timeout for a check, in seconds (arbitrary)
+
+  // start time, to give up on sites that are too slow
+  tl=time_local();
+
+  loc[0]='\0';
+  memset(&retour, 0, sizeof(htsblk));
+  retour.location=loc;    // will hold the real address in case of "moved"
+
+  // open in HEAD mode and send the request; the header is handled below
+  soc=http_xfopen(1,0,1,NULL,adr,fil,&retour);
+
+  if (soc!=INVALID_SOCKET) {
+    int e=0;    // 0: keep reading, 1: reading is over, -1: timeout
+    // Read header blocks until the read fails, the buffer does not end with
+    // two LF, or the timeout expires.
+    // NOTE(review): the original comment described this loop as running
+    // "while two LF have not been received", which is not what the
+    // condition does; the condition itself is kept as it was.
+    do {
+      if (http_xfread1(&retour,0) < 0)
+        e=1;
+      else if (retour.adr!=NULL) {
+        // fewer than two bytes cannot end with two LF (and must not be
+        // indexed at size-2)
+        if ((retour.size < 2)
+          || (retour.adr[retour.size-1]!=10) || (retour.adr[retour.size-2]!=10))
+          e=1;
+      }
+
+      if (!e) {
+        if ((time_local()-tl)>=timeout) {
+          e=-1;
+        }
+      }
+
+    } while (!e);
+
+    if (e==1) {
+      if (retour.adr!=NULL) {
+        int ptr=0;
+        char rcvd[1100];
+
+        // note: roughly a copy of the processing done in back_wait()
+
+        // ----------------------------------------
+        // status line
+        ptr+=binput(retour.adr+ptr,rcvd,1024);
+        if (strnotempty(rcvd)==0)
+          ptr+=binput(retour.adr+ptr,rcvd,1024);    // "some broken servers send a \n first" (RFC)
+
+        treatfirstline(&retour,rcvd);
+
+#if HDEBUG
+        printf("(Buffer) Status-Code=%d\n",retour.statuscode);
+#endif
+
+        // header lines, up to the first empty one (HTTP/0.9 is not supported)
+        do {
+          ptr+=binput(retour.adr+ptr,rcvd,1024);
+#if HDEBUG
+          printf("(buffer)>%s\n",rcvd);
+#endif
+          if (strnotempty(rcvd))
+            treathead(NULL,NULL,NULL,&retour,rcvd);
+
+        } while(strnotempty(rcvd));
+        // ----------------------------------------
+      } else {
+        // no header buffer at all: report it instead of parsing a NULL pointer
+        retour.statuscode=-1;
+        if (strnotempty(retour.msg)==0)
+          strcpy(retour.msg,"Unable to read");
+      }
+    } else {
+      retour.statuscode=-2;
+      strcpy(retour.msg,"Timeout While Testing");
+    }
+
+    // release the header buffer on every path, timeout included
+    if (retour.adr!=NULL) { freet(retour.adr); retour.adr=NULL; }
+
+#if HTS_DEBUG_CLOSESOCK
+    DEBUG_W("http_test: deletehttp\n");
+#endif
+    deletehttp(&retour);
+    retour.soc=INVALID_SOCKET;
+  }
+  return retour;
+}
+
+// Crée un lien (http) vers une adresse internet iadr
+// retour: structure (adresse, taille, message si erreur (si !adr))
+// peut ouvrir avec des connect() non bloquants: waitconnect=0/1
+/*
+ * Open a connection to the host named by _iadr and return its socket.
+ *
+ *   _iadr       host, optionally prefixed by "id:pass@" and suffixed by ":port";
+ *               the exact string "file://" selects the local pseudo-socket
+ *   retour      optional result block; retour->msg receives an error text
+ *   port        port to use, or -1 to take it from _iadr (default 80, or 443
+ *               when HTS_USEOPENSSL is set and retour->ssl is non-zero)
+ *   waitconnect non-zero: blocking connect(); zero: the socket is switched to
+ *               non-blocking mode and a failing connect() is not reported
+ *
+ * Returns the socket, LOCAL_SOCKET_ID for "file://", or INVALID_SOCKET.
+ */
+int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
+  t_fullhostent fullhostent_buffer;   // scratch buffer handed to the resolver
+  T_SOC soc;                          // socket descriptor
+  char* iadr;
+
+  // skip a possible "id:pass@" prefix
+  iadr = jump_identification(_iadr);
+
+  // "file://" is a fake URL standing for a real local file (useful for tests)
+  if (strcmp(_iadr,"file://")) {      /* not a local file */
+    SOCaddr server;
+    int server_size=sizeof(server);
+    t_hostent* hp;
+
+    // clear the address structure
+    memset(&server, 0, sizeof(server));
+
+#if HDEBUG
+    printf("gethostbyname\n");
+#endif
+
+    // no explicit port: look for one in the address, else use the default
+    if (port==-1) {
+      char *a=jump_toport(iadr);
+#if HTS_USEOPENSSL
+      // retour is tested against NULL everywhere else in this function,
+      // so it must not be dereferenced blindly here
+      if (retour && retour->ssl)
+        port=443;
+      else
+        port=80;    // default port
+#else
+      port=80;      // default port
+#endif
+      if (a) {
+        char iadr2[HTS_URLMAXSIZE*2];
+        int hostlen=(int) (a - iadr);
+        int i=-1;
+
+        // the host part (plus its terminator) must fit in iadr2
+        if (hostlen >= (int) sizeof(iadr2)) {
+          if (retour)
+            if (retour->msg)
+              strcpy(retour->msg,"Unable to get server's address");
+          return INVALID_SOCKET;
+        }
+
+        iadr2[0]='\0';
+        sscanf(a+1,"%d",&i);
+        if (i!=-1) {
+          port=(unsigned short int) i;
+        }
+
+        // real address, without the ":xx" suffix
+        strncat(iadr2,iadr,hostlen);
+        hp = hts_gethostbyname(iadr2, &fullhostent_buffer);
+
+      } else {
+
+        // plain address, default port
+        hp = hts_gethostbyname(iadr, &fullhostent_buffer);
+
+      }
+
+    } else   // port given by the caller
+      hp = hts_gethostbyname(iadr, &fullhostent_buffer);
+
+
+    // name resolution failed
+    if (hp == NULL) {
+#if DEBUG
+      printf("erreur gethostbyname\n");
+#endif
+      if (retour)
+        if (retour->msg)
+          strcpy(retour->msg,"Unable to get server's address");
+      return INVALID_SOCKET;
+    }
+    // copy the first resolved address
+    SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+
+    // create a stream socket
+#if HDEBUG
+    printf("socket\n");
+#endif
+#if HTS_WIDE_DEBUG
+    DEBUG_W("socket\n");
+#endif
+    soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+#if HTS_WIDE_DEBUG
+    DEBUG_W("socket done\n");
+#endif
+    if (soc==INVALID_SOCKET) {
+      if (retour)
+        if (retour->msg)
+          strcpy(retour->msg,"Unable to create a socket");
+      return INVALID_SOCKET;    // socket creation failed
+    }
+    // set the destination port (80 or other)
+    SOCaddr_initport(server, port);
+#if HDEBUG
+    printf("==%d\n",soc);
+#endif
+
+    // non-blocking connection requested?
+    if (!waitconnect ) {
+      unsigned long p=1;    // non-blocking
+#if HTS_WIN
+      ioctlsocket(soc,FIONBIO,&p);
+#else
+      ioctl(soc,FIONBIO,&p);
+#endif
+    }
+
+    // connect to the server itself
+#if HDEBUG
+    printf("connect\n");
+#endif
+
+#if HTS_WIDE_DEBUG
+    DEBUG_W("connect\n");
+#endif
+#if HTS_WIN
+    if (connect(soc, (const struct sockaddr FAR *)&server, server_size) != 0) {
+#else
+    if (connect(soc, (struct sockaddr *)&server, server_size) == -1) {
+#endif
+
+      // in non-blocking mode a failure here is not treated as an error:
+      // the socket is returned as is
+
+      // blocking mode: report the failure
+      if (waitconnect) {
+#if HDEBUG
+        printf("unable to connect!\n");
+#endif
+        if (retour)
+          if (retour->msg)
+            strcpy(retour->msg,"Unable to connect to the server");
+        /* Close the socket and notify the error!!! */
+        deletesoc(soc);
+        return INVALID_SOCKET;
+      }
+    }
+#if HTS_WIDE_DEBUG
+    DEBUG_W("connect done\n");
+#endif
+
+#if HDEBUG
+    printf("connexion établie\n");
+#endif
+
+    // from now on data can be sent and received through soc
+
+  } else {    // a local file has to be opened
+    // it is handled the same way as a socket
+
+    soc=LOCAL_SOCKET_ID;    // local pseudo-socket
+    // soc is replaced by a real handle later, in http_fopen()
+
+  }
+
+  return soc;
+}
+
+
+
+// Split an absolute URL into its address and path parts, e.g.
+//   http://www.truc.fr/pub/index.html -> adr="www.truc.fr" fil="/pub/index.html"
+// Returns 0 on success, -1 on error.
+// For file://... URLs adr is "file://" and the ?query part is cut from fil.
+int ident_url_absolute(char* url,char* adr,char* fil) {
+  int pos;
+  int has_scheme;
+  char* cur;
+
+  // start from empty outputs
+  fil[0]='\0';
+  adr[0]='\0';
+
+#if HDEBUG
+  printf("protocol: %s\n",url);
+#endif
+
+  // does the URL begin with "letters:" ?
+  for(cur=url ; isalpha((unsigned char)*cur) ; cur++)
+    ;
+  has_scheme=(*cur == ':');
+
+  // 1. optional scheme ":" -- the first recognised one wins
+  pos=strfield(url,"file:");              // local file (for tests)
+  if (pos)
+    strcpy(adr,"file://");
+  if (!pos)
+    pos=strfield(url,"http:");            // HTTP: adr carries no prefix
+  if (!pos) {
+    pos=strfield(url,"ftp:");             // FTP
+    if (pos)
+      strcpy(adr,"ftp://");
+  }
+#if HTS_USEOPENSSL
+  if (!pos) {
+    pos=strfield(url,"https:");           // HTTPS
+    if (pos)
+      strcpy(adr,"https://");
+  }
+#endif
+  if (!pos && has_scheme)
+    return -1;                            // unrecognised scheme
+
+  // 2. optional "//" authority
+  if (strncmp(url+pos,"//",2)==0)
+    pos+=2;
+
+  // (url+pos) now points to the path (not net path)
+
+  if (strfield(adr,"file:")) {            // localhost file://
+    char* query;
+    char* c;
+
+    strcat(fil,url+pos);
+    // the query is useless when requesting a local file
+    query=strchr(fil,'?');
+    if (query)
+      *query='\0';
+    // turn DOS backslashes into slashes
+    for(c=fil ; *c ; c++)
+      if (*c=='\\')
+        *c='/';
+  } else {                                // regular network address
+    // host_beg: start of the address, e.g. www.truc.fr/sommaire/index.html
+    char* host_beg=url+pos;
+    // host_end: start of the path, e.g. /index.html?query=recherche
+    char* host_end=strchr(jump_identification(host_beg),'/');
+
+    if (host_end==0)                      // http://www.foo.com?bar=1
+      host_end=strchr(jump_identification(host_beg),'?');
+    if (host_end==0)                      // no path: point to the terminator
+      host_end=host_beg+strlen(host_beg);
+
+    // address part too long
+    if ( ((int) (host_end - host_beg)) > HTS_URLMAXSIZE)
+      return -1;
+
+    // copy the address, then the path ("/" is prepended when missing)
+    strncat(adr,host_beg,((int) (host_end - host_beg)));
+    if (host_end[0] != '/')
+      strcat(fil,"/");
+    strcat(fil,host_end);
+    // SECURITY: simplify the ../ sequences of the path
+    fil_simplifie(fil);
+  }
+
+  // no hostname
+  if (!strnotempty(adr))
+    return -1;
+
+  // give the file a name if needed (normally unused)
+  if (!strnotempty(fil))
+    strcpy(fil,"default-index.html");
+
+  // the address is case insensitive: lower-case it (past any id:pass@)
+  for(cur=jump_identification(adr) ; *cur ; cur++) {
+    if ((*cur>='A') && (*cur<='Z'))
+      *cur+='a'-'A';
+  }
+
+  return 0;
+}
+
+// Simplify a path in place:
+//  - "/dir/../" is resolved by dropping the preceding directory ("/../" at
+//    the top collapses to "/"),
+//  - every "./" sequence is removed,
+//  - any remaining "../" is removed (potential threat).
+// The edits are done in place with memmove(), so no fixed-size temporary
+// limits the length of f.
+void fil_simplifie(char* f) {
+  int i=0;
+  int last=0;     // index of the previous '/' seen (0 when none yet)
+  char* a;
+
+  // resolve "/../"
+  while (f[i]) {
+
+    if (f[i]=='/') {
+      // the trailing '/' test avoids cutting names like /tmp/..coolandlamedir/
+      if ((f[i+1]=='.') && (f[i+2]=='.') && (f[i+3]=='/')) {
+        char* rest=f+i+4;
+
+        if (!last)      /* can't go upper: the result starts with "/" */
+          f[0]='/';
+        // keep f[0..last], then append what follows "/../"
+        memmove(f+last+1,rest,strlen(rest)+1);
+        i=-1;           // rescan from the beginning
+        last=0;
+      }
+
+      if (i>=0)
+        last=i;
+      else
+        last=0;
+    }
+
+    i++;
+  }
+
+  // remove "./"
+  while ( (a=strstr(f,"./")) ) {
+    memmove(a,a+2,strlen(a+2)+1);
+  }
+  // delete all remaining ../ (potential threat)
+  // NOTE(review): "../" contains "./", so nothing should be left to match
+  // here after the loop above; kept as a safety net.
+  while ( (a=strstr(f,"../")) ) {
+    memmove(a,a+3,strlen(a+3)+1);
+  }
+
+}
+
+
+// Close the file or socket attached to r and mark r as closed.
+// Nothing is done when r->soc is already INVALID_SOCKET.
+HTS_INLINE void deletehttp(htsblk* r) {
+#if HTS_DEBUG_CLOSESOCK
+  char info[256];
+  // a pointer must be printed with %p (passing it to %d is undefined)
+  sprintf(info,"deletehttp: (htsblk*) %p\n",(void*) r);
+  DEBUG_W2(info);
+#endif
+  if (r->soc!=INVALID_SOCKET) {
+    if (r->is_file) {
+      // local file: close the stream if one is open
+      if (r->fp)
+        fclose(r->fp);
+      r->fp=NULL;
+    } else {
+      // real socket: the local pseudo-socket has nothing to close
+      if (r->soc!=LOCAL_SOCKET_ID)
+        deletesoc_r(r);
+    }
+    r->soc=INVALID_SOCKET;
+  }
+}
+
+// Close a socket; INVALID_SOCKET is silently ignored.
+HTS_INLINE void deletesoc(T_SOC soc) {
+  if (soc==INVALID_SOCKET)
+    return;
+
+  // No shutdown(soc,2) before closing: the original author disabled it
+  // after a crash.
+  // Do not forget to close the connection before leaving (cleaner).
+#if HTS_WIDE_DEBUG
+  DEBUG_W("close\n");
+#endif
+#if HTS_WIN
+  closesocket(soc);
+#else
+  close(soc);
+#endif
+#if HTS_WIDE_DEBUG
+  DEBUG_W("close done\n");
+#endif
+}
+
+/* Close the socket of r; with OpenSSL support the SSL connection attached
+   to r is shut down and freed first. r->soc ends up INVALID_SOCKET. */
+HTS_INLINE void deletesoc_r(htsblk* r) {
+#if HTS_USEOPENSSL
+  if (r->ssl_con != NULL) {
+    SSL_shutdown(r->ssl_con);
+    SSL_free(r->ssl_con);
+    r->ssl_con=NULL;
+  }
+#endif
+
+  deletesoc(r->soc);
+  r->soc=INVALID_SOCKET;
+}
+
+// Returns the number of seconds elapsed since 1970.
+HTS_INLINE TStamp time_local(void) {
+  TStamp seconds=(TStamp) time(NULL);
+  return seconds;
+}
+
+// Returns the number of milliseconds elapsed since 1970.
+// When HTS_DO_NOT_USE_FTIME is defined only second resolution is available.
+HTS_INLINE TStamp mtime_local(void) {
+  TStamp millis;
+#ifndef HTS_DO_NOT_USE_FTIME
+  struct timeb now;
+  ftime( &now );
+  millis = (TStamp) now.time * (TStamp) 1000;
+  millis = millis + (TStamp) now.millitm;
+#else
+  // not precise: whole seconds only
+  millis = (TStamp) time_local() * (TStamp) 1000;
+#endif
+  return (TStamp) millis;
+}
+
+// Write a duration given in seconds as text into st, e.g.
+// "1 days, 2 hours 3 minutes 4 seconds"; leading zero units are omitted.
+void sec2str(char *st,TStamp t) {
+  int days,hours,minutes,seconds;
+
+  // peel off days, hours and minutes; what remains is the seconds
+  days=(int) (t/(3600*24));
+  t-=((TStamp) days)*(3600*24);
+  hours=(int) (t/(3600));
+  t-=((TStamp) hours)*3600;
+  minutes=(int) (t/60);
+  t-=((TStamp) minutes)*60;
+  seconds=(int) t;
+
+  if (days>0) {
+    sprintf(st,"%d days, %d hours %d minutes %d seconds",days,hours,minutes,seconds);
+    return;
+  }
+  if (hours>0) {
+    sprintf(st,"%d hours %d minutes %d seconds",hours,minutes,seconds);
+    return;
+  }
+  if (minutes>0) {
+    sprintf(st,"%d minutes %d seconds",minutes,seconds);
+    return;
+  }
+  sprintf(st,"%d seconds",seconds);
+}
+
+// Same as sec2str, in a shorter form, e.g. "1d,02h,03min04s".
+void qsec2str(char *st,TStamp t) {
+  int days,hours,minutes,seconds;
+
+  // peel off days, hours and minutes; what remains is the seconds
+  days=(int) (t/(3600*24));
+  t-=((TStamp) days)*(3600*24);
+  hours=(int) (t/(3600));
+  t-=((TStamp) hours)*3600;
+  minutes=(int) (t/60);
+  t-=((TStamp) minutes)*60;
+  seconds=(int) t;
+
+  if (days>0) {
+    sprintf(st,"%dd,%02dh,%02dmin%02ds",days,hours,minutes,seconds);
+    return;
+  }
+  if (hours>0) {
+    sprintf(st,"%dh,%02dmin%02ds",hours,minutes,seconds);
+    return;
+  }
+  if (minutes>0) {
+    sprintf(st,"%dmin%02ds",minutes,seconds);
+    return;
+  }
+  sprintf(st,"%ds",seconds);
+}
+
+
+// Current time, GMT, in RFC format (s must hold 256 bytes).
+// Falls back to local time when gmtime() gives no result.
+void time_gmt_rfc822(char* s) {
+  time_t now=time(NULL);
+  struct tm* broken=gmtime(&now);
+
+  if (!broken)
+    broken=localtime(&now);
+  time_rfc822(s,broken);
+}
+
+// Current local time in RFC format (s must hold 256 bytes).
+void time_local_rfc822(char* s) {
+  time_t now=time(NULL);
+  struct tm* broken=localtime(&now);
+
+  time_rfc822_local(s,broken);
+}
+
+/* Convert a date string into a broken-down time.
+   Accepted layouts (see the branches at the end):
+     Sun Nov 6 08:49:37 1994
+     Sun, 06 Nov 1994 08:49:37 GMT
+     Sunday, 06-Nov-94 08:49:37 GMT
+   Returns NULL when s is longer than 200 characters or when a month and
+   five numbers (day + four others) could not all be found. */
+struct tm* convert_time_rfc822(char* s) {
+ struct tm* result;
+ /* month names, 4 characters apart so that offset/4 is the month index */
+ char months[]="jan feb mar apr may jun jul aug sep oct nov dec";
+ char str[256];
+ char* a;
+ /* parsed fields; -1 means "not seen yet" */
+ int result_mm=-1;
+ int result_dd=-1;
+ int result_n1=-1;
+ int result_n2=-1;
+ int result_n3=-1;
+ int result_n4=-1;
+ /* NOTE(review): NOSTATIC_RESERVE is defined elsewhere; presumably it binds
+    result to storage that outlives this call -- confirm */
+ NOSTATIC_RESERVE(result, struct tm, 1);
+
+ /* str holds 256 bytes: refuse anything that could not fit */
+ if ((int) strlen(s) > 200)
+ return NULL;
+ strcpy(str,s);
+ hts_lowcase(str);
+ /* turn the separators - : , into spaces */
+ while( (a=strchr(str,'-')) ) *a=' ';
+ while( (a=strchr(str,':')) ) *a=' ';
+ while( (a=strchr(str,',')) ) *a=' ';
+ /* tokenize on spaces */
+ a=str;
+ while(*a) {
+ char *first,*last;
+ char tok[256];
+ /* isolate the next word */
+ while(*a==' ') a++; /* skip spaces */
+ first=a;
+ while((*a) && (*a!=' ')) a++;
+ last=a;
+ tok[0]='\0';
+ if (first!=last) {
+ char* pos;
+ strncat(tok,first,(int) (last - first));
+ /* analyze the token */
+ /* NOTE(review): strstr() accepts any substring of the month list, not
+    only whole month names */
+ if ( (pos=strstr(months,tok)) ) { /* month always in letters */
+ result_mm=((int) (pos - months))/4;
+ } else {
+ int number;
+ if (sscanf(tok,"%d",&number) == 1) { /* number token */
+ if (result_dd<0) /* day always first number */
+ result_dd=number;
+ else if (result_n1<0)
+ result_n1=number;
+ else if (result_n2<0)
+ result_n2=number;
+ else if (result_n3<0)
+ result_n3=number;
+ else if (result_n4<0)
+ result_n4=number;
+ } /* otherwise: background noise (+1GMT for example) */
+ }
+ }
+ }
+ /* all fields must have been found */
+ if ((result_n1>=0) && (result_mm>=0) && (result_dd>=0) && (result_n2>=0) && (result_n3>=0) && (result_n4>=0)) {
+ if (result_n4>=1000) { /* Sun Nov 6 08:49:37 1994 */
+ result->tm_year=result_n4-1900;
+ result->tm_hour=result_n1;
+ result->tm_min=result_n2;
+ result->tm_sec=max(result_n3,0);
+ } else { /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
+ result->tm_hour=result_n2;
+ result->tm_min=result_n3;
+ result->tm_sec=max(result_n4,0);
+ if (result_n1<=50) /* 00 means 2000 */
+ result->tm_year=result_n1+100;
+ else if (result_n1<1000) /* 99 means 1999 */
+ result->tm_year=result_n1;
+ else /* 2000 */
+ result->tm_year=result_n1-1900;
+ }
+ result->tm_isdst=0; /* assume GMT */
+ result->tm_yday=-1; /* don't know */
+ result->tm_wday=-1; /* don't know */
+ result->tm_mon=result_mm;
+ result->tm_mday=result_dd;
+ return result;
+ }
+ return NULL;
+}
+
+/* Set the access and modification times of file from tm_time.
+   Returns the result of utime() (-1 if error). */
+int set_filetime(char* file,struct tm* tm_time) {
+  struct utimbuf stamp;
+#ifndef HTS_DO_NOT_USE_FTIME
+  struct timeb zone;
+
+  zone.timezone=0;
+  ftime( &zone );
+  // mktime() takes a local time: remove the timezone offset (in minutes)
+  stamp.modtime=mktime(tm_time) - zone.timezone*60;
+#else
+  // bogus time (GMT/local)..
+  stamp.modtime=mktime(tm_time);
+#endif
+  stamp.actime=stamp.modtime;
+  return utime(file,&stamp);
+}
+
+/* Set the file time from an RFC822 date+time string.
+   Returns -1 if the date cannot be parsed or the time cannot be set. */
+int set_filetime_rfc822(char* file,char* date) {
+  struct tm* parsed=convert_time_rfc822(date);
+
+  if (parsed == NULL)
+    return -1;
+  return set_filetime(file,parsed);
+}
+
+
+// Format the time A in RFC format with a GMT suffix (s must hold 256 bytes).
+HTS_INLINE void time_rfc822(char* s,struct tm * A) {
+  const char* const layout="%a, %d %b %Y %H:%M:%S GMT";
+
+  strftime(s,256,layout,A);
+}
+
+// Format the local time A in RFC format, without zone (s must hold 256 bytes).
+HTS_INLINE void time_rfc822_local(char* s,struct tm * A) {
+  const char* const layout="%a, %d %b %Y %H:%M:%S";
+
+  strftime(s,256,layout,A);
+}
+
+// Convert a byte count into text with its unit (B, KiB, MiB, ...).
+// The result is a string returned by concat().
+char* int2bytes(LLint n) {
+  char** parts=int2bytes2(n);   // parts[0]: number, parts[1]: unit
+  char* text;
+  NOSTATIC_RESERVE(text, char, 256);
+
+  sprintf(text,"%s%s",parts[0],parts[1]);
+  return concat(text,"");
+}
+
+// Convert a byte rate into text with its unit (B/s, KiB/s, MiB/s, ...).
+// The result is a string returned by concat().
+char* int2bytessec(long int n) {
+  char* text;
+  char** parts=int2bytes2(n);   // parts[0]: number, parts[1]: unit
+  NOSTATIC_RESERVE(text, char, 256);
+
+  sprintf(text,"%s%s",parts[0],parts[1]);
+  return concat(text,"/s");
+}
+// Convert an integer into its decimal text, returned by concat().
+char* int2char(int n) {
+  char* digits;
+  NOSTATIC_RESERVE(digits, char, 32);
+
+  sprintf(digits,"%d",n);
+  return concat(digits,"");
+}
+
+// Convert a byte count into B, KiB, MiB..., number and unit kept separate.
+// limit: 2.10^9.10^6B
+
+/* See http://physics.nist.gov/cuu/Units/binary.html */
+#define ToLLint(a) ((LLint)(a))
+#define ToLLintKiB (ToLLint(1024))
+#define ToLLintMiB (ToLLintKiB*ToLLintKiB)
+#ifdef HTS_LONGLONG
+#define ToLLintGiB (ToLLintKiB*ToLLintKiB*ToLLintKiB)
+#define ToLLintTiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
+#define ToLLintPiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
+#endif
+typedef struct {
+  char buff1[256];      /* number text */
+  char buff2[32];       /* unit text */
+  char* buffadr[2];     /* { buff1, buff2 } */
+} strc_int2bytes2;
+
+/* Write n/unit with two decimals (comma separated) into buff1 and the unit
+   name into buff2. */
+static void int2bytes2_scaled(strc_int2bytes2* strc,LLint n,LLint unit,const char* name) {
+  sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/unit)),(int)((LLint)(((n%unit)*100)/unit)));
+  strcpy(strc->buff2,name);
+}
+
+/* Returns a two-entry array: [0] is the number text, [1] the unit text. */
+char** int2bytes2(LLint n) {
+  strc_int2bytes2* strc;
+  NOSTATIC_RESERVE(strc, strc_int2bytes2, 1);
+
+  if (n < ToLLintKiB) {
+    // plain bytes: no decimals
+    sprintf(strc->buff1,"%d",(int)(LLint)n);
+    strcpy(strc->buff2,"B");
+  } else if (n < ToLLintMiB)
+    int2bytes2_scaled(strc,n,ToLLintKiB,"KiB");
+#ifdef HTS_LONGLONG
+  else if (n < ToLLintGiB)
+    int2bytes2_scaled(strc,n,ToLLintMiB,"MiB");
+  else if (n < ToLLintTiB)
+    int2bytes2_scaled(strc,n,ToLLintGiB,"GiB");
+  else if (n < ToLLintPiB)
+    int2bytes2_scaled(strc,n,ToLLintTiB,"TiB");
+  else
+    int2bytes2_scaled(strc,n,ToLLintPiB,"PiB");
+#else
+  else
+    int2bytes2_scaled(strc,n,ToLLintMiB,"MiB");
+#endif
+  strc->buffadr[0]=strc->buff1;
+  strc->buffadr[1]=strc->buff2;
+  return strc->buffadr;
+}
+
+#if HTS_WIN
+#else
+// "Ignore SIGPIPE?" flag: a non-negative setflag stores a new value, a
+// negative one only reads it. Returns the current value.
+int sig_ignore_flag( int setflag ) {
+  static int flag=0;    /* YES, this one is true static */
+
+  if (setflag < 0)
+    return flag;
+  flag=setflag;
+  return flag;
+}
+#endif
+
+// Send the text s (usually headers) through the connection of r.
+// Returns what SSL_write() or send() returned.
+HTS_INLINE int sendc(htsblk* r, char* s) {
+  int sent;
+
+#if HTS_WIN
+#else
+  sig_ignore_flag(1);
+#endif
+#if HDEBUG
+  write(0,s,strlen(s));
+#endif
+
+#if HTS_USEOPENSSL
+  if (r->ssl)
+    sent = SSL_write(r->ssl_con, s, strlen(s));
+  else
+    sent = send(r->soc,s,strlen(s),0);
+#else
+  sent = send(r->soc,s,strlen(s),0);
+#endif
+
+#if HTS_WIN
+#else
+  sig_ignore_flag(0);
+#endif
+
+  return sent;
+}
+
+
+// Read one line from the descriptor fd into s (buffer of max bytes).
+// Reading stops at LF, at a NUL byte, on read error/end of input, or when
+// max-1 characters have been stored; CR characters are dropped.
+// s is always terminated when max > 0; nothing is written when max <= 0
+// and nothing is read when max <= 1 (there is no room for any character).
+void finput(int fd,char* s,int max) {
+  int j=0;
+
+  if (max <= 0)
+    return;
+  // room is checked BEFORE storing, so the terminator always fits
+  while (j < max-1) {
+    char c;
+    if (read(fd,&c,1)<=0)
+      break;              // error or end of input
+    if ((c==0) || (c==10))
+      break;              // end of line
+    if (c!=13)            // skip CR
+      s[j++]=c;
+  }
+  s[j]='\0';
+}
+
+// Like linput, but reads from the memory buffer buff (optimized).
+// Copies the first line of buff (at most max characters, line end and
+// trailing CR excluded) into s and terminates it, so s must hold max+1 bytes.
+// Returns the offset to add to buff to reach the next line, i.e. the line
+// length plus one; returns 1 when buff is empty.
+// NOTE(review): when the last line has no '\n' the returned offset points
+// one past the terminator of buff -- callers must handle that.
+int binput(char* buff,char* s,int max) {
+  char* end;
+  int count;
+
+  // clear buffer
+  s[0]='\0';
+  // end of buffer?
+  if ( *buff == '\0')
+    return 1;
+  // find ending \n, or end of buffer
+  end=strchr(buff,'\n');
+  if (!end)
+    end=buff+strlen(buff);
+  // number of bytes to copy, clamped to [0, max]
+  count=(int) (end-buff);
+  if (count > max)
+    count=max;
+  if (count < 0)
+    count=0;
+  // strip the annoying ending cr: the last kept character is buff[count-1]
+  // (buff[count] is the '\n' or the terminator, never a CR to strip)
+  while( (count>0) && (buff[count-1] == '\r'))
+    count--;
+  // copy and terminate with a null char
+  if (count > 0)
+    memcpy(s, buff, count);
+  s[count]='\0';
+  // then return the supplemental jump offset
+  return (int) (end-buff)+1;
+}
+
+// Read one line from fp into s (buffer of max bytes).
+// Reading stops at LF, at end of file, or when max-1 characters have been
+// stored; CR, TAB and form-feed characters are dropped.
+// Returns the number of characters stored. s is always terminated when
+// max > 0; nothing is written when max <= 0 and nothing is read when
+// max <= 1 (there is no room for any character).
+int linput(FILE* fp,char* s,int max) {
+  int j=0;
+
+  if (max <= 0)
+    return 0;
+  // room is checked BEFORE storing, so the terminator always fits
+  while (j < (max-1)) {
+    int c=fgetc(fp);
+    if ((c==EOF) || (c==10))
+      break;                            // end of file or end of line
+    if ((c==13) || (c==9) || (c==12))
+      continue;                         // skip CR, TAB and FF
+    s[j++]=(char) c;
+  }
+  s[j]='\0';
+  return j;
+}
+// Read one line with linput() and copy it into s without its leading and
+// trailing spaces and tabs. Returns the length of the trimmed line
+// (0 when the temporary buffer could not be allocated).
+int linput_trim(FILE* fp,char* s,int max) {
+  char* line=(char*) malloct(max+2);
+  char* start;
+  int len;
+
+  s[0]='\0';
+  if (line == NULL)
+    return 0;
+
+  // read the raw line
+  len=linput(fp,line,max);
+
+  // drop trailing spaces and tabs
+  while ((len>0) && ((line[len-1]==' ') || (line[len-1]=='\t')))
+    line[--len]='\0';
+
+  // skip leading spaces and tabs
+  for(start=line ; (len>0) && ((*start==' ') || (*start=='\t')) ; start++)
+    len--;
+
+  if (len>0) {
+    memcpy(s,start,len);    // can copy \0 chars
+    s[len]='\0';
+  }
+
+  freet(line);
+  return len;
+}
+// Read one logical line: lines ending with a backslash are joined with the
+// following one (the backslash is removed). Each physical line is trimmed
+// by linput_trim(). Returns the total length stored in s.
+int linput_cpp(FILE* fp,char* s,int max) {
+  int rlen=0;
+
+  s[0]='\0';
+  for(;;) {
+    int got;
+    char tail;
+
+    // cut the final backslash left by the previous round
+    if ((rlen>0) && (s[rlen-1]=='\\'))
+      s[--rlen]='\0';
+    // read the next physical line right after what we already have
+    got=linput_trim(fp,s+rlen,max-rlen);
+    if (got>0)
+      rlen+=got;
+    // continue only while the text ends with a backslash and room remains
+    tail=(rlen>0) ? s[rlen-1] : s[0];
+    if ((tail!='\\') || (rlen>=max))
+      break;
+  }
+  return rlen;
+}
+
+// Same as linput, but special characters are kept: only CR is dropped.
+// Reading stops at LF, at end of file, or when max-1 characters have been
+// stored. s is always terminated when max > 0; nothing is written when
+// max <= 0 and nothing is read when max <= 1.
+void rawlinput(FILE* fp,char* s,int max) {
+  int j=0;
+
+  if (max <= 0)
+    return;
+  // room is checked BEFORE storing, so the terminator always fits
+  while (j < (max-1)) {
+    int c=fgetc(fp);
+    if ((c==EOF) || (c==10))
+      break;              // end of file or end of line
+    if (c==13)
+      continue;           // skip CR
+    s[j++]=(char) c;
+  }
+  s[j]='\0';
+}
+
+
+// Compare the beginning of f with s using streql(), where 'A' equals 'a'
+// (case insensitive).
+// Returns the length of s when f starts with s, 0 otherwise.
+int strfield(const char* f,const char* s) {
+  int matched=0;
+
+  for(;;) {
+    if (!streql(*f,*s) || ((*f)==0) || ((*s)==0))
+      break;
+    f++;
+    s++;
+    matched++;
+  }
+  // s fully consumed: it is a prefix of f
+  return (*s==0) ? matched : 0;
+}
+
+// Search the string o inside s using strfield(), which is case insensitive.
+// Returns the first matching position in s, or NULL when there is none.
+char* strstrcase(char *s,char *o) {
+  for( ; *s ; s++) {
+    if (strfield(s,o)!=0)
+      return s;
+  }
+  return NULL;
+}
+
+
+// Unicode detector
+// See http://www.unicode.org/unicode/reports/tr28/
+// (sect Table 3.1B. Legal UTF-8 Byte Sequences)
+// Bytes of the sequence being decoded: data[0..pos] are stored so far.
+typedef struct {
+ unsigned int pos;
+ unsigned char data[4];
+} t_auto_seq;
+
+// char between a and b (a and b are hexadecimal digits pasted after 0x)
+#define CHAR_BETWEEN(c, a, b) ( (c) >= 0x##a ) && ( (c) <= 0x##b )
+// sequence start: no table row has claimed the current byte yet
+#define SEQBEG ( inseq == 0 )
+// byte n of the sequence is available and lies in [a,b];
+// err receives the result of the range test (0 = out of range)
+#define BLK(n,a, b) ( (seq.pos >= n) && ((err = CHAR_BETWEEN(seq.data[n], a, b))) )
+#define ELT(n,a) BLK(n,a,a)
+// end: the sequence is complete (always true, sets ok)
+#define SEQEND ((ok = 1))
+// sequence started, character will fail if error (always true, sets inseq)
+#define IN_SEQ ( (inseq = 1) )
+// decoding error: a row was entered, did not complete, and its last range test failed
+#define BAD_SEQ ( (ok == 0) && (inseq != 0) && (!err) )
+// no sequence started
+#define NO_SEQ ( inseq == 0 )
+
+// is this block an UTF unicode textfile?
+// 0 : no
+// 1 : yes
+// -1: don't know
+// (-1 is what remains when no multi-byte sequence was met)
+// NOTE(review): a sequence left unfinished at the end of the buffer is not
+// reported as an error.
+int is_unicode_utf8(unsigned char* buffer, unsigned int size) {
+ t_auto_seq seq;
+ unsigned int i;
+ int is_utf=-1;
+
+ seq.pos=0;
+ for(i=0 ; i < size ; i++) {
+ // per-byte state, set as side effects of the macros above
+ unsigned int ok=0;
+ unsigned int inseq=0;
+ unsigned int err=0;
+
+ seq.data[seq.pos]=buffer[i];
+ // one row per legal sequence shape, selected by its first byte
+ /**/ if ( SEQBEG && BLK(0,00,7F) && IN_SEQ && SEQEND ) { }
+ else if ( SEQBEG && BLK(0,C2,DF) && IN_SEQ && BLK(1,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && ELT(0,E0 ) && IN_SEQ && BLK(1,A0,BF) && BLK(2,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && BLK(0,E1,EC) && IN_SEQ && BLK(1,80,BF) && BLK(2,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && ELT(0,ED ) && IN_SEQ && BLK(1,80,9F) && BLK(2,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && BLK(0,EE,EF) && IN_SEQ && BLK(1,80,BF) && BLK(2,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && ELT(0,F0 ) && IN_SEQ && BLK(1,90,BF) && BLK(2,80,BF) && BLK(3,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && BLK(0,F1,F3) && IN_SEQ && BLK(1,80,BF) && BLK(2,80,BF) && BLK(3,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && ELT(0,F4 ) && IN_SEQ && BLK(1,80,8F) && BLK(2,80,BF) && BLK(3,80,BF) && SEQEND ) { }
+ else if ( NO_SEQ ) { // bad, unknown
+ return 0;
+ }
+ /* */
+
+ /* Error */
+ if ( BAD_SEQ ) {
+ return 0;
+ }
+
+ /* unicode character */
+ if (seq.pos > 0)
+ is_utf=1;
+
+ /* Next */
+ if (ok)
+ seq.pos=0;
+ else
+ seq.pos++;
+
+ /* Internal error */
+ if (seq.pos >= 4)
+ return 0;
+
+ }
+
+ return is_utf;
+}
+
+// Count the occurrences of each byte value of buffer[0..size-1] into map,
+// an array of 256 counters that is cleared first.
+void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map) {
+  memset(map, 0, sizeof(unsigned int) * 256);
+  for( ; size > 0 ; size--) {
+    map[*buffer]++;
+    buffer++;
+  }
+}
+
+
+// Is the file an html file, judging by its name?
+//  0 : no
+//  1 : yes
+// -1 : unknown
+// -2 : unknown, no extension (also returned for an empty name)
+int ishtml(char* fil) {
+  char *a;
+  char *query=strchr(fil,'?');          // handles truc.html?Choix=toto
+  int namelen=(query != NULL) ? (int) (query - fil) : (int) strlen(fil);
+
+  // nothing before the query (or empty string): there is no last character
+  // to look at, and fil[-1] must not be read
+  if (namelen <= 0)
+    return -2;
+  a=fil+namelen-1;                      // last character of the name
+
+  if (*a=='/') return -1;               // directory: unknown
+
+  // walk back to the extension dot, without leaving the last path element
+  while ( (*a!='.') && (*a!='/') && ( a > fil)) a--;
+  if (*a=='.') {                        // has an extension
+    char fil_noquery[HTS_URLMAXSIZE*2];
+    fil_noquery[0]='\0';
+    a++;                                // point to the extension
+    strncat(fil_noquery,a,HTS_URLMAXSIZE);
+    a=strchr(fil_noquery,'?');
+    if (a)
+      *a='\0';
+    return ishtml_ext(fil_noquery);
+  }
+  return -2;                            // undetermined, for example /truc
+}
+
+// Same as ishtml, but for the extension only.
+//  1 : html, 0 : known type but not html, -1 : unknown
+int ishtml_ext(char* a) {
+  static char* const html_ext[] = {
+    "html", "htm", "shtml", "phtml", "htmlx", "shtm", "phtm", "htmx", NULL
+  };
+  int i;
+
+  // well-known html extensions first
+  for(i=0 ; html_ext[i] != NULL ; i++) {
+    if (strfield2(a,html_ext[i]))
+      return 1;
+  }
+
+  // no luck: ask the table of known types
+  switch(is_knowntype(a)) {
+  case 1:
+    return 0;     // known, not html
+  case 2:
+    return 1;     // known, html
+  default:
+    break;
+  }
+  return -1;      // unknown
+}
+
+// error (404,500..)
+HTS_INLINE int ishttperror(int err) {
+ switch (err/100) {
+ case 4: case 5: return 1;
+ break;
+ }
+ return 0;
+}
+
+
+// Returns the address part of source, past the protocol and past any
+// identification ending with '@'. The '@' is searched before the first '/'
+// (an email is sometimes passed in the address), skipping a leading ftp://.
+char* jump_identification(char* source) {
+  char* host=jump_protocol(source);
+  char* after_at=strrchr_limit(host, '@', strchr(host,'/'));
+
+  if (after_at == NULL)
+    return host;
+  return after_at;
+}
+
+// Find the port separator (the ':' of ":80"), or NULL if not found.
+// Can handle IPv6 addresses: the search starts after the last ']' found,
+// as in http://[3ffe:b80:1234::1]:80/foo.html
+// NOTE(review): the ']' search is limited by the first '/' of source, not of
+// the address returned by jump_identification() -- confirm this is intended
+// when source carries a scheme.
+char* jump_toport(char* source) {
+  char* host=jump_identification(source);
+  char* after_bracket=strrchr_limit(host, ']', strchr(source, '/'));
+
+  if (after_bracket != NULL)
+    return strchr(after_bracket, ':');
+  return strchr(host, ':');
+}
+
+// strrchr, but not too far: find the LAST occurrence of c in s located
+// before limit (the whole string is searched when limit is NULL).
+// Returns a pointer to the character FOLLOWING that occurrence, or NULL
+// when c is not found.
+char* strrchr_limit(char* s, char c, char* limit) {
+  char *after=NULL;
+  char *p;
+
+  if (limit == NULL) {
+    // no limit: the last occurrence of the whole string, consistently with
+    // what the limited search below returns
+    p=strrchr(s, c);
+    return p?(p+1):NULL;
+  }
+
+  // step from occurrence to occurrence, remembering the last one before limit
+  for(p=strchr(s, c) ; (p != NULL) && (p < limit) ; p=strchr(after, c))
+    after=p+1;
+  return after;
+}
+
+// Returns source without its leading scheme ("http:", "ftp:", "https:" or
+// "file:") and without the "//" that may follow.
+HTS_INLINE char* jump_protocol(char* source) {
+  // scheme
+  // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
+  int skip=strfield(source,"http:");
+
+  if (!skip)
+    skip=strfield(source,"ftp:");
+  if (!skip)
+    skip=strfield(source,"https:");
+  if (!skip)
+    skip=strfield(source,"file:");
+  source+=skip;
+
+  // net_path
+  if (strncmp(source,"//",2)==0)
+    source+=2;
+  return source;
+}
+
+// Base 64 encoding of the string a into b.
+// b must be large enough: 4 output characters per group of 3 input bytes
+// (rounded up), plus the terminator.
+void code64(char* a,char* b) {
+  static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+  while(*a) {
+    unsigned long group=0;
+    int taken=0;
+
+    // gather up to three bytes
+    while ((taken<3) && (*a)) {
+      group=(group<<8) | (unsigned long) ((*a++) & 0xff);
+      taken++;
+    }
+    // left-align the bytes read in a 24-bit group (missing bytes are zero)
+    group <<= 8*(3-taken);
+
+    // four 6-bit digits; the ones with no source byte become '='
+    *b++ = alphabet[(group>>18) & 63];
+    *b++ = alphabet[(group>>12) & 63];
+    *b++ = (taken>=2) ? alphabet[(group>>6) & 63] : '=';
+    *b++ = (taken>=3) ? alphabet[group & 63] : '=';
+  }
+  *b='\0';
+}
+
+// Replace character entities (&quot; &amp; &eacute; &#65; &#x41; ...) by the
+// corresponding single byte, in place.
+//  - only entities whose ';' lies at most 8 characters after the '&' are
+//    looked at,
+//  - numeric references are replaced only for values 1..255: a larger code
+//    point cannot be stored in one byte and is left untouched,
+//  - unknown entities are left untouched,
+//  - &nbsp; becomes a plain space, &laquo; &raquo; become '"', &shy; '-'.
+// The string only shrinks, and is edited with memmove(): its length is not
+// limited by any temporary buffer.
+#define strcmpbeg(a, b) strncmp(a, b, strlen(b))
+void unescape_amp(char* s) {
+  // http://www.w3.org/TR/xhtml-modularization/dtd_module_defs.html
+  static const struct {
+    const char* name;
+    unsigned char code;
+  } entities[] = {
+    {"&nbsp;",32},      // hack - instead of 160
+    {"&iexcl;",161}, {"&cent;",162}, {"&pound;",163}, {"&curren;",164},
+    {"&yen;",165}, {"&brvbar;",166}, {"&sect;",167}, {"&uml;",168},
+    {"&copy;",169}, {"&ordf;",170}, {"&not;",172}, {"&reg;",174},
+    {"&macr;",175}, {"&deg;",176}, {"&plusmn;",177}, {"&sup2;",178},
+    {"&sup3;",179}, {"&acute;",180}, {"&micro;",181}, {"&para;",182},
+    {"&middot;",183}, {"&cedil;",184}, {"&sup1;",185}, {"&ordm;",186},
+    {"&frac14;",188}, {"&frac12;",189}, {"&frac34;",190}, {"&iquest;",191},
+    {"&Agrave;",192}, {"&Aacute;",193}, {"&Acirc;",194}, {"&Atilde;",195},
+    {"&Auml;",196}, {"&Aring;",197}, {"&AElig;",198}, {"&Ccedil;",199},
+    {"&Egrave;",200}, {"&Eacute;",201}, {"&Ecirc;",202}, {"&Euml;",203},
+    {"&Igrave;",204}, {"&Iacute;",205}, {"&Icirc;",206}, {"&Iuml;",207},
+    {"&ETH;",208}, {"&Ntilde;",209}, {"&Ograve;",210}, {"&Oacute;",211},
+    {"&Ocirc;",212}, {"&Otilde;",213}, {"&Ouml;",214}, {"&times;",215},
+    {"&Oslash;",216}, {"&Ugrave;",217}, {"&Uacute;",218}, {"&Ucirc;",219},
+    {"&Uuml;",220}, {"&Yacute;",221}, {"&THORN;",222}, {"&szlig;",223},
+    {"&agrave;",224}, {"&aacute;",225}, {"&acirc;",226}, {"&atilde;",227},
+    {"&auml;",228}, {"&aring;",229}, {"&aelig;",230}, {"&ccedil;",231},
+    {"&egrave;",232}, {"&eacute;",233}, {"&ecirc;",234}, {"&euml;",235},
+    {"&igrave;",236}, {"&iacute;",237}, {"&icirc;",238}, {"&iuml;",239},
+    {"&eth;",240}, {"&ntilde;",241}, {"&ograve;",242}, {"&oacute;",243},
+    {"&ocirc;",244}, {"&otilde;",245}, {"&ouml;",246}, {"&divide;",247},
+    {"&oslash;",248}, {"&ugrave;",249}, {"&uacute;",250}, {"&ucirc;",251},
+    {"&uuml;",252}, {"&yacute;",253}, {"&thorn;",254}, {"&yuml;",255},
+    // entities mapped to an ASCII stand-in
+    {"&amp;",'&'}, {"&gt;",'>'}, {"&laquo;",'\"'}, {"&lt;",'<'},
+    {"&quot;",'\"'}, {"&raquo;",'\"'}, {"&shy;",'-'}, {"&tilde;",'~'}
+  };
+  const int nb_entities=(int) (sizeof(entities)/sizeof(entities[0]));
+
+  while(*s) {
+    if (*s=='&') {
+      char* end=strchr(s,';');
+      if ( end && (((int) (end - s)) <= 8) ) {
+        unsigned char c=0;      // replacement byte, 0 = leave untouched
+
+        if (strcmpbeg(s, "&#") == 0) {
+          // numeric character reference, hexadecimal or decimal
+          if ( (s[2] == 'x') || (s[2] == 'X')) {
+            unsigned int num=0;
+            if ((sscanf(s+3, "%x", &num) == 1) && (num >= 1) && (num <= 255))
+              c=(unsigned char)num;
+          } else {
+            int num=0;
+            if ((sscanf(s+2, "%d", &num) == 1) && (num >= 1) && (num <= 255))
+              c=(unsigned char)num;
+          }
+        } else {
+          // named entity: names include '&' and ';', so a prefix match on
+          // the whole name is an exact match
+          int k;
+          for(k=0 ; k<nb_entities ; k++) {
+            if (strcmpbeg(s, entities[k].name)==0) {
+              c=entities[k].code;
+              break;
+            }
+          }
+        }
+
+        // replace: put the byte in place of '&' and pull the tail over the
+        // rest of the entity
+        if (c) {
+          *s=(char) c;
+          memmove(s+1,end+1,strlen(end+1)+1);
+        }
+      }
+    }
+    s++;
+  }
+}
+
+// Replace %xx escapes by the byte they encode (%20 -> ' ', etc.).
+// Returns the buffer reserved by NOSTATIC_RESERVE (HTS_URLMAXSIZE*2 bytes
+// requested); the output is truncated to fit in it.
+// A '%' that is not followed by two characters is copied as is, so the
+// decoder never reads past the end of s.
+char* unescape_http(char* s) {
+  char* tempo;
+  int i,j=0;
+  int len=(int) strlen(s);
+  NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2);
+  for (i=0 ; (i<len) && (j<HTS_URLMAXSIZE*2-1) ; i++) {
+    if ((s[i]=='%') && (s[i+1]!='\0') && (s[i+2]!='\0')) {
+      tempo[j++]=(char) ehex(s+i+1);
+      i+=2;     // skip the two hexadecimal digits
+    }
+    // '|' is deliberately NOT turned into ':' (file:///C|Program%20Files...)
+    // because of URLs like /home/0,1837,1|7|1173|Content,00.html
+    else
+      tempo[j++]=s[i];
+  }
+  tempo[j]='\0';
+  return tempo;
+}
+
+// unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI
+// A %xx escape is decoded unless the byte it encodes belongs to one of the
+// CHAR_RESERVED, CHAR_DELIM, CHAR_UNWISE, CHAR_LOW or CHAR_XXAVOID classes,
+// or to CHAR_HIG when no_high is set; in that case the escape is kept.
+// Returns the buffer reserved by NOSTATIC_RESERVE (HTS_URLMAXSIZE*2 bytes
+// requested); the output is truncated to fit in it.
+// A '%' that is not followed by two characters is copied as is, so the
+// decoder never reads past the end of s.
+char* unescape_http_unharm(char* s, int no_high) {
+  char* tempo;
+  int i,j=0;
+  int len=(int) strlen(s);
+  NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2);
+  for (i=0 ; (i<len) && (j<HTS_URLMAXSIZE*2-1) ; i++) {
+    if ((s[i]=='%') && (s[i+1]!='\0') && (s[i+2]!='\0')) {
+      int nchar=(char) ehex(s+i+1);
+
+      int test = ( CHAR_RESERVED(nchar)
+                || CHAR_DELIM(nchar)
+                || CHAR_UNWISE(nchar)
+                || CHAR_LOW(nchar)        /* CHAR_SPECIAL */
+                || CHAR_XXAVOID(nchar)
+                || (
+                     (no_high)
+                     &&
+                     CHAR_HIG(nchar)
+                   )
+                );
+
+      if (!test) {
+        tempo[j++]=(char) ehex(s+i+1);
+        i+=2;           // skip the two hexadecimal digits
+      } else {
+        tempo[j++]='%'; // keep the escape: its digits are copied next
+      }
+    }
+    // '|' is deliberately NOT turned into ':' (file:///C|Program%20Files...)
+    // because of URLs like /home/0,1837,1|7|1173|Content,00.html
+    else
+      tempo[j++]=s[i];
+  }
+  tempo[j]='\0';
+  return tempo;
+}
+
+// In-place escaping helpers: each one calls x_escape_http() with a fixed
+// mode. The string grows by two bytes per escaped character, so the buffer
+// must have room for it (original note: buffer MAX 1KB).
+
+// escape spaces only (mode 2)
+void escape_spc_url(char* s) {
+ x_escape_http(s,2);
+}
+// smith / john -> smith%20%2f%20john
+// (mode 1: reserved, delimiter, unwise, special and "avoid" characters)
+void escape_in_url(char* s) {
+ x_escape_http(s,1);
+}
+// smith / john -> smith%20/%20john
+// (mode 3: special and "avoid" characters only)
+void escape_uri(char* s) {
+ x_escape_http(s,3);
+}
+// same as escape_uri, but with mode 30: CHAR_LOW is tested instead of
+// CHAR_SPECIAL
+void escape_uri_utf(char* s) {
+ x_escape_http(s,30);
+}
+// escape double quotes and spaces only (mode 0)
+void escape_check_url(char* s) {
+ x_escape_http(s,0);
+}
+// same as escape_check_url, but returns char*
+// (works on a copy of s made by concat(); s itself is not modified)
+char* escape_check_url_addr(char* s) {
+ char* adr;
+ escape_check_url(adr = concat(s,""));
+ return adr;
+}
+
+
+// In-place escaping of s: every character selected by 'mode' is replaced by
+// its %xx form (two lowercase hexadecimal digits).
+//   mode 0 : double quote and space
+//   mode 1 : reserved, delimiter, unwise, special and "avoid" characters
+//   mode 2 : space only
+//   mode 3 : special and "avoid" characters (only what is necessary)
+//   mode 30: low and "avoid" characters (only what is necessary)
+// Any other mode escapes nothing.
+// The string grows by two characters per escaped character: the caller's
+// buffer must be large enough.
+// The tail of the string is shifted with memmove() rather than being copied
+// through a fixed-size temporary buffer, which a long string could overflow.
+void x_escape_http(char* s,int mode) {
+  static const char hexdigits[]="0123456789abcdef";
+  while(*s) {
+    int test=0;
+    if (mode == 0)
+      test=(strchr("\" ",*s)!=0);
+    else if (mode==1) {
+      test = ( CHAR_RESERVED(*s)
+        || CHAR_DELIM(*s)
+        || CHAR_UNWISE(*s)
+        || CHAR_SPECIAL(*s)
+        || CHAR_XXAVOID(*s) );
+    }
+    else if (mode==2)
+      test=(strchr(" ",*s)!=0);   // escape spaces only
+    else if (mode==3) {           // escape only what is necessary
+      test = (
+        CHAR_SPECIAL(*s)
+        || CHAR_XXAVOID(*s) );
+    }
+    else if (mode==30) {          // escape only what is necessary
+      test = (
+        CHAR_LOW(*s)
+        || CHAR_XXAVOID(*s) );
+    }
+
+    if (test) {
+      unsigned char n=(unsigned char) *s;
+      // open a 2-char gap after the current position (terminating NUL included)
+      memmove(s+3,s+1,strlen(s+1)+1);
+      s[0]='%';
+      s[1]=hexdigits[n>>4];
+      s[2]=hexdigits[n&15];
+    }
+    // as before, only one char is skipped: the two digits just written are tested too
+    s++;
+  }
+}
+
+
+// Value (0..15) of one hexadecimal digit, upper or lower case.
+// Returns 0 when c is not a hexadecimal digit.
+HTS_INLINE int ehexh(char c) {
+  if ((c>='0') && (c<='9'))
+    return c-'0';
+  else if ((c>='A') && (c<='F'))
+    return 10+(c-'A');
+  else if ((c>='a') && (c<='f'))
+    return 10+(c-'a');
+  return 0;
+}
+
+// Value (0..255) of the two hexadecimal digits found at s[0] and s[1]
+HTS_INLINE int ehex(char* s) {
+  int high=ehexh(s[0]);
+  int low=ehexh(s[1]);
+  return (high*16)+low;
+}
+
+// concat: concatenate two strings and return the result
+// (this greatly lightens the calling code)
+// The result is stored in one of 16 rotating buffers, so no more than
+// 16 concat() results can be used within a single expression.
+// Input that does not fit in a slot is truncated instead of overflowing it,
+// and a NULL 'a' is handled like an empty string.
+typedef struct {
+  char buff[16][HTS_URLMAXSIZE*2*2];
+  int rol;
+} concat_strc;
+char* concat(const char* a,const char* b) {
+  concat_strc* strc;
+  char* dest;
+  size_t room;
+  NOSTATIC_RESERVE(strc, concat_strc, 1);
+  strc->rol=((strc->rol+1)%16);    // roving pointer
+  dest=strc->buff[strc->rol];
+  room=sizeof(strc->buff[0])-1;    // keep one char for the terminating NUL
+  dest[0]='\0';
+  if (a) strncat(dest,a,room);
+  if (b) strncat(dest,b,room-strlen(dest));
+  return dest;
+}
+// file name conversion: '/' -> backslash (only when HTS_DOSNAME is set)
+#if HTS_DOSNAME
+// in-place conversion; returns its argument
+char* __fconv(char* a) {
+  char* p;
+  for(p=a;*p!='\0';p++) {
+    if (*p=='/')      // convert
+      *p='\\';
+  }
+  return a;
+}
+// concat(a,b), then conversion of the concatenated copy
+char* fconcat(char* a,char* b) {
+  char* joined=concat(a,b);
+  return __fconv(joined);
+}
+// conversion of a copy of a (a itself is left untouched)
+char* fconv(char* a) {
+  char* copy=concat(a,"");
+  return __fconv(copy);
+}
+#endif
+
+/* backslash -> '/' ('/' is left as it is) */
+// in-place conversion; returns its argument
+char* __fslash(char* a) {
+  char* p;
+  for(p=a;*p!='\0';p++) {
+    if (*p=='\\')     // convert
+      *p='/';
+  }
+  return a;
+}
+// conversion of a copy of a made with concat() (a itself is left untouched)
+char* fslash(char* a) {
+  char* copy=concat(a,"");
+  return __fslash(copy);
+}
+
+// lower case conversion of a copy of a; the copy is stored in one of
+// 16 rotating buffers (same layout as concat())
+// Input that does not fit in a slot is truncated instead of overflowing it.
+char* convtolower(char* a) {
+  concat_strc* strc;
+  char* dest;
+  NOSTATIC_RESERVE(strc, concat_strc, 1);
+  strc->rol=((strc->rol+1)%16);    // roving pointer
+  dest=strc->buff[strc->rol];
+  dest[0]='\0';
+  strncat(dest,a,sizeof(strc->buff[0])-1);
+  hts_lowcase(dest);               // lower case
+  return dest;
+}
+
+// in-place lower case conversion (only 'A'..'Z' are changed)
+void hts_lowcase(char* s) {
+  char* p;
+  for(p=s;*p!='\0';p++) {
+    if ((*p>='A') && (*p<='Z'))
+      *p+=('a'-'A');
+  }
+}
+
+// replace every occurrence of the character 'from' by 'to' in s (in place)
+// Nothing is done when from==to (the search would never end) or when from is
+// NUL (strchr() would match the terminator and the loop would leave the string).
+HTS_INLINE void hts_replace(char *s,char from,char to) {
+  char* a;
+  if ((from=='\0') || (from==to))
+    return;
+  for(a=strchr(s,from);a!=NULL;a=strchr(a+1,from)) {
+    *a=to;
+    if (to=='\0')     // the string now ends here: nothing left to scan
+      break;
+  }
+}
+
+
+// caractère espace, guillemets, CR, LF etc..
+/* SECTION OPTIMISEE:
+ #define is_space(c) (strchr(" \"\x0d\x0a\x09'",c)!=NULL)
+ #define is_realspace(c) (strchr(" \x0d\x0a\x09\x0c",c)!=NULL)
+*/
+/*
+HTS_INLINE int is_space(char c) {
+ if (c==' ') return 1; // spc
+ if (c=='"') return 1; // quote
+ if (c==10) return 1; // lf
+ if (c==13) return 1; // cr
+ if (c=='\'') return 1; // quote
+ //if (c=='`') return 1; // backquote << non
+ if (c==9) return 1; // tab
+ return 0;
+}
+*/
+
+// caractère espace, CR, LF, TAB
+/*
+HTS_INLINE int is_realspace(char c) {
+ if (c==' ') return 1; // spc
+ if (c==10) return 1; // lf
+ if (c==13) return 1; // cr
+ if (c==9) return 1; // tab
+ return 0;
+}
+*/
+
+
+
+
+
+// guess the MIME type of a local file
+// example: fil="toto.gif" -> s="image/gif"
+// same as get_httptype() with flag=1, i.e. a type is always written to s
+void guess_httptype(char *s,char *fil) {
+ get_httptype(s,fil,1);
+}
+// get the MIME type of a file name
+// s   : output buffer (provided by the caller)
+// fil : file name
+// flag: if set, always give a type: "application/<ext>" for an unknown
+//       extension, "application/octet-stream" when there is no extension;
+//       if not set, s is left untouched in these two cases
+// An empty file name is handled like a name without extension (the backward
+// scan used to start one char before the beginning of the string).
+void get_httptype(char *s,char *fil,int flag) {
+  if (ishtml(fil)==1)
+    strcpy(s,"text/html");
+  else {
+    size_t len=strlen(fil);
+    char *a=(len>0) ? (fil+len-1) : fil;
+    // go back to the last '.' of the last path component
+    while ( (a>fil) && (*a!='.') && (*a!='/') ) a--;
+    if (*a=='.') {
+      int ok=0;
+      int j=0;
+      a++;
+      while( (!ok) && (strnotempty(hts_mime[j][1])) ) {
+        if (strfield2(hts_mime[j][1],a)) {
+          if (hts_mime[j][0][0]!='*') {    // a matching entry exists
+            strcpy(s,hts_mime[j][0]);
+            ok=1;
+          }
+        }
+        j++;
+      }
+
+      // NOTE(review): the size of s is not known here, a very long extension
+      // could overflow it - confirm against the callers
+      if (!ok) if (flag) sprintf(s,"application/%s",a);
+    } else {
+      if (flag) strcpy(s,"application/octet-stream");
+    }
+  }
+}
+
+// get the user-defined type of an extension (php)
+// setdefs: if set, s is remembered as the user MIME table and 1 is returned;
+//          the table is made of lines such as "asp=text/html\nphp=text/html\n",
+//          several extensions may share one type ("php\ncgi=text/html")
+// s      : (setdefs==0) output buffer for the type (text/html), or NULL
+// ext    : (setdefs==0) extension to look for
+// return : 1 if known by user
+// An extension too long for the search pattern is reported as unknown
+// (it used to overflow the pattern buffer).
+int get_userhttptype(int setdefs,char *s,char *ext) {
+  char** buffer=NULL;
+  NOSTATIC_RESERVE(buffer, char*, 1);
+  if (setdefs) {
+    *buffer=s;
+    return 1;
+  } else {
+    if (s)
+      s[0]='\0';
+    if (!ext)
+      return 0;
+    if (*buffer) {
+      char search[1024];
+      char* detect;
+      // room is needed for the leading '\n', the trailing char and the NUL
+      if (strlen(ext) > sizeof(search)-3)
+        return 0;
+      sprintf(search,"\n%s=",ext);       // php=text/html
+      detect=strstr(*buffer,search);
+      if (!detect) {
+        sprintf(search,"\n%s\n",ext);    // php\ncgi=text/html
+        detect=strstr(*buffer,search);
+      }
+      if (detect) {
+        detect=strchr(detect,'=');
+        if (detect) {
+          detect++;
+          if (s) {
+            char* a;
+            a=strchr(detect,'\n');
+            if (a) {
+              // NOTE(review): the size of s is not known here - confirm that
+              // callers pass a buffer large enough for any table entry
+              strncat(s,detect,(int) (a - detect));
+            }
+          }
+          return 1;
+        }
+      }
+    }
+  }
+  return 0;
+}
+// give the file extension of a MIME type
+// example: "image/gif" -> gif
+// s is set to "" when no extension can be given
+void give_mimext(char *s,char *st) {
+  int j;
+  int found=0;
+  s[0]='\0';
+  // look into the MIME table first ('*' entries are not real extensions)
+  for(j=0;(!found) && (strnotempty(hts_mime[j][1]));j++) {
+    if ( (strfield2(hts_mime[j][0],st)) && (hts_mime[j][1][0]!='*') ) {
+      strcpy(s,hts_mime[j][1]);
+      found=1;
+    }
+  }
+  // wrap "x" mimetypes, such as:
+  // application/x-mp3
+  // or
+  // application/mp3
+  if (!found) {
+    int p;
+    char* a=NULL;
+    if ((p=strfield(st,"application/x-")))
+      a=st+p;
+    else if ((p=strfield(st,"application/")))
+      a=st+p;
+    if (a) {
+      int n=(int) strlen(a);
+      if ((n>=1) && (n<=4))     // only accept 1 to 4 chars extensions
+        strcpy(s,a);
+    }
+  }
+}
+// known extension?
+// 0 : no
+// 1 : yes
+// 2 : html
+// the built-in MIME table is searched first, then the user-defined types
+int is_knowntype(char *fil) {
+  int j;
+  if (fil==NULL)
+    return 0;
+  for(j=0;strnotempty(hts_mime[j][1]);j++) {
+    if (strfield2(hts_mime[j][1],fil))
+      return (strfield2(hts_mime[j][0],"text/html")) ? 2 : 1;
+  }
+
+  // Known by user?
+  return (is_userknowntype(fil));
+}
+// extension of a file name: html,gif..
+// The text following the last '.' of the last path component is returned,
+// cut at the first '?' if any; "" is returned when there is no extension.
+// The result is a copy made with concat().
+// An empty file name gives "" (the backward scan used to start one char
+// before the beginning of the string).
+char* get_ext(char *fil) {
+  char* fil_noquery;
+  size_t len=strlen(fil);
+  char *a=(len>0) ? (fil+len-1) : fil;
+  NOSTATIC_RESERVE(fil_noquery, char, HTS_URLMAXSIZE*2);
+
+  while ( (a>fil) && (*a!='.') && (*a!='/') ) a--;
+  if (*a=='.') {
+    fil_noquery[0]='\0';
+    a++;      // point to the extension
+    strncat(fil_noquery,a,HTS_URLMAXSIZE);
+    a=strchr(fil_noquery,'?');
+    if (a)
+      *a='\0';
+    return concat(fil_noquery,"");
+  }
+  else
+    return "";
+}
+// type known by the user? (fil is the extension passed to get_userhttptype())
+// 0 : no
+// 1 : yes
+// 2 : html
+// the user table itself is set through get_userhttptype(1,...), for example with
+// (char*) "asp=text/html\nphp=text/html\n"
+int is_userknowntype(char *fil) {
+  char mime[1024];
+  if ((fil==NULL) || (!strnotempty(fil)))
+    return 0;
+  mime[0]='\0';
+  get_userhttptype(0,mime,fil);
+  if (!strnotempty(mime))
+    return 0;
+  return (strfield2(mime,"text/html")) ? 2 : 1;
+}
+
+// dynamic page? (1 if the extension is listed in hts_ext_dynamic, else 0)
+// example: is_dyntype(get_ext("foo.asp"))
+int is_dyntype(char *fil) {
+  int j;
+  if ((fil==NULL) || (!strnotempty(fil)))
+    return 0;
+  for(j=0;strnotempty(hts_ext_dynamic[j]);j++) {
+    if (strfield2(hts_ext_dynamic[j],fil))
+      return 1;
+  }
+  return 0;
+}
+
+// critical types which must not be changed, because they are sent by servers
+// that do not know the real type
+// returns 1 for such a type (hypertext types, or types listed in hts_mime_keep), else 0
+int may_unknown(char* st) {
+  int j;
+  // media types
+  if (may_be_hypertext_mime(st))
+    return 1;
+  for(j=0;strnotempty(hts_mime_keep[j]);j++) {
+    if (strfield2(hts_mime_keep[j],st))    // found
+      return 1;
+  }
+  return 0;
+}
+
+
+
+// -- Utils fichiers
+
+// pretty print for i/o: write buff to fp with 'prefix' in front of every
+// non-empty line; CR are dropped and each LF is written as CR LF
+// The prefix is written verbatim with fputs(): it used to be passed as the
+// fprintf() format string, so any '%' in it was interpreted as a conversion.
+void fprintfio(FILE* fp,char* buff,char* prefix) {
+  char nl=1;      // set when the next character starts a new line
+  while(*buff) {
+    switch(*buff) {
+    case 13: break;
+    case 10:
+      fprintf(fp,"\r\n");
+      nl=1;
+      break;
+    default:
+      if (nl)
+        fputs(prefix,fp);
+      nl=0;
+      fputc(*buff,fp);
+    }
+    buff++;
+  }
+}
+
+/* Does the file exist? (or rather: can it be opened for reading?) */
+/* returns 1 if so, 0 otherwise (an empty name is never found) */
+int fexist(char* s) {
+  FILE* fp;
+  if (!strnotempty(s))      // empty name: not found
+    return 0;
+  fp=fopen(fconv(s),"rb");
+  if (fp==NULL)
+    return 0;
+  fclose(fp);
+  return 1;
+}
+
+/* Size of a file, -1 if it does not exist (or if the name is empty) */
+/* fp->_cnt does not work on all platforms :-(( */
+/* Note: NOT YET READY FOR 64-bit */
+//LLint fsize(char* s) {
+/*
+Disabled Win32 variant, kept for reference:
+#if HTS_WIN
+ HANDLE hFile;
+ DWORD dwSizeHigh = 0;
+ DWORD dwSizeLow = 0;
+ hFile = CreateFile(s,0,0,NULL,OPEN_EXISTING,0,NULL);
+ if (hFile) {
+ dwSizeLow = GetFileSize (hFile, & dwSizeHigh) ;
+ CloseHandle(hFile);
+ if (dwSizeLow != 0xFFFFFFFF)
+ return (dwSizeLow & (dwSizeHigh<<32));
+ else
+ return -1;
+ } else
+ return -1;
+#else
+ (portable code)
+#endif
+*/
+int fsize(char* s) {
+  FILE* fp;
+  int size;
+  if (strnotempty(s)==0)    // empty name: error
+    return -1;
+  fp=fopen(fconv(s),"rb");
+  if (fp==NULL)
+    return -1;
+  // the size is the position reached after seeking to the end
+  fseek(fp,0,SEEK_END);
+  size=ftell(fp);
+  fclose(fp);
+  return size;
+}
+
+/* Size of an already opened file, -1 if fp is NULL */
+/* the current position of fp is restored before returning */
+int fpsize(FILE* fp) {
+  int saved,total;
+  if (fp==NULL)
+    return -1;
+  saved=ftell(fp);
+  fseek(fp,0,SEEK_END);
+  total=ftell(fp);
+  fseek(fp,saved,SEEK_SET);
+  return total;
+}
+
+/* root dir, with ending / */
+/*
+  hts_rootdir(file) with a non-NULL file: on the first call only, the root
+    dir is set to the directory part of file (backslashes are turned into
+    '/'), or to the current directory when file has no directory part;
+    NULL is always returned
+  hts_rootdir(NULL): returns the root dir, or "" if it has not been set yet
+  A path too long for the internal buffer is truncated (it used to overflow it).
+*/
+typedef struct {
+  char path[1024+4];
+  int init;
+} hts_rootdir_strc;
+char* hts_rootdir(char* file) {
+  static hts_rootdir_strc strc = {"", 0};
+  //NOSTATIC_RESERVE(strc, hts_rootdir_strc, 1);
+  if (file) {
+    if (!strc.init) {
+      strc.path[0]='\0';
+      strc.init=1;
+      if (strnotempty(file)) {
+        char* a;
+        strncat(strc.path,file,sizeof(strc.path)-1);
+        while((a=strrchr(strc.path,'\\'))) *a='/';
+        // keep the directory part only, ending '/' included
+        if ((a=strrchr(strc.path,'/'))) {
+          *(a+1)='\0';
+        } else
+          strc.path[0]='\0';
+      }
+      if (!strnotempty(strc.path)) {
+        // 1024 leaves room for the '/' appended below
+        if( getcwd( strc.path, 1024 ) == NULL )
+          strc.path[0]='\0';
+        else
+          strcat(strc.path,"/");
+      }
+    }
+    return NULL;
+  } else if (strc.init)
+    return strc.path;
+  else
+    return "";
+}
+
+
+
+hts_stat_struct HTS_STAT;
+//
+// return number of downloadable bytes, depending on rate limiter
+// see engine_stats() routine, too
+// rate<=0 means no limit: TAILLE_BUFFER is returned
+// this routine works quite well for big files and regular ones, but apparently the rate limiter has
+// some problems with very small files (rate too high)
+LLint check_downloadable_bytes(int rate) {
+  TStamp now, elapsed;
+  LLint received, left;
+  int oldest;
+
+  if (rate<=0)
+    return TAILLE_BUFFER;
+
+  // get the older of the two timers
+  oldest = (HTS_STAT.istat_idlasttimer + 1) % 2;
+
+  now=mtime_local();
+  elapsed = now - HTS_STAT.istat_timestart[oldest];
+  received = (HTS_STAT.HTS_TOTAL_RECV-HTS_STAT.istat_bytes[oldest]);
+
+  // bytes allowed for the period (elapsed is divided by 1000), minus bytes already received
+  left = ((rate * elapsed)/1000) - received;
+  return (left <= 0) ? 0 : left;
+}
+
+//
+// 0 : OK
+// 1 : slow down
+#if 0
+int HTS_TOTAL_RECV_CHECK(int var) {
+ if (HTS_STAT.HTS_TOTAL_RECV_STATE)
+ return 1;
+ /*
+ {
+ if (HTS_STAT.HTS_TOTAL_RECV_STATE==3) {
+ var = min(var,32);
+ Sleep(250);
+ } else if (HTS_STAT.HTS_TOTAL_RECV_STATE==2) {
+ var = min(var,256);
+ Sleep(100);
+ } else {
+ var/=2;
+ if (var<=0) var=1;
+ Sleep(50);
+ }
+ }
+ */
+ return 0;
+}
+#endif
+
+// Read at most size bytes into buff, using the connection r (htsblk structure):
+// fread() on r->fp when r->is_file is set, else SSL_read() (SSL connection, only
+// when built with HTS_USEOPENSSL) or recv() on r->soc
+// >0 : data received
+// == 0 : not yet data
+// <0 : no more data or error
+// NOTE(review): because of the way the braces are shared with the #if blocks,
+// HTS_STAT.HTS_TOTAL_RECV is updated for socket reads only when HTS_USEOPENSSL
+// is set, but for file reads too when it is not set - confirm which one is intended
+HTS_INLINE int hts_read(htsblk* r,char* buff,int size) {
+ int retour;
+ // return read(soc,buff,size);
+ if (r->is_file) {
+#if HTS_WIDE_DEBUG
+ DEBUG_W("read\n");
+#endif
+ if (r->fp)
+ retour=fread(buff,1,size,r->fp);
+ else
+ retour=-1;
+ } else {
+#if HTS_WIDE_DEBUG
+ DEBUG_W("recv\n");
+ if (r->soc==INVALID_SOCKET)
+ printf("!!WIDE_DEBUG ERROR, soc==INVALID hts_read\n");
+#endif
+ //HTS_TOTAL_RECV_CHECK(size); // reduce if needed when too much data has been received
+#if HTS_USEOPENSSL
+ if (r->ssl) {
+ retour = SSL_read(r->ssl_con, buff, size);
+ if (retour <= 0) {
+ int err_code = SSL_get_error(r->ssl_con, retour);
+ if (
+ (err_code == SSL_ERROR_WANT_READ)
+ ||
+ (err_code == SSL_ERROR_WANT_WRITE)
+ )
+ {
+ retour = 0; /* no data yet (ssl cache) */
+ } else {
+ retour = -1; /* eof or error */
+ }
+ }
+ } else {
+#endif
+ retour=recv(r->soc,buff,size,0);
+ }
+ if (retour > 0) // count incoming bytes
+ HTS_STAT.HTS_TOTAL_RECV+=retour;
+#if HTS_USEOPENSSL
+ }
+#endif
+#if HTS_WIDE_DEBUG
+ DEBUG_W("recv/read done\n");
+#endif
+ return retour;
+}
+
+
+// -- Gestion cache DNS --
+// 'RX98
+#if HTS_DNSCACHE
+
+// 'capsule' containing only the cache: returns the t_dnscache object set up
+// by NOSTATIC_RESERVE, which the other DNS routines use as the head of the list
+t_dnscache* _hts_cache(void) {
+ t_dnscache* cache;
+ NOSTATIC_RESERVE(cache, t_dnscache, 1);
+ return cache;
+}
+
+// lock the dns cache for any add operation
+// safer when several threads may write into it..
+// -1: status? 0: release 1: lock
+
+/*
+ Simple lock function for cache
+
+ Parameter:
+ 1 wait for lock (mutex) available and lock it
+ 0 unlock the mutex
+ [-1 check if locked]
+ -999 initialize
+ Return value:
+ with USE_BEGINTHREAD, the value returned by htsSetLock()
+ otherwise the parameter itself when it is >=0, and 0 when it is negative
+ (no state is kept, so the -1 check always reports "not locked")
+*/
+#if USE_BEGINTHREAD
+int _hts_lockdns(int i) {
+  static PTHREAD_LOCK_TYPE dns_mutex;
+  return htsSetLock(&dns_mutex,i);
+}
+#else
+int _hts_lockdns(int i) {
+  return (i>=0) ? i : 0;
+}
+#endif
+
+// cache lookup routine - 'retour' is the structure to fill in and to return
+// returns NULL if the name is not in the cache yet, or if its entry has
+// host_length==0 (resolution in progress)
+// returns retour otherwise; retour->h_length==0 then means that the name
+// does not exist in the dns (entry with a negative host_length)
+t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour) {
+  t_dnscache* entry=cache;
+
+  // wait until the dns cache is ready
+  while(_hts_lockdns(-1));     // wait for release
+  _hts_lockdns(1);             // lock
+
+  // walk the list until the name is found
+  while(strcmp(entry->iadr,iadr)!=0) {
+    if (entry->n==NULL) {
+      _hts_lockdns(0);         // unlock
+      return NULL;             // not present
+    }
+    entry=entry->n;            // next one!
+  }
+
+  if (entry->host_length==0) { // in progress
+    _hts_lockdns(0);           // unlock
+    return NULL;
+  }
+  if (entry->host_length>0) {  // valid entry
+    if (retour->h_addr_list[0])
+      memcpy(retour->h_addr_list[0], entry->host_addr, entry->host_length);
+    retour->h_length=entry->host_length;
+  } else {                     // dns error, already checked
+    if (retour->h_addr_list[0])
+      retour->h_addr_list[0][0]='\0';
+    retour->h_length=0;        // error, does not exist
+  }
+  _hts_lockdns(0);             // unlock
+  return retour;
+}
+
+// test whether iadr has already been tested (or is being tested)
+// 1 : numeric address, or name present in the dns cache
+// 2 : not present in the dns cache
+int hts_dnstest(char* _iadr) {
+  char* iadr;
+  char* port;
+  int present=0;
+  t_dnscache* cache=_hts_cache();    // cache address
+  NOSTATIC_RESERVE(iadr, char, HTS_URLMAXSIZE*2);
+
+  // skip user:pass@ if any
+  strcpy(iadr,jump_identification(_iadr));
+  // cut :port if any
+  port=jump_toport(iadr);
+  if (port)
+    *port='\0';
+
+#if HTS_WIN
+  if (inet_addr(iadr)!=INADDR_NONE)        // numeric
+#else
+  if (inet_addr(iadr)!=(in_addr_t) -1 )    // numeric
+#endif
+    return 1;
+
+  while(_hts_lockdns(-1));     // wait for release
+  _hts_lockdns(1);             // lock
+  for(;;) {
+    if (strcmp(cache->iadr,iadr)==0) {     // found
+      present=1;
+      break;
+    }
+    if (cache->n==NULL)        // end of the list
+      break;
+    cache=cache->n;            // next one!
+  }
+  _hts_lockdns(0);             // unlock
+  return present ? 1 : 2;
+}
+
+
+// Resolve hostname, using gethostbyname() (HTS_INET6==0) or getaddrinfo().
+// hostname: name to resolve; surrounding [] are stripped IN PLACE
+// v_buffer: t_fullhostent structure, cleared then filled with the result
+// return  : &buffer->hp, or NULL if the name is empty, cannot be resolved, or
+//           if the address does not fit in buffer->addr_maxlen bytes
+// With getaddrinfo() the whole sockaddr (res->ai_addr, res->ai_addrlen bytes)
+// is stored, not the bare address.
+// The brackets are stripped with memmove(): the previous code went through a
+// fixed-size temporary buffer which a long name could overflow.
+t_hostent* vxgethostbyname(char* hostname, void* v_buffer) {
+  t_fullhostent* buffer = (t_fullhostent*) v_buffer;
+  /* Clear */
+  fullhostent_init(buffer);
+
+  /* Protection */
+  if (!strnotempty(hostname)) {
+    return NULL;
+  }
+
+  /*
+    Strip [] if any : [3ffe:b80:1234:1::1]
+    The resolver doesn't seem to handle IP6 addresses in brackets
+  */
+  {
+    size_t len=strlen(hostname);     /* >=1: empty names were rejected above */
+    if ((hostname[0] == '[') && (hostname[len-1] == ']')) {
+      memmove(hostname, hostname+1, len-2);
+      hostname[len-2]='\0';
+    }
+  }
+
+  {
+#if HTS_INET6==0
+    /*
+      ipV4 resolver
+    */
+    t_hostent* hp=gethostbyname(hostname);
+    if (hp!=NULL) {
+      if ( (hp->h_length) && ( ((unsigned int) hp->h_length) <= buffer->addr_maxlen) ) {
+        memcpy(buffer->hp.h_addr_list[0], hp->h_addr_list[0], hp->h_length);
+        buffer->hp.h_length = hp->h_length;
+        return &(buffer->hp);
+      }
+    }
+#else
+    /*
+      ipV6 resolver
+    */
+    /*
+      int error_num=0;
+      t_hostent* hp=getipnodebyname(hostname, AF_INET6, AI_DEFAULT, &error_num);
+      oops, deprecated :(
+    */
+    struct addrinfo* res = NULL;
+    struct addrinfo hints;
+    memset(&hints, 0, sizeof(hints));
+    if (IPV6_resolver == 1)          // V4 only (for bogus V6 entries)
+      hints.ai_family = PF_INET;
+    else if (IPV6_resolver == 2)     // V6 only (for testing V6 only)
+      hints.ai_family = PF_INET6;
+    else                             // V4 + V6
+      hints.ai_family = PF_UNSPEC;
+    hints.ai_socktype = SOCK_STREAM;
+    hints.ai_protocol = IPPROTO_TCP;
+    if (getaddrinfo(hostname, NULL, &hints, &res) == 0) {
+      if (res) {
+        if ( (res->ai_addr) && (res->ai_addrlen) && (res->ai_addrlen <= buffer->addr_maxlen) ) {
+          memcpy(buffer->hp.h_addr_list[0], res->ai_addr, res->ai_addrlen);
+          buffer->hp.h_length = res->ai_addrlen;
+          freeaddrinfo(res);
+          return &(buffer->hp);
+        }
+      }
+    }
+    if (res) {
+      freeaddrinfo(res);
+    }
+
+#endif
+  }
+  return NULL;
+}
+
+// gethostbyname() with the internal HTS dns cache
+// (TODO from the original author: the cache list is never freed)
+// _iadr   : host name or numeric address, possibly with user:pass@ and :port
+// v_buffer: t_fullhostent structure, cleared then filled with the result
+// return  : &buffer->hp, or NULL if the host cannot be resolved
+//
+// Changes compared to the previous version:
+// - a new entry is appended to the TAIL of the list; it used to be stored in
+//   the 'n' field of the head, which dropped (and leaked) all other entries
+// - a numeric address is copied into the buffer; the returned structure used
+//   to point to a local variable which no longer exists after the return
+// - a failed resolution is stored with host_length=-1 ("host does not exist",
+//   see t_dnscache and _hts_ghbn) and not 0, which means "resolution in
+//   progress" and caused a new resolution and a new entry on every call
+// - names are copied with a size limit; a name which does not fit in a cache
+//   entry is resolved but not cached
+t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) {
+  char iadr[HTS_URLMAXSIZE*2];
+  t_fullhostent* buffer = (t_fullhostent*) v_buffer;
+  t_dnscache* cache=_hts_cache();    // cache address
+  t_hostent* hp;
+
+  /* Clear */
+  fullhostent_init(buffer);
+
+  // skip user:pass@ if any
+  iadr[0]='\0';
+  strncat(iadr,jump_identification(_iadr),sizeof(iadr)-1);
+  // cut :port if any
+  {
+    char *a;
+    if ( (a=jump_toport(iadr)) )
+      *a='\0';
+  }
+
+  // the head of the list is a dummy entry which never matches a host name
+  cache->iadr[0]='*';
+  cache->iadr[1]='\0';
+
+  /* get IP from the dns cache */
+  hp = _hts_ghbn(cache, iadr, &buffer->hp);
+  if (hp) {
+    if (hp->h_length>0)
+      return hp;
+    else
+      return NULL;    // erroneous entry (DNS error) in the cache
+  } else {            // not present in the dns cache: resolve
+#if HTS_WIDE_DEBUG
+    DEBUG_W("gethostbyname\n");
+#endif
+#if HDEBUG
+    printf("gethostbyname (not in cache)\n");
+#endif
+    {
+#if HTS_WIN
+      unsigned long inetaddr;
+      if ((inetaddr=inet_addr(iadr))==INADDR_NONE) {
+#else
+      in_addr_t inetaddr;
+      if ((inetaddr=inet_addr(iadr))==(in_addr_t) -1 ) {
+#endif
+#if DEBUGDNS
+        printf("resolving (not cached) %s\n",iadr);
+#endif
+        hp=vxgethostbyname(iadr, buffer);    // compute host IP
+      } else {    // numeric: convert without using the dns
+        // assumes fullhostent_init() made h_addr_list[0] point to storage
+        // owned by buffer, as vxgethostbyname() also does - TODO confirm
+        if (buffer->hp.h_addr_list[0]) {
+          memcpy(buffer->hp.h_addr_list[0], &inetaddr, 4);
+          buffer->hp.h_length=4;
+          hp=&buffer->hp;
+        } else
+          hp=NULL;
+      }
+    }
+#if HTS_WIDE_DEBUG
+    DEBUG_W("gethostbyname done\n");
+#endif
+    // store the result, if both the name and the address fit in an entry
+    if ( (strlen(iadr) < sizeof(cache->iadr))
+      && ( (hp==NULL) || (hp->h_length <= (int) sizeof(cache->host_addr)) ) ) {
+      t_dnscache* c=cache;
+      while(c->n) c=c->n;    // find the tail
+      c->n=(t_dnscache*) calloct(1,sizeof(t_dnscache));
+      if (c->n!=NULL) {
+        strcpy(c->n->iadr,iadr);
+        if (hp!=NULL) {
+          memcpy(c->n->host_addr, hp->h_addr_list[0], hp->h_length);
+          c->n->host_length=hp->h_length;
+        } else {
+          c->n->host_addr[0]='\0';
+          c->n->host_length=-1;    // does not exist in the dns
+        }
+        c->n->n=NULL;
+      }
+    }
+    // even if the result could not be stored, it can be returned
+    return hp;
+  }
+}
+
+#else
+// gethostbyname() without dns cache (HTS_DNSCACHE not set): blocking resolution
+// iadr    : host name, possibly with user:pass@
+// v_buffer: t_fullhostent structure filled by vxgethostbyname()
+// The buffer is now passed to vxgethostbyname() (the second argument was
+// missing, which did not compile) and the parameter type matches the
+// prototype declared in htslib.h.
+HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer) {
+  t_hostent* retour;
+#if HTS_WIDE_DEBUG
+  DEBUG_W("gethostbyname (2)\n");
+#endif
+#if DEBUGDNS
+  printf("blocking method gethostbyname() in progress for %s\n",iadr);
+#endif
+  retour=vxgethostbyname(jump_identification(iadr), v_buffer);
+#if HTS_WIDE_DEBUG
+  DEBUG_W("gethostbyname (2) done\n");
+#endif
+  return retour;
+}
+#endif
+
+
+// --- Tracage des mallocs() ---
+#if HTS_TRACE_MALLOC
+// one element of the list of traced memory blocks
+typedef struct _mlink {
+ void* adr; // address of the traced block
+ int len; // size of the block, in bytes
+ int id; // allocation number, taken from trmalloc_id
+ struct _mlink* next; // next element, NULL at the end of the list
+} mlink;
+// head of the list: only its 'next' field is used by the routines below
+mlink trmalloc = {NULL,0,0,NULL};
+// number given to the next traced block
+int trmalloc_id=0;
+
+// traced allocation: calloc(len,len2) if len2 is not zero, malloc(len) otherwise
+// the block is recorded at the head of the trmalloc list
+// returns NULL if the block or its list element cannot be allocated
+HTS_INLINE void* hts_malloc(size_t len,size_t len2) {
+  void* block;
+  mlink* node = (mlink*) calloc(1,sizeof(mlink));
+  if (node==NULL)
+    return NULL;
+  block = (len2) ? calloc(len,len2) : malloc(len);
+  if (block==NULL) {
+    free(node);
+    return NULL;
+  }
+  node->adr=block;
+  node->len=(len2) ? (len*len2) : len;
+  node->id=trmalloc_id++;
+  // insert in front of the list
+  node->next=trmalloc.next;
+  trmalloc.next=node;
+  return block;
+}
+// traced free: release adr and remove it from the trmalloc list
+// a NULL pointer is ignored; a block which is not in the list is reported
+// (MEMDEBUG) and released anyway
+// The list is now tested before being read: an empty list used to cause a
+// NULL pointer access. Addresses are printed with %p.
+HTS_INLINE void hts_free(void* adr) {
+  mlink* lnk = &trmalloc;
+  if (!adr) {
+#if MEMDEBUG
+    printf("* unexpected free() error at %p\n",adr);
+#endif
+    return;
+  }
+  while(lnk->next != NULL) {
+    if (lnk->next->adr==adr) {
+      mlink* blk_free=lnk->next;
+#if 1
+      // unlink and release the list element
+      lnk->next=lnk->next->next;
+      free((void*) blk_free);
+#else
+#if MEMDEBUG
+      if (blk_free->id==-1) {
+        printf("* memory has already been freed: %p (id=%d)\n",blk_free->adr,blk_free->id);
+      }
+#endif
+      blk_free->id=-1;
+#endif
+      free(adr);
+      return;
+    }
+    lnk=lnk->next;
+  }
+#if MEMDEBUG
+  printf("* unexpected free() error at %p\n",adr);
+#endif
+  free(adr);
+}
+// traced realloc: resize adr and update its element in the trmalloc list
+// a block which is not in the list is reported (MEMDEBUG) and resized anyway,
+// without being recorded
+// The list is now tested before being read (an empty list used to cause a
+// NULL pointer access), and when realloc() fails the element keeps the old
+// address, which is still allocated, instead of being set to NULL.
+HTS_INLINE void* hts_realloc(void* adr,size_t len) {
+  mlink* lnk = &trmalloc;
+  while(lnk->next != NULL) {
+    if (lnk->next->adr==adr) {
+      void* nadr = realloc(adr,len);
+      // a NULL result is only recorded for a zero size request
+      if ((nadr != NULL) || (len == 0)) {
+        lnk->next->adr = nadr;
+        lnk->next->len = len;
+      }
+      return nadr;
+    }
+    lnk=lnk->next;
+  }
+#if MEMDEBUG
+  printf("* unexpected realloc() error at %p\n",adr);
+#endif
+  return realloc(adr,len);
+}
+// check the malloct() and calloct() trace stack:
+// report (MEMDEBUG) and release every block which is still in the list
+// NOTE(review): the loop only ends if freet() removes the first element of the
+// list - presumably through hts_free(); an element with id==-1 would never be
+// removed here - confirm against the freet() definition
+void hts_freeall(void) {
+ while(trmalloc.next) {
+#if MEMDEBUG
+ printf("* block %d\t not released: at %d\t (%d\t bytes)\n",trmalloc.next->id,trmalloc.next->adr,trmalloc.next->len);
+#endif
+ if (trmalloc.next->id != -1) {
+ freet(trmalloc.next->adr);
+ }
+ }
+}
+#endif
+
+
+// -- divers //
+
+// cut path and project name
+// patch also initial path: the trailing / or \ of fullpath is removed and its
+// backslashes are turned into /
+// path receives what is before the last component (ending / included), pname
+// the last component; both are set to "" if fullpath has less than 2 chars
+void cut_path(char* fullpath,char* path,char* pname) {
+  size_t len;
+  char* p;
+  path[0]=pname[0]='\0';
+  if (!strnotempty(fullpath))
+    return;
+  // remove the trailing separator
+  len=strlen(fullpath);
+  if ((fullpath[len-1]=='/') || (fullpath[len-1]=='\\'))
+    fullpath[--len]='\0';
+  if (len<=1)
+    return;
+  // replace backslashes by /
+  for(p=fullpath;*p!='\0';p++) {
+    if (*p=='\\')
+      *p='/';
+  }
+  // look for the last /, starting from the last but one character
+  p=fullpath+len-2;
+  while( (*p!='/') && (p > fullpath) ) p--;
+  if (*p=='/') p++;
+  strcpy(pname,p);
+  strncat(path,fullpath,(int) (p - fullpath));
+}
+
+
+
+// -- Gestion protocole ftp --
+
+// is the ftp protocol available? (1: yes) - both variants currently answer yes
+#if HTS_WIN
+int ftp_available(void) {
+ return 1;
+}
+#else
+int ftp_available(void) {
+ return 1; // ok!
+ //return 0; // UNDER UNIX, PROBLEMS
+}
+#endif
+
+
+
+// Library initialization; always returns 1.
+// - first call only: registers the default (htsdefault_*) wrappers
+// - with HTS_USEOPENSSL: initializes OpenSSL and creates openssl_ctx if it does
+//   not exist yet; the process is aborted if the context cannot be created
+// - every call: hts_initvar()
+int hts_init(void) {
+ static int hts_init_ok = 0;
+ if (!hts_init_ok) {
+ hts_init_ok = 1;
+ // default wrappers
+ htswrap_init();
+ htswrap_add("init",htsdefault_init);
+ htswrap_add("free",htsdefault_uninit);
+ htswrap_add("start",htsdefault_start);
+ htswrap_add("change-options",htsdefault_chopt);
+ htswrap_add("end",htsdefault_end);
+ htswrap_add("check-html",htsdefault_checkhtml);
+ htswrap_add("loop",htsdefault_loop);
+ htswrap_add("query",htsdefault_query);
+ htswrap_add("query2",htsdefault_query2);
+ htswrap_add("query3",htsdefault_query3);
+ htswrap_add("check-link",htsdefault_check);
+ htswrap_add("pause",htsdefault_pause);
+ htswrap_add("save-file",htsdefault_filesave);
+ htswrap_add("link-detected",htsdefault_linkdetected);
+ htswrap_add("transfer-status",htsdefault_xfrstatus);
+ htswrap_add("save-name",htsdefault_savename);
+ }
+
+#if HTS_USEOPENSSL
+ /*
+ Initialize the OpensSSL library
+ */
+ if (!openssl_ctx) {
+ SSL_library_init();
+ SSL_load_error_strings();
+ ERR_load_crypto_strings();
+ ERR_load_SSL_strings();
+ SSLeay_add_ssl_algorithms();
+ // OpenSSL_add_all_algorithms();
+ openssl_ctx = SSL_CTX_new(SSLv23_client_method());
+ if (!openssl_ctx) {
+ fprintf(stderr, "fatal: unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)\n");
+ abort();
+ }
+ }
+#endif
+
+ /* Init vars and thread-specific values */
+ hts_initvar();
+
+ return 1;
+}
+// Library cleanup: calls hts_freevar(); always returns 1.
+// The wrappers registered by hts_init() are not released (call commented out).
+int hts_uninit(void) {
+ hts_freevar();
+ /* htswrap_free(); */
+ return 1;
+}
+
+// default wrappers: the callbacks registered by hts_init()
+// Most of them do nothing and return a constant.
+void __cdecl htsdefault_init(void) {
+}
+// "free" wrapper: calls hts_freevar()
+// NOTE(review): hts_uninit() calls hts_freevar() too - confirm that calling it twice is harmless
+void __cdecl htsdefault_uninit(void) {
+ hts_freevar();
+}
+int __cdecl htsdefault_start(void* opt) {
+ return 1;
+}
+int __cdecl htsdefault_chopt(void* opt) {
+ return 1;
+}
+int __cdecl htsdefault_end(void) {
+ return 1;
+}
+int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
+ return 1;
+}
+int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) { // called at each loop of HTTrack
+ return 1;
+}
+// the three "query" wrappers answer with an empty string
+char* __cdecl htsdefault_query(char* question) {
+ return "";
+}
+char* __cdecl htsdefault_query2(char* question) {
+ return "";
+}
+char* __cdecl htsdefault_query3(char* question) {
+ return "";
+}
+// "check-link" wrapper: always answers -1
+int __cdecl htsdefault_check(char* adr,char* fil,int status) {
+ return -1;
+}
+// "pause" wrapper: waits as long as lockfile can be opened, testing it after each Sleep(1000)
+void __cdecl htsdefault_pause(char* lockfile) {
+ while (fexist(lockfile)) {
+ Sleep(1000);
+ }
+}
+void __cdecl htsdefault_filesave(char* file) {
+}
+int __cdecl htsdefault_linkdetected(char* link) {
+ return 1;
+}
+int __cdecl htsdefault_xfrstatus(void* back) {
+ return 1;
+}
+int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
+ return 1;
+}
+// end default wrappers
+
+
+
+// Fin
+
diff --git a/src/htslib.h b/src/htslib.h
new file mode 100644
index 0000000..9b2aca3
--- /dev/null
+++ b/src/htslib.h
@@ -0,0 +1,339 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Subroutines .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Fichier librairie .h
+
+#ifndef HTS_DEFH
+#define HTS_DEFH
+
+/* définitions globales */
+#include "htsglobal.h"
+
+/* basic net definitions */
+#include "htsbasenet.h"
+
+/* cookies et auth */
+#include "htsbauth.h"
+
+// Attention, définition existante également dans le shell
+// (à modifier avec celle-ci)
+#define POSTTOK "?>post"
+
+#include <stdio.h>
+
+#include "htsopt.h"
+
+// structure for the additional parameters of a request
+typedef struct {
+ short int user_agent_send; // user agent (ex: httrack/1.0 [sun]) - presumably a "send it" flag, the text being in user_agent; TODO confirm
+ short int http11; // the header may (must) be signed HTTP/1.1 and not HTTP/1.0
+ short int range_used; // Range used
+ short int nocompression; // No compression
+ char user_agent[64];
+ char lang_iso[64];
+ t_proxy proxy; // proxy
+} htsrequest;
+
+
+// structure returned by a connection / a header fetch
+typedef struct {
+ int statuscode; // status-code, -1=error, 200=OK,201=..etc (cf RFC1945)
+ short int notmodified; // page or file NOT modified (transferred)
+ short int is_write; // output to disk (out) or to memory (adr)
+ short int is_chunk; // chunk mode
+ short int compressed; // compressed?
+ char* adr; // address of the memory block, NULL=empty
+ FILE* out; // direct write to disk (if is_write=1)
+ LLint size; // file size
+ char msg[80]; // message, if any, in case of failure ("\0"=not given)
+ char contenttype[64]; // content-type ("text/html" for example)
+ char contentencoding[64]; // content-encoding ("gzip" for example)
+ char* location; // the real 'location' is copied here when needed
+ LLint totalsize; // total size to download (-1=unknown)
+ short int is_file; // if 1, this is not a socket but a file descriptor
+ T_SOC soc; // socket ID
+ FILE* fp; // file for file://
+#if HTS_USEOPENSSL
+ short int ssl; // is this connection a SSL one? (https)
+ // BIO* ssl_soc; // SSL structure
+ SSL * ssl_con; // connection structure
+#endif
+ char lastmodified[64]; // Last-Modified
+ char etag[64]; // Etag
+ char cdispo[256]; // Content-Disposition, cut
+ LLint crange; // Content-Range
+ /* */
+ htsrequest req; // parameters for the request
+ /*char digest[32+2]; // md5 digest generated by the engine ("" if not generated)*/
+} htsblk;
+
+
+/* OLD STRUCTURE for the 1.0 cache */
+typedef struct {
+ int statuscode; // OLD STRUCTURE - status-code, -1=error, 200=OK,201=..etc (cf RFC1945)
+ int notmodified; // OLD STRUCTURE - page or file NOT modified (transferred)
+ int is_write; // OLD STRUCTURE - output to disk (out) or to memory (adr)
+ char* adr; // OLD STRUCTURE - address of the memory block, NULL=empty
+ FILE* out; // OLD STRUCTURE - direct write to disk (if is_write=1)
+ int size; // OLD STRUCTURE - file size
+ char msg[80]; // OLD STRUCTURE - message, if any, in case of failure ("\0"=not given)
+ char contenttype[64]; // OLD STRUCTURE - content-type ("text/html" for example)
+ char* location; // OLD STRUCTURE - the real 'location' is copied here when needed
+ int totalsize; // OLD STRUCTURE - total size to download (-1=unknown)
+ int is_file; // OLD STRUCTURE - if 1, this is not a socket but a file descriptor
+ T_SOC soc; // OLD STRUCTURE - socket ID
+ FILE* fp; // OLD STRUCTURE - file for file://
+ t_proxy proxy; // OLD STRUCTURE - proxy
+ int user_agent_send; // OLD STRUCTURE - user agent (ex: httrack/1.0 [sun])
+ char user_agent[64];
+ int http11; // OLD STRUCTURE - the header must be signed HTTP/1.1 and not HTTP/1.0
+} OLD_htsblk;
+/* end of the OLD STRUCTURE for the 1.0 cache */
+
+// dns cache, to avoid calling gethostbyname over and over
+// (singly linked list; 'n' is the next entry, NULL at the end)
+typedef struct t_dnscache {
+ char iadr[1024];
+ struct t_dnscache* n;
+ char host_addr[HTS_MAXADDRLEN]; // 4 bytes (v4), or 16 bytes (v6)
+ int host_length; // normally 4 - if ==0 then resolution is in progress
+ // or >16 if sockaddr
+ // if ==-1 then error (host does not exist)
+} t_dnscache;
+
+
+
+
+/*
+#ifdef __cplusplus
+extern "C" {
+#endif
+*/
+
+// fonctions unix/winsock
+int hts_read(htsblk* r,char* buff,int size);
+//int HTS_TOTAL_RECV_CHECK(int var);
+LLint check_downloadable_bytes(int rate);
+
+int hts_init(void);
+int hts_uninit(void);
+
+
+// fonctions principales
+int http_fopen(char* adr,char* fil,htsblk* retour);
+int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour);
+int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour);
+htsblk httpget(char* url);
+//int newhttp(char* iadr,char* err=NULL);
+int newhttp(char* iadr,htsblk* retour,int port,int waitconnect);
+HTS_INLINE void deletehttp(htsblk* r);
+HTS_INLINE void deletesoc(T_SOC soc);
+HTS_INLINE void deletesoc_r(htsblk* r);
+htsblk http_location(char* adr,char* fil,char* loc);
+htsblk http_test(char* adr,char* fil,char* loc);
+int check_readinput(htsblk* r);
+void http_fread(T_SOC soc,htsblk* retour);
+LLint http_fread1(htsblk* r);
+void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd);
+void treatfirstline(htsblk* retour,char* rcvd);
+void infostatuscode(char* msg,int statuscode);
+
+// helper functions
+htsblk xhttpget(char* adr,char* fil);
+htsblk http_gethead(char* adr,char* fil);
+LLint http_xfread1(htsblk* r,int bufl);
+HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer);
+t_hostent* vxgethostbyname(char* hostname, void* v_buffer);
+t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour);
+int ftp_available(void);
+#if HTS_DNSCACHE /* declared only when the DNS cache is compiled in */
+int hts_dnstest(char* _iadr);
+t_dnscache* _hts_cache(void);
+int _hts_lockdns(int i);
+#endif /* HTS_DNSCACHE */
+
+// miscellaneous tools
+HTS_INLINE TStamp time_local(void);
+HTS_INLINE TStamp mtime_local(void);
+void sec2str(char *s,TStamp t);
+void qsec2str(char *st,TStamp t);
+void time_gmt_rfc822(char* s);
+void time_local_rfc822(char* s);
+struct tm* convert_time_rfc822(char* s);
+int set_filetime(char* file,struct tm* tm_time);
+int set_filetime_rfc822(char* file,char* date);
+HTS_INLINE void time_rfc822(char* s,struct tm * A);
+HTS_INLINE void time_rfc822_local(char* s,struct tm * A);
+char* int2char(int n);
+char* int2bytes(LLint n);
+char* int2bytessec(long int n);
+char** int2bytes2(LLint n);
+HTS_INLINE int sendc(htsblk* r, char* s);
+void finput(int fd,char* s,int max);
+int binput(char* buff,char* s,int max);
+int linput(FILE* fp,char* s,int max);
+int linput_trim(FILE* fp,char* s,int max);
+int linput_cpp(FILE* fp,char* s,int max);
+void rawlinput(FILE* fp,char* s,int max);
+int strfield(const char* f,const char* s);
+#define strfield2(f,s) ( (strlen(f)!=strlen(s)) ? 0 : (strfield(f,s)) ) /* 0 when the lengths differ, otherwise strfield(f,s); evaluates f and s twice */
+char* strstrcase(char *s,char *o);
+int ident_url_absolute(char* url,char* adr,char* fil);
+void fil_simplifie(char* f);
+int is_unicode_utf8(unsigned char* buffer, unsigned int size);
+void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map);
+int ishtml(char* urlfil);
+int ishtml_ext(char* a);
+int ishttperror(int err);
+void guess_httptype(char *s,char *fil);
+void get_httptype(char *s,char *fil,int flag);
+int get_userhttptype(int setdefs,char *s,char *ext);
+void give_mimext(char *s,char *st);
+int is_knowntype(char *fil);
+int is_userknowntype(char *fil);
+int is_dyntype(char *fil);
+char* get_ext(char *fil);
+int may_unknown(char* st);
+char* jump_identification(char*);
+char* jump_toport(char*);
+char* strrchr_limit(char* s, char c, char* limit);
+HTS_INLINE char* jump_protocol(char* source);
+void code64(char* a,char* b);
+void unescape_amp(char* s);
+void escape_spc_url(char* s);
+void escape_in_url(char* s);
+void escape_uri(char* s);
+void escape_uri_utf(char* s);
+void escape_check_url(char* s);
+char* escape_check_url_addr(char* s);
+void x_escape_http(char* s,int mode);
+HTS_INLINE int ehexh(char c);
+char* unescape_http(char* s);
+char* unescape_http_unharm(char* s, int no_high);
+char* antislash_unescaped(char* s);
+int ehex(char* s);
+char* concat(const char* a,const char* b);
+#define copychar(a) concat((a),NULL) /* concat() called with a NULL second argument */
+#if HTS_DOSNAME
+char* fconcat(char* a,char* b);
+char* fconv(char* a);
+#else
+#define fconv(a) (a) /* without HTS_DOSNAME: fconv yields its argument unchanged */
+#define fconcat(a,b) concat(a,b) /* without HTS_DOSNAME: fconcat is plain concat */
+#endif
+char* fslash(char* a);
+char* __fslash(char* a);
+
+char* convtolower(char* a);
+char* concat(const char* a,const char* b);
+void hts_lowcase(char* s);
+void hts_replace(char *s,char from,char to);
+
+/* Character classes (10=LF, 13=CR, 9=TAB, 12=FF); each macro evaluates c several times */
+#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)=='\'') ) /* space, LF, CR, TAB, FF, or a double/single quote */
+#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) ) /* space, LF, CR, TAB or FF only (no quotes) */
+#define is_taborspace(c) ( ((c)==' ') || ((c)==9) ) /* space or TAB */
+#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') ) /* double or single quote */
+//HTS_INLINE int is_space(char);
+//HTS_INLINE int is_realspace(char);
+
+void fprintfio(FILE* fp,char* buff,char* prefix);
+
+#if HTS_WIN
+#else
+int sig_ignore_flag( int setflag ); // flag ignore
+#endif
+
+void cut_path(char* fullpath,char* path,char* pname);
+int fexist(char* s);
+/*LLint fsize(char* s); */
+int fpsize(FILE* fp);
+int fsize(char* s);
+/* root dir */
+char* hts_rootdir(char* file);
+
+// Threads
+#if USE_PTHREAD
+typedef void* ( *beginthread_type )( void * );
+unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist );
+#endif
+
+/*
+#ifdef __cplusplus
+}
+#endif
+*/
+
+
+
+/* global variables */
+//extern LLint HTS_TOTAL_RECV; // incoming stream received
+//extern int HTS_TOTAL_RECV_STATE; // status: 0 all is well 1: slow down a little 2: slow down 3: a lot
+extern hts_stat_struct HTS_STAT;
+extern int _DEBUG_HEAD;
+extern FILE* ioinfo;
+
+/* constants */
+extern const char hts_mime_keep[][32];
+extern const char hts_mime[][2][32];
+extern const char hts_detect[][32];
+extern const char hts_detectbeg[][32];
+extern const char hts_nodetect[][32];
+extern const char hts_detectURL[][32];
+extern const char hts_detectandleave[][32];
+extern const char hts_detect_js[][32];
+
+// default wrappers
+void __cdecl htsdefault_init(void);
+void __cdecl htsdefault_uninit(void);
+int __cdecl htsdefault_start(void* opt);
+int __cdecl htsdefault_chopt(void* opt);
+int __cdecl htsdefault_end(void);
+int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
+int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+char* __cdecl htsdefault_query(char* question);
+char* __cdecl htsdefault_query2(char* question);
+char* __cdecl htsdefault_query3(char* question);
+int __cdecl htsdefault_check(char* adr,char* fil,int status);
+void __cdecl htsdefault_pause(char* lockfile);
+void __cdecl htsdefault_filesave(char*);
+int __cdecl htsdefault_linkdetected(char* link);
+int __cdecl htsdefault_xfrstatus(void* back);
+int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+// end default wrappers
+
+#endif
+
+
diff --git a/src/htsmd5.c b/src/htsmd5.c
new file mode 100644
index 0000000..47242d8
--- /dev/null
+++ b/src/htsmd5.c
@@ -0,0 +1,76 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsmd5.c subroutines: */
+/* generate a md5 hash */
+/* */
+/* Written March 1993 by Branko Lankester */
+/* Modified June 1993 by Colin Plumb for altered md5.c. */
+/* Modified October 1995 by Erik Troan for RPM */
+/* Modified 2000 by Xavier Roche for domd5mem */
+/* ------------------------------------------------------------ */
+
+#include "htsmd5.h"
+#include "md5.h"
+#include <string.h>
+#include <stdio.h>
+
+/* Computes the MD5 digest of the len bytes at buf and stores it in digest.
+ asAscii==0: digest receives the 16 raw digest bytes.
+ asAscii!=0: digest receives 32 lowercase hex characters plus a terminating
+ NUL, so it must have room for at least 33 bytes. Always returns 0. */
+int domd5mem(unsigned char * buf, int len,
+ unsigned char * digest, int asAscii) {
+ int endian = 1;
+ unsigned char bindigest[16];
+ MD5_CTX ctx;
+
+ /* second argument: first byte of the int 1, i.e. non-zero on little-endian hosts */
+ MD5Init(&ctx, * ( (char*) &endian));
+ MD5Update(&ctx, buf, len);
+ MD5Final(bindigest, &ctx);
+ if (!asAscii) {
+ memcpy(digest, bindigest, 16);
+ } else {
+ int i;
+ /* sprintf() takes a char*; digest is unsigned char*, hence the explicit cast */
+ for(i = 0 ; i < 16 ; i++)
+ sprintf((char*) digest + 2*i, "%02x", bindigest[i]);
+ }
+ return 0;
+}
+
+unsigned long int md5sum32(char* buff) { /* returns the leading sizeof(long) bytes of the MD5 of the NUL-terminated string buff */
+ unsigned char digest[16]; long int v; /* v is signed like the former cast, so the returned bit pattern is unchanged */
+ domd5mem((unsigned char*) buff,(int) strlen(buff),digest,0); /* asAscii=0: raw 16-byte digest */
+ memcpy(&v,digest,sizeof(v)); return (unsigned long int) v; /* memcpy avoids a misaligned, aliasing-violating load */
+}
diff --git a/src/htsmd5.h b/src/htsmd5.h
new file mode 100644
index 0000000..84148bd
--- /dev/null
+++ b/src/htsmd5.h
@@ -0,0 +1,52 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsmd5.h subroutines: */
+/* generate a md5 hash */
+/* */
+/* Written March 1993 by Branko Lankester */
+/* Modified June 1993 by Colin Plumb for altered md5.c. */
+/* Modified October 1995 by Erik Troan for RPM */
+/* Modified 2000 by Xavier Roche for domd5mem */
+/* ------------------------------------------------------------ */
+
+#ifndef HTSMD5_DEFH
+#define HTSMD5_DEFH
+
+int domd5mem(unsigned char * buf, int len, /* MD5 of the len bytes at buf; returns 0 */
+ unsigned char * digest, int asAscii); /* asAscii==0: 16 raw bytes into digest; otherwise 32 hex chars + NUL */
+unsigned long int md5sum32(char* buff); /* leading bytes of the MD5 of a NUL-terminated string, as an unsigned long */
+
+#endif
+
+
+
diff --git a/src/htsname.c b/src/htsname.c
new file mode 100644
index 0000000..2df0c98
--- /dev/null
+++ b/src/htsname.c
@@ -0,0 +1,1266 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsname.c subroutines: */
+/* savename routine (compute output filename) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htsname.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htstools.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+/* END specific definitions */
+
+#undef test_flush
+#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); } /* flushes both log streams when opt->flush is set; needs a variable named opt in scope */
+
+#define ADD_STANDARD_PATH \
+ { /* copy the characters from start_pos up to nom_pos and pass them to url_savename_addstr() */\
+ char buff[HTS_URLMAXSIZE*2];\
+ buff[0]='\0';\
+ strncat(buff,start_pos,(int) (nom_pos - start_pos));\
+ url_savename_addstr(save,buff);\
+ }
+
+#define ADD_STANDARD_NAME(shortname) \
+ { /* build a name with standard_name() and pass it to url_savename_addstr() */\
+ char buff[HTS_URLMAXSIZE*2];\
+ standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\
+ url_savename_addstr(save,buff);\
+ }
+
+
+/* Reserved DOS device names (each with a leading '/') to avoid as folder/file names, such as 'nul' */
+/* Based on linux/fs/umsdos/mangle.c */
+static const char *hts_tbdev[] =
+{
+ "/prn", "/con", "/aux", "/nul",
+ "/lpt1", "/lpt2", "/lpt3", "/lpt4",
+ "/com1", "/com2", "/com3", "/com4",
+ "/clock$",
+ "/emmxxxx0", "/xmsxxxx0", "/setverxx",
+ "" /* NOTE(review): presumably the end-of-table marker - confirm against the code that scans this table */
+};
+
+
+
+// forme le nom du fichier à sauver (save) à partir de fil et adr
+// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
+int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe) {
+ char newfil[HTS_URLMAXSIZE*2]; /* ="" */
+ char* fil;
+ char* adr;
+ char* print_adr;
+ char *start_pos=NULL,*nom_pos=NULL,*dot_pos=NULL; // Position nom et point
+ // pour changement d'extension ou de nom (content-disposition)
+ int ext_chg=0;
+ char ext[256];
+ int max_char=0;
+ //CLEAR
+ newfil[0]=ext[0]='\0';
+
+ /* 8-3 ? */
+ switch(opt->savename_83) {
+ case 1:
+ max_char=8;
+ break;
+ case 2:
+ max_char=30;
+ break;
+ default:
+ max_char=8;
+ break;
+ }
+
+ // effacer save
+ save[0]='\0';
+ // fil
+ fil = fil_complete;
+ // et adr (sauter user/pass)
+ // on prend le parti de mettre les fichiers avec login/pass au même endroit que si ils
+ // étaient capturés sans ces paramètres
+ // c'est pour cette raison qu'on ignore totalement adr_complete (même pour la recherche en table de hachage)
+ adr=jump_identification(adr_complete);
+
+ // à afficher sans ftp://
+ print_adr=jump_protocol(adr);
+
+ // court-circuit pour lien primaire
+ if (strnotempty(adr)==0) {
+ if (strcmp(fil,"primary")==0) {
+ strcat(save,"primary.html");
+ return 0;
+ }
+ }
+
+
+ // vérifier que le nom n'a pas déja été calculé (si oui le renvoyer tel que)
+ // vérifier que le nom n'est pas déja pris...
+ // NOTE: si on cherche /toto/ et que /toto est trouvé on le prend (et réciproquqment) ** // **
+ if (liens!=NULL) {
+ int i;
+
+#if HTS_HASH
+ i=hash_read(hash,adr,fil_complete,1); // recherche table 1 (adr+fil)
+ if (i>=0) { // ok, trouvé
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+ i=hash_read(hash,adr,fil_complete,2); // recherche table 2 (former_adr+former_fil)
+ if (i>=0) { // ok, trouvé
+ // copier location moved!
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // et save
+ strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
+ return 0;
+ }
+#else
+ for(i=lien_tot-1;i>=0;i--) {
+#if HTS_CASSE
+ if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0))
+#else
+ if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete)))
+#endif
+ { // ok c'est le même lien, adresse déja définie
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+ if (liens[i]->former_adr) { // tester ancienne loc?
+#if HTS_CASSE
+ if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete)==0))
+#else
+ if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete)))
+#endif
+ {
+ // copier location moved!
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // et save
+ strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
+ return 0;
+ }
+ }
+ }
+#endif
+
+ // chercher sans / ou avec / dans former
+ {
+ char fil_complete_patche[HTS_URLMAXSIZE*2];
+ strcpy(fil_complete_patche,fil_complete);
+ // Version avec ou sans /
+ if (fil_complete_patche[strlen(fil_complete_patche)-1]=='/')
+ fil_complete_patche[strlen(fil_complete_patche)-1]='\0';
+ else
+ strcat(fil_complete_patche,"/");
+#if HTS_HASH
+ i=hash_read(hash,adr,fil_complete_patche,2); // recherche table 2 (former_adr+former_fil)
+ if (i>=0) {
+ // écraser fil et adr (pas former_fil?????)
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // écrire save
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+#else
+ // même boucle en gros
+ for(i=lien_tot-1;i>=0;i--) {
+ if (liens[i]->former_adr) { // former-adr?
+#if HTS_CASSE
+ if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete_patche)==0))
+#else
+ if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete_patche)))
+#endif
+ { // ok c'est le même lien, adresse déja définie
+ // écraser fil et adr (pas former_fil?????)
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // écrire save
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+ }
+ }
+#endif
+ }
+ }
+
+ // vérifier la non présence de paramètres dans le nom de fichier
+ // si il y en a, les supprimer (ex: truc.cgi?subj=aspirateur)
+ // néanmoins, gardé pour vérifier la non duplication (voir après)
+ {
+ char* a;
+ a=strchr(fil,'?');
+ if (a!=NULL) {
+ strncat(newfil,fil,(int) (a - fil));
+ } else {
+ strcpy(newfil,fil);
+ }
+ fil=newfil;
+ }
+ // décoder %
+ strcpy(fil,unescape_http(fil));
+ /*
+ {
+ char tempo[HTS_URLMAXSIZE*2];
+ int i,j=0;
+ for (i=0;i<(int) strlen(fil);i++) {
+ if (fil[i]=='%') {
+ i++;
+ tempo[j++]=(char) ehex(fil+i);
+ i++; // sauter 2 caractères finalement
+ } else
+ tempo[j++]=fil[i];
+ }
+ tempo[j++]='\0';
+ strcpy(fil,tempo);
+ }
+ */
+
+
+ /* replace shtml to html.. */
+ switch (ishtml(fil)) { /* .html,.shtml,.. */
+ case 1:
+ if (
+ (strcmp(get_ext(fil),"html") != 0)
+ && (strcmp(get_ext(fil),"htm") != 0)
+ ) {
+ strcpy(ext,"html");
+ ext_chg=1;
+ }
+ break;
+ case 0:
+ if (!strnotempty(ext)) {
+ if (is_userknowntype(get_ext(fil))) { // mime known by user
+ char mime[1024];
+ mime[0]=ext[0]='\0';
+ get_userhttptype(0,mime,get_ext(fil));
+ if (strnotempty(mime)) {
+ give_mimext(ext,mime);
+ if (strnotempty(ext)) {
+ ext_chg=1;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+
+ // si option check_type activée
+ if ((opt->check_type) && (!ext_chg)) {
+ if ( (!strfield(adr_complete,"file://"))
+ && (!strfield(adr_complete,"ftp://"))
+ ) {
+ // tester type avec requète HEAD si on ne connait pas le type du fichier
+ if (!( (opt->check_type==1) && (fil[strlen(fil)-1]=='/') )) // slash doit être html?
+ if (ishtml(fil)<0) { // on ne sait pas si c'est un html ou un fichier..
+ // lire dans le cache
+ htsblk r = cache_read(opt,cache,adr,fil,NULL); // test uniquement
+ if (r.statuscode != -1) { // pas d'erreur de lecture cache
+ char s[16]; s[0]='\0';
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type (from cache) %s%s"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ if (strnotempty(r.cdispo)) { /* filename given */
+ ext_chg=2; /* change filename */
+ strcpy(ext,r.cdispo);
+ }
+ else if (!may_unknown(r.contenttype)) { // on peut patcher à priori?
+ give_mimext(s,r.contenttype); // obtenir extension
+ if (strnotempty(s)>0) { // on a reconnu l'extension
+ ext_chg=1;
+ strcpy(ext,s);
+ }
+ }
+ //
+ } else { // test imposible dans le cache, faire une requête
+ //
+#if HTS_ANALYSTE
+ int hihp=_hts_in_html_parsing;
+#endif
+ int has_been_moved=0;
+ char curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2];
+ curr_adr[0]=curr_fil[0]='\0';
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=2; // test
+#endif
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ strcpy(curr_adr,adr_complete);
+ strcpy(curr_fil,fil_complete);
+ // ajouter dans le backing le fichier en mode test
+ // savename: rien car en mode test
+ if (back_add(back,back_max,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1,NULL)!=-1) {
+ int b;
+ b=back_index(back,back_max,curr_adr,curr_fil,BACK_ADD_TEST);
+ if (b>=0) {
+ int petits_tours=0;
+ int get_test_request=0; // en cas de bouclage sur soi même avec HEAD, tester avec GET.. parfois c'est la cause des problèmes
+ do {
+ // temps à attendre, et remplir autant que l'on peut le cache (backing)
+ if (back[b].status>0) back_wait(back,back_max,opt,cache,0);
+ if (ptr>=0)
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+
+ // on est obligé d'appeler le shell pour le refresh..
+#if HTS_ANALYSTE
+ {
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ return -1;
+ } else if (_hts_cancel) { // cancel 2 ou 1 (cancel parsing)
+ back_delete(back,b); // cancel test
+ }
+ }
+#endif
+
+
+ // traitement des 304,303..
+ if (back[b].status<=0) {
+ if ( (back[b].r.statuscode==301)
+ || (back[b].r.statuscode==302)
+ || (back[b].r.statuscode==303)
+ || (back[b].r.statuscode==307)
+ ) { // agh moved.. un tit tour de plus
+ if ((petits_tours<5) && (former_adr) && (former_fil)) { // on va pas tourner en rond non plus!
+ if ((int) strnotempty(back[b].r.location)) { // location existe!
+ char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
+ mov_url[0]=mov_adr[0]=mov_fil[0]='\0';
+ //
+ strcpy(mov_url,back[b].r.location); // copier URL
+ if (ident_url_relatif(mov_url,curr_adr,curr_fil,mov_adr,mov_fil)>=0) {
+ // si non bouclage sur soi même, ou si test avec GET non testé
+ if ((strcmp(mov_adr,curr_adr)) || (strcmp(mov_fil,curr_fil)) || (get_test_request==0)) {
+ // bouclage?
+ if ((!strcmp(mov_adr,curr_adr)) && (!strcmp(mov_fil,curr_fil)))
+ get_test_request=1; // faire requète avec GET
+
+ // recopier former_adr/fil?
+ if ((former_adr) && (former_fil)) {
+ if (strnotempty(former_adr)==0) { // Pas déja noté
+ strcpy(former_adr,curr_adr);
+ strcpy(former_fil,curr_fil);
+ }
+ }
+
+ // check explicit forbidden - don't follow 3xx in this case
+ {
+ int set_prio_to=0;
+ robots_wizard* robots = (robots_wizard*) opt->robotsptr;
+ if (hts_acceptlink(opt,ptr,lien_tot,liens,
+ mov_adr,mov_fil,
+ opt->filters.filters,opt->filters.filptr,opt->maxfilter,
+ robots,
+ &set_prio_to,
+ NULL) == 1)
+ { /* forbidden */
+ has_been_moved = 1;
+ back_delete(back,b); // ok
+ strcpy(curr_adr,mov_adr);
+ strcpy(curr_fil,mov_fil);
+ mov_url[0]='\0';
+ }
+ }
+
+ // ftp: stop!
+ if (strfield(mov_url,"ftp://")) { // ftp, ok on arrête
+ has_been_moved = 1;
+ back_delete(back,b); // ok
+ strcpy(curr_adr,mov_adr);
+ strcpy(curr_fil,mov_fil);
+ } else if (*mov_url) {
+ char* methode;
+ if (!get_test_request)
+ methode=BACK_ADD_TEST; // tester avec HEAD
+ else {
+ methode=BACK_ADD_TEST2; // tester avec GET
+ if ( opt->errlog!=NULL ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Loop with HEAD request (during prefetch) at %s%s"LF,curr_adr,curr_fil);
+ test_flush;
+ }
+ }
+ // Ajouter
+ if (back_add(back,back_max,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1,NULL)!=-1) { // OK
+ if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"(during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil);
+ test_flush;
+ }
+
+ // libérer emplacement backing actuel et attendre le prochain
+ back_delete(back,b);
+ strcpy(curr_adr,mov_adr);
+ strcpy(curr_fil,mov_fil);
+ b=back_index(back,back_max,curr_adr,curr_fil,methode);
+ if (!get_test_request)
+ has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé
+ petits_tours++;
+ //
+ } else {// sinon on fait rien et on s'en va.. (ftp etc)
+ if ( (opt->debug>1) && (opt->errlog)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Warning: Savename redirect backing error at %s%s"LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ }
+ }
+ } else {
+ if ( opt->errlog!=NULL ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop to same filename)"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ }
+
+ }
+ }
+ } else{ // arrêter les frais
+ if ( opt->errlog!=NULL ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop)"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ }
+ } // ok, leaving
+ }
+
+ } while(back[b].status>0);
+
+ // Si non déplacé, forcer type?
+ if (!has_been_moved) {
+ if (back[b].r.statuscode!=-10) { // erreur
+ if (strnotempty(back[b].r.contenttype)==0)
+ strcpy(back[b].r.contenttype,"text/html"); // message d'erreur en html
+ // Finalement on, renvoie un erreur, pour ne toucher à rien dans le code
+ // libérer emplacement backing
+ /*if (opt->errlog!=NULL) {
+ fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil);
+ test_flush;
+ }
+ back_delete(back,b);
+ return -1; // ERREUR (404 par exemple)
+ */
+ }
+
+ { // pas d'erreur, changer type?
+ char s[16];
+ s[0]='\0';
+ if (strnotempty(back[b].r.cdispo)) { /* filename given */
+ ext_chg=2; /* change filename */
+ strcpy(ext,back[b].r.cdispo);
+ }
+ else if ((!may_unknown(back[b].r.contenttype)) || (!get_ext(back[b].url_fil)) ) { // on peut patcher à priori? (pas interdit ou pas de type)
+ give_mimext(s,back[b].r.contenttype); // obtenir extension
+ if (strnotempty(s)>0) { // on a reconnu l'extension
+ ext_chg=1;
+ strcpy(ext,s);
+ }
+ }
+ }
+ }
+ // FIN Si non déplacé, forcer type?
+
+ // libérer emplacement backing
+ back_delete(back,b);
+
+ // --- --- ---
+ // oops, a été déplacé.. on recalcule en récursif (osons!)
+ if (has_been_moved) {
+ // copier adr, fil (optionnel, mais sinon marche pas pour le rip)
+ strcpy(adr_complete,curr_adr);
+ strcpy(fil_complete,curr_fil);
+ // copier adr, fil
+
+ return url_savename(curr_adr,curr_fil,save,NULL,NULL,referer_adr,referer_fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
+ }
+ // --- --- ---
+
+ }
+
+ } else {
+ printf("PANIC! : Savename Crash adding error, unexpected error found.. [%d]\n",__LINE__);
+#if BDEBUG==1
+ printf("error while savename crash adding\n");
+#endif
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected savename backing error at %s%s"LF,adr,fil_complete);
+ test_flush;
+ }
+
+ }
+ // restaurer
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=hihp;
+#endif
+ } // caché?
+ }
+ }
+ }
+
+
+
+ // - - - DEBUT NOMMAGE - - -
+
+ // Donner nom par défaut?
+ if (fil[strlen(fil)-1]=='/') {
+ if (!strfield(adr_complete,"ftp://"))
+ strcat(fil,DEFAULT_HTML); // nommer page par défaut!!
+ else {
+ if (!opt->proxy.active)
+ strcat(fil,DEFAULT_FTP); // nommer page par défaut (texte)
+ else
+ strcat(fil,DEFAULT_HTML); // nommer page par défaut (à priori ici html depuis un proxy http)
+ }
+ }
+ // Changer extension?
+ // par exemple, php3 sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
+ if (ext_chg) { // changer ext
+ char* a=fil+strlen(fil)-1;
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug");
+ if (ext_chg==1)
+ fprintf(opt->log,"Changing link extension %s%s to .%s"LF,adr_complete,fil_complete,ext);
+ else
+ fprintf(opt->log,"Changing link name %s%s to %s"LF,adr_complete,fil_complete,ext);
+ test_flush;
+ }
+ if (ext_chg==1) {
+ while((a > fil) && (*a!='.') && (*a!='/')) a--;
+ if (*a=='.') *a='\0'; // couper
+ strcat(fil,"."); // recopier point
+ } else {
+ while(( a > fil) && (*a!='/')) a--;
+ if (*a=='/') a++;
+ *a='\0';
+ }
+ strcat(fil,ext); // copier ext/nom
+ }
+
+ // Rechercher premier / et dernier .
+ {
+ char* a=fil+strlen(fil)-1;
+
+ // passer structures
+ start_pos=fil;
+ while(( a > fil) && (*a != '/') && (*a != '\\')) {
+ if (*a == '.') // point? noter position
+ if (!dot_pos)
+ dot_pos=a;
+ a--;
+ }
+ if ((*a=='/') || (*a=='\\')) a++;
+ nom_pos = a;
+ }
+
+
+ // un nom de fichier est généré
+ // s'il existe déja, alors on le mofifie légèrement
+
+ // ajouter nom du site éventuellement en premier
+ if (opt->savename_type == -1) { // utiliser savename_userdef! (%h%p/%n%q.%t)
+ char* a = opt->savename_userdef;
+ char* b = save;
+ /*char *nom_pos=NULL,*dot_pos=NULL; // Position nom et point */
+ char tok;
+
+ /*
+ { // Rechercher premier /
+ char* a=fil+strlen(fil)-1;
+ // passer structures
+ while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) {
+ if (*a == '.') // point? noter position
+ if (!dot_pos)
+ dot_pos=a;
+ a--;
+ }
+ if ((*a=='/') || (*a=='\\')) a++;
+ nom_pos = a;
+ }
+ */
+
+ // Construire nom
+ while ((*a) && (((int) (b - save)) < HTS_URLMAXSIZE ) ) { // parser, et pas trop long..
+ if (*a == '%') {
+ int short_ver=0;
+ a++;
+ if (*a == 's') {
+ short_ver=1;
+ a++;
+ }
+ *b='\0';
+ switch(tok=*a++) {
+ case '[': // %[param]
+ if (strchr(a,']')) {
+ char name[256];
+ char* c=name;
+ while(*a!=']') {
+ *c++=*a++;
+ }
+ a++;
+ *c++='\0';
+ strcat(name,"="); /* param=.. */
+ c=strchr(fil_complete,'?');
+ /* parameters exists */
+ if (c) {
+ c=strstr(c,name); /* finds param= */
+ if (c) {
+ c+=strlen(name); /* jumps "param=" */
+ while( (*c) && (*c!='&'))
+ *b++=*c++;
+ }
+ }
+ }
+ break;
+ case '%': *b++='%'; break;
+ case 'n': // nom sans ext
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+ else
+ strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,nom_pos);
+ else
+ strncat(b,nom_pos,8);
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'N': // nom avec ext
+ // RECOPIE NOM + EXT
+ *b='\0';
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+ else
+ strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,nom_pos);
+ else
+ strncat(b,nom_pos,8);
+ }
+ b+=strlen(b); // pointer à la fin
+ // RECOPIE NOM + EXT
+ *b='\0';
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strcpy(b,dot_pos+1);
+ else
+ strncat(b,dot_pos+1,3);
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,DEFAULT_EXT); // pas de..
+ else
+ strcpy(b,DEFAULT_EXT_SHORT); // pas de..
+ }
+ b+=strlen(b); // pointer à la fin
+ //
+ break;
+ case 't': // ext
+ *b='\0';
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strcpy(b,dot_pos+1);
+ else
+ strncat(b,dot_pos+1,3);
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,DEFAULT_EXT); // pas de..
+ else
+ strcpy(b,DEFAULT_EXT_SHORT); // pas de..
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'p': // path sans dernier /
+ *b='\0';
+ if (nom_pos != fil + 1) { // pas: /index.html (chemin nul)
+ if (!short_ver) { // Noms longs
+ strncat(b,fil,(int) (nom_pos - fil) - 1);
+ } else {
+ char pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2];
+ pth[0]=n83[0]='\0';
+ //
+ strncat(pth,fil,(int) (nom_pos - fil) - 1);
+ long_to_83(opt->savename_83,n83,pth);
+ strcpy(b,n83);
+ }
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'h': // host
+ *b='\0';
+ if (strcmp(adr_complete,"file://")==0) {
+ if (!short_ver) // Noms longs
+ strcpy(b,"localhost");
+ else
+ strcpy(b,"local");
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,print_adr);
+ else
+ strncat(b,print_adr,8);
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'M': /* host/address?query MD5 (128-bits) */
+ *b='\0';
+ {
+ char digest[32+2];
+ char buff[HTS_URLMAXSIZE*2];
+ digest[0]=buff[0]='\0';
+ strcpy(buff,adr);
+ strcat(buff,fil_complete);
+ domd5mem(buff,strlen(buff),digest,1);
+ strcpy(b,digest);
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'Q': case 'q': /* query MD5 (128-bits/16-bits)
+ GENERATED ONLY IF query string exists! */
+ *b='\0';
+ strncat(b,url_md5(fil_complete),(tok == 'Q')?32:4);
+ b+=strlen(b); // pointer à la fin
+ break;
+ }
+ } else
+ *b++=*a++;
+ }
+ *b++='\0';
+ //
+ // Types prédéfinis
+ //
+
+ }
+ //
+ // Structure originale
+ else if (opt->savename_type%100==0) {
+ /* recopier www.. */
+ if (opt->savename_type!=100) {
+ if (((opt->savename_type/1000)%2)==0) { // >1000 signifie "pas de www/"
+ if (strcmp(adr_complete,"file://")==0) {
+ //## if (*adr==lOCAL_CHAR) {
+ if (opt->savename_83 != 1) // noms longs
+ strcat(save,"localhost");
+ else
+ strcat(save,"local");
+ } else {
+ // adresse url
+ if (!opt->savename_83) { // noms longs (et pas de .)
+ strcat(save,print_adr);
+ } else { // noms 8-3
+ if (strlen(print_adr)>4) {
+ if (strfield(print_adr,"www."))
+ strncat(save,print_adr+4,max_char);
+ else
+ strncat(save,print_adr,8);
+ } else strncat(save,print_adr,max_char);
+ }
+ }
+ if (*fil!='/') strcat(save,"/");
+ }
+ }
+
+#if HTS_CASSE==0
+ hts_lowcase(save);
+#endif
+
+ /*
+ // ne sert à rien car a déja été filtré normalement
+ if ((*fil=='.') && (*(fil+1)=='/')) // ./index.html ** //
+ url_savename_addstr(save,fil+2);
+ else // index.html ou /index.html
+ url_savename_addstr(save,fil);
+ if (save[strlen(save)-1]=='/')
+ strcat(save,DEFAULT_HTML); // nommer page par défaut!!
+*/
+
+ /* add name */
+ ADD_STANDARD_PATH;
+ ADD_STANDARD_NAME(0);
+
+ }
+ //
+ // Structure html/image
+ else {
+ // dossier "web" ou "www.xxx" ?
+ if (((opt->savename_type/1000)%2)==0) { // >1000 signifie "pas de www/"
+ if ((opt->savename_type/100)%2) {
+ if (strcmp(adr_complete,"file://")==0) {
+ //## if (*adr==lOCAL_CHAR) {
+ if (opt->savename_83 != 1) // noms longs
+ strcat(save,"localhost/");
+ else
+ strcat(save,"local/");
+ } else {
+ // adresse url
+ if (!opt->savename_83) { // noms longs
+ strcat(save,print_adr); strcat(save,"/");
+ } else { // noms 8-3
+ if (strlen(print_adr)>4) {
+ if (strfield(print_adr,"www."))
+ strncat(save,print_adr+4,max_char);
+ else
+ strncat(save,print_adr,max_char);
+ strcat(save,"/");
+ } else {
+ strncat(save,print_adr,max_char); strcat(save,"/");
+ }
+ }
+ }
+ } else {
+ strcat(save,"web/"); // répertoire général
+ }
+ }
+
+ // si un html à coup sûr
+ if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(fil)==1) ) {
+ if (opt->savename_type%100==2) { // html/
+ strcat(save,"html/");
+ }
+ } else {
+ if ((opt->savename_type%100==1) || (opt->savename_type%100==2)) { // html & images
+ strcat(save,"images/");
+ }
+ }
+
+ switch (opt->savename_type%100) {
+ case 4: case 5: { // séparer par types
+ char* a=fil+strlen(fil)-1;
+ // passer structures
+ while(( a > fil) && (*a != '/') && (*a != '\\')) a--;
+ if ((*a=='/') || (*a=='\\')) a++;
+
+ // html?
+ if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(fil)==1) ) {
+ if (opt->savename_type%100==5)
+ strcat(save,"html/");
+ } else {
+ char* a=fil+strlen(fil)-1;
+ while(( a> fil) && (*a != '/') && (*a != '.')) a--;
+ if (*a!='.')
+ strcat(save,"other");
+ else
+ strcat(save,a+1);
+ strcat(save,"/");
+ }
+ /*strcat(save,a);*/
+ /* add name */
+ ADD_STANDARD_NAME(0);
+ }
+ break;
+ case 99: { // 'codé' .. c'est un gadget
+ int i;
+ int j;
+ char* a;
+ char C[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-";
+ int L;
+ // pseudo-CRC sur fil et adr pour initialiser générateur aléatoire..
+ unsigned int s=0;
+ L=strlen(C);
+ for(i=0;i<(int) strlen(fil_complete);i++) {
+ s+=(unsigned int) fil_complete[i];
+ }
+ for(i=0;i<(int) strlen(adr_complete);i++) {
+ s+=(unsigned int) adr_complete[i];
+ }
+ srand(s);
+
+ j=strlen(save);
+ for(i=0;i<8;i++) {
+ char c=C[(rand()%L)];
+ save[i+j]=c;
+ }
+ save[i+j]='\0';
+ // ajouter extension
+ a=fil+strlen(fil)-1;
+ while(( a > fil) && (*a != '/') && (*a != '.')) a--;
+ if (*a=='.') {
+ strcat(save,a); // ajouter
+ }
+ }
+ break;
+ default: { // noms sans les noms des répertoires
+ // ne garder que le nom, pas la structure
+ /*
+ char* a=fil+strlen(fil)-1;
+ while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) a--;
+ if ((*a=='/') || (*a=='\\')) a++;
+ strcat(save,a);
+ */
+
+ /* add name */
+ ADD_STANDARD_NAME(0);
+ }
+ break;
+ }
+
+#if HTS_CASSE==0
+ hts_lowcase(save);
+#endif
+
+ if (save[strlen(save)-1]=='/')
+ strcat(save,DEFAULT_HTML); // nommer page par défaut!!
+ }
+
+
+ // vérifier qu'on ne doit pas forcer l'extension
+ // par exemple, asp sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
+ /*if (ext_chg) {
+ char* a=save+strlen(save)-1;
+ while(((int) a>(int) save) && (*a!='.') && (*a!='/')) a--;
+ if (*a=='.') *a='\0'; // couper
+ // recopier extension
+ strcat(save,".");
+ strcat(save,ext); // copier ext
+ }*/
+ // de même en cas de manque d'extension on en place une de manière forcée..
+ // cela évite les /chez/toto et les /chez/toto/index.html incompatibles
+ if (opt->savename_type != -1) {
+ char* a=save+strlen(save)-1;
+ while(( a > save) && (*a!='.') && (*a!='/')) a--;
+ if (*a!='.') { // agh pas de point
+ //strcat(save,".none"); // a éviter
+ strcat(save,".html"); // préférable!
+ if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Default HTML type set for %s%s"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ }
+ }
+
+ // effacer pass au besoin pour les autentifications
+ // (plus la peine : masqué au début)
+/*
+ {
+ char* a=jump_identification(save);
+ if (a!=save) {
+ char tempo[HTS_URLMAXSIZE*2];
+ char *b;
+ tempo[0]='\0';
+ strcpy(tempo,"[");
+ b=strchr(save,':');
+ if (!b) b=strchr(save,'@');
+ if (b)
+ strncat(tempo,save,(int) b-(int) a);
+ strcat(tempo,"]");
+ strcat(tempo,a);
+ strcpy(save,a);
+ }
+ }
+*/
+
+ // éviter les / au début (cause: N100)
+ if (save[0]=='/') {
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(tempo,save+1);
+ strcpy(save,tempo);
+ }
+
+ // changer les ~,:,",*,? en _ pour sauver sur disque
+ hts_replace(save,'~','_'); // interdit sous unix (~foo)
+ //
+ hts_replace(save,'\\','_');
+ hts_replace(save,':','_'); // interdit sous windows
+ hts_replace(save,'*','_'); // interdit sous windows
+ hts_replace(save,'?','_'); // doit pas arriver!!
+ hts_replace(save,'\"','_'); // interdit sous windows
+ hts_replace(save,'<','_'); // interdit sous windows
+ hts_replace(save,'>','_'); // interdit sous windows
+ hts_replace(save,'|','_'); // interdit sous windows
+ //
+ hts_replace(save,'@','_');
+ //
+ { // éliminer les // (comme ftp://)
+ char* a;
+ while( (a=strstr(save,"//")) ) *a='_';
+ // Eliminer chars spéciaux
+ a=save -1 ;
+ while(*(++a))
+ if ( ((unsigned char)(*a) <= 31)
+ || ((unsigned char)(*a) == 127) )
+ *a='_';
+ }
+
+
+#if HTS_OVERRIDE_DOS_FOLDERS
+ /* Replace /foo/nul/bar by /foo/nul-/bar */
+ {
+ int i=0;
+ while(hts_tbdev[i][0]) {
+ char* a=save;
+ while((a=strstr(a,hts_tbdev[i]))) {
+ switch ( (int) a[strlen(hts_tbdev[i])] ) {
+ case '\0':
+ case '/': {
+ char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0';
+ strncat(tempo,save,(int) (a - save) + strlen(hts_tbdev[i]));
+ strcat(tempo,"-");
+ strcat(tempo,a+strlen(hts_tbdev[i]));
+ strcpy(save,tempo);
+ }
+ break;
+ }
+ a+=strlen(hts_tbdev[i]);
+ }
+ i++;
+ }
+ }
+#endif
+
+ // conversion 8-3 .. y compris pour les répertoires
+ if (opt->savename_83) {
+ char n83[HTS_URLMAXSIZE*2];
+ long_to_83(opt->savename_83,n83,save);
+ strcpy(save,n83);
+ }
+
+
+ /* ensure that there is no ../ (potential vulnerability) */
+ fil_simplifie(save);
+
+#if HTS_ANALYSTE
+ {
+ hts_htmlcheck_savename(adr_complete,fil_complete,referer_adr,referer_fil,save);
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: save-name: local name: %s%s -> %s"LF,adr,fil,save);
+ test_flush;
+ }
+ }
+#endif
+
+ // chemin primaire éventuel A METTRE AVANT
+ if (strnotempty(opt->path_html)) {
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(tempo,opt->path_html);
+ strcat(tempo,save);
+ strcpy(save,tempo);
+ }
+
+
+ // vérifier que le nom n'est pas déja pris...
+ if (liens!=NULL) {
+ int nom_ok;
+ do {
+ int i;
+ int len;
+ len=strlen(save); // taille
+ //
+ nom_ok=1; // à priori bon
+ // on part de la fin pour optimiser, plus les opti de taille pour aller encore plus vite..
+#if DEBUG_SAVENAME
+printf("\nStart search\n");
+#endif
+
+#if HTS_HASH
+ i=hash_read(hash,save,"",0); // lecture type 0 (sav)
+ if (i>=0)
+#else
+ for(i=lien_tot-1;i>=0;i--) {
+#if DEBUG_SAVENAME
+printf("%cParse: %d",13,i);
+#endif
+
+ if (liens[i]->sav_len==len) { // même taille de chaîne
+#if HTS_CASSE
+ if (strcmp(liens[i]->sav,save)==0) // existe déja
+#else
+ if (strfield2(liens[i]->sav,save)) // un tel nom existe déja
+#endif
+#endif
+ {
+#if HTS_CASSE
+ if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0))
+#else
+ if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete)))
+#endif
+ { // ok c'est le même lien, adresse déja définie
+ //printf("Ok, %s\n",save);
+ //i=lien_tot; // sortir
+ i=0;
+#if DEBUG_SAVENAME
+printf("\nOK ALREADY DEFINED\n",13,i);
+#endif
+ } else { // utilisé par un AUTRE, changer de nom
+ char tempo[HTS_URLMAXSIZE*2];
+ char* a=save+strlen(save)-1;
+ char* b;
+ int n=2;
+ tempo[0]='\0';
+
+#if DEBUG_SAVENAME
+printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n",liens[i]->fil,fil_complete);
+#endif
+ nom_ok=0;
+ i=0;
+
+ while(( a > save) && (*a!='.') && (*a!='\\') && (*a!='/')) a--;
+ if (*a=='.')
+ strncat(tempo,save,(int) (a - save));
+ else
+ strcat(tempo,save);
+
+ // tester la présence d'un -xx (ex: index-2.html -> index-3.html)
+ b=tempo+strlen(tempo)-1;
+ while (isdigit((unsigned char)*b)) b--;
+ if (*b=='-') {
+ sscanf(b+1,"%d",&n);
+ *b='\0'; // couper
+ n++; // plus un
+ }
+
+ // en plus il faut gérer le 8-3 .. pas facile le client
+ if (opt->savename_83) {
+ int max;
+ char* a=tempo+strlen(tempo)-1;
+ while(( a > tempo) && (*a!='/')) a--;
+ if (*a=='/') a++;
+ max=max_char-1-nombre_digit(n);
+ if ((int) strlen(a)>max)
+ *(a+max)='\0'; // couper sinon il n'y aura pas la place!
+ }
+
+ // ajouter -xx (ex: index.html -> index-2.html)
+ sprintf(tempo+strlen(tempo),"-%d",n);
+
+ // ajouter extension
+ if (*a=='.')
+ strcat(tempo,a);
+
+ strcpy(save,tempo);
+
+ //printf("switched: %s\n",save);
+
+ } // if
+#if HTS_HASH
+ }
+#else
+ } // if
+ } // if sav_len
+ } // for
+#endif
+#if DEBUG_SAVENAME
+printf("\nEnd search, %s\n",fil_complete);
+#endif
+ } while(!nom_ok);
+
+ }
+
+ //printf("'%s' %s %s\n",save,adr,fil);
+
+ return 0;
+}
+
+/*
+  Standard name with md5, used everywhere: writes <name><md5><ext> into b.
+    b            destination buffer, overwritten from its first byte (size is not checked here)
+    dot_pos      position of the dot that starts the extension, or NULL when the name has none
+    nom_pos      start of the file name
+    fil_complete complete file part; url_md5() supplies at most 4 characters computed from it
+    short_ver    non-zero for short names (name cut to 8 characters, extension to 3)
+*/
+void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver) {
+  const int long_names = !short_ver;
+
+  b[0]='\0';
+
+  /* Name: everything before the dot, or the whole name when there is no dot */
+  if (dot_pos != NULL) {
+    if (long_names)
+      strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+    else
+      strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+  } else if (long_names) {
+    strcat(b,nom_pos);
+  } else {
+    strncat(b,nom_pos,8);
+  }
+
+  /* MD5 - 16 bits: first 4 characters of the digest string (nothing when it is empty) */
+  strncat(b,url_md5(fil_complete),4);
+
+  /* Extension */
+  if (dot_pos == NULL) {
+    /* no extension in the name: use the default one */
+    if (long_names)
+      strcat(b,DEFAULT_EXT);
+    else
+      strcat(b,DEFAULT_EXT_SHORT);
+  } else {
+    strcat(b,".");
+    if (long_names)
+      strcat(b,dot_pos+1);
+    else
+      strncat(b,dot_pos+1,3);
+  }
+}
+
+
+/*
+  Small md5: digest of the query string of fil_complete (the text after the first '?').
+  Returns a buffer reserved through NOSTATIC_RESERVE (34 bytes, owned by the engine:
+  callers must not free it). The buffer holds "" when fil_complete contains no '?'.
+  An empty query ("foo?") is still passed to domd5mem(), exactly as before.
+*/
+char* url_md5(char* fil_complete) {
+  char* digest;
+  char* a;
+  NOSTATIC_RESERVE(digest, char, 32+2);
+  digest[0]='\0';
+  a=strchr(fil_complete,'?');
+  if (a) {
+    /* The former strlen(a) test was always true here (a still pointed at '?'),
+       so dropping it does not change the result. */
+    char buff[HTS_URLMAXSIZE*2];
+    a++;                              /* skip the '?' itself */
+    buff[0]='\0';
+    /* Bounded copy: the query comes from a URL, never write past buff */
+    strncat(buff,a,sizeof(buff)-1);   /* query string MD5 */
+    domd5mem(buff,strlen(buff),digest,1);
+  }
+  return digest;
+}
+
+/*
+  Internal helper of url_savename: appends s to the end of d, turning every
+  backslash into a forward slash. d must have room for the result (not checked).
+*/
+void url_savename_addstr(char* d,char* s) {
+  char* out = d + strlen(d);          /* write position: current end of d */
+  for( ; *s != '\0' ; s++) {
+    *out++ = (*s=='\\') ? '/' : *s;   /* replace \ by / */
+  }
+  *out='\0';
+}
+
+#undef test_flush
diff --git a/src/htsname.h b/src/htsname.h
new file mode 100644
index 0000000..aae5f99
--- /dev/null
+++ b/src/htsname.h
@@ -0,0 +1,50 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* savename routine (compute output filename) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSNAME_DEFH
+#define HTSNAME_DEFH
+
+#include "htscore.h"
+
+/* Computes the local save name for adr_complete/fil_complete and writes it into 'save' */
+int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe);
+/* Writes <name><4 chars of query md5><ext> into b; short_ver selects the 8-3 form */
+void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver);
+/* Appends s to d, replacing every backslash by a slash */
+void url_savename_addstr(char* d,char* s);
+/* Returns the md5 string of the query part (after '?') of fil_complete, or "" if there is none */
+char* url_md5(char* fil_complete);
+
+#endif
diff --git a/src/htsnet.h b/src/htsnet.h
new file mode 100644
index 0000000..d12b1e4
--- /dev/null
+++ b/src/htsnet.h
@@ -0,0 +1,242 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Net definitions */
+/* Used in .c files that needs connect() functions and so */
+/* Note: includes htsbasenet.h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_DEFNETH
+#define HTS_DEFNETH
+
+/* basic net definitions */
+#include "htsbasenet.h"
+
+#include <ctype.h>
+#if HTS_WIN
+ // pour read
+ #include <io.h>
+ // pour FindFirstFile
+ #include <winbase.h>
+#else
+ //typedef int T_SOC;
+ #define INVALID_SOCKET -1
+ #include <netdb.h>
+ #include <sys/types.h>
+ #include <sys/socket.h>
+ #include <netinet/in.h>
+ #include <sys/time.h>
+ #include <sys/ioctl.h>
+ /* gethostname & co */
+ #include <unistd.h>
+ /* inet_addr */
+ #include <arpa/inet.h>
+ // pas la peine normalement..
+#if HTS_PLATFORM!=3
+ #include <sys/filio.h>
+#else
+#ifndef HTS_DO_NOT_REDEFINE_in_addr_t
+ typedef unsigned long in_addr_t;
+#endif
+#endif
+#ifndef min
+ #define min(a,b) ((a)>(b)?(b):(a))
+ #define max(a,b) ((a)>(b)?(a):(b))
+#endif
+/*
+  Sleep(a) for the non-Windows build: waits 'a' milliseconds, using usleep() for the
+  sub-second part and sleep() for the whole seconds.
+  Wrapped in do { } while(0) so that "if (x) Sleep(n); else ..." parses correctly;
+  the argument is evaluated once, and the arithmetic is done on a long so that
+  large delays do not overflow an int when converted to microseconds.
+*/
+#define Sleep(a) do { \
+  long hts_sleep_ms_ = (long) (a); \
+  if (hts_sleep_ms_ % 1000) usleep((hts_sleep_ms_ % 1000) * 1000); \
+  if (hts_sleep_ms_ / 1000) sleep(hts_sleep_ms_ / 1000); \
+} while(0)
+#endif
+
+/*
+ ** ipV4 **
+*/
+#if HTS_INET6==0
+
+/* Ipv4 structures */
+/* INaddr: address type used by this build (struct in_addr when HTS_INET6==0) */
+typedef struct in_addr INaddr;
+/* This should handle all cases */
+/* SOCaddr: one storage area viewed as sockaddr_in, generic sockaddr or raw bytes;
+   'pad' makes the union 128 bytes large */
+typedef struct {
+ union {
+ struct sockaddr_in in;
+ struct sockaddr sa;
+ unsigned char v4data[4];
+ unsigned char v6data[16];
+ unsigned char pad[128];
+ } m_addr;
+} SOCaddr;
+
+/* Ipv4 structure members */
+/* Accessors (usable as lvalues) on the sockaddr_in view of a SOCaddr */
+#define SOCaddr_sinaddr(server) ((server).m_addr.in.sin_addr)
+#define SOCaddr_sinfamily(server) ((server).m_addr.in.sin_family)
+#define SOCaddr_sinport(server) ((server).m_addr.in.sin_port)
+
+/* AF_xx */
+/* Address family used by this build */
+#define AFinet AF_INET
+
+/* Set port to sockaddr structure */
+/* 'port' is given in host byte order; htons() converts it to network byte order */
+#define SOCaddr_initport(server, port) do { \
+ SOCaddr_sinport(server) = htons((unsigned short int) (port)); \
+} while(0)
+
+/*
+  Copy sockaddr to another one
+
+  server      SOCaddr that receives the address
+  server_len  lvalue that receives the resulting length (0 when hpsize is not usable)
+  hpaddr      source bytes
+  hpsize      size of the source, which selects how it is interpreted:
+                sizeof(struct sockaddr_in)  a sockaddr_in: family and address are copied
+                                            (the port is not)
+                4                           a raw IPv4 address, family forced to AF_INET
+                1..sizeof(server)           copied verbatim
+  All macro arguments are parenthesized so that expressions can be passed safely;
+  hpsize and hpaddr are still evaluated several times.
+*/
+#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
+if ((hpsize) == sizeof(struct sockaddr_in)) { \
+  (server_len)=sizeof(struct sockaddr_in); \
+  SOCaddr_sinfamily(server) = (*(struct sockaddr_in*)(hpaddr)).sin_family; \
+  memcpy(&SOCaddr_sinaddr(server), &(*(struct sockaddr_in*)(hpaddr)).sin_addr, sizeof(SOCaddr_sinaddr(server))); \
+} else if ((hpsize) == 4) {\
+  (server_len)=sizeof(struct sockaddr_in); \
+  SOCaddr_sinfamily(server) = AF_INET; \
+  memcpy(&SOCaddr_sinaddr(server), (hpaddr), sizeof(SOCaddr_sinaddr(server))); \
+} else if (((hpsize) > 0) && ((hpsize) <= sizeof(server))) { \
+  (server_len)=(hpsize); \
+  memcpy(&(server), (hpaddr), (hpsize)); \
+} else { \
+  (server_len)=0; \
+} \
+} while(0)
+
+/*
+  Get dotted address
+
+  Writes the dotted form of the address held in 'ss' into namebuf, which is
+  namebuflen bytes large. namebuf is always NUL-terminated and is left empty
+  when inet_ntoa() returns NULL. The copy now honours namebuflen (the text is
+  truncated instead of overflowing), matching the IPv6 variant of this macro,
+  which passes namebuflen to getnameinfo(). sslen is unused in the IPv4 build.
+*/
+#define SOCaddr_inetntoa(namebuf, namebuflen, ss, sslen) do { \
+  char* dot = (char*) inet_ntoa(SOCaddr_sinaddr(ss)); \
+  (namebuf)[0]='\0'; \
+  if (dot && ((namebuflen) > 0)) { \
+    strncat((namebuf), dot, (namebuflen) - 1); \
+  } \
+} while(0)
+
+/* Get protocol ID */
+/* Always '1' in the IPv4-only build; both arguments are ignored */
+#define SOCaddr_getproto(ss, sslen) ('1')
+
+/*
+ ** ipV6 **
+*/
+#else
+
+/* IPv6 structures (this is the HTS_INET6 != 0 branch) */
+/* INaddr: address type used by this build (struct in6_addr) */
+typedef struct in6_addr INaddr;
+/* This should handle all cases */
+/* SOCaddr: one storage area viewed as sockaddr_in6, sockaddr_in, generic sockaddr
+   or raw bytes; 'pad' makes the union at least 128 bytes large */
+typedef struct {
+ union {
+ struct sockaddr_in6 in6;
+ struct sockaddr_in in;
+ struct sockaddr sa;
+ unsigned char v4data[4];
+ unsigned char v6data[16];
+ unsigned char pad[128];
+ } m_addr;
+} SOCaddr;
+
+/* IPv6 structure members: accessors (usable as lvalues) on the sockaddr_in6 view */
+#define SOCaddr_sinaddr(server) ((server).m_addr.in6.sin6_addr)
+#define SOCaddr_sinfamily(server) ((server).m_addr.in6.sin6_family)
+#define SOCaddr_sinport(server) ((server).m_addr.in6.sin6_port)
+#define SOCaddr_sinflowinfo(server) ((server).m_addr.in6.sin6_flowinfo)
+/* #define SOCaddr_sinscopeid(a) ((a).m_addr.in6.sin6_scope_id) */
+
+/* AF_xx */
+/* Address family used by this build */
+#define AFinet AF_INET6
+
+/* Set port to sockaddr structure */
+/* 'port' is given in host byte order; htons() converts it to network byte order */
+#define SOCaddr_initport(server, port) do { \
+ SOCaddr_sinport(server) = htons((unsigned short int) (port)); \
+} while(0)
+
+/*
+  Copy sockaddr to SOCaddr
+
+  server      SOCaddr that receives the address
+  server_len  lvalue that receives the resulting length (0 when hpsize is not usable)
+  hpaddr      source bytes
+  hpsize      size of the source, which selects how it is interpreted (tested in
+              this order):
+                sizeof(struct sockaddr_in6)   family, flow info and address are copied
+                larger, up to sizeof(server)  copied verbatim
+                sizeof(struct sockaddr_in)    IPv4 sockaddr: family AF_INET + address
+                4                             raw IPv4 address
+                16                            raw IPv6 address
+                1..sizeof(server)             copied verbatim
+
+  Note;
+  The '> sizeof(struct sockaddr_in6)' hack if for the VC6 structure which
+  lacks the scope id
+
+  That branch is now also bounded by sizeof(server): an oversized hpsize used to be
+  memcpy'd straight over the end of 'server'; it now ends in server_len=0.
+  All macro arguments are parenthesized so that expressions can be passed safely.
+
+  NOTE(review): where sizeof(struct sockaddr_in) is 16, the 'hpsize == 16' raw IPv6
+  branch can never be reached because the sockaddr_in test comes first; the two
+  cases cannot be told apart by size alone - confirm against the callers.
+*/
+#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
+if ((hpsize) == sizeof(struct sockaddr_in6)) { \
+  (server_len)=sizeof(struct sockaddr_in6); \
+  SOCaddr_sinfamily(server) = (*(struct sockaddr_in6*)(hpaddr)).sin6_family; \
+  SOCaddr_sinflowinfo(server) = (*(struct sockaddr_in6*)(hpaddr)).sin6_flowinfo; \
+  memcpy(&SOCaddr_sinaddr(server), &(*(struct sockaddr_in6*)(hpaddr)).sin6_addr, sizeof(SOCaddr_sinaddr(server))); \
+} else if (((hpsize) > sizeof(struct sockaddr_in6)) && ((hpsize) <= sizeof(server))) { \
+  (server_len)=(hpsize); \
+  memcpy(&(server), (hpaddr), (hpsize)); \
+} else if ((hpsize) == sizeof(struct sockaddr_in)) { \
+  (server_len)=sizeof(struct sockaddr_in); \
+  (*(struct sockaddr_in*)&(server)).sin_family = AF_INET; \
+  memcpy(&(*(struct sockaddr_in*)&(server)).sin_addr, &(*(struct sockaddr_in*)(hpaddr)).sin_addr, sizeof((*(struct sockaddr_in*)(hpaddr)).sin_addr)); \
+} else if ((hpsize) == 4) {\
+  (server_len)=sizeof(struct sockaddr_in); \
+  (*(struct sockaddr_in*)&(server)).sin_family = AF_INET; \
+  memcpy(&(*(struct sockaddr_in*)&(server)).sin_addr, (hpaddr), 4); \
+} else if ((hpsize) == 16) {\
+  (server_len)=sizeof(struct sockaddr_in6); \
+  SOCaddr_sinfamily(server) = AF_INET6; \
+  memcpy(&SOCaddr_sinaddr(server), (hpaddr), 16); \
+} else if (((hpsize) > 0) && ((hpsize) <= sizeof(server))) { \
+  (server_len)=(hpsize); \
+  memcpy(&(server), (hpaddr), (hpsize)); \
+} else { \
+  (server_len)=0; \
+} \
+} while(0)
+
+/*
+  Get dotted address
+
+  Writes the numeric form (NI_NUMERICHOST) of the address 'ss' (sslen bytes) into
+  namebuf, which is namebuflen bytes large. namebuf is left empty when
+  getnameinfo() reports an error, so callers never read a half-written buffer.
+*/
+#define SOCaddr_inetntoa(namebuf, namebuflen, ss, sslen) do { \
+  (namebuf)[0]='\0'; \
+  if (getnameinfo((struct sockaddr *)&(ss), (sslen), \
+                  (namebuf), (namebuflen), NULL, 0, NI_NUMERICHOST) != 0) { \
+    (namebuf)[0]='\0'; \
+  } \
+} while(0)
+
+/* Get protocol ID */
+/* '2' when sslen is the size of a sockaddr_in6, '1' otherwise; ss is ignored */
+#define SOCaddr_getproto(ss, sslen) (((sslen) == sizeof(struct sockaddr_in6))?('2'):('1'))
+
+#endif
+
+/* Buffer structure to copy various hostent structures */
+/* Self-contained hostent: after fullhostent_init(), hp.h_addr_list points to 'list',
+   whose single entry points to 'addr' */
+typedef struct {
+ t_hostent hp;
+ char* list[2];
+ char addr[HTS_MAXADDRLEN]; /* various struct sockaddr structures */
+ unsigned int addr_maxlen;
+} t_fullhostent;
+
+/* Initialize a t_fullhostent structure */
+/* h is a pointer to the structure: it is zeroed, then the address list is wired to the
+   embedded storage (list[0] = addr, list[1] = NULL) and addr_maxlen is set to
+   HTS_MAXADDRLEN */
+#define fullhostent_init(h) do { \
+memset((h), 0, sizeof(t_fullhostent)); \
+(h)->hp.h_addr_list = (char **) & ((h)->list); \
+(h)->list[0] = (char *) & ((h)->addr); \
+(h)->list[1] = NULL; \
+(h)->addr_maxlen = HTS_MAXADDRLEN; \
+} while(0)
+
+
+#endif
+
+
diff --git a/src/htsnostatic.c b/src/htsnostatic.c
new file mode 100644
index 0000000..5971d5d
--- /dev/null
+++ b/src/htsnostatic.c
@@ -0,0 +1,260 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsnostatic.c subroutines: */
+/* thread-safe routines for reentrancy */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htsnostatic.h"
+
+#include "htsbase.h"
+#include "htshash.h"
+
+/* Per-thread table: 'blocks' is filled through hts_setextvar() (name -> value);
+   NOSTATIC_XRESERVE registers the address of its function-specific key there */
+typedef struct {
+ /*
+ inthash values;
+ */
+ inthash blocks;
+} hts_varhash;
+
+#if USE_BEGINTHREAD
+/* Lock taken by hts_lockvar() and released by hts_unlockvar() */
+static PTHREAD_LOCK_TYPE hts_static_Mutex;
+#endif
+/* Set to 1 by the first hts_initvar() call; read by hts_maylockvar() */
+static int hts_static_Mutex_init=0;
+#if HTS_WIN
+#else
+/* Thread-specific key under which each thread stores its hts_varhash */
+static PTHREAD_KEY_TYPE hts_static_key;
+#endif
+
+/*
+  Initialize the per-thread variable support. Must be called once per thread.
+  The first call also initializes the global lock and, on non-Windows threaded
+  builds, creates the thread-specific key whose destructor is hts_destroyvar().
+  Every call on such builds then allocates a fresh hts_varhash for the calling
+  thread and registers it under that key.
+  Returns 1 on success, 0 when an allocation fails.
+*/
+int hts_initvar() {
+  if (!hts_static_Mutex_init) {
+    /* Init done */
+    hts_static_Mutex_init=1;
+#if USE_BEGINTHREAD
+    /* Init mutex */
+    htsSetLock(&hts_static_Mutex, -999);
+
+#if HTS_WIN
+#else
+    /* Init hash */
+    PTHREAD_KEY_CREATE(&hts_static_key, hts_destroyvar);
+#endif
+#endif
+  }
+
+  /* Set specific thread value */
+#if USE_BEGINTHREAD
+#if HTS_WIN
+#else
+  {
+    void* thread_val;
+    /* Size of the structure itself: the previous sizeof(hts_static_hash) was the
+       size of the pointer, which only matched by accident */
+    hts_varhash* hts_static_hash = (hts_varhash*) malloc(sizeof(*hts_static_hash));
+    if (!hts_static_hash)
+      return 0;
+    /*
+    hts_static_hash->values = inthash_new(HTS_VAR_MAIN_HASH);
+    if (!hts_static_hash->values)
+      return 0;
+    */
+    hts_static_hash->blocks = inthash_new(HTS_VAR_MAIN_HASH);
+    if (!hts_static_hash->blocks) {
+      free(hts_static_hash);          /* do not leak the table on failure */
+      return 0;
+    }
+    /* inthash_value_is_malloc(hts_static_hash->values, 0); */  /* Regular values */
+    inthash_value_is_malloc(hts_static_hash->blocks, 1);        /* We'll have to free them upon term! */
+    inthash_value_set_free_handler(hts_static_hash->blocks, hts_destroyvar_key);  /* free handler */
+    thread_val = (void*) hts_static_hash;
+
+    PTHREAD_KEY_SET(hts_static_key, thread_val, inthash);
+  }
+#endif
+#endif
+
+  return 1;
+}
+
+/*
+  Free handler installed on the per-thread hash table (see hts_initvar()).
+  adr is the address of a function-specific hts_NostaticComplexKey: the block
+  stored under its localKey for the calling thread is freed and the key is
+  marked as not initialized. Does nothing on Windows or when adr is NULL.
+*/
+void hts_destroyvar_key(void* adr) {
+#if HTS_WIN
+#else
+  hts_NostaticComplexKey* const slot = (hts_NostaticComplexKey*) adr;
+  void* payload = NULL;
+
+  if (slot == NULL)
+    return;
+
+  PTHREAD_KEY_GET(slot->localKey, &payload, void*);
+  if (payload != NULL) {
+    free(payload);                  /* Free block */
+  }
+  slot->localInit = 0;
+#endif
+}
+
+/*
+  Destructor registered with hts_static_key (see hts_initvar()).
+  ptrkey is the hts_varhash of a thread: the key is unregistered, the hash
+  table is destroyed and the structure is released.
+  Does nothing on Windows or when ptrkey is NULL.
+*/
+void hts_destroyvar(void* ptrkey) {
+#if HTS_WIN
+#else
+  hts_varhash* const tables = (hts_varhash*) ptrkey;
+
+  if (tables == NULL)
+    return;
+
+  PTHREAD_KEY_SET(hts_static_key, NULL, inthash);    /* unregister */
+
+  /* Destroy hash table: this also runs hts_destroyvar_key() on the stored keys */
+  inthash_delete(&(tables->blocks));
+  free(ptrkey);
+#endif
+}
+
+/*
+ destroy all key values (for the current thread)
+
+ NOTE(review): as written this function does nothing on any platform. The body
+ is only reached when HTS_WIN is set and is then disabled by '#if 0'. It also
+ refers to hts_static_key, which is only declared for non-Windows builds.
+ Always returns 1.
+*/
+int hts_freevar() {
+#if HTS_WIN
+#if 0
+ void* thread_val = NULL;
+ PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash);
+ hts_destroyvar(thread_val);
+ PTHREAD_KEY_SET(hts_static_key, NULL, inthash); /* unregister */
+ /*
+ PTHREAD_KEY_DELETE(hts_static_key); NO
+ */
+#endif
+#endif
+ return 1;
+}
+
+/*
+  Reset the per-thread variables: hts_freevar() followed by hts_initvar(),
+  both executed while holding the variable lock.
+  Returns the result of hts_initvar().
+*/
+int hts_resetvar() {
+  int status;
+
+  hts_lockvar();
+  hts_freevar();
+  status = hts_initvar();
+  hts_unlockvar();
+
+  return status;
+}
+
+/* Returns non-zero once hts_initvar() has been called at least once,
+   i.e. once hts_lockvar()/hts_unlockvar() may be used */
+int hts_maylockvar() {
+ return hts_static_Mutex_init;
+}
+
+/* Takes the global variable lock (htsSetLock with 1) on threaded builds;
+   does nothing otherwise. Always returns 1. */
+int hts_lockvar() {
+#if USE_BEGINTHREAD
+ htsSetLock(&hts_static_Mutex, 1);
+#endif
+ return 1;
+}
+
+/* Releases the global variable lock (htsSetLock with 0) on threaded builds;
+   does nothing otherwise. Always returns 1. */
+int hts_unlockvar() {
+#if USE_BEGINTHREAD
+ htsSetLock(&hts_static_Mutex, 0);
+#endif
+ return 1;
+}
+
+/* Stores an integer value under 'name' for the current thread
+   (hts_setextvar() with flag 0). The caller must hold the variable lock. */
+int hts_setvar(char* name, long int value) {
+ return hts_setextvar(name, (long int)value, 0);
+}
+
+/* Stores a pointer under 'name' for the current thread: the pointer is cast to
+   long int and passed to hts_setextvar() with flag 1.
+   The caller must hold the variable lock. */
+int hts_setblkvar(char* name, void* value) {
+ return hts_setextvar(name, (long int)value, 1);
+}
+
+/*
+ Writes name -> value into the 'blocks' table of the calling thread.
+ Nothing is written when the thread has no table (hts_initvar() not called),
+ and nothing at all is done on Windows. 'flag' is currently unused.
+ This function does not lock: the caller must hold the variable lock.
+ Always returns 1.
+*/
+int hts_setextvar(char* name, long int value, int flag) {
+#if HTS_WIN
+#else
+ void* thread_val = NULL;
+ hts_varhash* hashtables;
+
+ /*
+ hts_lockvar(); // NO - MUST be protected by caller
+ {
+ */
+ PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash);
+ hashtables = (hts_varhash*) thread_val;
+ if (hashtables) { // XXc XXC hack for win version
+ inthash_write(hashtables->blocks, name, value);
+ }
+#endif
+
+ return 1;
+}
+
+
+/* Reads the integer value stored under 'name' for the current thread into
+   *ptrvalue (hts_getextvar() with flag 0) */
+int hts_getvar(char* name, long int* ptrvalue) {
+ return hts_getextvar(name, (long int*)ptrvalue, 0);
+}
+
+/* Reads the pointer stored under 'name' for the current thread into *ptrvalue
+   (hts_getextvar() with flag 1).
+   NOTE(review): the void** is accessed through a long int*, which assumes that
+   long int and void* have the same size - confirm for every supported target. */
+int hts_getblkvar(char* name, void** ptrvalue) {
+ return hts_getextvar(name, (long int*)ptrvalue, 1);
+}
+
+/*
+  Reads the value stored under 'name' in the 'blocks' table of the calling
+  thread into *ptrvalue, while holding the variable lock.
+  *ptrvalue is left untouched when the thread has no table yet (hts_initvar()
+  not called for it): the table pointer is now checked before use, as
+  hts_setextvar() already does, instead of being dereferenced while NULL.
+  Nothing is done on Windows. 'flag' is currently unused. Always returns 1.
+*/
+int hts_getextvar(char* name, long int* ptrvalue, int flag) {
+#if HTS_WIN
+#else
+  void* thread_val = NULL;
+  hts_varhash* hashtables;
+
+  hts_lockvar();
+  {
+    PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash);
+    hashtables = (hts_varhash*) thread_val;
+    /* if (flag) {
+    */
+    if (hashtables) {
+      inthash_read(hashtables->blocks, name, ptrvalue);
+    }
+    /*
+    } else {
+      inthash_read(hashtables->values, name, ptrvalue);
+    }
+    */
+  }
+  hts_unlockvar();
+#endif
+
+  return 1;
+}
+
+/* Convenience form of hts_getvar(): returns the value stored under 'name',
+   or 0 when hts_getvar() leaves the value unset */
+long int hts_directgetvar(char* name) {
+ long int value=0;
+ hts_getvar(name, &value);
+ return value;
+}
+
+/* Convenience form of hts_getblkvar(): returns the pointer stored under 'name',
+   or NULL when hts_getblkvar() leaves the value unset */
+void* hts_directgetblkvar(char* name) {
+ void* value=NULL;
+ hts_getblkvar(name, &value);
+ return value;
+}
diff --git a/src/htsnostatic.h b/src/htsnostatic.h
new file mode 100644
index 0000000..6dbb072
--- /dev/null
+++ b/src/htsnostatic.h
@@ -0,0 +1,223 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsnostatic.c subroutines: */
+/* thread-safe routines for reentrancy */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+/*
+ Okay, with these routines, the engine should be fully reentrant (thread-safe)
+ All static references have been changed:
+
+ from
+ function foo() {
+ static bartype bar;
+ }
+ to:
+ function foo() {
+ bartype* bar;
+ NOSTATIC_RESERVE(bar, bartype, 1);
+ }
+*/
+
+#ifndef HTSNOSTATIC_DEFH
+#define HTSNOSTATIC_DEFH
+
+#include "htscore.h"
+#include "htsthread.h"
+
+/*
+#if USE_PTHREAD
+#if HTS_WIN
+#undef HTS_REENTRANT
+#else
+#define HTS_REENTRANT
+#endif
+#else
+#undef HTS_REENTRANT
+#endif
+*/
+
+#define HTS_VAR_MAIN_HASH 127
+
+/*
+ MutEx
+*/
+
+
+/* Magic per-thread variables functions
+
+ Example:
+ hts_lockvar();
+ hts_setvar("MyFoo", (long int)(void*)&foo);
+ hts_unlockvar();
+ ..
+ foo=(void*)(long int)hts_directgetvar("MyFoo");
+
+ Do not forget to initialize (hts_initvar()) the library once per thread
+*/
+/* Library setup / teardown and locking (all return an int status) */
+int hts_initvar(void);
+int hts_freevar(void);
+int hts_resetvar(void);
+int hts_maylockvar(void);
+int hts_lockvar(void);
+int hts_unlockvar(void);
+
+/* Integer values, stored per thread under a name */
+int hts_setvar(char* name, long int value);
+int hts_getvar(char* name, long int* ptrvalue);
+long int hts_directgetvar(char* name);
+
+/* Pointer values, stored per thread under a name */
+int hts_setblkvar(char* name, void* value);
+int hts_getblkvar(char* name, void** ptrvalue);
+void* hts_directgetblkvar(char* name);
+
+/* Internal */
+/* hts_setextvar / hts_getextvar: common implementation of the set/get functions above */
+int hts_setextvar(char* name, long int value, int flag);
+int hts_getextvar(char* name, long int* ptrvalue, int flag);
+/* hts_destroyvar: destructor for a thread's table; hts_destroyvar_key: free handler for its entries */
+void hts_destroyvar(void* ptrkey);
+void hts_destroyvar_key(void* adr);
+
+/*
+ Ensure that the variable 'name' has 'nelts' of type 'type' reserved
+ fnc is an UNIQUE function name
+*/
+#define NOSTATIC_RESERVE(name, type, nelt) NOSTATIC_XRESERVE(name, type, nelt)
+
+/*
+ Note:
+ Yes, we first read the localInit flag variable without MutEx protection,
+ for optimization purpose, because the flag is set once initialization DONE.
+ If the first read fails, we *securely* re-check and initialize *if* necessary.
+ The abort() things should NEVER be called, and are here for safety reasons
+*/
+/*
+ function-specific static cKey:
+ cKey = { localKey, localInit }
+ || \
+ \/ \ ==1 upon initialization
+ thread variable
+ ||
+ \/
+ void*
+ ||
+ \/
+ 'thread-static' value
+
+ the function-specific static cKey is also referenced in the global
+ hashtable for free() purpose: (see hts_destroyvar())
+
+ global static key variable
+ 'hts_static_key'
+ ||
+ \/
+ thread variable
+ ||
+ \/
+ void*
+ ||
+ \/
+ hashtable
+ ||
+ \/
+ function-specific hash key
+ ||
+ \/
+ &cKey
+
+*/
+#if HTS_WIN
+
+/* Windows: handled by the compiler */
+/* 'name' is pointed at a __declspec(thread) array of 'nelt' elements of 'type';
+   the array is explicitly zeroed the first time the macro is reached (guarded by
+   the thread-local initValue flag) */
+#define NOSTATIC_XRESERVE(name, type, nelt) do { \
+ __declspec( thread ) static type thValue[nelt]; \
+ __declspec( thread ) int static initValue = 0; \
+ name = thValue; \
+ if (!initValue) { \
+ initValue = 1; \
+ memset(&thValue, 0, sizeof(thValue)); \
+ } \
+} while(0)
+
+#else
+
+/* Un*x : slightly more complex, we have to create a thread-key */
+/* Function-specific static key: localKey is the thread-specific key that holds the
+   block, localInit is set to 1 once that key has been created */
+typedef struct {
+  PTHREAD_KEY_TYPE localKey;
+  unsigned char localInit;
+} hts_NostaticComplexKey;
+/*
+  NOSTATIC_XRESERVE(name, type, nelt): makes 'name' point to a zero-filled block of
+  'nelt' elements of 'type' that belongs to the calling thread. The block is
+  allocated with calloc() the first time a thread reaches the macro and is found
+  again through cKey.localKey on later calls. &cKey is also registered in the
+  thread's table (hts_setblkvar) so that the block can be freed at thread exit.
+  abort() is called when the library is not initialized or an allocation fails.
+
+  The thread-specific key is shared by all threads, so it is created only while
+  cKey.localInit is still 0. It used to be created again each time a new thread
+  reached the macro, which replaced the key the other threads were using: their
+  blocks were then lost and a key was consumed per thread.
+  (Assumes PTHREAD_KEY_CREATE wraps pthread_key_create() - confirm in htsthread.h.)
+*/
+#define NOSTATIC_XRESERVE(name, type, nelt) do { \
+static hts_NostaticComplexKey cKey={0,0}; \
+name = NULL; \
+if ( cKey.localInit ) { \
+  PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
+} \
+if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \
+  if (!hts_maylockvar()) { \
+    abort(); \
+  } \
+  hts_lockvar(); \
+  { \
+    { \
+      name = (type *) calloc((nelt), sizeof(type)); \
+      if (name == NULL) { \
+        abort(); \
+      } \
+      { \
+        char elt_name[64+8]; \
+        sprintf(elt_name, #name "_%d", (int) __LINE__); \
+        if ( ! cKey.localInit ) { \
+          PTHREAD_KEY_CREATE(&(cKey.localKey), NULL); \
+        } \
+        hts_setblkvar(elt_name, &cKey); \
+      } \
+      PTHREAD_KEY_SET(cKey.localKey, name, type*); \
+      name = NULL; \
+      PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
+      if (name == NULL) { \
+        abort(); \
+      } \
+      if ( ! cKey.localInit ) { \
+        cKey.localInit = 1; \
+      } \
+    } \
+  } \
+  hts_unlockvar(); \
+} \
+else { \
+  PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
+  if (name == NULL) { \
+    abort(); \
+  } \
+} \
+} while(0)
+#endif
+
+#endif
diff --git a/src/htsopt.h b/src/htsopt.h
new file mode 100644
index 0000000..13bc962
--- /dev/null
+++ b/src/htsopt.h
@@ -0,0 +1,186 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: HTTrack parameters block */
+/* Called by httrack.h and some other files */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTTRACK_DEFOPT
+#define HTTRACK_DEFOPT
+
+#include <stdio.h>
+#include "htsbasenet.h"
+#include "htsbauth.h"
+
+// Proxy structure (embedded in httrackp as the proxy configuration)
+typedef struct {
+ int active; // NOTE(review): presumably non-zero when the proxy is in use -- confirm
+ char name[1024]; // NOTE(review): presumably the proxy host name -- confirm
+ int port; // NOTE(review): presumably the proxy port -- confirm
+} t_proxy;
+
+/* Helper structure used to copy the parameters as one block
+   (httrackp uses it to hold the pointers for the filters) */
+typedef struct {
+ char*** filters; // NOTE(review): presumably the address of the filter string table -- confirm
+ int* filptr; // NOTE(review): presumably points to the current filter index/count -- confirm
+ //int* filter_max;
+} htsfilters;
+
+/* Mirror state structure */
+typedef struct {
+ int stop; // NOTE(review): presumably a stop-request flag for the mirror -- confirm
+} htsoptstate;
+
+
+/* HTTrack parameters (options): the parameters block announced in the
+   file header, one field per mirror setting */
+typedef struct {
+ int wizard; // wizard: none/large/small
+ int flush; // fflush on the log files
+ int travel; // travel type (same domain etc)
+ int seeker; // up & down
+ int depth; // number of recursion levels
+ int extdepth; // number of recursion levels outside (external)
+ int urlmode; // relative links etc
+ int debug; // debug log mode
+ int getmode; // save html, images..
+ FILE* log; // log file
+ FILE* errlog; // and error log file
+ LLint maxsite; // max site size
+ LLint maxfile_nonhtml; // max non-html size
+ LLint maxfile_html; // max html size
+ int maxsoc; // number of sockets
+ LLint fragment; // fragmentation of a site
+ int nearlink; // take images/data close to a page but located outside
+ int makeindex; // build an index
+ int kindex; // and a 'keyword' index
+ int delete_old; // delete old files
+ int timeout; // timeout, in seconds
+ int rateout; // minimum number of bytes for the transfer
+ int maxtime; // max time, in seconds
+ int maxrate; // max transfer rate
+ int maxconn; // max number of connections/s
+ int waittime; // scheduled start
+ int cache; // cache generation
+ int aff_progress; // progress bar
+ int shell; // shell handling through stdin/stdout pipe
+ t_proxy proxy; // proxy configuration
+ int savename_83; // 8-3 conversion for file names
+ int savename_type; // name type: original structure/html-images in a single level
+ char savename_userdef[256]; // user-defined structure (ex: %h%p/%n%q.%t)
+ int user_agent_send; // user agent (ex: httrack/1.0 [sun])
+ char user_agent[128];
+ char path_log[1024]; // path for cache and log
+ char path_html[1024]; // path for the mirror
+ char path_bin[1024]; // path for templates
+ int retry; // number of additional tries on failure
+ int makestat; // keep a transfer statistics log file up to date
+ int maketrack; // keep an operations statistics log file up to date
+ int parsejava; // parse java classes to fetch class, gif & co
+ int hostcontrol; // give up on a host that is too slow etc.
+ int errpage; // generate an error page on 404 etc.
+ int check_type; // if type is unknown (cgi,asp,/) then test the link (and handle moved if needed)
+ int all_in_cache; // put everything in cache!
+ int robots; // robots handling
+ int external; // external pages->error pages
+ int passprivacy; // no password in external links?
+ int includequery; // include the query-string
+ int mirror_first_page; // mirror of the links
+ char sys_com[2048]; // system command
+ int sys_com_exec; // execute the command
+ int accept_cookie; // cookie handling
+ t_cookie* cookie;
+ int http10; // force http 1.0
+ int nocompression; // no compression
+ int sizehack; // force an "updated" response if the size is identical
+ int tolerant; // accept incorrect content-length
+ int parseall; // try to parse everything (unknown tags containing links, for example)
+ int norecatch; // do not fetch again files deleted locally by the user
+ int verbosedisplay; // text animation
+ char footer[256]; // info line
+ int maxcache; // maximum in memory for the cache (backing)
+ //int maxcache_anticipate; // maximum number of links to anticipate (upper bound)
+ int ftp_proxy; // http proxy for ftp
+ char filelist[1024]; // file listing the URLs to include
+ htsfilters filters; // holds the pointers for the filters
+ void* robotsptr; // robots ptr
+ char lang_iso[64]; // en, fr ..
+ char mimedefs[2048]; // ext1=mimetype1\next2=mimetype2..
+ //
+ int maxlink; // max number of links
+ int maxfilter; // max number of filters
+ //
+ char* exec; // address of the executable's name
+ //
+ int quiet; // ask questions other than the wizard ones?
+ int keyboard; // check stdin
+ //
+ int is_update; // this is an update (display "File updated...")
+ int dir_topindex; // rebuild the top index afterwards
+ //
+ htsoptstate state; // state
+} httrackp;
+
+// stats for httrack
+typedef struct {
+ LLint HTS_TOTAL_RECV; // incoming stream received
+ LLint stat_bytes; // bytes written to disk
+ // int HTS_TOTAL_RECV_STATE; // status: 0 all is fine 1: slow down a bit 2: slow down 3: a lot
+ TStamp stat_timestart; // start
+ //
+ LLint total_packed; // compressed incoming stream received
+ LLint total_unpacked; // original comment repeats the total_packed one; NOTE(review): presumably the uncompressed size -- confirm
+ int total_packedfiles; // compressed files
+ //
+ TStamp istat_timestart[2]; // start for the instantaneous computation
+ LLint istat_bytes[2]; // computation for the instantaneous value
+ TStamp istat_reference01; // start signal given by #0 to #1
+ int istat_idlasttimer; // id of the timer that most recently gave a stat
+ //
+ int stat_files; // number of files written
+ int stat_updated_files; // number of updated files
+ //
+ int stat_nsocket; // number of sockets
+ int stat_errors; // number of errors
+ int stat_errors_front; // same, but at the very first level
+ int stat_warnings; // '' warnings
+ int stat_infos; // '' infos
+ int nbk; // files anticipated in the background and finished
+ LLint nb; // data currently transferred (estimate)
+ //
+ LLint rate;
+} hts_stat_struct;
+
+
+#endif
+
diff --git a/src/htsparse.c b/src/htsparse.c
new file mode 100644
index 0000000..b012a8d
--- /dev/null
+++ b/src/htsparse.c
@@ -0,0 +1,2377 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Main source */
+/* DIRECT INCLUDE TO httrack.c */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#if HTS_ANALYSTE
+if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
+#endif
+ FILE* fp=NULL; // fichier écrit localement
+ char* adr=r.adr; // pointeur (on parcourt)
+ char* lastsaved; // adresse du dernier octet sauvé + 1
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"scan file.."LF); test_flush;
+ }
+
+
+ // Indexing!
+#if HTS_MAKE_KEYWORD_INDEX
+ if (opt.kindex) {
+ if (index_keyword(r.adr,r.size,r.contenttype,savename,opt.path_html)) {
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..done"LF); test_flush;
+ }
+ } else {
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..error!"LF); test_flush;
+ }
+ }
+ }
+#endif
+
+ // Now, parsing
+ if ((opt.getmode & 1) && (ptr>0)) { // récupérer les html sur disque
+ // créer le fichier html local
+ HT_ADD_FOP; // écrire peu à peu le fichier
+ }
+
+ if (!error) {
+ int detect_title=0; // détection du title
+ //
+ char* in_media=NULL; // in other media type (real media and so..)
+ int intag=0; // on est dans un tag
+ int incomment=0; // dans un <!--
+ int inscript=0; // dans un scipt pour applets javascript)
+ int inscript_tag=0; // on est dans un <body onLoad="... terminé par >
+ char inscript_tag_lastc='\0';
+ // terminaison (" ou ') du "<body onLoad=.."
+ int inscriptgen=0; // on est dans un code générant, ex après obj.write("..
+ char scriptgen_q='\0'; // caractère faisant office de guillemet (' ou ")
+ int no_esc_utf=0; // ne pas echapper chars > 127
+ int nofollow=0; // ne pas scanner
+ //
+ int parseall_lastc='\0'; // dernier caractère parsé pour parseall
+ int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";)
+ //
+ char* intag_start=adr;
+ char* intag_startattr=NULL;
+ int intag_start_valid=0;
+ HT_ADD_START; // débuter
+
+
+ /* statistics */
+ if ((opt.getmode & 1) && (ptr>0)) {
+ /*
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r.size;
+ */
+ }
+
+ /* Primary list of URLs */
+ if (ptr == 0) {
+ intag=1;
+ intag_start_valid=0;
+ }
+ /* Check if the file is a JavaScript (.js) or CSS file */
+ else if (
+ (strfield2(r.contenttype,"application/x-javascript")!=0)
+ || (strfield2(r.contenttype,"text/css")!=0)
+ ) { /* JavaScript js file */
+ inscript=1;
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"note: this file is a javascript file"LF); test_flush;
+ }
+ }
+ /* Or a real audio */
+ else if (strfield2(r.contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */
+ inscript=intag=1;
+ intag_start_valid=0;
+ in_media="RAM"; // real media!
+ }
+ // Detect UTF8 format
+ if (is_unicode_utf8((unsigned char*) r.adr, (unsigned int) r.size) == 1) {
+ no_esc_utf=1;
+ } else {
+ no_esc_utf=0;
+ }
+ // Hack to prevent any problems with ram files of other files
+ * ( r.adr + r.size ) = '\0';
+
+
+ // ------------------------------------------------------------
+ // analyser ce qu'il y a en mémoire (fichier html)
+ // on scanne les balises
+ // ------------------------------------------------------------
+#if HTS_ANALYSTE
+ _hts_in_html_done=0; // 0% scannés
+ _hts_cancel=0; // pas de cancel
+ _hts_in_html_parsing=1; // flag pour indiquer un parsing
+#endif
+ base[0]='\0'; // effacer base-href
+ lastsaved=adr;
+ do {
+ int p=0;
+ int valid_p=0; // force to take p even if == 0
+ int ending_p='\0'; // ending quote?
+ error=0;
+
+ /* Hack to avoid NULL char problems with C syntax */
+ /* Yes, some bogus HTML pages can embed null chars
+ and therefore can not be properly handled if this hack is not done
+ */
+ if ( ! (*adr) ) {
+ if ( ((int) (adr - r.adr)) < r.size)
+ *adr=' ';
+ }
+
+
+
+ /*
+ index.html built here
+ */
+ // Construction index.html (sommaire)
+ // Avant de tester les a href,
+ // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
+ if (!makeindex_done) { // autoriation d'écrire un index
+ if (!detect_title) {
+ if (opt.depth == liens[ptr]->depth) { // on note toujours les premiers liens
+ if (!in_media) {
+ if (opt.makeindex && (ptr>0)) {
+ if (opt.getmode & 1) { // autorisation d'écrire
+ p=strfield(adr,"title");
+ if (p) {
+ if (*(adr-1)=='/') p=0; // /title
+ } else {
+ if (strfield(adr,"/html"))
+ p=-1; // noter, mais sans titre
+ else if (strfield(adr,"body"))
+ p=-1; // noter, mais sans titre
+ else if ( ((int) (adr - r.adr) ) >= (r.size-1) )
+ p=-1; // noter, mais sans titre
+ else if ( (int) (adr - r.adr) >= r.size - 2) // we got to hurry
+ p=-1; // xxc xxc xxc
+ }
+ } else
+ p=0;
+
+ if (p) { // ok center
+ if (makeindex_fp==NULL) {
+ verif_backblue(opt.path_html); // générer gif
+ makeindex_fp=filecreate(fconcat(opt.path_html,"index.html"));
+ if (makeindex_fp!=NULL) {
+
+ // Header
+ fprintf(makeindex_fp,template_header,
+ "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
+ );
+
+ } else makeindex_done=-1; // fait, erreur
+ }
+
+ if (makeindex_fp!=NULL) {
+ char tempo[HTS_URLMAXSIZE*2];
+ char s[HTS_URLMAXSIZE*2];
+ char* a=NULL;
+ char* b=NULL;
+ s[0]='\0';
+ if (p>0) {
+ a=strchr(adr,'>');
+ if (a!=NULL) {
+ a++;
+ while(is_space(*a)) a++; // sauter espaces & co
+ b=strchr(a,'<'); // prochain tag
+ }
+ }
+ if (lienrelatif(tempo,liens[ptr]->sav,concat(opt.path_html,"index.html"))==0) {
+ detect_title=1; // ok détecté pour cette page!
+ makeindex_links++; // un de plus
+ strcpy(makeindex_firstlink,tempo);
+ //
+ if ((b==a) || (a==NULL) || (b==NULL)) { // pas de titre
+ strcpy(s,tempo);
+ } else if ((b-a)<256) {
+ b--;
+ while(is_space(*b)) b--;
+ strncpy(s,a,b-a+1);
+ *(s+(b-a)+1)='\0';
+ }
+
+ // Body
+ fprintf(makeindex_fp,template_body,
+ tempo,
+ s
+ );
+
+ }
+ }
+ }
+ }
+ }
+
+ } else if (liens[ptr]->depth<opt.depth) { // on a sauté level1+1 et level1
+ HT_INDEX_END;
+ }
+ } // if (opt.makeindex)
+ }
+ // FIN Construction index.html (sommaire)
+ /*
+ end -- index.html built here
+ */
+
+
+
+ /* Parse */
+ if (
+ (*adr=='<') /* No starting tag */
+ && (!inscript) /* Not in (java)script */
+ && (!incomment) /* Not in comment (<!--) */
+ ) {
+ intag=1;
+ parseall_incomment=0;
+ //inquote=0; // effacer quote
+ intag_start=adr; intag_start_valid=1;
+ codebase[0]='\0'; // effacer éventuel codebase
+
+ if (opt.getmode & 1) { // sauver html
+ p=strfield(adr,"</html");
+ if (p==0) p=strfield(adr,"<head>");
+ // if (p==0) p=strfield(adr,"<doctype");
+ if (p) {
+ if (strnotempty(opt.footer)) {
+ char tempo[1024+HTS_URLMAXSIZE*2];
+ char gmttime[256];
+ char* eol="\n";
+ tempo[0]='\0';
+ if (strchr(r.adr,'\r'))
+ eol="\r\n";
+ time_gmt_rfc822(gmttime);
+ strcat(tempo,eol);
+ sprintf(tempo+strlen(tempo),opt.footer,jump_identification(urladr),urlfil,gmttime,"","","","","","","","");
+ strcat(tempo,eol);
+ //fwrite(tempo,1,strlen(tempo),fp);
+ HT_ADD(tempo);
+ }
+ }
+ }
+
+ // éliminer les <!-- (commentaires) : intag dévalidé
+ if (*(adr+1)=='!')
+ if (*(adr+2)=='-')
+ if (*(adr+3)=='-') {
+ intag=0;
+ incomment=1;
+ intag_start_valid=0;
+ }
+
+ }
+ else if (
+ (*adr=='>') /* ending tag */
+ && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */
+ ) {
+ if (inscript_tag) {
+ inscript_tag=inscript=0;
+ intag=0;
+ incomment=0;
+ intag_start_valid=0;
+ } else if (!incomment) {
+ intag=0; //inquote=0;
+
+ // entrée dans du javascript?
+ // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
+ //if (!inscript) { // sinon on est dans un obj.write("..
+ if ((intag_start_valid) &&
+ (
+ check_tag(intag_start,"script")
+ ||
+ check_tag(intag_start,"style")
+ )
+ ) {
+ char* a=intag_start; // <
+ // ** while(is_realspace(*(--a)));
+ if (*a=='<') { // sûr que c'est un tag?
+ inscript=1;
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ }
+ }
+ } else { /* end of comment? */
+ // vérifier fermeture correcte
+ if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
+ intag=0;
+ incomment=0;
+ intag_start_valid=0;
+ }
+#if GT_ENDS_COMMENT
+ /* wrong comment ending */
+ else {
+ /* check if correct ending does not exists
+ <!-- foo > example <!-- bar > is sometimes accepted by browsers
+ when no --> is used somewhere else.. darn those browsers are dirty
+ */
+ if (!strstr(adr,"-->")) {
+ intag=0;
+ incomment=0;
+ intag_start_valid=0;
+ }
+ }
+#endif
+ }
+ //}
+ }
+ //else if (*adr==34) {
+ // inquote=(inquote?0:1);
+ //}
+ else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag
+ int p_type=0;
+ int p_nocatch=0;
+ int p_searchMETAURL=0; // chercher ..URL=<url>
+ int add_class=0; // ajouter .class
+ int add_class_dots_to_patch=0; // number of '.' in code="x.y.z<realname>"
+ char* p_flush=NULL;
+
+
+ // ------------------------------------------------------------
+ // parsing évolé
+ // ------------------------------------------------------------
+ if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester..
+
+
+ /* caractère de terminaison pour "miniparsing" javascript=.. ?
+ (ex: <a href="javascript:()" action="foo"> ) */
+ if (inscript_tag) {
+ if (inscript_tag_lastc) {
+ if (*adr == inscript_tag_lastc) {
+ /* sortir */
+ inscript_tag=inscript=0;
+ incomment=0;
+ }
+ }
+ }
+
+
+ // Note:
+ // Certaines pages ne respectent pas le html
+ // notamment les guillements ne sont pas fixés
+ // Nous sommes dans un tag, donc on peut faire un test plus
+ // large pour pouvoi prendre en compte ces particularités
+
+ // à vérifier: ACTION, CODEBASE, VRML
+
+ if (in_media) {
+ if (strcmp(in_media,"RAM")==0) { // real media
+ p=0;
+ valid_p=1;
+ }
+ } else if (ptr>0) { /* pas première page 0 (primary) */
+ p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p
+
+ // ------------------------------
+ // détection d'écriture JavaScript.
+ // osons les obj.write et les obj.href=.. ! osons!
+ // note: inscript==1 donc on sautera après les \"
+ if (inscript) {
+ if (inscriptgen) { // on est déja dans un objet générant..
+ if (*adr==scriptgen_q) { // fermeture des " ou '
+ if (*(adr-1)!='\\') { // non
+ inscriptgen=0; // ok parsing terminé
+ }
+ }
+ } else {
+ char* a=NULL;
+ char check_this_fking_line=0; // parsing code javascript..
+ char must_be_terminated=0; // caractère obligatoire de terminaison!
+ int token_size;
+ if (!(token_size=strfield(adr,".writeln"))) // détection ...objet.write[ln]("code html")...
+ token_size=strfield(adr,".write");
+ if (token_size) {
+ a=adr+token_size;
+ while(is_realspace(*a)) a++; // sauter espaces
+ if (*a=='(') { // début parenthèse
+ check_this_fking_line=2; // à parser!
+ must_be_terminated=')';
+ a++; // sauter (
+ }
+ }
+ // euhh ??? ???
+ /* else if (strfield(adr,".href")) { // détection ...objet.href="...
+ a=adr+5;
+ while(is_realspace(*a)) a++; // sauter espaces
+ if (*a=='=') { // ohh un égal
+ check_this_fking_line=1; // à noter!
+ must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder
+ a++; // sauter =
+ }
+
+ }*/
+
+ // on a un truc du genre instruction"code généré" dont on parse le code
+ if (check_this_fking_line) {
+ while(is_realspace(*a)) a++;
+ if ((*a=='\'') || (*a=='"')) { // départ de '' ou ""
+ char *b;
+ int ex=0;
+ scriptgen_q=*a; // quote
+ b=a+1; // départ de la chaîne
+ // vérifier forme ("code") et pas ("code"+var), ingérable
+ do {
+ a++; // caractère suivant
+ if (*a==scriptgen_q) if (*(a-1)!='\\') // quote non slash
+ ex=1; // sortie
+ if ((*a==10) || (*a==13))
+ ex=1;
+ } while(!ex);
+ if (*a==scriptgen_q) { // fin du quote
+ a++;
+ while(is_realspace(*a)) a++;
+ if (*a==must_be_terminated) { // parenthèse fermante: ("..")
+
+ // bon, on doit parser une ligne javascript
+ // 1) si check.. ==1 alors c'est un nom de fichier direct, donc
+ // on fixe p sur le saut nécessaire pour atteindre le nom du fichier
+ // et le moteur se débrouillera ensuite tout seul comme un grand
+ // 2) si check==2 c'est un peu plus tordu car là on génére du
+ // code html au sein de code javascript au sein de code html
+ // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle
+ // on devra parser les instructions standard comme <a href etc
+ // NOTE: le code javascript autogénéré n'est pas pris en compte!!
+ // (et ne marche pas dans 50% des cas de toute facon!)
+ if (check_this_fking_line==1) {
+ p=(int) (b - adr); // calculer saut!
+ } else {
+ inscriptgen=1; // SCRIPTGEN actif
+ adr=b; // jump
+ }
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ char str[512];
+ str[0]='\0';
+ strncat(str,b,minimum((int) (a - b + 1), 32));
+ fspc(opt.log,"debug"); fprintf(opt.log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
+ }
+ }
+
+ }
+
+ }
+
+
+ }
+ }
+ }
+ // fin detection code générant javascript vers html
+ // ------------------------------
+
+
+ // analyse proprement dite, A HREF=.. etc..
+ if (!p) {
+ // si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
+ if ((intag && (!inscript)) || inscriptgen) {
+ if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc
+ // <A HREF=.. pour les liens HTML
+ p=rech_tageq(adr,"href");
+ if (p) { // href.. tester si c'est une bas href!
+ if ((intag_start_valid) && check_tag(intag_start,"base")) { // oui!
+ // ** note: base href et codebase ne font pas bon ménage..
+ p_type=2; // c'est un chemin
+ }
+ }
+
+ /* Tags supplémentaires à vérifier (<img src=..> etc) */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detect[i])) ) {
+ p=rech_tageq(adr,hts_detect[i]);
+ i++;
+ }
+ }
+
+ /* Tags supplémentaires en début à vérifier (<object .. hotspot1=..> etc) */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectbeg[i])) ) {
+ p=rech_tageqbegdigits(adr,hts_detectbeg[i]);
+ i++;
+ }
+ }
+
+ /* Tags supplémentaires à vérifier : URL=.. */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectURL[i])) ) {
+ p=rech_tageq(adr,hts_detectURL[i]);
+ i++;
+ }
+ if (p)
+ p_searchMETAURL=1;
+ }
+
+ /* Tags supplémentaires à vérifier, mais à ne pas capturer */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectandleave[i])) ) {
+ p=rech_tageq(adr,hts_detectandleave[i]);
+ i++;
+ }
+ if (p)
+ p_nocatch=1; /* ne pas rechercher */
+ }
+
+ /* Evénements */
+ if (p==0) {
+ int i=0;
+ /* détection onLoad etc */
+ while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
+ p=rech_tageq(adr,hts_detect_js[i]);
+ i++;
+ }
+ /* non détecté - détecter également les onXxxxx= */
+ if (p==0) {
+ if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
+ p=0;
+ while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
+ if (p<64) {
+ while(is_space(adr[p])) p++;
+ if (adr[p]=='=')
+ p++;
+ else p=0;
+ } else p=0;
+ }
+ }
+ /* OK, événement repéré */
+ if (p) {
+ inscript_tag_lastc=*(adr+p); /* à attendre à la fin */
+ adr+=p; /* saut */
+ /*
+ On est désormais dans du code javascript
+ */
+ inscript_tag=inscript=1;
+ }
+ p=0; /* quoi qu'il arrive, ne rien démarrer ici */
+ }
+
+ // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire]
+ if (p==0) {
+ p=rech_tageq(adr,"code");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-1; // juste le nom de fichier+dossier, écire avant codebase
+ add_class=1; // ajouter .class au besoin
+
+ // vérifier qu'il n'y a pas de codebase APRES
+ // sinon on swappe les deux.
+ // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
+
+ {
+ char *a;
+ a=adr;
+ while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
+ if (rech_tageq(a,"codebase")) { // banzai! codebase=
+ char* b;
+ b=strchr(a,'>');
+ if (b) {
+ if (((int) (b - adr)) < 1000) { // au total < 1Ko
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,a,(int) (b - a) );
+ strcat( tempo," ");
+ strncat(tempo,adr,(int) (a - adr - 1));
+ // éventuellement remplire par des espaces pour avoir juste la taille
+ while((int) strlen(tempo)<((int) (b - adr)))
+ strcat(tempo," ");
+ // pas d'erreur?
+ if ((int) strlen(tempo) == ((int) (b - adr) )) {
+ strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin!
+ p=0; // DEVALIDER!!
+ p_type=0;
+ add_class=0;
+ }
+ }
+ }
+ }
+ }
+
+ }
+ }
+ }
+
+ // liens à patcher mais pas à charger (ex: codebase)
+ if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même
+ p=rech_tageq(adr,"codebase");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-2;
+ } else p=-1; // ne plus chercher
+ }
+ }
+
+
+ // Meta tags pour robots
+ if (p==0) {
+ if (opt.robots) {
+ if ((intag_start_valid) && check_tag(intag_start,"meta")) {
+ if (rech_tageq(adr,"name")) { // name=robots.txt
+ char tempo[1100];
+ char* a;
+ tempo[0]='\0';
+ a=strchr(adr,'>');
+#if DEBUG_ROBOTS
+ printf("robots.txt meta tag detected\n");
+#endif
+ if (a) {
+ if (((int) (a - adr)) < 999 ) {
+ strncat(tempo,adr,(int) (a - adr));
+ if (strstrcase(tempo,"content")) {
+ if (strstrcase(tempo,"robots")) {
+ if (strstrcase(tempo,"nofollow")) {
+#if DEBUG_ROBOTS
+ printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
+#endif
+ nofollow=1; // NE PLUS suivre liens dans cette page
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // entrée dans une applet javascript
+ /*if (!inscript) { // sinon on est dans un obj.write("..
+ if (p==0)
+ if (rech_sampletag(adr,"script"))
+ if (check_tag(intag_start,"script")) {
+ inscript=1;
+ }
+ }*/
+
+ // Ici on procède à une analyse du code javascript pour tenter de récupérer
+ // certains fichiers évidents.
+ // C'est devenu obligatoire vu le nombre de pages qui intègrent
+ // des images réactives par exemple
+ }
+ } else if (inscript) {
+ if (
+ (
+ (strfield(adr,"/script"))
+ ||
+ (strfield(adr,"/style"))
+ )
+ ) {
+ char* a=adr;
+ //while(is_realspace(*(--a)));
+ while( is_realspace(*a) ) a--;
+ a--;
+ if (*a=='<') { // sûr que c'est un tag?
+ inscript=0;
+ }
+ } else {
+ /*
+ Script Analyzing - different types supported:
+ foo="url"
+ foo("url") or foo(url)
+ foo "url"
+ */
+ int nc;
+ char expected = '='; // caractère attendu après
+ char* expected_end = ";";
+ int can_avoid_quotes=0;
+ char quotes_replacement='\0';
+ if (inscript_tag)
+ expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'"
+ nc = strfield(adr,".src"); // nom.src="image";
+ if (!nc) nc = strfield(adr,".location"); // document.location="doc"
+ if (!nc) nc = strfield(adr,".href"); // document.location="doc"
+ if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
+ expected='('; // parenthèse
+ expected_end="),"; // fin: virgule ou parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url")
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,"url")) ) { // url(url)
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ can_avoid_quotes=1;
+ quotes_replacement=')';
+ }
+ if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url"
+ if (is_space(*(adr+nc))) {
+ expected=0; // no char expected
+ } else
+ nc=0;
+ }
+ if (nc) {
+ char *a;
+ a=adr+nc;
+ while(is_realspace(*a)) a++;
+ if ((*a == expected) || (!expected)) {
+ if (expected)
+ a++;
+ while(is_realspace(*a)) a++;
+ if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) {
+ char *b,*c;
+ int ndelim=1;
+ if ((*a==34) || (*a=='\''))
+ a++;
+ else
+ ndelim=0;
+ b=a;
+ if (ndelim) {
+ while((*b!=34) && (*b!='\'') && (*b!='\0')) b++;
+ }
+ else {
+ while((*b != quotes_replacement) && (*b!='\0')) b++;
+ }
+ c=b--; c+=ndelim;
+ while(*c==' ') c++;
+ if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
+ c-=(ndelim+1);
+ if ((int) (c - a + 1)) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ char str[512];
+ str[0]='\0';
+ strncat(str,a,minimum((int) (c - a + 1),32));
+ fspc(opt.log,"debug"); fprintf(opt.log,"link detected in javascript: %s"LF,str); test_flush;
+ }
+ p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER
+ if (can_avoid_quotes) {
+ ending_p=quotes_replacement;
+ }
+ }
+ }
+
+
+ }
+ }
+ }
+
+ }
+ }
+ }
+
+ } else { // ptr == 0
+ //p=rech_tageq(adr,"primary"); // lien primaire, yeah
+ p=0; // No stupid tag anymore, raw link
+ valid_p=1; // Valid even if p==0
+ while ((adr[p] == '\r') || (adr[p] == '\n'))
+ p++;
+ //can_avoid_quotes=1;
+ ending_p='\r';
+ }
+
+ } else if (isspace((unsigned char)*adr)) {
+ intag_startattr=adr+1; // attribute in tag (for dirty parsing)
+ }
+
+
+ // ------------------------------------------------------------
+ // dernier recours - parsing "sale" : détection systématique des .gif, etc.
+ // risque: générer de faux fichiers parazites
+ // fix: ne parse plus dans les commentaires
+ // ------------------------------------------------------------
+ if ( (opt.parseall) && (ptr>0) && (!in_media) ) { // option parsing "brut"
+ int incomment_justquit=0;
+ if (!is_realspace(*adr)) {
+ int noparse=0;
+
+ // Gestion des /* */
+ if (inscript) {
+ if (parseall_incomment) {
+ if ((*adr=='/') && (*(adr-1)=='*'))
+ parseall_incomment=0;
+ incomment_justquit=1; // ne pas noter dernier caractère
+ } else {
+ if ((*adr=='/') && (*(adr+1)=='*'))
+ parseall_incomment=1;
+ }
+ } else
+ parseall_incomment=0;
+
+ /* vérifier que l'on est pas dans un <!-- --> pur */
+ if ( (!intag) && (incomment) && (!inscript))
+ noparse=1; /* commentaire */
+
+ // recherche d'URLs
+ if ((!parseall_incomment) && (!noparse)) {
+ if (!p) { // non déja trouvé
+ if (adr != r.adr) { // >1 caractère
+ // scanner les chaines
+ if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif'
+ if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif..
+ char *a=adr;
+ char stop=*adr; // " ou '
+ int count=0;
+
+ // sauter caractères
+ a++;
+ // copier
+ while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
+
+ // ok chaine terminée par " ou '
+ if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
+ char c;
+ char* aend;
+ //
+ aend=a; // sauver début
+ a++;
+ while(is_taborspace(*a)) a++;
+ c=*a;
+ if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif";
+ // le / est pour funct("img.gif" /* URL */);
+ char tempo[HTS_URLMAXSIZE*2];
+ char type[256];
+ int url_ok=0; // url valide?
+ tempo[0]='\0'; type[0]='\0';
+ //
+ strncat(tempo,adr+1,count);
+ //
+ if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript)
+ int invalid_url=0;
+
+ // escape
+ unescape_amp(tempo);
+
+ // Couper au # ou ? éventuel
+ {
+ char* a=strchr(tempo,'#');
+ if (a)
+ *a='\0';
+ a=strchr(tempo,'?');
+ if (a)
+ *a='\0';
+ }
+
+ // vérifier qu'il n'y a pas de caractères spéciaux
+ if (!strnotempty(tempo))
+ invalid_url=1;
+ else if (strchr(tempo,'*')
+ || strchr(tempo,'<')
+ || strchr(tempo,'>'))
+ invalid_url=1;
+
+ /* non invalide? */
+ if (!invalid_url) {
+ // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
+ if (c!='+') { // PAS de plus à la fin
+ char* a;
+ // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
+ //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème
+ if (
+ (strfield(tempo,"http:"))
+ || (strfield(tempo,"ftp:"))
+#if HTS_USEOPENSSL
+ || (strfield(tempo,"https:"))
+#endif
+ ) // ok pas de problème
+ url_ok=1;
+ else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok..
+ if (inscript) // sinon si pas javascript, méfiance (répertoire style base?)
+ url_ok=1;
+ } else if ((a=strchr(tempo,'/'))) { // un slash: ok..
+ if (inscript) { // sinon si pas javascript, méfiance (style "text/css")
+ if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css')
+ url_ok=1;
+ }
+ }
+ }
+ // Prendre si extension reconnue
+ if (!url_ok) {
+ get_httptype(type,tempo,0);
+ if (strnotempty(type)) // type reconnu!
+ url_ok=1;
+ else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp..
+ url_ok=1;
+ // MAIS pas les foobar@aol.com !!
+ if (strchr(tempo,'@'))
+ url_ok=0;
+ }
+ //
+ // Ok, cela pourrait être une URL
+ if (url_ok) {
+
+ // Check if not forbidden tag (id,name..)
+ if (intag_start_valid) {
+ if (intag_start)
+ if (intag_startattr)
+ if (intag)
+ if (!inscript)
+ if (!incomment) {
+ int i=0,nop=0;
+ while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
+ nop=rech_tageq(intag_startattr,hts_nodetect[i]);
+ i++;
+ }
+ // Forbidden tag
+ if (nop) {
+ url_ok=0;
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
+ }
+ }
+ }
+ }
+
+
+ // Accepter URL, on la traitera comme une URL normale!!
+ if (url_ok)
+ p=1;
+
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ } // p == 0
+
+ // plus dans un commentaire
+ if (!incomment_justquit)
+ parseall_lastc=*adr; // caractère avant le prochain
+
+ } // not in comment
+
+ } // if realspace
+ } // if parseall
+
+
+ // ------------------------------------------------------------
+ // p!=0 : on a repéré un éventuel lien
+ // ------------------------------------------------------------
+ //
+ if ((p>0) || (valid_p)) { // on a repéré un lien
+ //int lien_valide=0;
+ char* eadr=NULL; /* fin de l'URL */
+ char* quote_adr=NULL; /* adresse du ? dans l'adresse */
+ int ok=1;
+ char quote='\0';
+
+ // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe
+ if ((nofollow) || (opt.state.stop))
+ p_nocatch=1;
+
+ // écrire codebase avant, flusher avant code
+ if ((p_type==-1) || (p_type==-2)) {
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD_ADR; // refresh
+ }
+ lastsaved=adr; // dernier écrit+1
+ }
+
+ // sauter espaces
+ adr+=p;
+ while((is_space(*adr)) && (quote=='\0')) {
+ if (!quote)
+ if ((*adr=='\"') || (*adr=='\''))
+ quote=*adr; // on doit attendre cela à la fin
+ // puis quitter
+ adr++; // sauter les espaces, "" et cie
+ }
+
+ /* Stop at \n (LF) if primary links*/
+ if (ptr == 0)
+ quote='\n';
+ /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
+ else if (inscript)
+ quote='\0';
+
+ // sauter éventuel \" ou \' javascript
+ if (inscript) { // on est dans un obj.write("..
+ if (*adr=='\\') {
+ if ((*(adr+1)=='\'') || (*(adr+1)=='"')) { // \" ou \'
+ adr+=2; // sauter
+ }
+ }
+ }
+
+ // sauter content="1;URL=http://..
+ if (p_searchMETAURL) {
+ int l=0;
+ while(
+ (adr + l + 4 < r.adr + r.size)
+ && (!strfield(adr+l,"URL="))
+ && (l<128) ) l++;
+ if (!strfield(adr+l,"URL="))
+ ok=-1;
+ else
+ adr+=(l+4);
+ }
+
+ /* éviter les javascript:document.location=.. : les parser, plutôt */
+ if (ok!=-1) {
+ if (strfield(adr,"javascript:")) {
+ ok=-1;
+ /*
+ On est désormais dans du code javascript
+ */
+ inscript_tag=inscript=1;
+ inscript_tag_lastc=quote; /* à attendre à la fin */
+ }
+ }
+
+ if (p_type==1) {
+ if (*adr=='#') {
+ adr++; // sauter # pour usemap etc
+ }
+ }
+ eadr=adr;
+
+ // ne pas flusher après code si on doit écrire le codebase avant!
+ if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD_ADR; // refresh
+ }
+ lastsaved=adr; // dernier écrit+1
+ // après on écrira soit les données initiales,
+ // soit une URL/lien modifié!
+ } else if (p_type==-1) p_flush=adr; // flusher jusqu'à adr ensuite
+
+ if (ok!=-1) { // continuer
+ // découper le lien
+ do {
+ if ((* (unsigned char*) eadr)<32) { // caractère de contrôle (ou \0)
+ if (!is_space(*eadr))
+ ok=0;
+ }
+ if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path)
+ ok=-1; // ne pas traiter ce lien
+
+ if (ok > 0) {
+ //if (*eadr!=' ') {
+ if (is_space(*eadr)) { // guillemets,CR, etc
+ if ((!quote) || (*eadr==quote)) // si pas d'attente de quote spéciale ou si quote atteinte
+ ok=0;
+ } else if (ending_p && (*eadr==ending_p))
+ ok=0;
+ else {
+ switch(*eadr) {
+ case '>':
+ if (!quote) {
+ if (!inscript) {
+ intag=0; // PLUS dans un tag!
+ intag_start_valid=0;
+ }
+ ok=0;
+ }
+ break;
+ /*case '<':*/
+ case '#':
+ if (*(eadr-1) != '&') // &#40;
+ ok=0;
+ break;
+ // case '?': non!
+ case '\\': if (inscript) ok=0; break; // \" ou \' point d'arrêt
+ case '?': quote_adr=adr; break; // noter position query
+ }
+ }
+ //}
+ }
+ eadr++;
+ } while(ok==1);
+
+ // Empty link detected
+ if ( (((int) (eadr - adr))) <= 1) { // link empty
+ ok=-1; // No
+ if (*adr != '#') { // Not empty+unique #
+ if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr)
+ if (quote) {
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD("#"); // We add this for a <href="">
+ }
+ }
+ }
+ }
+ }
+
+ }
+
+ if (ok==0) { // tester un lien
+ char lien[HTS_URLMAXSIZE*2];
+ int meme_adresse=0; // 0 par défaut pour primary
+ //char *copie_de_adr=adr;
+ //char* p;
+
+ // construire lien (découpage)
+ if ( (((int) (eadr - adr))-1) < HTS_URLMAXSIZE ) { // pas trop long?
+ strncpy(lien,adr,((int) (eadr - adr))-1);
+ *(lien+ (((int) (eadr - adr)))-1 )='\0';
+ //printf("link: %s\n",lien);
+ // supprimer les espaces
+ while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
+
+
+#if HTS_STRIP_DOUBLE_SLASH
+ // supprimer les // en / (sauf pour http://)
+ {
+ char *a,*p,*q;
+ int done=0;
+ a=strchr(lien,':'); // http://
+ if (a) {
+ a++;
+ while(*a=='/') a++; // position après http://
+ } else {
+ a=lien; // début
+ while(*a=='/') a++; // position après http://
+ }
+ q=strchr(a,'?'); // ne pas traiter après '?'
+ if (!q)
+ q=a+strlen(a)-1;
+ while(( p=strstr(a,"//")) && (!done) ) { // remplacer // par /
+ if ((int) p>(int) q) { // après le ? (toto.cgi?param=1//2.3)
+ done=1; // stopper
+ } else {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,a,(int) p - (int) a);
+ strcat (tempo,p+1);
+ strcpy(a,tempo); // recopier
+ }
+ }
+ }
+#endif
+
+ } else
+ lien[0]='\0'; // erreur
+
+ // ------------------------------------------------------
+ // Lien repéré et extrait
+ if (strnotempty(lien)>0) { // construction du lien
+ char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2]; // ATTENTION adr cache le "vrai" adr
+ int forbidden_url=-1; // lien non interdit (mais non autorisé..)
+ int just_test_it=0; // mode de test des liens
+ int set_prio_to=0; // pour capture de page isolée
+ int import_done=0; // lien importé (ne pas scanner ensuite *à priori*)
+ //
+ adr[0]='\0'; fil[0]='\0';
+ //
+ // 0: autorisé
+ // 1: interdit (patcher tout de même adresse)
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link detected in html: %s"LF,lien); test_flush;
+ }
+
+ // external check
+#if HTS_ANALYSTE
+ if (!hts_htmlcheck_linkdetected(lien)) {
+ error=1; // erreur
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s refused by external wrapper"LF,lien);
+ test_flush;
+ }
+ }
+#endif
+
+ // purger espaces de début et fin, CR,LF résiduels
+ // (IMG SRC="foo.<\n>gif")
+ {
+ char* a;
+ while (is_realspace(lien[0])) {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strcpy(tempo,lien+1);
+ strcpy(lien,tempo);
+ }
+ while(strnotempty(lien)
+ && (is_realspace(lien[max(0,(int)(strlen(lien))-1)])) ) {
+ lien[strlen(lien)-1]='\0';
+ }
+ while ((a=strchr(lien,'\n'))) {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,lien,(int) (a - lien));
+ strcat(tempo,a+1);
+ strcpy(lien,tempo);
+ }
+ while ((a=strchr(lien,'\r'))) {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,lien,(int) (a - lien));
+ strcat(tempo,a+1);
+ strcpy(lien,tempo);
+ }
+ }
+
+ /* Unescape/escape %20 and other &nbsp; */
+ {
+ char query[HTS_URLMAXSIZE*2];
+ char* a=strchr(lien,'?');
+ if (a) {
+ strcpy(query,a);
+ *a='\0';
+ } else
+ query[0]='\0';
+ // conversion &amp; -> & et autres joyeusetés
+ unescape_amp(lien);
+ unescape_amp(query);
+ // décoder l'inutile (%2E par exemple) et coder espaces
+ // XXXXXXXXXXXXXXXXX strcpy(lien,unescape_http(lien));
+ strcpy(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
+ escape_spc_url(lien);
+ strcat(lien,query); /* restore */
+ }
+
+ // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance!
+ {
+ char* a=jump_identification(lien);
+ while( (a=strchr(a,'\\')) ) *a='/';
+ }
+
+ // supprimer le(s) ./
+ while ((lien[0]=='.') && (lien[1]=='/')) {
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(tempo,lien+2);
+ strcpy(lien,tempo);
+ }
+ if (strnotempty(lien)==0) // sauf si plus de nom de fichier
+ strcpy(lien,"./");
+
+ // vérifie les /~machin -> /~machin/
+ // supposition dangereuse?
+ // OUI!!
+#if HTS_TILDE_SLASH
+ if (lien[strlen(lien)-1]!='/') {
+ char *a=lien+strlen(lien)-1;
+ // éviter aussi index~1.html
+ while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
+ if (*a=='~') {
+ strcat(lien,"/"); // ajouter slash
+ }
+ }
+#endif
+
+ // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class"
+ // yes, this is dirty
+ // but I'm so lazy..
+ // and besides the java "code" convention is really a pain in html code
+ if (p_type==-1) {
+ char* a=strrchr(lien,'.');
+ add_class_dots_to_patch=0;
+ if (a) {
+ char* b;
+ do {
+ b=strchr(lien,'.');
+ if ((b != a) && (b)) {
+ add_class_dots_to_patch++;
+ *b='/';
+ }
+ } while((b != a) && (b));
+ }
+ }
+
+ // éliminer les éventuels :80 (port par défaut!)
+ if (link_has_authority(lien)) {
+ char * a;
+ a=strstr(lien,"//"); // "//" authority
+ if (a)
+ a+=2;
+ else
+ a=lien;
+ // while((*a) && (*a!='/') && (*a!=':')) a++;
+ a=jump_toport(a);
+ if (a) { // port
+ int port=0;
+ int defport=80;
+ char* b=a+1;
+#if HTS_USEOPENSSL
+ // FIXME
+ //if (strfield(adr, "https:")) {
+ //}
+#endif
+ while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; }
+ if (port==defport) { // port 80, default - c'est débile
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,lien,(int) (a - lien));
+ strcat(tempo,a+3); // sauter :80
+ strcpy(lien,tempo);
+ }
+ }
+ }
+
+ // filtrer les parazites (mailto & cie)
+ /*
+ if (strfield(lien,"mailto:")) { // ne pas traiter
+ error=1;
+ } else if (strfield(lien,"news:")) { // ne pas traiter
+ error=1;
+ }
+ */
+
+ // vérifier que l'on ne doit pas ajouter de .class
+ if (!error) {
+ if (add_class) {
+ char *a = lien+strlen(lien)-1;
+ while(( a > lien) && (*a!='/') && (*a!='.')) a--;
+ if (*a != '.')
+ strcat(lien,".class"); // ajouter .class
+ else if (!strfield2(a,".class"))
+ strcat(lien,".class"); // idem
+ }
+ }
+
+ // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/)
+ if (!error) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"position link check %s"LF,lien); test_flush;
+ }
+
+ if ((p_type==2) || (p_type==-2)) { // code ou codebase
+ // Vérifier les codebase=applet (au lieu de applet/)
+ if (p_type==-2) { // codebase
+ if (strnotempty(lien)) {
+ if (fil[strlen(lien)-1]!='/') { // pas répertoire
+ strcat(lien,"/");
+ }
+ }
+ }
+ /* only one ending / (bug on some pages) */
+ if ((int)strlen(lien)>2) {
+ while( (lien[strlen(lien)-2]=='/') && ((int)strlen(lien)>2) ) /* double // (bug) */
+ lien[strlen(lien)-1]='\0';
+ }
+ // copier nom host si besoin est
+ if (!link_has_authority(lien)) { // pas de http://
+ char adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2]; // ** euh ident_url_relatif??
+ if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {
+ error=1;
+ } else {
+ strcpy(lien,"http://");
+ strcat(lien,adr2);
+ if (*fil2!='/')
+ strcat(lien,"/");
+ strcat(lien,fil2);
+ {
+ char* a;
+ a=lien+strlen(lien)-1;
+ while((*a) && (*a!='/') && ( a> lien)) a--;
+ if (*a=='/') {
+ *(a+1)='\0';
+ }
+ }
+ //char tempo[HTS_URLMAXSIZE*2];
+ //strcpy(tempo,"http://");
+ //strcat(tempo,urladr); // host
+ //if (*lien!='/')
+ // strcat(tempo,"/");
+ //strcat(tempo,lien);
+ //strcpy(lien,tempo);
+ }
+ }
+
+ if (!error) { // pas d'erreur?
+ if (p_type==2) { // code ET PAS codebase
+ char* a=lien+strlen(lien)-1;
+ while( (a > lien) && (*a) && (*a!='/')) a--;
+ if (*a=='/') // ok on a repéré le dernier /
+ *(a+1)='\0'; // couper
+ else {
+ *lien='\0'; // éliminer
+ error=1; // erreur, ne pas poursuivre
+ }
+ }
+
+ // stocker base ou codebase?
+ switch(p_type) {
+ case 2: {
+ //if (*lien!='/') strcat(base,"/");
+ strcpy(base,lien);
+ }
+ break; // base
+ case -2: {
+ //if (*lien!='/') strcat(codebase,"/");
+ strcpy(codebase,lien);
+ }
+ break; // base
+ }
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"code/codebase link %s base %s"LF,lien,base); test_flush;
+ }
+ //printf("base code: %s - %s\n",lien,base);
+ }
+
+ } else {
+ char* _base;
+ if (p_type==-1) // code (applet)
+ _base=codebase;
+ else
+ _base=base;
+
+
+ // ajouter chemin de base href..
+ if (strnotempty(_base)) { // considérer base
+ if (!link_has_authority(lien)) { // non absolue
+ //if (*lien!='/') { // non absolu sur le site (/)
+ if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
+ // mailto: and co: do NOT add base
+ if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
+ char tempo[HTS_URLMAXSIZE*2];
+ // base est absolue
+ strcpy(tempo,_base);
+ strcat(tempo,lien + ((*lien=='/')?1:0) );
+ strcpy(lien,tempo); // patcher en considérant base
+ // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..)
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link modified with code/codebase %s"LF,lien); test_flush;
+ }
+ }
+ } else {
+ error=1; // erreur
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s too long with base href"LF,lien);
+ test_flush;
+ }
+ }
+ //}
+ }
+ }
+
+
+ }
+ }
+
+
+ // transformer lien quelconque (http, relatif, etc) en une adresse
+ // et un chemin+fichier (adr,fil)
+ if (!error) {
+ int reponse;
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"build relative link %s with %s%s"LF,lien,urladr,urlfil); test_flush;
+ }
+ if ((reponse=ident_url_relatif(lien,urladr,urlfil,adr,fil))<0) {
+ adr[0]='\0'; // erreur
+ if (reponse==-2) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s not caught (unknown ftp:// protocol)"LF,lien);
+ test_flush;
+ }
+ } else {
+ if ((opt.debug>1) && (opt.errlog!=NULL)) {
+ fspc(opt.errlog,"debug"); fprintf(opt.errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,urladr,urlfil); test_flush;
+ }
+ }
+ }
+ } else {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link %s not build, error detected before"LF,lien); test_flush;
+ }
+ adr[0]='\0';
+ }
+
+#if HTS_CHECK_STRANGEDIR
+ // !ATTENTION!
+ // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
+ // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
+ // et un fichier en http A PRIORI : je fais donc un test
+ // En cas de moved xxx, on recalcule adr et fil, tout simplement
+ // DEFAUT: test effectué plusieurs fois! à revoir!!!
+ if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
+ //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
+ if (fil[strlen(fil)-1]!='/') { // pas répertoire
+ if (ishtml(fil)==-2) { // pas d'extension
+ char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position
+ loc[0]='\0';
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link-check-directory: %s%s"LF,adr,fil);
+ test_flush;
+ }
+
+ // tester éventuelle nouvelle position
+ switch (http_location(adr,fil,loc).statuscode) {
+ case 200: // ok au final
+ if (strnotempty(loc)) { // a changé d'adresse
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
+ test_flush;
+ }
+
+ // recalculer adr et fil!
+ if (ident_url_absolute(loc,adr,fil)==-1) {
+ adr[0]='\0'; // cancel
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link-check-dir: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+
+ }
+ break;
+ case -2: case -3: // timeout ou erreur grave
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
+ test_flush;
+ }
+
+ break;
+ }
+
+ }
+ }
+ }
+#endif
+
+ // Le lien doit juste être réécrit, mais ne doit pas générer un lien
+ // exemple: <FORM ACTION="url_cgi">
+ if (p_nocatch) {
+ forbidden_url=1; // interdire récupération du lien
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link forced external at %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if ((p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens,
+ adr,fil,
+ &filters,&filptr,opt.maxfilter,
+ &robots,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard link test: %d"LF,forbidden_url);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ // calculer meme_adresse
+ meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
+
+
+
+ // Début partie sauvegarde
+
+ // ici on forme le nom du fichier à sauver, et on patche l'URL
+ if (adr[0]!='\0') {
+ // savename: simplifier les ../ et autres joyeusetés
+ char save[HTS_URLMAXSIZE*2];
+ int r_sv=0;
+ // En cas de moved, adresse première
+ char former_adr[HTS_URLMAXSIZE*2];
+ char former_fil[HTS_URLMAXSIZE*2];
+ //
+ save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
+ //
+
+ // nom du chemin à sauver si on doit le calculer
+ // note: url_savename peut décider de tester le lien si il le trouve
+ // suspect, et modifier alors adr et fil
+ // dans ce cas on aura une référence directe au lieu des traditionnels
+ // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
+ // gif sont impliqués par exemple)
+ if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase
+ if (forbidden_url!=1) {
+ char last_adr[HTS_URLMAXSIZE*2];
+ last_adr[0]='\0';
+ //char last_fil[HTS_URLMAXSIZE*2]="";
+ strcpy(last_adr,adr); // ancienne adresse
+ //strcpy(last_fil,fil); // ancien chemin
+ r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe);
+ if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé
+
+ // 2e test si moved
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"wizard moved link retest at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens,
+ adr,fil,
+ &filters,&filptr,opt.maxfilter,
+ &robots,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard moved link retest: %d"LF,forbidden_url);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ //import_done=1; // c'est un import!
+ meme_adresse=0; // on a changé
+ }
+ } else {
+ strcpy(save,""); // dummy
+ }
+ }
+ if (r_sv!=-1) { // pas d'erreur, on continue
+ /* log */
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug");
+ if (forbidden_url!=1) { // le lien va être chargé
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien
+ fprintf(opt.log,"Code/Codebase: %s%s"LF,adr,fil);
+ } else if ((opt.getmode & 4)==0) {
+ fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save);
+ } else {
+ if (!ishtml(fil))
+ fprintf(opt.log,"Record after: %s%s -> %s"LF,adr,fil,save);
+ else
+ fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save);
+ }
+ } else
+ fprintf(opt.log,"External: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ /* FIN log */
+
+ // écrire lien
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter
+ lastsaved=eadr-1+1; // sauter "
+ }
+ /* */
+ else if (opt.urlmode==0) { // URL absolue dans tous les cas
+ if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncat(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
+ }
+ }
+
+ if (!opt.passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ /* */
+ } else if (opt.urlmode >= 4) { // ne rien faire dans tous les cas!
+ /* */
+ /* leave the link 'as is' */
+ /* Sinon, dépend de interne/externe */
+ } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe!
+ if ((opt.getmode & 1) && (ptr>0)) {
+ if (p_type!=-1) { // pas que le nom de fichier (pas classe java)
+ if (!opt.external) {
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncat(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
+ if (!opt.passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }
+ }
+ //
+ } else { // fichier/page externe, mais on veut générer une erreur
+ //
+ int patch_it=0;
+ int add_url=0;
+ char* cat_name=NULL;
+ char* cat_data=NULL;
+ int cat_nb=0;
+ int cat_data_len=0;
+
+ // ajouter lien external
+ switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) {
+ case 1: case -2: // html ou répertoire
+ if (opt.getmode & 1) { // sauver html
+ patch_it=1; // redirect
+ add_url=1; // avec link?
+ cat_name="external.html";
+ cat_nb=0;
+ cat_data=HTS_DATA_UNKNOWN_HTML;
+ cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
+ }
+ break;
+ default: // inconnu
+ // asp, cgi..
+ if (is_dyntype(get_ext(fil))) {
+ patch_it=1; // redirect
+ add_url=1; // avec link?
+ cat_name="external.html";
+ cat_nb=0;
+ cat_data=HTS_DATA_UNKNOWN_HTML;
+ cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
+ } else if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif"))
+ || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg"))
+ || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm"))
+ || (ishtml(fil)!=0) ) {
+ patch_it=1; // redirect
+ add_url=1; // avec link aussi
+ cat_name="external.gif";
+ cat_nb=1;
+ cat_data=HTS_DATA_UNKNOWN_GIF;
+ cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
+ }
+ break;
+ }// html,gif
+
+ if (patch_it) {
+ char save[HTS_URLMAXSIZE*2];
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(save,opt.path_html);
+ strcat(save,cat_name);
+ if (lienrelatif(tempo,save,savename)==0) {
+ if (!no_esc_utf)
+ escape_uri(tempo); // escape with %xx
+ else
+ escape_uri_utf(tempo); // escape with %xx
+ HT_ADD(tempo); // page externe
+ if (add_url) {
+ HT_ADD("?link="); // page externe
+
+ // same as above
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncat(tmp, adr, (int) (aut - adr) + 2); // scheme
+ HT_ADD(tmp);
+ if (!opt.passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }
+ }
+ //
+
+ }
+ }
+
+ // écrire fichier?
+ if (verif_external(cat_nb,1)) {
+ //if (!fexist(fconcat(opt.path_html,cat_name))) {
+ FILE* fp = filecreate(fconcat(opt.path_html,cat_name));
+ if (fp) {
+ if (cat_data_len==0) { // texte
+ verif_backblue(opt.path_html);
+ fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
+ } else { // data
+ fwrite(cat_data,cat_data_len,1,fp);
+ }
+ fclose(fp);
+ usercommand(0,NULL,fconcat(opt.path_html,cat_name));
+ }
+ }
+ } else { // écrire normalement le nom de fichier
+ HT_ADD("http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }// patcher?
+ } // external
+ } else { // que le nom de fichier (classe java)
+ // en gros recopie de plus bas: copier codebase et base
+ if (p_flush) {
+ char tempo[HTS_URLMAXSIZE*2]; // <-- ajouté
+ char tempo_pat[HTS_URLMAXSIZE*2];
+
+ // Calculer chemin
+ tempo_pat[0]='\0';
+ strcpy(tempo,fil); // <-- ajouté
+ {
+ char* a=strrchr(tempo,'/');
+
+ // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
+ // we have to do the contrary now
+ if (add_class_dots_to_patch>0) {
+ while( (add_class_dots_to_patch>0) && (a) ) {
+ *a='.'; // convert "false" java / into .
+ add_class_dots_to_patch--;
+ a=strrchr(tempo,'/');
+ }
+ // if add_class_dots_to_patch, this is because there is a problem!!
+ if (add_class_dots_to_patch) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
+ test_flush;
+ }
+ }
+ }
+
+ // Cut path/filename
+ if (a) {
+ char tempo2[HTS_URLMAXSIZE*2];
+ strcpy(tempo2,a+1); // FICHIER
+ strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
+ strcpy(tempo,tempo2); // fichier
+ }
+ }
+
+ // écrire codebase="chemin"
+ if ((opt.getmode & 1) && (ptr>0)) {
+ char tempo4[HTS_URLMAXSIZE*2];
+ tempo4[0]='\0';
+
+ if (strnotempty(tempo_pat)) {
+ HT_ADD("codebase=\"http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*tempo_pat!='/') HT_ADD("/");
+ HT_ADD(tempo_pat);
+ HT_ADD("\" ");
+ }
+
+ strncat(tempo4,lastsaved,(int) (p_flush - lastsaved));
+ HT_ADD(tempo4); // refresh code="
+ HT_ADD(tempo);
+ }
+ }
+ }
+ }
+ lastsaved=eadr-1;
+ }
+ /*
+ else if (opt.urlmode==1) { // ABSOLU, c'est le cas le moins courant
+ // NE FONCTIONNE PAS!! (et est inutile)
+ if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
+ // écrire le lien modifié, absolu
+ HT_ADD("file:");
+ if (*save=='/')
+ HT_ADD(save+1)
+ else
+ HT_ADD(save)
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ }
+ */
+ else if (opt.urlmode==3) { // URI absolue /
+ if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
+ HT_ADD(fil);
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ }
+ else if (opt.urlmode==2) { // RELATIF
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ // calculer le lien relatif
+
+ if (lienrelatif(tempo,save,savename)==0) {
+ if (!no_esc_utf)
+ escape_uri(tempo); // escape with %xx
+ else
+ escape_uri_utf(tempo); // escape with %xx
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
+ test_flush;
+ }
+
+ // lien applet (code) - il faut placer un codebase avant
+ if (p_type==-1) { // que le nom de fichier
+
+ if (p_flush) {
+ char tempo_pat[HTS_URLMAXSIZE*2];
+ tempo_pat[0]='\0';
+ {
+ char* a=strrchr(tempo,'/');
+
+ // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
+ // we have to do the contrary now
+ if (add_class_dots_to_patch>0) {
+ while( (add_class_dots_to_patch>0) && (a) ) {
+ *a='.'; // convert "false" java / into .
+ add_class_dots_to_patch--;
+ a=strrchr(tempo,'/');
+ }
+ // if add_class_dots_to_patch, this is because there is a problem!!
+ if (add_class_dots_to_patch) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
+ test_flush;
+ }
+ }
+ }
+
+ if (a) {
+ char tempo2[HTS_URLMAXSIZE*2];
+ strcpy(tempo2,a+1);
+ strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
+ strcpy(tempo,tempo2); // fichier
+ }
+ }
+
+ // écrire codebase="chemin"
+ if ((opt.getmode & 1) && (ptr>0)) {
+ char tempo4[HTS_URLMAXSIZE*2];
+ tempo4[0]='\0';
+
+ if (strnotempty(tempo_pat)) {
+ HT_ADD("codebase=\"");
+ HT_ADD(tempo_pat);
+ HT_ADD("\" ");
+ }
+
+ strncat(tempo4,lastsaved,(int) (p_flush - lastsaved));
+ HT_ADD(tempo4); // refresh code="
+ }
+ }
+ //lastsaved=adr; // dernier écrit+1
+ }
+
+ if ((opt.getmode & 1) && (ptr>0)) {
+ // écrire le lien modifié, relatif
+ HT_ADD(tempo);
+
+ // Add query-string, for informational purpose only
+ // Useless, because all parameters-pages are saved into different targets
+ if (opt.includequery) {
+ char* a=strchr(lien,'?');
+ if (a) {
+ HT_ADD(a);
+ }
+ }
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ } else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Error building relative link %s and %s"LF,save,savename);
+ test_flush;
+ }
+ }
+ } // sinon le lien sera écrit normalement
+
+
+#if 0
+ if (fexist(save)) { // le fichier existe..
+ adr[0]='\0';
+ //if ((opt.debug>0) && (opt.log!=NULL)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link has already been written on disk, cancelled: %s"LF,save);
+ test_flush;
+ }
+ }
+#endif
+
+ /* Security check */
+ if (strlen(save) >= HTS_URLMAXSIZE) {
+ adr[0]='\0';
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link is too long: %s"LF,save);
+ test_flush;
+ }
+ }
+
+ if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && ( (forbidden_url!=1) || (just_test_it))) { // si le fichier n'existe pas, ajouter à la liste
+ // n'y a-t-il pas trop de liens?
+ if (lien_tot+1 >= lien_max-4) { // trop de liens!
+ printf("PANIC! : Too many URLs : >%d [%d]\n",lien_tot,__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max);
+ fprintf(opt.errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF);
+ test_flush;
+ }
+ if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+
+ } else { // noter le lien sur la listes des liens à charger
+ int pass_fix,dejafait=0;
+
+ // Calculer la priorité de ce lien
+ if ((opt.getmode & 4)==0) { // traiter html après
+ pass_fix=0;
+ } else { // vérifier que ce n'est pas un !html
+ if (!ishtml(fil))
+ pass_fix=1; // priorité inférieure (traiter après)
+ else
+ pass_fix=max(0,numero_passe); // priorité normale
+ }
+
+ /* If the file seems to be an html file, get depth-1 */
+ /*
+ if (strnotempty(save)) {
+ if (ishtml(save) == 1) {
+ // descore_prio = 2;
+ } else {
+ // descore_prio = 1;
+ }
+ }
+ */
+
+ // vérifier que le lien n'a pas déja été noté
+ // si c'est le cas, alors il faut s'assurer que la priorité associée
+ // au fichier est la plus grande des deux priorités
+ //
+ // On part de la fin et on essaye de se presser (économise temps machine)
+#if HTS_HASH
+ {
+ int i=hash_read(&hash,save,"",0); // lecture type 0 (sav)
+ if (i>=0) {
+ liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
+ dejafait=1;
+ }
+ }
+#else
+ {
+ int l;
+ int i;
+ l=strlen(save); // opti
+ for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
+ if (liens[i]->sav_len==l) { // même taille de chaîne
+ if (strcmp(liens[i]->sav,save)==0) { // existe déja
+ liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
+ dejafait=1;
+ }
+ }
+ }
+ }
+#endif
+
+ // le lien n'a jamais été créé.
+ // cette fois ci, on le crée!
+ if (!dejafait) {
+ //
+ // >>>> CREER LE LIEN <<<<
+ //
+ // enregistrer lien à charger
+ //liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0';
+ // même adresse: l'objet père est l'objet père de l'actuel
+
+ // DEBUT ROBOTS.TXT AJOUT
+ if (!just_test_it) {
+ if (
+ (!strfield(adr,"ftp://")) // non ftp
+ && (!strfield(adr,"file://")) ) { // non file
+ if (opt.robots) { // récupérer robots
+ if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés
+ if (checkrobots(&robots,adr,"") != -1) { // robots.txt ?
+ checkrobots_set(&robots,adr,""); // ajouter entrée vide
+ if (checkrobots(&robots,adr,"") == -1) { // robots.txt ?
+ // enregistrer robots.txt (MACRO)
+ liens_record(adr,"/robots.txt","","","");
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ liens[lien_tot]->testmode=0; // pas mode test
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->premier=lien_tot;
+ liens[lien_tot]->precedent=ptr;
+ liens[lien_tot]->depth=0;
+ liens[lien_tot]->pass2=max(0,numero_passe);
+ liens[lien_tot]->retry=0;
+ lien_tot++; // UN LIEN DE PLUS
+#if DEBUG_ROBOTS
+ printf("robots.txt: added file robots.txt for %s\n",adr);
+#endif
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"robots.txt added at %s"LF,adr);
+ test_flush;
+ }
+ } else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // FIN ROBOTS.TXT AJOUT
+
+ // enregistrer (MACRO)
+ liens_record(adr,fil,save,former_adr,former_fil);
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+
+ // mode test?
+ if (!just_test_it)
+ liens[lien_tot]->testmode=0; // pas mode test
+ else
+ liens[lien_tot]->testmode=1; // mode test
+ if (!import_done)
+ liens[lien_tot]->link_import=0; // pas mode import
+ else
+ liens[lien_tot]->link_import=1; // mode import
+ // écrire autres paramètres de la structure-lien
+ if ((meme_adresse) && (!import_done) && (liens[ptr]->premier != 0))
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ else // sinon l'objet père est le précédent lui même
+ liens[lien_tot]->premier=lien_tot;
+ // liens[lien_tot]->premier=ptr;
+
+ liens[lien_tot]->precedent=ptr;
+ // noter la priorité
+ if (!set_prio_to)
+ liens[lien_tot]->depth=liens[ptr]->depth - 1;
+ else
+ liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1)); // PRIORITE NULLE (catch page)
+ // noter pass
+ liens[lien_tot]->pass2=pass_fix;
+ liens[lien_tot]->retry=opt.retry;
+
+ //strcpy(liens[lien_tot]->adr,adr);
+ //strcpy(liens[lien_tot]->fil,fil);
+ //strcpy(liens[lien_tot]->sav,save);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ if (!just_test_it) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
+ } else {
+ fspc(opt.log,"debug"); fprintf(opt.log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil);
+ }
+ test_flush;
+ }
+
+ lien_tot++; // UN LIEN DE PLUS
+ } else { // if !dejafait
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link has already been recorded, cancelled: %s"LF,save);
+ test_flush;
+ }
+
+ }
+
+
+ } // si pas trop de liens
+ } // si adr[0]!='\0'
+
+
+ } // if adr[0]!='\0'
+
+ } // if adr[0]!='\0'
+
+ } // if strlen(lien)>0
+
+ } // if ok==0
+
+ adr=eadr-1; // ** sauter
+
+ } // if (p)
+
+ } // si '<' ou '>'
+
+ // plus loin
+ adr++;
+
+
+ /* Otimization: if we are scanning in HTML data (not in tag or script),
+ then jump to the next starting tag */
+ if (ptr>0) {
+ if ( (!intag) /* Not in tag */
+ && (!inscript) /* Not in (java)script */
+ && (!incomment) /* Not in comment (<!--) */
+ && (!inscript_tag) /* Not in tag with script inside */
+ )
+ {
+ /* Not at the end */
+ if (( ((int) (adr - r.adr)) ) < r.size) {
+ /* Not on a starting tag yet */
+ if (*adr != '<') {
+ char* adr_next = strchr(adr,'<');
+ /* Jump to near end (index hack) */
+ if (!adr_next) {
+ if (
+ ( (int)(adr - r.adr) < (r.size - 4))
+ &&
+ (r.size > 4)
+ ) {
+ adr = r.adr + r.size - 2;
+ }
+ } else {
+ adr = adr_next;
+ }
+ }
+ }
+ }
+ }
+
+ // ----------
+ // écrire peu à peu
+ if ((opt.getmode & 1) && (ptr>0)) HT_ADD_ADR;
+ lastsaved=adr; // dernier écrit+1
+ // ----------
+
+ // pour les stats du shell si parsing trop long
+#if HTS_ANALYSTE
+ if (r.size)
+ _hts_in_html_done=(100 * ((int) (adr - r.adr)) ) / (int)(r.size);
+ if (_hts_in_html_poll) {
+ _hts_in_html_poll=0;
+ // temps à attendre, et remplir autant que l'on peut le cache (backing)
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ //adr = r.adr + r.size; // exit
+ } else if (_hts_cancel==1) {
+ // adr = r.adr + r.size; // exit
+ nofollow=1; // moins violent
+ _hts_cancel=0;
+ }
+ }
+
+ // refresh the backing system each 2 seconds
+ if (engine_stats()) {
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+ }
+#endif
+ } while(( ((int) (adr - r.adr)) ) < r.size);
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=0; // flag
+ _hts_cancel=0; // pas de cancel
+#endif
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD_END; // achever
+ }
+ //
+ //
+ //
+ } // if !error
+
+
+ if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ // sauver fichier
+ //structcheck(savename);
+ //filesave(r.adr,r.size,savename);
+
+#if HTS_ANALYSTE
+ } // analyse OK
+#endif
+
diff --git a/src/htsrobots.c b/src/htsrobots.c
new file mode 100644
index 0000000..8aabdd4
--- /dev/null
+++ b/src/htsrobots.c
@@ -0,0 +1,118 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* robots.txt (website robot file) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include "htsrobots.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* END specific definitions */
+
+
+// -- robots --
+
+// Walks the robots list for entries matching adr. With fil="" it only checks whether an entry is already recorded (-1 if so); otherwise returns -1 when fil matches one of the entry's rule lines. Returns 0 in all other cases.
+int checkrobots(robots_wizard* robots,char* adr,char* fil) {
+ while(robots) {
+ if (strfield2(robots->adr,adr)) { // presumably a whole-string, case-insensitive match of the address - confirm in the strfield2 definition
+ if (fil[0]) {
+ int ptr=0;
+ char line[250];
+ if (strnotempty(robots->token)) {
+ do {
+ ptr+=binput(robots->token+ptr,line,200); // read the next rule line from the token buffer and advance by the value binput returns
+ if (line[0]=='/') { // absolute rule
+ if (strfield(fil,line)) { // fil starts with the rule line
+ return -1; // forbidden
+ }
+ } else { // relative rule
+ if (strstrcase(fil,line)) { // NOTE(review): an empty rule line may match everything here, depending on strstrcase - confirm
+ return -1;
+ }
+ }
+ } while( (strnotempty(line)) && (ptr<(int) strlen(robots->token)) ); // stop on an empty line or at the end of the token buffer
+ }
+ } else {
+ return -1; // fil is empty: report that an entry exists for adr
+ }
+ }
+ robots=robots->next;
+ }
+ return 0;
+}
+int checkrobots_set(robots_wizard* robots,char* adr,char* data) { // record rule text 'data' for address 'adr'; returns -1 once stored, 0 if rejected or if allocation failed
+ if ((((int) strlen(data)) > 999) || (strlen(adr) >= sizeof(robots->adr))) return 0; // both strings are strcpy'd into fixed-size fields below: refuse anything that would not fit
+ while(robots) {
+ if (strfield2(robots->adr,adr)) { // entry exists: overwrite its rules
+ strcpy(robots->token,data);
+#if DEBUG_ROBOTS
+ printf("robots.txt: set %s to %s\n",adr,data);
+#endif
+ return -1;
+ }
+ else if (!robots->next) { // end of list reached without a match: append a new node
+ robots->next=(robots_wizard*) calloct(1,sizeof(robots_wizard));
+ if (robots->next) {
+ robots->next->next=NULL;
+ strcpy(robots->next->adr,adr);
+ strcpy(robots->next->token,data);
+#if DEBUG_ROBOTS
+ printf("robots.txt: new set %s to %s\n",adr,data);
+#endif
+ }
+#if DEBUG_ROBOTS
+ else
+ printf("malloc error!!\n");
+#endif
+ }
+ robots=robots->next; // after an append this steps onto the new node (NULL if the allocation failed, which ends the loop)
+ }
+ return 0;
+}
+void checkrobots_free(robots_wizard* robots) { // frees every node that follows 'robots'; the node passed in is not freed here and must not be NULL
+ if (robots->next) {
+ checkrobots_free(robots->next); // recurse first so the tail is released before its predecessor
+ freet(robots->next);
+ robots->next=NULL; // leave the list terminated at this node
+ }
+}
+
+// -- robots --
diff --git a/src/htsrobots.h b/src/htsrobots.h
new file mode 100644
index 0000000..62b9689
--- /dev/null
+++ b/src/htsrobots.h
@@ -0,0 +1,56 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* robots.txt (website robot file) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSROBOTS_DEFH
+#define HTSROBOTS_DEFH
+
+// robots wizard: singly linked list node holding the robots.txt rules recorded for one address
+typedef struct robots_wizard {
+ char adr[1024]; // address this entry applies to (compared against the adr argument of checkrobots/checkrobots_set)
+ char token[1024]; // rule text, read line by line by checkrobots; an empty string means "entry recorded, no rules"
+ struct robots_wizard* next; // next entry, or NULL at the end of the list
+} robots_wizard;
+
+
+// robots
+int checkrobots(robots_wizard* robots,char* adr,char* fil);
+void checkrobots_free(robots_wizard* robots);
+int checkrobots_set(robots_wizard* robots,char* adr,char* data);
+
+
+#endif
diff --git a/src/htssystem.h b/src/htssystem.h
new file mode 100644
index 0000000..989607d
--- /dev/null
+++ b/src/htssystem.h
@@ -0,0 +1,15 @@
+// Définition de la plate-forme utilisée
+
+// Sun Solaris .......... 0
+// Windows/95 ........... 1
+// Ibm 580 .............. 2
+
+#define HTS_PLATFORM 1
+
+// SHELL
+#define HTS_ANALYSTE 2
+
+
+// Fin de la définition
+
+
diff --git a/src/htssystem.h.windows9x b/src/htssystem.h.windows9x
new file mode 100644
index 0000000..0689e0c
--- /dev/null
+++ b/src/htssystem.h.windows9x
@@ -0,0 +1,11 @@
+/* HTTrack, Offline Browser for Windows and Unix */
+
+/* HTTrack system definition for Windows */
+/* This should be the only file you have to change */
+
+/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */
+
+
+/* Fix plateform number to 1 (Windows) */
+/* If it doesn't compile, try another one */
+#define HTS_PLATEFORM 1
diff --git a/src/htsthread.c b/src/htsthread.c
new file mode 100644
index 0000000..0a3bee6
--- /dev/null
+++ b/src/htsthread.c
@@ -0,0 +1,97 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Threads */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include "htsglobal.h"
+#include "htsthread.h"
+
+// Threads - emulate _beginthread under Linux/Unix using pthread_XX
+// Some changes will have to be done, see PTHREAD_RETURN,PTHREAD_TYPE
+#if USE_PTHREAD
+#include <pthread.h> /* _beginthread, _endthread */
+
+unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist ) // pthread-based stand-in for _beginthread; stack_size is accepted but not used
+{
+ pthread_t th;
+ int retcode;
+ /* create a thread */
+ retcode = pthread_create(&th, NULL, start_address, arglist);
+ if (retcode != 0) /* error */
+ return -1; /* converted to the unsigned long return type, i.e. (unsigned long)-1 */
+ /* detach the thread from the main process so that it can be independent */
+ pthread_detach(th);
+ return 0; /* success is reported as 0, not as a thread handle */
+}
+#endif
+
+#if USE_BEGINTHREAD
+/*
+ Simple lock function
+
+ Return value: always 0
+ Parameter:
+ 1 wait for lock (mutex) available and lock it
+ 0 unlock the mutex
+ [-1 check if locked (always return 0 with mutex)]
+ -999 initialize
+*/
+int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) { /* lock==1 acquires, lock==0 releases, lock==-999 creates the mutex; any other value is a no-op */
+#if HTS_WIN
+ /* lock */
+ if (lock==1)
+ WaitForSingleObject(*hMutex,INFINITE); /* blocks without timeout; the result is not checked */
+ /* unlock */
+ else if (lock==0)
+ ReleaseMutex(*hMutex);
+ /* create */
+ else if (lock==-999)
+ *hMutex=CreateMutex(NULL,FALSE,NULL); /* unnamed mutex, not initially owned */
+#else
+ /* lock */
+ if (lock==1)
+ pthread_mutex_lock(hMutex); /* the result is not checked */
+ /* unlock */
+ else if (lock==0)
+ pthread_mutex_unlock(hMutex);
+ /* create */
+ else if (lock==-999)
+ pthread_mutex_init(hMutex,0); /* default mutex attributes */
+#endif
+ return 0; /* always 0, whatever the underlying call returned */
+}
+
+#endif
+
diff --git a/src/htsthread.h b/src/htsthread.h
new file mode 100644
index 0000000..cb3a139
--- /dev/null
+++ b/src/htsthread.h
@@ -0,0 +1,95 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Threads */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_DEFTHREAD
+#define HTS_DEFTHREAD
+
+#include "htsglobal.h"
+#if USE_PTHREAD
+#include <pthread.h> /* _beginthread, _endthread */
+#endif
+#if HTS_WIN
+#include "windows.h"
+#endif
+
+#if USE_BEGINTHREAD
+#if HTS_WIN
+
+#define PTHREAD_RETURN
+#define PTHREAD_TYPE void __cdecl
+#define PTHREAD_LOCK_TYPE HANDLE
+
+/* Useless - see '__declspec( thread )' */
+/*
+#define PTHREAD_KEY_TYPE void*
+#define PTHREAD_KEY_CREATE(ptrkey, uninit) do { *(ptrkey)=(void*)NULL; } while(0)
+#define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0)
+#define PTHREAD_KEY_SET(key, val, ptrtype) do { key=(void*)(val); } while(0)
+#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0)
+*/
+
+#else
+
+#define PTHREAD_RETURN NULL
+#define PTHREAD_TYPE void*
+#define PTHREAD_LOCK_TYPE pthread_mutex_t
+#define PTHREAD_KEY_TYPE pthread_key_t
+#define PTHREAD_KEY_CREATE(ptrkey, uninit) pthread_key_create(ptrkey, uninit)
+#define PTHREAD_KEY_DELETE(key) pthread_key_delete(key)
+#define PTHREAD_KEY_SET(key, val, ptrtype) pthread_setspecific(key, (void*)val)
+#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)pthread_getspecific(key); } while(0)
+
+#endif
+
+#else
+
+#define PTHREAD_LOCK_TYPE void*
+#define PTHREAD_KEY_TYPE void*
+#define PTHREAD_KEY_CREATE(ptrkey, uninit) do { *(ptrkey)=(void*)NULL; } while(0)
+#define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0)
+#define PTHREAD_KEY_SET(key, val, ptrtype) do { key=(void*)(val); } while(0)
+#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0)
+
+#endif
+
+int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock);
+
+#if USE_PTHREAD
+unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist );
+#endif
+
+#endif
+
diff --git a/src/htstools.c b/src/htstools.c
new file mode 100644
index 0000000..1eeafbf
--- /dev/null
+++ b/src/htstools.c
@@ -0,0 +1,785 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* various tools (filename analyzing ..) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htstools.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+/* END specific definitions */
+
+
+// Builds adr and fil from a link and the context it was found in (origin_adr and origin_fil)
+// [adr and fil are 1 KB buffers, per the original author's note]
+// 0 : ok
+// -1 : error
+// -2 : unsupported protocol (ftp)
+int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,char* fil) {
+ int ok=0;
+ int scheme=0;
+
+ adr[0]='\0'; fil[0]='\0'; // clear the output buffers
+
+ // the link must not be empty!
+ if (strnotempty(lien)==0) return -1; // error!
+
+ // Scheme?
+ {
+ char* a=lien;
+ while (isalpha((unsigned char)*a))
+ a++;
+ if (*a == ':')
+ scheme=1; // a run of letters (possibly empty) is directly followed by ':'
+ }
+
+ // filter out junk links (mailto & co)
+ // scheme+authority (//)
+ if (
+ (strfield(lien,"http://")) // scheme+//
+ || (strfield(lien,"file://")) // scheme+//
+ || (strncmp(lien,"//",2)==0) // // without scheme (-> default)
+ ) {
+ if (ident_url_absolute(lien,adr,fil)==-1) {
+ ok=-1; // URL error
+ }
+ }
+ else if (strfield(lien,"ftp://")) {
+ // Note: ftp:foobar.gif is not valid
+ if (ftp_available()) { // ftp is supported
+ if (ident_url_absolute(lien,adr,fil)==-1) {
+ ok=-1; // URL error
+ }
+ } else {
+ ok=-2; // not supported
+ }
+#if HTS_USEOPENSSL
+ } else if (strfield(lien,"https://")) {
+ // https with an authority part: parse it as an absolute URL
+ if (ident_url_absolute(lien,adr,fil)==-1) {
+ ok=-1; // URL error
+ }
+#endif
+ } else if ((scheme) && (
+ (!strfield(lien,"http:"))
+ && (!strfield(lien,"https:"))
+ && (!strfield(lien,"ftp:"))
+ )) {
+ ok=-1; // unknown scheme
+ } else { // this is a relative link
+ char* a;
+
+ // Build the complete URL from the current URL
+ // and from the current path if needed.
+
+ // copy the address
+ if (((int) strlen(origin_adr)<HTS_URLMAXSIZE) && ((int) strlen(origin_fil)<HTS_URLMAXSIZE) && ((int) strlen(lien)<HTS_URLMAXSIZE)) {
+
+ /* patch scheme if necessary */
+ if (strfield(lien,"http:")) {
+ lien+=5;
+ strcpy(adr, jump_protocol(origin_adr)); // same address; empty protocol (http)
+ } else if (strfield(lien,"https:")) {
+ lien+=6;
+ strcpy(adr, "https://"); // same address, forced to https
+ strcat(adr, jump_protocol(origin_adr));
+ } else if (strfield(lien,"ftp:")) {
+ lien+=4;
+ strcpy(adr, "ftp://"); // same address, forced to ftp
+ strcat(adr, jump_protocol(origin_adr));
+ } else {
+ strcpy(adr,origin_adr); // same address, and same protocol if any
+ }
+
+ if (*lien!='/') { // otherwise it is an absolute path
+ a=strchr(origin_fil,'?');
+ if (!a) a=origin_fil+strlen(origin_fil);
+ while((*a!='/') && ( a > origin_fil) ) a--; // walk back to the last '/' located before any query string
+ if (*a=='/') { // ok, we have a '/'
+ if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) {
+ // copy the directory part of the origin path
+ strncpy(fil,origin_fil,((int) (a - origin_fil))+1);
+ *(fil + ((int) (a - origin_fil))+1)='\0';
+
+ // append the relative path
+ if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
+ strcat(fil,lien + ((*lien=='/')?1:0) );
+ // simplify the url (../ sequences)
+ fil_simplifie(fil);
+ } else
+ ok=-1; // error
+ } else { // error
+ ok=-1; // URL error
+ }
+ } else { // error
+ ok=-1; // URL error
+ }
+ } else { // absolute path
+ // copy the path as is
+ strcat(fil,lien);
+ } // *lien!='/'
+ } else
+ ok=-1;
+
+ } // test news: etc.
+
+ // the address is case insensitive: lower-case it
+ {
+ char *a=jump_identification(adr);
+ while(*a) {
+ if ((*a>='A') && (*a<='Z'))
+ *a+='a'-'A';
+ a++;
+ }
+ }
+
+ return ok;
+}
+
+
+
+
+
+// Builds in s the relative link that leads from the current path curr_fil to link (absolute). Returns 0 on success, -1 if the result is HTS_URLMAXSIZE characters or longer.
+// ident_url_relatif must have been applied beforehand, so that link is not itself a relative path
+int lienrelatif(char* s,char* link,char* curr_fil) {
+ char _curr[HTS_URLMAXSIZE*2];
+ char newcurr_fil[HTS_URLMAXSIZE*2],newlink[HTS_URLMAXSIZE*2];
+ char* curr;
+ //int n=0;
+ char* a;
+ int slash=0; // never set to non-zero in this function, so the "keep absolute" branch below is inactive
+ //
+ newcurr_fil[0]='\0'; newlink[0]='\0';
+ //
+
+ // patch: strip the ? (parameters) part from both paths, otherwise bug
+ if ( (a=strchr(curr_fil,'?')) ) {
+ strncat(newcurr_fil,curr_fil,(int) (a - curr_fil));
+ curr_fil = newcurr_fil;
+ }
+ if ( (a=strchr(link,'?')) ) {
+ strncat(newlink,link,(int) (a - link));
+ link = newlink;
+ }
+
+ // keep only the directory part of the current path
+ curr=_curr;
+ strcpy(curr,curr_fil);
+ if ((a=strchr(curr,'?'))==NULL) // cut at the ? (params)
+ a=curr+strlen(curr)-((*curr)?1:0); // no params: go to the last character; stay on the terminator when curr is empty instead of pointing before the buffer
+ while((*a!='/') && ( a> curr)) a--; // look for the last / of the current path
+ if (*a=='/') *(a+1)='\0'; // cut right after the last /
+
+ // "erase" s
+ s[0]='\0';
+
+ // skip what both paths have in common
+ {
+ char *l,*c;
+ if (*link=='/') link++; // skip the leading slash
+ if (*curr=='/') curr++;
+ l=link;
+ c=curr;
+ // drop the common prefix
+#if HTS_CASSE
+ while ((*link==*curr) && (*link!=0)) {link++; curr++; }
+#else
+ while ((streql(*link,*curr)) && (*link!=0)) {link++; curr++; }
+#endif
+ // but only whole directories may be skipped!
+ // with /toto/.. and /toto2/.. we do not want to skip /toto !
+ while(((*link!='/') || (*curr!='/')) && ( link > l)) { link--; curr--; }
+ //if (*link=='/') link++;
+ //if (*curr=='/') curr++;
+ }
+
+ // compute the depth of what remains of the current directory and climb back up
+ // THE ../ SEQUENCES HAVE ALREADY BEEN SIMPLIFIED
+ a=curr;
+ if (*a=='/') a++;
+ while(*a) if (*(a++)=='/') strcat(s,"../"); // one "../" per remaining directory level
+ //if (strlen(s)==0) strcat(s,"/");
+
+ if (slash) strcat(s,"/"); // keep absolute!!
+
+ // we are now in the common directory: append the rest of the target
+ strcat(s,link + ((*link=='/')?1:0) );
+
+ /* Security check */
+ if (strlen(s) >= HTS_URLMAXSIZE)
+ return -1;
+
+ // s now looks like ../../test/truc.html
+ return 0;
+}
+
+/* Is the link absolute (http://www..) or relative (/bar/foo.html) ? Returns 1 when the link, after an optional "letters:" scheme, starts with "//"; 0 otherwise */
+int link_has_authority(char* lien) {
+ char* a=lien;
+ if (isalpha((unsigned char)*a)) {
+ // Skip scheme?
+ while (isalpha((unsigned char)*a))
+ a++;
+ if (*a == ':')
+ a++;
+ else
+ return 0; // letters not followed by ':' are not a scheme: no authority
+ }
+ if (strncmp(a,"//",2) == 0)
+ return 1;
+ return 0;
+}
+
+int link_has_authorization(char* lien) { // non-zero when an '@' appears in the part returned by jump_protocol(lien) before its first '/' (or anywhere when there is no '/')
+ char* adr = jump_protocol(lien);
+ char* firstslash = strchr(adr, '/');
+ char* detect = strchr(adr, '@');
+ if (firstslash) {
+ if (detect) {
+ return (detect < firstslash); // an '@' located after the first '/' belongs to the path and does not count
+ }
+ } else {
+ return (detect != NULL);
+ }
+ return 0;
+}
+
+
+// Converts a file/directory path to 8-3 or ISO9660 names, one '/'-separated component at a time; the result is written to n83 (the caller must provide a large enough buffer)
+void long_to_83(int mode,char* n83,char* save) {
+ n83[0]='\0';
+
+ while(*save) {
+ char fn83[256],fnl[256];
+ int i=0,k=0; // i walks the whole component, k counts the characters actually kept in fnl
+ fn83[0]=fnl[0]='\0';
+ while((save[i]) && (save[i]!='/')) { if (k<(int) sizeof(fnl)-1) fnl[k++]=save[i]; i++; } // copy the component, truncating instead of overflowing fnl
+ fnl[k]='\0';
+ // convert this single component (works on the local copy, save itself is left untouched)
+ longfile_to_83(mode,fn83,fnl);
+ strcat(n83,fn83);
+
+ save+=i;
+ if (*save=='/') { strcat(n83,"/"); save++; }
+ }
+}
+
+
+// Converts a single file/directory name (no path) to 8-3 (mode 1 or any other value) or ISO9660 (mode 2) form, written to n83. Note: the save buffer is modified in place.
+void longfile_to_83(int mode,char* n83,char* save) {
+ int i=0,j=0,max=0;
+ char nom[256];
+ char ext[256];
+ nom[0]=ext[0]='\0';
+
+ switch(mode) { // max = maximum length of the name part
+ case 1:
+ max=8;
+ break;
+ case 2:
+ max=30;
+ break;
+ default:
+ max=8;
+ break;
+ }
+
+ /* No starting . */
+ if (save[0] == '.') {
+ save[0]='_';
+ }
+ /* No multiple dots */
+ {
+ char* last_dot=strrchr(save, '.');
+ char* dot;
+ while((dot=strchr(save, '.'))) { // replace every dot...
+ *dot = '_';
+ }
+ if (last_dot) { // ...then restore the last one only
+ *last_dot='.';
+ }
+ }
+ /*
+ Avoid: (ISO9660, but also suitable for 8-3)
+ (Thanks to jonat@cellcast.com for the hint)
+ /:;?\#*~
+ 0x00-0x1f and 0x80-0xff
+ */
+ for(i=0 ; i < (int) strlen(save) ; i++) {
+ if (
+ (strchr("/:;?\\#*~", save[i]))
+ ||
+ (save[i] < 32)
+ ||
+ (save[i] >= 127)
+ ) {
+ save[i]='_';
+ }
+ }
+
+ i=j=0;
+ while((i<max) && (save[j]) && (save[j]!='.')) {
+ if (save[j]!=' ') { // spaces are dropped from the name
+ nom[i]=save[j];
+ i++;
+ }
+ j++;
+ } // copy the name part (at most max characters)
+ nom[i]='\0';
+ if (save[j]) { // name not fully consumed: either a dot was reached or max was hit
+ i=strlen(save)-1;
+ while((i>0) && (save[i]!='.') && (save[i]!='/')) i--; // look for the last .
+ if (save[i]=='.') { // dot found!
+ int j=0;
+ i++;
+ while((j<3) && (save[i]) ) { if (save[i]!=' ') { ext[j]=save[i]; j++; } i++; } // keep at most 3 non-space extension characters
+ ext[j]='\0';
+ }
+ }
+ // assemble name + extension
+ n83[0]='\0';
+ strncat(n83,nom,max); // honour the limit selected by mode (was hard-coded to 8, which cut ISO9660 names)
+ if (strnotempty(ext)) {
+ strcat(n83,".");
+ strncat(n83,ext,3);
+ }
+}
+
+// écrire backblue.gif
+int verif_backblue(char* base) {
+ int* done;
+ int ret=0;
+ NOSTATIC_RESERVE(done, int, 1);
+ //
+ if (!base) { // init
+ *done=0;
+ return 0;
+ }
+ if ( (!*done)
+ || (fsize(fconcat(base,"backblue.gif")) != HTS_DATA_BACK_GIF_LEN)) {
+ FILE* fp = filecreate(fconcat(base,"backblue.gif"));
+ *done=1;
+ if (fp) {
+ if (fwrite(HTS_DATA_BACK_GIF,HTS_DATA_BACK_GIF_LEN,1,fp) != HTS_DATA_BACK_GIF_LEN)
+ ret=1;
+ fclose(fp);
+ usercommand(0,NULL,fconcat(base,"backblue.gif"));
+ } else
+ ret=1;
+ //
+ fp = filecreate(fconcat(base,"fade.gif"));
+ if (fp) {
+ if (fwrite(HTS_DATA_FADE_GIF,HTS_DATA_FADE_GIF_LEN,1,fp) != HTS_DATA_FADE_GIF_LEN)
+ ret=1;
+ fclose(fp);
+ usercommand(0,NULL,fconcat(base,"fade.gif"));
+ } else
+ ret=1;
+ }
+ return ret;
+}
+
+// One-shot flag for slot nb (2 slots are reserved): test==0 resets the slot; otherwise returns 1 the first time the slot is tested and 0 afterwards
+int verif_external(int nb,int test) {
+ int* status;
+ NOSTATIC_RESERVE(status, int, 2);
+ if (!test)
+ status[nb]=0; // reset
+ else if (!status[nb]) {
+ status[nb]=1; // remember that this slot has been triggered
+ return 1;
+ }
+ return 0;
+}
+
+
+// searches for a string of the form name<spaces>=
+// returns the offset to skip, or 0 if not found
+/* SECTION OPTIMISEE:
+#define rech_tageq(adr,s) ( \
+ ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) ? \
+ ( (streql(*adr,*s)) ? \
+ (__rech_tageq(adr,s)) \
+ : 0 \
+ ) \
+ : 0\
+ )
+*/
+/*
+HTS_INLINE int rech_tageq(const char* adr,const char* s) {
+ if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc
+ if (streql(*adr,*s)) { // tester premier octet (optimisation)
+ return __rech_tageq(adr,s);
+ }
+ }
+ return 0;
+}
+*/
+// Second part: checks that adr starts with s followed by optional spaces and '='; returns the offset just past the '=', or 0 if there is no match
+HTS_INLINE int __rech_tageq(const char* adr,const char* s) {
+ int p;
+ p=strfield(adr,s); // used below as the offset just past the matched prefix (0 = no match)
+ if (p) {
+ while(is_space(adr[p])) p++;
+ if (adr[p]=='=') {
+ return p+1;
+ }
+ }
+ return 0;
+}
+// same, but only the beginning of adr is checked against s, and digits may follow (for <object src="bar.mov" .. hotspot123="foo.html">)
+HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s) {
+ int p;
+ p=strfield(adr,s); // used below as the offset just past the matched prefix (0 = no match)
+ if (p) {
+ while(isdigit((unsigned char)adr[p])) p++; // jump digits
+ while(is_space(adr[p])) p++;
+ if (adr[p]=='=') {
+ return p+1; // offset just past the '='
+ }
+ }
+ return 0;
+}
+
+// tag without '=': returns 1 when adr, preceded by '<' or a space, starts with s and the character after the match is not alphanumeric; 0 otherwise
+HTS_INLINE int rech_sampletag(const char* adr,const char* s) {
+ int p;
+ if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc - reads adr[-1], so adr must not be the first byte of its buffer
+ p=strfield(adr,s);
+ if (p) {
+ if (!isalnum((unsigned char)adr[p])) { // <srcbis is not <src
+ return 1;
+ }
+ return 0;
+ }
+ }
+ return 0;
+}
+
+ // Tests whether the tag located at "from" is equal to "tag".
+ // The first character of from is skipped unread (presumably the '<' - confirm with
+ // callers), then leading spaces; up to 250 alphanumeric or '/' characters are
+ // collected and handed to strfield2(), whose result is returned.
+ HTS_INLINE int check_tag(char* from,const char* tag) {
+   char name[256];
+   int len=0;
+   char* cur=from+1;
+   while(is_space(*cur))
+     cur++;
+   for( ; (isalnum((unsigned char)*cur) || (*cur=='/')) && (len<250) ; cur++)
+     name[len++]=*cur;
+   name[len]='\0';
+   return strfield2(name,tag);   // compare
+ }
+
+ // Tests whether a file exceeds its size quota.
+ //   size      size of the file; values <= 0 are never considered too big
+ //   maxhtml   quota applied when is_hypertext_mime(type) is true (<= 0: no quota)
+ //   maxnhtml  quota applied to every other type (<= 0: no quota)
+ //   type      MIME type of the file
+ // Returns 1 when size is above the applicable quota, 0 otherwise.
+ int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) {
+   LLint quota;
+   if (size<=0)
+     return 0;
+   quota = (is_hypertext_mime(type)) ? maxhtml : maxnhtml;
+   if (quota<=0)
+     return 0;
+   return (size>quota) ? 1 : 0;
+ }
+
+
+ // Builds <path>/index.html, the top index listing every sub-directory of path
+ // that itself contains an index.html file.
+ //   path     directory holding the projects (one trailing '/' is tolerated)
+ //   binpath  prefix under which templates/topindex-*.html are looked up
+ // The three templates come from readfile_or(), which also receives the HTS_INDEX_*
+ // constants (presumably the built-in fallback - confirm).
+ // Returns 1 when index.html could be created and the directory scanned, 0 otherwise
+ // (including when path is NULL or too long for the local buffers).
+ // NOTE(review): the templates are used directly as fprintf() formats; header and
+ // footer receive one string argument, body receives two. A template containing
+ // other conversions would be undefined behaviour.
+ int hts_buildtopindex(char* path,char* binpath) {
+   FILE* fpo;
+   int retval=0;
+   char rpath[1024*2];
+   char *toptemplate_header=NULL,*toptemplate_body=NULL,*toptemplate_footer=NULL;
+
+   // html templates
+   toptemplate_header=readfile_or(fconcat(binpath,"templates/topindex-header.html"),HTS_INDEX_HEADER);
+   toptemplate_body=readfile_or(fconcat(binpath,"templates/topindex-body.html"),HTS_INDEX_BODY);
+   toptemplate_footer=readfile_or(fconcat(binpath,"templates/topindex-footer.html"),HTS_INDEX_FOOTER);
+
+   // path is copied into rpath below: refuse anything that does not fit
+   if (toptemplate_header && toptemplate_body && toptemplate_footer
+       && path && (strlen(path) < sizeof(rpath))) {
+
+     // work on a copy without its trailing '/'
+     strcpy(rpath,path);
+     if (rpath[0]) {
+       if (rpath[strlen(rpath)-1]=='/')
+         rpath[strlen(rpath)-1]='\0';
+     }
+
+     fpo=fopen(fconcat(rpath,"/index.html"),"wb");
+     if (fpo) {
+       find_handle h;
+       verif_backblue(concat(rpath,"/"));    // generate the gif files
+       // Header
+       fprintf(fpo,toptemplate_header,
+         "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
+       );
+
+       /* Find valid project names */
+       h = hts_findfirst(rpath);
+       if (h) {
+         struct topindex_chain * startchain=NULL;   // head of the list
+         struct topindex_chain * chain=NULL;        // tail while collecting, cursor while writing
+         do {
+           if (hts_findisdir(h)) {
+             const char* name=hts_findgetname(h);
+             char iname[HTS_URLMAXSIZE*2];
+             // skip names that would overflow iname or topindex_chain.name
+             if (name
+                 && (strlen(name) < sizeof(startchain->name))
+                 && (strlen(rpath) + 1 + strlen(name) + sizeof("/index.html") <= sizeof(iname))) {
+               strcpy(iname,rpath);
+               strcat(iname,"/");
+               strcat(iname,name);
+               strcat(iname,"/index.html");
+               if (fexist(iname)) {
+                 struct topindex_chain * node=calloc(1, sizeof(struct topindex_chain));
+                 if (node) {                        // on allocation failure the entry is dropped, the list stays linked
+                   strcpy(node->name, name);
+                   if (chain)
+                     chain->next=node;
+                   else
+                     startchain=node;
+                   chain=node;
+                 }
+               }
+             }
+           }
+         } while(hts_findnext(h));
+         hts_findclose(h);
+
+         /* Write the index entries (directory order, no sorting), freeing each node once written */
+         chain=startchain;
+         while(chain) {
+           struct topindex_chain * next=chain->next;
+           char hname[HTS_URLMAXSIZE*2];
+           strcpy(hname,chain->name);               // fits: name length was checked against the same size above
+           escape_check_url(hname);                 // NOTE(review): rewrites hname in place - confirm it cannot outgrow the buffer
+           fprintf(fpo,toptemplate_body,
+             hname,
+             chain->name
+           );
+           free(chain);
+           chain=next;
+         }
+
+         retval=1;
+       }
+
+       // Footer
+       fprintf(fpo,toptemplate_footer,
+         "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
+       );
+
+       fclose(fpo);
+
+     }
+
+   }
+
+   if (toptemplate_header)
+     freet(toptemplate_header);
+   if (toptemplate_body)
+     freet(toptemplate_body);
+   if (toptemplate_footer)
+     freet(toptemplate_footer);
+
+   return retval;
+ }
+
+
+
+
+// Portable directory find functions
+/*
+// Example:
+find_handle h = hts_findfirst("/tmp");
+if (h) {
+ do {
+ if (hts_findisfile(h))
+ printf("File: %s (%d octets)\n",hts_findgetname(h),hts_findgetsize(h));
+ else if (hts_findisdir(h))
+ printf("Dir: %s\n",hts_findgetname(h));
+ } while(hts_findnext(h));
+ hts_findclose(h);
+}
+*/
+ // Opens directory "path" and positions the handle on its first entry.
+ // Returns the new handle (to be released with hts_findclose()), or NULL when path
+ // is NULL/empty, too long for the internal buffers, cannot be opened, or when no
+ // first entry could be read.
+ find_handle hts_findfirst(char* path) {
+   find_handle_struct* find;
+   if (!path)
+     return NULL;
+   if (!strnotempty(path))
+     return NULL;
+   find = (find_handle_struct*) calloc(1,sizeof(find_handle_struct));   // zero-filled by calloc
+   if (!find)
+     return NULL;
+ #if HTS_WIN
+   {
+     char rpath[1024*2];
+     // room is needed for path + "\" + "*.*" + NUL
+     if (strlen(path) + sizeof("\\*.*") <= sizeof(rpath)) {
+       strcpy(rpath,path);
+       if (rpath[0]) {
+         if (rpath[strlen(rpath)-1]!='\\')
+           strcat(rpath,"\\");
+       }
+       strcat(rpath,"*.*");
+       find->handle = FindFirstFile(rpath,&find->hdata);
+       if (find->handle != INVALID_HANDLE_VALUE)
+         return find;
+     }
+   }
+ #else
+   // room is needed for path + "/" + NUL
+   if (strlen(path) + sizeof("/") <= sizeof(find->path)) {
+     strcpy(find->path,path);
+     if (find->path[0]) {
+       if (find->path[strlen(find->path)-1]!='/')
+         strcat(find->path,"/");
+     }
+     find->hdir=opendir(path);
+     if (find->hdir != NULL) {
+       if (hts_findnext(find) == 1)
+         return find;
+       closedir(find->hdir);          // no entry could be read: do not leak the DIR handle
+       find->hdir=NULL;
+     }
+   }
+ #endif
+   free((void*)find);
+   return NULL;
+ }
+ // Moves the handle to the next directory entry.
+ // Returns 1 when an entry is available, 0 at the end of the directory (or when
+ // find is NULL).
+ // Non-Windows: entries that cannot be stat()ed (dangling symlink, entry removed
+ // meanwhile, ..) are skipped instead of ending the enumeration early, since
+ // callers stop looping on the first 0.
+ int hts_findnext(find_handle find) {
+   if (find) {
+ #if HTS_WIN
+     if ( (FindNextFile(find->handle,&find->hdata)))
+       return 1;
+ #else
+     if (find->hdir != NULL) {
+       while ((find->dirp=readdir(find->hdir)) != NULL) {
+         memset(&(find->filestat), 0, sizeof(find->filestat));
+         if (!stat(concat(find->path,find->dirp->d_name),&find->filestat))
+           return 1;
+       }
+     }
+     // end of directory: leave no stale information behind
+     memset(&(find->filestat), 0, sizeof(find->filestat));
+ #endif
+   }
+   return 0;
+ }
+ // Releases a handle obtained from hts_findfirst(): closes the underlying
+ // system handle when it is set, then frees the structure.
+ // A NULL handle is accepted. Always returns 0.
+ int hts_findclose(find_handle find) {
+   if (!find)
+     return 0;
+ #if HTS_WIN
+   if (find->handle) {
+     FindClose(find->handle);
+     find->handle=NULL;
+   }
+ #else
+   if (find->hdir) {
+     closedir (find->hdir);
+     find->hdir=NULL;
+   }
+ #endif
+   free((void*)find);
+   return 0;
+ }
+ // Name of the current entry (without the directory part).
+ // Returns NULL when find is NULL or, on non-Windows builds, when there is no
+ // current entry. The string lives inside the handle / directory stream.
+ char* hts_findgetname(find_handle find) {
+   if (!find)
+     return NULL;
+ #if HTS_WIN
+   return find->hdata.cFileName;
+ #else
+   return (find->dirp) ? find->dirp->d_name : NULL;
+ #endif
+ }
+ // Size of the current entry, or -1 when find is NULL.
+ // Windows: only the low 32 bits of the size (nFileSizeLow) are reported.
+ // Others: st_size from the last stat() is converted to int.
+ int hts_findgetsize(find_handle find) {
+   if (!find)
+     return -1;
+ #if HTS_WIN
+   return find->hdata.nFileSizeLow;
+ #else
+   return find->filestat.st_size;
+ #endif
+ }
+ // Returns 1 when the current entry is a directory that hts_findissystem() does
+ // not reject (so never for "." and ".."), 0 otherwise or when find is NULL.
+ int hts_findisdir(find_handle find) {
+   if (!find)
+     return 0;
+   if (hts_findissystem(find))
+     return 0;
+ #if HTS_WIN
+   return (find->hdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ? 1 : 0;
+ #else
+   return (S_ISDIR(find->filestat.st_mode)) ? 1 : 0;
+ #endif
+ }
+ // Returns 1 when the current entry is a file that hts_findissystem() does not
+ // reject, 0 otherwise or when find is NULL.
+ // Windows: anything that is not a directory; others: regular files only.
+ int hts_findisfile(find_handle find) {
+   if (!find)
+     return 0;
+   if (hts_findissystem(find))
+     return 0;
+ #if HTS_WIN
+   return (find->hdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ? 0 : 1;
+ #else
+   return (S_ISREG(find->filestat.st_mode)) ? 1 : 0;
+ #endif
+ }
+ // Returns 1 when the current entry must be ignored by directory scans:
+ //   Windows: system, hidden or temporary entries, plus "." and ".."
+ //   others:  character/block devices, fifos and sockets, plus "." and ".."
+ // Returns 0 otherwise, or when find is NULL.
+ int hts_findissystem(find_handle find) {
+   if (find) {
+ #if HTS_WIN
+     if (find->hdata.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM|FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_TEMPORARY))
+       return 1;
+     else if ( (!strcmp(find->hdata.cFileName,"..")) || (!strcmp(find->hdata.cFileName,".")) )
+       return 1;
+ #else
+     if (
+       (S_ISCHR(find->filestat.st_mode))
+       ||
+       (S_ISBLK(find->filestat.st_mode))
+       ||
+       (S_ISFIFO(find->filestat.st_mode))
+       ||
+       (S_ISSOCK(find->filestat.st_mode))
+       )
+       return 1;
+     // dirp is NULL once the enumeration is over: check it, as hts_findgetname() does
+     else if ( (find->dirp != NULL)
+               && ( (!strcmp(find->dirp->d_name,"..")) || (!strcmp(find->dirp->d_name,".")) ) )
+       return 1;
+ #endif
+   }
+   return 0;
+ }
diff --git a/src/htstools.h b/src/htstools.h
new file mode 100644
index 0000000..b3e2c7e
--- /dev/null
+++ b/src/htstools.h
@@ -0,0 +1,138 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* various tools (filename analyzing ..) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSTOOLS_DEFH
+#define HTSTOOLS_DEFH
+
+/* specific definitions */
+#include <stdio.h>
+#include <stdlib.h>
+#include "htsbase.h"
+#include "htscore.h"
+
+#if HTS_WIN
+#else
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#endif
+
+ int ident_url_relatif(char *lien,char* urladr,char* urlfil,char* adr,char* fil);
+ int lienrelatif(char* s,char* link,char* curr);   // callers in htswizard.c treat a 0 result as success and then read s
+ int link_has_authority(char* lien);
+ int link_has_authorization(char* lien);
+ void long_to_83(int mode,char* n83,char* save);
+ void longfile_to_83(int mode,char* n83,char* save);
+ HTS_INLINE int __rech_tageq(const char* adr,const char* s);            // second stage of rech_tageq(): offset past "s<spaces>=", or 0
+ HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s);   // same, digits allowed between s and '='
+ #define rech_tageq(adr,s) \
+ ( \
+ ( (*((adr)-1)=='<') || (is_space(*((adr)-1))) ) ? \
+ ( \
+ (streql(*(adr),*(s))) ? \
+ (__rech_tageq((adr),(s))) \
+ : 0 \
+ ) \
+ : 0\
+ )
+ #define rech_tageqbegdigits(adr,s) \
+ ( \
+ ( (*((adr)-1)=='<') || (is_space(*((adr)-1))) ) ? \
+ ( \
+ (streql(*(adr),*(s))) ? \
+ (__rech_tageqbegdigits((adr),(s))) \
+ : 0 \
+ ) \
+ : 0\
+ )
+ //HTS_INLINE int rech_tageq(const char* adr,const char* s);  -- replaced by the two macros above: they read adr[-1] and evaluate adr and s several times, so pass side-effect free expressions
+ HTS_INLINE int rech_sampletag(const char* adr,const char* s);   // 1 when tag name s (without '=') is found at adr, else 0
+ HTS_INLINE int check_tag(char* from,const char* tag);            // compares the tag name found after from[0] with tag
+ int verif_backblue(char* base);
+ int verif_external(int nb,int test);   // per-slot one-shot flag: test==0 resets, otherwise 1 on the first call only
+ // returns 1 when size exceeds the quota selected by the MIME type (maxhtml or maxnhtml), else 0
+ int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type);
+ // writes <path>/index.html listing the sub-directories that contain an index.html
+ int hts_buildtopindex(char* path,char* binpath);
+
+
+
+ // Portable directory find functions
+ // find_handle_struct holds the state of one directory enumeration
+ #if HTS_WIN
+
+ typedef struct {
+ WIN32_FIND_DATA hdata;   // current entry, filled by FindFirstFile/FindNextFile
+ HANDLE handle;           // search handle returned by FindFirstFile
+ } find_handle_struct;
+
+
+ #else
+
+ typedef struct {
+ DIR * hdir;              // directory stream returned by opendir()
+ struct dirent* dirp;     // current entry returned by readdir(), NULL when there is none
+ struct stat filestat;    // stat() of the current entry (path + d_name)
+ char path[2048];         // directory being scanned, stored with a trailing '/'
+ } find_handle_struct;
+
+ #endif
+
+ typedef find_handle_struct* find_handle;
+ // singly linked list of project directories collected by hts_buildtopindex()
+ typedef struct topindex_chain {
+ char name[2048]; /* directory name as returned by hts_findgetname() */
+ struct topindex_chain* next; /* next element */
+ } topindex_chain ;
+
+
+ // Directory find functions
+ find_handle hts_findfirst(char* path);   // opens path and reads its first entry; NULL on failure
+ int hts_findnext(find_handle find);      // 1 when another entry was read, 0 otherwise
+ int hts_findclose(find_handle find);     // releases the handle; always returns 0
+ // accessors for the current entry
+ char* hts_findgetname(find_handle find);   // entry name, or NULL
+ int hts_findgetsize(find_handle find);     // entry size, or -1 when find is NULL
+ int hts_findisdir(find_handle find);       // 1 for a directory not rejected by hts_findissystem()
+ int hts_findisfile(find_handle find);      // 1 for a file not rejected by hts_findissystem()
+ int hts_findissystem(find_handle find);    // 1 for ".", ".." and hidden/system/temporary (Windows) or device/fifo/socket entries
+
+
+
+
+#endif
diff --git a/src/htswizard.c b/src/htswizard.c
new file mode 100644
index 0000000..b23f5fb
--- /dev/null
+++ b/src/htswizard.c
@@ -0,0 +1,880 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wizard system (accept/refuse links) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htswizard.h"
+#include "htsdefines.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+/* END specific definitions */
+
+ // version 1 for httpmirror
+ // flush the logs when the files have to be readable progressively
+ #define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); }
+ // NOTE: test_flush expands to a bare if statement and needs a variable named opt in scope
+ // shortcuts created to lighten the syntax (they need liens and ptr in scope)
+ #define urladr (liens[ptr]->adr)
+ #define urlfil (liens[ptr]->fil)
+ // NOTE(review): when *filptr already equals filter_max, the shift below writes filters[filter_max] - confirm the table has that extra slot
+ // free filters[0] (shifting every entry up by one) so that a new element can be inserted in filters[0]
+ #define HT_INSERT_FILTERS0 {\
+ int i;\
+ if (*filptr > 0) {\
+ for(i = (*filptr)-1 ; i>=0 ; i--) {\
+ strcpy(filters[i+1],filters[i]);\
+ }\
+ }\
+ strcpy(filters[0],"");\
+ (*filptr)++;\
+ (*filptr)=minimum((*filptr),filter_max);\
+ }
+
+
+
+/*
+httrackp opt bloc d'options
+int ptr,int lien_tot,lien_url** liens
+ relatif aux liens
+char* adr,char* fil
+ adresse/fichier à tester
+char** filters,int filptr,int filter_max
+ relatif aux filtres
+robots_wizard* robots
+ relatif aux robots
+int* set_prio_to
+ callback obligatoire "capturer ce lien avec prio=N-1"
+int* just_test_it
+ callback optionnel "ne faire que tester ce lien éventuellement"
+retour:
+ 0 accepté
+ 1 refusé
+ -1 pas d'avis
+*/
+int hts_acceptlink(httrackp* opt,
+ int ptr,int lien_tot,lien_url** liens,
+ char* adr,char* fil,
+ char*** ptrfilters,int* filptr,int filter_max,
+ robots_wizard* robots,
+ int* set_prio_to,
+ int* just_test_it) {
+
+ int forbidden_url=-1;
+ int meme_adresse;
+ char** filters = *ptrfilters;
+
+ // -------------------- PHASE 1 --------------------
+
+ /* Infos */
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil);
+ test_flush;
+ }
+
+ /* Doit-on traiter les non html? */
+ if ((opt->getmode & 2)==0) { // non on ne doit pas
+ if (!ishtml(fil)) { // non il ne faut pas
+ //adr[0]='\0'; // ne pas traiter ce lien, pas traiter
+ forbidden_url=1; // interdire récupération du lien
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"non-html file ignored at %s : %s"LF,adr,fil);
+ test_flush;
+ }
+
+ }
+ }
+
+ /* Niveau 1: ne pas parser suivant! */
+ if (ptr>0) {
+ if (liens[ptr]->depth <= 1) {
+ forbidden_url=1; // interdire récupération du lien
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"file from too far level ignored at %s : %s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ /* en cas d'échec en phase 1, retour immédiat! */
+ if (forbidden_url==1) {
+ return forbidden_url;
+ }
+
+ // -------------------- PHASE 2 --------------------
+
+ // ------------------------------------------------------
+ // doit-on traiter ce lien?.. vérifier droits de déplacement
+ meme_adresse=strfield2(adr,urladr);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug");
+ if (meme_adresse)
+ fprintf(opt->log,"Compare addresses: %s=%s"LF,adr,urladr);
+ else
+ fprintf(opt->log,"Compare addresses: %s!=%s"LF,adr,urladr);
+ test_flush;
+ }
+ if (meme_adresse) { // même adresse
+ { // tester interdiction de descendre
+ // MODIFIE : en cas de remontée puis de redescente, il se pouvait qu'on ne puisse pas atteindre certains fichiers
+ // problème: si un fichier est virtuellement accessible via une page mais dont le lien est sur une autre *uniquement*..
+ char tempo[HTS_URLMAXSIZE*2];
+ char tempo2[HTS_URLMAXSIZE*2];
+
+ // note (up/down): on calcule à partir du lien primaire, ET du lien précédent.
+ // ex: si on descend 2 fois on peut remonter 1 fois
+
+ if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) {
+ if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"build relative links to test: %s %s (with %s and %s)"LF,tempo,tempo2,liens[liens[ptr]->premier]->fil,liens[ptr]->fil);
+ test_flush;
+ }
+
+ // si vient de primary, ne pas tester lienrelatif avec (car host "différent")
+ /*if (liens[liens[ptr]->premier] == 0) { // vient de primary
+ }
+ */
+
+ // NEW: finalement OK, sauf pour les moved repérés par link_import
+ // PROBLEME : annulé a cause d'un lien éventuel isolé accepté..qui entrainerait un miroir
+
+ // (test même niveau (NOUVEAU à cause de certains problèmes de filtres non intégrés))
+ // NEW
+ if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) {
+ if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ // down
+ if ( (strncmp(tempo,"../",3)) || (strncmp(tempo2,"../",3))) { // pas montée sinon ne nbous concerne pas
+ int test1,test2;
+ if (!strncmp(tempo,"../",3))
+ test1=0;
+ else
+ test1 = (strchr(tempo +((*tempo =='/')?1:0),'/')!=NULL);
+ if (!strncmp(tempo2,"../",3))
+ test2=0;
+ else
+ test2 = (strchr(tempo2+((*tempo2=='/')?1:0),'/')!=NULL);
+ if ( (test1) && (test2) ) { // on ne peut que descendre
+ if ((opt->seeker & 1)==0) { // interdiction de descendre
+ forbidden_url=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"lower link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ } else { // autorisé à priori - NEW
+ if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ } else if ( (test1) || (test2) ) { // on peut descendre pour accéder au lien
+ if ((opt->seeker & 1)!=0) { // on peut descendre - NEW
+ if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+
+
+ // up
+ if ( (!strncmp(tempo,"../",3)) && (!strncmp(tempo2,"../",3)) ) { // impossible sans monter
+ if ((opt->seeker & 2)==0) { // interdiction de monter
+ forbidden_url=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"upper link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ } else { // autorisé à monter - NEW
+ if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ } else if ( (!strncmp(tempo,"../",3)) || (!strncmp(tempo2,"../",3)) ) { // Possible en montant
+ if ((opt->seeker & 2)!=0) { // autorisé à monter - NEW
+ if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ } // sinon autorisé en descente
+ }
+
+
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil);
+ test_flush;
+ }
+ }
+
+ } // tester interdiction de descendre?
+
+ { // tester interdiction de monter
+ char tempo[HTS_URLMAXSIZE*2];
+ char tempo2[HTS_URLMAXSIZE*2];
+ if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) {
+ if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) {
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil);
+ test_flush;
+ }
+
+ }
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil);
+ test_flush;
+ }
+
+ }
+ } // fin tester interdiction de monter
+
+ } else { // adresse différente, sortir?
+
+ //if (!opt->wizard) { // mode non wizard
+ // doit-on traiter ce lien?.. vérifier droits de sortie
+ switch((opt->travel & 255)) {
+ case 0:
+ if (!opt->wizard) // mode non wizard
+ forbidden_url=1; break; // interdicton de sortir au dela de l'adresse
+ case 1: { // sortie sur le même dom.xxx
+ int i=strlen(adr)-1;
+ int j=strlen(urladr)-1;
+ while( (i>0) && (adr[i]!='.')) i--;
+ while( (j>0) && (urladr[j]!='.')) j--;
+ i--; j--;
+ while( (i>0) && (adr[i]!='.')) i--;
+ while( (j>0) && (urladr[j]!='.')) j--;
+ if ((i>0) && (j>0)) {
+ if (!strfield2(adr+i,urladr+j)) { // !=
+ if (!opt->wizard) { // mode non wizard
+ //printf("refused: %s\n",adr);
+ forbidden_url=1; // pas même domaine
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"foreign domain link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+
+ } else {
+ if (opt->wizard) { // mode wizard
+ forbidden_url=0; // même domaine
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same domain link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ } else
+ forbidden_url=1;
+ }
+ break;
+ case 2: { // sortie sur le même .xxx
+ int i=strlen(adr)-1;
+ int j=strlen(urladr)-1;
+ while( (i>0) && (adr[i]!='.')) i--;
+ while( (j>0) && (urladr[j]!='.')) j--;
+ if ((i>0) && (j>0)) {
+ if (!strfield2(adr+i,urladr+j)) { // !-
+ if (!opt->wizard) { // mode non wizard
+ //printf("refused: %s\n",adr);
+ forbidden_url=1; // pas même .xx
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"foreign location link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt->wizard) { // mode wizard
+ forbidden_url=0; // même domaine
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same location link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ } else forbidden_url=1;
+ }
+ break;
+ case 7: // everywhere!!
+ if (opt->wizard) { // mode wizard
+ forbidden_url=0;
+ break;
+ }
+ } // switch
+
+ // ANCIENNE POS -- récupérer les liens à côtés d'un lien (nearlink)
+
+ } // fin test adresse identique/différente
+
+ // -------------------- PHASE 3 --------------------
+
+ // récupérer les liens à côtés d'un lien (nearlink) (nvelle pos)
+ if (opt->nearlink) {
+ if (!ishtml(fil)) { // non html
+ //printf("ok %s%s\n",ad,fil);
+ forbidden_url=0; // autoriser
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ // -------------------- PHASE 4 --------------------
+
+ // ------------------------------------------------------
+ // Si wizard, il se peut qu'on autorise ou qu'on interdise
+ // un lien spécial avant même de tester sa position, sa hiérarchie etc.
+ // peut court-circuiter le forbidden_url précédent
+ if (opt->wizard) { // le wizard entre en action..
+ //
+ int question=1; // poser une question
+ int force_mirror=0; // pour mirror links
+ int filters_answer=0; // décision prise par les filtres
+ char l[HTS_URLMAXSIZE*2];
+ char lfull[HTS_URLMAXSIZE*2];
+
+ if (forbidden_url!=-1) question=0; // pas de question, résolu
+
+ // former URL complète du lien actuel
+ strcpy(l,jump_identification(adr));
+ if (*fil!='/') strcat(l,"/");
+ strcat(l,fil);
+ // full version (http://foo:bar@www.foo.com/bar.html)
+ if (!link_has_authority(adr))
+ strcpy(lfull,"http://");
+ else
+ lfull[0]='\0';
+ strcat(lfull,adr);
+ if (*fil!='/') strcat(lfull,"/");
+ strcat(lfull,fil);
+
+ // tester filters (URLs autorisées ou interdites explicitement)
+
+ // si lien primaire on saute le joker, on est pas lémur
+ if (ptr==0) { // lien primaire, autoriser
+ question=1; // la question sera résolue automatiquement
+ forbidden_url=0;
+ } else {
+ int jok;
+ // filters, 0=sait pas 1=ok -1=interdit
+ {
+ int jokDepth1=0,jokDepth2=0;
+ int jok1=0,jok2=0;
+ jok1 = fa_strjoker(filters,*filptr,lfull,NULL,NULL,&jokDepth1);
+ jok2 = fa_strjoker(filters,*filptr,l, NULL,NULL,&jokDepth2);
+ if (jok2 == 0) // #2 doesn't know
+ jok = jok1; // then, use #1
+ else if (jok1 == 0) // #1 doesn't know
+ jok = jok2; // then, use #2
+ else if (jokDepth1 >= jokDepth2) // #1 matching rule is "after" #2, then it is prioritary
+ jok = jok1;
+ else // #2 matching rule is "after" #1, then it is prioritary
+ jok = jok2;
+ }
+
+ if (jok == 1) { // autorisé
+ filters_answer=1; // décision prise par les filtres
+ question=0; // ne pas poser de question, autorisé
+ forbidden_url=0; // URL autorisée
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ } else if (jok == -1) {
+ filters_answer=1; // décision prise par les filtres
+ question=0; // ne pas poser de question:
+ forbidden_url=1; // URL interdite
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ } // sinon on touche à rien
+ }
+
+ // vérifier mode mirror links
+ if (question) {
+ if (opt->mirror_first_page) { // mode mirror links
+ if (liens[ptr]->precedent==0) { // parent=primary!
+ forbidden_url=0; // autorisé
+ question=1; // résolution auto
+ force_mirror=5; // mirror (5)
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit mirror link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ // vérifier récursivité extérieure
+ if ((question) && (ptr>0) && (!force_mirror)) {
+ if (opt->extdepth>0) {
+ // *set_prio_to = opt->extdepth + 1;
+ *set_prio_to = opt->extdepth + 1;
+ forbidden_url=0; // autorisé
+ question=0; // résolution auto
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+
+ // on doit poser la question.. peut on la poser?
+ // (oui je sais quel preuve de délicatesse, merci merci)
+ if ((question) && (ptr>0) && (!force_mirror)) {
+ if (opt->wizard==2) { // éliminer tous les liens non répertoriés comme autorisés (ou inconnus)
+ question=0;
+ forbidden_url=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous forbidden link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+
+ // vérifier robots.txt
+ if (opt->robots) {
+ int r = checkrobots(robots,adr,fil);
+ if (r == -1) { // interdiction
+#if DEBUG_ROBOTS
+ printf("robots.txt forbidden: %s%s\n",adr,fil);
+#endif
+ // question résolue, par les filtres, et mode robot non strict
+ if ((!question) && (filters_answer) && (opt->robots == 1) && (forbidden_url!=1)) {
+ r=0; // annuler interdiction des robots
+ if (!forbidden_url) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Warning link followed against robots.txt: link %s at %s%s"LF,l,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ if (r == -1) { // interdire
+ forbidden_url=1;
+ question=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(robots.txt) forbidden link: link %s at %s%s"LF,l,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ if (!question) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ if (!forbidden_url) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) shared foreign domain link: link %s at %s%s"LF,l,urladr,urlfil);
+ } else {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) cancelled foreign domain link: link %s at %s%s"LF,l,urladr,urlfil);
+ }
+ test_flush;
+ }
+#if BDEBUG==3
+ printf("at %s in %s, wizard says: url %s ",urladr,urlfil,l);
+ if (forbidden_url) printf("cancelled"); else printf(">SHARED<");
+ printf("\n");
+#endif
+ }
+
+ /* en cas de question, ou lien primaire (enregistrer autorisations) */
+ if (question || (ptr==0)) {
+#if HTS_ANALYSTE
+ char* s;
+#else
+ char s[4];
+#endif
+ int n=0;
+
+ // si primaire (plus bas) alors ...
+ if ((ptr!=0) && (force_mirror==0)) {
+ HTS_REQUEST_START;
+ HT_PRINT("\n");
+ HT_PRINT("At "); HT_PRINT(urladr); HT_PRINT(", there is a link ("); HT_PRINT(adr); HT_PRINT("/"); HT_PRINT(fil); HT_PRINT(") which goes outside the address."LF);
+ HT_PRINT("What should I do? (press a key + enter)"LF LF);
+ HT_PRINT("* Ignore all further links" LF);
+ HT_PRINT("0 Ignore this link (default if empty entry)"LF);
+ HT_PRINT("1 Ignore directory and lower structures"LF);
+ HT_PRINT("2 Ignore all domain"LF);
+ //HT_PRINT("3 (Ignore location, not implemented)\n");
+ HT_PRINT(LF);
+ HT_PRINT("4 Get only this page/link"LF);
+ HT_PRINT("5 Mirror this link (useful)"LF);
+ HT_PRINT("6 Mirror links located in the same domain"LF);
+ HT_PRINT(LF);
+//#if HTS_ANALYSTE!=2
+//HT_PRINT("! View extract of html code where the link is located"LF);
+//#endif
+ HTS_REQUEST_END;
+#if HTS_ANALYSTE
+ {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strcat(tempo,adr);
+ strcat(tempo,"/");
+ strcat(tempo,fil);
+ s=hts_htmlcheck_query3(tempo);
+ }
+#else
+ do {
+ io_flush; linput(stdin,s,2);
+#endif
+ if (strnotempty(s)==0) // entrée
+ n=0;
+ else if (isdigit((unsigned char)*s))
+ sscanf(s,"%d",&n);
+ else {
+ switch(*s) {
+ case '*': n=-1; break;
+ case '!': n=-999; {
+ /*char *a;
+ int i;
+ a=copie_de_adr-128;
+ if (a<r.adr) a=r.adr;
+ for(i=0;i<256;i++) {
+ if (a==copie_de_adr) printf("\nHERE:\n");
+ printf("%c",*a++);
+ }
+ printf("\n\n");
+ */
+ }
+ break;
+ default: n=-999; printf("What did you say?\n"); break;
+
+ }
+ }
+#if HTS_ANALYSTE
+#else
+ } while(n==-999);
+#endif
+ io_flush;
+ } else { // lien primaire: autoriser répertoire entier
+
+ /* sanity check */
+ if ((*filptr) + 1 >= opt->maxfilter) {
+ opt->maxfilter += HTS_FILTERSINC;
+ if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) {
+ printf("PANIC! : Too many filters : >%d [%d]\n", (*filptr),__LINE__);
+ fflush(stdout);
+ if (opt->errlog) {
+ fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF, (*filptr) );
+ fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
+ test_flush;
+ }
+ abort(); // wild..
+ }
+ //opt->filters.filters=filters;
+ //*ptrfilters = filters;
+ }
+
+ if (!force_mirror) {
+ if ((opt->seeker & 1)==0) { // interdiction de descendre
+ n=7;
+ } else {
+ n=5; // autoriser miroir répertoires descendants (lien primaire)
+ }
+ } else // forcer valeur (sub-wizard)
+ n=force_mirror;
+ }
+
+ switch(n) {
+ case -1: // sauter tout le reste
+ forbidden_url=1;
+ opt->wizard=2; // sauter tout le reste
+ break;
+ case 0: // interdire les mêmes liens: adr/fil
+ forbidden_url=1;
+ HT_INSERT_FILTERS0; // insérer en 0
+ strcpy(filters[0],"-");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strcat(filters[0],fil);
+ break;
+
+ case 1: // éliminer répertoire entier et sous rép: adr/path/ *
+ forbidden_url=1;
+ {
+ int i=strlen(fil)-1;
+ while((fil[i]!='/') && (i>0)) i--;
+ if (fil[i]=='/') {
+ HT_INSERT_FILTERS0; // insérer en 0
+ strcpy(filters[0],"-");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strncat(filters[0],fil,i);
+ if (filters[0][strlen(filters[0])-1]!='/') strcat(filters[0],"/");
+ strcat(filters[0],"*");
+ }
+ }
+
+ // ** ...
+ break;
+
+ case 2: // adresse adr*
+ forbidden_url=1;
+ HT_INSERT_FILTERS0; // insérer en 0
+ strcpy(filters[0],"-");
+ strcat(filters[0],jump_identification(adr));
+ strcat(filters[0],"*");
+ break;
+
+ case 3: // ** A FAIRE
+ forbidden_url=1;
+ /*
+ {
+ int i=strlen(adr)-1;
+ while((adr[i]!='/') && (i>0)) i--;
+ if (i>0) {
+
+ }
+
+ }*/
+
+ break;
+ //
+ case 4: // same link
+ // PAS BESOIN!!
+ /*HT_INSERT_FILTERS0; // insérer en 0
+ strcpy(filters[0],"+");
+ strcat(filters[0],adr);
+ if (*fil!='/') strcat(filters[0],"/");
+ strcat(filters[0],fil);*/
+
+
+ // étant donné le renversement wizard/primary filter (les primary autorisent up/down ET interdisent)
+ // il faut éviter d'un lien isolé effectue un miroir total..
+
+ *set_prio_to = 0+1; // niveau de récursion=0 (pas de miroir)
+
+ break;
+
+ case 5: // autoriser répertoire entier et fils
+ if ((opt->seeker & 2)==0) { // interdiction de monter
+ int i=strlen(fil)-1;
+ while((fil[i]!='/') && (i>0)) i--;
+ if (fil[i]=='/') {
+ HT_INSERT_FILTERS0; // insérer en 0
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strncat(filters[0],fil,i+1);
+ strcat(filters[0],"*");
+ }
+ } else { // autoriser domaine alors!!
+ HT_INSERT_FILTERS0; // insérer en 0 strcpy(filters[filptr],"+");
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ strcat(filters[0],"*");
+ }
+ break;
+
+ case 6: // same domain
+ HT_INSERT_FILTERS0; // insérer en 0 strcpy(filters[filptr],"+");
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ strcat(filters[0],"*");
+ break;
+ //
+ case 7: // autoriser ce répertoire
+ {
+ int i=strlen(fil)-1;
+ while((fil[i]!='/') && (i>0)) i--;
+ if (fil[i]=='/') {
+ HT_INSERT_FILTERS0; // insérer en 0
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strncat(filters[0],fil,i+1);
+ strcat(filters[0],"*[file]");
+ }
+ }
+
+ break;
+
+ case 50: // on fait rien
+ break;
+ } // switch
+
+ } // test du wizard sur l'url
+ } // fin du test wizard..
+
+ // -------------------- PHASE 5 --------------------
+
+ // lien non autorisé, peut-on juste le tester?
+ if (just_test_it) {
+ if (forbidden_url==1) {
+ if (opt->travel&256) { // tester tout de même
+ if (strfield(adr,"ftp://")==0) { // PAS ftp!
+ forbidden_url=1; // oui oui toujours interdit (note: sert à rien car ==1 mais c pour comprendre)
+ *just_test_it=1; // mais on teste
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Testing link %s%s"LF,adr,fil);
+ }
+ }
+ }
+ }
+ //adr[0]='\0'; // cancel
+ }
+
+ // -------------------- PHASE 6 --------------------
+#if HTS_ANALYSTE
+ {
+ int test_url=hts_htmlcheck_check(adr,fil,forbidden_url);
+ if (test_url!=-1)
+ forbidden_url=test_url;
+ }
+#endif
+ return forbidden_url;
+}
+
+/* Re-test a link against the filters once the size of its target is known.
+   opt: options (filter list, debug level, log file); adr, fil: address and path
+   of the link; size: size to test (nothing is tested when it is negative).
+   Returns 0 (no filter matched, or size<0), 1 (accepted) or -1 (refused because
+   of its size); a refusal that is not size-related is reported as 1. */
+int hts_testlinksize(httrackp* opt,
+                     char* adr,char* fil,
+                     LLint size) {
+  int jok=0;
+  if (size>=0) {
+    char l[HTS_URLMAXSIZE*2];       // jump_identification(adr) + path
+    char lfull[HTS_URLMAXSIZE*2];   // adr + path ("http://" first if adr has no authority)
+    LLint sz=size;
+    int size_flag=0;
+
+    // build the complete URL of the current link
+    strcpy(l,jump_identification(adr));
+    if (*fil!='/') strcat(l,"/");
+    strcat(l,fil);
+    // second form, built from the whole address
+    if (!link_has_authority(adr))
+      strcpy(lfull,"http://");
+    else
+      lfull[0]='\0';
+    strcat(lfull,adr);
+    if (*fil!='/') strcat(lfull,"/");   // the separator goes to lfull, not to the already complete l
+    strcat(lfull,fil);
+
+    // test the filters (size) on both forms: 0=unknown 1=ok -1=forbidden
+    {
+      int jokDepth1=0,jokDepth2=0;
+      int jok1=0,jok2=0;
+      LLint sz1=size,sz2=size;
+      int size_flag1=0,size_flag2=0;
+      jok1 = fa_strjoker(*opt->filters.filters,*opt->filters.filptr,lfull,&sz1,&size_flag1,&jokDepth1);
+      jok2 = fa_strjoker(*opt->filters.filters,*opt->filters.filptr,l, &sz2,&size_flag2,&jokDepth2);
+      // keep the verdict of one test, together with its size limit and size flag
+      if (jok2 == 0) { // #2 doesn't know
+        jok = jok1; // then, use #1
+        sz = sz1;
+        size_flag = size_flag1;
+      } else if (jok1 == 0) { // #1 doesn't know
+        jok = jok2; // then, use #2
+        sz = sz2;
+        size_flag = size_flag2;
+      } else if (jokDepth1 >= jokDepth2) { // #1 matching rule is "after" #2, then it is prioritary
+        jok = jok1;
+        sz = sz1;
+        size_flag = size_flag1;
+      } else { // #2 matching rule is "after" #1, then it is prioritary
+        jok = jok2;
+        sz = sz2;
+        size_flag = size_flag2;
+      }
+    }
+
+    // log
+    if (jok==1) {
+      if ((opt->debug>1) && (opt->log!=NULL)) {
+        fspc(opt->log,"debug"); fprintf(opt->log,"File confirmed (size test): %s%s ("LLintP")"LF,adr,fil,(LLint)(size));
+      }
+    } else if (jok==-1) {
+      if (size_flag) { /* forbidden because of the size */
+        if ((opt->debug>1) && (opt->log!=NULL)) {
+          fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled due to its size: %s%s ("LLintP", limit: "LLintP")"LF,adr,fil,(LLint)(size),(LLint)(sz));
+        }
+      } else {
+        // refused by a rule that is not about size: not a failure of the size test
+        jok=1;
+      }
+    }
+  }
+  return jok;
+}
+
+
+
+#undef test_flush
+#undef urladr
+#undef urlfil
+
+#undef HT_INSERT_FILTERS0
+
diff --git a/src/htswizard.h b/src/htswizard.h
new file mode 100644
index 0000000..28c5d2f
--- /dev/null
+++ b/src/htswizard.h
@@ -0,0 +1,53 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wizard system (accept/refuse links) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSWIZARD_DEFH
+#define HTSWIZARD_DEFH
+
+#include "htscore.h"
+
+int hts_acceptlink(httrackp* opt,
+ int ptr,int lien_tot,lien_url** liens,
+ char* adr,char* fil,
+ char*** filters,int* filptr,int filter_max,
+ robots_wizard* robots,
+ int* set_prio_to_0,
+ int* just_test_it);
+int hts_testlinksize(httrackp* opt,
+ char* adr,char* fil,
+ LLint size);
+#endif
diff --git a/src/htswrap.c b/src/htswrap.c
new file mode 100644
index 0000000..824af7e
--- /dev/null
+++ b/src/htswrap.c
@@ -0,0 +1,69 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wrapper system (for shell) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htswrap.h"
+#include "htshash.h"
+
+// typedef long (__stdcall * XSHBFF_WndProc_type)(HWND ,UINT ,WPARAM ,LPARAM);
+
+inthash wrappers=NULL;
+/* Allocate the wrappers table with inthash_new(42) unless it already exists.
+   Returns whatever inthash_created() reports for the table. */
+int htswrap_init(void) {
+  if (wrappers == NULL) wrappers = inthash_new(42);   /* first call only */
+  return inthash_created(wrappers);
+}
+/* Hand the wrappers table to inthash_delete(); always returns 1. */
+int htswrap_free(void) {
+ inthash_delete(&wrappers);
+ return 1;
+}
+/* Store fct under name in the wrappers table; returns 1, or 0 when no table could be created. */
+int htswrap_add(char* name,void* fct) {
+  if (!wrappers) htswrap_init();   /* create the table on first use */
+  if (!wrappers) return 0;         /* creation failed: do not hand a NULL table to inthash_write() */
+  inthash_write(wrappers,name,(unsigned long int)fct);
+  return 1;
+}
+/* Read the value stored under name; the result stays 0 unless inthash_read() stores one. */
+unsigned long int htswrap_read(char* name) {
+  unsigned long int fct=0;
+  if (!wrappers) htswrap_init();   /* create the table on first use */
+  if (wrappers)                    /* skip the lookup when no table could be created */
+    inthash_read(wrappers,name,(void*)&fct);
+  return fct;
+}
diff --git a/src/htswrap.h b/src/htswrap.h
new file mode 100644
index 0000000..03bf73f
--- /dev/null
+++ b/src/htswrap.h
@@ -0,0 +1,48 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wrapper system (for shell) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSWRAP_DEFH
+#define HTSWRAP_DEFH
+
+int htswrap_init(void);
+int htswrap_add(char* name,void* fct);
+int htswrap_free(void);
+unsigned long int htswrap_read(char* name);
+
+#endif
diff --git a/src/htszlib.c b/src/htszlib.c
new file mode 100644
index 0000000..d138a1c
--- /dev/null
+++ b/src/htszlib.c
@@ -0,0 +1,84 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Unpacking subroutines using Jean-loup Gailly's Zlib */
+/* for http compressed data */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+/* specific definitions */
+#include <stdio.h>
+#include <stdlib.h>
+#include "htsbase.h"
+#include "htscore.h"
+
+#if HTS_USEZLIB
+
+/* zlib */
+#include <zlib.h>
+#include "htszlib.h"
+
+/*
+  Unpack the gzip file 'filename' into the new file 'newfile'.
+  Return value: size of the new file, or -1 if an error occurred (missing or
+  empty name, file that cannot be opened, read error, short write).
+*/
+int hts_zunpack(char* filename,char* newfile) {
+  if (filename && newfile && filename[0] && newfile[0]) {
+    gzFile gz = gzopen (filename, "rb");
+    if (gz) {
+      FILE* fpout=fopen(fconv(newfile),"wb");
+      int size=0;
+      if (fpout) {
+        int nr;
+        do {
+          char buff[1024];
+          nr=gzread (gz, buff, 1024);
+          if (nr>0) {
+            size+=nr;
+            if ((int)fwrite(buff,1,nr,fpout) != nr)
+              nr=size=-1;     /* short write: stop and report the error */
+          } else if (nr<0)
+            size=-1;          /* gzread() failed: do not report the partial size as a success */
+        } while(nr>0);
+        fclose(fpout);
+      } else
+        size=-1;
+      gzclose(gz);
+      return size;
+    }
+  }
+  return -1;
+}
+
+#endif
diff --git a/src/htszlib.h b/src/htszlib.h
new file mode 100644
index 0000000..63310b8
--- /dev/null
+++ b/src/htszlib.h
@@ -0,0 +1,49 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Unpacking subroutines using Jean-loup Gailly's Zlib */
+/* for http compressed data */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTS_DEFZLIB
+#define HTS_DEFZLIB
+
+#if HTS_USEZLIB
+
+int hts_zunpack(char* filename,char* newfile);
+
+#endif
+
+#endif
+
diff --git a/src/httrack-library.h b/src/httrack-library.h
new file mode 100644
index 0000000..13ecb46
--- /dev/null
+++ b/src/httrack-library.h
@@ -0,0 +1,50 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: HTTrack definition file for library usage */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTTRACK_DEFLIB
+#define HTTRACK_DEFLIB
+
+#include "htsglobal.h"
+#include "htsopt.h"
+#include "htswrap.h"
+
+int hts_init(void);
+int hts_main(int argc, char **argv);
+
+
+#endif
+
diff --git a/src/httrack.c b/src/httrack.c
new file mode 100644
index 0000000..0289fca
--- /dev/null
+++ b/src/httrack.c
@@ -0,0 +1,571 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsshow.c console progress info */
+/* Only used on Linux version */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#if HTS_WIN
+#else
+#ifndef Sleep
+#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); }
+#endif
+#endif
+
+#include "htsglobal.h"
+#include "httrack.h"
+
+// htswrap_add
+#include "htswrap.h"
+
+#if HTS_ANALYSTE_CONSOLE
+
+/* specific definitions */
+#include "htsbase.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef _WIN32
+#include "Winsock.h"
+#endif
+/* END specific definitions */
+
+// ISO VT100/220 definitions
+#define VT_COL_TEXT_BLACK "30"
+#define VT_COL_TEXT_RED "31"
+#define VT_COL_TEXT_GREEN "32"
+#define VT_COL_TEXT_YELLOW "33"
+#define VT_COL_TEXT_BLUE "34"
+#define VT_COL_TEXT_MAGENTA "35"
+#define VT_COL_TEXT_CYAN "36"
+#define VT_COL_TEXT_WHITE "37"
+#define VT_COL_BACK_BLACK "40"
+#define VT_COL_BACK_RED "41"
+#define VT_COL_BACK_GREEN "42"
+#define VT_COL_BACK_YELLOW "43"
+#define VT_COL_BACK_BLUE "44"
+#define VT_COL_BACK_MAGENTA "45"
+#define VT_COL_BACK_CYAN "46"
+#define VT_COL_BACK_WHITE "47"
+//
+#define VT_GOTOXY(X,Y) "\33["Y";"X"f"
+#define VT_COLOR(C) "\33["C"m"
+#define VT_RESET "\33[m"
+#define VT_REVERSE "\33[7m"
+#define VT_UNREVERSE "\33[27m"
+#define VT_BOLD "\33[1m"
+#define VT_UNBOLD "\33[22m"
+#define VT_BLINK "\33[5m"
+#define VT_UNBLINK "\33[25m"
+//
+#define VT_CLREOL "\33[K"
+#define VT_CLRSOL "\33[1K"
+#define VT_CLRLIN "\33[2K"
+#define VT_CLREOS "\33[J"
+#define VT_CLRSOS "\33[1J"
+#define VT_CLRSCR "\33[2J"
+//
+#define csi(X) printf(s_csi( X ));
+void vt_clear(void) {   /* emit VT_RESET, VT_CLRSCR, then VT_GOTOXY("1","0") */
+  fputs(VT_RESET VT_CLRSCR VT_GOTOXY("1","0"),stdout);
+}
+void vt_home(void) {   /* emit VT_RESET, then VT_GOTOXY("1","0"); the screen is not cleared */
+  fputs(VT_RESET VT_GOTOXY("1","0"),stdout);
+}
+//
+
+
+/*
+#define STYLE_STATVALUES VT_COLOR(VT_COL_TEXT_BLACK)
+#define STYLE_STATTEXT VT_COLOR(VT_COL_TEXT_BLUE)
+*/
+#define STYLE_STATVALUES VT_BOLD
+#define STYLE_STATTEXT VT_UNBOLD
+#define STYLE_STATRESET VT_UNBOLD
+#define NStatsBuffer 14
+#define MAX_LEN_INPROGRESS 40
+
+static int use_show;
+
+/* Console entry point: calls hts_init(), registers the htsshow_* callbacks by name, then returns hts_main(argc,argv). */
+int main(int argc, char **argv) {
+ hts_init();
+
+ /* Reference only (commented out): the names being read back with htswrap_read(), and the related log lines
+ hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init");
+Log: "engine: init"
+
+ hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free");
+Log: "engine: free"
+
+ hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start");
+Log: "engine: start"
+
+ hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end");
+Log: "engine: end"
+
+ hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
+Log: "engine: change-options"
+
+ hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
+Log: "check-html: <url>"
+
+ hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
+ hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2");
+ hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3");
+ hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop");
+ hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link");
+Log: none
+
+ hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause");
+Log: "pause: <lockfile>"
+
+ hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
+ hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
+Log: none
+
+ hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
+Log:
+ "engine: transfer-status: link updated: <url> -> <file>"
+ | "engine: transfer-status: link added: <url> -> <file>"
+ | "engine: transfer-status: link recorded: <url> -> <file>"
+ | "engine: transfer-status: link link error (<errno>, '<err_msg>'): <url>"
+ hts_htmlcheck_savename = (t_hts_htmlcheck_savename ) htswrap_read("save-name");
+Log:
+ "engine: save-name: local name: <url> -> <file>"
+*/
+
+ htswrap_add("init",htsshow_init);
+ htswrap_add("free",htsshow_uninit);
+ htswrap_add("start",htsshow_start);
+ htswrap_add("change-options",htsshow_chopt);
+ htswrap_add("end",htsshow_end);
+ htswrap_add("check-html",htsshow_checkhtml);
+ htswrap_add("loop",htsshow_loop);
+ htswrap_add("query",htsshow_query);
+ htswrap_add("query2",htsshow_query2);
+ htswrap_add("query3",htsshow_query3);
+ htswrap_add("check-link",htsshow_check);
+ htswrap_add("pause",htsshow_pause);
+ htswrap_add("save-file",htsshow_filesave);
+ htswrap_add("link-detected",htsshow_linkdetected);
+ htswrap_add("transfer-status",htsshow_xfrstatus);
+ htswrap_add("save-name",htsshow_savename);
+
+ return hts_main(argc,argv);
+}
+
+
+/* CALLBACK FUNCTIONS */
+
+/* Initialize the Winsock ("init" callback). With _WIN32 it requests Winsock 1.1 and
+   prints a message on failure; without _WIN32 the function body is empty. */
+void __cdecl htsshow_init(void) {
+#ifdef _WIN32
+  {
+    WORD wVersionRequested;       // requested version WinSock API
+    WSADATA wsadata;              // Windows Sockets API data
+    int stat;
+    wVersionRequested = 0x0101;
+    stat = WSAStartup( wVersionRequested, &wsadata );
+    if (stat != 0) {
+      printf("Winsock not found!\n");
+      return;
+    } else if (LOBYTE(wsadata.wVersion) != 1 || HIBYTE(wsadata.wVersion) != 1) {   // either byte differing means "not 1.1"
+      printf("WINSOCK.DLL does not support version 1.1\n");
+      WSACleanup();
+      return;
+    }
+  }
+#endif
+}
+void __cdecl htsshow_uninit(void) {   /* "free" callback: calls WSACleanup() when _WIN32 is defined, otherwise does nothing */
+#ifdef _WIN32
+ WSACleanup();
+#endif
+}
+int __cdecl htsshow_start(httrackp* opt) {
+  /* "start" callback: the status screen is enabled only when verbosedisplay is 2 */
+  use_show = (opt->verbosedisplay == 2) ? 1 : 0;
+
+  if (use_show)
+    vt_clear();   /* begin with a cleared screen */
+  return 1;
+}
+int __cdecl htsshow_chopt(httrackp* opt) {   /* "change-options" callback: same work and result as htsshow_start() */
+  return htsshow_start(opt);   /* a calling-convention keyword is part of a declaration, not of a call */
+}
+int __cdecl htsshow_end(void) {   /* "end" callback: nothing to do, always returns 1 */
+ return 1;
+}
+int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {   /* "check-html" callback: arguments unused, always returns 1 */
+ return 1;
+}
+int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // "loop" callback, called on every loop of HTTrack
+  static TStamp prev_mytime=0; /* ok */
+  static t_InpInfo SInfo; /* ok */
+  // Redraws the VT100 status screen (only if use_show is set); always returns 1
+  TStamp mytime;
+  long int rate=0;
+  char st[256];
+  // Values copied from stats; they keep -1 ("no value") when stats is NULL
+  int stat_written=-1;
+  int stat_updated=-1;
+  int stat_errors=-1;
+  int stat_warnings=-1;
+  int stat_infos=-1;
+  int nbk=-1;
+  LLint nb=-1;
+  int stat_nsocket=-1;
+  LLint stat_bytes=-1;
+  LLint stat_bytes_recv=-1;
+  int irate=-1;
+  if (stats) {
+    stat_written=stats->stat_files;
+    stat_updated=stats->stat_updated_files;
+    stat_errors=stats->stat_errors;
+    stat_warnings=stats->stat_warnings;
+    stat_infos=stats->stat_infos;
+    nbk=stats->nbk;
+    stat_nsocket=stats->stat_nsocket;
+    irate=(int)stats->rate;
+    nb=stats->nb;
+    stat_bytes=stats->nb;
+    stat_bytes_recv=stats->HTS_TOTAL_RECV;
+  }
+
+  if (!use_show)
+    return 1;
+
+  mytime=mtime_local();
+  if ((stat_time>0) && (stat_bytes_recv>0))
+    rate=(int)(stat_bytes_recv/stat_time);
+  else
+    rate=0; // no information
+
+  /* Infos: a field of the static SInfo is refreshed only when a new value is available */
+  if (stat_bytes>=0) SInfo.stat_bytes=stat_bytes; // bytes
+  if (stat_time>=0) SInfo.stat_time=stat_time; // time
+  if (lien_tot>=0) SInfo.lien_tot=lien_tot; // number of links
+  if (lien_n>=0) SInfo.lien_n=lien_n; // scanned
+  SInfo.stat_nsocket=stat_nsocket; // socks
+  if (rate>0) SInfo.rate=rate; // rate
+  if (irate>=0) SInfo.irate=irate; // irate
+  if (SInfo.irate<0) SInfo.irate=SInfo.rate;
+  if (nbk>=0) SInfo.stat_back=nbk; // test the incoming value, as for the neighbouring fields
+  if (stat_written>=0) SInfo.stat_written=stat_written;
+  if (stat_updated>=0) SInfo.stat_updated=stat_updated;
+  if (stat_errors>=0) SInfo.stat_errors=stat_errors;
+  if (stat_warnings>=0) SInfo.stat_warnings=stat_warnings;
+  if (stat_infos>=0) SInfo.stat_infos=stat_infos;
+
+  // redraw only if more than 100 TStamp units elapsed since the last redraw, or if the clock went backwards
+  if ( ((mytime - prev_mytime)>100) || ((mytime - prev_mytime)<0) ) {
+    prev_mytime=mytime;
+
+    // header: four lines of statistics, in two columns
+    st[0]='\0';
+    qsec2str(st,stat_time);
+    vt_home();
+    printf(
+      VT_GOTOXY("1","1")
+      VT_CLREOL
+      STYLE_STATTEXT "Bytes saved:"
+      STYLE_STATVALUES " \t%s"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","1")
+      STYLE_STATTEXT "Links scanned:"
+      STYLE_STATVALUES " \t%d/%d (+%d)"
+      VT_CLREOL"\n"VT_CLREOL
+      VT_GOTOXY("1","2")
+      STYLE_STATTEXT "Time:"
+      " \t"
+      STYLE_STATVALUES "%s"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","2")
+      STYLE_STATTEXT "Files written:"
+      " \t"
+      STYLE_STATVALUES "%d"
+      VT_CLREOL"\n"VT_CLREOL
+      VT_GOTOXY("1","3")
+      STYLE_STATTEXT "Transfer rate:"
+      " \t"
+      STYLE_STATVALUES "%s (%s)"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","3")
+      STYLE_STATTEXT "Files updated:"
+      " \t"
+      STYLE_STATVALUES "%d"
+      VT_CLREOL"\n"VT_CLREOL
+      VT_GOTOXY("1","4")
+      STYLE_STATTEXT "Active connections:"
+      " \t"
+      STYLE_STATVALUES "%d"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","4")
+      STYLE_STATTEXT "Errors:"
+      STYLE_STATVALUES " \t"
+      STYLE_STATVALUES "%d"
+      VT_CLREOL"\n"
+      STYLE_STATRESET
+      ,
+      /* */
+      (char*)int2bytes(SInfo.stat_bytes),
+      (int)lien_n,(int)SInfo.lien_tot,(int)nbk,
+      (char*)st,
+      (int)SInfo.stat_written,
+      (char*)int2bytessec(SInfo.irate),(char*)int2bytessec(SInfo.rate),
+      (int)SInfo.stat_updated,
+      (int)SInfo.stat_nsocket,
+      (int)SInfo.stat_errors
+      /* */
+    );
+
+
+    // walk through the table of background links
+    if (back_index>=0) { // only if an index was given
+      int j,k;
+      int index=0;
+      int ok=0; // same
+      int l; // same
+      // rows to display, filled below (at most NStatsBuffer of them)
+      t_StatsBuffer StatsBuffer[NStatsBuffer];
+
+      {
+        int i;
+        for(i=0;i<NStatsBuffer;i++) {
+          strcpy(StatsBuffer[i].state,"");
+          strcpy(StatsBuffer[i].name,"");
+          strcpy(StatsBuffer[i].file,"");
+          strcpy(StatsBuffer[i].url_sav,"");
+          StatsBuffer[i].back=0;
+          StatsBuffer[i].size=0;
+          StatsBuffer[i].sizetot=0;
+        }
+      }
+      for(k=0;k<2;k++) { // 0: current link 1: other links
+        for(j=0;(j<3) && (index<NStatsBuffer);j++) { // priority pass
+          int _i;
+          for(_i=0+k;(_i< max(back_max*k,1) ) && (index<NStatsBuffer);_i++) { // link number
+            int i=(back_index+_i)%back_max; // start with the "first" one (the current one)
+            if (back[i].status>=0) { // means "active link"
+              // int ok=0; // OPTI
+              ok=0;
+              switch(j) {
+              case 0: // highest priority
+                if ((back[i].status>0) && (back[i].status<99)) {
+                  strcpy(StatsBuffer[index].state,"receive"); ok=1;
+                }
+                break;
+              case 1:
+                if (back[i].status==99) {
+                  strcpy(StatsBuffer[index].state,"request"); ok=1;
+                }
+                else if (back[i].status==100) {
+                  strcpy(StatsBuffer[index].state,"connect"); ok=1;
+                }
+                else if (back[i].status==101) {
+                  strcpy(StatsBuffer[index].state,"search"); ok=1;
+                }
+                else if (back[i].status==1000) { // ftp transfer
+                  sprintf(StatsBuffer[index].state,"ftp: %s",back[i].info); ok=1;
+                }
+                break;
+              default:
+                if (back[i].status==0) { // ready
+                  if ((back[i].r.statuscode==200)) {
+                    strcpy(StatsBuffer[index].state,"ready"); ok=1;
+                  }
+                  else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) {
+                    char tempo[256]; tempo[0]='\0';
+                    infostatuscode(tempo,back[i].r.statuscode);
+                    strcpy(StatsBuffer[index].state,tempo); ok=1;
+                  }
+                  else {
+                    strcpy(StatsBuffer[index].state,"error"); ok=1;
+                  }
+                }
+                break;
+              }
+
+              if (ok) {
+                char s[HTS_URLMAXSIZE*2];
+                //
+                StatsBuffer[index].back=i; // index, for more information
+                //
+                s[0]='\0';
+                strcpy(StatsBuffer[index].url_sav,back[i].url_sav); // for cancel
+                if (strcmp(back[i].url_adr,"file://"))
+                  strcat(s,back[i].url_adr);
+                else
+                  strcat(s,"localhost");
+                if (back[i].url_fil[0]!='/')
+                  strcat(s,"/");
+                strcat(s,back[i].url_fil);
+
+                StatsBuffer[index].file[0]='\0';
+                {
+                  char* a=strrchr(s,'/');
+                  if (a) {
+                    strncat(StatsBuffer[index].file,a,200);
+                    *a='\0';
+                  }
+                }
+
+                if ((l=strlen(s))<MAX_LEN_INPROGRESS)
+                  strcpy(StatsBuffer[index].name,s);
+                else {
+                  // too long: keep the beginning and the end, with "..." in between
+                  StatsBuffer[index].name[0]='\0';
+                  strncat(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
+                  strcat(StatsBuffer[index].name,"...");
+                  strcat(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
+                }
+
+                if (back[i].r.totalsize>0) { // size known in advance
+                  StatsBuffer[index].sizetot=back[i].r.totalsize;
+                  StatsBuffer[index].size=back[i].r.size;
+                } else { // size not known in advance
+                  if (back[i].status==0) { // ready
+                    StatsBuffer[index].sizetot=back[i].r.size;
+                    StatsBuffer[index].size=back[i].r.size;
+                  } else {
+                    StatsBuffer[index].sizetot=8192;
+                    StatsBuffer[index].size=(back[i].r.size % 8192);
+                  }
+                }
+                index++;
+              }
+            }
+          }
+        }
+      }
+
+      /* LF */
+      printf("%s\n",VT_CLREOL);
+
+      /* Display current job */
+      {
+        int parsing=0;
+        printf("Current job: ");
+        if (!(parsing=hts_is_parsing(-1)))
+          printf("receiving files");
+        else {
+          switch(hts_is_testing()) {
+          case 0:
+            printf("parsing HTML file (%d%%)",parsing);
+            break;
+          case 1:
+            printf("parsing HTML file: testing links (%d%%)",parsing);
+            break;
+          case 2:
+            printf("purging files");
+            break;
+          }
+        }
+        printf("%s\n",VT_CLREOL);
+      }
+
+      /* Display background jobs */
+      {
+        int i;
+        for(i=0;i<NStatsBuffer;i++) {
+          if (strnotempty(StatsBuffer[i].state)) {
+            printf(VT_CLREOL" %s - \t%s%s \t%s / \t%s",
+              StatsBuffer[i].state,
+              StatsBuffer[i].name,
+              StatsBuffer[i].file,
+              int2bytes(StatsBuffer[i].size),
+              int2bytes(StatsBuffer[i].sizetot)
+              );
+          }
+          printf("%s\n",VT_CLREOL);
+        }
+      }
+
+
+    }
+
+  }
+
+
+
+  return 1;
+}
+char* __cdecl htsshow_query(char* question) {   /* "query" callback: prints question and reads the answer from stdin */
+ static char s[12]=""; /* static: the answer is returned to the caller through this buffer */
+ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question);
+ io_flush; linput(stdin,s,4);
+ return s;
+}
+char* __cdecl htsshow_query2(char* question) {   /* "query2" callback: same statements as htsshow_query(), with its own buffer */
+ static char s[12]=""; /* static: the answer is returned to the caller through this buffer */
+ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question);
+ io_flush; linput(stdin,s,4);
+ return s;
+}
+char* __cdecl htsshow_query3(char* question) {   /* "query3" callback: returns the first non-empty line read from stdin */
+ static char line[256]; /* static: returned to the caller */
+ do {   /* NOTE(review): question is never printed by this function - confirm that this is intended */
+ io_flush; linput(stdin,line,206);
+ } while(!strnotempty(line));
+ printf("ok..\n");
+ return line;
+}
+int __cdecl htsshow_check(char* adr,char* fil,int status) {   /* "check-link" callback: arguments unused, always returns -1 */
+ return -1;
+}
+void __cdecl htsshow_pause(char* lockfile) {   /* "pause" callback: returns once fexist(lockfile) is false */
+ while (fexist(lockfile)) {
+ Sleep(1000);   /* wait before testing the lock file again */
+ }
+}
+void __cdecl htsshow_filesave(char* file) {   /* "save-file" callback: intentionally empty */
+}
+int __cdecl htsshow_linkdetected(char* link) {   /* "link-detected" callback: argument unused, always returns 1 */
+ return 1;
+}
+int __cdecl htsshow_xfrstatus(lien_back* back) {   /* "transfer-status" callback: argument unused, always returns 1 */
+ return 1;
+}
+int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {   /* "save-name" callback: arguments unused, always returns 1 */
+ return 1;
+}
+
+
+#endif
diff --git a/src/httrack.dsp b/src/httrack.dsp
new file mode 100644
index 0000000..7fc08da
--- /dev/null
+++ b/src/httrack.dsp
@@ -0,0 +1,324 @@
+# Microsoft Developer Studio Project File - Name="httrack" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=httrack - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "httrack.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "httrack.mak" CFG="httrack - Win32 Debug"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "httrack - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "httrack - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE "httrack - Win32 Release avec debug" (based on "Win32 (x86) Console Application")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "httrack - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "c:\temp\vcpp"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FR /YX /FD /c
+# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD BASE RSC /l 0x40c /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# SUBTRACT LINK32 /verbose
+
+!ELSEIF "$(CFG)" == "httrack - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "c:\temp\vcpp"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /Gm /GR /GX /ZI /Od /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FAcs /Fr /YX /FD /c
+# ADD BASE RSC /l 0x40c /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /map /debug /debugtype:both /machine:I386 /out:"c:\temp\test\httrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# SUBTRACT LINK32 /profile
+
+!ELSEIF "$(CFG)" == "httrack - Win32 Release avec debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "httrack___Win32_Release_avec_debug"
+# PROP BASE Intermediate_Dir "httrack___Win32_Release_avec_debug"
+# PROP BASE Ignore_Export_Lib 0
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release_avec_debug"
+# PROP Intermediate_Dir "c:\temp\vcpp"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# SUBTRACT BASE CPP /Ox /Oa /Ow /Og /Os
+# ADD CPP /nologo /MT /W3 /GX /Zi /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /FAcs /FR /YX /FD /c
+# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 wsock32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe"
+# SUBTRACT BASE LINK32 /verbose
+# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /debug /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# SUBTRACT LINK32 /verbose
+
+!ENDIF
+
+# Begin Target
+
+# Name "httrack - Win32 Release"
+# Name "httrack - Win32 Debug"
+# Name "httrack - Win32 Release avec debug"
+# Begin Source File
+
+SOURCE=.\htsalias.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsalias.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsback.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsback.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsbauth.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsbauth.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscache.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscache.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscatchurl.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscatchurl.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscore.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscore.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscoremain.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscoremain.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsdefines.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsfilters.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsfilters.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsftp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsftp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsglobal.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshash.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshash.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshelp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshelp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsindex.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsindex.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsjava.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsjava.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htslib.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htslib.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsmd5.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsmd5.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsname.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsname.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsnostatic.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsnostatic.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsrobots.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsrobots.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsthread.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsthread.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htstools.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htstools.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswizard.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswizard.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswrap.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswrap.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\httrack.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\httrack.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\md5.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\md5.h
+# End Source File
+# End Target
+# End Project
diff --git a/src/httrack.dsw b/src/httrack.dsw
new file mode 100644
index 0000000..9aa199f
--- /dev/null
+++ b/src/httrack.dsw
@@ -0,0 +1,29 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "httrack"=.\httrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/src/httrack.h b/src/httrack.h
new file mode 100644
index 0000000..a8633de
--- /dev/null
+++ b/src/httrack.h
@@ -0,0 +1,107 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.h console progress info */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTSTOOLS_DEFH
+#define HTSTOOLS_DEFH
+
+#if HTS_ANALYSTE_CONSOLE
+
+#include "htsglobal.h"
+#include "htscore.h"
+
+typedef struct { // per-transfer display record; NOTE(review): presumably the element type of StatsBuffer[] - confirm in httrack.c
+ char name[1000];
+ char file[256];
+ char state[20];
+ char url_sav[HTS_URLMAXSIZE*2]; // for cancel
+ char url_adr[HTS_URLMAXSIZE*2];
+ char url_fil[HTS_URLMAXSIZE*2];
+ LLint size;
+ LLint sizetot;
+ int offset;
+ //
+ int back;
+ //
+ int actived; // for disabled
+} t_StatsBuffer;
+
+typedef struct { // progress/statistics snapshot; NOTE(review): field meanings are inferred from their names only - confirm against htsshow_loop
+ int ask_refresh;
+ int refresh;
+ LLint stat_bytes;
+ int stat_time;
+ int lien_n;
+ int lien_tot;
+ int stat_nsocket;
+ int rate;
+ int irate;
+ int ft;
+ LLint stat_written;
+ int stat_updated;
+ int stat_errors;
+ int stat_warnings;
+ int stat_infos;
+ TStamp stat_timestart;
+ int stat_back;
+} t_InpInfo;
+
+// wrappers
+void __cdecl htsshow_init(void);
+void __cdecl htsshow_uninit(void);
+int __cdecl htsshow_start(httrackp* opt);
+int __cdecl htsshow_chopt(httrackp* opt);
+int __cdecl htsshow_end(void);
+int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
+int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+char* __cdecl htsshow_query(char* question);
+char* __cdecl htsshow_query2(char* question);
+char* __cdecl htsshow_query3(char* question);
+int __cdecl htsshow_check(char* adr,char* fil,int status);
+void __cdecl htsshow_pause(char* lockfile);
+void __cdecl htsshow_filesave(char* file);
+int __cdecl htsshow_linkdetected(char* link);
+int __cdecl htsshow_xfrstatus(lien_back* back);
+int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+
+int main(int argc, char **argv);
+void vt_color(int text,int back);
+void vt_clear(void);
+void vt_home(void);
+
+#endif
+
+#endif
+
diff --git a/src/md5.c b/src/md5.c
new file mode 100644
index 0000000..f5dadf2
--- /dev/null
+++ b/src/md5.c
@@ -0,0 +1,271 @@
+/*
+* This code implements the MD5 message-digest algorithm.
+* The algorithm is due to Ron Rivest. This code was
+* written by Colin Plumb in 1993, no copyright is claimed.
+* This code is in the public domain; do with it what you wish.
+*
+* Equivalent code is available from RSA Data Security, Inc.
+* This code has been tested against that, and is equivalent,
+* except that you don't need to include two pages of legalese
+* with every copy.
+*
+* To compute the message digest of a chunk of bytes, declare an
+* MD5Context structure, pass it to MD5Init, call MD5Update as
+* needed on buffers full of bytes, and then call MD5Final, which
+* will fill a supplied 16-byte array with the digest.
+*/
+
+/* #include "config.h" */
+
+#include <string.h> /* for memcpy() */
+#include "md5.h"
+
+static void byteReverse(unsigned char *buf, unsigned longs);
+
+/*
+ * Reverse the byte order of each 4-byte group of buf, in place. `longs` is
+ * the number of groups; it must be at least 1 because the loop body runs
+ * once before the count is tested.
+ */
+#define byteSwap(a, b) do { \
+  a ^= b; \
+  b ^= a; \
+  a ^= b; \
+} while(0)
+static void byteReverse(unsigned char *buf, unsigned longs)
+{
+  unsigned char *word = buf;   /* start of the group being reversed */
+
+  do {
+    byteSwap(word[0], word[3]);   /* outer pair */
+    byteSwap(word[1], word[2]);   /* inner pair */
+    word += 4;
+  } while (--longs != 0);
+}
+
+/*
+ * Begin an MD5 computation: zero the bit counter, load the four chaining
+ * words with the MD5 initialization constants, and select the byte-order
+ * handling. Byte reversal of the blocks is enabled exactly when
+ * brokenEndian is zero.
+ */
+void MD5Init(struct MD5Context *ctx, int brokenEndian)
+{
+  ctx->bits[0] = 0;
+  ctx->bits[1] = 0;
+
+  ctx->buf[0] = 0x67452301;
+  ctx->buf[1] = 0xefcdab89;
+  ctx->buf[2] = 0x98badcfe;
+  ctx->buf[3] = 0x10325476;
+
+  /* No compile-time WORDS_BIGENDIAN test here: the caller's flag decides. */
+  ctx->doByteReverse = brokenEndian ? 0 : 1;
+}
+
+/*
+* Update context to reflect the concatenation of another buffer full
+* of bytes: len bytes from buf are hashed in 64-byte blocks, and any tail is kept in ctx->in.
+*/
+void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
+{
+ uint32 t;
+
+ /* Update bitcount */
+
+ t = ctx->bits[0];
+ if ((ctx->bits[0] = t + ((uint32) len << 3)) < t)
+ ctx->bits[1]++; /* Carry from low to high */
+ ctx->bits[1] += len >> 29;
+
+ t = (t >> 3) & 0x3f; /* Bytes already buffered in ctx->in */
+
+ /* Handle any leading odd-sized chunks */
+
+ if (t) {
+ unsigned char *p = (unsigned char *) ctx->in + t;
+
+ t = 64 - t;
+ if (len < t) {
+ memcpy(p, buf, len);
+ return;
+ }
+ memcpy(p, buf, t);
+ if (ctx->doByteReverse)
+ byteReverse(ctx->in, 16);
+ MD5Transform(ctx->buf, (uint32 *) ctx->in);
+ buf += t;
+ len -= t;
+ }
+ /* Process data in 64-byte chunks */
+
+ while (len >= 64) {
+ memcpy(ctx->in, buf, 64);
+ if (ctx->doByteReverse)
+ byteReverse(ctx->in, 16);
+ MD5Transform(ctx->buf, (uint32 *) ctx->in);
+ buf += 64;
+ len -= 64;
+ }
+
+ /* Handle any remaining bytes of data. */
+
+ memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Final wrapup - pad to a 64-byte boundary with the bit pattern 1 0*,
+ * append the 64-bit count of bits processed (low-order word first, as
+ * stored in ctx->bits), run the last transform(s) and copy the 16-byte
+ * result into digest.
+ *
+ * The whole context is cleared afterwards, so it must be re-initialised
+ * with MD5Init before being used again.
+ */
+void MD5Final(unsigned char digest[16], struct MD5Context *ctx)
+{
+  unsigned count;
+  unsigned char *p;
+
+  /* Compute number of bytes mod 64 */
+  count = (ctx->bits[0] >> 3) & 0x3F;
+
+  /* Set the first char of padding to 0x80. This is safe since there is
+     always at least one byte free */
+  p = ctx->in + count;
+  *p++ = 0x80;
+
+  /* Bytes of padding needed to make 64 bytes */
+  count = 64 - 1 - count;
+
+  /* Pad out to 56 mod 64 */
+  if (count < 8) {
+    /* Two lots of padding: Pad the first block to 64 bytes */
+    memset(p, 0, count);
+    if (ctx->doByteReverse)
+      byteReverse(ctx->in, 16);
+    MD5Transform(ctx->buf, (uint32 *) ctx->in);
+
+    /* Now fill the next block with 56 bytes */
+    memset(ctx->in, 0, 56);
+  } else {
+    /* Pad block to 56 bytes */
+    memset(p, 0, count - 8);
+  }
+  /* Only the first 14 words are reversed: words 14 and 15 are written
+     below directly as integers */
+  if (ctx->doByteReverse)
+    byteReverse(ctx->in, 14);
+
+  /* Append length in bits and transform */
+  ((uint32 *) ctx->in)[14] = ctx->bits[0];
+  ((uint32 *) ctx->in)[15] = ctx->bits[1];
+
+  MD5Transform(ctx->buf, (uint32 *) ctx->in);
+  if (ctx->doByteReverse)
+    byteReverse((unsigned char *) ctx->buf, 4);
+  memcpy(digest, ctx->buf, 16);
+  /* Wipe the whole structure in case it's sensitive; sizeof(ctx) would only
+     cover the size of the pointer */
+  memset(ctx, 0, sizeof(*ctx));
+}
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm: add f and data to w, rotate w left by s (written for a 32-bit w), add x. */
+#define MD5STEP(f, w, x, y, z, data, s) \
+( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
+
+/*
+* The core of the MD5 algorithm, this alters an existing MD5 hash to
+* reflect the addition of 16 longwords of new data. MD5Update blocks
+* the data and converts bytes into longwords for this routine.
+*/
+void MD5Transform(uint32 buf[4], uint32 const in[16])
+{
+ register uint32 a, b, c, d;
+
+ a = buf[0];
+ b = buf[1];
+ c = buf[2];
+ d = buf[3];
+
+ MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+ MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+ MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+ MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+ MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+ MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+ MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+ MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+ MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+ MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+ MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+ MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+ MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+ MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+ MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+ MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+ MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+ MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+ MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+ MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+ MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+ MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+ MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+ MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+ MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+ MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+ MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+ MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+ MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+ MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+ MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+ MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+ MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+ MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+ MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+ MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+ MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+ MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+ MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+ MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+ MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+ MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+ MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+ MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+ MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+ MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+ MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+ MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+ MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+ MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+ MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+ MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+ MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+ MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+ MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+ MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+ MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+ MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+ MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+ MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+ MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+ MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+ MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+ MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
+}
+
diff --git a/src/md5.h b/src/md5.h
new file mode 100644
index 0000000..f5e5428
--- /dev/null
+++ b/src/md5.h
@@ -0,0 +1,36 @@
+#ifndef MD5_H
+#define MD5_H
+
+/*
+ * uint32 must be exactly 32 bits wide: the rotate in MD5STEP and the 4-byte
+ * grouping in byteReverse both depend on it. `unsigned long` is kept wherever
+ * it is 32 bits (so the type is unchanged on those targets); where long is
+ * wider (e.g. LP64 systems, including Alpha) `unsigned int` is used instead.
+ */
+#include <limits.h>
+#if ULONG_MAX == 0xffffffffUL
+typedef unsigned long uint32;
+#else
+typedef unsigned int uint32;
+#endif
+
+struct MD5Context {
+ uint32 buf[4];
+ uint32 bits[2];
+ unsigned char in[64];
+ int doByteReverse;
+};
+
+void MD5Init(struct MD5Context *context, int brokenEndian);
+void MD5Update(struct MD5Context *context, unsigned char const *buf,
+ unsigned len);
+void MD5Final(unsigned char digest[16], struct MD5Context *context);
+void MD5Transform(uint32 buf[4], uint32 const in[16]);
+
+int mdfile(char *fn, unsigned char *digest);
+int mdbinfile(char *fn, unsigned char *bindigest);
+
+/* These assume a little endian machine and return incorrect results!
+They are here for compatibility with old (broken) versions of RPM */
+int mdfileBroken(char *fn, unsigned char *digest);
+int mdbinfileBroken(char *fn, unsigned char *bindigest);
+
+/*
+* This is needed to make RSAREF happy on some MS-DOS compilers.
+*/
+typedef struct MD5Context MD5_CTX;
+
+#endif /* !MD5_H */
diff --git a/src/postinst-config.in b/src/postinst-config.in
new file mode 100755
index 0000000..f0edc72
--- /dev/null
+++ b/src/postinst-config.in
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+# Config file location
+cnfdir="__ETCPATH__"
+cnf="__ETCPATH__/httrack.conf"
+
+if test "`id -u`" -eq 0; then
+ mkdir -p "$cnfdir"
+ if ! test -f "$cnf"; then
+ echo "creating $cnf (please modify it) .."
+ cat>"$cnf" << EOF
+# HTTrack Website Copier Settings
+# See httrack --help for more information
+
+# Examples: (to uncomment)
+
+# set proxy proxy.myisp.com:8080
+# retries=2
+# set max-size 10000000
+# set max-time 36000
+# set user-agent Mouzilla/17.0 (compatible; HTTrack; I)
+#
+# There are MUCH more options.. try 'httrack --quiet --help | more'
+
+# Deny and allow for links
+# this will be used by default for all mirrors
+allow *.gif
+allow *.png
+deny ad.doubleclick.net/*
+
+# Path and other options
+# '~' in the *begining* means 'home dir'
+# '#' at the *end* means "projectname" (that is, the first URL given)
+# Example: '~/websites/#' will create /home/smith/websites/www.foo.com
+# folder when launching 'httrack www.foo.com'
+set path ~/websites/#
+
+EOF
+ fi
+
+ if ! grep "set path" "$cnf" >/dev/null; then
+ echo "default path set to <home dir>/websites/<first_site_name>"
+ fi
+
+ chown root:__ROOTGROUP__ "$cnf"
+ chmod 744 "$cnf"
+else
+ cat << EOF
+
+You are not root, therefore $cnf configuration file hasn't been created
+Re-run this sript ($0) as root if you want to do that
+
+EOF
+fi
+
diff --git a/src/strip_cr.in b/src/strip_cr.in
new file mode 100755
index 0000000..03af084
--- /dev/null
+++ b/src/strip_cr.in
@@ -0,0 +1,32 @@
+__PERL__
+# A simple script to convert DOS text files to
+# Unix one. Useful to strip all CR on .c and .h
+# sourcefiles.
+# Usage: strip_cr <files>
+#
+# Each file is copied to "<file>.tmp" with a trailing CR LF on every line
+# turned into LF. The copy replaces the original only when the sizes differ
+# (i.e. something was stripped); otherwise the copy is deleted.
+foreach $fname (@ARGV) {
+  $ad=1;   # at least one file name was given
+  # Three-argument open: the name is always treated as a plain file name,
+  # even if it starts with '>' or ends with '|'
+  if (open(FL,"<",$fname)) {
+    if (open(FO,">",$fname.".tmp")) {
+      while(<FL>) {
+        s/\r\n$/\n/g;
+        print FO "$_";
+      }
+      close(FL);
+      close(FO);
+      if ((-s $fname) != (-s $fname.".tmp")) {
+        print("Stripping ".$fname."..\n");
+        if (!rename($fname.".tmp",$fname)) {
+          # Keep the original untouched and do not leave the copy behind
+          print "Unable to replace $fname: $!\n";
+          unlink($fname.".tmp");
+        }
+      } else {
+        unlink($fname.".tmp");
+      }
+    } else {
+      close(FL);
+      print "Unable to open ".$fname.".tmp\n";
+    }
+  } else {
+    print "Unable to open $fname\n";
+  }
+}
+if (!$ad) {
+  print "Ensure that a text file has no lines ended with CR (DOS)\n";
+  print "Usage: strip_cr <file>\n";
+}
diff --git a/templates/index-body.html b/templates/index-body.html
new file mode 100644
index 0000000..f8f0cdf
--- /dev/null
+++ b/templates/index-body.html
@@ -0,0 +1,8 @@
+  <tr>
+    <td background="fade.gif">
+      &middot;
+      <a href="%s">
+        %s
+      </a>
+    </td>
+  </tr>
diff --git a/templates/index-footer.html b/templates/index-footer.html
new file mode 100644
index 0000000..fa9b570
--- /dev/null
+++ b/templates/index-footer.html
@@ -0,0 +1,33 @@
+  </TABLE>
+  <BR>
+  <BR>
+  <BR>
+  <H6 ALIGN="RIGHT">
+  <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2002]</I>
+  </H6>
+  %s
+  <!-- Thanks for using HTTrack Website Copier! -->
+  %s
+
+<!-- ==================== Start epilogue ==================== -->
+        </td>
+      </tr>
+      </table>
+    </td>
+  </tr>
+  </table>
+</td>
+</tr>
+</table>
+
+<table width="76%%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+  <tr>
+  <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+  </tr>
+</table>
+
+</body>
+
+</html>
+
+
+
+
diff --git a/templates/index-header.html b/templates/index-header.html
new file mode 100644
index 0000000..33fa54c
--- /dev/null
+++ b/templates/index-header.html
@@ -0,0 +1,101 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+  <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help" />
+  <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+  <meta name="generator" content="HTTrack Website Copier/3.x" />
+  <title>Local index - HTTrack Website Copier</title>
+  %s
+  <style type="text/css">
+  <!--
+
+body {
+  margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+  background: #77b;
+}
+body, td {
+  font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+  }
+
+#subTitle {
+  background: #000; color: #fff; padding: 4px; font-weight: bold;
+  }
+
+#siteNavigation a, #siteNavigation .current {
+  font-weight: bold; color: #448;
+  }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+  clear: both;
+  border-bottom: 6px solid #000;
+  padding: 10px; padding-top: 20px;
+  line-height: 1.65em;
+  background-image: url(backblue.gif);
+  background-repeat: no-repeat;
+  background-position: top right;
+  }
+
+#pageContent, #siteNavigation {
+  background-color: #ccd;
+  }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+// -->
+</style>
+
+</head>
+
+<body>
+
+<table width="76%%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+  <tr>
+  <td id="subTitle">HTTrack Website Copier - Open Source offline browser</td>
+  </tr>
+</table>
+<table width="76%%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+  <table width="100%%" border="0" align="center" cellspacing="1" cellpadding="0">
+  <tr>
+    <td colspan="6">
+      <table width="100%%" border="0" align="center" cellspacing="0" cellpadding="10">
+      <tr>
+        <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+<H1 ALIGN=Center>Index of locally available sites:</H1>
+  <TABLE BORDER="0" WIDTH="100%%" CELLSPACING="1" CELLPADDING="0">
diff --git a/templates/topindex-body.html b/templates/topindex-body.html
new file mode 100644
index 0000000..196239f
--- /dev/null
+++ b/templates/topindex-body.html
@@ -0,0 +1,5 @@
+  <tr>
+    <td background="fade.gif">
+      &middot; <a href="%s/index.html">%s</a>
+    </td>
+  </tr>
diff --git a/templates/topindex-footer.html b/templates/topindex-footer.html
new file mode 100644
index 0000000..1128f23
--- /dev/null
+++ b/templates/topindex-footer.html
@@ -0,0 +1,30 @@
+  </TABLE>
+  <BR>
+  <H6 ALIGN="RIGHT">
+  <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2002]</I>
+  </H6>
+  %s
+  <!-- Thanks for using HTTrack Website Copier! -->
+
+<!-- ==================== Start epilogue ==================== -->
+        </td>
+      </tr>
+      </table>
+    </td>
+  </tr>
+  </table>
+</td>
+</tr>
+</table>
+
+<table width="76%%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
+  <tr>
+  <td id="footer"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Leto Kauler.</small></td>
+  </tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/templates/topindex-header.html b/templates/topindex-header.html
new file mode 100644
index 0000000..21f4cf3
--- /dev/null
+++ b/templates/topindex-header.html
@@ -0,0 +1,100 @@
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+ <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory, building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuilt relatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one to another. You can also update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with integrated help." />
+ <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" />
+ <title>List of available projects - HTTrack Website Copier</title>
+ %s
+
+ <style type="text/css">
+ <!--
+
+body {
+ margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;
+ background: #77b;
+}
+body, td {
+ font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif;
+ }
+
+#subTitle {
+ background: #000; color: #fff; padding: 4px; font-weight: bold;
+ }
+
+#siteNavigation a, #siteNavigation .current {
+ font-weight: bold; color: #448;
+ }
+#siteNavigation a:link { text-decoration: none; }
+#siteNavigation a:visited { text-decoration: none; }
+
+#siteNavigation .current { background-color: #ccd; }
+
+#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }
+#siteNavigation a:active { text-decoration: none; background-color: #ccc; }
+
+
+a:link { text-decoration: underline; color: #00f; }
+a:visited { text-decoration: underline; color: #000; }
+a:hover { text-decoration: underline; color: #c00; }
+a:active { text-decoration: underline; }
+
+#pageContent {
+ clear: both;
+ border-bottom: 6px solid #000;
+ padding: 10px; padding-top: 20px;
+ line-height: 1.65em;
+ background-image: url(backblue.gif);
+ background-repeat: no-repeat;
+ background-position: top right;
+ }
+
+#pageContent, #siteNavigation {
+ background-color: #ccd;
+ }
+
+
+.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }
+.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }
+
+hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }
+
+h1 { margin: 0; font-weight: bold; font-size: 2em; }
+h2 { margin: 0; font-weight: bold; font-size: 1.6em; }
+h3 { margin: 0; font-weight: bold; font-size: 1.3em; }
+h4 { margin: 0; font-weight: bold; font-size: 1.18em; }
+
+.blak { background-color: #000; }
+.hide { display: none; }
+.tableWidth { min-width: 400px; }
+
+.tblRegular { border-collapse: collapse; }
+.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }
+.tblHeaderColor, .tblHeaderColor td { background: #99c; }
+.tblNoBorder td { border: 0; }
+
+
+-->
+</style>
+
+</head>
+<body>
+<table width="76%%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth">
+ <tr>
+ <td id="subTitle">HTTrack Website Copier - Open Source offline browser</td>
+ </tr>
+</table>
+<table width="76%%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth">
+<tr class="blak">
+<td>
+ <table width="100%%" border="0" align="center" cellspacing="1" cellpadding="0">
+ <tr>
+ <td colspan="6">
+ <table width="100%%" border="0" align="center" cellspacing="0" cellpadding="10">
+ <tr>
+ <td id="pageContent">
+<!-- ==================== End prologue ==================== -->
+
+
+<h1 align="center">Index of locally available projects:</h1>
+ <table border="0" width="100%%" cellspacing="1" cellpadding="0">