diff options
Diffstat (limited to 'src/htsrobots.c')
-rw-r--r-- | src/htsrobots.c | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/src/htsrobots.c b/src/htsrobots.c new file mode 100644 index 0000000..8aabdd4 --- /dev/null +++ b/src/htsrobots.c @@ -0,0 +1,118 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. + + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: httrack.c subroutines: */ +/* robots.txt (website robot file) */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + + +#include "htsrobots.h" + +/* specific definitions */ +#include "htsbase.h" +#include "htslib.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +/* END specific definitions */ + + +// -- robots -- + +// fil="" : vérifier si règle déja enregistrée +int checkrobots(robots_wizard* robots,char* adr,char* fil) { + while(robots) { + if (strfield2(robots->adr,adr)) { + if (fil[0]) { + int ptr=0; + char line[250]; + if (strnotempty(robots->token)) { + do { + ptr+=binput(robots->token+ptr,line,200); + if (line[0]=='/') { // absolu + if (strfield(fil,line)) { // commence avec ligne + return -1; // interdit + } + } else { // relatif + if (strstrcase(fil,line)) { + return -1; + } + } + } while( (strnotempty(line)) && (ptr<(int) strlen(robots->token)) ); + } + } else { + return -1; + } + } + robots=robots->next; + } + return 0; +} +int checkrobots_set(robots_wizard* robots,char* adr,char* data) { + if (((int) strlen(data)) > 999) return 0; + while(robots) { + if (strfield2(robots->adr,adr)) { // entrée existe + strcpy(robots->token,data); +#if DEBUG_ROBOTS + printf("robots.txt: set %s to %s\n",adr,data); +#endif + return -1; + } + else if (!robots->next) { + robots->next=(robots_wizard*) calloct(1,sizeof(robots_wizard)); + if (robots->next) { + robots->next->next=NULL; + strcpy(robots->next->adr,adr); + strcpy(robots->next->token,data); +#if DEBUG_ROBOTS + printf("robots.txt: new set %s to %s\n",adr,data); +#endif + } +#if DEBUG_ROBOTS + else + printf("malloc error!!\n"); +#endif + } + robots=robots->next; + } + return 0; +} +void checkrobots_free(robots_wizard* robots) { + if (robots->next) { + checkrobots_free(robots->next); + freet(robots->next); + robots->next=NULL; + } +} + +// -- robots -- |