summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2014-05-14 17:48:04 +0000
committerXavier Roche <xroche@users.noreply.github.com>2014-05-14 17:48:04 +0000
commitfea8122ed35c8a7895fc1195d4257fb6c42fe128 (patch)
tree37736ffc0315ff3696a1b3ea036dfb697083f6a2 /src
parent7323230eb3c8fc4818d0cf34974497b3782a7279 (diff)
Fixed hashtable corruption caused by dirty code that directly modified the host address in memory, which left hashtable positions no longer valid.
This issue was especially triggered when a redirect was processed ("Warning moved treated for .." messages) * closes: #43
Diffstat (limited to 'src')
-rw-r--r--src/htscore.c2
-rw-r--r--src/htshash.c59
-rw-r--r--src/htshash.h1
-rw-r--r--src/htsparse.c4
4 files changed, 63 insertions, 3 deletions
diff --git a/src/htscore.c b/src/htscore.c
index 94ed251..6fe302a 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -2355,7 +2355,7 @@ void host_ban(httrackp * opt, lien_url ** liens, int ptr, int lien_tot,
if (strfield2(jump_identification(liens[i]->adr), host)) { // host
hts_log_print(opt, LOG_DEBUG, "Cancel: %s%s", liens[i]->adr,
liens[i]->fil);
- strcpybuff(liens[i]->adr, "!"); // cancel (invalide hash)
+ hash_invalidate_entry(opt->hash, i); // invalidate hashtable entry
// on efface pas le hash, because si on rencontre le lien, reverif sav..
}
} else {
diff --git a/src/htshash.c b/src/htshash.c
index 7bdf782..9389455 100644
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -85,6 +85,14 @@ static int key_sav_equals(void *arg, const char *a, const char *b) {
return strcasecmp(a, b) == 0;
}
+/* Debug print handler for "sav" keys: the key is already a C string,
+   so it is handed back unchanged. */
+static const char* key_sav_debug_print(void *arg, const char *a) {
+  (void) arg;  /* no context is needed for this handler */
+  return a;
+}
+
+/* Debug print handler for "sav" values: the stored pointer is
+   interpreted as a C string and returned as-is. */
+static const char* value_sav_debug_print(void *arg, void *a) {
+  const char *const text = (const char*) a;
+
+  (void) arg;  /* no context is needed for this handler */
+  return text;
+}
+
/* Pseudo-key (lien_url structure) hash function */
static inthash_keys key_adrfil_hashes_generic(void *arg, const char *value_,
const int former) {
@@ -151,6 +159,32 @@ static int key_adrfil_equals_generic(void *arg, const char *a_, const char *b_,
}
}
+/* Debug print helper for lien_url pseudo-keys.
+   'arg' is the owning hash_struct, 'a_' is a lien_url passed as a char
+   pointer. Formats "<adr><fil>" (or "<former_adr><former_fil>" when
+   'former' is non-zero) into hash->normfil and returns that buffer.
+   The returned string lives in the hash_struct and is overwritten by the
+   next call using the same hash_struct. */
+static const char* key_adrfil_debug_print_(void *arg, const char *a_, const int former) {
+  hash_struct *const hash = (hash_struct*) arg;
+  /* keep the key read-only: this function only reads from it */
+  const lien_url *const a = (const lien_url*) a_;
+  const char *const a_adr = !former ? a->adr : a->former_adr;
+  const char *const a_fil = !former ? a->fil : a->former_fil;
+
+  snprintf(hash->normfil, sizeof(hash->normfil), "%s%s", a_adr, a_fil);
+  return hash->normfil;
+}
+
+/* Debug print handler for keys printed from their current adr/fil pair. */
+static const char* key_adrfil_debug_print(void *arg, const char *a_) {
+  const int former = 0;  /* select adr/fil, not former_adr/former_fil */
+
+  return key_adrfil_debug_print_(arg, a_, former);
+}
+
+/* Debug print handler for keys printed from their former_adr/former_fil pair. */
+static const char* key_former_adrfil_debug_print(void *arg, const char *a_) {
+  const int former = 1;  /* select former_adr/former_fil */
+
+  return key_adrfil_debug_print_(arg, a_, former);
+}
+
+/* Debug print handler for values of the adrfil/former_adrfil tables.
+   The value arrives as a pointer; it is stored through the 'ptr' member of
+   an inthash_value and read back through its 'intg' member, then printed as
+   a decimal int into hash->normfil2. The returned buffer is overwritten by
+   the next call using the same hash_struct. */
+static const char* value_adrfil_debug_print(void *arg, void *value) {
+  hash_struct *const hash = (hash_struct*) arg;
+  inthash_value slot;
+  int number;
+
+  slot.ptr = value;
+  number = (int) slot.intg;
+  snprintf(hash->normfil2, sizeof(hash->normfil2), "%d", number);
+  return hash->normfil2;
+}
+
/* "adr"/"fil" lien_url structure members hashing function */
static inthash_keys key_adrfil_hashes(void *arg, const char *value_) {
return key_adrfil_hashes_generic(arg, value_, 0);
@@ -207,6 +241,20 @@ void hash_init(httrackp *opt, hash_struct * hash, int normalized) {
key_former_adrfil_hashes,
key_former_adrfil_equals,
hash);
+
+ /* pretty-printing */
+ inthash_set_print_handler(hash->sav,
+ key_sav_debug_print,
+ value_sav_debug_print,
+ NULL);
+ inthash_set_print_handler(hash->adrfil,
+ key_adrfil_debug_print,
+ value_adrfil_debug_print,
+ hash);
+ inthash_set_print_handler(hash->former_adrfil,
+ key_former_adrfil_debug_print,
+ value_adrfil_debug_print,
+ hash);
}
void hash_free(hash_struct *hash) {
@@ -272,3 +320,14 @@ void hash_write(hash_struct * hash, int lpos) {
inthash_write(hash->former_adrfil, (char*) hash->liens[lpos], lpos);
}
}
+
+/* Cancel link 'lpos' by overwriting its address with "!" while keeping the
+   "adrfil" hashtable coherent: the entry is removed while its key is still
+   intact, the address is then overwritten, and the entry is written back
+   under the modified key. If the entry cannot be removed, the assertion
+   fires and nothing is modified. */
+void hash_invalidate_entry(hash_struct * hash, int lpos) {
+  char *const key = (char*) hash->liens[lpos];
+
+  /* the entry must be present before its key may be changed */
+  if (!inthash_remove(hash->adrfil, key)) {
+    assertf(! "error invalidating hash entry");
+    return;
+  }
+
+  /* devalidate entry now it is removed from hashtable */
+  strcpybuff(hash->liens[lpos]->adr, "!");
+
+  /* add back */
+  inthash_write(hash->adrfil, key, lpos);
+}
diff --git a/src/htshash.h b/src/htshash.h
index 4810513..6a71cba 100644
--- a/src/htshash.h
+++ b/src/htshash.h
@@ -56,6 +56,7 @@ void hash_free(hash_struct *hash);
int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
hash_struct_type type);
void hash_write(hash_struct * hash, int lpos);
+void hash_invalidate_entry(hash_struct * hash, int lpos);
int *hash_calc_chaine(hash_struct * hash, hash_struct_type type, int pos);
unsigned long int hash_cle(const char *nom1, const char *nom2);
#endif
diff --git a/src/htsparse.c b/src/htsparse.c
index e12e94c..b3778bc 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -3606,7 +3606,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
urladr, urlfil, mov_adr, mov_fil);
// canceller lien actuel
error = 1;
- strcpybuff(liens[ptr]->adr, "!"); // caractère bidon (invalide hash)
+ hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
// noter NOUVEAU lien
//xxc xxc
// set_prio_to=0+1; // protection if the moved URL is an html page!!
@@ -3742,7 +3742,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
//
// canceller lien actuel
error = 1;
- strcpybuff(liens[ptr]->adr, "!"); // caractère bidon (invalide hash)
+ hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
//
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);