69,371
社区成员
发帖
与我相关
我的任务
分享
/*
 * hash.h - string hash functions and the table that exposes them.
 *
 * The include guard deliberately avoids a leading double underscore:
 * such identifiers are reserved for the C implementation.
 */
#ifndef HASH_H
#define HASH_H

/* Hash the NUL-terminated string `key` using the sdbm recurrence. */
unsigned int sdbm_hash(const char *key);

/* Hash the NUL-terminated string `key` using a shift-add-xor recurrence. */
unsigned int sax_hash(const char *key);

/*
 * Table of the two hash functions declared above, indexable as
 * hash_table[i](key). The single definition lives in the implementation
 * file; this is only the declaration.
 */
extern unsigned int (*hash_table[2])(const char *key);

#endif /* HASH_H */
#include"hash.h"
/*
 * Dispatch table of the available string hash functions:
 * slot 0 holds sdbm_hash and slot 1 holds sax_hash.
 */
unsigned int (*hash_table[2])(const char *) = {
    [0] = sdbm_hash,
    [1] = sax_hash,
};
/*
 * Shift-add-xor hash of a NUL-terminated string.
 *
 * Every byte is taken as an unsigned value and folded into the running
 * hash with: hash ^= (hash << 5) + (hash >> 2) + byte.
 * Arithmetic is unsigned, so overflow simply wraps.
 *
 * Returns 0 for the empty string.
 */
unsigned int sax_hash(const char *key)
{
    unsigned int hash = 0;
    const char *cursor;

    for (cursor = key; *cursor != '\0'; ++cursor) {
        unsigned int byte = (unsigned char)*cursor;

        hash ^= (hash << 5) + (hash >> 2) + byte;
    }
    return hash;
}
/*
 * sdbm hash of a NUL-terminated string.
 *
 * Every byte is taken as an unsigned value and folded into the running
 * hash with: hash = byte + (hash << 6) + (hash << 16) - hash.
 * Arithmetic is unsigned, so overflow simply wraps.
 *
 * Returns 0 for the empty string.
 */
unsigned int sdbm_hash(const char *key)
{
    unsigned int hash = 0;
    const char *cursor;

    for (cursor = key; *cursor != '\0'; ++cursor) {
        unsigned int byte = (unsigned char)*cursor;

        hash = byte + (hash << 6) + (hash << 16) - hash;
    }
    return hash;
}
/*
 * bloomfilter.h - public interface of a simple Bloom filter.
 *
 * The include guard deliberately avoids a leading double underscore:
 * such identifiers are reserved for the C implementation.
 */
#ifndef BLOOMFILTER_H
#define BLOOMFILTER_H

#include <stdlib.h>

/* A Bloom filter backed by a heap-allocated bit array. */
typedef struct {
    unsigned int size;            /* number of bits in the filter */
    unsigned int num_of_hashfunc; /* how many hash functions are applied per key */
    unsigned char *bloom;         /* bit array holding `size` bits */
} BF;

/*
 * Create a filter with `size` bits that applies `num_of_hashfunc` hash
 * functions per key. Returns the new filter, or NULL on failure.
 * The caller owns the result and releases it with bloom_destroy().
 */
BF *bloom_create(unsigned int size, unsigned int num_of_hashfunc);

/* Free the filter and its bit array. Returns 0. */
int bloom_destroy(BF *bloom_filter);

/* Record the NUL-terminated string `data` in the filter. Returns 0 on success. */
int bloom_add(char *data, BF *bloom_filter);

/*
 * Test the NUL-terminated string `data` against the filter.
 * Returns 1 when every probed bit is set, 0 otherwise.
 */
int bloom_check(char *data, BF *bloom_filter);

#endif /* BLOOMFILTER_H */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "bloomfilter.h"
#include "hash.h"
/*
 * Bit-array helpers. `a` is an array of unsigned char and `n` is a bit
 * index into it.
 *
 * Both arguments are fully parenthesized so that expression arguments
 * such as `i + 1` address the intended byte and bit. `n` is expanded
 * twice, so it must not have side effects.
 */
#define SETBIT(a, n) ((a)[(n) / CHAR_BIT] |= (unsigned char)(1u << ((n) % CHAR_BIT)))
#define GETBIT(a, n) ((a)[(n) / CHAR_BIT] & (1u << ((n) % CHAR_BIT)))
/*
 * Allocate a Bloom filter with `size` bits, all initially clear.
 *
 * size            - number of bits in the filter; must be non-zero.
 * num_of_hashfunc - number of hash functions applied per key.
 *
 * Returns the new filter, or NULL if `size` is zero or an allocation
 * fails. The caller owns the result and frees it with bloom_destroy().
 */
BF *bloom_create(unsigned int size, unsigned int num_of_hashfunc)
{
    BF *bloom_filter;
    size_t num_bytes;

    /* Bit positions are computed as hash % size, so size 0 is unusable. */
    if (size == 0) {
        return NULL;
    }

    /*
     * Round up to whole bytes without forming size + CHAR_BIT - 1, which
     * wraps around in unsigned int for sizes close to UINT_MAX.
     */
    num_bytes = (size_t)size / CHAR_BIT + (size % CHAR_BIT != 0 ? 1u : 0u);

    bloom_filter = malloc(sizeof *bloom_filter);
    if (bloom_filter == NULL) {
        return NULL;
    }

    bloom_filter->bloom = calloc(num_bytes, sizeof *bloom_filter->bloom);
    if (bloom_filter->bloom == NULL) {
        free(bloom_filter);
        return NULL;
    }

    bloom_filter->size = size;
    bloom_filter->num_of_hashfunc = num_of_hashfunc;
    return bloom_filter;
}
/*
 * Release a filter and its bit array.
 *
 * Passing NULL is a no-op, mirroring free(NULL). The caller's pointer is
 * left dangling after this call and must not be used again.
 *
 * Always returns 0.
 */
int bloom_destroy(BF *bloom_filter)
{
    if (bloom_filter == NULL) {
        return 0;
    }

    free(bloom_filter->bloom);
    free(bloom_filter);
    return 0;
}
/*
 * Record the NUL-terminated string `data` in the filter by setting one
 * bit per hash function.
 *
 * At most as many hash functions as hash_table provides are applied, so
 * an oversized num_of_hashfunc cannot index past the end of the table.
 *
 * Returns 0 on success, or -1 if an argument is NULL or the filter has
 * no bit array or a size of zero.
 */
int bloom_add(char *data, BF *bloom_filter)
{
    const unsigned int available =
        (unsigned int)(sizeof hash_table / sizeof hash_table[0]);
    unsigned int count;
    unsigned int i;

    if (data == NULL || bloom_filter == NULL ||
        bloom_filter->bloom == NULL || bloom_filter->size == 0) {
        return -1;
    }

    count = bloom_filter->num_of_hashfunc;
    if (count > available) {
        count = available;
    }

    for (i = 0; i < count; i++) {
        unsigned int bit = hash_table[i](data) % bloom_filter->size;

        SETBIT(bloom_filter->bloom, bit);
    }
    return 0;
}
/*
 * Test whether the NUL-terminated string `data` may have been added to
 * the filter.
 *
 * Returns 1 when the lookup succeeds (every probed bit is set) and 0 when
 * it fails. On the first clear bit, "not found" is printed to stdout.
 * Invalid arguments (NULL pointers, missing bit array, zero size) are
 * reported as 0 without printing.
 *
 * At most as many hash functions as hash_table provides are applied, so
 * an oversized num_of_hashfunc cannot index past the end of the table.
 */
int bloom_check(char *data, BF *bloom_filter)
{
    const unsigned int available =
        (unsigned int)(sizeof hash_table / sizeof hash_table[0]);
    unsigned int count;
    unsigned int i;

    if (data == NULL || bloom_filter == NULL ||
        bloom_filter->bloom == NULL || bloom_filter->size == 0) {
        return 0;
    }

    count = bloom_filter->num_of_hashfunc;
    if (count > available) {
        count = available;
    }

    for (i = 0; i < count; i++) {
        unsigned int bit = hash_table[i](data) % bloom_filter->size;

        if (!(GETBIT(bloom_filter->bloom, bit))) {
            printf("not found\n");
            return 0;
        }
    }
    return 1;
}
#include<stdio.h>
#include<string.h>
#include"bloomfilter.h"
/*
 * Build a Bloom filter from the lines of "web_urls.data", then test each
 * line of "checkedurl.dat" against it and write "1" (filter reports a
 * match) or "0" (no match) per input line to "checkresult.dat".
 *
 * The file names are fixed; command-line arguments are not used.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the filter cannot be
 * created, a file cannot be opened, or the result file cannot be written.
 */
int main(int argc, char *argv[])
{
    FILE *fp_input = NULL;
    FILE *fp_weburls = NULL;
    FILE *fp_checkresult = NULL;
    BF *bloom_filter;
    char line[1024];
    int rc = EXIT_FAILURE;

    (void)argc;
    (void)argv;

    bloom_filter = bloom_create(2500000, 2);
    if (bloom_filter == NULL) {
        fprintf(stderr, "ERROR: cannot allocate bloom filter\n");
        return EXIT_FAILURE;
    }

    if ((fp_input = fopen("checkedurl.dat", "r")) == NULL) {
        perror("checkedurl.dat");
        goto cleanup;
    }
    if ((fp_weburls = fopen("web_urls.data", "r")) == NULL) {
        perror("web_urls.data");
        goto cleanup;
    }
    if ((fp_checkresult = fopen("checkresult.dat", "w")) == NULL) {
        perror("checkresult.dat");
        goto cleanup;
    }

    while (fgets(line, sizeof line, fp_weburls) != NULL) {
        /*
         * fgets keeps the line terminator. Strip it so the same URL hashes
         * identically whether or not it ends in "\n" or "\r\n" (e.g. the
         * last line of a file without a trailing newline).
         */
        line[strcspn(line, "\r\n")] = '\0';
        bloom_add(line, bloom_filter);
    }
    fclose(fp_weburls);
    fp_weburls = NULL;

    while (fgets(line, sizeof line, fp_input) != NULL) {
        const char *verdict;

        line[strcspn(line, "\r\n")] = '\0';
        verdict = bloom_check(line, bloom_filter) ? "1\n" : "0\n";
        if (fputs(verdict, fp_checkresult) == EOF) {
            perror("checkresult.dat");
            goto cleanup;
        }
    }

    /* fclose flushes buffered output, so a failure here means lost results. */
    if (fclose(fp_checkresult) != 0) {
        fp_checkresult = NULL;
        perror("checkresult.dat");
        goto cleanup;
    }
    fp_checkresult = NULL;

    rc = EXIT_SUCCESS;

cleanup:
    if (fp_checkresult != NULL) {
        fclose(fp_checkresult);
    }
    if (fp_weburls != NULL) {
        fclose(fp_weburls);
    }
    if (fp_input != NULL) {
        fclose(fp_input);
    }
    bloom_destroy(bloom_filter);
    return rc;
}