C语言实现简单的倒排文件索引
来源:http://blog.csdn.net/lansatiankongxxc/article/details/8314996
inver.h文件
#ifndef INVERT_FILE_H #define INVERT_FILE_H #include<stdio.h> #include<stdlib.h> typedef struct _invertfile_ { unsigned int tablelen; void **table; //unsigned int offset; unsigned int nodecount; }if_t; typedef struct _word_{ unsigned int id; unsigned int refered;// void *link; }word_t; typedef struct _word_frequency_{ unsigned int d_id; unsigned int refered;//the num of referenced in the document void *next; }wf_t; if_t* invertfile_create(int length); void invertfile_insert(if_t *h,int w_id,int d_id); wf_t* invertfile_search(if_t *h,int w_id,int d_id); void invertfile_traverse(if_t *h); void invertfile_free(if_t *h); #endif
invert.cpp
#include"invert.h" if_t* invertfile_create(int length){ if_t *h; h = (if_t *)calloc(1,sizeof(if_t)); if (NULL == h) return NULL; h->table =(void **)calloc(length,sizeof(void *)); h->tablelen=length; h->nodecount=0; word_t *w; for(int i=0;i<length;i++){ h->table[i]=malloc(sizeof(word_t)); w=(word_t*)h->table[i]; w->id=i; w->refered=0; w->link=NULL; } return h; } //check if document d_id have word w_id wf_t* invertfile_search(if_t *h,int w_id,int d_id){ word_t *w; wf_t *wf; w=(word_t*)h->table[w_id]; if(w->refered>0){ wf=(wf_t*)w->link; while(wf){ if(wf->d_id==d_id)return wf; wf=(wf_t*)wf->next; } } return NULL; } void invertfile_insert(if_t *h,int w_id,int d_id){ word_t * w=(word_t*)h->table[w_id]; wf_t * wf; if((wf=invertfile_search(h,w_id,d_id))!=NULL){ wf->refered++; } else{ wf=(wf_t *)malloc(sizeof(wf_t)); wf->next=w->link; w->link=wf; w->refered++; wf->refered++; wf->d_id=d_id; h->nodecount++; } } void invertfile_free(if_t *h){ word_t *w; wf_t* wf,*cur; for(int i=0;i<h->tablelen;i++){ w=(word_t*)h->table[i]; wf=(wf_t*)w->link; while(wf!=NULL){ cur=wf; wf=(wf_t*)wf->next; free(cur); } free(w); } free(h->table); } void invertfile_traverse(if_t *h){ word_t *w; wf_t* wf,*cur; for(int i=0;i<h->tablelen;i++){ w=(word_t*)h->table[i]; wf=(wf_t*)w->link; printf("word_id:%d;",w->id); while(wf!=NULL){ cur=wf; wf=(wf_t*)wf->next; printf("d_id:%d,freq:%d;",cur->d_id,cur->refered); } printf("\n"); } }
测试文件main.cpp
#include"invert.h" int main(){ if_t *f=invertfile_create(10); invertfile_insert(f,1,1); invertfile_insert(f,1,1); invertfile_insert(f,1,3); invertfile_insert(f,2,5); invertfile_traverse(f); invertfile_free(f); }
实验结果:
word_id:0; word_id:1;d_id:3,freq:1;d_id:1,freq:2; word_id:2;d_id:5,freq:1; word_id:3; word_id:4; word_id:5; word_id:6; word_id:7; word_id:8; word_id:9;