• 欢迎访问开心洋葱网站,在线教程,推荐使用最新版火狐浏览器和Chrome浏览器访问本网站,欢迎加入开心洋葱 QQ群
  • 为方便开心洋葱网用户,开心洋葱官网已经开启复制功能!
  • 欢迎访问开心洋葱网站,手机也能访问哦~欢迎加入开心洋葱多维思维学习平台 QQ群
  • 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏开心洋葱吧~~~~~~~~~~~~~!
  • 由于近期流量激增,小站的ECS没能经的起亲们的访问,本站依然没有盈利,如果各位看如果觉着文字不错,还请看官给小站打个赏~~~~~~~~~~~~~!

C语言实现简单的倒排文件索引

OC/C/C++ 水墨上仙 1871次浏览

C语言实现简单的倒排文件索引
来源:http://blog.csdn.net/lansatiankongxxc/article/details/8314996

inver.h文件

#ifndef INVERT_FILE_H  
#define INVERT_FILE_H  
#include<stdio.h>  
#include<stdlib.h>  
  
typedef struct _invertfile_ {  
    unsigned int tablelen;  
    void **table;  
    //unsigned int offset;    
    unsigned int nodecount;  
  
}if_t;  
typedef struct _word_{  
    unsigned int id;  
    unsigned int refered;//  
    void *link;  
}word_t;  
typedef struct _word_frequency_{  
    unsigned int d_id;  
    unsigned int refered;//the num of referenced in the document  
    void *next;  
}wf_t;  
  
if_t* invertfile_create(int length);  
void invertfile_insert(if_t *h,int w_id,int d_id);  
wf_t* invertfile_search(if_t *h,int w_id,int d_id);  
void invertfile_traverse(if_t *h);  
void invertfile_free(if_t *h);  
#endif  

invert.cpp

#include"invert.h"  
if_t* invertfile_create(int length){  
    if_t *h;  
      
    h = (if_t *)calloc(1,sizeof(if_t));  
    if (NULL == h) return NULL;       
      
    h->table =(void **)calloc(length,sizeof(void *));  
    h->tablelen=length;  
    h->nodecount=0;  
    word_t *w;  
    for(int i=0;i<length;i++){  
        h->table[i]=malloc(sizeof(word_t));  
        w=(word_t*)h->table[i];  
        w->id=i;  
        w->refered=0;  
        w->link=NULL;  
    }  
    return h;  
}  
//check if document d_id have word w_id  
wf_t* invertfile_search(if_t *h,int w_id,int d_id){  
    word_t *w;  
    wf_t    *wf;  
    w=(word_t*)h->table[w_id];  
    if(w->refered>0){  
        wf=(wf_t*)w->link;  
        while(wf){  
            if(wf->d_id==d_id)return wf;  
            wf=(wf_t*)wf->next;  
        }  
    }  
    return NULL;  
}  
void invertfile_insert(if_t *h,int w_id,int d_id){  
    word_t * w=(word_t*)h->table[w_id];  
    wf_t * wf;  
    if((wf=invertfile_search(h,w_id,d_id))!=NULL){  
        wf->refered++;  
    }  
    else{  
        wf=(wf_t *)malloc(sizeof(wf_t));  
        wf->next=w->link;  
        w->link=wf;  
        w->refered++;  
        wf->refered++;  
        wf->d_id=d_id;  
        h->nodecount++;  
    }  
}  
void invertfile_free(if_t *h){  
    word_t *w;  
    wf_t* wf,*cur;  
    for(int i=0;i<h->tablelen;i++){  
        w=(word_t*)h->table[i];  
        wf=(wf_t*)w->link;  
        while(wf!=NULL){  
            cur=wf;  
            wf=(wf_t*)wf->next;  
            free(cur);  
        }  
        free(w);  
    }  
    free(h->table);  
}  
void invertfile_traverse(if_t *h){  
    word_t *w;  
    wf_t* wf,*cur;  
    for(int i=0;i<h->tablelen;i++){  
        w=(word_t*)h->table[i];  
        wf=(wf_t*)w->link;  
        printf("word_id:%d;",w->id);  
        while(wf!=NULL){  
            cur=wf;  
            wf=(wf_t*)wf->next;  
            printf("d_id:%d,freq:%d;",cur->d_id,cur->refered);  
        }  
        printf("\n");  
    }  
}  

测试文件main.cpp

#include"invert.h"  
int main(){  
    if_t *f=invertfile_create(10);  
    invertfile_insert(f,1,1);  
    invertfile_insert(f,1,1);  
    invertfile_insert(f,1,3);  
    invertfile_insert(f,2,5);  
    invertfile_traverse(f);  
    invertfile_free(f);  
}  

实验结果:

word_id:0;  
word_id:1;d_id:3,freq:1;d_id:1,freq:2;  
word_id:2;d_id:5,freq:1;  
word_id:3;  
word_id:4;  
word_id:5;  
word_id:6;  
word_id:7;  
word_id:8;  
word_id:9;  


喜欢 (0)
加载中……