scws+xapian(2)

韩志专
2023-12-01

//scw.c

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<scws.h>
#define MAXLEN 10240

/*
 * Read the beginning of a file into a caller-supplied buffer as a C string.
 *
 * filename: path of the file to read.
 * dest:     destination buffer; receives at most maxlen-1 bytes followed by
 *           a terminating NUL.
 * maxlen:   total capacity of dest in bytes (including the terminator).
 *
 * If the file cannot be opened, a message is written to stderr and dest is
 * left as an empty string. Nothing is done when dest is NULL or maxlen is
 * not positive, because there is no room for even the terminator.
 */
void read_file(const char *filename ,char *dest ,int maxlen){
    FILE *file;
    size_t count;

    if (NULL == dest || maxlen <= 0)
        return;

    /* Make dest a valid (empty) string on every failure path below. */
    dest[0] = 0;

    file = fopen(filename , "r");
    if (NULL == file) {
        fprintf(stderr ,"open %s erro\n" , filename);
        return;
    }

    /* Bound the read by the caller's capacity, keeping one byte for NUL. */
    count = fread(dest , 1 , (size_t)(maxlen - 1) , file);
    dest[count] = 0;

    fclose(file);
}

/*
 * Segment a UTF-8 text file with SCWS and append the resulting words,
 * each followed by a single space, to an output file.
 *
 * Usage: <program> <input-file> <output-file>
 *
 * Returns 0 on success and a non-zero status when the arguments are
 * missing, the output file cannot be opened or flushed, or SCWS cannot
 * be initialised.
 */
int main(int argc , const char* argv[]){
    /* static keeps the buffer off the stack and zero-initialises it, so it
       is a valid empty string even if nothing gets read into it. Only
       MAXLEN bytes are ever written by read_file(..., MAXLEN). */
    static char text[MAXLEN];
    FILE *fp;
    scws_t s;
    scws_res_t res, cur;
    int status = 0;

    if (argc < 3) {
        fprintf(stderr , "usage: %s <input-file> <output-file>\n" ,
                argc > 0 ? argv[0] : "scw");
        return EXIT_FAILURE;
    }

    fp = fopen(argv[2] , "at+");
    if (NULL == fp) {
        fprintf(stderr , "cannot open %s for appending\n" , argv[2]);
        return EXIT_FAILURE;
    }

    read_file(argv[1] , text , MAXLEN);

    if (!(s = scws_new())) {
        printf("ERROR:can't init the scws!\n");
        fclose(fp);
        exit(-1);
    }

    scws_set_charset(s , "utf8");
    scws_set_dict(s , "/usr/local/scws/etc/dict.utf8.xdb" , SCWS_XDICT_XDB);
    scws_set_rule(s , "/usr/local/scws/etc/rules.utf8.ini");

    scws_send_text(s , text , strlen(text));

    /* Each call hands back one linked list of results; res keeps the list
       head so the whole list can be freed after cur has walked it. */
    while ((res = cur = scws_get_result(s)) != NULL) {
        for (; cur != NULL; cur = cur->next) {
            /* off/len locate the word inside the original text buffer. */
            fprintf(fp , "%.*s" , cur->len , text + cur->off);
            fprintf(fp , " ");
        }
        scws_free_result(res);
    }
    scws_free(s);

    /* Closing flushes buffered output; a failure here means lost data. */
    if (fclose(fp) != 0) {
        fprintf(stderr , "error writing %s\n" , argv[2]);
        status = EXIT_FAILURE;
    }
    return status;
}


//indexer.cpp

#include<xapian.h>
#include<fstream>
#include<string>
#include<iostream>
using namespace std;

int main(int argc , char **argv){
try{
ifstream ifile(argv[2]);
string content, line;
while(getline(ifile , line))
content += line;
        Xapian::WritableDatabase database(argv[1],Xapian::DB_CREATE_OR_OPEN);
        Xapian::Document document;
        Xapian::TermGenerator indexer;
        document.add_value(1,string("825"));
        document.set_data(content);
        indexer.set_document(document);
        indexer.index_text(content);

        database.add_document(document);
        database.commit();
   }catch(const Xapian::Error &e){
        cout<<"exception: "<<e.get_description()<<endl;
        }
}

//search.cpp

#include<xapian.h>
#include<iostream>
using namespace std;

int main(int argc , char **argv){
  try{
        string querystring(argv[2]);

        Xapian::Database database(argv[1]);
        Xapian::Enquire enquire(database);
Xapian::Query query(Xapian::Query::OP_OR ,argv+2 ,argc+argv);

        cout<<"query is: "<<query.get_description()<<endl;
        enquire.set_query(query);

        Xapian::MSet matches = enquire.get_mset(0,10);
        cout<<matches.get_matches_estimated()<<" result found"<<endl;
       for(Xapian::MSetIterator it = matches.begin() ; it != matches.end() ; it++){
        Xapian::Document doc = it.get_document();
        cout<<it.get_rank()<<": "<<it.get_percent()<<"%docid= "<<*it<<"   value="<<doc.get_value(1) <<",data "<<doc.get_data()<<endl;
        }
  }catch(const Xapian::Error &e){
        cout<<"exception: "<<e.get_description()<<endl;
        }
}

 类似资料: