// readfenci.c: read a local file, segment its text into terms with SCWS, and write the terms to a local file
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<scws.h>
#define MAXLEN 10240
/*
 * Read the contents of `filename` into `dest` as a NUL-terminated string.
 *
 * filename: path of the file to read (opened with mode "r").
 * dest:     caller-supplied buffer that receives the bytes read.
 * maxlen:   capacity of `dest` in bytes. At most maxlen-1 bytes are stored so
 *           the terminator always fits; longer files are truncated at a byte
 *           boundary (which may fall inside a multi-byte character).
 *
 * If the file cannot be opened, an error is printed to stderr and `dest` is
 * left as the empty string. Nothing is done when `dest` is NULL or
 * `maxlen` is not positive.
 */
void read_file(const char *filename ,char *dest ,int maxlen){
    FILE *file;
    int pos = 0;
    int ch;

    if(NULL == dest || maxlen <= 0)
        return;                 /* no room even for the terminator */
    dest[0] = 0;                /* defined result on every early exit */

    file = fopen(filename , "r");
    if(NULL == file){
        fprintf(stderr ,"open %s erro\n" , filename);
        return;                 /* never hand a NULL stream to fgetc/fclose */
    }

    /* Bound the copy by the caller's buffer size, not a global constant. */
    while(pos < maxlen-1 && (ch = fgetc(file)) != EOF)
        dest[pos++] = (char)ch;

    fclose(file);
    dest[pos] = 0;
}
/*
 * Segment the text of /usr/local/irtest/data1 with SCWS and append each
 * resulting term, followed by a single space, to
 * /usr/local/irtest/termsdata1.
 *
 * Returns 0 on success. Exits with -1 when the output file cannot be opened
 * or the SCWS handle cannot be created.
 */
int main(void){
    char text[MAXLEN];
    FILE *fp;
    scws_t s;
    scws_res_t res,cur;

    fp = fopen("/usr/local/irtest/termsdata1" ,"at+");
    if(NULL == fp){
        fprintf(stderr ,"open %s erro\n" , "/usr/local/irtest/termsdata1");
        exit(-1);
    }

    read_file("/usr/local/irtest/data1" , text ,MAXLEN);

    if(!(s = scws_new())){
        printf("ERROR:can't init the scws!\n");
        fclose(fp);
        exit(-1);
    }
    scws_set_charset(s , "utf8");
    scws_set_dict(s ,"/usr/local/scws/etc/dict.utf8.xdb" ,SCWS_XDICT_XDB);
    scws_set_rule(s, "/usr/local/scws/etc/rules.utf8.ini");
    scws_send_text(s , text, strlen(text));

    /* Each call yields a linked list of results; loop until none are left. */
    while((res = cur = scws_get_result(s)) != NULL){
        /* A result gives an offset/length into `text`, not a C string. */
        for(; cur != NULL; cur = cur->next)
            fprintf(fp ,"%.*s " , cur->len, text+cur->off);
        scws_free_result(res);
    }

    scws_free(s);
    fclose(fp);     /* flush and release the output file */
    return 0;
}
//indexer.cpp
#include<xapian.h>
#include<fstream>
#include<string>
#include<iostream>
using namespace std;
// Indexes the term file /usr/local/irtest/termsdata1 as a single document in
// the Xapian database whose path is given as argv[1] (created if missing).
//
// The whole file content is stored as the document data and fed to the term
// generator; the fixed string "825" is stored in value slot 1.
//
// Returns 0 on success, 1 on a usage error, an unreadable term file, or a
// Xapian exception.
int main(int argc , char **argv){
    if(argc < 2){
        // Without this check argv[1] is a null pointer.
        std::cerr<<"usage: "<<(argc > 0 ? argv[0] : "indexer")<<" <database-path>"<<std::endl;
        return 1;
    }
    try{
        std::ifstream ifile("/usr/local/irtest/termsdata1");
        if(!ifile){
            // Do not silently index an empty document.
            std::cerr<<"cannot open /usr/local/irtest/termsdata1"<<std::endl;
            return 1;
        }
        std::string content, line;
        while(std::getline(ifile , line)){
            // getline drops the newline; put a separator back so the last
            // term of one line is not fused with the first term of the next.
            if(!content.empty())
                content += '\n';
            content += line;
        }
        Xapian::WritableDatabase database(argv[1],Xapian::DB_CREATE_OR_OPEN);
        Xapian::Document document;
        Xapian::TermGenerator indexer;
        document.add_value(1,std::string("825"));
        document.set_data(content);
        indexer.set_document(document);
        indexer.index_text(content);
        database.add_document(document);
        database.commit();
    }catch(const Xapian::Error &e){
        std::cerr<<"exception: "<<e.get_description()<<std::endl;
        return 1;
    }
    return 0;
}
//searcher.cpp
#include<xapian.h>
#include<iostream>
#define QUERY "新加坡"
using namespace std;
int main(int argc , char **argv){
try{
string querystring(QUERY);
Xapian::Database database(argv[1]);
Xapian::Enquire enquire(database);
Xapian::QueryParser qp;
Xapian::Query query = qp.parse_query(querystring);
cout<<"query is: "<<query.get_description()<<endl;
enquire.set_query(query);
Xapian::MSet matches = enquire.get_mset(0,10);
cout<<matches.get_matches_estimated()<<" result found"<<endl;
for(Xapian::MSetIterator it = matches.begin() ; it != matches.end() ; it++){
Xapian::Document doc = it.get_document();
cout<<it.get_rank()<<": "<<it.get_percent()<<"%docid= "<<*it<<" value="<<doc.get_value(1) <<",data "<<doc.get_data()<<endl;
}
}catch(const Xapian::Error &e){
cout<<"exception: "<<e.get_description()<<endl;
}
}
[root@jcdd second]# gcc -o demo1 -I/usr/local/scws/include/scws -L/usr/local/scws/lib readfenci.c -lscws -Wl,--rpath -Wl,/usr/local/scws/lib
[root@jcdd second]# ./demo1
[root@jcdd second]# g++ -std=c++0x index.cpp -o index -lxapian
[root@jcdd second]# g++ -std=c++0x search.cpp -o search -lxapian