当前位置: 首页 > 工具软件 > OpenNLP > 使用案例 >

java opennlp_opennlp的使用

邹书
2023-12-01

import java.io.*;

import opennlp.tools.sentdetect.SentenceDetectorME;

import opennlp.tools.sentdetect.SentenceModel;

import opennlp.tools.tokenize.Tokenizer;

import opennlp.tools.tokenize.TokenizerME;

import opennlp.tools.tokenize.TokenizerModel;

import opennlp.tools.util.Span;

/**
 * Minimal demonstration of Apache OpenNLP sentence detection and tokenization.
 *
 * <p>Official tutorial (Apache OpenNLP Developer Documentation):
 * http://opennlp.apache.org/documentation/1.5.3/manual/opennlp.html
 *
 * <p>The pre-trained OpenNLP models can be downloaded from
 * http://opennlp.sourceforge.net/models-1.5/
 *
 * <p>A good tutorial about the OpenNLP tools:
 * http://www.programcreek.com/2012/05/opennlp-tutorial/
 */
public class Testing_openNLP {

    /** Sentence-detector model file, resolved against the working directory. */
    private static final String SENTENCE_MODEL_FILE = "en-sent.bin";

    /** Tokenizer model file, resolved against the working directory. */
    private static final String TOKENIZER_MODEL_FILE = "en-token.bin";

    /**
     * Tokenizes and sentence-splits a sample string and prints both results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Alternative sample input:
        // "This isn't the greatest example sentence in the world because I've seen better. Neither is this one. This one's not bad, though."
        String testString = "Hi. How are you? This is &3 $444 Mike.";

        String[] tokens = Token(testString);
        String[] sentences = sentenceSegmentation(testString);

        printAll("Tokens", tokens);
        printAll("Sentences", sentences);
    }

    /**
     * Sentence segmentation: splits {@code str} into sentences using the model
     * stored in {@code en-sent.bin}.
     *
     * @param str the text to split
     * @return the detected sentences, or {@code null} if the model file is
     *         missing or cannot be loaded (the failure is reported on stderr)
     */
    public static String[] sentenceSegmentation(String str) {
        SentenceModel model;
        // try-with-resources closes the stream whether or not loading succeeds.
        try (InputStream modelIn = new FileInputStream(SENTENCE_MODEL_FILE)) {
            model = new SentenceModel(modelIn);
        } catch (IOException e) {
            // Covers FileNotFoundException as well; never continue with a null model.
            System.err.println("Failed to load sentence model '" + SENTENCE_MODEL_FILE + "': " + e);
            return null;
        }

        SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
        return sentenceDetector.sentDetect(str);
    }

    /**
     * Tokenization: splits {@code str} into tokens using the model stored in
     * {@code en-token.bin}.
     *
     * @param str the text to tokenize
     * @return the tokens, or {@code null} if the model file is missing or
     *         cannot be loaded (the failure is reported on stderr)
     */
    public static String[] Token(String str) {
        TokenizerModel model;
        // try-with-resources closes the stream whether or not loading succeeds.
        try (InputStream modelIn = new FileInputStream(TOKENIZER_MODEL_FILE)) {
            model = new TokenizerModel(modelIn);
        } catch (IOException e) {
            // Covers FileNotFoundException as well; never continue with a null model.
            System.err.println("Failed to load tokenizer model '" + TOKENIZER_MODEL_FILE + "': " + e);
            return null;
        }

        TokenizerME tokenizer = new TokenizerME(model);
        // Note: tokenizer.getTokenProbabilities() must be called directly after
        // one of the tokenize methods was called.
        return tokenizer.tokenize(str);
    }

    /** Prints each element of {@code items} under {@code label}, or a notice when it is null. */
    private static void printAll(String label, String[] items) {
        if (items == null) {
            System.out.println(label + ": <unavailable>");
            return;
        }
        System.out.println(label + ":");
        for (String item : items) {
            System.out.println("  " + item);
        }
    }
}

 类似资料: