// Apply the java plugin to add support for Java
apply plugin: 'java'

// In this section you declare where to find the dependencies of your project
repositories {
    // Resolve dependencies through the Aliyun public Maven mirror.
    // You can declare any Maven/Ivy/file repository here.
    maven {
        // Use HTTPS: plain-HTTP repositories are insecure and are rejected by
        // default in recent Gradle versions.
        url "https://maven.aliyun.com/nexus/content/groups/public"
    }
}

// In this section you declare the dependencies for your production and test code
dependencies {
    // https://mvnrepository.com/artifact/edu.stanford.nlp/stanford-corenlp
    compile group: 'edu.stanford.nlp', name: 'stanford-corenlp', version: '3.8.0'

    // The model jars are taken from the local lib/ directory.
    compile files('lib/stanford-corenlp-3.8.0-models.jar')
    compile files('lib/stanford-chinese-corenlp-2017-06-09-models.jar')

    // Alternative: fetch the model jars from the repository via classifiers.
    //compile group: 'edu.stanford.nlp', name: 'stanford-corenlp', version: '3.8.0', classifier:'models'
    //compile group: 'edu.stanford.nlp', name: 'stanford-corenlp', version: '3.8.0', classifier:'models-chinese'

    testCompile 'junit:junit:4.12'
}
具体源码请参考官方 Demo:StanfordCoreNlpDemo.java,这里只是针对中文处理做了一些修改。由于中文处理需要的内存比较大,需要配置 JVM 参数:-Xms512M -Xmx4096M。
// Build the pipeline from a Properties object. The line that would select the
// annotators explicitly is commented out, so no "annotators" property is set here.
// NOTE(review): no Chinese-specific configuration is visible in this fragment;
// presumably the full example supplies it (e.g. from a properties file) - confirm.
Properties props = new Properties();
//props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
//StanfordCoreNLP pipeline = new StanfordCoreNLP();

// Initialize an Annotation with some text to be annotated. The text is the argument to the constructor.
// If a command-line argument is given it is treated as a file path whose contents
// are annotated; otherwise a built-in Chinese sample text is used.
Annotation annotation;
if (args.length > 0) {
    annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
} else {
    annotation = new Annotation(" 循环经济是人类社会发展的必然选择,包装废弃物资源化是循环经济的要求。"
            + "包装废弃物资源化是一项系统工程,应从企业、区域和社会三个层面上进行,"
            + "因此,产生了三种包装废弃物资源化模式,即基于清洁生产、生态工业园区和基于社会层面的包装废弃物资源化模式。");
}

// run all the selected Annotators on this text
pipeline.annotate(annotation);
tokenize(Tokenization 分词)
ssplit(Sentence Splitting 断句)
pos(Part of Speech Tagging 词性标注)
lemma(Lemmatization 词形还原)
ner(Named Entity Recognition 命名实体识别)
parse(Constituency Parsing 语法分析)
depparse(Dependency Parsing 依存分析)
dcoref(Coreference Resolution 同指消解)
natlog(Natural Logic Polarity 自然逻辑极性标注)