Stanford NLPとは
スタンフォードNLPグループが提供する、計算言語学の主要な問題に対応した統計的NLP、深層学習NLP、およびルールベースNLPのツール群です。
Software - The Stanford Natural Language Processing Group
https://nlp.stanford.edu/software/
項目 | 説明 |
---|---|
提供者 | スタンフォード大学 スタンフォードNLPグループ |
提供形式 | Java ライブラリ |
# Example without NLP4J
package hello.stanford;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.IndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.OriginalTextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentenceIndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation;
import edu.stanford.nlp.util.CoreMap;
/**
 * Stand-alone Stanford CoreNLP example (no NLP4J involved).
 *
 * <p>Annotates one hard-coded English sentence and dumps its basic dependency graph
 * to standard error, first word by word and then via the graph's own string form.
 */
public class HelloStanfordNLP {

    /**
     * Builds a CoreNLP pipeline with the annotators
     * {@code tokenize, ssplit, pos, lemma, depparse}, annotates the sample text, and for
     * every sentence prints the dependency tree starting at the graph's first root,
     * followed by {@code graph.toString()} wrapped in {@code <graph>} tags.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String text = "A natural language parser is a program that works out the grammatical structure of sentences";

        Properties pipelineConfig = new Properties();
        pipelineConfig.setProperty("annotators", "tokenize, ssplit, pos, lemma, depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineConfig);

        Annotation document = new Annotation(text);
        pipeline.annotate(document);

        for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
            SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class);

            // Word-by-word dump, starting from the root at depth 0.
            printWord(dependencies.getFirstRoot(), dependencies, 0);

            // Whole-graph dump as rendered by SemanticGraph itself.
            System.err.println("<graph>");
            System.err.println(dependencies.toString());
            System.err.println("</graph>");
        }
    }

    /**
     * Prints one {@code <word>} record for {@code word} to standard error, then recurses
     * into each of its children in the order returned by
     * {@link SemanticGraph#getChildList(IndexedWord)}.
     *
     * <p>A record holds the current depth followed by the word's text, original text,
     * begin/end character offsets, token index, sentence index and part-of-speech tag.
     *
     * @param word  the graph node to print
     * @param graph the dependency graph that {@code word} belongs to
     * @param tab   depth of {@code word} below the root; children are printed with {@code tab + 1}
     */
    public static void printWord(IndexedWord word, SemanticGraph graph, int tab) {
        System.err.println("<word>");
        printField("depth:", tab);
        printField("TextAnnotation:", word.get(TextAnnotation.class));
        printField("OriginalTextAnnotation:", word.get(OriginalTextAnnotation.class));
        printField("CharacterOffsetBeginAnnotation:", word.get(CharacterOffsetBeginAnnotation.class));
        printField("CharacterOffsetEndAnnotation:", word.get(CharacterOffsetEndAnnotation.class));
        printField("IndexAnnotation:", word.get(IndexAnnotation.class));
        printField("SentenceIndexAnnotation:", word.get(SentenceIndexAnnotation.class));
        printField("PartOfSpeechAnnotation:", word.get(PartOfSpeechAnnotation.class));
        System.err.println("</word>");

        for (IndexedWord child : graph.getChildList(word)) {
            printWord(child, graph, tab + 1);
        }
    }

    /** Writes {@code prefix} immediately followed by {@code value} as one line on standard error. */
    private static void printField(String prefix, Object value) {
        System.err.println(prefix + value);
    }
}
# Output without NLP4J
<word>
depth:0
TextAnnotation:program
OriginalTextAnnotation:program
CharacterOffsetBeginAnnotation:31
CharacterOffsetEndAnnotation:38
IndexAnnotation:7
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:1
TextAnnotation:parser
OriginalTextAnnotation:parser
CharacterOffsetBeginAnnotation:19
CharacterOffsetEndAnnotation:25
IndexAnnotation:4
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:2
TextAnnotation:A
OriginalTextAnnotation:A
CharacterOffsetBeginAnnotation:0
CharacterOffsetEndAnnotation:1
IndexAnnotation:1
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:DT
</word>
<word>
depth:2
TextAnnotation:natural
OriginalTextAnnotation:natural
CharacterOffsetBeginAnnotation:2
CharacterOffsetEndAnnotation:9
IndexAnnotation:2
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:JJ
</word>
<word>
depth:2
TextAnnotation:language
OriginalTextAnnotation:language
CharacterOffsetBeginAnnotation:10
CharacterOffsetEndAnnotation:18
IndexAnnotation:3
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:1
TextAnnotation:is
OriginalTextAnnotation:is
CharacterOffsetBeginAnnotation:26
CharacterOffsetEndAnnotation:28
IndexAnnotation:5
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:VBZ
</word>
<word>
depth:1
TextAnnotation:a
OriginalTextAnnotation:a
CharacterOffsetBeginAnnotation:29
CharacterOffsetEndAnnotation:30
IndexAnnotation:6
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:DT
</word>
<word>
depth:1
TextAnnotation:works
OriginalTextAnnotation:works
CharacterOffsetBeginAnnotation:44
CharacterOffsetEndAnnotation:49
IndexAnnotation:9
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:VBZ
</word>
<word>
depth:2
TextAnnotation:that
OriginalTextAnnotation:that
CharacterOffsetBeginAnnotation:39
CharacterOffsetEndAnnotation:43
IndexAnnotation:8
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:WDT
</word>
<word>
depth:2
TextAnnotation:out
OriginalTextAnnotation:out
CharacterOffsetBeginAnnotation:50
CharacterOffsetEndAnnotation:53
IndexAnnotation:10
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:RP
</word>
<word>
depth:2
TextAnnotation:structure
OriginalTextAnnotation:structure
CharacterOffsetBeginAnnotation:70
CharacterOffsetEndAnnotation:79
IndexAnnotation:13
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:3
TextAnnotation:the
OriginalTextAnnotation:the
CharacterOffsetBeginAnnotation:54
CharacterOffsetEndAnnotation:57
IndexAnnotation:11
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:DT
</word>
<word>
depth:3
TextAnnotation:grammatical
OriginalTextAnnotation:grammatical
CharacterOffsetBeginAnnotation:58
CharacterOffsetEndAnnotation:69
IndexAnnotation:12
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:JJ
</word>
<word>
depth:3
TextAnnotation:sentences
OriginalTextAnnotation:sentences
CharacterOffsetBeginAnnotation:83
CharacterOffsetEndAnnotation:92
IndexAnnotation:15
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NNS
</word>
<word>
depth:4
TextAnnotation:of
OriginalTextAnnotation:of
CharacterOffsetBeginAnnotation:80
CharacterOffsetEndAnnotation:82
IndexAnnotation:14
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:IN
</word>
<graph>
-> program/NN (root)
-> parser/NN (nsubj)
-> A/DT (det)
-> natural/JJ (amod)
-> language/NN (compound)
-> is/VBZ (cop)
-> a/DT (det)
-> works/VBZ (acl:relcl)
-> that/WDT (nsubj)
-> out/RP (compound:prt)
-> structure/NN (dobj)
-> the/DT (det)
-> grammatical/JJ (amod)
-> sentences/NNS (nmod)
-> of/IN (case)
</graph>
# Maven
<!-- NLP4J core library. -->
<!-- https://mvnrepository.com/artifact/org.nlp4j/nlp4j-core -->
<!-- The version is an open-ended Maven range: 1.3.1.0 or any later release. -->
<dependency>
<groupId>org.nlp4j</groupId>
<artifactId>nlp4j-core</artifactId>
<version>[1.3.1.0,)</version>
</dependency>
<!-- NLP4J wrapper for Stanford CoreNLP. -->
<!-- https://mvnrepository.com/artifact/org.nlp4j/nlp4j-stanford -->
<!-- The version is an open-ended Maven range: 1.3.0.0 or any later release. -->
<dependency>
<groupId>org.nlp4j</groupId>
<artifactId>nlp4j-stanford</artifactId>
<version>[1.3.0.0,)</version>
</dependency>
<!-- Stanford CoreNLP library (code artifact). -->
<!-- https://mvnrepository.com/artifact/edu.stanford.nlp/stanford-corenlp -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.0.0</version>
<scope>provided</scope>
</dependency>
<!-- Stanford CoreNLP again, this time the artifact with the "models" classifier. -->
<!-- https://mvnrepository.com/artifact/edu.stanford.nlp/stanford-corenlp -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.0.0</version>
<classifier>models</classifier>
<scope>provided</scope>
</dependency>
コード
package nlp4j.stanford.examples;
import nlp4j.Document;
import nlp4j.Keyword;
import nlp4j.KeywordWithDependency;
import nlp4j.impl.DefaultDocument;
import nlp4j.stanford.StanfordPosDependencyAnnotator;
/**
 * Example of running {@link StanfordPosDependencyAnnotator} through the NLP4J API.
 *
 * <p>Annotates a single English sentence and prints each resulting
 * {@link KeywordWithDependency} as XML on standard error.
 */
public class StanfordPosDependencyAnnotatorExample0 {

    /**
     * Stores the sample sentence under the {@code "text"} attribute of a new document,
     * sets the annotator's {@code "target"} property to {@code "text"}, annotates the
     * document, and prints every keyword that is a {@link KeywordWithDependency}.
     *
     * @param args command-line arguments; not used
     * @throws Exception propagated unchanged from the annotator
     */
    public static void main(String[] args) throws Exception {
        StanfordPosDependencyAnnotator annotator = new StanfordPosDependencyAnnotator();
        Document document = new DefaultDocument();

        document.putAttribute("text", "I eat sushi with chopsticks.");
        // NOTE(review): "target" presumably names the document attribute the annotator reads - confirm in NLP4J docs.
        annotator.setProperty("target", "text");
        annotator.annotate(document);

        for (Keyword keyword : document.getKeywords()) {
            // Only dependency-aware keywords can be rendered as XML; skip everything else.
            if (!(keyword instanceof KeywordWithDependency)) {
                continue;
            }
            System.err.println(((KeywordWithDependency) keyword).toStringAsXml());
        }
    }
}
# 結果
<?xml version="1.0" encoding="UTF-8"?>
<w begin="2" depth="0" end="5" facet="VBP" id="0" lex="eat" relation="root" sequence="0" str="eat">
<w begin="0" depth="1" end="1" facet="PRP" id="1" lex="I" relation="nsubj" sequence="1" str="I"/>
<w begin="6" depth="1" end="11" facet="NN" id="2" lex="sushi" relation="obj" sequence="2" str="sushi"/>
<w begin="17" depth="1" end="27" facet="NNS" id="3" lex="chopstick" relation="obl" sequence="3" str="chopsticks">
<w begin="12" depth="2" end="16" facet="IN" id="4" lex="with" relation="case" sequence="4" str="with"/>
</w>
<w begin="27" depth="1" end="28" facet="." id="5" lex="." relation="punct" sequence="5" str="."/>
</w>