More than 1 year has passed since last update.

NLP4J - Java で英語の構文解析（Stanford NLPを利用）

Last updated at 2022-05-23 / Posted at 2021-07-18

Stanford NLPとは

スタンフォードNLPグループが提供する、計算言語学の主要な問題に対応した、統計NLP・深層学習NLP・ルールベースNLPのツール群です。

Software - The Stanford Natural Language Processing Group
https://nlp.stanford.edu/software/

| 項目 | 説明 |
| --- | --- |
| 提供者 | スタンフォード大学スタンフォードNLPグループ |
| 提供形式 | Java ライブラリ |

# Example without NLP4J

package hello.stanford;

import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.IndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.OriginalTextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentenceIndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation;
import edu.stanford.nlp.util.CoreMap;

/**
 * Minimal Stanford CoreNLP demo: parses one fixed English sentence and dumps
 * its basic dependency graph to standard error.
 */
public class HelloStanfordNLP {
	/**
	 * Builds a CoreNLP pipeline, annotates the sample sentence and, for every
	 * sentence found, prints each word of the dependency graph (starting from
	 * the first root) followed by the graph's own string form.
	 *
	 * @param args not used
	 */
	public static void main(String[] args) {
		Properties config = new Properties();
		config.setProperty("annotators", "tokenize, ssplit, pos, lemma, depparse");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(config);

		Annotation document = new Annotation(
				"A natural language parser is a program that works out the grammatical structure of sentences");
		pipeline.annotate(document);

		for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
			SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class);
			printWord(dependencies.getFirstRoot(), dependencies, 0);
			System.err.println("<graph>");
			System.err.println(dependencies.toString());
			System.err.println("</graph>");
		}
	}

	/**
	 * Prints one word as a {@code <word>} record and then recurses into its
	 * children in the graph, so a parent is always printed before its children.
	 *
	 * @param word  the word to print
	 * @param graph the dependency graph used to look up the word's children
	 * @param tab   depth of {@code word} below the root (the root is 0)
	 */
	public static void printWord(IndexedWord word, SemanticGraph graph, int tab) {
		System.err.println("<word>");
		printField("depth", tab);
		printField("TextAnnotation", word.get(TextAnnotation.class));
		printField("OriginalTextAnnotation", word.get(OriginalTextAnnotation.class));
		printField("CharacterOffsetBeginAnnotation", word.get(CharacterOffsetBeginAnnotation.class));
		printField("CharacterOffsetEndAnnotation", word.get(CharacterOffsetEndAnnotation.class));
		printField("IndexAnnotation", word.get(IndexAnnotation.class));
		printField("SentenceIndexAnnotation", word.get(SentenceIndexAnnotation.class));
		printField("PartOfSpeechAnnotation", word.get(PartOfSpeechAnnotation.class));
		System.err.println("</word>");

		for (IndexedWord child : graph.getChildList(word)) {
			printWord(child, graph, tab + 1);
		}
	}

	/** Writes a single {@code label:value} line to standard error. */
	private static void printField(String label, Object value) {
		System.err.println(label + ":" + value);
	}
}

# Output without NLP4J

<word>
depth:0
TextAnnotation:program
OriginalTextAnnotation:program
CharacterOffsetBeginAnnotation:31
CharacterOffsetEndAnnotation:38
IndexAnnotation:7
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:1
TextAnnotation:parser
OriginalTextAnnotation:parser
CharacterOffsetBeginAnnotation:19
CharacterOffsetEndAnnotation:25
IndexAnnotation:4
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:2
TextAnnotation:A
OriginalTextAnnotation:A
CharacterOffsetBeginAnnotation:0
CharacterOffsetEndAnnotation:1
IndexAnnotation:1
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:DT
</word>
<word>
depth:2
TextAnnotation:natural
OriginalTextAnnotation:natural
CharacterOffsetBeginAnnotation:2
CharacterOffsetEndAnnotation:9
IndexAnnotation:2
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:JJ
</word>
<word>
depth:2
TextAnnotation:language
OriginalTextAnnotation:language
CharacterOffsetBeginAnnotation:10
CharacterOffsetEndAnnotation:18
IndexAnnotation:3
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:1
TextAnnotation:is
OriginalTextAnnotation:is
CharacterOffsetBeginAnnotation:26
CharacterOffsetEndAnnotation:28
IndexAnnotation:5
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:VBZ
</word>
<word>
depth:1
TextAnnotation:a
OriginalTextAnnotation:a
CharacterOffsetBeginAnnotation:29
CharacterOffsetEndAnnotation:30
IndexAnnotation:6
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:DT
</word>
<word>
depth:1
TextAnnotation:works
OriginalTextAnnotation:works
CharacterOffsetBeginAnnotation:44
CharacterOffsetEndAnnotation:49
IndexAnnotation:9
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:VBZ
</word>
<word>
depth:2
TextAnnotation:that
OriginalTextAnnotation:that
CharacterOffsetBeginAnnotation:39
CharacterOffsetEndAnnotation:43
IndexAnnotation:8
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:WDT
</word>
<word>
depth:2
TextAnnotation:out
OriginalTextAnnotation:out
CharacterOffsetBeginAnnotation:50
CharacterOffsetEndAnnotation:53
IndexAnnotation:10
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:RP
</word>
<word>
depth:2
TextAnnotation:structure
OriginalTextAnnotation:structure
CharacterOffsetBeginAnnotation:70
CharacterOffsetEndAnnotation:79
IndexAnnotation:13
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
</word>
<word>
depth:3
TextAnnotation:the
OriginalTextAnnotation:the
CharacterOffsetBeginAnnotation:54
CharacterOffsetEndAnnotation:57
IndexAnnotation:11
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:DT
</word>
<word>
depth:3
TextAnnotation:grammatical
OriginalTextAnnotation:grammatical
CharacterOffsetBeginAnnotation:58
CharacterOffsetEndAnnotation:69
IndexAnnotation:12
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:JJ
</word>
<word>
depth:3
TextAnnotation:sentences
OriginalTextAnnotation:sentences
CharacterOffsetBeginAnnotation:83
CharacterOffsetEndAnnotation:92
IndexAnnotation:15
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NNS
</word>
<word>
depth:4
TextAnnotation:of
OriginalTextAnnotation:of
CharacterOffsetBeginAnnotation:80
CharacterOffsetEndAnnotation:82
IndexAnnotation:14
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:IN
</word>
<graph>
-> program/NN (root)
  -> parser/NN (nsubj)
    -> A/DT (det)
    -> natural/JJ (amod)
    -> language/NN (compound)
  -> is/VBZ (cop)
  -> a/DT (det)
  -> works/VBZ (acl:relcl)
    -> that/WDT (nsubj)
    -> out/RP (compound:prt)
    -> structure/NN (dobj)
      -> the/DT (det)
      -> grammatical/JJ (amod)
      -> sentences/NNS (nmod)
        -> of/IN (case)

</graph>

# Maven

<!-- https://mvnrepository.com/artifact/org.nlp4j/nlp4j-core -->
<!-- https://mvnrepository.com/artifact/org.nlp4j/nlp4j-stanford (second dependency below) -->
<dependency>
	<groupId>org.nlp4j</groupId>
	<artifactId>nlp4j-core</artifactId>
	<version>[1.3.1.0,)</version>
</dependency>
<dependency>
	<groupId>org.nlp4j</groupId>
	<artifactId>nlp4j-stanford</artifactId>
	<version>[1.3.0.0,)</version>
</dependency>
<!-- https://mvnrepository.com/artifact/edu.stanford.nlp/stanford-corenlp -->
<dependency>
	<groupId>edu.stanford.nlp</groupId>
	<artifactId>stanford-corenlp</artifactId>
	<version>4.0.0</version>
	<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/edu.stanford.nlp/stanford-corenlp -->
<dependency>
	<groupId>edu.stanford.nlp</groupId>
	<artifactId>stanford-corenlp</artifactId>
	<version>4.0.0</version>
	<classifier>models</classifier>
	<scope>provided</scope>
</dependency>

コード

package nlp4j.stanford.examples;

import nlp4j.Document;
import nlp4j.Keyword;
import nlp4j.KeywordWithDependency;
import nlp4j.impl.DefaultDocument;
import nlp4j.stanford.StanfordPosDependencyAnnotator;

/**
 * Example that runs {@link StanfordPosDependencyAnnotator} on a short English
 * sentence and prints each dependency keyword as XML to standard error.
 */
public class StanfordPosDependencyAnnotatorExample0 {
	/**
	 * Annotates a one-sentence document and prints every resulting keyword that
	 * is a {@link KeywordWithDependency}; other keywords are skipped.
	 *
	 * @param args not used
	 * @throws Exception propagated from the annotator calls
	 */
	public static void main(String[] args) throws Exception {
		StanfordPosDependencyAnnotator annotator = new StanfordPosDependencyAnnotator();
		// NOTE(review): "target" presumably names the document attribute to analyse - confirm against NLP4J docs.
		annotator.setProperty("target", "text");

		Document document = new DefaultDocument();
		document.putAttribute("text", "I eat sushi with chopsticks.");

		annotator.annotate(document);

		for (Keyword keyword : document.getKeywords()) {
			if (!(keyword instanceof KeywordWithDependency)) {
				continue;
			}
			System.err.println(((KeywordWithDependency) keyword).toStringAsXml());
		}
	}
}

# 結果

<?xml version="1.0" encoding="UTF-8"?>
<w begin="2" depth="0" end="5" facet="VBP" id="0" lex="eat" relation="root" sequence="0" str="eat">
    <w begin="0" depth="1" end="1" facet="PRP" id="1" lex="I" relation="nsubj" sequence="1" str="I"/>
    <w begin="6" depth="1" end="11" facet="NN" id="2" lex="sushi" relation="obj" sequence="2" str="sushi"/>
    <w begin="17" depth="1" end="27" facet="NNS" id="3" lex="chopstick" relation="obl" sequence="3" str="chopsticks">
        <w begin="12" depth="2" end="16" facet="IN" id="4" lex="with" relation="case" sequence="4" str="with"/>
    </w>
    <w begin="27" depth="1" end="28" facet="." id="5" lex="." relation="punct" sequence="5" str="."/>
</w>

NLP4J Index

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up

NLP4J - Java で英語の構文解析（Stanford NLPを利用）

Stanford NLPとは

コード

NLP4J - Java で英語の構文解析（Stanford NLPを利用）