コード


package pkg;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.ValueAnnotation;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.util.CoreMap;

public class Main {

    public static void main(String[] args) {
        String text = "I bouht a new red Nissan car for my family.";

        Properties properties = new Properties();
        properties.setProperty("annotators", "tokenize, ssplit, pos, depparse");
        StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties);
        Annotation annotation = new Annotation(text);
        coreNLP.annotate(annotation);

        System.err.println("---");

        List<CoreMap> sentenceMap = annotation.get(SentencesAnnotation.class);
        for (CoreMap label : sentenceMap) {
            SemanticGraph graph = label.get(BasicDependenciesAnnotation.class);
            System.err.println(graph);
            System.err.println("---");
            Collection<IndexedWord> roots = graph.getRoots();
            for (IndexedWord root : roots) {
                Set<IndexedWord> used = new HashSet<IndexedWord>();
                printWord2(root, graph, 0, used, "root");
            }
        }
    }

    public static void printWord2(IndexedWord word, SemanticGraph graph, int depth, Set<IndexedWord> used,
            String relationName) {
        used.add(word);
        for (int n = 0; n < depth; n++) {
            System.err.print(" ");
        }
        System.err.println("" + word.get(ValueAnnotation.class) //
                + " : " //
                + word.getString(PartOfSpeechAnnotation.class) + " : " //
                + relationName //
        );
        List<SemanticGraphEdge> edges = graph.outgoingEdgeList(word);
        Collections.sort(edges);
        for (SemanticGraphEdge edge : edges) {
            IndexedWord targetWord = edge.getTarget();
            GrammaticalRelation relation = edge.getRelation();
            printWord2(targetWord, graph, depth + 1, used, relation.getShortName());
            List<SemanticGraphEdge> edges2 = graph.outgoingEdgeList(targetWord);
            for (SemanticGraphEdge e2 : edges2) {
                IndexedWord w2 = e2.getTarget();
                GrammaticalRelation r2 = edge.getRelation();
                if (used.contains(w2) == false) {
                    printWord2(w2, graph, depth + 1, used, r2.getShortName());
                }
            }
        }
    }

}

実行結果


Adding annotator tokenize
TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
Adding annotator ssplit
edu.stanford.nlp.pipeline.AnnotatorImplementations:
Adding annotator pos
Reading POS tagger model from edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger ... done [0.7 sec].
Adding annotator depparse
Loading depparse model file: edu/stanford/nlp/models/parser/nndep/PTB_Stanford_params.txt.gz ... 
PreComputed 100000, Elapsed Time: 2.156 (s)
Initializing dependency parser done [3.2 sec].
---
-> bouht/VBP (root)
  -> I/PRP (nsubj)
  -> car/NN (dobj)
    -> a/DT (det)
    -> new/JJ (amod)
    -> red/JJ (amod)
    -> Nissan/NNP (nn)
  -> for/IN (prep)
    -> family/NN (pobj)
      -> my/PRP$ (poss)
  -> ./. (punct)

---
bouht : VBP : root
 I : PRP : nsubj
 car : NN : dobj
  a : DT : det
  new : JJ : amod
  red : JJ : amod
  Nissan : NNP : nn
 for : IN : prep
  family : NN : pobj
   my : PRP$ : poss
 . : . : punct

Stanford CoreNLP で SemanticGraph#toString() と同様の結果を取得する

コード

実行結果