LoginSignup
0
0

More than 5 years have passed since last update.

Stanford CoreNLP で SemanticGraph#toString() と同様の結果を取得する

Posted at

コード


package pkg;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.ValueAnnotation;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.util.CoreMap;

public class Main {

    public static void main(String[] args) {
        String text = "I bouht a new red Nissan car for my family.";

        Properties properties = new Properties();
        properties.setProperty("annotators", "tokenize, ssplit, pos, depparse");
        StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties);
        Annotation annotation = new Annotation(text);
        coreNLP.annotate(annotation);

        System.err.println("---");

        List<CoreMap> sentenceMap = annotation.get(SentencesAnnotation.class);
        for (CoreMap label : sentenceMap) {
            SemanticGraph graph = label.get(BasicDependenciesAnnotation.class);
            System.err.println(graph);
            System.err.println("---");
            Collection<IndexedWord> roots = graph.getRoots();
            for (IndexedWord root : roots) {
                Set<IndexedWord> used = new HashSet<IndexedWord>();
                printWord2(root, graph, 0, used, "root");
            }
        }
    }

    public static void printWord2(IndexedWord word, SemanticGraph graph, int depth, Set<IndexedWord> used,
            String relationName) {
        used.add(word);
        for (int n = 0; n < depth; n++) {
            System.err.print(" ");
        }
        System.err.println("" + word.get(ValueAnnotation.class) //
                + " : " //
                + word.getString(PartOfSpeechAnnotation.class) + " : " //
                + relationName //
        );
        List<SemanticGraphEdge> edges = graph.outgoingEdgeList(word);
        Collections.sort(edges);
        for (SemanticGraphEdge edge : edges) {
            IndexedWord targetWord = edge.getTarget();
            GrammaticalRelation relation = edge.getRelation();
            printWord2(targetWord, graph, depth + 1, used, relation.getShortName());
            List<SemanticGraphEdge> edges2 = graph.outgoingEdgeList(targetWord);
            for (SemanticGraphEdge e2 : edges2) {
                IndexedWord w2 = e2.getTarget();
                GrammaticalRelation r2 = edge.getRelation();
                if (used.contains(w2) == false) {
                    printWord2(w2, graph, depth + 1, used, r2.getShortName());
                }
            }
        }
    }

}

実行結果


Adding annotator tokenize
TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
Adding annotator ssplit
edu.stanford.nlp.pipeline.AnnotatorImplementations:
Adding annotator pos
Reading POS tagger model from edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger ... done [0.7 sec].
Adding annotator depparse
Loading depparse model file: edu/stanford/nlp/models/parser/nndep/PTB_Stanford_params.txt.gz ... 
PreComputed 100000, Elapsed Time: 2.156 (s)
Initializing dependency parser done [3.2 sec].
---
-> bouht/VBP (root)
  -> I/PRP (nsubj)
  -> car/NN (dobj)
    -> a/DT (det)
    -> new/JJ (amod)
    -> red/JJ (amod)
    -> Nissan/NNP (nn)
  -> for/IN (prep)
    -> family/NN (pobj)
      -> my/PRP$ (poss)
  -> ./. (punct)

---
bouht : VBP : root
 I : PRP : nsubj
 car : NN : dobj
  a : DT : det
  new : JJ : amod
  red : JJ : amod
  Nissan : NNP : nn
 for : IN : prep
  family : NN : pobj
   my : PRP$ : poss
 . : . : punct


0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0