LoginSignup
3
4

More than 5 years have passed since last update.

link-grammarで英文法間違いを検出する

Last updated at Posted at 2016-11-17

link-grammarで英文法間違いを見つける方法

link-grammarの簡単な使い方

link-grammar-test.py
from linkgrammar import Sentence, ParseOptions, Dictionary, Clinkgrammar as clg

# Parse options with verbosity=1. Note: `po` is rebound to a new ParseOptions
# further down, before any Sentence is created, so this value is never used
# for parsing.
po = ParseOptions(verbosity=1)

def desc(lkg):
    """Print a short summary of one linkage to stdout.

    Four lines are printed, in this order: the unused-word cost, the
    disjunct cost, the list of words and the list of links, each obtained
    from the method of the same name on ``lkg``.

    Args:
        lkg: A linkage object providing ``unused_word_cost()``,
            ``disjunct_cost()``, ``words()`` and ``links()``.
    """
    print(lkg.unused_word_cost())
    print(lkg.disjunct_cost())
    print(list(lkg.words()))
    # print is called as a function here: the statement form
    # (``print [...]``) is a syntax error on Python 3.
    print(list(lkg.links()))

# Let the parser accept linkages that leave words unlinked ("null" words),
# with the null count bounded by the values below.
po = ParseOptions(min_null_count=0, max_null_count=999)

# English is the default language
english_dict = Dictionary()  # open the dictionary only once

text = 'This are an pen.'

sentence = Sentence(text, english_dict, po)
linkages = sentence.parse()

# A positive null count after parsing is reported as a grammar error.
has_null_words = sentence.null_count() > 0
if has_null_words:
    print('grammar error')

# Dump every linkage the parser produced.
for linkage in linkages:
    desc(linkage)

この出力は

output.txt
grammar error
2
0.05
['LEFT-WALL', '[this]', '[are]', 'an', 'pen.n', '.', 'RIGHT-WALL']
[Link: LEFT-WALL-Xp-., Link: LEFT-WALL-Wa-pen.n, Link: an-Ds**v-Ds-pen.n, Link: .-RW-RIGHT-WALL]
2
0.05
['LEFT-WALL', 'this.d', '[are]', '[an]', 'pen.n', '.', 'RIGHT-WALL']
[Link: LEFT-WALL-Xp-., Link: LEFT-WALL-Wa-pen.n, Link: this.d-D*u-Ds-pen.n, Link: .-RW-RIGHT-WALL]
2
0.25
['LEFT-WALL', '[this]', '[are]', 'an', 'pen.n', '.', 'RIGHT-WALL']
[Link: LEFT-WALL-Xp-., Link: LEFT-WALL-Wa-pen.n, Link: an-Ds**x-Ds-pen.n, Link: .-RW-RIGHT-WALL]

文法間違いがあると `null_count > 0` になり、間違った部分は `lkg.words()` の結果の中で `[]` で囲まれて表示される。

なおpythonでlink-grammar以外の英文法間違いを見つける方法

Emacsからlink-grammarの英文法チェックを使う

`~/bin/` などに、以下のプログラムを保存する。

grammarlinkengfile.py
import os.path
import sys

from linkgrammar import Sentence, ParseOptions, Dictionary, Clinkgrammar as clg
import re

def is_no_link_ward(w):
    """Return the text inside a fully bracketed word, or False.

    The whole of ``w`` must be a single ``[...]`` group with at least one
    character between the brackets and no nested brackets. A group made up
    only of digits is rejected.

    Returns:
        The string between the brackets when ``w`` qualifies, otherwise
        ``False``.

    >>> is_no_link_ward("[1234]")
    False
    >>> is_no_link_ward("[aBcD1234]")
    'aBcD1234'
    """
    # A purely numeric bracket group is explicitly not a match.
    if re.findall(r"\[\d+\]", w) == [w]:
        return False
    # Exactly one bracket group that spans the entire word.
    if re.findall(r"\[[^\[\]]+\]", w) == [w]:
        return w[1:-1]
    return False

def text_words2col_begin_end(text,words):
    """Locate each parser word inside ``text`` as a ``[begin, end]`` pair.

    The first and last entries of ``words`` are skipped. Every remaining
    word is compared, case-insensitively and character by character, with
    the text at the current position; matching stops at the first
    character that differs or at the end of the text. Words that
    ``is_no_link_ward`` recognises as bracketed are compared without their
    brackets. Whitespace following a word is skipped before the next word
    is handled.

    Returns:
        A list with one ``[begin, end]`` pair per inspected word. Offsets
        are 0-based positions in the lower-cased text and ``end`` is
        exclusive.

    >>> text='This is a the test of bfgiuing and xxxvfrg'
    >>> words=['LEFT-WALL', 'this.p', 'is.v', '[a]', 'the', 'test.n', 'of', 'bfgiuing[!].g', 'and.j-n', 'xxxvfrg[?].n', 'RIGHT-WALL']
    >>> text_words2col_begin_end(text,words)
    [[0, 4], [5, 7], [8, 9], [10, 13], [14, 18], [19, 21], [22, 30], [31, 34], [35, 42]]
    """
    whitespace = re.compile(r"\s+")
    lowered = text.lower()
    total = len(lowered)
    pos = 0  # cursor into `lowered`
    spans = []
    for word in words[1:-1]:
        # Bracketed words are matched by the token inside the brackets.
        token = (is_no_link_ward(word) or word).lower()
        begin = pos
        for ch in token:
            if pos >= total or lowered[pos] != ch:
                break
            pos += 1
        spans.append([begin, pos])
        # Jump over the whitespace run (if any) that follows the word.
        gap = whitespace.match(lowered, pos)
        if gap is not None:
            pos = gap.end()
    return spans

def desc(lkg):
    """Print a short summary of one linkage to stdout.

    Four lines are printed, in this order: the unused-word cost, the
    disjunct cost, the list of words and the list of links, each obtained
    from the method of the same name on ``lkg``.

    Args:
        lkg: A linkage object providing ``unused_word_cost()``,
            ``disjunct_cost()``, ``words()`` and ``links()``.
    """
    print(lkg.unused_word_cost())
    print(lkg.disjunct_cost())
    print(list(lkg.words()))
    # print is called as a function here: the statement form
    # (``print [...]``) is a syntax error on Python 3.
    print(list(lkg.links()))


# Lazily created Dictionary shared by every text_error_cols() call.
_EN_DICTIONARY = None


def _english_dictionary():
    """Return the shared Dictionary, opening it on first use only."""
    global _EN_DICTIONARY
    if _EN_DICTIONARY is None:
        _EN_DICTIONARY = Dictionary()  # English is the default language
    return _EN_DICTIONARY


def text_error_cols(text):
    """Return the column spans of the words the parser could not link.

    ``text`` is parsed with null (unlinked) words allowed. If the parse
    reports a null count of zero, no spans are returned. Otherwise every
    linkage is inspected and, for each word position that
    ``is_no_link_ward`` recognises as bracketed, the matching
    ``[begin, end]`` pair from ``text_words2col_begin_end`` is collected.
    A word position is reported once, even if several linkages flag it.

    Args:
        text: The sentence to check.

    Returns:
        A list of ``[begin, end]`` pairs; empty when nothing is flagged
        or when ``text`` contains only whitespace.

    >>> text='This is a the test of bfgiuing and xxxvfrg'
    >>> text_error_cols(text)
    [[8, 9], [10, 13]]
    """
    # Nothing to flag in a blank line; skip the parser entirely.
    if not text.strip():
        return []
    po = ParseOptions(min_null_count=0, max_null_count=999)
    sent = Sentence(text, _english_dictionary(), po)
    linkages = sent.parse()
    if sent.null_count() == 0:
        return []
    error_cols = []
    seen = set()  # word positions already reported
    for lkg in linkages:
        words = list(lkg.words())
        spans = None  # column table, built at most once per linkage
        for k, w in enumerate(words):
            # `continue`, not `break`: a position reported by an earlier
            # linkage must not hide later unlinked words in this one.
            if not is_no_link_ward(w) or k in seen:
                continue
            seen.add(k)
            if spans is None:
                spans = text_words2col_begin_end(text, words)
            # The span table omits words[0], hence the k - 1.
            error_cols.append(spans[k - 1])
    return error_cols



if __name__ == '__main__':
    # Usage: grammarlinkengfile.py FILE
    # For every span returned by text_error_cols() on a line of FILE, print
    # "FILE:LINE:COLUMN: linkerror" with 1-based line and column numbers.
    if len(sys.argv) < 2:
        sys.exit("usage: " + os.path.basename(sys.argv[0]) + " FILE")
    fname = sys.argv[1]
    with open(fname) as f:
        for i, text in enumerate(f):
            # Lines keep their trailing newline, so a length test never
            # filters anything; skip lines without visible content instead.
            if not text.strip():
                continue
            for col in text_error_cols(text):
                print(fname+":"+str(i+1)+":"+str(col[0]+1)+": linkerror")

grammarlinkengfile.pyを以下のelispで呼ぶ

grammarlinkengfile.el
;; Flycheck provides `flycheck-define-checker' and `flycheck-checkers'.
(require 'flycheck)

;; Define a flycheck checker named `link-grammar' that runs the Python
;; script and turns its output into warnings.
(flycheck-define-checker link-grammar
  "link-grammar check"
  ;; Replace /path/to/ with the directory where the script was saved.
  ;; `source' is filled in by flycheck with the file to check (see the
  ;; flycheck documentation for the exact substitution rules).
  :command ("python" "/path/to/grammarlinkengfile.py" source)
  ;; Each output line of the form "FILE:LINE:COLUMN: MESSAGE" is reported
  ;; as a warning at that line and column.
  :error-patterns
  ((warning line-start (file-name) ":" line ":" column ": " (message) line-end))
  ;; Major modes this checker is declared for.
  :modes (text-mode markdown-mode))

;; Add the checker to flycheck's list of checkers.
(add-to-list 'flycheck-checkers 'link-grammar)

;; Run `flycheck-mode' from the markdown-mode hook.
(add-hook 'markdown-mode-hook 'flycheck-mode)

`(load "/path/to/grammarlinkengfile")` を実行してから適当な .md ファイルを開くと、文法チェックされる。

3
4
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
3
4