6
Help us understand the problem. What are the problem?

More than 1 year has passed since last update.

posted at

updated at

# ExcelでN-gramってみる

Bigramしかないけど。

## コード

``````
Function CreateNGram(strInput, intN)
Dim arrNGram, intBound, i, j, strGram, didInc, arrTemp

If Len(strInput) = 0 Then Exit Function

ReDim arrNGram(Len(strInput) + 1, 1)
strInput = Chr(0) & UCase(Trim(strInput)) & Chr(0)
intBound = -1

For i = 1 To Len(strInput) - intN + 1
strGram = Mid(strInput, i, intN)
didInc = False

For j = 0 To intBound
If strGram = arrNGram(j, 0) Then
arrNGram(j, 1) = arrNGram(j, 1) + 1
didInc = True
Exit For
End If
Next

If Not didInc Then
intBound = intBound + 1
arrNGram(intBound, 0) = strGram
arrNGram(intBound, 1) = 1
End If
Next

ReDim arrTemp(intBound, 1)
For i = 0 To intBound
arrTemp(i, 0) = arrNGram(i, 0)
arrTemp(i, 1) = arrNGram(i, 1)
Next

CreateNGram = arrTemp
End Function

Function CompareNGram(arr1(), arr2())
Dim i, j, intMatches, intCount1, intCount2

intMatches = 0
intCount1 = 0

For i = 0 To UBound(arr1)
intCount1 = intCount1 + arr1(i, 1)
intCount2 = 0

For j = 0 To UBound(arr2)
intCount2 = intCount2 + arr2(j, 1)

If arr1(i, 0) = arr2(j, 0) Then
If arr1(i, 1) >= arr2(j, 1) Then
intMatches = intMatches + arr2(j, 1)
Else
intMatches = intMatches + arr1(i, 1)
End If
End If
Next
Next

CompareNGram = 2 * intMatches / (intCount1 + intCount2)
End Function

Function Bigram(ByVal str1 As String, ByVal str2 As String) As Double
Dim x(), y()
x = CreateNGram(str1, 2)
y = CreateNGram(str2, 2)
Bigram = CompareNGram(x, y)
End Function
``````

## 感想

どう標本するかは一考の余地があるかも。

Why not register and get more from Qiita?
1. We will deliver articles that match you
By following users and tags, you can catch up information on technical fields that you are interested in as a whole
2. you can read useful information later efficiently
By "stocking" the articles you like, you can search right away