0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

PowerShellでレーベンシュタイン距離(Levenshtein Distance, Edit Distance)

Posted at

コマンドレットっぽく体裁を整えてみる

LevenshteinDistance Class

class LevenshteinDistance {
    [String]$String1
    [String]$String2
    [Int32]$Distance
    [Double]$DistanceNormalized
    [Int32]$CostInsert
    [Int32]$CostDelete
    [Int32]$CostSubstitute
    [Int32]$CostMatch

    LevenshteinDistance([String]$String1, [String]$String2) {
        $this.Calculate($String1, $String2, 1, 1, 1, 0)
    }
    LevenshteinDistance([String]$String1, [Char[]]$String2) {
        $this.Calculate($String1, $String2, 1, 1, 1, 0)
    }
    LevenshteinDistance([Char[]]$String1, [String]$String2) {
        $this.Calculate($String1, $String2, 1, 1, 1, 0)
    }
    LevenshteinDistance([Char[]]$String1, [Char[]]$String2) {
        $this.Calculate($String1, $String2, 1, 1, 1, 0)
    }
    LevenshteinDistance([String]$String1, [String]$String2, [Int32]$CostInsert, [Int32]$CostDelete, [Int32]$CostSubstitute, [Int32]$CostMatch) {
        $this.Calculate($String1, $String2, $CostInsert, $CostDelete, $CostSubstitute, $CostMatch)
    }
    LevenshteinDistance([String]$String1, [Char[]]$String2, [Int32]$CostInsert, [Int32]$CostDelete, [Int32]$CostSubstitute, [Int32]$CostMatch) {
        $this.Calculate($String1, $String2, $CostInsert, $CostDelete, $CostSubstitute, $CostMatch)
    }
    LevenshteinDistance([Char[]]$String1, [String]$String2, [Int32]$CostInsert, [Int32]$CostDelete, [Int32]$CostSubstitute, [Int32]$CostMatch) {
        $this.Calculate($String1, $String2, $CostInsert, $CostDelete, $CostSubstitute, $CostMatch)
    }
    LevenshteinDistance([Char[]]$String1, [Char[]]$String2, [Int32]$CostInsert, [Int32]$CostDelete, [Int32]$CostSubstitute, [Int32]$CostMatch) {
        $this.Calculate($String1, $String2, $CostInsert, $CostDelete, $CostSubstitute, $CostMatch)
    }

    hidden [void]Calculate([Object]$String1, [Object]$String2, [Int32]$CostInsert, [Int32]$CostDelete, [Int32]$CostSubstitute, [Int32]$CostMatch) {
        if ($CostInsert -lt 0 -or $CostDelete -lt 0 -or $CostSubstitute -lt 0 -or $CostMatch -lt 0) {
            throw [ArgumentOutOfRangeException]::("The edit costs must be non-negative integers.")
        }
        $this.CostInsert = $CostInsert
        $this.CostDelete = $CostDelete
        $this.CostSubstitute = $CostSubstitute
        $this.CostMatch = $CostMatch
        $this.String1 = [String]::new($String1)
        $this.String2 = [String]::new($String2)

        [String]$str1 = $this.String1
        [String]$str2 = $this.String2

        if ($str1.Length -eq 0) {
            $this.Distance = $str2.Length * $CostInsert
            if ($str2.Length -eq 0) {
                $this.DistanceNormalized = [Double]::NaN
            }
            else {
                $this.DistanceNormalized = [Double]$this.Distance / [Double]$str2.Length
            }
            return
        }
        elseif ($str2.Length -eq 0) {
            $this.Distance = $str1.Length * $CostDelete
            $this.DistanceNormalized = [Double]$this.Distance / [Double]$str2.Length
            return
        }

        [Int32]$i = 0
        [Int32]$j = 0
        [Int32[]]$distancesPrevious = [Int32[]]::new($str2.Length + 1)
        [Int32[]]$distancesCurrent = $null
        $distancesPrevious[0] = 0
        for ($i = 1; $i -le $str2.Length; $i++) {
            $distancesPrevious[$i] = $distancesPrevious[$i - 1] + $CostInsert
        }
        Write-Verbose (" :    " + [String]::Join(", ", $str2))
        for ($i = 1; $i -le $str1.Length; $i++) {
            $distancesCurrent = [Int32[]]::new($str2.Length + 1)
            $distancesCurrent[0] = $distancesPrevious[0] + $CostDelete
            for ($j = 1; $j -le $str2.Length; $j++) {
                [Int32]$costDeleteCurrent = $distancesPrevious[$j] + $CostDelete
                [Int32]$costInsertCurrent = $distancesCurrent[$j - 1] + $CostInsert
                [Int32]$costSubstituteCurrent = $distancesPrevious[$j - 1] + $CostSubstitute
                if ($str1[$i] -eq $str2[$j]) {
                    [Int32]$costMatchCurrent = $distancesPrevious[$j - 1] + $CostMatch
                }
                else {
                    [Int32]$costMatchCurrent = [Int32]::MaxValue
                }
                $distancesCurrent[$j] = [Math]::Min([Math]::Min([Math]::Min($costInsertCurrent, $costDeleteCurrent), $costSubstituteCurrent), $costMatchCurrent)
            }
            Write-Verbose ($str1[$i - 1] + ": " + [String]::Join(", ", $distancesCurrent))
            if ($i -lt $str1.Length) {
                $distancesPrevious = $distancesCurrent
            }
        }

        $this.Distance = $distancesCurrent[$str2.Length]
        $this.DistanceNormalized = $this.Distance / [Math]::Max($str1.Length, $str2.Length)
        return
    }
}

Get-LevenshteinDistance Cmdlet

function Get-LevenshteinDistance {
    [CmdletBinding()]
    [OutputType([LevenshteinDistance])]
    param (
        [Parameter(ValueFromPipelineByPropertyName = $true, Mandatory = $true, Position = 0)]
        [ValidateNotNull()]
        [Object]$String1,

        [Parameter(ValueFromPipelineByPropertyName = $true, Mandatory = $true, Position = 1)]
        [ValidateNotNull()]
        [Object]$String2,

        [Parameter(ValueFromPipelineByPropertyName = $true, Position = 2)]
        [ValidateRange("NonNegative")]
        [Int32]$CostInsert = 1,

        [Parameter(ValueFromPipelineByPropertyName = $true, Position = 3)]
        [ValidateRange("NonNegative")]
        [Int32]$CostDelete = 1,

        [Parameter(ValueFromPipelineByPropertyName = $true, Position = 4)]
        [ValidateRange("NonNegative")]
        [Int32]$CostSubstitute = 1,

        [Parameter(ValueFromPipelineByPropertyName = $true, Position = 5)]
        [ValidateRange("NonNegative")]
        [Int32]$CostMatch = 0
    )

    if (!(($String1 -is [String] -or $String1 -is [Char[]]) -and ($String2 -is [String] -or $String2 -is [Char[]]) )) {
        throw [ArgumentException]::new("String1 and String2 must be either String or Char[]")
    }

    Write-Output ([LevenshteinDistance]::new($String1, $String2, $CostInsert, $CostDelete, $CostSubstitute, $CostMatch))

}

参考

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?