発端
???「compsoer
って既に334回は打ったしいい加減エイリアスにしてやる」
???「どうせならレーベンシュタイン距離使って似たもの全部エイリアスにしようぜ?」
過去にこんなの書いたけど,こいつらは文字の入れ替わり対応とか文字種ごとの重み付きとかができないので困っていた。その後,こんな改良版アルゴリズムがあることを知る。
作戦1: composer
っぽいもののエイリアスを作る
PHPだとライブラリが無かったけどPythonだったらあった。しかしPython 2。妥協。
何回か試していい結果が出るようにチューニングしました。
import numpy as np
from weighted_levenshtein import dam_lev
from itertools import permutations
insert_costs = np.ones(128, dtype=np.float64)
insert_costs[ord('c')] = 334
insert_costs[ord('r')] = 334
delete_costs = np.ones(128, dtype=np.float64)
delete_costs[ord('c')] = 334
delete_costs[ord('r')] = 334
substitute_costs = np.ones((128, 128), dtype=np.float64)
substitute_costs[ord('c'), :] = 334
substitute_costs[:, ord('r')] = 334
transpose_costs = np.ones((128, 128), dtype=np.float64) * 0.75
transpose_costs[ord('c'), :] = 334
transpose_costs[:, ord('c')] = 334
results = []
for i in xrange(7, 9):
for chars in permutations('composer', i):
cmd = ''.join(chars)
if dam_lev(cmd, 'composer',
insert_costs=insert_costs,
delete_costs=delete_costs,
substitute_costs=substitute_costs,
transpose_costs=transpose_costs
) < 2 and cmd != 'composer':
results.append(cmd)
for result in results:
print "alias %s=composer" % result
alias composr=composer
alias compoer=composer
alias compors=composer
alias compore=composer
alias compsor=composer
alias compser=composer
alias compsre=composer
alias compeor=composer
alias compesr=composer
alias comopsr=composer
alias comoper=composer
alias comoser=composer
alias comosre=composer
alias comoesr=composer
alias comsper=composer
alias comsoer=composer
alias copmosr=composer
alias copmoer=composer
alias copmser=composer
alias coposer=composer
alias coposre=composer
alias copoesr=composer
alias copsoer=composer
alias coomser=composer
alias coopser=composer
alias cmoposr=composer
alias cmopoer=composer
alias cmopser=composer
alias cmooser=composer
alias cmposer=composer
alias cmposre=composer
alias cmpoesr=composer
alias cmposer=composer
alias cmposre=composer
alias cmpoesr=composer
alias cmpsoer=composer
alias cmpsoer=composer
alias cmooser=composer
alias cmoposr=composer
alias cmopoer=composer
alias cmopser=composer
alias cpooser=composer
alias cpmoser=composer
alias cpmoser=composer
alias cpooser=composer
alias coomser=composer
alias coopser=composer
alias comopsr=composer
alias comoper=composer
alias comoser=composer
alias comosre=composer
alias comoesr=composer
alias composr=composer
alias compoer=composer
alias compors=composer
alias compore=composer
alias compsor=composer
alias compser=composer
alias compsre=composer
alias compeor=composer
alias compesr=composer
alias comsoer=composer
alias comsper=composer
alias coposer=composer
alias coposre=composer
alias copoesr=composer
alias copmosr=composer
alias copmoer=composer
alias copmser=composer
alias copsoer=composer
alias composre=composer
alias compoesr=composer
alias compsoer=composer
alias compsore=composer
alias comopser=composer
alias comopsre=composer
alias comopesr=composer
alias copmoser=composer
alias copmosre=composer
alias copmoesr=composer
alias copmsoer=composer
alias cmoposer=composer
alias cmoposre=composer
alias cmopoesr=composer
alias cmopsoer=composer
alias cmoopser=composer
alias cmoopser=composer
alias cmoposer=composer
alias cmoposre=composer
alias cmopoesr=composer
alias cmopsoer=composer
alias comopser=composer
alias comopsre=composer
alias comopesr=composer
alias composre=composer
alias compoesr=composer
alias compsoer=composer
alias compsore=composer
alias copmoser=composer
alias copmosre=composer
alias copmoesr=composer
alias copmsoer=composer
これで安心だゾウ🐘
作戦2: Python + command_not_found_handler
(作戦1を投下して数時間が経過したころ…)
"composer" のタイプミス多すぎ問題を解決する by @mpyw on @Qiita 頭おかしい…(꒪ω꒪) https://t.co/Zp0Cswb8Nz
— ぬばた (@nubata) 2016年10月18日
スマートな解決法かと思ったらぜんぜんスマートじゃなかった
— えあい (@eai04191) 2016年10月19日
ヤクザ解決法かよ
— ホウライ (@Fai_ri_4A) 2016年10月19日
知 っ て た
↓
どうせなら汎用化してやる!!!
↓
zsh で method_missing っぽいことをするには command_not_found_handler - mollifier delta blog を発見
↓
自前で実装
command_not_found_handler() {
local code='
import sys
import commands
import numpy as np
from weighted_levenshtein import dam_lev
insert_costs = np.ones(128, dtype=np.float64)
delete_costs = np.ones(128, dtype=np.float64)
substitute_costs = np.ones((128, 128), dtype=np.float64)
transpose_costs = np.ones((128, 128), dtype=np.float64) * 0.75
min_length = 5
min_similarity = 0.6
actual = sys.argv[1]
results = {}
if actual.startswith("_"):
sys.exit(1)
for expected in sys.stdin:
if len(expected) < min_length or expected.startswith("_"):
continue
result = 1.0 - dam_lev(actual, expected,
insert_costs=insert_costs,
delete_costs=delete_costs,
substitute_costs=substitute_costs,
transpose_costs=transpose_costs
) / max(len(actual), len(expected))
if (result >= min_similarity):
results[result] = expected
if not results:
sys.exit(1)
print results[max(results)]
'
local ___result
___result=$(
print -l ${(ok)functions} ${(ok)aliases} ${(ok)commands} ${(ok)builtins} |
python2.7 -c $code $0
)
if [[ $? == 0 ]]; then
printf %s "Do you mean '$___result' ? [y/n]: "
local answer
read answer
if [[ $answer == y || $answer == Y ]]; then
als=$(alias $___result)
if [[ $? == 0 ]]; then
eval local $als
fi
shift
eval $___result ${(q)@}
return
fi
fi
return 127
}
一応動くんですが,若干遅めなのが難点。
作戦3: Zshの自動補正機能を使う
こんなアホなことしなくても,zshには自動補正機能があるそうです。
mpyw@localhost:~$ setopt correct
mpyw@localhost:~$ compseor -V
zsh: correct 'compseor' to 'composer' [nyae]? y
Composer version 1.2.1 2016-09-12 11:27:19
mpyw@localhost:~$
ア゛ア゛ア゛ア゛ア゛ア゛ア゛ア゛ア゛ア゛も゛っ゛と゛早゛く゛知゛り゛た゛か゛っ゛た゛ア゛ア゛ア゛ア゛ア゛ア゛ア゛ア゛ア゛
(generated by 阿鼻叫喚ジェネレーター)
作戦4: C + command_not_found_handler
意外にも作戦3の精度がイマイチだったのと,作戦2の動作速度が遅いことを踏まえた上で,これを試してみることに。
こちらで紹介していただいたものを少しアレンジしました。実用的になったのでGitHubにも置いておきます。
#define BUFSIZE 512
#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
#include <unistd.h>
/*
* This implementation allows the costs to be weighted:
*
* - w (as in "sWap")
* - s (as in "Substitution")
* - a (for insertion, AKA "Add")
* - d (as in "Deletion")
*
*/
double calc_similarity(const char *string1, const char *string2)
{
int len1 = strlen(string1),
len2 = strlen(string2);
double w = 0.75, s = 1, a = 1, d = 1,
*row0 = (double *)&(double[BUFSIZE]){},
*row1 = (double *)&(double[BUFSIZE]){},
*row2 = (double *)&(double[BUFSIZE]){};
for (int j = 0; j <= len2; ++j) {
row1[j] = j * a;
}
for (int i = 0; i < len1; ++i) {
row2[0] = (i + 1) * d;
for (int j = 0; j < len2; ++j) {
/* substitution */
row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
/* swap */
if (i > 0 && j > 0 &&
string1[i - 1] == string2[j] &&
string1[i] == string2[j - 1] &&
row2[j + 1] > row0[j - 1] + w) {
row2[j + 1] = row0[j - 1] + w;
}
/* deletion */
if (row2[j + 1] > row1[j + 1] + d) {
row2[j + 1] = row1[j + 1] + d;
}
/* insertion */
if (row2[j + 1] > row2[j] + a) {
row2[j + 1] = row2[j] + a;
}
}
double *dummy = row0;
row0 = row1;
row1 = row2;
row2 = dummy;
}
return 1.0 - row1[len2] / (len1 > len2 ? len1 : len2);
}
int readline(char *buffer)
{
int length;
fgets(buffer, BUFSIZE, stdin);
if (ferror(stdin) || feof(stdin)) {
return -1;
}
length = strlen(buffer);
if (length && buffer[length - 1] == '\n') {
buffer[--length] = '\0';
}
return length;
}
int main(int argc, char *argv[])
{
int opt, min_length = 1, length = 0;
double min_similarlity = 0.0, similarlity = 0.0;
char input[BUFSIZE] = {'\0'}, comparison[BUFSIZE] = {'\0'}, similitude[BUFSIZE] = {'\0'};
while ((opt = getopt(argc, argv, "i:l:s:h")) != -1) {
switch (opt) {
case 'i':
strncpy(input, optarg, BUFSIZE - 1);
break;
case 'l':
min_length = atoi(optarg);
break;
case 's':
min_similarlity = atof(optarg);
break;
case 'h':
default:
fprintf(stderr, "Usage: %s -i input < comparisons (Splitted by \"\\n\")\n", argv[0]);
fprintf(stderr, " [-l min_length=1] [-s min_similarlity=0.0]\n");
return opt != 'h';
}
}
if (!*input) {
fputs("No input specified\n", stderr);
return 1;
}
if (*input == '_') {
return 1;
}
similarlity = min_similarlity;
while ((length = readline(comparison)) > -1) {
if (length < min_length || length < 1 || *comparison == '_') {
continue;
}
double s = calc_similarity(input, comparison);
if (s < similarlity) {
continue;
}
similarlity = s;
strncpy(similitude, comparison, BUFSIZE - 1);
if (s == 1.0) {
break;
}
}
if (*similitude == '\0') {
return 1;
}
puts(similitude);
return 0;
}
パフォーマンス比較
作戦2,作戦3,作戦4の順番です。
0.15秒と0.23秒の違いはわりとでかいゾ