LoginSignup
0
0

More than 5 years have passed since last update.

c > Damerau–Levenshtein distance 実装 > 特定の文字列に対するコストに緩急つける > v0.2

Last updated at Posted at 2016-10-21

関連 http://qiita.com/mpyw/items/a4495d476ea9ffe54e16
関連 http://qiita.com/7of9/items/ca66015e2933202decc9
関連 http://qiita.com/7of9/items/f2513d05e65eb3b201c2

Damerau–Levenshtein distance関連

v0.2

参考 https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance

stackoverflowのコードを変更して、4つの操作において特定の文字列に対するコストの緩急をつけようとしている。

damlev.c
/*
v0.2 2016 Oct. 21
  - use cost from functions given below
  - add calc_cost_transposition()
  - add calc_cost_substitution()
  - add calc_cost_deletion()
  - add calc_cost_insertion()
  - increase size of DA[] for 128x128
v0.1 2016 Oct. 19
  - branched from stackoverflow post(*1)

[*1] http://stackoverflow.com/questions/10727174/damerau-levenshtein-distance-edit-distance-with-transposition-c-implementation
*/


#define USE_COST_FUNC

#define d(i,j) dd[(i) * (m+2) + (j) ]
#define min(x,y) ((x) < (y) ? (x) : (y))
#define min3(a,b,c) ((a)< (b) ? min((a),(c)) : min((b),(c)))
#define min4(a,b,c,d) ((a)< (b) ? min3((a),(c),(d)) : min3((b),(c),(d)))

int dprint(int* dd, int n,int m){
 int i,j;
 for (i=0; i < n+2;i++){
    for (j=0;j < m+2; j++){
        printf("%02d ",d(i,j));
    }
    printf("\n");
 }
 printf("\n");
 return 0;
}

int calc_cost_insertion(char s)
{
  if (s == 'c' || s == 'r') {
    return 334;
  }
  return 1;
}

int calc_cost_deletion(char s)
{
  if (s == 'c' || s == 'r') {
    return 334;
  }
  return 1;  
}

int calc_cost_substitution(char s, char t)
{
  if (s == 'c' && t == 'r') {
    return 334;
  }
  return 1;
}

int calc_cost_transposition(char s, char t)
{
  if (s == 'c' && t == 'r') {
    return 334;
  }
  return 1;
}

int dldist2(char *s, char* t, int n, int m) {
    int *dd;
    int i, j, i1,j1,cost,DB;
    int result_cost;
    int INFINITY = n + m;
//    int DA[256 * sizeof(int)];
    int DA[128 * 128 * sizeof(int)];
    int cost_ins; // insert_costs 
    int cost_del; // delete_costs 
    int cost_sub; // substitute_costs 
    int cost_tra; // transpose_costs 

    memset(DA, 0, sizeof(DA));

    if (!(dd = (int*) malloc((n+2)*(m+2)*sizeof(int)))) {
      return -1;
    }

    d(0,0) = INFINITY;
    for(i = 0; i < n+1; i++) {
      d(i+1,1) = i ;
      d(i+1,0) = INFINITY;
    }
    for(j = 0; j<m+1; j++) {
      d(1,j+1) = j ;
      d(0,j+1) = INFINITY;
    }      
//    dprint(dd,n,m);

    for(i = 1; i< n+1; i++) {
      DB = 0;
      for(j = 1; j< m+1; j++) {
        i1 = DA[t[j-1]];
        j1 = DB;
        cost = ((s[i-1]==t[j-1])?0:1);
        cost_ins = calc_cost_insertion(s[i+1-1]); // or t[]???
        cost_del = calc_cost_deletion(s[i-1]); // or t[]???
        cost_sub = calc_cost_substitution(s[i-1], t[j-1]); 
        cost_tra = calc_cost_transposition(s[i1 - 1], t[j1 - 1]);

        if(cost==0) DB = j;
#ifdef USE_COST_FUNC
        d(i+1,j+1) =
          min4(
              d(i,j) + cost_sub + cost, // substitution
              d(i+1,j) + cost_ins, // insertion
              d(i,j+1) + cost_del,  // deletion
              d(i1,j1) + cost_tra // transposition
            );
#else // USE_COST_FUNC
        d(i+1,j+1) =
          min4(d(i,j)+cost, // substitution
              d(i+1,j) + 1, // insertion
              d(i,j+1)+1,  // deletion
              d(i1,j1) + (i-i1-1) + 1 + (j-j1-1)); // transposition
#endif // USE_COST_FUNC          
      }
      DA[s[i-1]] = i;
//      dprint(dd,n,m);
    }
    result_cost = d(n+1,m+1);
    free(dd);
    return result_cost;
}

int main()
{
  char *inpstr = "composr"; // input
  char *expstr = "composer"; // expected

  int dist;
  dist = dldist2(inpstr, expstr, strlen(inpstr), strlen(expstr)); 
  printf("%d\n", dist);
  return 0;
}

実装の確かさは未確認。

疑問点

  • insertionのcostチェックするのはinput文字列か、expected文字列か?
  • deletionのcostチェックするのはinput文字列か、expected文字列か?
  • d(i,j) + cost_sub + cost, // substitutionにおいて+ costは余分か?
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0