0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 1 year has passed since last update.

ローマ字かな変換

Last updated at Posted at 2023-01-07

ローマ字かな変換をするプログラムです。rk.cとしてセーブして下さい。
コンパイル後、コマンドラインから rk <ローマ字> や rk <かな> のように実行して変換して下さい。引数なし、または - を指定すると標準入力から入力を取ります。母音などの前の「ん」を表す n には、直後にアポストロフィ ' を付けて下さい。シェルから入力する場合は、アポストロフィがシェルに解釈されるので、\ でエスケープして下さい。
cc rk.c -o rk で、コンパイルできます。

rk.c
/*
 * かなローマ字変換をするプログラム rk.c Ver.2.2 
 */

#include  <stdio.h>
#include  <stdlib.h>
#include  <string.h>
#include  <stdbool.h>
#include  <ctype.h>

/*
 * Non-zero when x is one of the romaji consonant letters listed below.
 * Uses strchr() from <string.h> instead of the legacy index(), which is
 * declared in <strings.h> -- a header this file does not include.
 * Note: strchr() also matches the terminating NUL of the letter list, so the
 * result is non-zero for x == '\0'; callers must test for end-of-string
 * themselves.
 */
#define isconsonant(x)  (strchr("kdsthnmyrwzjpbcgqvf",(x))!=NULL)

/*
 * Romaji <-> kana conversion table: consecutive { romaji, kana } pairs,
 * terminated by a NULL pair.
 *
 * Both conversion directions scan from the top and take the FIRST entry that
 * is a prefix of the remaining input, so the order of rows matters:
 *  - romaji -> kana: the bare "n" must stay last so "na", "nya", ... win;
 *  - kana -> romaji: an entry whose kana is a prefix of another entry's kana
 *    must come after it.  For that reason the plain "da" row is placed after
 *    the "dha" row; otherwise "でぃ" would match "で" first and come out as
 *    "dexi" instead of "dhi".
 * When one kana has several spellings, the first one listed is the one
 * produced by kana -> romaji (e.g. "xtu" for the small tsu).
 */
static  char  *tab[]={
    "-"  ,"ー"  ,
    "wa" ,"わ"  ,"wi" ,"うぃ",               "we" ,"うぇ","wo" ,"を"  ,
    "wha","うぁ","whi","うぃ",               "whe","うぇ","who","うぉ",
   "va","ゔぁ","vi","ゔぃ",             "ve","ゔぇ","vo","ゔぉ",
                                  "vu" ,"ゔ",
    "a"  ,"あ"  ,"i"  ,"い"  ,"u"  ,"う"  ,"e"  ,"え"  ,"o"  ,"お"  ,
                                  "wu" ,"う"  , "whu","う"  ,
    "kya","きゃ","kyi","きぃ","kyu","きゅ","kye","きぇ","kyo","きょ",
    "gya","ぎゃ","gyi","ぎぃ","gyu","ぎゅ","gye","ぎぇ","gyo","ぎょ",
    "nya","にゃ","nyi","にぃ","nyu","にゅ","nye","にぇ","nyo","にょ",
    "qa" ,"くぁ","qi" ,"くぃ","qu" ,"きゅ","qe" ,"くぇ","qo" ,"くぉ",
    "kwa","くゎ","kwi","くぃ","kwu","くぅ","kwe","くぇ","kwo","くぉ",
    "gwa","ぐゎ","gwi","ぐぃ","gwu","ぐぅ","gwe","ぐぇ","gwo","ぐぉ",
    "sha","しゃ",               "shu","しゅ","she","しぇ","sho","しょ",
    "sya","しゃ","syi","しぃ","syu","しゅ","sye","しぇ","syo","しょ",
    "ja" ,"じゃ",               "ju" ,"じゅ","je" ,"じぇ","jo" ,"じょ",
    "jya","じゃ","jyi","じぃ","jyu","じゅ","jye","じぇ","jyo","じょ",
    "cha","ちゃ",               "chu","ちゅ","che","ちぇ","cho","ちょ",
    "zya","じゃ","zyu","じゅ","zyo","じょ",
    "tya","ちゃ","tyi","ちぃ","tyu","ちゅ","tye","ちぇ","tyo","ちょ",
    "cya","ちゃ","cyi","ちぃ","cyu","ちゅ","cye","ちぇ","cyo","ちょ",
    "dya","ぢゃ","dyi","ぢぃ","dyu","ぢゅ","dye","ぢぇ","dyo","ぢょ",
    "tha","てゃ","thi","てぃ","thu","てゅ","the","てぇ","tho","てょ",
    "dha","でゃ","dhi","でぃ","dhu","でゅ","dhe","でぇ","dho","でょ",
    "da" ,"だ"  ,"di" ,"ぢ"  ,"du" ,"づ"  ,"de" ,"で"  ,"do" ,"ど"  ,
    "hya","ひゃ","hyi","ひぃ","hyu","ひゅ","hye","ひぇ","hyo","ひょ",
    "bya","びゃ","byi","びぃ","byu","びゅ","bye","びぇ","byo","びょ",
    "pya","ぴゃ","pyi","ぴぃ","pyu","ぴゅ","pye","ぴぇ","pyo","ぴょ",
    "fa" ,"ふぁ","fi" ,"ふぃ",               "fe" ,"ふぇ","fo" ,"ふぉ",
    "mya","みゃ","myi","みぃ","myu","みゅ","mye","みぇ","myo","みょ",
    "rya","りゃ","ryi","りぃ","ryu","りゅ","rye","りぇ","ryo","りょ",
    "ka" ,"か"  ,"ki" ,"き"  ,"ku" ,"く"  ,"ke" ,"け"  ,"ko" ,"こ"  ,
    "sa" ,"さ"  ,"shi" ,"し"  ,"su" ,"す"  ,"se" ,"せ"  ,"so" ,"そ"  ,
                   "si","し"  ,
    "ta" ,"た"  ,"chi" ,"ち"  ,"tsu" ,"つ"  ,"te" ,"て"  ,"to" ,"と"  ,
                   "ti","ち"  ,"tu","つ"  ,
    "na" ,"な"  ,"ni" ,"に"  ,"nu" ,"ぬ"  ,"ne" ,"ね"  ,"no" ,"の"  ,
    "ha" ,"は"  ,"hi" ,"ひ"  ,"fu" ,"ふ"  ,"he" ,"へ"  ,"ho" ,"ほ"  ,
                                  "hu" ,"ふ"  ,
    "ma" ,"ま"  ,"mi" ,"み"  ,"mu" ,"む"  ,"me" ,"め"  ,"mo" ,"も"  ,
    "ya" ,"や"  ,"yi" ,"ゐ"  ,"yu" ,"ゆ"  ,"ye" ,"ゑ"  ,"yo" ,"よ"  ,
    "ra" ,"ら"  ,"ri" ,"り"  ,"ru" ,"る"  ,"re" ,"れ"  ,"ro" ,"ろ"  ,
    "ga" ,"が"  ,"gi" ,"ぎ"  ,"gu" ,"ぐ"  ,"ge" ,"げ"  ,"go" ,"ご"  ,
    "za" ,"ざ"  ,"ji" ,"じ"  ,"zu" ,"ず"  ,"ze" ,"ぜ"  ,"zo" ,"ぞ"  ,
                   "zi" ,"じ"  ,
    "ba" ,"ば"  ,"bi" ,"び"  ,"bu" ,"ぶ"  ,"be" ,"べ"  ,"bo" ,"ぼ"  ,
    "pa" ,"ぱ"  ,"pi" ,"ぴ"  ,"pu" ,"ぷ"  ,"pe" ,"ぺ"  ,"po" ,"ぽ"  ,
    "ca" ,"か"  ,"ci" ,"し"  ,"cu" ,"く"  ,"ce" ,"せ"  ,"co" ,"こ"  ,
    "xya","ゃ"  ,"xyu","ゅ"  ,"xyo","ょ"  ,
    "xa" ,"ぁ"  ,"xi" ,"ぃ"  ,"xu" ,"ぅ"  ,"xe" ,"ぇ"  ,"xo" ,"ぉ"  ,
    "xtu","っ"  ,"xtsu","っ" ,"xwa","ゎ"  ,
    "ltu","っ"  ,"ltsu","っ" ,"lwa","ゎ"  ,
    "lya","ゃ"  ,"lyu","ゅ"  ,"lyo","ょ"  ,
    "la" ,"ぁ"  ,"li" ,"ぃ"  ,"lu" ,"ぅ"  ,"le" ,"ぇ"  ,"lo" ,"ぉ"  ,
    "n"  ,"ん"  ,
    NULL, NULL };

/*
 * Return true when the UTF-8 string s starts with one of the kana listed
 * below: the five vowels, や/ゐ/ゆ/ゑ/よ, or the na-row (な に ぬ ね の).
 * The comparison is a byte-wise prefix match; an empty s yields false.
 */
bool  isutf8vn(char *s) {
  static char *heads[] = {
    "あ","い","う","え","お",
    "や","ゐ","ゆ","ゑ","よ",
    "な","に","ぬ","ね","の",
    NULL
  };
  char **cur = heads;

  while (*cur != NULL) {
    size_t headlen = strlen(*cur);
    if (strncmp(*cur, s, headlen) == 0) {
      return true;
    }
    ++cur;
  }
  return false;
}

/*
 * Return true when the UTF-8 string s starts with one of the two-kana
 * sequences うぁ, うぃ, うぇ or うぉ (byte-wise prefix match).
 */
bool  isutf8nw(char *s) {
  static char *pairs[] = { "うぁ", "うぃ", "うぇ", "うぉ", NULL };
  bool matched = false;

  for (char **cur = pairs; !matched && *cur != NULL; ++cur) {
    matched = (strncmp(*cur, s, strlen(*cur)) == 0);
  }
  return matched;
}

/*
 * Convert one romaji syllable at the front of *sp using the conversion table.
 *
 * The table is scanned from the top; the first romaji entry that is a prefix
 * of *sp wins.  Its kana is written to *op, *sp is advanced past the romaji
 * and *op past the kana.  The output is always NUL-terminated at the
 * (possibly unchanged) *op.
 *
 * Returns the number of bytes of kana written, or 0 when nothing matched
 * (in which case *sp and *op are left where they were).
 */
int romajikanatabconv(char **sp,char **op) {
      char *src = *sp;
      char *dst = *op;
      int written = 0;
      char **entry = tab;

      while (entry[0] != NULL) {
        size_t romajilen = strlen(entry[0]);
        if (strncmp(entry[0], src, romajilen) == 0) {
          size_t kanalen = strlen(entry[1]);
          memcpy(dst, entry[1], kanalen);
          written = (int)kanalen;
          dst += kanalen;
          src += romajilen;
          break;
        }
        entry += 2;
      }

      *dst = '\0';
      *sp = src;
      *op = dst;
      return written;
}

/*
 * Convert the romaji string s to kana, writing a NUL-terminated UTF-8 string
 * to o.
 *
 * Rules, applied in this order at each input position:
 *  1. a doubled consonant emits a small tsu followed by the syllable that
 *     starts at the second consonant;
 *  2. 'n' followed by end of string, newline, '-' or an apostrophe emits the
 *     syllabic n (the apostrophe is consumed);
 *  3. otherwise the conversion table is consulted; input bytes that match
 *     nothing are skipped.
 *
 * o is not bounds-checked.  With the current table at most three output
 * bytes are produced per input byte, so o must hold 3*strlen(s)+1 bytes.
 */
void  romaji2kana(char *s,char *o) {
    static const char sokuon[] = "っ";
    const size_t sokuon_len = sizeof sokuon - 1;

    // Terminate up front so that an empty input yields an empty string
    // instead of leaving the caller's buffer uninitialised.
    *o = '\0';

    while (s[0] != '\0') {

      // Doubled consonant: insert a small tsu before the following syllable.
      // (s[0] is known to be non-NUL here, so isconsonant() is reliable.)
      if (isconsonant(s[0]) && s[0] == s[1]) {
        char *rest = s + 1;
        char *start = o;
        int l = romajikanatabconv(&rest, &o);
        if (l == 0) {
            // No syllable starts at the second consonant: drop the first
            // one and look at the input again from the next byte.
            ++s;
            continue;
            }
        // Shift the freshly written syllable right and put the small tsu
        // in front of it; memmove is required because the regions overlap.
        memmove(start + sokuon_len, start, (size_t)l);
        memcpy(start, sokuon, sokuon_len);
        o = start + sokuon_len + l;
        *o = '\0';
        s = rest;
        continue;
        }

      // 'n' before end of string, newline, long-vowel mark or apostrophe
      // becomes the syllabic n.
      if (s[0] == 'n' &&
           (s[1] == '\0' || s[1] == '\n' || s[1] == '-' || s[1] == '\'')) {
          strcpy(o, "ん");
          o += strlen("ん");
          s += (s[1] == '\'') ? 2 : 1;
          continue;
          }

      // Ordinary table lookup; skip a byte that matches no entry.
      if (romajikanatabconv(&s, &o) == 0)
          s++;
      *o = '\0';
      }
    return;
}

/*
 * Convert the UTF-8 kana string s to romaji, writing a NUL-terminated string
 * to o.
 *
 * Pass 1 walks the input:
 *  - the syllabic n followed by a kana accepted by isutf8vn() (and not by
 *    isutf8nw()) is written as "n'" so it cannot be read back as part of
 *    the next syllable;
 *  - everything else is looked up in the conversion table (first kana entry
 *    that is a prefix of the input wins); bytes that match nothing are
 *    skipped.
 * Pass 2 rewrites every "xtu" that is followed by a consonant into a doubled
 * consonant (e.g. "xtuka" -> "kka").
 *
 * o is not bounds-checked; the caller must provide a large enough buffer.
 */
void  kana2romaji(char *s,char *o) {
    char  *p=o;
    const size_t nlen=strlen("ん");

    while(*s!='\0') {
      // Syllabic n before a vowel / ya-row / na-row kana becomes "n'".
      // The sequences accepted by isutf8nw() are excluded from this rule.
      if (strncmp("ん",s,nlen)==0 && !isutf8nw(s+nlen) && isutf8vn(s+nlen)) {
        memcpy(o,"n'",2);
        o+=2;
        s+=nlen;
        continue;
        }

      // Table lookup: first kana entry that prefixes the input.
      bool found=false;
      for(int idx=0;tab[idx]!=NULL;idx+=2) {
        size_t kanalen=strlen(tab[idx+1]);
        if (strncmp(tab[idx+1],s,kanalen)==0) {
          size_t romajilen=strlen(tab[idx]);
          memcpy(o,tab[idx],romajilen);
          o+=romajilen;
          s+=kanalen;
          found=true;
          break;
          }
        }
      if (!found) s++;
      }

    *o='\0';

    // Pass 2: "xtu" + consonant  ->  doubled consonant.
    while(*p!='\0') {
      if (strncmp(p,"xtu",3)!=0) {
        p++;
        continue;
        }
      char next=p[3];
      if (next!='\0' && isconsonant(next)) {
        p[0]=next;
        // Source and destination overlap, so strcpy() would be undefined
        // behaviour here; memmove() also carries the terminating NUL.
        memmove(p+1,p+3,strlen(p+3)+1);
        // Only the two consonants are known to exist now; stepping three
        // bytes could jump past the terminator (e.g. "xtun" -> "nn").
        p+=2;
        }
      else {
        p+=3;
        }
      }
    return;
}

/*
 * Convert arg and print the result followed by a newline.
 *
 * The direction is chosen from the first byte: an ASCII letter or '-' means
 * romaji -> kana, anything else means kana -> romaji.
 *
 * The output buffer is sized from the input instead of being a fixed
 * 1024-byte array: romaji -> kana can produce up to three bytes per input
 * byte, and a command-line argument may be arbitrarily long.
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated.
 */
int rk(char *arg) {
  size_t len = strlen(arg);
  // calloc(len + 1, 3) gives 3*len + 3 zeroed bytes and checks the
  // multiplication for overflow.
  char *word = calloc(len + 1, 3);
  if (word == NULL) {
    fprintf(stderr, "rk: out of memory\n");
    return -1;
    }

  // <ctype.h> functions require a value representable as unsigned char;
  // kana bytes are negative when plain char is signed.
  unsigned char first = (unsigned char)arg[0];
  if (isalpha(first) || first == '-')
    romaji2kana(arg, word);
  else
    kana2romaji(arg, word);

  printf("%s\n", word);
  free(word);
  return 0;
}

/*
 * Cut arg at its first newline, in place, and return arg.
 * A string without a newline (last line of input lacking '\n', or a line
 * longer than the read buffer) is returned unchanged; strcspn() then yields
 * the index of the existing terminator, so no NULL pointer is dereferenced.
 */
char *lftonull(char *arg) {
 arg[strcspn(arg,"\n")]='\0';
 return(arg);
}

/*
 * Entry point.
 *   rk            or  rk -   : convert every line read from standard input
 *   rk --help     or  rk -h  : print usage
 *   rk <word>                : convert the first argument
 * Always exits with status 0.
 */
int main(int argc,char *argv[]) {
  char line[1024];
  int use_stdin = (argc==1) || (argc==2 && strcmp(argv[1],"-")==0);

  if (use_stdin) {
    // One conversion per input line, with the trailing newline stripped.
    for (;;) {
      if (fgets(line,1023,stdin)==NULL)
        break;
      rk(lftonull(line));
      }
    exit(0);
    }

  int want_help = (strcmp(argv[1],"--help")==0) || (strcmp(argv[1],"-h")==0);
  if (!want_help) {
    rk(argv[1]);
    exit(0);
    }

  printf("Usage(1): rk ことば /*かなからローマ字への変換 */\n");
  printf("Usage(2): rk kotoba /*ローマ字からかなへの変換 */\n");
  printf("Usage(3): rk または rk - /* 標準入力から入力を取ります。*/\n");
  exit(0);
}


0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?