More than 5 years have passed since last update.

【c++】文字列中に含まれる漢字の文字コード取得

Last updated at 2016-11-11 / Posted at 2016-11-10

# 漢字の判別
文字列中に含まれている漢字を1つ1つ区別したかったので
文字コードを利用してやってみた.
やったこと忘れないための自分用メモ.
## 文字列に含まれる漢字の文字コードを取得・表示

sample.cpp

#include "stdafx.h"
#include <stdio.h>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Tells whether a byte is the first byte of a two-byte (full-width) Shift_JIS character.
//   Argument : c - the byte to classify
//   Returns  : 1 when c lies in 0x81-0x9F or 0xE0-0xFC, otherwise 0
int check_s_jis_zenkaku(unsigned char c){
   const bool in_low_range  = (0x81 <= c) && (c <= 0x9f);
   const bool in_high_range = (0xe0 <= c) && (c <= 0xfc);
   return (in_low_range || in_high_range) ? 1 : 0;
}

// Collects the character code of every kanji contained in a Shift_JIS string.
//   Argument : str - NUL-terminated byte string (a null pointer is treated as an empty string)
//   Returns  : vector of int holding the 16-bit code (first byte * 256 + second byte)
//              of each kanji, in order of appearance
// A two-byte character counts as kanji when its code lies in 0x889F-0x9872 or
// 0x989F-0xEAA4 AND its second byte is a valid trail byte (0x40-0xFC, not 0x7F).
std::vector<int> checkKanji(char *str){
	std::vector<int> res;// result vector
	if(!str){ return res; }

	const char *ch = str;// scanning cursor
	while(*ch != '\0'){
		const unsigned char first_byte = static_cast<unsigned char>(*ch);

		// A byte outside 0x81-0x9F / 0xE0-0xFC is a single-byte character:
		// it can never start a kanji, so step over one byte only.
		const bool is_lead_byte = ((first_byte >= 0x81) && (first_byte <= 0x9F)) ||
		                          ((first_byte >= 0xE0) && (first_byte <= 0xFC));
		if(!is_lead_byte){
			ch += 1;
			continue;
		}

		const unsigned char second_byte = static_cast<unsigned char>(*(ch+1));
		// Lead byte immediately followed by the terminator: the character is
		// truncated. Stop here so the cursor never jumps past the end of the string.
		if(second_byte == '\0'){ break; }

		const unsigned int moji_code = first_byte*256u + second_byte;// 16-bit character code

		// The trail-byte check applies to BOTH kanji ranges; the ranges are grouped
		// explicitly because && binds tighter than ||.
		const bool valid_trail    = (second_byte >= 0x40) && (second_byte <= 0xFC) && (second_byte != 0x7F);
		const bool in_first_range  = (moji_code >= 0x889F) && (moji_code <= 0x9872);
		const bool in_second_range = (moji_code >= 0x989F) && (moji_code <= 0xEAA4);
		if(valid_trail && (in_first_range || in_second_range)){
			res.push_back(static_cast<int>(moji_code));
		}

		ch += 2;// full-width character: consume both bytes
	}
	return res;
}

// Interactive driver: reads whitespace-delimited tokens from standard input and
// prints, in hexadecimal, the character code of every kanji checkKanji() finds in
// each token. Ends when the user enters "q" (half- or full-width) or when input
// is exhausted.
//   Returns : 0 on normal termination
int main(){
	string str;
	cout << "qで終了..." << endl;
	while(true){
		cout << "input >> ";
		// Stop on EOF or a stream error; without this check a failed read leaves
		// str unchanged and the loop would never terminate.
		if(!(cin >> str)){ break; }
		if(str=="q" || str=="ｑ"){ break; }// quit command

		const vector<int> code_list = checkKanji(const_cast<char*>(str.c_str()));
		if(code_list.empty()){ continue; }// no kanji: print nothing for this token

		// Show each collected code in hexadecimal, then restore decimal formatting.
		for(size_t ii=0; ii<code_list.size(); ii++){
			cout << "[" << hex << code_list[ii] << dec << "]" << "\t";
		}
		cout << endl;
	}
	return 0;
}

実行結果

qで終了...
input >> 犬
[8ca2]
input >> いぬ
input >> inu
input >> inu犬
[8ca2]
input >> 犬猫
[8ca2] [944c]
input >> 犬猫犬
[8ca2] [944c] [8ca2]

## 感想
s-jisとeucで被ってる部分があるそうで今回はとりあえずs-jisでやってみた.
最後に環境を... win7 64bit, visual studio 2010

色々と拙いが備忘録だし最初の投稿はこんな感じで.
### 参考にさせていただいたページ
http://www.tohoho-web.com/wwwkanji.htm
http://vivi.dyndns.org/tech/cpp/binHex.html
http://www5f.biglobe.ne.jp/~fuku-labo/library/program/cpp/2/076.htm

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up