レガシーなシステムで文字コードを細かくチェックする機会があったので、Shift_JIS(Windows-31J)の16進コードをUnicodeのコードポイント表記に変換する簡単なツールを作ってみました。
/**
 * Small command-line helper for inspecting character codes: it decodes a
 * hex-encoded Windows-31J (Shift_JIS) byte sequence and prints the Unicode
 * code point of every decoded character in escaped hexadecimal form.
 */
public class SjisToUnicode {

    /** Name of the charset used to decode the input bytes. */
    private static final String SJIS_CHARSET_NAME = "Windows-31j";

    /** Sample input used when no command-line argument is supplied. */
    private static final String DEFAULT_SJIS_HEX = "819d";

    /**
     * Decodes a hex string as Windows-31J and prints the escaped code points.
     *
     * @param args optional; {@code args[0]} is the hex string to convert.
     *             When absent, the built-in sample {@code "819d"} is used.
     */
    public static void main(String[] args) {
        String sjisHex = (args != null && args.length > 0) ? args[0] : DEFAULT_SJIS_HEX;
        try {
            byte[] sjisBytes = hexBin(sjisHex);
            // Charset.forName throws an unchecked exception, so no checked
            // UnsupportedEncodingException (and no import for it) is needed.
            String sjisStr = new String(sjisBytes, java.nio.charset.Charset.forName(SJIS_CHARSET_NAME));
            System.out.println(strToUnicode(sjisStr));
        } catch (java.nio.charset.UnsupportedCharsetException e) {
            System.err.println("Charset not supported by this JVM: " + e.getCharsetName());
        } catch (IllegalArgumentException e) {
            // Raised by hexBin for malformed hex input.
            System.err.println("Invalid input: " + e.getMessage());
        }
    }

    /**
     * Converts a byte array to a lower-case hex string, two digits per byte.
     *
     * @param data bytes to encode; must not be {@code null}
     * @return the hex representation, e.g. {@code {0x81, 0x9d}} becomes {@code "819d"}
     */
    public static String binHex(byte[] data) {
        StringBuilder sb = new StringBuilder(data.length * 2);
        for (byte b : data) {
            int value = b & 0xff; // treat the byte as unsigned
            sb.append(Character.forDigit(value >>> 4, 16));
            sb.append(Character.forDigit(value & 0x0f, 16));
        }
        return sb.toString();
    }

    /**
     * Converts a hex string to the bytes it encodes.
     *
     * @param hex an even-length string of hex digits (upper or lower case)
     * @return the decoded bytes; an empty array for an empty string
     * @throws NullPointerException     if {@code hex} is {@code null}
     * @throws IllegalArgumentException if {@code hex} has an odd number of characters
     * @throws NumberFormatException    if {@code hex} contains a non-hex character,
     *                                  including a sign such as {@code '-'} or {@code '+'}
     */
    public static byte[] hexBin(String hex) {
        if (hex == null) {
            throw new NullPointerException("hex");
        }
        int length = hex.length();
        if (length % 2 != 0) {
            // Refuse instead of silently dropping the dangling half byte.
            throw new IllegalArgumentException(
                    "Hex string must contain an even number of digits but has " + length + ": \"" + hex + "\"");
        }
        byte[] bytes = new byte[length / 2];
        for (int index = 0; index < bytes.length; index++) {
            int high = hexDigit(hex, index * 2);
            int low = hexDigit(hex, index * 2 + 1);
            bytes[index] = (byte) ((high << 4) | low);
        }
        return bytes;
    }

    /** Returns the value (0-15) of the hex digit at {@code position}, rejecting anything else. */
    private static int hexDigit(String hex, int position) {
        char c = hex.charAt(position);
        int value = Character.digit(c, 16);
        if (value < 0) {
            throw new NumberFormatException(
                    "Invalid hex digit '" + c + "' at index " + position + " in \"" + hex + "\"");
        }
        return value;
    }

    /**
     * Renders every Unicode code point of a string as a backslash, the letter
     * {@code u}, and the code point in upper-case hex (at least four digits).
     *
     * @param original text to convert; must not be {@code null}
     * @return the concatenated escaped code points, empty for an empty string
     */
    private static String strToUnicode(String original) {
        StringBuilder sb = new StringBuilder(original.length() * 6);
        int i = 0;
        while (i < original.length()) {
            int codePoint = original.codePointAt(i);
            sb.append(String.format("\\u%04X", codePoint));
            // Step over both chars of a surrogate pair so the low surrogate
            // is not emitted a second time as its own entry.
            i += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Inverse of {@link #strToUnicode(String)}: parses a sequence of escaped
     * hex code points back into a string. Any text before the first escape
     * marker is ignored.
     *
     * @param unicode escaped code points; must not be {@code null}
     * @return the decoded text, empty when the input contains no code points
     * @throws NumberFormatException if a code point is not valid hex
     */
    private static String unicodeToStr(String unicode) {
        String[] codeStrs = unicode.split("\\\\u");
        // split() drops trailing empty strings, so an input consisting only of
        // the delimiter yields an empty array; clamp to avoid a negative size.
        int count = Math.max(codeStrs.length - 1, 0);
        int[] codePoints = new int[count];
        for (int i = 0; i < count; i++) {
            codePoints[i] = Integer.parseInt(codeStrs[i + 1], 16);
        }
        return new String(codePoints, 0, count);
    }
}
参考記事:Java: 16進文字列とバイト列を相互変換する
参考記事:JavaのUnicode文字列の変換用メソッド("あ" <-> "\u3042")