LoginSignup
5
7

More than 5 years have passed since last update.

VBA で UTF-8 変換

Last updated at Posted at 2016-06-12

VBA で UTF-8 変換

ADOでも変換は可能ですが、メモリ上では無理。
VBAから.NET 3.5 を呼び出すことも可能ですがちょっと非公式的な感じもありでいまいち。
VBAで実装すると以下のようになります。

'------------------------------------------------------------------------------------------------------------------------
' UTF-8 → UTF-16(LE)
'------------------------------------------------------------------------------------------------------------------------
Public Function GetString(ByRef bytBuf() As Byte) As String

    Dim bytRet() As Byte
    Dim i As Long
    Dim lngPos As Long
    Dim b0 As Long
    Dim b1 As Long
    Dim b2 As Long
    Dim b3 As Long

    GetString = ""

    On Error GoTo e

    i = LBound(bytBuf)
    ReDim bytRet(0 To (UBound(bytBuf) + 1) * 2)
    i = 0
    lngPos = 0

    Do Until i > UBound(bytBuf)

        b0 = bytBuf(i): i = i + 1

        Select Case True

'            // UTF-8:   [0xxx xxxx]
'            // Unicode: [0000 0000] [0xxx xxxx]
            Case (b0 < &H80&)
                bytRet(lngPos) = b0: lngPos = lngPos + 1
                bytRet(lngPos) = 0:  lngPos = lngPos + 1

'            // UTF-8:   [110y yyyy] [10xx xxxx]
'            // Unicode: [0000 0yyy] [yyxx xxxx]
            Case ((b0 And &HE0&) = &HC0 And (b0 And &H1E&) <> 0)

                b1 = bytBuf(i): i = i + 1
                Dim c As Long
                c = ((LShift(b0, 6)) And &H7C0&) Or (b1 And &H3F&)

                bytRet(lngPos) = LByte(c): lngPos = lngPos + 1
                bytRet(lngPos) = UByte(c): lngPos = lngPos + 1

'            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
'            // Unicode: [zzzz yyyy] [yyxx xxxx]
            Case ((b0 And &HF0&) = &HE0&)

                b1 = bytBuf(i): i = i + 1
                b2 = bytBuf(i): i = i + 1

                c = ((LShift(b0, 12)) And &HF000&) Or ((LShift(b1, 6)) And &HFC0&) Or (b2 And &H3F&)

                bytRet(lngPos) = LByte(c)
                lngPos = lngPos + 1
                bytRet(lngPos) = UByte(c)
                lngPos = lngPos + 1

'            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
'            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
'            //          [1101 11yy] [yyxx xxxx] (low surrogate)
'            //          * uuuuu = wwww + 1
            Case ((b0 And &HF8) = &HF0&)

                b1 = bytBuf(i): i = i + 1
                b2 = bytBuf(i): i = i + 1
                b3 = bytBuf(i): i = i + 1

                Dim uuuuu As Long
                Dim wwww As Long
                Dim zzzz As Long
                Dim yyyyyy As Long
                Dim xxxxxx As Long
                Dim hs As Long
                Dim ls As Long

'                // decode bytes into surrogate characters
                uuuuu = ((LShift(b0, 2)) And &H1C&) Or ((RShift(b1, 4)) And &H3&)
'                If (uuuuu > &H10) Then
'                    invalidSurrogate(uuuuu);
'                End If
                wwww = uuuuu - 1
                zzzz = b1 And &HF&
                yyyyyy = b2 And &H3F&
                xxxxxx = b3 And &H3F&

                hs = &HD800& Or ((LShift(wwww, 6)) And &H3C0&) Or (LShift(zzzz, 2)) Or (RShift(yyyyyy, 4))
                ls = &HDC00& Or ((LShift(yyyyyy, 6)) And &H3C0&) Or xxxxxx

                bytRet(lngPos) = LByte(hs)
                lngPos = lngPos + 1
                bytRet(lngPos) = UByte(hs)
                lngPos = lngPos + 1

                bytRet(lngPos) = LByte(ls)
                lngPos = lngPos + 1
                bytRet(lngPos) = UByte(ls)
                lngPos = lngPos + 1

        End Select

    Loop

    GetString = LeftB(bytRet, lngPos)
    Exit Function
e:

End Function
'------------------------------------------------------------------------------------------------------------------------
' UTF-16(LE) → UTF-8
'------------------------------------------------------------------------------------------------------------------------
Public Function getBytes(ByVal strBuf As String) As Byte()

    Dim bytBuf() As Byte
    Dim lngBuf As Long
    Dim bytRet() As Byte

    Dim i As Long
    Dim lngPos As Long

    On Error GoTo e

    If strBuf = "" Then
        Exit Function
    End If

    bytBuf = strBuf

    'バッファを最大 1文字×4バイト分確保
    ReDim bytRet(0 To (Len(strBuf) * 4))

    lngPos = 0

    For i = LBound(bytBuf) To UBound(bytBuf) Step 2

        lngBuf = LShift(bytBuf(i + 1), 8) + bytBuf(i)

        Select Case lngBuf

            Case Is < &H80&

                'UTF-8(ASCII)
                bytRet(lngPos) = lngBuf
                lngPos = lngPos + 1

            Case Is < &H800&

                'UTF-8(2バイト)
                bytRet(lngPos) = &HC0& Or RShift(lngBuf, 6)
                lngPos = lngPos + 1

                bytRet(lngPos) = &H80& Or (lngBuf And &H3F&)
                lngPos = lngPos + 1

            Case &HD800& To &HDBFF&

                Dim lngHigh As Long
                Dim lngLow As Long

                lngHigh = lngBuf

                i = i + 2
                lngLow = LShift(bytBuf(i + 1), 8) + bytBuf(i)

                'サロゲート(UTF-16→Unicode)
                lngBuf = &H10000 + (lngHigh - &HD800&) * &H400& + (lngLow - &HDC00&)

                'UTF-8(4バイト)
                bytRet(lngPos) = &HF0& Or RShift(lngBuf, 18)
                lngPos = lngPos + 1

                bytRet(lngPos) = &H80& Or (RShift(lngBuf, 12) And &H3F&)
                lngPos = lngPos + 1

                bytRet(lngPos) = &H80& Or (RShift(lngBuf, 6) And &H3F&)
                lngPos = lngPos + 1

                bytRet(lngPos) = &H80& Or (lngBuf And &H3F&)
                lngPos = lngPos + 1

            Case Else

                'UTF-8(3バイト)
                bytRet(lngPos) = &HE0& Or RShift(lngBuf, 12)
                lngPos = lngPos + 1

                bytRet(lngPos) = &H80& Or (RShift(lngBuf, 6) And &H3F&)
                lngPos = lngPos + 1

                bytRet(lngPos) = &H80& Or (lngBuf And &H3F&)
                lngPos = lngPos + 1

        End Select

    Next

    getBytes = LeftB(bytRet, lngPos)
    Exit Function
e:

End Function

'------------------------------------------------------------------------------------------------------------------------
' 下位バイト取得
'------------------------------------------------------------------------------------------------------------------------
Function LByte(ByVal lngValue As Long) As Long
    LByte = lngValue And &HFF&
End Function
'------------------------------------------------------------------------------------------------------------------------
' 上位バイト取得
'------------------------------------------------------------------------------------------------------------------------
Function UByte(ByVal lngValue As Long) As Long
    UByte = RShift((lngValue And &HFF00&), 8)
End Function
'------------------------------------------------------------------------------------------------------------------------
' 右シフト
'------------------------------------------------------------------------------------------------------------------------
Function RShift(ByVal lngValue As Long, ByVal lngKeta As Long) As Long
    RShift = lngValue \ (2 ^ lngKeta)
End Function
'------------------------------------------------------------------------------------------------------------------------
' 左シフト
'------------------------------------------------------------------------------------------------------------------------
Function LShift(ByVal lngValue As Long, ByVal lngKeta As Long) As Long
    LShift = lngValue * (2 ^ lngKeta)
End Function

他のソースをごらんになりたい方はこちらへ
Excelを便利にする250以上の機能を体系化したアドインはこちらです。
「RelaxTools Addin」窓の杜大賞受賞ソフト
http://software.opensquare.net/relaxtools/

5
7
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
5
7