More than 5 years have passed since last update.

CでBASE64のエンコード/デコードを行う関数

Last updated at 2017-01-16Posted at 2017-01-15

はじめに

CでBASE64のエンコードとデコードを行う関数です。
車輪の再発明です。
勉強もかねて、自分で書いてみました。
WikipediaのBASE64の解説と、RFC 4648を参考に実装しています。

実装

base64.c

# include <stdlib.h>
# include <string.h>
# include "base64.h"

/* Standard alphabet: the character at index v encodes the 6-bit value v. */
static const char BASE64_TABLE[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";

/* URL-safe alphabet: identical except that '-' and '_' replace '+' and '/'. */
static const char BASE64_TABLE_URL[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"-_";

/* Number of symbols in BASE64_TABLE, excluding the string terminator. */
static const int BASE64_TABLE_LENGTH = (int)(sizeof BASE64_TABLE - 1);

/* Encoding parameters for one Base64 variant, selected by its BASE64_TYPE. */
struct tagBASE64_SPEC {
	BASE64_TYPE type;     /* variant this entry describes */
	const char *table;    /* alphabet used for the 6-bit values */
	char pad;             /* padding character; 0 in the base64url entry */
	int maxLineLength;    /* encoded characters per line; 0 where no separator is set */
	char *lineSep;        /* separator written between lines, or NULL */
	int lineSepLength;    /* number of characters in lineSep */
};
typedef struct tagBASE64_SPEC BASE64_SPEC;

/* One entry per variant; the first entry is what the codec starts from before searching. */
static const BASE64_SPEC BASE64_SPECS[] = {
	{
		.type = BASE64_TYPE_STANDARD,
		.table = BASE64_TABLE,
		.pad = '=',
		.maxLineLength = 0,
		.lineSep = NULL,
		.lineSepLength = 0
	},
	{
		.type = BASE64_TYPE_MIME,
		.table = BASE64_TABLE,
		.pad = '=',
		.maxLineLength = 76,
		.lineSep = "\r\n",
		.lineSepLength = 2
	},
	{
		.type = BASE64_TYPE_URL,
		.table = BASE64_TABLE_URL,
		.pad = 0,
		.maxLineLength = 0,
		.lineSep = NULL,
		.lineSepLength = 0
	}
};

/* Number of entries in BASE64_SPECS. */
static const size_t BASE64_SPECS_LENGTH = sizeof BASE64_SPECS / sizeof BASE64_SPECS[0];

/**
 * Encode a byte buffer as Base64 text.
 *
 * @param data  bytes to encode; NULL yields NULL.
 * @param size  number of bytes in data.
 * @param type  variant to produce; a value with no entry in BASE64_SPECS
 *              falls back to the first entry.
 * @return  newly malloc'ed, NUL-terminated string that the caller must free,
 *          or NULL when data is NULL, the output length does not fit in
 *          size_t, or the allocation fails.
 */
char *base64Encode(const char *data, const size_t size, const BASE64_TYPE type)
{
	const size_t sizeLimit = (size_t)-1;
	/* Work on unsigned bytes so shifts never operate on negative values. */
	const unsigned char *bytes = (const unsigned char *)data;
	const BASE64_SPEC *spec = &BASE64_SPECS[0];
	size_t groups;
	size_t groupsPerLine = 0;
	size_t sepLength = 0;
	size_t length;
	size_t s;
	size_t g;
	char *base64;
	char *cursor;

	if (data == NULL) {
		return NULL;
	}

	for (s = 0; s < BASE64_SPECS_LENGTH; s++) {
		if (BASE64_SPECS[s].type == type) {
			spec = &BASE64_SPECS[s];
			break;
		}
	}

	/* Every started 3-byte group produces exactly 4 output characters. */
	groups = size / 3 + (size % 3 != 0 ? 1 : 0);
	if (groups > (sizeLimit - 1) / 4) {
		return NULL;
	}
	length = groups * 4 + 1;

	if (spec->maxLineLength > 0 && spec->lineSep != NULL && spec->lineSepLength > 0) {
		groupsPerLine = ((size_t)spec->maxLineLength + 3) / 4;
		sepLength = (size_t)spec->lineSepLength;
	}
	if (groupsPerLine > 0 && groups > 0) {
		/*
		 * A separator goes in front of every line except the first, so the
		 * count depends on the number of OUTPUT groups, not on the input size.
		 */
		const size_t breaks = (groups - 1) / groupsPerLine;

		if (breaks > (sizeLimit - length) / sepLength) {
			return NULL;
		}
		length += breaks * sepLength;
	}

	base64 = malloc(length);
	if (base64 == NULL) {
		return NULL;
	}

	cursor = base64;
	for (g = 0; g < groups; g++) {
		const size_t remaining = size - g * 3;
		const unsigned int b0 = bytes[g * 3];
		const unsigned int b1 = remaining > 1 ? bytes[g * 3 + 1] : 0u;
		const unsigned int b2 = remaining > 2 ? bytes[g * 3 + 2] : 0u;

		if (groupsPerLine > 0 && g > 0 && g % groupsPerLine == 0) {
			memcpy(cursor, spec->lineSep, sepLength);
			cursor += sepLength;
		}

		*(cursor++) = spec->table[b0 >> 2];
		*(cursor++) = spec->table[(b0 << 4 | b1 >> 4) & 0x3f];

		/* Missing input bytes become padding, or nothing when the variant has no pad. */
		if (remaining > 1) {
			*(cursor++) = spec->table[(b1 << 2 | b2 >> 6) & 0x3f];
		}
		else if (spec->pad != 0) {
			*(cursor++) = spec->pad;
		}

		if (remaining > 2) {
			*(cursor++) = spec->table[b2 & 0x3f];
		}
		else if (spec->pad != 0) {
			*(cursor++) = spec->pad;
		}
	}
	*cursor = '\0';

	return base64;
}

/**
 * Decode Base64 text into a byte buffer.
 *
 * Characters that are not part of the selected alphabet (line breaks,
 * non-ASCII bytes, ...) are skipped. Decoding stops at the first padding
 * character of the selected variant, or at the end of the string.
 *
 * @param base64   NUL-terminated text to decode; NULL yields NULL.
 * @param retSize  receives the number of decoded bytes; left untouched when
 *                 the function returns NULL. A NULL pointer is tolerated.
 * @param type     variant to decode; a value with no entry in BASE64_SPECS
 *                 falls back to the first entry.
 * @return  newly malloc'ed buffer that the caller must free, with a NUL byte
 *          appended after the decoded data, or NULL when base64 is NULL or
 *          the allocation fails.
 */
char *base64Decode(const char *base64, size_t *retSize, const BASE64_TYPE type)
{
	const BASE64_SPEC *spec = &BASE64_SPECS[0];
	signed char table[0x80];
	unsigned char *data;
	unsigned char *cursor;
	unsigned int pending = 0;
	int pendingBits = 0;
	size_t length;
	size_t i;
	int v;

	if (base64 == NULL) {
		return NULL;
	}

	for (i = 0; i < BASE64_SPECS_LENGTH; i++) {
		if (BASE64_SPECS[i].type == type) {
			spec = &BASE64_SPECS[i];
			break;
		}
	}

	length = strlen(base64);
	/* Divide before multiplying so the size cannot overflow; +3 covers a partial group, +1 the NUL. */
	data = malloc(length / 4 * 3 + 3 + 1);
	if (data == NULL) {
		return NULL;
	}

	/* Reverse lookup: -1 marks characters outside the alphabet. */
	memset(table, -1, sizeof(table));
	for (v = 0; v < BASE64_TABLE_LENGTH; v++) {
		table[(unsigned char)spec->table[v] & 0x7f] = (signed char)v;
	}

	cursor = data;
	for (i = 0; i < length; i++) {
		const unsigned char ch = (unsigned char)base64[i];
		int value;

		if (spec->pad != 0 && base64[i] == spec->pad) {
			break;
		}
		if (ch >= sizeof(table)) {
			continue;
		}
		value = table[ch];
		if (value < 0) {
			continue;
		}

		/*
		 * Collect 6 bits per accepted character and emit a byte as soon as
		 * 8 bits are available. Counting accepted characters, rather than
		 * input positions, keeps the decoder aligned across skipped characters.
		 */
		pending = (pending << 6) | (unsigned int)value;
		pendingBits += 6;
		if (pendingBits >= 8) {
			pendingBits -= 8;
			*(cursor++) = (unsigned char)((pending >> pendingBits) & 0xffu);
			pending &= (1u << pendingBits) - 1u;
		}
	}
	*cursor = 0;
	if (retSize != NULL) {
		*retSize = (size_t)(cursor - data);
	}

	return (char *)data;
}

base64.h

#ifndef BASE64_H
#define BASE64_H

/* size_t is used in the prototypes below; include it so this header stands alone. */
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
#if 0
}
#endif

/** Base64 variants understood by base64Encode() and base64Decode(). */
typedef enum tagBASE64_TYPE {
	BASE64_TYPE_STANDARD,	/**< '+' and '/' alphabet, '=' padding */
	BASE64_TYPE_MIME,	/**< standard alphabet, CRLF line breaks every 76 characters */
	BASE64_TYPE_URL		/**< '-' and '_' alphabet */
} BASE64_TYPE;

/**
 * Encode size bytes starting at data as Base64 text.
 *
 * @param data  bytes to encode.
 * @param size  number of bytes in data.
 * @param type  variant to produce.
 * @return  malloc'ed NUL-terminated string that the caller must free,
 *          or NULL when data is NULL or the allocation fails.
 */
char *base64Encode(const char *data, const size_t size, const BASE64_TYPE type);

/**
 * Decode NUL-terminated Base64 text.
 *
 * @param base64   text to decode.
 * @param retSize  receives the number of decoded bytes.
 * @param type     variant to decode.
 * @return  malloc'ed buffer that the caller must free,
 *          or NULL when base64 is NULL or the allocation fails.
 */
char *base64Decode(const char *base64, size_t *retSize, const BASE64_TYPE type);

#if 0
{
#endif
#ifdef __cplusplus
}
#endif

#endif // !BASE64_H

変更
- 2017/01/16
  - base64urlのパディング処理を修正しました。
  - 処理系により改行がスキップされない問題を処理しました。

使い方

エンコードするには、base64Encode関数にデータdataとデータサイズsize、手法typeを指定してください。
戻り値としてmallocされた文字列が返ってきますので、使用後にはfreeしてください。

デコードするには、base64Decode関数に文字列base64とデータサイズの返却先retSize、手法typeを指定してください。
戻り値としてmallocされたデータが返ってきますので、使用後にはfreeしてください。

手法typeには、BASE64のエンコード方法を渡します。
タイプBASE64_TYPE_MIMEでは、76文字毎に改行が入ります。
タイプBASE64_TYPE_URLでは、URLセーフなbase64urlでエンコードされます。

工夫

BASE64のデコードに際して、逆変換テーブルを作成するようにしています。
逆変換テーブルがない場合でも、strchrなどを用いて簡単にデコードしていくことができますが、おそらく変換スピードがけた違いに遅くなるはずです。
本来であれば、逆変換テーブルをコードに直接書いてもよいと思いますが、せいぜいCPUのステップ数が数百増えるぐらいですので、今回は動的に生成してみました。

テスト

main.c

# include <assert.h>
# include <stdlib.h>
# include <stdio.h>
# include <string.h>
# include "base64.h"

/* One round-trip case: the raw input, its byte count, and the expected encoding. */
struct tagBASE64_TEST {
	BASE64_TYPE type;     /* variant passed to both encoder and decoder */
	const char *data;     /* raw input bytes */
	size_t size;          /* number of bytes in data */
	const char *base64;   /* expected encoded text */
};
typedef struct tagBASE64_TEST BASE64_TEST;

static const BASE64_TEST BASE64_TESTS[] = {
	/* "" through "foobar": every residue of the input length modulo 3 */
	{ BASE64_TYPE_STANDARD, "",       0, "" },
	{ BASE64_TYPE_STANDARD, "f",      1, "Zg==" },
	{ BASE64_TYPE_STANDARD, "fo",     2, "Zm8=" },
	{ BASE64_TYPE_STANDARD, "foo",    3, "Zm9v" },
	{ BASE64_TYPE_STANDARD, "foob",   4, "Zm9vYg==" },
	{ BASE64_TYPE_STANDARD, "fooba",  5, "Zm9vYmE=" },
	{ BASE64_TYPE_STANDARD, "foobar", 6, "Zm9vYmFy" },

	/* inputs whose encoding uses the last two alphabet symbols */
	{ BASE64_TYPE_STANDARD, ">>>>>>", 6, "Pj4+Pj4+" },
	{ BASE64_TYPE_STANDARD, "??????", 6, "Pz8/Pz8/" },
	{ BASE64_TYPE_MIME,     ">>>>>>", 6, "Pj4+Pj4+" },
	{ BASE64_TYPE_MIME,     "??????", 6, "Pz8/Pz8/" },
	{ BASE64_TYPE_URL,      ">>>>>>", 6, "Pj4-Pj4-" },
	{ BASE64_TYPE_URL,      "??????", 6, "Pz8_Pz8_" }
};

/* Number of entries in BASE64_TESTS. */
static const size_t BASE64_TESTS_LENGTH = sizeof BASE64_TESTS / sizeof *BASE64_TESTS;

/*
 * Round-trip every entry of BASE64_TESTS: encode, compare with the expected
 * text, decode again, and compare with the original bytes. Each step is
 * printed; a mismatch or a NULL result aborts through assert().
 */
int main(void) {
	size_t i;

	for (i = 0; i < BASE64_TESTS_LENGTH; i++) {
		const BASE64_TEST *test = &BASE64_TESTS[i];
		char *base64;
		char *data;
		size_t size = 0;

		base64 = base64Encode(test->data, test->size, test->type);
		/* Both functions return NULL on failure; never hand that to printf("%s") or strcmp. */
		assert(base64 != NULL);
		printf("BASE64(\"%s\") = \"%s\"\n", test->data, base64);
		assert(strcmp(base64, test->base64) == 0);

		data = base64Decode(base64, &size, test->type);
		assert(data != NULL);
		printf("DATA(\"%s\") = \"%s\"\n", base64, data);
		assert(size == test->size);
		assert(memcmp(data, test->data, size) == 0);

		free(base64);
		free(data);
	}

	return 0;
}

BASE64("") = ""
DATA("") = ""
BASE64("f") = "Zg=="
DATA("Zg==") = "f"
BASE64("fo") = "Zm8="
DATA("Zm8=") = "fo"
BASE64("foo") = "Zm9v"
DATA("Zm9v") = "foo"
BASE64("foob") = "Zm9vYg=="
DATA("Zm9vYg==") = "foob"
BASE64("fooba") = "Zm9vYmE="
DATA("Zm9vYmE=") = "fooba"
BASE64("foobar") = "Zm9vYmFy"
DATA("Zm9vYmFy") = "foobar"
BASE64(">>>>>>") = "Pj4+Pj4+"
DATA("Pj4+Pj4+") = ">>>>>>"
BASE64("??????") = "Pz8/Pz8/"
DATA("Pz8/Pz8/") = "??????"
BASE64(">>>>>>") = "Pj4+Pj4+"
DATA("Pj4+Pj4+") = ">>>>>>"
BASE64("??????") = "Pz8/Pz8/"
DATA("Pz8/Pz8/") = "??????"
BASE64(">>>>>>") = "Pj4-Pj4-"
DATA("Pj4-Pj4-") = ">>>>>>"
BASE64("??????") = "Pz8_Pz8_"
DATA("Pz8_Pz8_") = "??????"

RFC 4648に記載されているテストベクトルがすべてエンコード、デコードできていることが確認できました。
また、URLセーフなbase64urlのエンコード、デコードも正しくできていることが確認できました。

さいごに

この記事で示したコードはWTFPL v2としておりますので、ご自由に使っていただいて大丈夫です。
ただし、一切の責任を取りかねますのでご了承ください。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up