#今回の目的
- 基本的な使い方を知りたい
#先人の知恵
- Cでの正規表現
- C言語で正規表現を使うには?思ったより簡単だった!
- 【C言語入門】文字列内の検索方法まとめ(文字指定、正規表現)
- 正規表現したい!
- man 3 regex
- man 7 regex
- Programming with Regex
- printfの書式の * ってなんだ?
#サンプル
regex_sample.c
#include <malloc.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Demonstrates the POSIX regex API: compiles a pattern with three
 * parenthesized groups, matches it against a fixed string, and prints the
 * whole match followed by each captured substring, one per line.
 *
 * Returns EXIT_SUCCESS when the pattern matched and the results were
 * printed; returns EXIT_FAILURE when pattern compilation, buffer
 * allocation, or matching failed.
 */
int main(void)
{
    const char string[] = "abc, def, ghi";
    const char regex[] = "([a-z]+), ([a-z]+), ([a-z]+)";
    regex_t ptn_buf;
    regmatch_t *match_buf;
    size_t size;
    int rc = EXIT_FAILURE; /* flipped to success only after printing */

    /* compile */
    if (regcomp(&ptn_buf, regex, REG_EXTENDED | REG_NEWLINE) != 0) {
        printf("regex compile failed\n");
        return EXIT_FAILURE;
    }

    /* allocate match buffer: slot 0 is the whole match, then one per group */
    size = ptn_buf.re_nsub + 1;
    match_buf = malloc(size * sizeof *match_buf);
    if (match_buf == NULL) {
        printf("no memory\n");
        goto out_regfree;
    }

    /* match */
    if (regexec(&ptn_buf, string, size, match_buf, 0) != 0) {
        printf("no match\n");
        goto out_free;
    }

    /* print result */
    for (size_t i = 0; i < size; i++) {
        regoff_t so = match_buf[i].rm_so;
        regoff_t eo = match_buf[i].rm_eo;

        /* -1 offsets mark a group that did not take part in the match */
        if (so == -1 || eo == -1) {
            continue;
        }
        /* "%.*s" takes its precision as int, hence the explicit cast */
        printf("%.*s\n", (int)(eo - so), &string[so]);
    }
    rc = EXIT_SUCCESS;

    /* free resources in reverse order of acquisition */
out_free:
    free(match_buf);
out_regfree:
    regfree(&ptn_buf);
    return rc;
}
コンパイル.log
$ gcc --version
gcc (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ gcc -Wall -Wextra regex_sample.c
実行結果.log
$ ./a.out
abc, def, ghi
abc
def
ghi
#正規表現コードの要素
ヘッダファイルのインクルード.c
#include <regex.h>
正規表現のコンパイル.c
/* Receives the compiled form of the pattern from regcomp(). */
regex_t ptn_buf;
/* Three parenthesized groups of lowercase letters, separated by ", ". */
const char regex[] = "([a-z]+), ([a-z]+), ([a-z]+)";
/* A nonzero return from regcomp() is treated as a compile failure. */
if (regcomp(&ptn_buf, regex, REG_EXTENDED | REG_NEWLINE) != 0) {
printf("regex compile failed\n");
return;
}
マッチデータ格納バッファの確保.c
/* Number of regmatch_t entries to allocate and pass to regexec(). */
size_t size;
regmatch_t *match_buf;
/* One entry per subexpression counted in re_nsub, plus entry 0 for the
   whole match. */
size = ptn_buf.re_nsub + 1;
match_buf = malloc(sizeof(regmatch_t) * size);
if (match_buf == NULL) {
printf("malloc failed\n");
/* NOTE(review): this early return skips regfree(&ptn_buf) -- confirm the
   enclosing function releases the compiled pattern on this path. */
return;
}
パターンマッチ実行.c
/* Subject text the compiled pattern is matched against. */
char string[] = "abc, def, ghi";
/* Up to size match offsets are written into match_buf; a nonzero return
   is reported as "no match". */
if (regexec( &ptn_buf, string, size, match_buf, 0 ) != 0) {
printf("no match\n");
/* NOTE(review): this early return skips both regfree() and free() --
   confirm the enclosing function releases them on this path. */
return;
}
解放.c
/* Release the compiled pattern first, then the malloc'ed match array. */
regfree(&ptn_buf);
free(match_buf);
/usr/include/regex.h
/* Compiled-pattern state that regex_t aliases (see the typedef after the
   struct).  Most members are declared through the __REPB_PREFIX() macro;
   re_nsub is the one member declared here under a plain name. */
struct re_pattern_buffer
{
/* Space that holds the compiled pattern. It is declared as
`unsigned char *' because its elements are sometimes used as
array indexes. */
unsigned char *__REPB_PREFIX(buffer);
/* Number of bytes to which `buffer' points. */
unsigned long int __REPB_PREFIX(allocated);
/* Number of bytes actually used in `buffer'. */
unsigned long int __REPB_PREFIX(used);
/* Syntax setting with which the pattern was compiled. */
reg_syntax_t __REPB_PREFIX(syntax);
/* Pointer to a fastmap, if any, otherwise zero. re_search uses the
fastmap, if there is one, to skip over impossible starting points
for matches. */
char *__REPB_PREFIX(fastmap);
/* Either a translate table to apply to all characters before
comparing them, or zero for no translation. The translation is
applied to a pattern when it is compiled and to a string when it
is matched. */
__RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
/* Zero if this pattern cannot match the empty string, one else.
Well, in truth it's used only in `re_search_2', to see whether or
not we should use the fastmap, so we don't set this absolutely
perfectly; see `re_compile_fastmap' (the `duplicate' case). */
unsigned __REPB_PREFIX(can_be_null) : 1;
/* If REGS_UNALLOCATED, allocate space in the `regs' structure
for `max (RE_NREGS, re_nsub + 1)' groups.
If REGS_REALLOCATE, reallocate space if necessary.
If REGS_FIXED, use what's there. */
#ifdef __USE_GNU
# define REGS_UNALLOCATED 0
# define REGS_REALLOCATE 1
# define REGS_FIXED 2
#endif
unsigned __REPB_PREFIX(regs_allocated) : 2;
/* Set to zero when `regex_compile' compiles a pattern; set to one
by `re_compile_fastmap' if it updates the fastmap. */
unsigned __REPB_PREFIX(fastmap_accurate) : 1;
/* If set, `re_match_2' does not return information about
subexpressions. */
unsigned __REPB_PREFIX(no_sub) : 1;
/* If set, a beginning-of-line anchor doesn't match at the beginning
of the string. */
unsigned __REPB_PREFIX(not_bol) : 1;
/* Similarly for an end-of-line anchor. */
unsigned __REPB_PREFIX(not_eol) : 1;
/* If true, an anchor at a newline matches. */
unsigned __REPB_PREFIX(newline_anchor) : 1;
};
/* regex_t is simply another name for struct re_pattern_buffer. */
typedef struct re_pattern_buffer regex_t;