3
3

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 5 years have passed since last update.

regex.hで正規表現

Posted at
myreg.cpp

# include <sys/types.h>
# include <regex.h>
# include <stdio.h>
# include <errno.h>
# include <string.h>

# include <string>
# include <map>
# include <vector>

typedef std::string Str;
// Captures of one match: group index -> captured text (index 0 is the whole match).
typedef std::map< int, Str > Mis;
// All matches: match index (0-based, in order of appearance) -> captures of that match.
typedef std::map< int, Mis > Mimis;
int myreg( Str input_string, Str pattern, Mimis &ret, size_t back_ref_size = 42 );

// Finds every non-overlapping match of a POSIX extended regular expression
// in input_string, scanning from left to right.
//
// input_string   : text to search.
// pattern        : POSIX ERE, compiled with REG_EXTENDED.
// result         : receives result[match_index][group_index] = captured text.
//                  Entries are added or overwritten; the map is not cleared first.
//                  Groups that did not take part in a match get no entry.
// back_ref_size  : maximum number of groups recorded per match, counting
//                  group 0 (the whole match). With 0 nothing can be recorded.
//
// Returns 0 on success (including "no match at all"), 1 if the pattern fails
// to compile or regexec reports an error other than REG_NOMATCH; in both
// failure cases the message from regerror is written to stderr.
int myreg( Str input_string, Str pattern, Mimis &result, size_t back_ref_size ) {
	
	regex_t rg;
	
	// cflags: REG_EXTENDED only. See the regcomp man page for the alternatives.
	int ret = regcomp( &rg, pattern.c_str(), REG_EXTENDED );
	
	if( ret ) {
		
		char buf[1024] = "";
		regerror( ret, &rg, buf, sizeof( buf ) );
		
		fprintf( stderr, "regcomp failed. %s\n", buf );
		
		// No regfree here: after a failed regcomp the contents of rg are
		// undefined, so there is nothing that may safely be released.
		return( 1 );
		
	}
	
	// One buffer for the whole scan, released automatically on every exit path.
	std::vector< regmatch_t > matching( back_ref_size );
	regmatch_t *slots = matching.empty() ? NULL : &matching[0];
	
	// Only slots 0..re_nsub can ever be filled in by regexec.
	size_t group_limit = rg.re_nsub + 1;
	
	if( back_ref_size < group_limit ) {
		
		group_limit = back_ref_size;
		
	}
	
	int status = 0;
	int match_count = 0;
	size_t offset = 0; // where the next search starts inside input_string
	
	while( 1 ) {
		
		// Past the first search the cursor is no longer at the beginning of
		// the text, so '^' must not match there.
		const int eflags = offset ? REG_NOTBOL : 0;
		
		ret = regexec( &rg, input_string.c_str() + offset, back_ref_size, slots, eflags );
		
		if( ret == REG_NOMATCH ) {
			
			// Not an error: there is simply nothing left to match.
			break;
			
		}
		
		if( ret ) {
			
			char buf[1024] = "";
			regerror( ret, &rg, buf, sizeof( buf ) );
			
			fprintf( stderr, "regexec failed. %s\n", buf );
			
			status = 1;
			break;
			
		}
		
		// Without the span of the whole match there is no way to advance.
		if( group_limit == 0 || slots[0].rm_so == -1 || slots[0].rm_eo == -1 ) {
			
			break;
			
		}
		
		for( size_t i = 0; i < group_limit; ++i ) {
			
			const regmatch_t &span = slots[i];
			
			if( span.rm_so == -1 || span.rm_eo == -1 ) {
				
				// This group did not participate; later groups still may have.
				continue;
				
			}
			
			const size_t start = offset + static_cast< size_t >( span.rm_so );
			const size_t len = static_cast< size_t >( span.rm_eo - span.rm_so );
			
			result[match_count][static_cast< int >( i )] = input_string.substr( start, len );
			
		}
		
		size_t consumed = static_cast< size_t >( slots[0].rm_eo );
		
		// Empty match: step over one character so the scan cannot stall.
		if( slots[0].rm_so == slots[0].rm_eo ) {
			
			++consumed;
			
		}
		
		offset += consumed;
		
		// Also covers an empty match at the very end, where the extra step
		// moves the cursor past the end of the text.
		if( offset >= input_string.size() ) {
			
			break;
			
		}
		
		++match_count;
		
	}
	
	regfree( &rg );
	return( status );
	
}

int main( int argc, char **argv ) {
	
	if( argc != 3 ) {
		
		printf( "./myreg input_string pattern\n\n" );
		return( 1 );
		
	}
	
	printf( "input_string:\n%s\n", argv[1] );
	printf( "\n" );
	printf( "pattern:\n%s\n", argv[2] );
	printf( "\n" );
	
	Mimis result;
	
	if( myreg( argv[1], argv[2], result ) ) {
		
		return( 1 );
		
	}
	
	printf( "result:\n" );
	
	for( Mimis::iterator it = result.begin(), eit = result.end(); it != eit; ++it ) {
		
		for( Mis::iterator itt = it->second.begin(), eitt = it->second.end(); itt != eitt; ++itt ) {
			
			printf( "%d %d %s\n", it->first, itt->first, itt->second.c_str() );
			
		}
		
	}
	
	return( 0 );
	
}


CやC++で正規表現って、なんだかさくっと使えるのが無いような。

そんなこんなで、POSIX regex 関数を使うことに。

そして、作ったコードです。

全体マッチ+後方参照も取り出せます。
(後方参照の処理とかも、自分で全部書かないといけないとか、大変ですよね)

以下実行結果



[todanano@localhost samba]$ ./myreg 'abc123def' '(abc).*(def)'
input_string:
abc123def

pattern:
(abc).*(def)

result:
0 0 abc123def
0 1 abc
0 2 def
[todanano@localhost samba]$
[todanano@localhost samba]$ ./myreg '192.168.0.1-192.168.0.254' '([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})'
input_string:
192.168.0.1-192.168.0.254

pattern:
([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})

result:
0 0 192.168.0.1
0 1 192
0 2 168
0 3 0
0 4 1
1 0 192.168.0.254
1 1 192
1 2 168
1 3 0
1 4 254
[todanano@localhost samba]$
[todanano@localhost samba]$

3
3
2

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
3
3

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?