LoginSignup
8
7

More than 5 years have passed since last update.

Python 3の print() で UnicodeEncodeError を回避するデコレータ

Last updated at Posted at 2013-04-06

デコレータの練習がてら書いてみました。Python 3.3.0 で動作を確認しています。

ありきたりの内容ですので説明は省略しますが、デフォルトでは、

  • 標準出力の encoding が 'ascii' だった場合は、標準入出力+エラー出力の encoding を 'utf-8' に変更
  • 変更後の標準出力の encoding が 'utf-8' 以外の場合は、標準入出力+エラー出力の errors を 'backslashreplace' に変更

するようになっています。

仕様については再考の余地がありそうですので何か良いアイデアがありましたらコメント欄等でお知らせください。バグやマズい箇所の指摘等も歓迎します。

fixprint.py
#!/usr/bin/env python3
# vim:fileencoding=utf-8

# Copyright (c) 2013 Masami HIRATA <msmhrt@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     1. Redistributions of source code must retain the above copyright notice,
#        this list of conditions and the following disclaimer.
#
#     2. Redistributions in binary form must reproduce the above copyright
#        notice, this list of conditions and the following disclaimer in the
#        documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

__all__ = ["fixprint"]

import codecs
from collections import namedtuple
from functools import partial, wraps
import sys

# Names of the ``sys`` attributes whose text wrappers fixprint() replaces.
STREAMS = ("stdin", "stdout", "stderr")
# Text-layer settings that are read from, and overridden on, each stream.
ARGUMENTS = ("encoding", "errors")
# Encoding substituted when stdout currently uses ASCII.
ENCODING_DEFAULT = 'utf-8'
# Error handler applied when the effective stdout encoding is not UTF-8.
ERRORS_DEFAULT = 'backslashreplace'
ENCODING_ASCII = 'ascii'
ENCODING_UTF8 = 'utf-8'
# Wildcard accepted by _setarg(): "every stream" / "every argument".
ANY = None


def fixprint(function=None, encoding=None, *, errors=None):
    """Decorate a function so it runs with re-encoded stdin/stdout/stderr.

    Usable bare (``@fixprint``), with a positional encoding
    (``@fixprint('utf-8')``) or with keywords
    (``@fixprint(encoding='latin-1', errors='replace')``).

    While the decorated function runs, ``sys.stdin``, ``sys.stdout`` and
    ``sys.stderr`` are replaced by new text wrappers opened on the same file
    descriptors.  Their settings are chosen as follows:

    * encoding: the ``encoding`` argument if given; otherwise 'utf-8' when
      the current stdout encoding is ASCII; otherwise left unchanged.
    * errors: the ``errors`` argument if given; otherwise
      'backslashreplace' when the resulting stdout encoding is not UTF-8;
      otherwise left unchanged.

    When the function returns or raises, the temporary wrappers are flushed
    and the original stream objects are put back.

        >>> import sys
        >>> @fixprint(encoding='latin-1', errors='replace')
        ... def spam():
        ...     print("sys.stdout.encoding in spam() is '{}'".format(
        ...           sys.stdout.encoding))
        ...
        >>> @fixprint('utf-8')
        ... def ham():
        ...     spam()
        ...     print("sys.stdout.encoding in ham() is '{}'".format(
        ...           sys.stdout.encoding))
        ...
        >>> ham()  # doctest: +SKIP
        sys.stdout.encoding in spam() is 'latin-1'
        sys.stdout.encoding in ham() is 'utf-8'
        >>>

    Args:
        function: The callable to wrap, or (when not callable) the encoding
            passed positionally.
        encoding: Encoding to force on all three streams, or None.
        errors: Error handler to force on all three streams, or None.

    Returns:
        The wrapped function, or a decorator awaiting the function when
        called with options only.

    Raises:
        TypeError: If an encoding is given both positionally and by keyword.
    """

    if not callable(function):
        if function is not None:
            if encoding is not None:
                # @fixprint("utf-8", encoding="ascii")  # WRONG
                raise TypeError("fixprint() takes 1 positional argument " +
                                "but 2 were given")
            # @fixprint("utf-8")  # CORRECT
            encoding = function
        return partial(fixprint, encoding=encoding, errors=errors)

    def _normalize(name):
        # Canonical codec name, so aliases such as 'UTF8' compare equal.
        return codecs.lookup(name).name

    def _setarg(settings, stream, argument, value):
        # Store value for one stream/argument, or for all of them when ANY.
        streams = STREAMS if stream is ANY else (stream,)
        arguments = ARGUMENTS if argument is ANY else (argument,)
        for a_stream in streams:
            for an_argument in arguments:
                settings[a_stream][an_argument] = value

    def _reopen_stream(old_stream, settings):
        # Flush first so pending output is written before the new wrapper
        # starts using the same descriptor.
        old_stream.flush()
        # closefd=False: the descriptor still belongs to old_stream.
        return open(old_stream.fileno(),
                    old_stream.mode,
                    encoding=settings["encoding"],
                    errors=settings["errors"],
                    closefd=False)

    @wraps(function)
    def _fixprint(*args, **kwargs):
        originals = {stream: getattr(sys, stream) for stream in STREAMS}

        # Start from the current settings.  Plain dicts are used because
        # namedtuple field accessors are not guaranteed to be writable
        # ``property`` objects.
        fixed = {stream: {argument: getattr(originals[stream], argument)
                          for argument in ARGUMENTS}
                 for stream in STREAMS}

        if encoding is not None:
            _setarg(fixed, ANY, "encoding", encoding)
        elif (_normalize(fixed["stdout"]["encoding"]) ==
              _normalize(ENCODING_ASCII)):
            _setarg(fixed, ANY, "encoding", ENCODING_DEFAULT)

        if errors is not None:
            _setarg(fixed, ANY, "errors", errors)
        elif (_normalize(fixed["stdout"]["encoding"]) !=
              _normalize(ENCODING_UTF8)):
            _setarg(fixed, ANY, "errors", ERRORS_DEFAULT)

        temporaries = []
        try:
            # Swap inside the try block so that a failure on a later stream
            # still restores the ones already replaced.
            for stream in STREAMS:
                replacement = _reopen_stream(originals[stream], fixed[stream])
                temporaries.append(replacement)
                setattr(sys, stream, replacement)
            return function(*args, **kwargs)
        finally:
            # Put the caller's own stream objects back before flushing, so
            # that a flush error cannot leave sys.* pointing at a temporary.
            for stream in STREAMS:
                setattr(sys, stream, originals[stream])
            for replacement in temporaries:
                replacement.flush()

    return _fixprint
8
7
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
8
7