0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

PyYAMLでcustom anchorをdumpする方法

Posted at

PyYAMLにはcustom tagを設定する方法はあるが、custom anchorを設定する方法は見当たらなかったので、実現方法を参考までに記載しておく。

今回の目的は、下記のような形式のYAMLをPythonから自動生成できるようにすることである。
具体的には、anchorである"&No001"をPython側から付与できるようにする。

data:
    - &No001 !Node
     num: !<NDArray> [10.0, 5.0, 0.0]

PyYAMLの内部構造

PyYAMLでdumpする場合、Dumperというクラスがpythonからyamlに変換処理を行う。
そのDumperが継承しているSerializerクラス内で、anchorを含むNodeの処理が行われている

serializer.py
class Serializer:
    ~
    def serialize_node(self, node, parent, index):
        # `alias` is the anchor name associated with this node
        alias = self.anchors[node]
        ~
        elif isinstance(node, MappingNode):
                implicit = (node.tag
                            == self.resolve(MappingNode, node.value, True))
                # The start event is emitted with the anchor (`alias`) and the node's tag
                self.emit(MappingStartEvent(alias, node.tag, implicit,
                    flow_style=node.flow_style))

したがって、aliasに任意のanchor名(今回は"No001"。YAML上では"&No001"と出力される)を渡すようにすればよい。

serialize_nodeをオーバーライドする

全体のコードはこのようになる

CustomSerializer.py
from yaml.nodes import *
from yaml.events import *
from yaml.serializer import Serializer


class CustomSerializer(Serializer):
    """Serializer that emits a caller-supplied anchor stored on the node.

    A node may carry an ``anchor`` attribute. When it is set to a truthy
    value, that value is emitted as the node's anchor. Otherwise the entry
    recorded for the node in ``self.anchors`` is used, so nodes without a
    custom anchor are serialized with whatever anchor the base class
    assigned to them.
    """

    def __init__(self, encoding=None, explicit_start=None, explicit_end=None,
                 version=None, tags=None):
        super().__init__(encoding=encoding, explicit_start=explicit_start,
                         explicit_end=explicit_end, version=version, tags=tags)

    def serialize_node(self, node, parent, index):
        """Emit the events for ``node`` and, recursively, for its children.

        Args:
            node: The ScalarNode, SequenceNode or MappingNode to serialize.
            parent: The node containing ``node`` (passed to the resolver).
            index: Position or key node of ``node`` within ``parent``.
        """
        # Replaces the stock `alias = self.anchors[node]`.
        # Nodes that were never given an `anchor` attribute must not raise,
        # hence getattr() instead of a direct attribute access.
        custom_anchor = getattr(node, 'anchor', None)
        if custom_anchor:
            alias = str(custom_anchor)
        else:
            # No custom anchor: fall back to the anchor recorded by the base
            # class so that repeated nodes still get a usable alias name.
            alias = self.anchors.get(node)

        if node in self.serialized_nodes:
            # Already emitted once: refer back to it by its anchor.
            self.emit(AliasEvent(alias))
            return

        self.serialized_nodes[node] = True
        self.descend_resolver(parent, index)
        if isinstance(node, ScalarNode):
            detected_tag = self.resolve(ScalarNode, node.value, (True, False))
            default_tag = self.resolve(ScalarNode, node.value, (False, True))
            implicit = (node.tag == detected_tag), (node.tag == default_tag)
            self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
                                  style=node.style))
        elif isinstance(node, SequenceNode):
            implicit = (node.tag
                        == self.resolve(SequenceNode, node.value, True))
            self.emit(SequenceStartEvent(alias, node.tag, implicit,
                                         flow_style=node.flow_style))
            for position, item in enumerate(node.value):
                self.serialize_node(item, node, position)
            self.emit(SequenceEndEvent())
        elif isinstance(node, MappingNode):
            implicit = (node.tag
                        == self.resolve(MappingNode, node.value, True))
            self.emit(MappingStartEvent(alias, node.tag, implicit,
                                        flow_style=node.flow_style))
            for key, value in node.value:
                self.serialize_node(key, node, None)
                self.serialize_node(value, node, key)
            self.emit(MappingEndEvent())
        self.ascend_resolver()

これで、nodeに設定したanchorをeventに渡せるようになった。

次に"represent_mapping関数"にはanchor引数がないため、これもオーバーライドする

※scalarもsequenceも同様に変更する

CustomRepresenter.py
from yaml.nodes import *
from yaml.representer import Representer

class CustomRepresenter(Representer):
    """Representer whose node-building methods accept an optional ``anchor``.

    Each method builds the node with the inherited implementation and then
    stores ``anchor`` on it as ``node.anchor``. The attribute is always set
    (``None`` when no anchor was requested) so that code reading
    ``node.anchor`` later never hits a missing attribute.
    """

    def represent_mapping(self, tag, mapping, flow_style=None, anchor=None):
        """Build a mapping node for ``mapping`` and attach ``anchor`` to it.

        Args:
            tag: YAML tag for the node.
            mapping: Dict-like object or iterable of ``(key, value)`` pairs.
            flow_style: Passed through to the inherited implementation.
            anchor: Anchor name to attach, or ``None`` for no custom anchor.

        Returns:
            The node produced by the inherited ``represent_mapping``.
        """
        # Delegating instead of copying the library body keeps this class
        # in step with whatever the installed PyYAML version does.
        node = super().represent_mapping(tag, mapping, flow_style=flow_style)
        node.anchor = anchor
        return node

    def represent_scalar(self, tag, value, style=None, anchor=None):
        """Build a scalar node for ``value`` and attach ``anchor`` to it.

        Args:
            tag: YAML tag for the node.
            value: Scalar text to emit.
            style: Passed through to the inherited implementation.
            anchor: Anchor name to attach, or ``None`` for no custom anchor.

        Returns:
            The node produced by the inherited ``represent_scalar``.
        """
        node = super().represent_scalar(tag, value, style=style)
        node.anchor = anchor
        return node

    def represent_sequence(self, tag, sequence, flow_style=None, anchor=None):
        """Build a sequence node for ``sequence`` and attach ``anchor`` to it.

        Args:
            tag: YAML tag for the node.
            sequence: Iterable of items to represent.
            flow_style: Passed through to the inherited implementation.
            anchor: Anchor name to attach, or ``None`` for no custom anchor.

        Returns:
            The node produced by the inherited ``represent_sequence``.
        """
        node = super().represent_sequence(tag, sequence, flow_style=flow_style)
        node.anchor = anchor
        return node

カスタムしたクラスを扱えるようにDumperも継承してCustomDumperを作成する

CustomDumper.py
from yaml.emitter import Emitter
from yaml.resolver import Resolver

# Module names follow the file names used for the snippets in this article.
from CustomSerializer import CustomSerializer
from CustomRepresenter import CustomRepresenter


class CustomDumper(Emitter, CustomSerializer, CustomRepresenter, Resolver):
    """Dumper assembled from the anchor-aware serializer and representer.

    Takes the same constructor arguments as the individual components and
    forwards each group to the component that consumes it.
    """

    def __init__(self, stream,
            default_style=None, default_flow_style=False,
            canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None,
            encoding=None, explicit_start=None, explicit_end=None,
            version=None, tags=None, sort_keys=True):
        Emitter.__init__(self, stream, canonical=canonical,
                indent=indent, width=width,
                allow_unicode=allow_unicode, line_break=line_break)
        # Initialise the custom serializer in place of the stock Serializer.
        CustomSerializer.__init__(self, encoding=encoding,
                explicit_start=explicit_start, explicit_end=explicit_end,
                version=version, tags=tags)
        # Initialise the custom representer in place of the stock Representer.
        CustomRepresenter.__init__(self, default_style=default_style,
                default_flow_style=default_flow_style, sort_keys=sort_keys)
        Resolver.__init__(self)

最後に、作成したCustomDumperをyaml.dumpおよびyaml.add_representerのDumper引数に指定する。

main.py
    class Tagging:
        """Pairs a value with the YAML tag and optional anchor to dump it with."""

        def __init__(self, tag, value, anchor=None):
            self.tag = tag
            self.value = value
            self.anchor = anchor

    def custom_representer(dumper, data):
        """Represent a Tagging as a mapping, flow sequence or scalar node.

        The node kind is chosen from the type of ``data.value``; the tag and
        anchor stored on ``data`` are passed to the dumper in every case.
        """
        if isinstance(data.value, dict):
            return dumper.represent_mapping(data.tag, data.value, anchor=data.anchor)
        if isinstance(data.value, list):
            return dumper.represent_sequence(data.tag, data.value, flow_style=True, anchor=data.anchor)
        return dumper.represent_scalar(data.tag, data.value, anchor=data.anchor)

    yaml.add_representer(Tagging, custom_representer, Dumper=CustomDumper)

    data = {"num": Tagging("NDArray", [10.0, 5.0, 0.0])}
    list_ = [Tagging("!Node", data, "No001")]
    dict_ = {"data": list_}

    with open('data.yaml', 'w') as f:
        # Dump the outer document (dict_), not the inner `data` mapping:
        # only dict_ contains the anchored "!Node" entry under the "data" key.
        yaml.dump(dict_, f, Dumper=CustomDumper)
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?