1
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

GitHubのPull RequestのFiles ChangedをMarkdown形式Diffに整形するPythonコード

Posted at

今回のコードでできること

GitHubのPull RequestのFiles Changedを
image.png

このようなMarkdown形式Diffに整形
image.png

動機

  • 生成AIを利用したレビュー時、必要な差分のみをテキスト化して渡したい
  • Files Changed画面上の差分を差分箇所がわかる形で複数まとめて取得したい

使い方

  • Files Changed画面で「名前を付けて保存」を行う(保存前にLoad diffもしておく)
    image.png

  • "GitDiffToMd.py"ファイルを作成してエディタでソースコードを貼り付ける

  • "GitDiffToMd.py"をクリックして、指示に従って保存したHTMLファイルを指定する

  • "diff.md"にテキスト化した結果が入っているので、生成AIへの質問等で活用する

ソースコード(Pythonファイル)

import os
from html import escape
from html.parser import HTMLParser

class CopilotDiffEntryExtractor(HTMLParser):
    """Collect each ``<copilot-diff-entry data-file-path=...>`` element as HTML text.

    While the parser is inside such an element, every start tag, end tag,
    self-closing tag and text node is re-serialized and buffered. When the
    element closes, ``(html_text, attribute_dict)`` is appended to
    ``target_entries``; ``attribute_dict`` holds the attributes of the opening
    ``copilot-diff-entry`` tag.

    The buffered HTML is meant to be parsed again, so text and attribute
    values (which ``HTMLParser`` hands over already entity-decoded) are
    escaped again on the way out. Without that, diffed source such as
    ``List<String>`` would turn into bogus tags on the second parse.
    """

    def __init__(self):
        super().__init__()
        self.in_target = False      # True while inside a matching entry element
        self.target_entries = []    # finished (html_text, attrs) tuples
        self.current_entry = []     # serialized fragments of the open entry
        self.attrs = {}             # attributes of the open entry's start tag

    @staticmethod
    def _render_tag(tag, attrs, suffix=">"):
        """Serialize a tag with its attributes; *suffix* is ``>`` or ``/>``."""
        parts = [tag]
        for key, value in attrs:
            if value is None:
                # Valueless attribute (e.g. ``disabled``): emit the bare name
                # instead of the literal string "None".
                parts.append(key)
            else:
                parts.append(f'{key}="{escape(value, quote=True)}"')
        return f"<{' '.join(parts)}{suffix}"

    def handle_starttag(self, tag, attrs):
        if tag == "copilot-diff-entry":
            attr_dict = dict(attrs)
            if "data-file-path" in attr_dict:
                self.in_target = True
                self.current_entry = []
                self.attrs = attr_dict

        if self.in_target:
            self.current_entry.append(self._render_tag(tag, attrs))

    def handle_endtag(self, tag):
        if not self.in_target:
            # An entry element without data-file-path was never opened as a
            # target, so its end tag must not produce an empty entry.
            return
        self.current_entry.append(f"</{tag}>")
        if tag == "copilot-diff-entry":
            self.target_entries.append(("".join(self.current_entry), self.attrs))
            self.in_target = False
            self.current_entry = []
            self.attrs = {}

    def handle_data(self, data):
        if self.in_target:
            # Re-escape so "<", ">" and "&" in the diffed code survive re-parsing.
            self.current_entry.append(escape(data, quote=False))

    def handle_startendtag(self, tag, attrs):
        if self.in_target:
            self.current_entry.append(self._render_tag(tag, attrs, suffix="/>"))

class GithubDiffHTMLParser(HTMLParser):
    """Turn the table cells of a GitHub diff into unified-diff style lines.

    Text inside each ``<td>`` that sits within a ``<tr>`` is accumulated, and
    when the cell closes one line is appended to ``lines`` according to the
    cell's CSS classes:

    * ``blob-code-addition`` -> ``+`` prefix
    * ``blob-code-deletion`` -> ``-`` prefix
    * ``blob-code-hunk``     -> emitted without a prefix, so an ``@@ ... @@``
      header starts in column 0
    * ``blob-code-context`` / ``blob-code-inner`` -> single-space prefix

    Cells with none of these classes (for example line-number cells) are
    dropped.
    """

    def __init__(self):
        super().__init__()
        self.in_tr = False       # inside a <tr>
        self.in_td = False       # inside a <td> that belongs to a <tr>
        self.td_class = []       # CSS classes of the current <td>
        self.current_code = ""   # text collected for the current <td>
        self.lines = []          # resulting diff lines

    def handle_starttag(self, tag, attrs):
        if tag == "tr":
            self.in_tr = True
        elif tag == "td" and self.in_tr:
            self.in_td = True
            self.td_class = []
            for k, v in attrs:
                if k == "class":
                    # A bare ``class`` attribute has value None; treat it as
                    # "no classes" instead of raising AttributeError.
                    self.td_class = (v or "").split()

    def handle_endtag(self, tag):
        if tag == "tr":
            self.in_tr = False
        elif tag == "td":
            if self.in_td:
                # Each cell is one source line; line breaks inside it are markup noise.
                code = self.current_code.replace("\n", "").replace("\r", "")
                classes = self.td_class
                if "blob-code-addition" in classes:
                    self.lines.append(f"+{code}")
                elif "blob-code-deletion" in classes:
                    self.lines.append(f"-{code}")
                elif "blob-code-hunk" in classes:
                    # Checked before blob-code-inner: hunk cells may carry both
                    # classes, and a leading space would make the header look
                    # like a context line.
                    self.lines.append(code.strip())
                elif "blob-code-context" in classes or "blob-code-inner" in classes:
                    self.lines.append(f" {code}")
                self.current_code = ""
                self.td_class = []
            self.in_td = False

    def handle_data(self, data):
        if self.in_td:
            self.current_code += data

def github_diff_html_to_markdown_diff(html: str) -> str:
    """Convert GitHub diff HTML into a fenced Markdown ``diff`` code block.

    Args:
        html: HTML fragment containing the diff table rows.

    Returns:
        The extracted diff lines joined by newlines and wrapped in a code
        fence tagged ``diff``. The fence is three backticks unless the diff
        itself contains a run of three or more backticks, in which case the
        fence is made one backtick longer than the longest run.
    """
    parser = GithubDiffHTMLParser()
    parser.feed(html)
    parser.close()
    body = "\n".join(parser.lines)

    # A context line is rendered as " " + code, and a line of up to three
    # spaces followed by backticks closes a Markdown fence. Diffs of Markdown
    # files would therefore end the block early with a fixed-length fence.
    longest_run = current_run = 0
    for ch in body:
        current_run = current_run + 1 if ch == "`" else 0
        if current_run > longest_run:
            longest_run = current_run
    fence = "`" * max(3, longest_run + 1)

    return f"{fence}diff\n{body}\n{fence}"

def main():
    """Ask for a saved "Files changed" HTML file and write its diffs to diff.md.

    The output file ``diff.md`` is created in the current working directory.
    For every extracted diff entry it contains a ``## <file path>`` heading
    followed by the fenced diff. The same text is printed to the console,
    followed by the number of entries, and the function then waits for Enter.
    If the given path is not an existing file, an error message is written to
    ``diff.md`` and printed instead.
    """
    current_dir = os.getcwd()

    raw_path = input("HTMLファイルを入力してください: ")
    html_path = raw_path
    if not os.path.isfile(html_path):
        # Paths pasted or dragged into a console often arrive wrapped in
        # quotes or with stray whitespace; retry with those removed.
        html_path = raw_path.strip().strip("\"'")

    output_file = os.path.join(current_dir, "diff.md")

    if not os.path.isfile(html_path):
        with open(output_file, "w", encoding="utf-8") as file_output:
            file_output.write("指定されたHTMLファイルは存在しません。\n")
        print("指定されたHTMLファイルは存在しません。")
        return

    with open(html_path, encoding="utf-8") as f:
        html_text = f.read()

    # Pull out each per-file diff element together with its attributes.
    entry_extractor = CopilotDiffEntryExtractor()
    entry_extractor.feed(html_text)
    entry_extractor.close()
    file_entries = entry_extractor.target_entries

    sections = []
    for entry_html, attrs in file_entries:
        data_file_path = attrs.get("data-file-path", "")
        diff_block = github_diff_html_to_markdown_diff(entry_html)
        sections.append(f"## {data_file_path}\n{diff_block}\n\n")
    contents = "".join(sections)

    with open(output_file, "w", encoding="utf-8") as file_output:
        file_output.write(contents)

    # Show what was written; no need to read the file back.
    print(contents)

    print(f"{len(file_entries)}個のdiffを保存")
    # Keep the console window open when the script was started by double-click.
    input("続行するには何かキーを押してください...")

# Run the interactive conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()

1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?