パーサを作成してみたい

JavaScript

Last updated at 2024-10-01 / Posted at 2024-08-05

JavaScriptでMarkdownパーサを作成してみたい。

/**
 * Converts a Markdown string to HTML with a sequence of regex rewrites.
 *
 * Supported syntax: ATX headings (`#` .. `######` at the start of a line),
 * bold / italic with `*` or `_`, inline code, fenced code blocks and pipe
 * tables with an alignment row. Every newline that remains outside of code
 * blocks is emitted as `<br>`.
 *
 * Code spans and fenced blocks are HTML-escaped and shielded from the other
 * rules, so `*`, `_`, `#` and newlines inside code are kept verbatim.
 *
 * @param {string} markdown - Markdown source; null/undefined is treated as ''.
 * @returns {string} The generated HTML.
 */
function parseMarkdown(markdown) {
    const escapeHtml = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // Finished HTML fragments are swapped for NUL-delimited placeholders so
    // that later rules cannot rewrite them; they are put back at the very end.
    const protectedFragments = [];
    const protect = (fragment) => {
        protectedFragments.push(fragment);
        return `\u0000${protectedFragments.length - 1}\u0000`;
    };
    // A code span may enclose the placeholder of an earlier fenced block,
    // hence the recursive restore (indices inside a fragment are always lower).
    const restore = (text) => text.replace(
        /\u0000(\d+)\u0000/g,
        (match, index) => restore(protectedFragments[Number(index)])
    );

    const inlineRules = [
        { regex: /\*\*\*(.*?)\*\*\*/g, replacement: '<strong><em>$1</em></strong>' },
        { regex: /\*\*(.*?)\*\*/g, replacement: '<strong>$1</strong>' },
        { regex: /\*(.*?)\*/g, replacement: '<em>$1</em>' },
        { regex: /__(.*?)__/g, replacement: '<strong>$1</strong>' },
        { regex: /_(.*?)_/g, replacement: '<em>$1</em>' }
    ];

    // Table: one header row, one separator row (dashes with optional ':'
    // alignment markers), then any number of body rows. The final row does
    // not need a trailing newline.
    const tableRegex = /^\|.*\|[ \t]*\n\|(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*(?:\n\|.*\|[ \t]*)*(?:\n|$)/gm;

    function parseTable(tableMarkdown) {
        const splitRow = (row) => row.trim().split('|').slice(1, -1).map((cell) => cell.trim());
        const [headerRow, alignRow, ...bodyRows] = tableMarkdown.trim().split('\n');
        const header = splitRow(headerRow);
        const align = splitRow(alignRow).map((cell) => {
            const startsWithColon = cell.startsWith(':');
            const endsWithColon = cell.endsWith(':');
            if (startsWithColon && endsWithColon) return 'center';
            return endsWithColon ? 'right' : 'left';
        });
        // Rows may have more cells than the separator row; fall back to left.
        const styleFor = (i) => `style="text-align: ${align[i] || 'left'}"`;

        let tableHtml = '<table><thead><tr>';
        header.forEach((cell, i) => {
            tableHtml += `<th ${styleFor(i)}>${cell}</th>`;
        });
        tableHtml += '</tr></thead><tbody>';
        bodyRows.map(splitRow).forEach((row) => {
            tableHtml += '<tr>';
            row.forEach((cell, i) => {
                tableHtml += `<td ${styleFor(i)}>${cell}</td>`;
            });
            tableHtml += '</tr>';
        });
        tableHtml += '</tbody></table>';

        return tableHtml;
    }

    // Normalise line endings and neutralise NUL, which is reserved for the
    // placeholders above.
    let html = (markdown == null ? '' : String(markdown))
        .replace(/\r\n?/g, '\n')
        .replace(/\u0000/g, '\uFFFD');

    // 1. Code first, so nothing below can touch its contents.
    html = html.replace(/```(.*?)```/gs, (match, code) =>
        protect(`<pre><code>${escapeHtml(code.replace(/^\n/, ''))}</code></pre>`));
    html = html.replace(/`(.*?)`/g, (match, code) =>
        protect(`<code>${escapeHtml(code)}</code>`));

    // 2. Headings, anchored to the start of a line. They run before tables
    //    because a table swallows its trailing newline.
    html = html.replace(/^(#{1,6}) (.*)$/gm, (match, hashes, text) =>
        `<h${hashes.length}>${text}</h${hashes.length}>`);

    // 3. Tables; cell contents still receive the inline rules below.
    html = html.replace(tableRegex, (match) => parseTable(match));

    // 4. Inline emphasis, then line breaks.
    inlineRules.forEach((rule) => {
        html = html.replace(rule.regex, rule.replacement);
    });
    html = html.replace(/\n/g, '<br>');

    return restore(html);
}

// Test: render a sample document covering headings, emphasis, code and a table.
// Backticks inside the template literal must be escaped, otherwise they end
// the literal early and the script fails to parse.
const markdownText = `
# 見出し1
## 見出し2
### 見出し3

これは**太字**のテキストで、これは*イタリック*です。

これは\`コード\`です。

\`\`\`
これはコードブロックです
\`\`\`

| 見出し1 | 見出し2 | 見出し3 |
|:-------|:-------:|--------:|
| 左寄せ | 中央寄せ | 右寄せ |
| 左寄せ2 | 中央寄せ2 | 右寄せ2 |
`;

console.log(parseMarkdown(markdownText));

/**
 * Converts Markdown to HTML by scanning the input line by line.
 *
 * Supported syntax: ATX headings (`#` .. `######` at the start of a line),
 * bold / italic with `*` or `_`, inline code, fenced code blocks and pipe
 * tables with an alignment row. Consecutive text lines are merged into one
 * `<p>`; a blank line, heading, fence or table ends the paragraph.
 *
 * Code content is HTML-escaped and never run through the emphasis rules.
 *
 * @param {string} markdown - Markdown source; null/undefined is treated as ''.
 * @returns {string} The generated HTML.
 */
function parseMarkdown(markdown) {
    const FENCE = /^```/;
    const HEADING = /^(#{1,6}) (.*)$/;
    const TABLE_ROW = /^\|.*\|\s*$/;
    // Separator row: dashes with optional ':' alignment markers per cell.
    const TABLE_SEPARATOR = /^\|(?:\s*:?-+:?\s*\|)+\s*$/;

    const inlineRules = [
        { regex: /\*\*\*(.*?)\*\*\*/g, replacement: '<strong><em>$1</em></strong>' },
        { regex: /\*\*(.*?)\*\*/g, replacement: '<strong>$1</strong>' },
        { regex: /\*(.*?)\*/g, replacement: '<em>$1</em>' },
        { regex: /__(.*?)__/g, replacement: '<strong>$1</strong>' },
        { regex: /_(.*?)_/g, replacement: '<em>$1</em>' }
    ];

    const escapeHtml = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // Applies inline formatting to one line. Code spans are lifted out first
    // (as NUL-delimited placeholders) so emphasis rules cannot rewrite them.
    function renderInline(text) {
        const codeSpans = [];
        let out = text.replace(/`([^`]*)`/g, (match, code) => {
            codeSpans.push(`<code>${escapeHtml(code)}</code>`);
            return `\u0000${codeSpans.length - 1}\u0000`;
        });
        inlineRules.forEach((rule) => {
            out = out.replace(rule.regex, rule.replacement);
        });
        return out.replace(/\u0000(\d+)\u0000/g, (match, index) => codeSpans[Number(index)]);
    }

    // Splits "| a | b |" into ['a', 'b'].
    function splitRow(row) {
        return row.trim().split('|').slice(1, -1).map((cell) => cell.trim());
    }

    // Renders a table from its lines: header row, separator row, body rows.
    function renderTable(rows) {
        const header = splitRow(rows[0]);
        const align = splitRow(rows[1]).map((cell) => {
            const startsWithColon = cell.startsWith(':');
            const endsWithColon = cell.endsWith(':');
            if (startsWithColon && endsWithColon) return 'center';
            return endsWithColon ? 'right' : 'left';
        });
        // Rows may have more cells than the separator row; fall back to left.
        const styleFor = (i) => `style="text-align: ${align[i] || 'left'}"`;

        let tableHtml = '<table><thead><tr>';
        header.forEach((cell, i) => {
            tableHtml += `<th ${styleFor(i)}>${renderInline(cell)}</th>`;
        });
        tableHtml += '</tr></thead><tbody>';
        rows.slice(2).map(splitRow).forEach((row) => {
            tableHtml += '<tr>';
            row.forEach((cell, i) => {
                tableHtml += `<td ${styleFor(i)}>${renderInline(cell)}</td>`;
            });
            tableHtml += '</tr>';
        });
        tableHtml += '</tbody></table>';
        return tableHtml;
    }

    // NUL is reserved for the code-span placeholders, so neutralise it.
    const lines = (markdown == null ? '' : String(markdown))
        .replace(/\u0000/g, '\uFFFD')
        .split(/\r\n|\r|\n/);

    let html = '';
    let inCodeBlock = false;
    let paragraph = [];

    const closeParagraph = () => {
        if (paragraph.length > 0) {
            html += `<p>${paragraph.join(' ')}</p>`;
            paragraph = [];
        }
    };

    for (let i = 0; i < lines.length; i += 1) {
        const line = lines[i];

        if (FENCE.test(line)) {
            closeParagraph();
            inCodeBlock = !inCodeBlock;
            html += inCodeBlock ? '<pre><code>' : '</code></pre>';
            continue;
        }

        if (inCodeBlock) {
            html += `${escapeHtml(line)}\n`;
            continue;
        }

        // A table starts where a pipe row is directly followed by a separator
        // row; it extends over every pipe row after that.
        if (TABLE_ROW.test(line) && i + 1 < lines.length && TABLE_SEPARATOR.test(lines[i + 1])) {
            closeParagraph();
            let end = i + 2;
            while (end < lines.length && TABLE_ROW.test(lines[end])) {
                end += 1;
            }
            html += renderTable(lines.slice(i, end));
            i = end - 1;
            continue;
        }

        const heading = HEADING.exec(line);
        if (heading) {
            closeParagraph();
            const level = heading[1].length;
            html += `<h${level}>${renderInline(heading[2])}</h${level}>`;
            continue;
        }

        if (line.trim() === '') {
            closeParagraph();
            continue;
        }

        paragraph.push(renderInline(line));
    }

    closeParagraph();
    // An unterminated fence would otherwise leave the markup unbalanced.
    if (inCodeBlock) {
        html += '</code></pre>';
    }

    return html;
}

// Test: render a sample document covering headings, emphasis, code and a table.
// The sample is kept as Markdown source; backticks inside the template
// literal are escaped so they do not terminate it.
const markdownText = `
# 見出し1
## 見出し2
### 見出し3

これは**太字**のテキストで、これは*イタリック*です。

これは\`コード\`です。

\`\`\`
これはコードブロックです
\`\`\`

| 見出し1 | 見出し2 | 見出し3 |
|:-------|:-------:|--------:|
| 左寄せ | 中央寄せ | 右寄せ |
| 左寄せ2 | 中央寄せ2 | 右寄せ2 |
`;

console.log(parseMarkdown(markdownText));

kokokraa

/**
 * Converts Markdown to HTML by scanning the input line by line.
 *
 * Supported syntax: ATX headings (`#` .. `######` at the start of a line),
 * bold / italic with `*` or `_`, inline code, fenced code blocks and pipe
 * tables with an alignment row. Consecutive text lines are merged into one
 * `<p>`; a blank line, heading, fence or table ends the paragraph.
 *
 * Code content is HTML-escaped and never run through the emphasis rules.
 *
 * @param {string} markdown - Markdown source; null/undefined is treated as ''.
 * @returns {string} The generated HTML.
 */
function parseMarkdown(markdown) {
    const FENCE = /^```/;
    const HEADING = /^(#{1,6}) (.*)$/;
    const TABLE_ROW = /^\|.*\|\s*$/;
    // Separator row: dashes with optional ':' alignment markers per cell.
    const TABLE_SEPARATOR = /^\|(?:\s*:?-+:?\s*\|)+\s*$/;

    const inlineRules = [
        { regex: /\*\*\*(.*?)\*\*\*/g, replacement: '<strong><em>$1</em></strong>' },
        { regex: /\*\*(.*?)\*\*/g, replacement: '<strong>$1</strong>' },
        { regex: /\*(.*?)\*/g, replacement: '<em>$1</em>' },
        { regex: /__(.*?)__/g, replacement: '<strong>$1</strong>' },
        { regex: /_(.*?)_/g, replacement: '<em>$1</em>' }
    ];

    const escapeHtml = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // Applies inline formatting to one line. Code spans are lifted out first
    // (as NUL-delimited placeholders) so emphasis rules cannot rewrite them.
    function renderInline(text) {
        const codeSpans = [];
        let out = text.replace(/`([^`]*)`/g, (match, code) => {
            codeSpans.push(`<code>${escapeHtml(code)}</code>`);
            return `\u0000${codeSpans.length - 1}\u0000`;
        });
        inlineRules.forEach((rule) => {
            out = out.replace(rule.regex, rule.replacement);
        });
        return out.replace(/\u0000(\d+)\u0000/g, (match, index) => codeSpans[Number(index)]);
    }

    // Splits "| a | b |" into ['a', 'b'].
    function splitRow(row) {
        return row.trim().split('|').slice(1, -1).map((cell) => cell.trim());
    }

    // Renders a table from its lines: header row, separator row, body rows.
    function renderTable(rows) {
        const header = splitRow(rows[0]);
        const align = splitRow(rows[1]).map((cell) => {
            const startsWithColon = cell.startsWith(':');
            const endsWithColon = cell.endsWith(':');
            if (startsWithColon && endsWithColon) return 'center';
            return endsWithColon ? 'right' : 'left';
        });
        // Rows may have more cells than the separator row; fall back to left.
        const styleFor = (i) => `style="text-align: ${align[i] || 'left'}"`;

        let tableHtml = '<table><thead><tr>';
        header.forEach((cell, i) => {
            tableHtml += `<th ${styleFor(i)}>${renderInline(cell)}</th>`;
        });
        tableHtml += '</tr></thead><tbody>';
        rows.slice(2).map(splitRow).forEach((row) => {
            tableHtml += '<tr>';
            row.forEach((cell, i) => {
                tableHtml += `<td ${styleFor(i)}>${renderInline(cell)}</td>`;
            });
            tableHtml += '</tr>';
        });
        tableHtml += '</tbody></table>';
        return tableHtml;
    }

    // NUL is reserved for the code-span placeholders, so neutralise it.
    const lines = (markdown == null ? '' : String(markdown))
        .replace(/\u0000/g, '\uFFFD')
        .split(/\r\n|\r|\n/);

    let html = '';
    let inCodeBlock = false;
    let paragraph = [];

    const closeParagraph = () => {
        if (paragraph.length > 0) {
            html += `<p>${paragraph.join(' ')}</p>`;
            paragraph = [];
        }
    };

    for (let i = 0; i < lines.length; i += 1) {
        const line = lines[i];

        if (FENCE.test(line)) {
            closeParagraph();
            inCodeBlock = !inCodeBlock;
            html += inCodeBlock ? '<pre><code>' : '</code></pre>';
            continue;
        }

        if (inCodeBlock) {
            html += `${escapeHtml(line)}\n`;
            continue;
        }

        // A table starts where a pipe row is directly followed by a separator
        // row; it extends over every pipe row after that.
        if (TABLE_ROW.test(line) && i + 1 < lines.length && TABLE_SEPARATOR.test(lines[i + 1])) {
            closeParagraph();
            let end = i + 2;
            while (end < lines.length && TABLE_ROW.test(lines[end])) {
                end += 1;
            }
            html += renderTable(lines.slice(i, end));
            i = end - 1;
            continue;
        }

        const heading = HEADING.exec(line);
        if (heading) {
            closeParagraph();
            const level = heading[1].length;
            html += `<h${level}>${renderInline(heading[2])}</h${level}>`;
            continue;
        }

        if (line.trim() === '') {
            closeParagraph();
            continue;
        }

        paragraph.push(renderInline(line));
    }

    closeParagraph();
    // An unterminated fence would otherwise leave the markup unbalanced.
    if (inCodeBlock) {
        html += '</code></pre>';
    }

    return html;
}

// Test: feed a sample document (headings, emphasis, inline code, a fenced
// block and an aligned table) through the parser and print the HTML.
const markdownText = [
    '',
    '# 見出し1',
    '## 見出し2',
    '### 見出し3',
    '',
    'これは**太字**のテキストで、これは*イタリック*です。',
    '',
    'これは`コード`です。',
    '',
    '```',
    'これはコードブロックです',
    '```',
    '',
    '| 見出し1 | 見出し2 | 見出し3 |',
    '|:-------|:-------:|--------:|',
    '| 左寄せ | 中央寄せ | 右寄せ |',
    '| 左寄せ2 | 中央寄せ2 | 右寄せ2 |',
    ''
].join('\n');

console.log(parseMarkdown(markdownText));

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up