追記: Third Party Extensionsを導入した方がよさそうです⇒【続】MkDocsでテーブルのセル結合がしたい(mdx_tableau編)
はじめに
markdownを使ってドキュメントを書いていた。
はじめはVScode上でMarkdown All in OneとMarkdown Preview Enhancedを使っていたのだが、
MkDocs使いたいね、という話になって使ってみた。
ところがお互いに方言がすごくて、すんなり移行できない。
どうにかしたいなぁー。
とりあえずはテーブルでセル結合できないのを何とかしたい。
MkDocsはどのように markdownをパースしているのか?
とはいうものの、なんもわからん。
なんもわからんので、ひとまず、
find /usr/local/lib/python3.9/site-packages/mkdocs/ -type f -exec grep -Hn markdown {} \;
して、結果を眺めてみる(ファイルパスは環境によって違うかもです)。
/usr/local/lib/python3.9/site-packages/mkdocs/structure/pages.py:11:import markdown
/usr/local/lib/python3.9/site-packages/mkdocs/tests/base.py:7:import markdown
みたいなのがヒットするので、MkDocs自体にパース処理が組み込まれているわけではなく、パッケージを使用しているのだろうね。
$ pip show mkdocs
Name: mkdocs
Version: 1.4.2
Summary: Project documentation with Markdown.
Home-page:
Author:
Author-email: Tom Christie <tom@tomchristie.com>
License:
Location: /usr/local/lib/python3.9/site-packages
Requires: click, ghp-import, importlib-metadata, jinja2, markdown, mergedeep, packaging, pyyaml, pyyaml-env-tag, watchdog
Required-by: mkdocs-material
$ pip list | grep -i markdown
Markdown 3.3.7
私の環境では、Markdown 3.3.7を使用しているようである。
$ pip show markdown
Name: Markdown
Version: 3.3.7
Summary: Python implementation of Markdown.
Home-page: https://Python-Markdown.github.io/
Author: Manfred Stienstra, Yuri takhteyev and Waylan limberg
Author-email: python.markdown@gmail.com
License: BSD License
Location: /usr/local/lib/python3.9/site-packages
Requires: importlib-metadata
Required-by: mkdocs, mkdocs-material, pymdown-extensions
ということらしい。
Python-Markdownのホームページを見てみる
テーブルは拡張機能のようであるが、基本機能のテーブルと、拡張機能のテーブルがあるのだろうか?
markdown_extensions = c.MarkdownExtensions(
builtins=['toc', 'tables', 'fenced_code'], configkey='mdx_configs'
)
"""PyMarkdown extension names."""
MkDocsでは、
- toc
- tables
- fenced_code
がデフォルトでExtension指定されているようである。
デフォルトでExtension指定されているということは、デフォルトで拡張機能のテーブルを使っているということである。
というわけで、この記事はMkDocsに限った話ではなく、Python-Markdownのmarkdown.extensions.tablesでセル結合しちゃおうというお話です。
tables.py を調べる
/usr/local/lib/python3.9/site-packages/markdown/extensions/tables.py をざっと眺めてみても、セル結合ができるコードは入っていないようである。
pythonコードをいじっちゃおう
設定ではどうにもならないようなので、コードをいじってしまう。
めんどくさいので結果を示す。
オリジナルのソースコードはBSDライセンスのようです。
なので
- 無保証です(ご使用は自己責任で)
- 改変・再配布はご自由に
- オリジナル作者など詳細はソースの先頭を見てね
- BSDライセンスの条文はBSD License
オリジナルとの差分
$ diff -u tables.py.orig tables.py
--- tables.py.orig 2023-02-12 15:25:32.646208400 +0900
+++ tables.py 2023-02-13 20:22:45.831933200 +0900
@@ -60,6 +60,10 @@
is_table = rows[index].startswith('|')
if not is_table:
is_table = self.RE_END_BORDER.search(rows[index]) is not None
+ if not is_table and index == len(rows)-1:
+ is_table = rows[index].startswith('Table: ')
+ if not is_table and index == len(rows)-1:
+ is_table = rows[index].startswith('Caption: ')
if not is_table:
break
@@ -90,17 +94,60 @@
else:
align.append(None)
+ # combine multiline cell
+ #for i, row in enumerate(rows):
+ # if row.strip(' ').endswith(r'\\'):
+ # print("delete: {0}".format(row))
+ # rows.pop(i)
+
+ # rowspan
+ rowspan=[ [] ]
+ is_spanned=[ [] ]
+ rowspan_head=[]
+ is_spanned_head=[]
+ for j in align:
+ rowspan_head.append(0)
+ is_spanned_head.append(False)
+ for i,r in enumerate(rows):
+ rowspan.append([])
+ is_spanned.append([])
+ for j in align:
+ rowspan[i].append(0)
+ is_spanned[i].append(False)
+ for i, row in enumerate(rows):
+ cells = self._split_row(row)
+ if (i+1)<len(rows):
+ cells_nextrow = self._split_row(rows[i+1])
+ else:
+ cells_nextrow = []
+ for j,cell in enumerate(cells):
+ try:
+ c_next = cells_nextrow[j].strip(' ')
+ except IndexError:
+ c_next = ""
+ if c_next == '^':
+ rowspan[i][j]=self._calc_rowspan(i+1, j, rows)
+ is_spanned[i+1][j] = True
+
# Build table
table = etree.SubElement(parent, 'table')
+
+ row_last=rows[len(rows)-1]
+ caption_text = self._get_caption(rows)
+ if caption_text != '':
+ caption = etree.SubElement(table, 'caption')
+ caption.text=caption_text
+ rows.pop(len(rows)-1)
+
thead = etree.SubElement(table, 'thead')
- self._build_row(header, thead, align)
+ self._build_row(header, thead, align, rowspan_head, is_spanned_head)
tbody = etree.SubElement(table, 'tbody')
if len(rows) == 0:
# Handle empty table
self._build_empty_row(tbody, align)
else:
- for row in rows:
- self._build_row(row.strip(' '), tbody, align)
+ for i, row in enumerate(rows):
+ self._build_row(row.strip(' '), tbody, align, rowspan[i], is_spanned[i])
def _build_empty_row(self, parent, align):
"""Build an empty row."""
@@ -110,24 +157,104 @@
etree.SubElement(tr, 'td')
count -= 1
- def _build_row(self, row, parent, align):
+ def _build_row(self, row, parent, align, rowspan, is_rowspanned):
""" Given a row of text, build table cells. """
tr = etree.SubElement(parent, 'tr')
tag = 'td'
if parent.tag == 'thead':
tag = 'th'
cells = self._split_row(row)
+
+ # colspan(init lists)
+ colspan = []
+ is_colspanned = []
+ for i, a in enumerate(align):
+ colspan.append(0)
+ is_colspanned.append(False)
+ # get colspan
+ for i, a in enumerate(align):
+ try:
+ celltext = cells[i].strip(' ')
+ except IndexError: # pragma: no cover
+ celltext=''
+ if celltext == '>':
+ if is_colspanned[i]==False:
+ colspan[i] = self._calc_colspan(i, cells)
+ if (i+1)<len(colspan):
+ is_colspanned[i+1]=True
+ cells[i]= self._get_colspantext(i, cells)
+ elif celltext == '<':
+ is_colspanned[i]=True
+ if i>0:
+ colspan[i-1] = self._calc_colspan(i, cells)
+
# We use align here rather than cells to ensure every row
# contains the same number of columns.
for i, a in enumerate(align):
- c = etree.SubElement(tr, tag)
+ if is_colspanned[i]==False and is_rowspanned[i]==False:
+ c = etree.SubElement(tr, tag)
+ if colspan[i] != 0:
+ c.set('colspan', "%d" %(colspan[i]))
+ if rowspan[i] != 0:
+ c.set('rowspan', "%d" %(rowspan[i]))
+
+ try:
+ c.text = cells[i].strip(' ')
+ except IndexError: # pragma: no cover
+ c.text = ""
+ if a:
+ c.set('align', a)
+
+#######################################################################
+ def _get_caption(self, rows):
+ row_last=rows[len(rows)-1]
+ caption_text=""
+ if row_last.startswith('Table: ') or row_last.startswith('Caption: '):
+ if row_last.startswith('Table: '):
+ caption_text=row_last[7:]
+ elif row_last.startswith('Caption: '):
+ caption_text=row_last[9:]
+ return caption_text
+
+#######################################################################
+ def _calc_rowspan(self, start, col, rows):
+ count=1
+ for i in range(start, len(rows)):
+ cells = self._split_row(rows[i])
try:
- c.text = cells[i].strip(' ')
- except IndexError: # pragma: no cover
- c.text = ""
- if a:
- c.set('align', a)
+ c= cells[col].strip(' ')
+ except IndexError:
+ c= ""
+ if c == '^':
+ count=count+1
+ else:
+ return(count)
+ return count
+
+#######################################################################
+ def _calc_colspan(self, start, cells):
+ count = 1
+ if cells[start].strip(' ') == '>' or cells[start].strip(' ') == '<':
+ a = cells[start].strip(' ')
+
+ for i in range(start,len(cells)):
+ if cells[i].strip(' ') == a:
+ count=count+1
+ else:
+ return count
+ return count
+ else:
+ return 0
+
+#######################################################################
+ def _get_colspantext(self, start, cells):
+ for i in range(start,len(cells)):
+ if cells[i].strip(' ') != '>':
+ return(cells[i])
+
+ return("")
+#####################################################################
def _split_row(self, row):
""" split a row of text into list of cells. """
if self.border:
ソースコード全体
"""
Tables Extension for Python-Markdown
====================================
Added parsing of tables to Python-Markdown.
See <https://Python-Markdown.github.io/extensions/tables>
for documentation.
Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
All changes Copyright 2008-2014 The Python Markdown Project
License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
from . import Extension
from ..blockprocessors import BlockProcessor
import xml.etree.ElementTree as etree
import re
PIPE_NONE = 0
PIPE_LEFT = 1
PIPE_RIGHT = 2
class TableProcessor(BlockProcessor):
""" Process Tables. """
RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
def __init__(self, parser):
    """Initialise per-block parsing state, then defer to the base class.

    ``parser`` is passed through unchanged to ``super().__init__``.
    """
    # Placeholder until test() runs; test() overwrites this with a bitmask
    # of PIPE_LEFT / PIPE_RIGHT describing the header row's outer pipes.
    self.border = False
    # Placeholder until test() runs; test() stores the split separator row
    # here and run() derives the column alignment from it.
    self.separator = ''
    super().__init__(parser)
def test(self, parent, block):
    """Decide whether ``block`` starts with a valid table header.

    The first two lines (column header and separator row) must both be
    valid table rows. The detected border flags and the split separator
    row are kept on ``self`` to avoid repeating the work in ``run()``.

    Returns:
        True when the block should be handled as a table, else False.
    """
    lines = [line.strip(' ') for line in block.split('\n')]
    if len(lines) < 2:
        return False

    head = lines[0]
    self.border = PIPE_NONE
    if head.startswith('|'):
        self.border |= PIPE_LEFT
    if self.RE_END_BORDER.search(head) is not None:
        self.border |= PIPE_RIGHT

    width = len(self._split_row(head))
    looks_like_table = width > 1

    # Each row in a single column table needs at least one pipe; only the
    # very last line may instead be a "Table: " / "Caption: " caption line.
    if not looks_like_table and width == 1 and self.border:
        last = len(lines) - 1
        for idx in range(1, len(lines)):
            line = lines[idx]
            looks_like_table = (
                line.startswith('|')
                or self.RE_END_BORDER.search(line) is not None
                or (idx == last and line.startswith(('Table: ', 'Caption: ')))
            )
            if not looks_like_table:
                break

    if not looks_like_table:
        return False

    # The separator row must have the header's cell count and consist only
    # of pipe, colon, dash and space characters.
    candidate = self._split_row(lines[1])
    if len(candidate) != width or not set(''.join(candidate)) <= set('|:- '):
        return False

    self.separator = candidate
    return True
def run(self, parent, blocks):
    """Consume the first block in ``blocks`` and append a ``<table>`` to ``parent``.

    Line 0 of the block is the header row, line 1 is the separator row
    (already split and stored in ``self.separator`` by ``test()``) and the
    remaining lines are body rows. If the last body line starts with
    ``Table: `` or ``Caption: `` it becomes a ``<caption>`` instead of a
    row. A body cell whose stripped text is ``^`` is merged into the cell
    above it through ``rowspan``.

    Args:
        parent: Element that receives the new ``table`` element.
        blocks: List of remaining text blocks; the first one is removed.
    """
    block = blocks.pop(0).split('\n')
    header = block[0].strip(' ')
    # Strip every body row once so that span detection, caption detection
    # and cell building all look at exactly the same text.
    rows = [row.strip(' ') for row in block[2:]]

    # Get alignment of columns
    align = []
    for c in self.separator:
        c = c.strip(' ')
        if c.startswith(':') and c.endswith(':'):
            align.append('center')
        elif c.startswith(':'):
            align.append('left')
        elif c.endswith(':'):
            align.append('right')
        else:
            align.append(None)
    ncols = len(align)

    # Take the caption line out before looking for row spans, so it can
    # never be mistaken for a data row. A table without body rows has no
    # last line to inspect, hence the guard.
    caption_text = self._get_caption(rows) if rows else ''
    if caption_text != '':
        rows.pop()

    # Row spans: rowspan[i][j] is the span of the cell at (row i, column j),
    # is_spanned[i][j] marks cells covered by a cell from the row above.
    rowspan = [[0] * ncols for _ in rows]
    is_spanned = [[False] * ncols for _ in rows]
    split_rows = [self._split_row(row) for row in rows]
    for i in range(len(rows) - 1):
        below = split_rows[i + 1]
        # Cells beyond the column count are never rendered; ignoring them
        # here keeps the span tables and the output in agreement.
        for j in range(min(ncols, len(split_rows[i]), len(below))):
            if below[j].strip(' ') == '^':
                rowspan[i][j] = self._calc_rowspan(i + 1, j, rows)
                is_spanned[i + 1][j] = True

    # Build table
    table = etree.SubElement(parent, 'table')
    if caption_text != '':
        caption = etree.SubElement(table, 'caption')
        caption.text = caption_text

    thead = etree.SubElement(table, 'thead')
    # The header row never takes part in row spans.
    self._build_row(header, thead, align, [0] * ncols, [False] * ncols)
    tbody = etree.SubElement(table, 'tbody')
    if not rows:
        # Handle empty table
        self._build_empty_row(tbody, align)
    else:
        for i, row in enumerate(rows):
            self._build_row(row, tbody, align, rowspan[i], is_spanned[i])
def _build_empty_row(self, parent, align):
"""Build an empty row."""
tr = etree.SubElement(parent, 'tr')
count = len(align)
while count:
etree.SubElement(tr, 'td')
count -= 1
def _build_row(self, row, parent, align, rowspan, is_rowspanned):
""" Given a row of text, build table cells. """
tr = etree.SubElement(parent, 'tr')
tag = 'td'
if parent.tag == 'thead':
tag = 'th'
cells = self._split_row(row)
# colspan(init lists)
colspan = []
is_colspanned = []
for i, a in enumerate(align):
colspan.append(0)
is_colspanned.append(False)
# get colspan
for i, a in enumerate(align):
try:
celltext = cells[i].strip(' ')
except IndexError: # pragma: no cover
celltext=''
if celltext == '>':
if is_colspanned[i]==False:
colspan[i] = self._calc_colspan(i, cells)
if (i+1)<len(colspan):
is_colspanned[i+1]=True
cells[i]= self._get_colspantext(i, cells)
elif celltext == '<':
is_colspanned[i]=True
if i>0:
colspan[i-1] = self._calc_colspan(i, cells)
# We use align here rather than cells to ensure every row
# contains the same number of columns.
for i, a in enumerate(align):
if is_colspanned[i]==False and is_rowspanned[i]==False:
c = etree.SubElement(tr, tag)
if colspan[i] != 0:
c.set('colspan', "%d" %(colspan[i]))
if rowspan[i] != 0:
c.set('rowspan', "%d" %(rowspan[i]))
try:
c.text = cells[i].strip(' ')
except IndexError: # pragma: no cover
c.text = ""
if a:
c.set('align', a)
#######################################################################
def _get_caption(self, rows):
row_last=rows[len(rows)-1]
caption_text=""
if row_last.startswith('Table: ') or row_last.startswith('Caption: '):
if row_last.startswith('Table: '):
caption_text=row_last[7:]
elif row_last.startswith('Caption: '):
caption_text=row_last[9:]
return caption_text
#######################################################################
def _calc_rowspan(self, start, col, rows):
count=1
for i in range(start, len(rows)):
cells = self._split_row(rows[i])
try:
c= cells[col].strip(' ')
except IndexError:
c= ""
if c == '^':
count=count+1
else:
return(count)
return count
#######################################################################
def _calc_colspan(self, start, cells):
count = 1
if cells[start].strip(' ') == '>' or cells[start].strip(' ') == '<':
a = cells[start].strip(' ')
for i in range(start,len(cells)):
if cells[i].strip(' ') == a:
count=count+1
else:
return count
return count
else:
return 0
#######################################################################
def _get_colspantext(self, start, cells):
for i in range(start,len(cells)):
if cells[i].strip(' ') != '>':
return(cells[i])
return("")
#####################################################################
def _split_row(self, row):
""" split a row of text into list of cells. """
if self.border:
if row.startswith('|'):
row = row[1:]
row = self.RE_END_BORDER.sub('', row)
return self._split(row)
def _split(self, row):
""" split a row of text with some code into a list of cells. """
elements = []
pipes = []
tics = []
tic_points = []
tic_region = []
good_pipes = []
# Parse row
# Throw out \\, and \|
for m in self.RE_CODE_PIPES.finditer(row):
# Store ` data (len, start_pos, end_pos)
if m.group(2):
# \`+
# Store length of each tic group: subtract \
tics.append(len(m.group(2)) - 1)
# Store start of group, end of group, and escape length
tic_points.append((m.start(2), m.end(2) - 1, 1))
elif m.group(3):
# `+
# Store length of each tic group
tics.append(len(m.group(3)))
# Store start of group, end of group, and escape length
tic_points.append((m.start(3), m.end(3) - 1, 0))
# Store pipe location
elif m.group(5):
pipes.append(m.start(5))
# Pair up tics according to size if possible
# Subtract the escape length *only* from the opening.
# Walk through tic list and see if tic has a close.
# Store the tic region (start of region, end of region).
pos = 0
tic_len = len(tics)
while pos < tic_len:
try:
tic_size = tics[pos] - tic_points[pos][2]
if tic_size == 0:
raise ValueError
index = tics[pos + 1:].index(tic_size) + 1
tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
pos += index + 1
except ValueError:
pos += 1
# Resolve pipes. Check if they are within a tic pair region.
# Walk through pipes comparing them to each region.
# - If pipe position is less that a region, it isn't in a region
# - If it is within a region, we don't want it, so throw it out
# - If we didn't throw it out, it must be a table pipe
for pipe in pipes:
throw_out = False
for region in tic_region:
if pipe < region[0]:
# Pipe is not in a region
break
elif region[0] <= pipe <= region[1]:
# Pipe is within a code region. Throw it out.
throw_out = True
break
if not throw_out:
good_pipes.append(pipe)
# Split row according to table delimiters.
pos = 0
for pipe in good_pipes:
elements.append(row[pos:pipe])
pos = pipe + 1
elements.append(row[pos:])
return elements
class TableExtension(Extension):
    """Markdown extension that installs the table block processor."""

    def extendMarkdown(self, md):
        """Register a ``TableProcessor`` on ``md``'s block parser.

        ``|`` is also added to ``md.ESCAPED_CHARS`` when it is not already
        listed there.
        """
        if '|' not in md.ESCAPED_CHARS:
            md.ESCAPED_CHARS.append('|')
        registry = md.parser.blockprocessors
        registry.register(TableProcessor(md.parser), 'table', 75)
def makeExtension(**kwargs): # pragma: no cover
    """Return a ``TableExtension`` built from the given keyword arguments."""
    return TableExtension(**kwargs)
バグはあるかもしれないので、ご使用の際は自己責任でお願いします。
適宜修正していきます。
つかいかた
基本的にMarkdown Preview Enhancedの表結合の文法を踏襲。
完全互換ではないかも。空セルの結合とかは実装してないです。
横結合
< を使う
| header1 | header2 |
| --------- | ------- |
| data1 | data2 |
| spanned data |< |
> を使ってもよい
| header1 | header2 |
| ------- | ------------ |
| data1 | data2 |
|> | spanned data |
縦結合
| header1 | header2 |
| ------- | ------------ |
| data1 | data2 |
| ^ | data3 |
おまけ
テーブルのブロックの最終行に「Table: 」または「Caption: 」(コロンの後ろに半角スペースが必要)で始まる行を書くと、その行がキャプションになる
| header1 | header2 |
| ------- | ------------ |
| data1 | data2 |
| ^ | data3 |
Table: 表1
最新版のPython-markdown
私の環境のPython-markdownは3.3.7であるが、最新版は3.4.1のようである。
3.3.7と3.4.1の差異は以下の通りで、変更量はそんなに多くない。
diffでマージしようとするとalignの出力箇所でconflictするかなー。
まぁ手動でマージできるレベルでしょう。
$ diff -u tables_3.3.7.py tables_3.4.1.py
--- tables_3.3.7.py 2023-02-12 17:09:32.897565600 +0900
+++ tables_3.4.1.py 2023-02-12 17:07:22.659948300 +0900
@@ -30,9 +30,11 @@
RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
- def __init__(self, parser):
+ def __init__(self, parser, config):
self.border = False
self.separator = ''
+ self.config = config
+
super().__init__(parser)
def test(self, parent, block):
@@ -126,7 +128,10 @@
except IndexError: # pragma: no cover
c.text = ""
if a:
- c.set('align', a)
+ if self.config['use_align_attribute']:
+ c.set('align', a)
+ else:
+ c.set('style', f'text-align: {a};')
def _split_row(self, row):
""" split a row of text into list of cells. """
@@ -212,11 +217,19 @@
class TableExtension(Extension):
""" Add tables to Markdown. """
+ def __init__(self, **kwargs):
+ self.config = {
+ 'use_align_attribute': [False, 'True to use align attribute instead of style.'],
+ }
+
+ super().__init__(**kwargs)
+
def extendMarkdown(self, md):
""" Add an instance of TableProcessor to BlockParser. """
if '|' not in md.ESCAPED_CHARS:
md.ESCAPED_CHARS.append('|')
- md.parser.blockprocessors.register(TableProcessor(md.parser), 'table', 75)
+ processor = TableProcessor(md.parser, self.getConfigs())
+ md.parser.blockprocessors.register(processor, 'table', 75)
def makeExtension(**kwargs): # pragma: no cover
2023.02.13追記
おまけがバグっておったわ。ガハハ
ソース修正済