from docx import Document
def extract_hyperlinks_with_text(docx_path):
    """Collect the anchor hyperlinks of a Word document with their text.

    Every ``w:hyperlink`` element of the main document XML is inspected.
    Its display text is the concatenation of all ``w:t`` elements nested
    inside it, and its target is the value of the ``w:anchor`` attribute.
    Hyperlinks whose ``w:anchor`` attribute is missing or empty are skipped
    (presumably these are external links that point to a relationship id
    instead -- confirm against the WordprocessingML spec before relying
    on it).

    Args:
        docx_path: Passed straight to ``docx.Document``.

    Returns:
        A list of ``(text, anchor)`` tuples in the order the hyperlinks
        are found.
    """
    # Clark-notation tag names for the WordprocessingML main namespace.
    w_ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    hyperlink_tag = f'{{{w_ns}}}hyperlink'
    text_tag = f'{{{w_ns}}}t'
    anchor_attr = f'{{{w_ns}}}anchor'

    # Open the Word document and take the root element of its main part.
    doc = Document(docx_path)
    root = doc.element

    hyperlinks = []
    # Walk the whole XML tree looking for hyperlink elements.
    for hyperlink in root.iter(hyperlink_tag):
        # An empty <w:t/> has ``text is None``; treat it as an empty string
        # so that joining never raises TypeError.
        text = ''.join(el.text or '' for el in hyperlink.iter(text_tag))
        # The link target is stored in the w:anchor attribute.
        link = hyperlink.get(anchor_attr)
        # Only keep hyperlinks that actually carry an anchor.
        if link:
            hyperlinks.append((text, link))
    return hyperlinks
# Usage example: runs only when the file is executed as a script, so that
# importing this module to reuse extract_hyperlinks_with_text does not try
# to open any document.
if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the placeholder path.
    docx_path = sys.argv[1] if len(sys.argv) > 1 else 'path_to_your_document.docx'
    hyperlinks_with_text = extract_hyperlinks_with_text(docx_path)
    for text, link in hyperlinks_with_text:
        print(f'Text: {text}, Hyperlink: {link}')
Register as a new user and use Qiita more conveniently
- You get articles that match your needs
- You can efficiently read back useful information
- You can use the dark theme