ああ

アルゴリズム

Last updated at 2025-05-13 · Posted at 2025-05-13

import os
import tempfile

import camelot
import pdfplumber
import streamlit as st

# Streamlit page: accept a PDF upload, then show the text of every page
# (via pdfplumber) and every table detected by camelot's "stream" flavor.
st.title("PDFデータ仕様書メタ情報抽出ツール")

uploaded_file = st.file_uploader("PDFファイルをアップロードしてください", type="pdf")

# Everything below needs an uploaded file; without this guard `pdf_path`
# would be undefined on the first render, before the user picks a file.
if uploaded_file:
    # Both extractors are handed a filesystem path, so the upload is spooled
    # to disk first. delete=False keeps the file after the handle is closed
    # so it can be reopened by path; it is removed explicitly in `finally`.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        # getvalue() returns the whole buffer regardless of the current
        # stream position, unlike read().
        tmp_file.write(uploaded_file.getvalue())
        pdf_path = tmp_file.name

    try:
        st.subheader("抽出されたテキスト")
        with pdfplumber.open(pdf_path) as pdf:
            for page_number, page in enumerate(pdf.pages, start=1):
                # NOTE(review): extract_text() may yield nothing for pages
                # without a text layer (e.g. scanned images); fall back to
                # an empty string so the page header is still shown.
                text = page.extract_text() or ""
                st.text(f"--- Page {page_number} ---")
                st.text(text)

        st.subheader("抽出されたテーブル")
        tables = camelot.read_pdf(pdf_path, pages="all", flavor="stream")
        for table_number, table in enumerate(tables, start=1):
            st.text(f"--- Table {table_number} ---")
            st.dataframe(table.df)
    finally:
        # Remove the spooled copy so repeated runs do not accumulate
        # temporary PDFs on disk.
        os.remove(pdf_path)

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do by signing up