st ocr clipboard

Python

Last updated at 2024-06-30 · Posted at 2024-06-25


import streamlit as st
from PIL import ImageGrab
import io

def get_image_from_clipboard():
    """Return the image currently on the clipboard, or None if there is none.

    ``ImageGrab.grabclipboard()`` may return a list of file names instead of
    an image when files (rather than pixel data) were copied. Such a list is
    not displayable as an image, so it is treated the same as an empty
    clipboard.

    Returns:
        The clipboard image, or None when the clipboard holds no image data.
    """
    clip = ImageGrab.grabclipboard()
    # A list means file paths were copied, not image data.
    if clip is None or isinstance(clip, list):
        return None
    return clip

def main():
    """Render the page: a title, instructions, and a paste-from-clipboard button.

    When the button is pressed, the clipboard image is shown; if no image is
    available an error message is shown instead.
    """
    st.title("クリップボード画像アプリ")
    st.write("クリップボードに画像をコピーし、以下のボタンをクリックしてください。")

    # Nothing more to draw until the paste button is pressed.
    if not st.button("クリップボードから画像を貼り付け"):
        return

    pasted = get_image_from_clipboard()
    if pasted is None:
        st.error("クリップボードに画像が見つかりませんでした。")
        return

    st.image(pasted, caption="クリップボードからの画像", use_column_width=True)

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()


import streamlit as st
from PIL import ImageGrab, Image
import pandas as pd
import easyocr
import io
import numpy as np
from typing import List, Tuple

def get_image_from_clipboard():
    """Return the PIL image currently on the clipboard, or None.

    ``ImageGrab.grabclipboard()`` may return a list of file names instead of
    an image when files (rather than pixel data) were copied. Only a real
    ``Image.Image`` is passed on, because the callers display the result and
    encode it as PNG for OCR.

    Returns:
        The clipboard content as an ``Image.Image``, or None when the
        clipboard does not hold image data.
    """
    clip = ImageGrab.grabclipboard()
    # Reject None and non-image payloads such as a list of file paths.
    if isinstance(clip, Image.Image):
        return clip
    return None

def pil_to_bytes(image):
    """Encode *image* as PNG in memory and return the encoded bytes.

    The image is written with ``image.save(..., format='PNG')`` into an
    in-memory buffer, whose full contents are returned.
    """
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return buffer.getvalue()

def sort_by_y(element):
    """Sort key: the y coordinate of the first bounding-box point.

    ``element`` is indexed as ``element[0][0][1]``: its first item is the
    bounding box, whose first point is read as ``(x, y)``.
    """
    bbox = element[0]
    first_point = bbox[0]
    return first_point[1]

def sort_by_x(element):
    """Sort key: the x coordinate of the first bounding-box point.

    ``element`` is indexed as ``element[0][0][0]``: its first item is the
    bounding box, whose first point is read as ``(x, y)``.
    """
    bbox = element[0]
    first_point = bbox[0]
    return first_point[0]

def estimate_grid(
    elements: List[Tuple],
    min_gap: float = 5,
    max_lines: int = 10,
) -> Tuple[List[float], List[float]]:
    """Estimate vertical and horizontal grid-line positions from text boxes.

    The first point of every bounding box is taken as ``(x, y)``. For each
    axis the coordinates are sorted, and every pair of neighbouring
    coordinates that is more than ``min_gap`` apart contributes a candidate
    grid line at the midpoint of that gap. Only the ``max_lines`` widest
    gaps per axis are kept.

    Args:
        elements: ``(bbox, text)`` pairs; ``bbox[0]`` is read as ``(x, y)``.
        min_gap: Minimum spacing (exclusive) between neighbouring
            coordinates for the gap to count as a separator. Defaults to 5,
            the previously hard-coded pixel threshold.
        max_lines: Maximum number of grid lines returned per axis. Defaults
            to 10, the previously hard-coded cap; raise it for tables with
            more than 11 rows or columns.

    Returns:
        ``(x_lines, y_lines)``: midpoints of the selected gaps on each axis,
        ordered from the widest gap to the narrowest (not by position).
    """
    def find_gaps(coords):
        ordered = sorted(coords)
        gaps = []
        # Walk neighbouring pairs of the sorted coordinates.
        for prev, curr in zip(ordered, ordered[1:]):
            gap = curr - prev
            if gap > min_gap:
                gaps.append((prev + gap / 2, gap))
        # Widest gaps first; the sort is stable, so ties keep position order.
        gaps.sort(key=lambda item: item[1], reverse=True)
        return [midpoint for midpoint, _ in gaps[:max_lines]]

    x_lines = find_gaps([bbox[0][0] for bbox, _ in elements])
    y_lines = find_gaps([bbox[0][1] for bbox, _ in elements])

    return x_lines, y_lines

def assign_to_cell(bbox, x_lines, y_lines):
    """Map a bounding box to a ``(row, col)`` grid cell.

    The first point of ``bbox`` is read as ``(x, y)``. The row index is the
    number of ``y_lines`` strictly less than ``y``; the column index is the
    number of ``x_lines`` strictly less than ``x``.
    """
    anchor = bbox[0]
    x_pos = anchor[0]
    y_pos = anchor[1]
    row_index = len([line for line in y_lines if line < y_pos])
    col_index = len([line for line in x_lines if line < x_pos])
    return row_index, col_index

def extract_table_structure(result):
    """Arrange OCR detections into a table-shaped DataFrame.

    Each detection is placed into a ``(row, col)`` cell using the grid lines
    estimated from all detections. Texts that land in the same cell are
    joined with a single space, in ascending order of their y coordinate.

    Args:
        result: Iterable of ``(bbox, text, prob)`` triples.

    Returns:
        A ``pd.DataFrame`` with one column per grid column and one row per
        grid row; cells without text hold ``""``. An empty DataFrame is
        returned when ``result`` contains no detections.
    """
    elements = [(bbox, text) for bbox, text, _ in result]
    # With no detections the grid would be empty and max() below would
    # raise ValueError, so return an empty table instead.
    if not elements:
        return pd.DataFrame()

    elements.sort(key=sort_by_y)

    x_lines, y_lines = estimate_grid(elements)

    grid = {}
    for bbox, text in elements:
        cell = assign_to_cell(bbox, x_lines, y_lines)
        grid[cell] = f"{grid[cell]} {text}" if cell in grid else text

    # Highest occupied index + 1 gives the table dimensions.
    n_rows = max(row for row, _ in grid) + 1
    n_cols = max(col for _, col in grid) + 1

    df_data = [
        [grid.get((row, col), "") for col in range(n_cols)]
        for row in range(n_rows)
    ]

    return pd.DataFrame(df_data)

def _load_ocr_reader(languages):
    """Build an EasyOCR reader for the given tuple of language codes."""
    return easyocr.Reader(list(languages))


def ocr_image_to_df(image):
    """Run OCR on *image* and return the table DataFrame plus raw detections.

    Args:
        image: Image object accepted by ``pil_to_bytes``.

    Returns:
        ``(df, result)``: ``df`` is the DataFrame built by
        ``extract_table_structure``; ``result`` is the raw value returned by
        ``reader.readtext``.
    """
    image_bytes = pil_to_bytes(image)

    # Constructing easyocr.Reader is expensive, and this function runs on
    # every button press. Go through st.cache_resource so the reader is
    # built once and reused. The wrapper is created at call time so this
    # block needs `st` only when it actually runs.
    cached_loader = st.cache_resource(_load_ocr_reader)
    reader = cached_loader(('en', 'ja'))  # adjust the languages as needed

    result = reader.readtext(image_bytes)

    # Derive the table structure and build the DataFrame.
    df = extract_table_structure(result)

    return df, result

def main():
    """Render the page: paste an image from the clipboard and OCR it.

    After the button is pressed, the clipboard image is shown, followed by
    the recognized text (one detection per line) and the reconstructed
    table. If no image is available an error message is shown instead.
    """
    st.title("クリップボード画像アプリ")
    st.write("クリップボードに画像をコピーし、以下のボタンをクリックしてください。")

    # Nothing more to draw until the paste button is pressed.
    if not st.button("クリップボードから画像を貼り付け"):
        return

    pasted = get_image_from_clipboard()
    if pasted is None:
        st.error("クリップボードに画像が見つかりませんでした。")
        return

    st.image(pasted, caption="クリップボードからの画像", use_column_width=True)

    # Run OCR and convert the detections into a DataFrame.
    table, detections = ocr_image_to_df(pasted)

    # Show the recognized text, one detection per line.
    lines = [text for _, text, _ in detections]
    st.write("抽出されたテキスト:")
    st.code("\n".join(lines))

    # Show the reconstructed table.
    st.write("抽出された表:")
    st.dataframe(table)

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do by signing up