0
1

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 5 years have passed since the last update.

PDFBOX 読み取りサンプル

Posted at
import java.io.FileInputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.tools.ExtractText;

/**
 * Sample showing two ways to read text out of a PDF with Apache PDFBox:
 * writing it to a text file via the bundled {@code ExtractText} tool, and
 * printing it to the console with a custom word separator via
 * {@code PDFTextStripper}.
 */
public class PDFsample {
	
	/** Path of the PDF that both samples read. */
	static String pdfFile  = "C:\\Temp\\sample-1.pdf";
	/** Path of the text file that {@link #test1()} passes as the output argument. */
	static String textFile = "C:\\Temp\\sample.txt";
	
	/**
	 * Runs both samples in order.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Write the PDF out to a text file
		test1();
		
		// Print the PDF to the console, words separated by tabs
		test2();
	}
	
	/**
	 * Writes the PDF's text to a text file by delegating to PDFBox's
	 * {@code ExtractText} entry point, passing {@link #pdfFile} and
	 * {@link #textFile} as its arguments. Any exception is reported via
	 * its stack trace rather than propagated.
	 */
	public static void test1(){
		try
		{
			// Output the PDF to a text file
			ExtractText.main(new String[]{pdfFile, textFile});
		}
		catch( Exception e )
		{
			e.printStackTrace();
		}
	}
	
	/**
	 * Prints the PDF's text to the console using a tab as the word separator.
	 * Any exception is reported via its stack trace rather than propagated.
	 */
	public static void test2(){
		// try-with-resources: the original never closed the input stream or
		// the PDDocument, leaking the file handle and the document's buffers.
		try (FileInputStream in = new FileInputStream(pdfFile);
				PDDocument document = PDDocument.load(in)) {
		
			PDFTextStripper stripper = new PDFTextStripper();
		
			// Use a tab as the word separator
			stripper.setWordSeparator("\t");
			
			// Extract the text of the PDF
			String content = stripper.getText(document);
	
			// Print the extracted text to the console
			System.out.println( content );
		
		} catch(Exception e){
			e.printStackTrace();
		}
	}
}
0
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
1

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?