0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

java + eclipse + mvn + pdfbox + apache poi

Posted at

pom.xml

参考記事のとおりに作成した pom.xml では、うまく jar が作成できなかった。
`<build>` 直下の `<pluginManagement>` 要素を削除すると、「jar-with-dependencies」が正しく効くようになる。

<?xml version="1.0" encoding="UTF-8"?>

<!-- Maven build for the mvn_sample project: declares the PDFBox / Apache POI / commons-lang3
	dependencies and builds an additional "jar-with-dependencies" archive during packaging. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<!-- Project coordinates. -->
	<groupId><PACKAGE_NAME></groupId>
	<artifactId>mvn_sample</artifactId>
	<version>0.0.1-SNAPSHOT</version>

	<name>mvn_sample</name>
	<!-- FIXME change it to the project's website -->
	<url>http://www.example.com</url>

	<!-- Sources are read as UTF-8; compiler source and target levels both follow java.version. -->
	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<java.version>17</java.version>
		<maven.compiler.source>${java.version}</maven.compiler.source>
		<maven.compiler.target>${java.version}</maven.compiler.target>

	</properties>

	<dependencies>
		<!-- Test-scoped dependencies (JUnit 4 and Hamcrest). -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.11</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
			<scope>test</scope>
		</dependency>
		<!-- Default (compile) scope libraries: commons-lang3, PDFBox and Apache POI. -->
		<dependency>
			<groupId>org.apache.commons</groupId>
			<artifactId>commons-lang3</artifactId>
			<version>3.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.pdfbox</groupId>
			<artifactId>pdfbox</artifactId>
			<version>2.0.32</version>
		</dependency>
		<!-- poi and poi-ooxml are both declared with the same version (5.3.0). -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<!-- 5.2.5 4.1.2 -->
			<version>5.3.0</version>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>5.3.0</version>
		</dependency>
	</dependencies>

	<build>
		<!-- The assembly plugin is declared directly under build/plugins. -->
		<plugins>
			<plugin>
				<artifactId>maven-assembly-plugin</artifactId>
				<version>3.0.0</version>
				<!-- Bind the assembly "single" goal to the package phase. -->
				<executions>
					<execution>
						<id>make-assembly</id>
						<phase>package</phase>
						<goals>
							<goal>single</goal>
						</goals>
					</execution>
				</executions>
				<configuration>
					<!-- Use the predefined jar-with-dependencies assembly descriptor. -->
					<descriptorRefs>
						<descriptorRef>jar-with-dependencies</descriptorRef>
					</descriptorRefs>
					<!-- Record the App class as the main class in the archive manifest. -->
					<archive>
						<manifest>
							<mainClass><PACKAGE_NAME>.mvn_sample.App</mainClass>
						</manifest>
					</archive>
				</configuration>
			</plugin>
		</plugins>
	</build>
</project>

App.java

Java 17、Eclipse 2024-06 での PDFBox と POI の参考コード。

package <PACKAGE_NAME>.mvn_sample;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;

/**
 * Small command-line sample that exercises PDFBox (text extraction from PDF files),
 * Apache POI (reading a cell from an Excel workbook) and commons-lang3 (string joining).
 *
 * <p>The first command-line argument selects which sample method runs; see
 * {@link #main(String[])}.
 */
public class App {
	/**
	 * Runs the sample selected by {@code args[0]}.
	 *
	 * <p>Recognized names are {@code parsePDF}, {@code parsePDF2}, {@code retvoid},
	 * {@code readPDF} and {@code readXLS}. With no arguments {@code readXLS} is run; any
	 * other name prints three fixed values joined by {@link #join(String[])}.
	 *
	 * @param args command-line arguments; only the first one is used
	 * @throws IOException if the selected sample fails to read its input file
	 */
	public static void main(String[] args) throws IOException {
		// With no arguments the Excel sample is the default.
		String exe_method = (args.length == 0) ? "readXLS" : args[0];
		System.out.println("execute method : " + exe_method);

		App app = new App();
		switch (exe_method) {
		case "parsePDF":
			app.parsePDF();
			break;
		case "parsePDF2":
			app.parsePDF2();
			break;
		case "retvoid":
			app.retvoid();
			break;
		case "readPDF":
			app.readPDF();
			break;
		case "readXLS":
			app.readXLS();
			break;
		default:
			String[] myArray = { "value1", "value2", "value3" };
			System.out.println(app.join(myArray));
			break;
		}
	}

	/**
	 * Parses {@code PATH\sample_01.pdf} through a PDFBox {@code RandomAccessFile} source
	 * and prints the extracted text to standard output.
	 *
	 * @throws IOException if the file cannot be opened or parsed
	 */
	public void parsePDF() throws IOException {
		System.out.println("parsePDF");
		File file = new File("PATH\\sample_01.pdf");
		// try-with-resources releases the file handle even when parsing fails.
		try (RandomAccessRead source = new org.apache.pdfbox.io.RandomAccessFile(file, "r")) {
			printParsedText(source);
		}
	}

	/**
	 * Parses {@code PATH\sample_01.pdf} through a
	 * {@code RandomAccessBufferedFileInputStream} source and prints the extracted text
	 * to standard output.
	 *
	 * @throws IOException if the file cannot be opened or parsed
	 */
	public void parsePDF2() throws IOException {
		System.out.println("parsePDF2");
		try (RandomAccessRead source = new RandomAccessBufferedFileInputStream(
				"PATH\\sample_01.pdf")) {
			printParsedText(source);
		}
	}

	/**
	 * Loads {@code PATH\sample_02.pdf} with {@code PDDocument.load} and prints its text
	 * one line at a time.
	 *
	 * @throws IOException if the file cannot be loaded or its text cannot be extracted
	 */
	public void readPDF() throws IOException {
		System.out.println("readPDF");

		try (PDDocument document = PDDocument
				.load(new File("PATH\\sample_02.pdf"))) {
			System.out.println("PDDocument.load");

			String pdfFileInText = new PDFTextStripper().getText(document);

			// Split on line breaks (LF or CRLF) and print each line separately.
			for (String line : pdfFileInText.split("\\r?\\n")) {
				System.out.println(line);
			}
		}
	}

	/**
	 * Opens {@code PATH\sample_01.xls} and prints the string value of cell A1 on the
	 * sheet at index 1.
	 *
	 * @throws EncryptedDocumentException if the workbook is password protected
	 * @throws IOException if the file cannot be read
	 */
	public void readXLS() throws EncryptedDocumentException, IOException {
		// The workbook is closed together with the stream so no handle is left open.
		try (InputStream input = new FileInputStream("PATH\\sample_01.xls");
				Workbook workbook = WorkbookFactory.create(input)) {
			// NOTE(review): sheet indexes are 0-based, so index 1 is the SECOND sheet;
			// confirm this is intended rather than the first sheet.
			Sheet sheet = workbook.getSheetAt(1);
			CellReference reference = new CellReference("A1");
			Row row = sheet.getRow(reference.getRow());
			Cell cell = row.getCell(reference.getCol());
			System.out.println(cell.getStringCellValue());
		}
	}

	/** Prints the literal text {@code retvoid}; takes no input and returns nothing. */
	public void retvoid() {
		System.out.println("retvoid");
	}

	/**
	 * Joins the given strings with a comma separator.
	 *
	 * @param args the values to join
	 * @return the result of {@code StringUtils.join(args, ",")}
	 */
	public String join(String[] args) {
		return StringUtils.join(args, ",");
	}

	/**
	 * Parses a PDF from {@code source} and prints all of its text to standard output.
	 * The resulting document is closed before returning.
	 */
	private static void printParsedText(RandomAccessRead source) throws IOException {
		PDFParser parser = new PDFParser(source);
		parser.parse();
		try (PDDocument pdf = parser.getPDDocument()) {
			System.out.println(new PDFTextStripper().getText(pdf));
		}
	}

}

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?