-
Notifications
You must be signed in to change notification settings - Fork 6k
feat(pdf-EPUB): add PDF to EPUB/AZW3 conversion functionality via Calibre #4947
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 7 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
265cfc0
feat(pdf-conversion): add PDF to EPUB conversion functionality
balazs-szucs ff0ddb3
fix(controller): handle blank file extension check in PDF to EPUB con…
balazs-szucs ebd193b
remove artefacts from switching from V1-V2 back and forth
balazs-szucs 42761ed
Merge branch 'main' into pdf-to-epub
balazs-szucs 0dcb55f
refactor: simplify HTML input elements in ebook-to-pdf form
balazs-szucs 4762572
Merge remote-tracking branch 'origin/pdf-to-epub' into pdf-to-epub
balazs-szucs 2bb0f93
feat: add support for AZW3 output format in PDF to EPUB conversion
balazs-szucs 0b2f3df
Merge branch 'main' into pdf-to-epub
balazs-szucs File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
204 changes: 204 additions & 0 deletions
204
...ain/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToEpubController.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,204 @@ | ||
| package stirling.software.SPDF.controller.api.converters; | ||
|
|
||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import java.nio.file.StandardCopyOption; | ||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
|
|
||
| import org.apache.commons.io.FilenameUtils; | ||
| import org.springframework.http.MediaType; | ||
| import org.springframework.http.ResponseEntity; | ||
| import org.springframework.web.bind.annotation.ModelAttribute; | ||
| import org.springframework.web.bind.annotation.PostMapping; | ||
| import org.springframework.web.bind.annotation.RequestMapping; | ||
| import org.springframework.web.bind.annotation.RestController; | ||
| import org.springframework.web.multipart.MultipartFile; | ||
|
|
||
| import io.github.pixee.security.Filenames; | ||
| import io.swagger.v3.oas.annotations.Operation; | ||
| import io.swagger.v3.oas.annotations.tags.Tag; | ||
|
|
||
| import lombok.RequiredArgsConstructor; | ||
| import lombok.extern.slf4j.Slf4j; | ||
|
|
||
| import stirling.software.SPDF.config.EndpointConfiguration; | ||
| import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest; | ||
| import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest.OutputFormat; | ||
| import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest.TargetDevice; | ||
| import stirling.software.common.util.GeneralUtils; | ||
| import stirling.software.common.util.ProcessExecutor; | ||
| import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult; | ||
| import stirling.software.common.util.TempFileManager; | ||
| import stirling.software.common.util.WebResponseUtils; | ||
|
|
||
| @RestController | ||
| @RequestMapping("/api/v1/convert") | ||
| @Tag(name = "Convert", description = "Convert APIs") | ||
| @RequiredArgsConstructor | ||
| @Slf4j | ||
| public class ConvertPDFToEpubController { | ||
|
|
||
| private static final String CALIBRE_GROUP = "Calibre"; | ||
| private static final String DEFAULT_EXTENSION = "pdf"; | ||
| private static final String FILTERED_CSS = | ||
| "font-family,color,background-color,margin-left,margin-right"; | ||
| private static final String SMART_CHAPTER_EXPRESSION = | ||
| "//h:*[re:test(., '\\s*Chapter\\s+', 'i')]"; | ||
|
|
||
| private final TempFileManager tempFileManager; | ||
| private final EndpointConfiguration endpointConfiguration; | ||
|
|
||
| private static List<String> buildCalibreCommand( | ||
| Path inputPath, Path outputPath, boolean detectChapters, TargetDevice targetDevice) { | ||
| List<String> command = new ArrayList<>(); | ||
| command.add("ebook-convert"); | ||
| command.add(inputPath.toString()); | ||
| command.add(outputPath.toString()); | ||
|
|
||
| // Golden defaults | ||
| command.add("--enable-heuristics"); | ||
| command.add("--insert-blank-line"); | ||
| command.add("--filter-css"); | ||
| command.add(FILTERED_CSS); | ||
|
|
||
| if (detectChapters) { | ||
| command.add("--chapter"); | ||
| command.add(SMART_CHAPTER_EXPRESSION); | ||
| } | ||
|
|
||
| if (targetDevice != null) { | ||
| command.add("--output-profile"); | ||
| command.add(targetDevice.getCalibreProfile()); | ||
| } | ||
|
|
||
| return command; | ||
| } | ||
|
|
||
| @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/epub") | ||
| @Operation( | ||
| summary = "Convert PDF to EPUB/AZW3", | ||
| description = | ||
| "Convert a PDF file to a high-quality EPUB or AZW3 ebook using Calibre. Input:PDF" | ||
| + " Output:EPUB/AZW3 Type:SISO") | ||
| public ResponseEntity<byte[]> convertPdfToEpub(@ModelAttribute ConvertPdfToEpubRequest request) | ||
| throws Exception { | ||
|
|
||
| if (!endpointConfiguration.isGroupEnabled(CALIBRE_GROUP)) { | ||
| throw new IllegalStateException( | ||
| "Calibre support is disabled. Enable the Calibre group or install Calibre to use" | ||
| + " this feature."); | ||
| } | ||
|
|
||
| MultipartFile inputFile = request.getFileInput(); | ||
| if (inputFile == null || inputFile.isEmpty()) { | ||
| throw new IllegalArgumentException("No input file provided"); | ||
| } | ||
|
|
||
| boolean detectChapters = !Boolean.FALSE.equals(request.getDetectChapters()); | ||
| TargetDevice targetDevice = | ||
| request.getTargetDevice() == null | ||
| ? TargetDevice.TABLET_PHONE_IMAGES | ||
| : request.getTargetDevice(); | ||
| OutputFormat outputFormat = | ||
| request.getOutputFormat() == null ? OutputFormat.EPUB : request.getOutputFormat(); | ||
|
|
||
| String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); | ||
| if (originalFilename == null || originalFilename.isBlank()) { | ||
| originalFilename = "document." + DEFAULT_EXTENSION; | ||
| } | ||
|
|
||
| String extension = FilenameUtils.getExtension(originalFilename); | ||
| if (extension.isBlank()) { | ||
| throw new IllegalArgumentException("Unable to determine file type"); | ||
| } | ||
|
|
||
| if (!DEFAULT_EXTENSION.equalsIgnoreCase(extension)) { | ||
| throw new IllegalArgumentException("Input file must be a PDF"); | ||
| } | ||
|
|
||
| String baseName = FilenameUtils.getBaseName(originalFilename); | ||
| if (baseName == null || baseName.isBlank()) { | ||
| baseName = "document"; | ||
| } | ||
|
|
||
| Path workingDirectory = null; | ||
| Path inputPath = null; | ||
| Path outputPath = null; | ||
|
|
||
| try { | ||
| workingDirectory = tempFileManager.createTempDirectory(); | ||
| inputPath = workingDirectory.resolve(baseName + "." + DEFAULT_EXTENSION); | ||
| outputPath = workingDirectory.resolve(baseName + "." + outputFormat.getExtension()); | ||
|
|
||
| try (InputStream inputStream = inputFile.getInputStream()) { | ||
| Files.copy(inputStream, inputPath, StandardCopyOption.REPLACE_EXISTING); | ||
| } | ||
|
|
||
| List<String> command = | ||
| buildCalibreCommand(inputPath, outputPath, detectChapters, targetDevice); | ||
| ProcessExecutorResult result = | ||
| ProcessExecutor.getInstance(ProcessExecutor.Processes.CALIBRE) | ||
| .runCommandWithOutputHandling(command, workingDirectory.toFile()); | ||
|
|
||
| if (result == null) { | ||
| throw new IllegalStateException("Calibre conversion returned no result"); | ||
| } | ||
|
|
||
| if (result.getRc() != 0) { | ||
| String errorMessage = result.getMessages(); | ||
| if (errorMessage == null || errorMessage.isBlank()) { | ||
| errorMessage = "Calibre conversion failed"; | ||
| } | ||
| throw new IllegalStateException(errorMessage); | ||
| } | ||
|
|
||
| if (!Files.exists(outputPath) || Files.size(outputPath) == 0L) { | ||
| throw new IllegalStateException( | ||
| "Calibre did not produce a " + outputFormat.name() + " output"); | ||
| } | ||
|
|
||
| String outputFilename = | ||
| GeneralUtils.generateFilename( | ||
| originalFilename, | ||
| "_convertedTo" | ||
| + outputFormat.name() | ||
| + "." | ||
| + outputFormat.getExtension()); | ||
|
|
||
| byte[] outputBytes = Files.readAllBytes(outputPath); | ||
| MediaType mediaType = MediaType.valueOf(outputFormat.getMediaType()); | ||
| return WebResponseUtils.bytesToWebResponse(outputBytes, outputFilename, mediaType); | ||
| } finally { | ||
| cleanupTempFiles(workingDirectory, inputPath, outputPath); | ||
| } | ||
| } | ||
|
|
||
| private void cleanupTempFiles(Path workingDirectory, Path inputPath, Path outputPath) { | ||
| if (workingDirectory == null) { | ||
| return; | ||
| } | ||
| List<Path> pathsToDelete = new ArrayList<>(); | ||
| if (inputPath != null) { | ||
| pathsToDelete.add(inputPath); | ||
| } | ||
| if (outputPath != null) { | ||
| pathsToDelete.add(outputPath); | ||
| } | ||
| for (Path path : pathsToDelete) { | ||
| try { | ||
| Files.deleteIfExists(path); | ||
| } catch (IOException e) { | ||
| log.warn("Failed to delete temporary file: {}", path, e); | ||
| } | ||
| } | ||
|
|
||
| try { | ||
| tempFileManager.deleteTempDirectory(workingDirectory); | ||
| } catch (Exception e) { | ||
| log.warn("Failed to delete temporary directory: {}", workingDirectory, e); | ||
| } | ||
| } | ||
balazs-szucs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 58 additions & 0 deletions
58
...re/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPdfToEpubRequest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| package stirling.software.SPDF.model.api.converters; | ||
|
|
||
| import io.swagger.v3.oas.annotations.media.Schema; | ||
|
|
||
| import lombok.Data; | ||
| import lombok.EqualsAndHashCode; | ||
| import lombok.Getter; | ||
|
|
||
| import stirling.software.common.model.api.PDFFile; | ||
|
|
||
| @Data | ||
| @EqualsAndHashCode(callSuper = true) | ||
balazs-szucs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| public class ConvertPdfToEpubRequest extends PDFFile { | ||
|
|
||
| @Schema( | ||
| description = "Detect headings that look like chapters and insert EPUB page breaks.", | ||
| allowableValues = {"true", "false"}, | ||
| defaultValue = "true") | ||
| private Boolean detectChapters = Boolean.TRUE; | ||
|
|
||
| @Schema( | ||
| description = "Choose an output profile optimized for the reader device.", | ||
| allowableValues = {"TABLET_PHONE_IMAGES", "KINDLE_EINK_TEXT"}, | ||
| defaultValue = "TABLET_PHONE_IMAGES") | ||
| private TargetDevice targetDevice = TargetDevice.TABLET_PHONE_IMAGES; | ||
|
|
||
| @Schema( | ||
| description = "Choose the output format for the ebook.", | ||
| allowableValues = {"EPUB", "AZW3"}, | ||
| defaultValue = "EPUB") | ||
| private OutputFormat outputFormat = OutputFormat.EPUB; | ||
|
|
||
| @Getter | ||
| public enum TargetDevice { | ||
| TABLET_PHONE_IMAGES("tablet"), | ||
| KINDLE_EINK_TEXT("kindle"); | ||
|
|
||
| private final String calibreProfile; | ||
|
|
||
| TargetDevice(String calibreProfile) { | ||
| this.calibreProfile = calibreProfile; | ||
| } | ||
| } | ||
|
|
||
| @Getter | ||
| public enum OutputFormat { | ||
| EPUB("epub", "application/epub+zip"), | ||
| AZW3("azw3", "application/vnd.amazon.ebook"); | ||
|
|
||
| private final String extension; | ||
| private final String mediaType; | ||
|
|
||
| OutputFormat(String extension, String mediaType) { | ||
| this.extension = extension; | ||
| this.mediaType = mediaType; | ||
| } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.