Skip to content

Commit

Permalink
!296 新增PDF线程管理,超时管理,内存缓存管理,更新PDF解析组件版本
Browse files Browse the repository at this point in the history
Merge pull request !296 from 高雄/pdfddd
  • Loading branch information
gitchenjh authored and gitee-org committed May 27, 2024
2 parents 7bf07cb + bb0139b commit 0a4ae41
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 58 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<antlr.version>2.7.7</antlr.version>
<concurrentlinkedhashmap.version>1.4.2</concurrentlinkedhashmap.version>
<rocksdb.version>5.17.2</rocksdb.version>
<pdfbox.version>2.0.29</pdfbox.version>
<pdfbox.version>3.0.2</pdfbox.version>
<jai-imageio.version>1.4.0</jai-imageio.version>
<jbig2-imageio.version>3.0.4</jbig2-imageio.version>
<galimatias.version>0.2.1</galimatias.version>
Expand Down
8 changes: 8 additions & 0 deletions server/src/main/config/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,14 @@ convertMedias = ${KK_CONVERTMEDIAS:avi,mov,wmv,mkv,3gp,rm}
#PDF预览模块设置
#配置PDF文件生成图片的像素大小,dpi 越高,图片质量越清晰,同时也会消耗更多的计算资源。
pdf2jpg.dpi = ${KK_PDF2JPG_DPI:144}
#PDF转换超时设置 (低于50页) 温馨提示这里数字仅供参考
pdf.timeout =${KK_PDF_TIMEOUT:90}
#PDF转换超时设置 (高于50小于200页)
pdf.timeout80 =${KK_PDF_TIMEOUT80:180}
#PDF转换超时设置 (大于200页)
pdf.timeout200 =${KK_PDF_TIMEOUT200:300}
#PDF转换线程设置
pdf.thread =${KK_PDF_THREAD:5}
#是否禁止演示模式
pdf.presentationMode.disable = ${KK_PDF_PRESENTATION_MODE_DISABLE:true}
#是否禁止打开文件
Expand Down
67 changes: 67 additions & 0 deletions server/src/main/java/cn/keking/config/ConfigConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ public class ConfigConstants {
private static String homePagination;
private static String homePageSize;
private static String homeSearch;
private static int pdfTimeout;
private static int pdfTimeout80;
private static int pdfTimeout200;
private static int pdfThread;

public static final String DEFAULT_CACHE_ENABLED = "true";
public static final String DEFAULT_TXT_TYPE = "txt,html,htm,asp,jsp,xml,json,properties,md,gitignore,log,java,py,c,cpp,sql,sh,bat,m,bas,prg,cmd,xbrl";
Expand Down Expand Up @@ -107,6 +111,10 @@ public class ConfigConstants {
public static final String DEFAULT_HOME_PAGINATION = "true";
public static final String DEFAULT_HOME_PAGSIZE = "15";
public static final String DEFAULT_HOME_SEARCH = "true";
public static final String DEFAULT_PDF_TIMEOUT = "90";
public static final String DEFAULT_PDF_TIMEOUT80 = "180";
public static final String DEFAULT_PDF_TIMEOUT200 = "300";
public static final String DEFAULT_PDF_THREAD = "5";

public static Boolean isCacheEnabled() {
return cacheEnabled;
Expand Down Expand Up @@ -580,6 +588,65 @@ public static void setCadThreadValue(int cadThread) {
ConfigConstants.cadThread = cadThread;
}

/*
 * PDF conversion module settings.
 */

/**
 * Returns the PDF conversion timeout, in seconds, that {@code pdf2jpg} applies
 * to documents of at most 50 pages.
 *
 * @return the currently configured value of {@code pdf.timeout}
 */
public static int getPdfTimeout() {
    return ConfigConstants.pdfTimeout;
}

/**
 * Receives the {@code pdf.timeout} property (90 when the property is absent)
 * and forwards it to the static setter so the value lands in the static field.
 *
 * @param pdfTimeout timeout value bound from configuration
 */
@Value("${pdf.timeout:90}")
public void setPdfTimeout(int pdfTimeout) {
    ConfigConstants.setPdfTimeoutValue(pdfTimeout);
}

/**
 * Stores the PDF conversion timeout in the static field read by
 * {@link #getPdfTimeout()}. ConfigRefreshComponent also calls this when it
 * re-reads the configuration file.
 *
 * @param pdfTimeout new timeout value
 */
public static void setPdfTimeoutValue(final int pdfTimeout) {
    ConfigConstants.pdfTimeout = pdfTimeout;
}


/**
 * Returns the PDF conversion timeout, in seconds, that {@code pdf2jpg} applies
 * to documents with more than 50 and at most 200 pages.
 *
 * @return the currently configured value of {@code pdf.timeout80}
 */
public static int getPdfTimeout80() {
    return ConfigConstants.pdfTimeout80;
}

/**
 * Receives the {@code pdf.timeout80} property (180 when the property is absent)
 * and forwards it to the static setter so the value lands in the static field.
 *
 * @param pdfTimeout80 timeout value bound from configuration
 */
@Value("${pdf.timeout80:180}")
public void setPdfTimeout80(int pdfTimeout80) {
    ConfigConstants.setPdfTimeout80Value(pdfTimeout80);
}

/**
 * Stores the mid-size-document PDF timeout in the static field read by
 * {@link #getPdfTimeout80()}. ConfigRefreshComponent also calls this when it
 * re-reads the configuration file.
 *
 * @param pdfTimeout80 new timeout value
 */
public static void setPdfTimeout80Value(final int pdfTimeout80) {
    ConfigConstants.pdfTimeout80 = pdfTimeout80;
}



/**
 * Returns the PDF conversion timeout, in seconds, that {@code pdf2jpg} applies
 * to documents with more than 200 pages.
 *
 * @return the currently configured value of {@code pdf.timeout200}
 */
public static int getPdfTimeout200() {
    return ConfigConstants.pdfTimeout200;
}

/**
 * Receives the {@code pdf.timeout200} property (300 when the property is absent)
 * and forwards it to the static setter so the value lands in the static field.
 *
 * @param pdfTimeout200 timeout value bound from configuration
 */
@Value("${pdf.timeout200:300}")
public void setPdfTimeout200(int pdfTimeout200) {
    ConfigConstants.setPdfTimeout200Value(pdfTimeout200);
}

/**
 * Stores the large-document PDF timeout in the static field read by
 * {@link #getPdfTimeout200()}. ConfigRefreshComponent also calls this when it
 * re-reads the configuration file.
 *
 * @param pdfTimeout200 new timeout value
 */
public static void setPdfTimeout200Value(final int pdfTimeout200) {
    ConfigConstants.pdfTimeout200 = pdfTimeout200;
}


/**
 * Returns the configured PDF conversion thread setting ({@code pdf.thread}).
 * NOTE(review): presumably the size of the PDF conversion thread pool; the
 * consumer of this value is not visible here, so confirm against the caller.
 *
 * @return the currently configured value of {@code pdf.thread}
 */
public static int getPdfThread() {
    return ConfigConstants.pdfThread;
}

/**
 * Receives the {@code pdf.thread} property (5 when the property is absent)
 * and forwards it to the static setter so the value lands in the static field.
 *
 * @param pdfThread thread setting bound from configuration
 */
@Value("${pdf.thread:5}")
public void setPdfThread(int pdfThread) {
    ConfigConstants.setPdfThreadValue(pdfThread);
}

/**
 * Stores the PDF conversion thread setting in the static field read by
 * {@link #getPdfThread()}. ConfigRefreshComponent also calls this when it
 * re-reads the configuration file.
 *
 * @param pdfThread new thread setting
 */
public static void setPdfThreadValue(final int pdfThread) {
    ConfigConstants.pdfThread = pdfThread;
}

/**
* 以下为OFFICE转换模块设置
*/
Expand Down
12 changes: 12 additions & 0 deletions server/src/main/java/cn/keking/config/ConfigRefreshComponent.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ public void run() {
String homePagination;
String homePageSize;
String homeSearch;
int pdfTimeout;
int pdfTimeout80;
int pdfTimeout200;
int pdfThread;
while (true) {
FileReader fileReader = new FileReader(configFilePath);
BufferedReader bufferedReader = new BufferedReader(fileReader);
Expand Down Expand Up @@ -126,6 +130,10 @@ public void run() {
homePageSize = properties.getProperty("home.pagesize", ConfigConstants.DEFAULT_HOME_PAGSIZE);
homeSearch = properties.getProperty("home.search", ConfigConstants.DEFAULT_HOME_SEARCH);
cadThread = Integer.parseInt(properties.getProperty("cad.thread", ConfigConstants.DEFAULT_CAD_THREAD));
pdfTimeout = Integer.parseInt(properties.getProperty("pdf.timeout", ConfigConstants.DEFAULT_PDF_TIMEOUT));
pdfTimeout80 = Integer.parseInt(properties.getProperty("pdf.timeout80", ConfigConstants.DEFAULT_PDF_TIMEOUT80));
pdfTimeout200 = Integer.parseInt(properties.getProperty("pdf.timeout200", ConfigConstants.DEFAULT_PDF_TIMEOUT200));
pdfThread = Integer.parseInt(properties.getProperty("pdf.thread", ConfigConstants.DEFAULT_PDF_THREAD));
prohibitArray = prohibit.split(",");

ConfigConstants.setCacheEnabledValueValue(cacheEnabled);
Expand Down Expand Up @@ -169,6 +177,10 @@ public void run() {
ConfigConstants.setHomePaginationValue(homePagination);
ConfigConstants.setHomePageSizeValue(homePageSize);
ConfigConstants.setHomeSearchValue(homeSearch);
ConfigConstants.setPdfTimeoutValue(pdfTimeout);
ConfigConstants.setPdfTimeout80Value(pdfTimeout80);
ConfigConstants.setPdfTimeout200Value(pdfTimeout200);
ConfigConstants.setPdfThreadValue(pdfThread);
setWatermarkConfig(properties);
bufferedReader.close();
fileReader.close();
Expand Down
122 changes: 69 additions & 53 deletions server/src/main/java/cn/keking/service/FileHandlerService.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
import com.aspose.cad.fileformats.cad.CadDrawTypeMode;
import com.aspose.cad.fileformats.tiff.enums.TiffExpectedFormat;
import com.aspose.cad.imageoptions.*;
import com.itextpdf.text.pdf.PdfReader;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
Expand All @@ -37,7 +37,10 @@
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.*;
import java.util.stream.IntStream;

Expand Down Expand Up @@ -236,74 +239,87 @@ public List<String> pdf2jpg(String fileNameFilePath, String pdfFilePath, String
boolean forceUpdatedCache = fileAttribute.forceUpdatedCache();
boolean usePasswordCache = fileAttribute.getUsePasswordCache();
String filePassword = fileAttribute.getFilePassword();
String pdfPassword = null;
PDDocument doc = null;
PdfReader pdfReader = null;
PDDocument doc;
final String[] pdfPassword = {null};
final int[] pageCount = new int[1];
if (!forceUpdatedCache) {
List<String> cacheResult = this.loadPdf2jpgCache(pdfFilePath);
if (!CollectionUtils.isEmpty(cacheResult)) {
return cacheResult;
}
}
List<String> imageUrls = new ArrayList<>();
File pdfFile = new File(fileNameFilePath);
if (!pdfFile.exists()) {
return null;
}
int index = pdfFilePath.lastIndexOf(".");
String folder = pdfFilePath.substring(0, index);
File path = new File(folder);
if (!path.exists() && !path.mkdirs()) {
logger.error("创建转换文件【{}】目录失败,请检查目录权限!", folder);
}
try {
File pdfFile = new File(fileNameFilePath);
if (!pdfFile.exists()) {
return null;
}
doc = PDDocument.load(pdfFile, filePassword);
doc = Loader.loadPDF(pdfFile, filePassword);
doc.setResourceCache(new NotResourceCache());
int pageCount = doc.getNumberOfPages();
PDFRenderer pdfRenderer = new PDFRenderer(doc);
int index = pdfFilePath.lastIndexOf(".");
String folder = pdfFilePath.substring(0, index);
File path = new File(folder);
if (!path.exists() && !path.mkdirs()) {
logger.error("创建转换文件【{}】目录失败,请检查目录权限!", folder);
pageCount[0] = doc.getNumberOfPages();
} catch (IOException e) {
Throwable[] throwableArray = ExceptionUtils.getThrowables(e);
for (Throwable throwable : throwableArray) {
if (throwable instanceof IOException || throwable instanceof EncryptedDocumentException) {
if (e.getMessage().toLowerCase().contains(PDF_PASSWORD_MSG)) {
pdfPassword[0] = PDF_PASSWORD_MSG; //查询到该文件是密码文件 输出带密码的值
}
}
}
String imageFilePath;
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
imageFilePath = folder + File.separator + pageIndex + PDF2JPG_IMAGE_FORMAT;
BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, ConfigConstants.getPdf2JpgDpi(), ImageType.RGB);
ImageIOUtil.writeImage(image, imageFilePath, ConfigConstants.getPdf2JpgDpi());
String imageUrl = this.getPdf2jpgUrl(pdfFilePath, pageIndex);
imageUrls.add(imageUrl);
if (!PDF_PASSWORD_MSG.equals(pdfPassword[0])) { //该文件异常 错误原因非密码原因输出错误
logger.error("Convert pdf exception, pdfFilePath:{}", pdfFilePath, e);
}
throw new Exception(e);
}
Callable <List<String>> call = () -> {
try {
if (!ObjectUtils.isEmpty(filePassword)) { //获取到密码 判断是否是加密文件
pdfReader = new PdfReader(fileNameFilePath); //读取PDF文件 通过异常获取该文件是否有密码字符
String imageFilePath;
BufferedImage image = null;
PDFRenderer pdfRenderer = new PDFRenderer(doc);
pdfRenderer.setSubsamplingAllowed(true);
for (int pageIndex = 0; pageIndex < pageCount[0]; pageIndex++) {
imageFilePath = folder + File.separator + pageIndex + PDF2JPG_IMAGE_FORMAT;
image = pdfRenderer.renderImageWithDPI(pageIndex, ConfigConstants.getPdf2JpgDpi(), ImageType.RGB);
ImageIOUtil.writeImage(image, imageFilePath, ConfigConstants.getPdf2JpgDpi());
String imageUrl = this.getPdf2jpgUrl(pdfFilePath, pageIndex);
imageUrls.add(imageUrl);
}
} catch (Exception e) { //获取异常方法 判断是否有加密字符串
Throwable[] throwableArray = ExceptionUtils.getThrowables(e);
for (Throwable throwable : throwableArray) {
if (throwable instanceof IOException || throwable instanceof EncryptedDocumentException) {
if (e.getMessage().toLowerCase().contains(PDF_PASSWORD_MSG)) {
pdfPassword = PDF_PASSWORD_MSG; //查询到该文件是密码文件 输出带密码的值
}
}
}
if (!PDF_PASSWORD_MSG.equals(pdfPassword)) { //该文件异常 错误原因非密码原因输出错误
logger.error("Convert pdf exception, pdfFilePath:{}", pdfFilePath, e);
}

image.flush();
} catch (IOException e) {
throw new Exception(e);
} finally {
if (pdfReader != null) { //关闭
pdfReader.close();
}
}

if (usePasswordCache || !PDF_PASSWORD_MSG.equals(pdfPassword)) { //加密文件 判断是否启用缓存命令
this.addPdf2jpgCache(pdfFilePath, pageCount);
}
} catch (IOException e) {
if (!e.getMessage().contains(PDF_PASSWORD_MSG)) {
logger.error("Convert pdf to jpg exception, pdfFilePath:{}", pdfFilePath, e);
doc.close();
}
return imageUrls;
};
Future<List<String>> result = pool.submit(call);
int pdftimeout;
if(pageCount[0] <=50){
pdftimeout = ConfigConstants.getPdfTimeout();
}else if(pageCount[0] <=200){
pdftimeout = ConfigConstants.getPdfTimeout80();
}else {
pdftimeout = ConfigConstants.getPdfTimeout200();
}
try {
result.get(pdftimeout, TimeUnit.SECONDS);
// 如果在超时时间内,没有数据返回:则抛出TimeoutException异常
} catch (InterruptedException | ExecutionException e) {
throw new Exception(e);
} catch (TimeoutException e) {
throw new Exception("overtime");
} finally {
if (doc != null) { //关闭
doc.close();
}
//关闭
doc.close();
}
if (usePasswordCache || ObjectUtils.isEmpty(filePassword)) { //加密文件 判断是否启用缓存命令
this.addPdf2jpgCache(pdfFilePath, pageCount[0]);
}
return imageUrls;
}
Expand Down
25 changes: 21 additions & 4 deletions server/src/main/java/cn/keking/service/cache/NotResourceCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.DefaultResourceCache;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;

import java.io.IOException;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
import org.apache.pdfbox.pdmodel.graphics.shading.PDShading;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;

/**
* @author: Sawyer.Yong
Expand All @@ -14,7 +17,21 @@
/**
 * A {@link DefaultResourceCache} subclass whose {@code put} overloads all have empty
 * bodies, so nothing passed to them is stored by this class.
 *
 * NOTE(review): this span is rendered from a commit diff. The first {@code put}
 * signature below (declared with {@code throws IOException}) appears to be the
 * removed pre-change line and is never closed; confirm against the committed file.
 */
public class NotResourceCache extends DefaultResourceCache {

@Override
public void put(COSObject indirect, PDXObject xobject) throws IOException {
// do nothing
// Colour spaces are not cached.
public void put(COSObject indirect, PDColorSpace colorSpace) {
}
// Extended graphics states are not cached.
@Override
public void put(COSObject indirect, PDExtendedGraphicsState extGState) {
}
// Shadings are not cached.
@Override
public void put(COSObject indirect, PDShading shading) {
}
// Patterns are not cached.
@Override
public void put(COSObject indirect, PDAbstractPattern pattern) {
}
// Property lists are not cached.
@Override
public void put(COSObject indirect, PDPropertyList propertyList) {
}
// XObjects are not cached.
@Override
public void put(COSObject indirect, PDXObject xobject) {
}
}

0 comments on commit 0a4ae41

Please sign in to comment.