利用openoffice+jodconverter

网友投稿 594 2023-07-30

利用openoffice+jodconverter

利用openoffice+jodconverter

本文实例为大家分享了利用openoffice+jodconverter-core-3.0-beta-4实现ppt转图片的具体代码,供大家参考,具体内容如下

安装openoffice4  (用于把文档(ppt)转成pdf)根据系统的位数安装

使用jodconverter-core3.0-beta-4(要上传maven本地仓库)

安装ImageMagick:yum install ImageMagick(用于pdf转图片)

安装pdftotext  用于提取文字大纲  yum install poppler-utils

perl脚本(用于提取pdf文档的文字大纲)

使用jodconverter调用OpenOffice将office文档转换为PDF时,如果转换程序异常中止而OpenOffice并没有停止运行,残留的OpenOffice进程需要手动处理。

openoffice

1、启动tomcat时,启动openoffice服务(个人感觉有风险问题)

2、手工用命令启动openoffice服务,再使用连接服务(推荐)

package com.document.servers.impl;

import java.io.File;

import java.net.ConnectException;

import javax.annotation.PostConstruct;

import javax.annotation.PreDestroy;

import org.artofsolving.jodconverter.OfficeDocumentConverter;

import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;

import org.artofsolving.jodconverter.office.ExternalOfficeManagerConfiguration;

import org.artofsolving.jodconverter.office.OfficeConnectionProtocol;

import org.artofsolving.jodconverter.office.OfficeManager;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import org.springframework.stereotype.Service;

//import com.artofsolving.jodconverter.DefaultDocumentFormatRegistry;

//import com.artofsolving.jodconverter.DocumentConverter;

//import com.artofsolving.jodconverter.DocumentFamily;

//import com.artofsolving.jodconverter.DocumentFormat;

//import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;

//import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;

//import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;

import com.document.servers.OfficeService;

/**

* linux下:

* cd /opt/openoffice4/program

* ./soffice "-accept=socket,host=localhost,port=8100;urp;StarOffice.ServiceManager" -nologo -headless -nofirststartwizard &

*/

@Service("officeService")

public class OfficeServiceImpl http://implements OfficeService {

private static final Logger logger = LoggerFactory.getLogger(OfficeServiceImpl.class);

private OfficeManager officeManager;

private OfficeDocumentConverter documentConverter;

// @PostConstruct

// public void init() throws Exception {

// // TODO Auto-generated method stub

// officeManager = new DefaultOfficeManagerConfiguration().setOfficeHome("/opt/openoffice4").buildOfficeManager();

//

// documentConverter = new OfficeDocumentConverter(officeManager);

// // officeManager.stop();

//

// logger.warn("openoffice starting....");

// try {

// officeManager.start();

// logger.warn("openoffice started");

// } catch (Excepthttp://ion e) {

// logger.error("office start failed:{}", e);

// }

// }

//

// @PreDestroy

// public void destroy() throws Exception {

// // TODO Auto-generated method stub

// logger.info("shutdown office service....");

// if (officeManager != null) {

// try {

//

// officeManager.stop();

// logger.info("office closed");

// } catch (Exception e) {

// logger.error("office close failed:{}", e);

// }

// }

// }

// public void convert(String inputfilename, String outputfilename) {

// logger.info("convert...." + inputfilename + " to " + outputfilename);

// documentConverter.convert(new File(inputfilename), new File(outputfilename));

// }

public void manualConvert(String inputfilename, String outputfilename) {

logger.info("convert...." + inputfilename + " to " + outputfilename);

// connect to an OpenOffice.org instance running on port 8100

ExternalOfficeManagerConfiguration externalProcessOfficeManager = new

ExternalOfficeManagerConfiguration();

externalProcessOfficeManager.setConnectOnStart(true);

externalProcessOfficeManager.setPortNumber(8100);

officeManager = externalProcessOfficeManager.buildOfficeManager();

officeManager.start();

logger.info("openoffice服务已链接");

documentConverter = new OfficeDocumentConverter(officeManager);

documentConverter.convert(new File(inputfilename), new File(outputfilename));

}

}

转换处理方法

package com.document.servers.impl;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.StringWriter;

import java.io.UnsupportedEncodingException;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.pdfbox.pdmodel.PDDocument;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.stereotype.Service;

import com.document.defined.model.ImagePPT;

import com.document.servers.OfficeService;

import com.document.servers.PPTConvertServers;

import com.document.tool.ImageMagickUtils;

import com.document.tool.SystemConfig;

import com.fasterxml.jackson.databind.ObjectMapper;

import com.ppt.util.Command;

@Service("pPTConvertServers")

public class PPTConvertServersImpl implements PPTConvertServers {

private static final Logger logger = LoggerFactory.getLogger(PPTConvertServersImpl.class);

@Autowired

private OfficeService officeService;

/** Shared JSON serializer for the slide list. */
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

/**
 * Converts a newly uploaded presentation to PDF (unless it already is one), renders the PDF
 * to JPG images and describes every page.
 *
 * @param request current request; its scheme and server name form the base of the image URLs
 * @param filePath path of the uploaded file on disk
 * @param extension extension of the uploaded file; {@code ".pdf"} skips the office conversion
 * @param title fallback title for pages without an extracted outline entry
 * @param filename stored file name; without its extension it becomes the URL directory
 * @return map with {@code "json"} (serialized list of {@link ImagePPT}) and {@code "totalPage"}
 * @throws Exception if conversion, page counting or JSON serialization fails
 */
public Map deal_ppt(HttpServletRequest request, String filePath, String extension, String title, String filename)
        throws Exception {
    logger.info("ppt转pdf,{}", filePath);
    return convertToImages(request, filePath, extension, title, filename, "创建图片目录");
}

/**
 * Same as {@link #deal_ppt} for a file that was uploaded before: the conversion is run again
 * and replaces the earlier output.
 *
 * @param request current request; its scheme and server name form the base of the image URLs
 * @param filePath path of the uploaded file on disk
 * @param extension extension of the uploaded file; {@code ".pdf"} skips the office conversion
 * @param title fallback title for pages without an extracted outline entry
 * @param filename stored file name; without its extension it becomes the URL directory
 * @return map with {@code "json"} (serialized list of {@link ImagePPT}) and {@code "totalPage"}
 * @throws Exception if conversion, page counting or JSON serialization fails
 */
public Map replace_ppt(HttpServletRequest request, String filePath, String extension, String title,
        String filename) throws Exception {
    logger.info("替换,ppt转pdf,{}", filePath);
    return convertToImages(request, filePath, extension, title, filename, "替换创建图片目录");
}

/** Runs the document -> PDF -> JPG pipeline shared by deal_ppt and replace_ppt. */
private Map convertToImages(HttpServletRequest request, String filePath, String extension, String title,
        String filename, String createDirLogMessage) throws Exception {
    String path = stripExtension(filePath);
    String pdfTargetFile = path + ".pdf";
    // %d is left for the image converter to replace with the page index.
    String imagesTargetFile = path + "/jpg" + "-%d.jpg";

    if (!extension.equals(".pdf")) {
        officeService.manualConvert(filePath, pdfTargetFile);
    }

    String[] pdfLines = extractOutLineFromPDF(pdfTargetFile);

    File imageDirectory = new File(imagesTargetFile).getParentFile();
    if (!imageDirectory.exists()) {
        logger.info(createDirLogMessage);
        imageDirectory.mkdirs();
    }

    // NOTE(review): the command is a concatenated string, so a path containing spaces or
    // shell metacharacters will break it; Command only accepts a string here.
    Command.run("convert " + pdfTargetFile + " " + imagesTargetFile, new StringWriter());

    int pageCount = countPages(pdfTargetFile);

    // NOTE(review): port and context path are not part of the base URL - confirm deployment.
    String basePath = request.getScheme() + "://" + request.getServerName() + "/";
    String pathUrl = stripExtension(filename);

    List<ImagePPT> slides = new ArrayList<ImagePPT>();
    for (int i = 0; i < pageCount; i++) {
        ImagePPT slide = new ImagePPT();
        slide.setId(i + 1);
        if (i < pdfLines.length) {
            slide.setTitle(pdfLines[i]);
        } else {
            // Fewer outline entries than pages: fall back to the document title.
            slide.setTitle(title);
            if (pdfLines.length > 0) {
                logger.info("title,数组越界");
            }
        }
        slide.setUrl(basePath + "images/" + pathUrl + "/jpg-" + i + ".jpg");
        slide.setPreviewUrl(basePath + "preview/images/" + pathUrl + "/preview/pjpg-" + i + ".jpg");
        slides.add(slide);
    }

    Map<String, Object> result = new HashMap<String, Object>();
    result.put("json", OBJECT_MAPPER.writeValueAsString(slides));
    result.put("totalPage", pageCount);
    return result;
}

/** Returns the page count of the PDF, closing the document even when reading fails. */
private static int countPages(String pdfFile) throws Exception {
    PDDocument document = PDDocument.load(new File(pdfFile));
    try {
        return document.getNumberOfPages();
    } finally {
        document.close();
    }
}

/** Removes the extension of the last path segment; names without one are returned as is. */
private static String stripExtension(String name) {
    int dot = name.lastIndexOf('.');
    int separator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
    return dot > separator ? name.substring(0, dot) : name;
}

/**
 * Extracts the text outline of a PDF by running the bundled {@code pdf_outline.pl} script.
 *
 * <p>The script location is derived from where this class was loaded: everything before
 * {@code WEB-INF} in the class resource path, with a leading {@code file:} removed.
 *
 * <p>NOTE(review): the command line is a concatenated string, so {@code pdf_file} must not
 * contain spaces or shell metacharacters; {@code Command.run2} presumably captures the
 * script's standard output - confirm.
 *
 * @param pdf_file path of the PDF to inspect
 * @return the script output split on the {@code ///} separator
 * @throws UnsupportedEncodingException if UTF-8 is not available for decoding the output
 */
public static String[] extractOutLineFromPDF(String pdf_file) throws UnsupportedEncodingException {
    final String webRoot = PPTConvertServersImpl.class.getResource("").getPath()
            .split("WEB-INF")[0]
            .replaceFirst("file:", "");
    logger.info(webRoot);

    final String perlCommand = "/usr/bin/perl " + webRoot + "WEB-INF/sh/pdf_outline.pl " + pdf_file;
    logger.info(perlCommand);

    final ByteArrayOutputStream captured = new ByteArrayOutputStream();
    Command.run2(perlCommand, captured);

    final String outlineText = captured.toString("utf-8");
    logger.info("title pdf,{}", outlineText);

    return outlineText.split("///");
}

/**
 * Tells whether a file or directory exists at the given path.
 *
 * @param filename path to check
 * @return {@code true} if something exists at {@code filename}; {@code false} otherwise,
 *         including when the check itself throws (for example for a {@code null} path)
 */
public static boolean exists(String filename) {
    boolean found;
    try {
        found = new File(filename).exists();
    } catch (Exception ignored) {
        // Any failure of the lookup is reported as "does not exist".
        found = false;
    }
    return found;
}

/**

* 删除文件

*

* @param filename

* @return

*/

public static boolean unlink(String filename) {

try {

File file = new File(filename);

if (file.isFile()) {

file.delete();

return true;

}

http://return false;

} catch (Exception e) {

return false;

}

}

/**
 * Copies a file to a new location, creating missing parent directories of the destination.
 *
 * @param file source file
 * @param newname path of the destination file
 * @param overwrite whether an existing destination may be replaced
 * @return {@code true} if the copy completed; {@code false} if an argument is {@code null},
 *         the destination exists and {@code overwrite} is {@code false}, the destination
 *         directory cannot be created, or any I/O error occurs
 */
public static boolean copy(File file, String newname, boolean overwrite) {
    if (file == null || newname == null) {
        return false;
    }
    File dest = new File(newname);
    FileInputStream input = null;
    FileOutputStream output = null;
    try {
        if (!overwrite && dest.exists()) {
            return false;
        }
        // Open the source first so that a missing source never truncates the destination.
        input = new FileInputStream(file);

        // A destination without a parent (plain file name) lives in the working directory.
        File parent = dest.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            return false;
        }

        output = new FileOutputStream(dest);
        byte[] buffer = new byte[1024 * 5];
        int len;
        while ((len = input.read(buffer)) != -1) {
            output.write(buffer, 0, len);
        }
        output.flush();
        return true;
    } catch (Exception e) {
        // Kept as in the original: the failure is reported on stderr and signalled by false.
        e.printStackTrace();
        return false;
    } finally {
        closeQuietly(input);
        closeQuietly(output);
    }
}

/** Closes a stream if it was opened, ignoring errors raised by close itself. */
private static void closeQuietly(java.io.Closeable stream) {
    if (stream != null) {
        try {
            stream.close();
        } catch (Exception ignored) {
            // Nothing useful can be done when close fails.
        }
    }
}

/**
 * Creates a directory, including any missing parent directories.
 *
 * @param dir path of the directory to create
 * @return {@code true} if the directory exists when the method returns; {@code false} if
 *         {@code dir} is {@code null}, the path is occupied by a regular file, or the
 *         directory could not be created
 */
public static boolean mkdir(String dir) {
    if (dir == null) {
        return false;
    }
    try {
        File directory = new File(dir);
        // The trailing isDirectory() covers a directory created concurrently by someone else.
        return directory.isDirectory() || directory.mkdirs() || directory.isDirectory();
    } catch (SecurityException e) {
        return false;
    }
}

}

上传ppt文件处理类:

package com.document.handle.controller;

import java.io.BufferedOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.PrintWriter;

import java.io.UnsupportedEncodingException;

import java.util.Date;

import java.util.Enumeration;

import java.util.HashMap;

import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import org.apache.commons.codec.digest.DigestUtils;

import org.apache.commons.lang3.StringUtils;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.stereotype.Controller;

import org.springframework.web.bind.annotation.RequestMapping;

import org.springframework.web.bind.annotation.RequestMethod;

import org.springframework.web.bind.annotation.RequestParam;

import org.springframework.web.bind.annotation.ResponseBody;

import org.springframework.web.multipart.MultipartFile;

import org.springframework.web.servlet.ModelAndView;

import com.document.servers.PPTConvertServers;

import com.document.tool.FilenameUtils;

import com.document.tool.SystemConfig;

import com.fasterxml.jackson.annotation.PropertyAccessor;

import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;

import com.fasterxml.jackson.databind.ObjectMapper;

import com.fasterxml.jackson.databind.ObjectWriter;

@Controller

public class PptToImageController {

private static final Logger logger = LoggerFactory.getLogger(PptToImageController.class);

private static final String TYPE_BLOB = "BLOB";

private static final String CALLBACK = "callback"; // 回调函数的参数名

@Autowired

private PPTConvertServers pPTConvertServers;

@RequestMapping(value = "/convert/upload")

public ModelAndView updateFile(HttpServletRequest request, HttpServletResponse response) {

ModelAndView mav = new ModelAndView();

mav.http://addObject("name", "Hello Word");

mav.setViewName("/ppt/uploadFile");

logger.info("/convert/upload");

return mav;

}

/**
 * Builds the upload form view for the given upload type and hands all request parameters
 * to it as the {@code parameters} model attribute.
 *
 * @param request current request whose parameters are copied into the model
 * @param type upload type; its lower-cased value selects the view {@code /upload/<type>}
 * @return the populated model and view
 */
@SuppressWarnings("unchecked")
private ModelAndView showUploadForm(HttpServletRequest request, String type) {
    // Typed collections: with a raw Enumeration, nextElement() yields Object and the
    // assignment to String does not compile.
    Map<String, String> params = new HashMap<String, String>();
    Enumeration<String> paramNames = request.getParameterNames();
    while (paramNames.hasMoreElements()) {
        String name = paramNames.nextElement();
        String value = request.getParameter(name);
        if (null != value) {
            params.put(name, value);
        }
    }

    ModelAndView mav = new ModelAndView();
    mav.setViewName("/upload/" + type.toLowerCase());
    mav.addObject("parameters", params);
    return mav;
}

/**
 * Stores an uploaded file under a name derived from the MD5 of its content and converts it
 * to images.
 *
 * <p>If a file with the same content hash is already stored, nothing is written and the
 * existing file is converted again through {@code replace_ppt}; otherwise the bytes are
 * written first and converted through {@code deal_ppt}.
 *
 * @param request current request; all of its parameters are echoed into the result
 * @param file the uploaded file
 * @param type upload type (currently not used)
 * @return request parameters plus {@code title}, {@code status} ({@code "success"} or
 *         {@code "fail"}), {@code data} (slide JSON or an error message) and {@code totalPage}
 */
private Map<String, Object> saveUploadedFile(HttpServletRequest request, MultipartFile file, String type) {
    String originalFilename = file.getOriginalFilename();
    String pdftitle = baseName(originalFilename);

    Map<String, Object> params = collectParameters(request);
    params.put("title", pdftitle);

    // Read the upload once; the bytes serve both the hash and the stored file.
    byte[] fileBytes;
    try {
        fileBytes = file.getBytes();
    } catch (IOException e) {
        logger.error("保存'" + originalFilename + "'时发生异常,Cause: ", e);
        return markFailed(params);
    }

    // The content hash names the stored file, so identical uploads map to the same path.
    String md5 = DigestUtils.md5Hex(fileBytes);
    logger.info("文件内容MD5串,{}", md5);

    String extension = FilenameUtils.getExtension(originalFilename);
    String filename = FilenameUtils.generateFileNameMd5(extension, md5);
    String absoluteFilename = SystemConfig.getBlobDirectory() + filename;

    File filepath = new File(absoluteFilename);
    boolean alreadyUploaded = filepath.exists();
    if (!alreadyUploaded) {
        File parentFile = filepath.getParentFile();
        if (parentFile != null && !parentFile.exists()) {
            parentFile.mkdirs();
        }
        if (!writeFile(filepath, fileBytes, originalFilename)) {
            // Do not try to convert a file that was never stored completely.
            return markFailed(params);
        }
    }

    try {
        Map officeMap = alreadyUploaded
                ? pPTConvertServers.replace_ppt(request, absoluteFilename, extension, pdftitle, filename)
                : pPTConvertServers.deal_ppt(request, absoluteFilename, extension, pdftitle, filename);
        params.put("totalPage", officeMap.get("totalPage"));
        params.put("data", officeMap.get("json"));
        params.put("status", "success");
    } catch (Exception e) {
        logger.error("把ppt文件转pdf失败", e);
        markFailed(params);
    }
    return params;
}

/** Copies every non-null request parameter into a new map. */
@SuppressWarnings("unchecked")
private Map<String, Object> collectParameters(HttpServletRequest request) {
    Map<String, Object> collected = new HashMap<String, Object>();
    Enumeration<String> paramNames = request.getParameterNames();
    while (paramNames.hasMoreElements()) {
        String name = paramNames.nextElement();
        String value = request.getParameter(name);
        if (null != value) {
            collected.put(name, value);
        }
    }
    return collected;
}

/** Writes the bytes to the target file; on failure removes the partial file and returns false. */
private boolean writeFile(File target, byte[] content, String originalFilename) {
    BufferedOutputStream bos = null;
    boolean written = false;
    try {
        bos = new BufferedOutputStream(new FileOutputStream(target));
        bos.write(content);
        bos.flush();
        written = true;
    } catch (IOException e) {
        logger.error("保存'" + originalFilename + "'时发生异常,Cause: ", e);
    } finally {
        if (null != bos) {
            try {
                bos.close();
            } catch (IOException ignored) {
                // The content was flushed above; a failing close cannot be recovered here.
            }
        }
    }
    if (!written) {
        // A leftover partial file would be mistaken for a finished upload next time.
        target.delete();
    }
    return written;
}

/** Marks the result as failed with the standard conversion error payload. */
private Map<String, Object> markFailed(Map<String, Object> params) {
    params.put("status", "fail");
    params.put("data", "把ppt文件转pdf失败");
    params.put("totalPage", 0);
    return params;
}

/** Returns the file name without its extension; names without a dot are returned unchanged. */
private static String baseName(String name) {
    if (name == null) {
        return "";
    }
    int dot = name.lastIndexOf('.');
    return dot >= 0 ? name.substring(0, dot) : name;
}

/**
 * Handles the file upload and answers with a JSON document, optionally wrapped in a JSONP
 * callback named by the {@code callback} request parameter.
 *
 * <p>The callback name is only honoured when it is a plain (optionally dotted) JavaScript
 * identifier; anything else is ignored and plain JSON is returned, because the response is
 * served as {@code text/html} and an arbitrary callback would be reflected into the page.
 *
 * <p>NOTE(review): request parameters are echoed into the JSON body, which is also served
 * as {@code text/html}; consider escaping them or changing the content type.
 *
 * @param request current request
 * @param file the uploaded file
 * @return JSON (or JSONP) text with at least {@code status} and {@code data}
 * @throws IOException if the result cannot be serialized to JSON
 */
@RequestMapping(value = "/convert/upload", method = RequestMethod.POST, produces = "text/html;charset=UTF-8")
public @ResponseBody String uploadFilePost(HttpServletRequest request,
        @RequestParam("file") MultipartFile file) throws IOException {
    String callback = request.getParameter(CALLBACK); // name of the JSONP callback function

    ObjectMapper mapper = new ObjectMapper();
    mapper.setVisibility(PropertyAccessor.FIELD, Visibility.ANY);
    ObjectWriter writer = mapper.writerWithType(Map.class);

    // No file was uploaded.
    if (file.isEmpty()) {
        return writer.writeValueAsString(failure("请上传文件"));
    }

    Map params = saveUploadedFile(request, file, TYPE_BLOB);
    if (params == null) {
        return writer.writeValueAsString(failure("文件已上传过"));
    }

    String json;
    try {
        json = writer.writeValueAsString(params);
    } catch (IOException e) {
        // Fail loudly instead of answering with a non-JSON placeholder body.
        logger.error("转换Blob上传参数为JSON时发生异常,Cause: ", e);
        throw e;
    }

    if (StringUtils.isBlank(callback)) {
        return json;
    }
    if (!callback.matches("[A-Za-z_$][\\w$]*(\\.[A-Za-z_$][\\w$]*)*")) {
        logger.warn("ignoring invalid JSONP callback name");
        return json;
    }
    return callback + "(" + json + ");";
}

/** Builds the failure payload returned when no conversion result is available. */
private static Map<String, Object> failure(String message) {
    Map<String, Object> result = new HashMap<String, Object>();
    result.put("status", "fail");
    result.put("data", message);
    return result;
}

}

预览图代理输出-----处理类:

package com.document.handle.controller;

import java.io.File;

import java.io.IOException;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import org.springframework.stereotype.Controller;

import org.springframework.web.bind.annotation.PathVariable;

import org.springframework.web.bind.annotation.RequestMapping;

import com.document.tool.ImageMagickUtils;

import com.document.tool.SystemConfig;

/**
 * Serves preview images of converted slides: the original image is scaled to 240x180 on
 * request and the scaled file is streamed back.
 */
@Controller
public class ImageAgentController {

    private static final Logger LOG = LoggerFactory.getLogger(ImageAgentController.class);

    /**
     * Scales the original slide image and writes the preview to the response.
     *
     * <p>The preview file name is the original name with one leading character added
     * (for example {@code pjpg-0} for {@code jpg-0}), so the first character of
     * {@code filename} is dropped to locate the original.
     *
     * @param year first directory level below the blob directory
     * @param month second directory level
     * @param md5id directory of the converted document
     * @param preview path segment of the preview directory (not used; the target directory
     *        is always {@code preview})
     * @param filename preview file name without extension
     * @param ext image extension
     * @param request current request (not used)
     * @param response receives the image, 400 for unsafe path segments, or 404 when the
     *        preview could not be produced
     * @throws IOException if reading the preview or writing the response fails
     */
    @RequestMapping("/preview/images/{year}/{month}/{md5id}/{preview}/{filename}.{ext}")
    public void cropImage(@PathVariable String year, @PathVariable String month, @PathVariable String md5id,
            @PathVariable String preview, @PathVariable String filename, @PathVariable String ext,
            HttpServletRequest request, HttpServletResponse response) throws IOException {
        // The segments come straight from the URL; refuse anything that could leave the blob directory.
        if (!isSafeSegment(year) || !isSafeSegment(month) || !isSafeSegment(md5id)
                || !isSafeSegment(filename) || !isSafeSegment(ext)) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST);
            return;
        }

        String rootDir = SystemConfig.getBlobDirectory();
        String documentDir = rootDir + year + "/" + month + "/" + md5id + "/";
        String oname = filename.substring(1); // file name of the original image
        String dirString = documentDir + oname + "." + ext;
        String targetFileString = documentDir + "preview/" + filename + "." + ext;
        LOG.info("corpImage..." + dirString + " -> " + targetFileString);

        File pathFile = new File(targetFileString).getParentFile();
        LOG.info("pathString...{}", pathFile);
        if (!pathFile.exists()) {
            LOG.info("---create file---");
            pathFile.mkdirs();
        }

        boolean status = ImageMagickUtils.scale(dirString, targetFileString, 240, 180);
        if (!status) {
            // Previously an empty 200 response was returned here.
            response.sendError(HttpServletResponse.SC_NOT_FOUND);
            return;
        }

        response.reset();
        // "image/jpg" is not a registered media type; jpg files are served as image/jpeg.
        response.setContentType("image/" + ("jpg".equalsIgnoreCase(ext) ? "jpeg" : ext));

        java.io.InputStream in = new java.io.FileInputStream(targetFileString);
        try {
            java.io.OutputStream out = response.getOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            while ((len = in.read(buffer)) != -1) {
                // Write only the bytes actually read; the last chunk is usually shorter.
                out.write(buffer, 0, len);
            }
        } finally {
            in.close();
        }
    }

    /** Accepts a non-empty path segment without separators or parent-directory references. */
    private static boolean isSafeSegment(String segment) {
        return segment != null
                && segment.length() > 0
                && segment.indexOf('/') < 0
                && segment.indexOf('\\') < 0
                && !segment.contains("..");
    }
}

提取文字大纲的perl脚本:

# Prints one title per PDF page, separated by "///", using the text produced by pdftotext.
# Usage: perl pdf_outline.pl <pdf-file>
use strict;
use warnings;
use utf8;
use open ':encoding(utf8)';
binmode(STDOUT, ":utf8");

# Returns the argument with leading and trailing whitespace removed.
sub trim($)
{
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

if (!$ARGV[0]) {
    die "usage: $0 <pdf-file>\n";
}

# List-form pipe open: the file name is passed to pdftotext as a single argument and never
# goes through a shell, so spaces and shell metacharacters in the path are harmless.
open(my $fh, '-|', 'pdftotext', '-layout', '-enc', 'UTF-8', $ARGV[0], '-')
    or die "cannot run pdftotext: $!";

# The first line of the output is the title of the first page.
my $firstline = <$fh>;
print trim($firstline) if defined $firstline;

my $pageNum = 1;
while ( my $line = <$fh> ) {
    # \xC is the form feed character; only lines containing it are examined.
    if ( $line =~ /\xC/ ) {
        my $count = ($line =~ tr/\xC//);
        # For every form feed but the last on this line, print the running page number
        # instead of a title.
        for (my $i = 0; $i < $count - 1; $i++) {
            print "///".$pageNum;
            $pageNum++;
        }
        # Any text left on the line after trimming is printed as a title.
        if (trim($line)) {
            print "///".trim($line);
        }
        $pageNum++;
    }
}

close $fh;

可能遇到的问题:

1、ppt转pdf时,遇到启动失败(不清楚是不是再次启动引起的)

2、转换后的pdf 表格里的中文会出现乱码

3、有时会出现关闭服务器的所有服务(尚不清楚什么原因引起的)

4、处理请求时,经常出现超时504

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:Intellij Idea部署OpenCV 4.0.0环境
下一篇:Elasticsearch 基础介绍及索引原理分析
相关文章

 发表评论

暂时没有评论,来抢沙发吧~