基于 Apache POI 实现 Word 转换 HTML 文件

不点 阅读:789 2021-03-31 20:58:44 评论:0

Apache POI 简介:

Apache POI 是用Java编写的免费开源的跨平台的 Java API,Apache POI提供API给Java程式对Microsoft Office(Excel、WORD、PowerPoint、Visio等)格式档案读和写的功能。POI为“Poor Obfuscation Implementation”的首字母缩写,意为“可怜的模糊实现”。

官方主页: http://poi.apache.org/index.html 
API文档: http://poi.apache.org/apidocs/index.html

编写项目

1、项目依赖

2、核心代码
 

项目pom.xml 文件

<!--集成apache poi word 转html --> 
		<dependency> 
			<groupId>org.apache.poi</groupId> 
			<artifactId>poi</artifactId> 
			<version>3.10.1</version> 
		</dependency> 
		<dependency> 
			<groupId>org.apache.poi</groupId> 
			<artifactId>poi-scratchpad</artifactId> 
			<version>3.10.1</version> 
		</dependency> 
		<dependency> 
			<groupId>org.apache.poi</groupId> 
			<artifactId>poi-ooxml</artifactId> 
			<version>3.10.1</version> 
		</dependency> 
		<dependency> 
			<groupId>org.apache.poi</groupId> 
			<artifactId>poi-ooxml-schemas</artifactId> 
			<version>3.10.1</version> 
		</dependency> 
		<dependency> 
			<groupId>org.apache.poi</groupId> 
			<artifactId>ooxml-schemas</artifactId> 
			<version>1.1</version> 
		</dependency> 
 
		<!-- 集成 --> 
		<dependency> 
			<groupId>fr.opensagres.xdocreport</groupId> 
			<artifactId>fr.opensagres.xdocreport.document</artifactId> 
			<version>2.0.1</version> 
		</dependency> 
		<dependency> 
			<groupId>fr.opensagres.xdocreport</groupId> 
			<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId> 
			<version>1.0.6</version> 
		</dependency> 
		<dependency> 
			<groupId>fr.opensagres.xdocreport</groupId> 
			<artifactId>org.apache.poi.xwpf.converter.core</artifactId> 
			<version>1.0.6</version> 
		</dependency>

2、核心代码:

package com.zzg.word.trans.html; 
 
import java.io.File; 
import java.io.InputStream; 
import java.io.OutputStream; 
 
import org.apache.poi.xwpf.converter.core.BasicURIResolver; 
import org.apache.poi.xwpf.converter.core.FileImageExtractor; 
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; 
import org.apache.poi.xwpf.usermodel.XWPFDocument; 
 
public class WordTransHtml {

	/**
	 * Converts a Word {@code .docx} document to XHTML.
	 *
	 * <p>The document is loaded from {@code inputStream} and the generated markup is
	 * written to {@code outputStream}. {@code imageSaveDir} is handed to both the
	 * {@link FileImageExtractor} (as the target directory for extracted images) and
	 * the {@link BasicURIResolver} (as the base used for image references in the
	 * markup).
	 *
	 * <p>Neither stream is closed by this method; the caller owns both and is
	 * responsible for closing them. The output stream is flushed after a
	 * successful conversion.
	 *
	 * @param inputStream  source of the .docx content; must not be {@code null}
	 * @param outputStream destination for the generated HTML; must not be {@code null}
	 * @param imageSaveDir directory path used for extracted images; must not be {@code null}
	 * @throws IllegalArgumentException if any argument is {@code null}
	 * @throws IllegalStateException    if loading or converting the document fails with a
	 *                                  checked exception; the original exception is the cause
	 */
	public static void docxTransHtml(InputStream inputStream, OutputStream outputStream, String imageSaveDir) {
		if (inputStream == null) {
			throw new IllegalArgumentException("inputStream must not be null");
		}
		if (outputStream == null) {
			throw new IllegalArgumentException("outputStream must not be null");
		}
		if (imageSaveDir == null) {
			throw new IllegalArgumentException("imageSaveDir must not be null");
		}

		try {
			XWPFDocument document = new XWPFDocument(inputStream);

			XHTMLOptions options = XHTMLOptions.create();
			options.setExtractor(new FileImageExtractor(new File(imageSaveDir)));
			options.URIResolver(new BasicURIResolver(imageSaveDir));

			org.apache.poi.xwpf.converter.xhtml.XHTMLConverter.getInstance().convert(document, outputStream, options);

			// The caller may have passed a buffered stream; push everything through
			// before returning so the output is complete even if it is inspected
			// before being closed.
			outputStream.flush();
		} catch (RuntimeException e) {
			// Already unchecked: let it propagate untouched.
			throw e;
		} catch (Exception e) {
			// Report the failure to the caller instead of only printing it, so a
			// failed conversion cannot be mistaken for a successful one.
			throw new IllegalStateException("Failed to convert docx to HTML", e);
		}
	}

}

测试代码:

package com.zzg.word.trans.html.test; 
 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.OutputStream; 
 
import com.zzg.word.trans.html.WordTransHtml; 
 
public class WordTransHtmlTest {

	/**
	 * Manual smoke test: converts the hard-coded sample .docx file to an HTML file,
	 * using the hard-coded image directory for the document's images.
	 *
	 * <p>Nothing is written when the source document is missing or the image
	 * directory cannot be created; the reason is printed to standard error instead.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String imagePath = "C:\\image\\iamges";
		String docPath ="C:\\image\\1.docx";
		String htmlPath = "c:\\image\\1.html";
		File docFile = new File(docPath);
		File htmlFile = new File(htmlPath);
		File imageDir = new File(imagePath);

		// Check the input first so a missing source does not leave an empty HTML file behind.
		if (!docFile.exists()) {
			System.err.println("Source document not found: " + docFile.getAbsolutePath());
			return;
		}

		// Create the image directory if it does not exist yet.
		if (!imageDir.exists() && !imageDir.mkdirs()) {
			System.err.println("Could not create image directory: " + imageDir.getAbsolutePath());
			return;
		}

		// FileOutputStream creates the HTML file when it is absent, so no explicit
		// createNewFile() call is needed. try-with-resources closes both streams
		// even when the conversion throws.
		try (InputStream inputStream = new FileInputStream(docFile);
				OutputStream outputStream = new FileOutputStream(htmlFile)) {
			WordTransHtml.docxTransHtml(inputStream, outputStream, imagePath);
		} catch (IOException e) {
			// Covers failure to open either file as well as errors raised on close.
			e.printStackTrace();
		}
	}

}

效果展示:

声明

1.本站遵循行业规范,任何转载的稿件都会明确标注作者和来源;2.本站的原创文章,请转载时务必注明文章作者和来源,不尊重原创的行为我们将追究责任;3.作者投稿可能会经我们编辑修改或补充。

关注我们

一个IT知识分享的公众号