Commit 2eb8f581 authored by alex yao

fix: 富文本转Word

parent 333130e4
...@@ -12,12 +12,16 @@ import com.itextpdf.text.pdf.PdfWriter; ...@@ -12,12 +12,16 @@ import com.itextpdf.text.pdf.PdfWriter;
import com.vladsch.flexmark.html.HtmlRenderer; import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser; import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document; import com.vladsch.flexmark.util.ast.Document;
import com.vladsch.flexmark.util.ast.Node;
import org.apache.pdfbox.io.RandomAccessBuffer; import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*; import org.apache.poi.xwpf.usermodel.*;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
...@@ -25,6 +29,7 @@ import javax.annotation.Resource; ...@@ -25,6 +29,7 @@ import javax.annotation.Resource;
import java.awt.*; import java.awt.*;
import java.io.*; import java.io.*;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
...@@ -50,7 +55,7 @@ public class ContentReportRestImpl implements ContentReportRest { ...@@ -50,7 +55,7 @@ public class ContentReportRestImpl implements ContentReportRest {
String result = ""; String result = "";
if ("docx".equals(reportType) || "doc".equals(reportType)) { if ("docx".equals(reportType) || "doc".equals(reportType)) {
String htmlContent = convertMarkdownToHtml(markdown); String htmlContent = convertMarkdownToHtml(markdown);
result = convertHtmlToWord(htmlContent); result = exportWord(htmlContent);
} else if ("html".equals(reportType)) { } else if ("html".equals(reportType)) {
String htmlContent = convertMarkdownToHtml(markdown); String htmlContent = convertMarkdownToHtml(markdown);
result = bosConfigService.uploadFileByByteArray2Oss(htmlContent.getBytes(), UUIDTool.getUUID(), reportType); result = bosConfigService.uploadFileByByteArray2Oss(htmlContent.getBytes(), UUIDTool.getUUID(), reportType);
...@@ -74,29 +79,45 @@ public class ContentReportRestImpl implements ContentReportRest { ...@@ -74,29 +79,45 @@ public class ContentReportRestImpl implements ContentReportRest {
return renderer.render(document); return renderer.render(document);
} }
// private String convertHtmlToWord(String html) throws IOException { // private String convertHtmlToWord(String html) throws IOException {
// File file = File.createTempFile(UUIDTool.getUUID(), ".doc"); // File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
// FileOutputStream outputStream = new FileOutputStream(file); // Word07Writer writer = new Word07Writer();
// ByteArrayInputStream bais = new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8));//将字节数组包装到流中 // writer.addText(new Font("宋体", Font.PLAIN, 10), html);
// POIFSFileSystem poifs = new POIFSFileSystem(); // writer.flush(file);
// DirectoryEntry directory = poifs.getRoot();
// directory.createDocument("WordDocument", bais);
// poifs.writeFilesystem(outputStream);
// FileInputStream fileInputStream = new FileInputStream(file); // FileInputStream fileInputStream = new FileInputStream(file);
// bais.close(); // String upload = bosConfigService.upload(fileInputStream, "docx", "application/msword");
// poifs.close();
// file.deleteOnExit(); // file.deleteOnExit();
// return bosConfigService.upload(fileInputStream, "doc", "application/msword"); // fileInputStream.close();
// return upload;
// } // }
private String convertHtmlToWord(String html) throws IOException { /**
* @param content 富文本内容转word
* @throws Exception
*/
public String exportWord(String content) throws IOException {
if (!content.startsWith("<html>")) {
content = "<html>" + content;
}
if (!content.contains("<body>")) {
content = content.replaceFirst("<html>", "<html><body>") + "</body></html>";
}
byte b[] = content.getBytes("GBK"); //这里是必须要设置编码的,不然导出中文就会乱码。
ByteArrayInputStream bais = new ByteArrayInputStream(b);//将字节数组包装到流中
POIFSFileSystem poifs = new POIFSFileSystem();
DirectoryEntry directory = poifs.getRoot();
DocumentEntry documentEntry = directory.createDocument("WordDocument", bais); //该步骤不可省略,否则会出现乱码。
//输出文件
File file = File.createTempFile(UUIDTool.getUUID(), ".docx"); File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
Word07Writer writer = new Word07Writer(); FileOutputStream ostream = new FileOutputStream(file);
writer.addText(new Font("宋体", Font.PLAIN, 10), html); poifs.writeFilesystem(ostream);
writer.flush(file);
FileInputStream fileInputStream = new FileInputStream(file); FileInputStream fileInputStream = new FileInputStream(file);
String upload = bosConfigService.upload(fileInputStream, "docx", "application/msword"); String upload = bosConfigService.upload(fileInputStream, "docx", "application/msword");
file.deleteOnExit(); bais.close();
ostream.close();
poifs.close();
fileInputStream.close(); fileInputStream.close();
return upload; return upload;
} }
......
package cn.com.poc.expose; package cn.com.poc.expose;
import cn.com.poc.common.service.BosConfigService;
import cn.com.poc.common.utils.UUIDTool;
import cn.com.poc.expose.dto.ContentReportDto; import cn.com.poc.expose.dto.ContentReportDto;
import cn.com.poc.expose.rest.ContentReportRest; import cn.com.poc.expose.rest.ContentReportRest;
import cn.com.yict.framemax.core.spring.SingleContextInitializer; import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.junit.Test; import org.junit.Test;
import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.ContextConfiguration;
...@@ -10,7 +15,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; ...@@ -10,7 +15,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration; import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource; import javax.annotation.Resource;
import java.io.IOException; import java.io.*;
/** /**
* @author alex.yao * @author alex.yao
...@@ -26,27 +31,53 @@ public class ContentReportTest { ...@@ -26,27 +31,53 @@ public class ContentReportTest {
@Test @Test
public void test_report() throws IOException { public void test_report() throws IOException {
String content = "<pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code><span class=\"hljs-section\"># 图片内容总结</span>\n" + String content = "<html><body><p>在Markdown中,你可以使用LaTeX语法来输出数学公式,包括三角函数公式。要在Markdown中插入LaTeX公式,你需要使用<code>$</code>符号将公式包围起来。对于行内公式,使用单个<code>$</code>符号,而对于独立的公式块,使用两个<code>$$</code>符号。</p><p>下面是一些三角函数公式的例子:</p><h3>行内公式</h3><ul><li>正弦函数:<code>$\\sin(x)$</code></li><li>余弦函数:<code>$\\cos(x)$</code></li><li>正切函数:<code>$\\tan(x)$</code></li></ul><p>将以上代码插入Markdown文档中,将会得到相应的行内公式。</p><h3>公式块</h3><p>如果你想让公式独占一行并居中显示,可以使用两个<code>$$</code>符号来创建一个公式块。</p><p>例如:</p><pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code>$$\n" +
"\n" + "\\sin^2(x) + \\cos^2(x) = 1\n" +
"这是一幅书法作品,内容如下:\n" + "$$</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n" +
"\n" + "</code></pre><pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code>$$\n" +
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**正文**</span>:工资那么低,工作上出差错是应该的,毕竟便宜没好货!\n" + "\\tan(x) = \\frac{\\sin(x)}{\\cos(x)}\n" +
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**落款**</span>:一石\n" + "$$</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n" +
"\n" + "</code></pre><p>在渲染后的Markdown文档中,这些代码将会生成独立的公式块,其中包含相应的三角函数公式。</p><p>请注意,为了正确渲染LaTeX公式,你使用的Markdown编辑器或查看器需要支持LaTeX渲染。许多流行的Markdown编辑器,如Typora、VS Code(配合扩展),以及在线Markdown编辑器如StackEdit,都支持LaTeX公式的渲染。如果你使用的是不支持LaTeX的编辑器或查看器,你可能需要寻找其他解决方案或转换工具来查看渲染后的公式。</p></body></html>";
"<span class=\"hljs-section\">## 作品主题</span>\n" +
"作品表达了对工资低和工作质量之间关系的一种调侃和无奈的态度。作者认为因为工资低,所以工作出差错是可以接受的,并用“便宜没好货”这一俗语来类比,暗示低工资可能导致低质量的工作表现。\n" +
"\n" +
"<span class=\"hljs-section\">## 艺术风格</span>\n" +
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**书法风格**</span>:作品采用传统的书法形式,字体流畅,具有一定的艺术美感。\n" +
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**情感表达**</span>:通过幽默和讽刺的方式,传达出对现实工作环境的不满和对生活无奈的感慨。\n" +
"</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n" +
"</code></pre>";
String reportType = "doc"; String reportType = "doc";
ContentReportDto dto = new ContentReportDto(); ContentReportDto dto = new ContentReportDto();
dto.setContent(content); dto.setContent(content);
dto.setReportType(reportType); dto.setReportType(reportType);
contentReportRest.report(dto); System.out.println(contentReportRest.report(dto));
} }
// Storage service; exportWord in this class calls its upload method with the generated file.
@Resource
private BosConfigService bosConfigService;
/**
 * Exports a small HTML fragment through {@code exportWord} and prints the upload result.
 */
@Test
public void test_report2() throws IOException {
    String content = "<h1>标题头</h1><h2>第二个标题</h2><a href=\"www.baidu.com\">百度搜索</a>";
    // Wrap the fragment in a complete, well-formed document (closing tag must end with '>').
    String html = "<html><body>" + content + "</body></html>";
    System.out.println(exportWord(html));
}
/**
 * Stores rich-text (HTML) content in a Word-openable file and uploads it.
 *
 * <p>The content is encoded as GBK, stored as the {@code "WordDocument"} entry of a POIFS
 * container, written to a temporary file, and uploaded through {@code bosConfigService}.
 * The temporary file is removed afterwards.
 *
 * @param content rich-text (HTML) content
 * @return the value returned by {@code bosConfigService.upload} (presumably the location of
 *         the uploaded file; confirm against that service)
 * @throws IOException if encoding the content, writing the temporary file, or reading it back
 *         fails
 */
public String exportWord(String content) throws IOException {
    // The encoding must be set explicitly, otherwise exported Chinese text is garbled.
    byte[] encoded = content.getBytes("GBK");

    File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
    try {
        // Write and close the output stream before the file is opened for reading.
        try (ByteArrayInputStream bais = new ByteArrayInputStream(encoded);
             POIFSFileSystem poifs = new POIFSFileSystem();
             FileOutputStream ostream = new FileOutputStream(file)) {
            DirectoryEntry directory = poifs.getRoot();
            // This step must not be skipped, otherwise the output is garbled.
            directory.createDocument("WordDocument", bais);
            poifs.writeFilesystem(ostream);
        }
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            return bosConfigService.upload(fileInputStream, "docx", "application/msword");
        }
    } finally {
        // Best-effort cleanup of the temporary file; fall back to removal at JVM exit.
        if (!file.delete()) {
            file.deleteOnExit();
        }
    }
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment