Commit 2eb8f581 authored by alex yao

fix: 富文本转Word

parent 333130e4
......@@ -12,12 +12,16 @@ import com.itextpdf.text.pdf.PdfWriter;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document;
import com.vladsch.flexmark.util.ast.Node;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.springframework.stereotype.Component;
......@@ -25,6 +29,7 @@ import javax.annotation.Resource;
import java.awt.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
......@@ -50,7 +55,7 @@ public class ContentReportRestImpl implements ContentReportRest {
String result = "";
if ("docx".equals(reportType) || "doc".equals(reportType)) {
String htmlContent = convertMarkdownToHtml(markdown);
result = convertHtmlToWord(htmlContent);
result = exportWord(htmlContent);
} else if ("html".equals(reportType)) {
String htmlContent = convertMarkdownToHtml(markdown);
result = bosConfigService.uploadFileByByteArray2Oss(htmlContent.getBytes(), UUIDTool.getUUID(), reportType);
......@@ -74,29 +79,45 @@ public class ContentReportRestImpl implements ContentReportRest {
return renderer.render(document);
}
// private String convertHtmlToWord(String html) throws IOException {
// File file = File.createTempFile(UUIDTool.getUUID(), ".doc");
// FileOutputStream outputStream = new FileOutputStream(file);
// ByteArrayInputStream bais = new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8));//将字节数组包装到流中
// POIFSFileSystem poifs = new POIFSFileSystem();
// DirectoryEntry directory = poifs.getRoot();
// directory.createDocument("WordDocument", bais);
// poifs.writeFilesystem(outputStream);
// File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
// Word07Writer writer = new Word07Writer();
// writer.addText(new Font("宋体", Font.PLAIN, 10), html);
// writer.flush(file);
// FileInputStream fileInputStream = new FileInputStream(file);
// bais.close();
// poifs.close();
// String upload = bosConfigService.upload(fileInputStream, "docx", "application/msword");
// file.deleteOnExit();
// return bosConfigService.upload(fileInputStream, "doc", "application/msword");
// fileInputStream.close();
// return upload;
// }
private String convertHtmlToWord(String html) throws IOException {
/**
 * Converts rich-text (HTML) content into a Word-openable file and uploads it.
 *
 * <p>The HTML is first wrapped into a complete {@code <html><body>...</body></html>}
 * document, encoded as GBK, stored as the {@code WordDocument} stream of an OLE2
 * (POIFS) container, written to a temporary file and finally handed to
 * {@code bosConfigService.upload}.
 *
 * @param content rich-text / HTML content to convert; must not be {@code null}
 * @return the value returned by {@code bosConfigService.upload} for the generated file
 * @throws IOException              if encoding, writing the temporary file or reading it back fails
 * @throws IllegalArgumentException if {@code content} is {@code null}
 */
public String exportWord(String content) throws IOException {
    if (content == null) {
        throw new IllegalArgumentException("content must not be null");
    }
    String html = wrapAsHtmlDocument(content);

    // The encoding must be set explicitly; per the original author's note,
    // exported Chinese text is garbled otherwise.
    byte[] bytes = html.getBytes("GBK");

    // NOTE(review): the container written here is OLE2 (the legacy .doc layout) while the
    // extension passed around is "docx" - kept as-is for compatibility; confirm with consumers.
    File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
    try {
        // Write phase: everything is closed (and therefore flushed) before the file is read back.
        try (POIFSFileSystem poifs = new POIFSFileSystem();
             ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
             FileOutputStream ostream = new FileOutputStream(file)) {
            // This step must not be omitted; per the original author's note the
            // output is garbled without the "WordDocument" entry.
            poifs.getRoot().createDocument("WordDocument", bais);
            poifs.writeFilesystem(ostream);
        }
        // Upload phase.
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            return bosConfigService.upload(fileInputStream, "docx", "application/msword");
        }
    } finally {
        // Remove the temporary file right away; fall back to deleteOnExit only if that fails,
        // so a long-running server does not accumulate temp files.
        if (!file.delete()) {
            file.deleteOnExit();
        }
    }
}

/**
 * Ensures the fragment is enclosed in exactly one {@code <html>} / {@code <body>} pair.
 */
private static String wrapAsHtmlDocument(String content) {
    final String htmlOpen = "<html>";
    final String htmlClose = "</html>";

    String html = content.startsWith(htmlOpen) ? content : htmlOpen + content;
    if (!html.contains("<body>")) {
        // Drop a trailing </html> first so the closing tag is not emitted twice.
        int close = html.lastIndexOf(htmlClose);
        if (close >= 0 && html.substring(close + htmlClose.length()).trim().isEmpty()) {
            html = html.substring(0, close);
        }
        html = htmlOpen + "<body>" + html.substring(htmlOpen.length()) + "</body>" + htmlClose;
    } else if (!html.contains(htmlClose)) {
        // A <body> was supplied but the <html> element we opened was never closed.
        html = html + htmlClose;
    }
    return html;
}
......
package cn.com.poc.expose;
import cn.com.poc.common.service.BosConfigService;
import cn.com.poc.common.utils.UUIDTool;
import cn.com.poc.expose.dto.ContentReportDto;
import cn.com.poc.expose.rest.ContentReportRest;
import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.runner.RunWith;
import org.junit.Test;
import org.springframework.test.context.ContextConfiguration;
......@@ -10,7 +15,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource;
import java.io.IOException;
import java.io.*;
/**
* @author alex.yao
......@@ -26,27 +31,53 @@ public class ContentReportTest {
/**
 * Sends an HTML document (LaTeX/Markdown explanation text) through
 * {@code contentReportRest.report} with report type {@code "doc"} and prints the result.
 *
 * <p>The stale first {@code content} literal left over from the previous revision was
 * removed (a second declaration of the same local does not compile), and the report is
 * now generated once instead of twice.
 */
@Test
public void test_report() throws IOException {
    String content = "<html><body><p>在Markdown中,你可以使用LaTeX语法来输出数学公式,包括三角函数公式。要在Markdown中插入LaTeX公式,你需要使用<code>$</code>符号将公式包围起来。对于行内公式,使用单个<code>$</code>符号,而对于独立的公式块,使用两个<code>$$</code>符号。</p><p>下面是一些三角函数公式的例子:</p><h3>行内公式</h3><ul><li>正弦函数:<code>$\\sin(x)$</code></li><li>余弦函数:<code>$\\cos(x)$</code></li><li>正切函数:<code>$\\tan(x)$</code></li></ul><p>将以上代码插入Markdown文档中,将会得到相应的行内公式。</p><h3>公式块</h3><p>如果你想让公式独占一行并居中显示,可以使用两个<code>$$</code>符号来创建一个公式块。</p><p>例如:</p><pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code>$$\n" +
            "\\sin^2(x) + \\cos^2(x) = 1\n" +
            "$$</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n" +
            "</code></pre><pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code>$$\n" +
            "\\tan(x) = \\frac{\\sin(x)}{\\cos(x)}\n" +
            "$$</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n" +
            "</code></pre><p>在渲染后的Markdown文档中,这些代码将会生成独立的公式块,其中包含相应的三角函数公式。</p><p>请注意,为了正确渲染LaTeX公式,你使用的Markdown编辑器或查看器需要支持LaTeX渲染。许多流行的Markdown编辑器,如Typora、VS Code(配合扩展),以及在线Markdown编辑器如StackEdit,都支持LaTeX公式的渲染。如果你使用的是不支持LaTeX的编辑器或查看器,你可能需要寻找其他解决方案或转换工具来查看渲染后的公式。</p></body></html>";
    String reportType = "doc";

    ContentReportDto dto = new ContentReportDto();
    dto.setContent(content);
    dto.setReportType(reportType);

    // Call the endpoint once and print what it returns.
    System.out.println(contentReportRest.report(dto));
}
// Injected storage service; the exportWord helper in this test class calls its upload method.
@Resource
private BosConfigService bosConfigService;
/**
 * Wraps a small HTML fragment (two headings and a link) in a complete
 * {@code <html><body>} document, passes it to {@code exportWord} and prints the result.
 */
@Test
public void test_report2() throws IOException {
    String content = "<h1>标题头</h1><h2>第二个标题</h2><a href=\"www.baidu.com\">百度搜索</a>";
    // The closing tag previously lacked its '>' ("</html"), producing malformed markup.
    String html = "<html><body>" + content + "</body></html>";
    System.out.println(exportWord(html));
}
/**
 * Stores the given HTML as the {@code WordDocument} stream of an OLE2 (POIFS) container,
 * writes it to a temporary file and uploads that file via {@code bosConfigService.upload}.
 *
 * @param content rich-text (HTML) content to export
 * @return the value returned by {@code bosConfigService.upload} for the generated file
 * @throws IOException if encoding, writing the temporary file or reading it back fails
 */
public String exportWord(String content) throws IOException {
    // The encoding must be set explicitly; per the original author's note,
    // exported Chinese text is garbled otherwise.
    byte[] bytes = content.getBytes("GBK");

    File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
    try {
        // Write phase: all resources are closed (and flushed) before the file is read back.
        try (POIFSFileSystem poifs = new POIFSFileSystem();
             ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
             FileOutputStream ostream = new FileOutputStream(file)) {
            // This step must not be omitted; per the original author's note the
            // output is garbled without the "WordDocument" entry.
            poifs.getRoot().createDocument("WordDocument", bais);
            poifs.writeFilesystem(ostream);
        }
        // Upload phase.
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            return bosConfigService.upload(fileInputStream, "docx", "application/msword");
        }
    } finally {
        // The temporary file was previously never removed; clean it up after the upload.
        if (!file.delete()) {
            file.deleteOnExit();
        }
    }
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment