Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
P
poc-api
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
poc
poc-api
Commits
2eb8f581
Commit
2eb8f581
authored
May 20, 2025
by
alex yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix: 富文本转Word
parent
333130e4
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
85 additions
and
33 deletions
+85
-33
ContentReportRestImpl.java
...va/cn/com/poc/expose/rest/impl/ContentReportRestImpl.java
+37
-16
ContentReportTest.java
src/test/java/cn/com/poc/expose/ContentReportTest.java
+48
-17
No files found.
src/main/java/cn/com/poc/expose/rest/impl/ContentReportRestImpl.java
View file @
2eb8f581
...
@@ -12,12 +12,16 @@ import com.itextpdf.text.pdf.PdfWriter;
...
@@ -12,12 +12,16 @@ import com.itextpdf.text.pdf.PdfWriter;
import
com.vladsch.flexmark.html.HtmlRenderer
;
import
com.vladsch.flexmark.html.HtmlRenderer
;
import
com.vladsch.flexmark.parser.Parser
;
import
com.vladsch.flexmark.parser.Parser
;
import
com.vladsch.flexmark.util.ast.Document
;
import
com.vladsch.flexmark.util.ast.Document
;
import
com.vladsch.flexmark.util.ast.Node
;
import
org.apache.pdfbox.io.RandomAccessBuffer
;
import
org.apache.pdfbox.io.RandomAccessBuffer
;
import
org.apache.pdfbox.io.RandomAccessBufferedFileInputStream
;
import
org.apache.pdfbox.io.RandomAccessBufferedFileInputStream
;
import
org.apache.pdfbox.pdfparser.PDFParser
;
import
org.apache.pdfbox.pdfparser.PDFParser
;
import
org.apache.pdfbox.pdmodel.PDDocument
;
import
org.apache.pdfbox.pdmodel.PDDocument
;
import
org.apache.poi.openxml4j.exceptions.InvalidFormatException
;
import
org.apache.poi.poifs.filesystem.DirectoryEntry
;
import
org.apache.poi.poifs.filesystem.DirectoryEntry
;
import
org.apache.poi.poifs.filesystem.DocumentEntry
;
import
org.apache.poi.poifs.filesystem.POIFSFileSystem
;
import
org.apache.poi.poifs.filesystem.POIFSFileSystem
;
import
org.apache.poi.util.Units
;
import
org.apache.poi.xwpf.usermodel.*
;
import
org.apache.poi.xwpf.usermodel.*
;
import
org.springframework.stereotype.Component
;
import
org.springframework.stereotype.Component
;
...
@@ -25,6 +29,7 @@ import javax.annotation.Resource;
...
@@ -25,6 +29,7 @@ import javax.annotation.Resource;
import
java.awt.*
;
import
java.awt.*
;
import
java.io.*
;
import
java.io.*
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.charset.StandardCharsets
;
import
java.util.Base64
;
import
java.util.regex.Matcher
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
java.util.regex.Pattern
;
...
@@ -50,7 +55,7 @@ public class ContentReportRestImpl implements ContentReportRest {
...
@@ -50,7 +55,7 @@ public class ContentReportRestImpl implements ContentReportRest {
String
result
=
""
;
String
result
=
""
;
if
(
"docx"
.
equals
(
reportType
)
||
"doc"
.
equals
(
reportType
))
{
if
(
"docx"
.
equals
(
reportType
)
||
"doc"
.
equals
(
reportType
))
{
String
htmlContent
=
convertMarkdownToHtml
(
markdown
);
String
htmlContent
=
convertMarkdownToHtml
(
markdown
);
result
=
convertHtmlTo
Word
(
htmlContent
);
result
=
export
Word
(
htmlContent
);
}
else
if
(
"html"
.
equals
(
reportType
))
{
}
else
if
(
"html"
.
equals
(
reportType
))
{
String
htmlContent
=
convertMarkdownToHtml
(
markdown
);
String
htmlContent
=
convertMarkdownToHtml
(
markdown
);
result
=
bosConfigService
.
uploadFileByByteArray2Oss
(
htmlContent
.
getBytes
(),
UUIDTool
.
getUUID
(),
reportType
);
result
=
bosConfigService
.
uploadFileByByteArray2Oss
(
htmlContent
.
getBytes
(),
UUIDTool
.
getUUID
(),
reportType
);
...
@@ -74,29 +79,45 @@ public class ContentReportRestImpl implements ContentReportRest {
...
@@ -74,29 +79,45 @@ public class ContentReportRestImpl implements ContentReportRest {
return
renderer
.
render
(
document
);
return
renderer
.
render
(
document
);
}
}
// private String convertHtmlToWord(String html) throws IOException {
// private String convertHtmlToWord(String html) throws IOException {
// File file = File.createTempFile(UUIDTool.getUUID(), ".doc");
// File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
// FileOutputStream outputStream = new FileOutputStream(file);
// Word07Writer writer = new Word07Writer();
// ByteArrayInputStream bais = new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8));//将字节数组包装到流中
// writer.addText(new Font("宋体", Font.PLAIN, 10), html);
// POIFSFileSystem poifs = new POIFSFileSystem();
// writer.flush(file);
// DirectoryEntry directory = poifs.getRoot();
// directory.createDocument("WordDocument", bais);
// poifs.writeFilesystem(outputStream);
// FileInputStream fileInputStream = new FileInputStream(file);
// FileInputStream fileInputStream = new FileInputStream(file);
// bais.close();
// String upload = bosConfigService.upload(fileInputStream, "docx", "application/msword");
// poifs.close();
// file.deleteOnExit();
// file.deleteOnExit();
// return bosConfigService.upload(fileInputStream, "doc", "application/msword");
// fileInputStream.close();
// return upload;
// }
// }
private
String
convertHtmlToWord
(
String
html
)
throws
IOException
{
/**
* @param content 富文本内容转word
* @throws Exception
*/
public
String
exportWord
(
String
content
)
throws
IOException
{
if
(!
content
.
startsWith
(
"<html>"
))
{
content
=
"<html>"
+
content
;
}
if
(!
content
.
contains
(
"<body>"
))
{
content
=
content
.
replaceFirst
(
"<html>"
,
"<html><body>"
)
+
"</body></html>"
;
}
byte
b
[]
=
content
.
getBytes
(
"GBK"
);
//这里是必须要设置编码的,不然导出中文就会乱码。
ByteArrayInputStream
bais
=
new
ByteArrayInputStream
(
b
);
//将字节数组包装到流中
POIFSFileSystem
poifs
=
new
POIFSFileSystem
();
DirectoryEntry
directory
=
poifs
.
getRoot
();
DocumentEntry
documentEntry
=
directory
.
createDocument
(
"WordDocument"
,
bais
);
//该步骤不可省略,否则会出现乱码。
//输出文件
File
file
=
File
.
createTempFile
(
UUIDTool
.
getUUID
(),
".docx"
);
File
file
=
File
.
createTempFile
(
UUIDTool
.
getUUID
(),
".docx"
);
Word07Writer
writer
=
new
Word07Writer
();
FileOutputStream
ostream
=
new
FileOutputStream
(
file
);
writer
.
addText
(
new
Font
(
"宋体"
,
Font
.
PLAIN
,
10
),
html
);
poifs
.
writeFilesystem
(
ostream
);
writer
.
flush
(
file
);
FileInputStream
fileInputStream
=
new
FileInputStream
(
file
);
FileInputStream
fileInputStream
=
new
FileInputStream
(
file
);
String
upload
=
bosConfigService
.
upload
(
fileInputStream
,
"docx"
,
"application/msword"
);
String
upload
=
bosConfigService
.
upload
(
fileInputStream
,
"docx"
,
"application/msword"
);
file
.
deleteOnExit
();
bais
.
close
();
ostream
.
close
();
poifs
.
close
();
fileInputStream
.
close
();
fileInputStream
.
close
();
return
upload
;
return
upload
;
}
}
...
...
src/test/java/cn/com/poc/expose/ContentReportTest.java
View file @
2eb8f581
package
cn
.
com
.
poc
.
expose
;
package
cn
.
com
.
poc
.
expose
;
import
cn.com.poc.common.service.BosConfigService
;
import
cn.com.poc.common.utils.UUIDTool
;
import
cn.com.poc.expose.dto.ContentReportDto
;
import
cn.com.poc.expose.dto.ContentReportDto
;
import
cn.com.poc.expose.rest.ContentReportRest
;
import
cn.com.poc.expose.rest.ContentReportRest
;
import
cn.com.yict.framemax.core.spring.SingleContextInitializer
;
import
cn.com.yict.framemax.core.spring.SingleContextInitializer
;
import
org.apache.poi.poifs.filesystem.DirectoryEntry
;
import
org.apache.poi.poifs.filesystem.DocumentEntry
;
import
org.apache.poi.poifs.filesystem.POIFSFileSystem
;
import
org.junit.runner.RunWith
;
import
org.junit.runner.RunWith
;
import
org.junit.Test
;
import
org.junit.Test
;
import
org.springframework.test.context.ContextConfiguration
;
import
org.springframework.test.context.ContextConfiguration
;
...
@@ -10,7 +15,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
...
@@ -10,7 +15,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import
org.springframework.test.context.web.WebAppConfiguration
;
import
org.springframework.test.context.web.WebAppConfiguration
;
import
javax.annotation.Resource
;
import
javax.annotation.Resource
;
import
java.io.
IOException
;
import
java.io.
*
;
/**
/**
* @author alex.yao
* @author alex.yao
...
@@ -26,27 +31,53 @@ public class ContentReportTest {
...
@@ -26,27 +31,53 @@ public class ContentReportTest {
@Test
@Test
public
void
test_report
()
throws
IOException
{
public
void
test_report
()
throws
IOException
{
String
content
=
"<pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code><span class=\"hljs-section\"># 图片内容总结</span>\n"
+
String
content
=
"<html><body><p>在Markdown中,你可以使用LaTeX语法来输出数学公式,包括三角函数公式。要在Markdown中插入LaTeX公式,你需要使用<code>$</code>符号将公式包围起来。对于行内公式,使用单个<code>$</code>符号,而对于独立的公式块,使用两个<code>$$</code>符号。</p><p>下面是一些三角函数公式的例子:</p><h3>行内公式</h3><ul><li>正弦函数:<code>$\\sin(x)$</code></li><li>余弦函数:<code>$\\cos(x)$</code></li><li>正切函数:<code>$\\tan(x)$</code></li></ul><p>将以上代码插入Markdown文档中,将会得到相应的行内公式。</p><h3>公式块</h3><p>如果你想让公式独占一行并居中显示,可以使用两个<code>$$</code>符号来创建一个公式块。</p><p>例如:</p><pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code>$$\n"
+
"\n"
+
"\\sin^2(x) + \\cos^2(x) = 1\n"
+
"这是一幅书法作品,内容如下:\n"
+
"$$</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n"
+
"\n"
+
"</code></pre><pre> </pre><div class=\"code-render-container\"><div class=\"code-operation-bar-container\"><span class=\"language\">markdown</span></div><div class=\"code-render-wrapper\"><pre class=\"code-render-inner\"><code>$$\n"
+
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**正文**</span>:工资那么低,工作上出差错是应该的,毕竟便宜没好货!\n"
+
"\\tan(x) = \\frac{\\sin(x)}{\\cos(x)}\n"
+
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**落款**</span>:一石\n"
+
"$$</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n"
+
"\n"
+
"</code></pre><p>在渲染后的Markdown文档中,这些代码将会生成独立的公式块,其中包含相应的三角函数公式。</p><p>请注意,为了正确渲染LaTeX公式,你使用的Markdown编辑器或查看器需要支持LaTeX渲染。许多流行的Markdown编辑器,如Typora、VS Code(配合扩展),以及在线Markdown编辑器如StackEdit,都支持LaTeX公式的渲染。如果你使用的是不支持LaTeX的编辑器或查看器,你可能需要寻找其他解决方案或转换工具来查看渲染后的公式。</p></body></html>"
;
"<span class=\"hljs-section\">## 作品主题</span>\n"
+
"作品表达了对工资低和工作质量之间关系的一种调侃和无奈的态度。作者认为因为工资低,所以工作出差错是可以接受的,并用“便宜没好货”这一俗语来类比,暗示低工资可能导致低质量的工作表现。\n"
+
"\n"
+
"<span class=\"hljs-section\">## 艺术风格</span>\n"
+
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**书法风格**</span>:作品采用传统的书法形式,字体流畅,具有一定的艺术美感。\n"
+
"<span class=\"hljs-bullet\">-</span> <span class=\"hljs-strong\">**情感表达**</span>:通过幽默和讽刺的方式,传达出对现实工作环境的不满和对生活无奈的感慨。\n"
+
"</code></pre></div></div><pre><code class=\"hljs code-container-wrapper language-markdown\">\n"
+
"</code></pre>"
;
String
reportType
=
"doc"
;
String
reportType
=
"doc"
;
ContentReportDto
dto
=
new
ContentReportDto
();
ContentReportDto
dto
=
new
ContentReportDto
();
dto
.
setContent
(
content
);
dto
.
setContent
(
content
);
dto
.
setReportType
(
reportType
);
dto
.
setReportType
(
reportType
);
contentReportRest
.
report
(
dto
);
System
.
out
.
println
(
contentReportRest
.
report
(
dto
)
);
}
}
@Resource
private
BosConfigService
bosConfigService
;
/**
 * Exports a small HTML fragment (two headings and a link) through {@code exportWord}
 * and prints the value it returns.
 *
 * @throws IOException if {@code exportWord} fails
 */
@Test
public void test_report2() throws IOException {
    String content = "<h1>标题头</h1><h2>第二个标题</h2><a href=\"www.baidu.com\">百度搜索</a>";
    // Wrap the fragment in a complete document. The closing tag previously lacked its
    // final '>' ("</body></html"), which produced malformed HTML.
    String html = "<html><body>" + content + "</body></html>";
    System.out.println(exportWord(html));
}
/**
 * Packs rich-text (HTML) content into an OLE2 container and uploads the result.
 *
 * <p>The GBK-encoded bytes of {@code content} are stored as the {@code "WordDocument"}
 * entry of a new POIFS file system, written to a temporary file, and that file is handed
 * to {@code bosConfigService.upload}. All streams are closed and the temporary file is
 * removed even when one of the steps throws.
 *
 * @param content rich-text HTML content; must not be {@code null}
 * @return the value returned by {@code bosConfigService.upload} for the generated file
 * @throws IOException if encoding the content, writing the temporary file, or reading it
 *                     back fails
 */
public String exportWord(String content) throws IOException {
    // Original author's note: the encoding must be set explicitly, otherwise exported
    // Chinese text comes out garbled.
    byte[] htmlBytes = content.getBytes("GBK");

    File file = File.createTempFile(UUIDTool.getUUID(), ".docx");
    try {
        // Write phase: the output stream is fully closed before the file is read back.
        try (ByteArrayInputStream bais = new ByteArrayInputStream(htmlBytes);
             POIFSFileSystem poifs = new POIFSFileSystem();
             FileOutputStream ostream = new FileOutputStream(file)) {
            DirectoryEntry directory = poifs.getRoot();
            // Original author's note: this step cannot be omitted, otherwise the output
            // is garbled.
            directory.createDocument("WordDocument", bais);
            poifs.writeFilesystem(ostream);
        }

        // Upload phase.
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            return bosConfigService.upload(fileInputStream, "docx", "application/msword");
        }
    } finally {
        // Best-effort cleanup so test runs do not leave temp files behind.
        file.delete();
    }
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment