Commit 638e3dd7 authored by alex yao

feat: Html转Markdown

parent e91da2d1
...@@ -375,6 +375,13 @@ ...@@ -375,6 +375,13 @@
<version>0.10.328</version> <version>0.10.328</version>
</dependency> </dependency>
<dependency>
<groupId>io.github.furstenheim</groupId>
<artifactId>copy_down</artifactId>
<version>1.0</version>
</dependency>
</dependencies> </dependencies>
......
...@@ -2,6 +2,7 @@ package cn.com.poc.common.utils; ...@@ -2,6 +2,7 @@ package cn.com.poc.common.utils;
import cn.com.yict.framemax.core.i18n.I18nMessageException; import cn.com.yict.framemax.core.i18n.I18nMessageException;
import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.FileUtil;
import io.github.furstenheim.*;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
...@@ -12,10 +13,48 @@ import org.apache.poi.xwpf.usermodel.XWPFDocument; ...@@ -12,10 +13,48 @@ import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.util.Assert; import org.springframework.util.Assert;
import java.io.*; import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files; import java.nio.file.Files;
public class DocumentLoad { public class DocumentLoad {
// Shared copy_down configuration used by the HTML-to-Markdown conversion in this class.
// The builder is kept as its own field because `options` is produced from it.
static final OptionsBuilder optionsBuilder = OptionsBuilder.anOptions();

// Conversion options: "-" as the br setting, links emitted in REFERENCED style with
// SHORTCUT link references. The exact Markdown these produce is defined by copy_down;
// confirm against its documentation before changing any of them.
static final Options options =
        optionsBuilder
                .withBr("-")
                .withLinkStyle(LinkStyle.REFERENCED)
                .withLinkReferenceStyle(LinkReferenceStyle.SHORTCUT)
                .build();

// Single converter instance reused by every htmlToMarkdown call.
// NOTE(review): whether CopyDown is safe to share across threads is not visible here -- confirm.
static final CopyDown converter = new CopyDown(options);
/**
 * Downloads the HTML document at the given URL and converts it to Markdown.
 *
 * <p>The response body is decoded with the charset announced in the response's
 * {@code Content-Type} header; when none is announced (or it is not supported by this JVM)
 * UTF-8 is used. A charset declared only inside the HTML ({@code <meta charset>}) is not
 * inspected.
 *
 * <p>NOTE(review): the URL is opened as-is, including non-HTTP schemes such as
 * {@code file:}. If {@code url} can come from untrusted input, validate it before calling.
 *
 * @param url location of the HTML document to fetch
 * @return the Markdown produced by the shared converter for the downloaded HTML
 * @throws I18nMessageException if the URL is malformed or the document cannot be read;
 *         carries the message of the underlying {@link IOException}
 */
public static String htmlToMarkdown(String url) {
    try {
        URLConnection conn = new URL(url).openConnection();
        java.nio.charset.Charset charset = resolveCharset(conn.getContentType());
        // try-with-resources closes both the reader and the raw stream even when a
        // read fails half-way, which the manual close() calls did not guarantee.
        try (InputStream inputStream = conn.getInputStream();
             Reader reader = new InputStreamReader(inputStream, charset)) {
            // 64K-char buffer for draining the response body.
            char[] buffer = new char[1024 * 64];
            StringBuilder sb = new StringBuilder();
            int len;
            while ((len = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, len);
            }
            return converter.convert(sb.toString());
        }
    } catch (IOException e) {
        // NOTE(review): only the message is forwarded. If I18nMessageException has a
        // cause-accepting constructor, pass `e` as well -- not visible from here.
        throw new I18nMessageException(e.getMessage());
    }
}

/**
 * Extracts the charset from a Content-Type header value such as
 * {@code text/html; charset=GBK}, falling back to UTF-8 when it is absent or unusable.
 */
private static java.nio.charset.Charset resolveCharset(String contentType) {
    final String key = "charset=";
    if (contentType != null) {
        for (String part : contentType.split(";")) {
            String param = part.trim();
            if (param.regionMatches(true, 0, key, 0, key.length())) {
                String name = param.substring(key.length()).replace("\"", "").trim();
                try {
                    return java.nio.charset.Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Malformed or unsupported charset label: fall through to the default.
                }
            }
        }
    }
    return java.nio.charset.StandardCharsets.UTF_8;
}
/** /**
* 读取文档 * 读取文档
* *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment