Commit b934edab authored by alex yao

fix: doc 文件加载

parent 102dceb9
......@@ -107,12 +107,6 @@
<version>8.0.28</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- ES -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
......@@ -245,11 +239,14 @@
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
......@@ -260,6 +257,13 @@
<artifactId>ooxml-schemas</artifactId>
<version>1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<!-- Supplies the HWPF classes (org.apache.poi.hwpf.*, e.g. WordExtractor) used to read
     legacy binary .doc files; declared at the same version as poi-ooxml (4.1.2). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
......@@ -277,11 +281,6 @@
<version>3.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/cn.hutool/hutool-http -->
<dependency>
......
......@@ -6,7 +6,7 @@ import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
//import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.util.Assert;
......@@ -31,7 +31,7 @@ public class DocumentLoad {
case "docx":
return loadWordDocx(file);
case "doc":
return loadWordDocx(file);
return loadWordDoc(file);
case "md":
return loadMarkDown(file);
case "pdf":
......@@ -62,12 +62,12 @@ public class DocumentLoad {
return xwpfWordExtractor.getText();
}
// public static String loadWordDoc(File file) throws IOException {
// FileInputStream fis = new FileInputStream(file);
// HWPFDocument doc = new HWPFDocument(fis);
// fis.close();
// return doc.getText().toString();
// }
/**
 * Extracts the plain text of a legacy binary Word ({@code .doc}) file.
 *
 * <p>Both the file stream and the extractor are managed by a single
 * try-with-resources statement, so they are released on every exit path,
 * including when the extractor's constructor or {@code getText()} throws.
 *
 * @param file the {@code .doc} file to read
 * @return the text returned by {@link WordExtractor#getText()}
 * @throws IOException if the file cannot be opened or read
 */
public static String loadWordDoc(File file) throws IOException {
    try (FileInputStream fis = new FileInputStream(file);
         WordExtractor wordExtractor = new WordExtractor(fis)) {
        // getText() already yields a String; no extra toString() is required.
        return wordExtractor.getText();
    }
}
public static String loadTxt(File file) throws IOException {
BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment