Commit 7674903c authored by alex yao

fix: PDF乱码替换

parent 768f92ec
......@@ -244,7 +244,7 @@ public class DocumentLoad {
textStripper.setSortByPosition(true);
stringBuilder.append(textStripper.getText(doc));
doc.close();
return stringBuilder.toString();
return stringBuilder.toString().replaceAll("�", StringUtils.EMPTY);
}
......
......@@ -61,4 +61,11 @@ public class FileUtilsTest {
}
}
/**
 * Manual smoke check for PDF text extraction: passes one PDF file to
 * {@code DocumentLoad.documentToText} and prints the returned text to
 * standard output. No assertions are made on the extracted text.
 *
 * NOTE(review): the input is an absolute path under a user-specific Windows
 * directory, so it presumably exists only on the author's machine — confirm,
 * and consider moving the sample PDF into the test resources.
 */
@Test
public void test_pdf() {
    final String samplePdfPath =
            "C:\\Users\\52747\\Documents\\dataset\\迎向AI新纪元:2025企业转型的关键时刻-从2024产业案例看今年生成式AI.pdf";
    final File samplePdf = new File(samplePdfPath);

    // Print the extracted text so it can be inspected by eye.
    final String extractedText = DocumentLoad.documentToText(samplePdf);
    System.out.println(extractedText);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment