Commit e308ea99 authored by alex yao

fix: web search 插件读取pdf文件

parent f99cad8c
...@@ -28,6 +28,8 @@ import org.slf4j.Logger; ...@@ -28,6 +28,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.io.File;
import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.concurrent.*; import java.util.concurrent.*;
...@@ -115,7 +117,17 @@ public class WebSearchFunction extends AbstractLargeModelFunction { ...@@ -115,7 +117,17 @@ public class WebSearchFunction extends AbstractLargeModelFunction {
for (Result item : items) { for (Result item : items) {
threadPoolExecutor.submit(() -> { threadPoolExecutor.submit(() -> {
String link = item.getLink(); String link = item.getLink();
String htmlContent = DocumentLoad.htmlToMarkdown(link); String htmlContent = StringUtils.EMPTY;
if (link.endsWith("pdf")) {
try {
File file = DocumentLoad.downloadURLDocument(link);
htmlContent = DocumentLoad.loadPDF(file);
} catch (IOException e) {
throw new RuntimeException(e);
}
} else {
htmlContent = DocumentLoad.htmlToMarkdown(link);
}
if (StringUtils.isNotBlank(htmlContent)) { if (StringUtils.isNotBlank(htmlContent)) {
htmlContent = htmlContent.replaceAll(StringUtils.SPACE, StringUtils.EMPTY); htmlContent = htmlContent.replaceAll(StringUtils.SPACE, StringUtils.EMPTY);
if (htmlContent.length() > 1500) { if (htmlContent.length() > 1500) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment