feat：Agent插件文档读取支持excel文件

ca6cdc13 · alex yao · ea801c10 · ca6cdc13 · ca6cdc13 · ca6cdc13
Commit ca6cdc13 authored Jan 17, 2025 by alex yao
4 changed files
--- a/src/main/java/cn/com/poc/common/utils/DocumentLoad.java
+++ b/src/main/java/cn/com/poc/common/utils/DocumentLoad.java
@@ -8,6 +8,8 @@ import org.apache.pdfbox.pdfparser.PDFParser;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.ss.usermodel.*;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
 import org.springframework.util.Assert;
@@ -16,6 +18,7 @@ import java.io.*;
 import java.net.URL;
 import java.net.URLConnection;
 import java.nio.file.Files;
+import java.util.Iterator;
 public class DocumentLoad {
@@ -26,6 +29,43 @@ public class DocumentLoad {
            .build();
    final static CopyDown converter = new CopyDown(options);
+    /**
+     * Converts the first sheet of an Excel workbook into a Markdown table.
+     * <p>
+     * The first row that the sheet yields is used as the table header and is followed by a
+     * separator line that has one entry per column. Every row is padded to the width of the
+     * widest row so that cells missing from a row do not shift later values into the wrong column.
+     * String, numeric and boolean cells are rendered through {@link DataFormatter}; every other
+     * cell type (blank, formula, error) is rendered as an empty cell.
+     * <p>
+     * The workbook is opened through {@link WorkbookFactory}, which detects the container format
+     * from the file content instead of assuming the OOXML ({@code .xlsx}) format.
+     * NOTE(review): plain-text CSV is not a workbook format, so a {@code .csv} file is expected to
+     * end up in the load-error path - confirm whether CSV needs a dedicated reader.
+     *
+     * @param file the workbook file to read
+     * @return the Markdown table, or an empty string when the first sheet contains no cells
+     * @throws I18nMessageException if the file cannot be read or opened as a workbook
+     */
+    public static String excelToMarkdown(File file) {
+        // try-with-resources closes the workbook and the stream even when parsing fails
+        try (InputStream inputStream = new FileInputStream(file);
+             Workbook workbook = WorkbookFactory.create(inputStream)) {
+            Sheet sheet = workbook.getSheetAt(0);
+            // The table width is the widest row; getLastCellNum() is -1 for a row without cells
+            int columnCount = 0;
+            for (Row row : sheet) {
+                columnCount = Math.max(columnCount, row.getLastCellNum());
+            }
+            if (columnCount <= 0) {
+                return "";
+            }
+            DataFormatter formatter = new DataFormatter();
+            StringBuilder markdown = new StringBuilder();
+            boolean separatorWritten = false;
+            for (Row row : sheet) {
+                for (int column = 0; column < columnCount; column++) {
+                    markdown.append("| ")
+                            .append(escapeMarkdownCell(cellText(row.getCell(column), formatter)))
+                            .append(' ');
+                }
+                markdown.append("|\n");
+                // The Markdown separator line belongs directly below the header row
+                if (!separatorWritten) {
+                    for (int column = 0; column < columnCount; column++) {
+                        markdown.append("| --- ");
+                    }
+                    markdown.append("|\n");
+                    separatorWritten = true;
+                }
+            }
+            return markdown.toString();
+        } catch (IOException e) {
+            throw new I18nMessageException("exception/file.load.error");
+        }
+    }
+
+    /**
+     * Returns the display text of a cell; missing cells and unsupported cell types yield "".
+     */
+    private static String cellText(Cell cell, DataFormatter formatter) {
+        if (cell == null) {
+            return "";
+        }
+        CellType type = cell.getCellType();
+        if (type == CellType.STRING || type == CellType.NUMERIC || type == CellType.BOOLEAN) {
+            // DataFormatter applies the cell's number/date format instead of printing a raw double
+            return formatter.formatCellValue(cell);
+        }
+        return "";
+    }
+
+    /**
+     * Escapes the characters that would break a Markdown table cell (pipes and line breaks).
+     */
+    private static String escapeMarkdownCell(String text) {
+        return text.replace("|", "\\|")
+                .replace("\r\n", "<br>")
+                .replace("\n", "<br>")
+                .replace("\r", "<br>");
+    }
    /**
     * Html To Markdown
     */
@@ -81,6 +121,10 @@ public class DocumentLoad {
                    return loadPDF(file);
                case "txt":
                    return loadTxt(file);
+                case "xlsx":
+                case "xls":
+                case "csv":
+                    return excelToMarkdown(file);
                default:
                    throw new I18nMessageException(type + " format is not yet supported");
            }

--- a/src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/document_understanding/DocumentUnderstandIngFunction.java
+++ b/src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/document_understanding/DocumentUnderstandIngFunction.java
@@ -47,7 +47,7 @@ public class DocumentUnderstandIngFunction extends AbstractLargeModelFunction {
            .description(DESC)
            .parameters(new Parameters("object")
                    .addProperties("question", new Properties("string", "提炼用户的问题"))
-                    .addProperties("file_url", new Properties("string", "doc、docx、pdf、txt、md文件地址"))
+                    .addProperties("file_url", new Properties("string", "doc、docx、pdf、txt、md、xlsx、csv、xls文件地址"))
            ).build();

--- a/src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/image_ocr/ImageOCRFunction.java
+++ b/src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/image_ocr/ImageOCRFunction.java
@@ -76,6 +76,7 @@ public class ImageOCRFunction extends AbstractLargeModelFunction {
        response.setModel(model);
        response.setMessages(messages);
        response.setStream(false);
+        response.setUser("Image_OCR");
        LargeModelDemandResult largeModelDemandResult = llmService.chat(response);
        return largeModelDemandResult.getMessage();
    }

--- a/src/test/java/cn/com/poc/thirdparty/resource/demand/ai/function/ImageOCRFunctionTest.java
+++ b/src/test/java/cn/com/poc/thirdparty/resource/demand/ai/function/ImageOCRFunctionTest.java
 package cn.com.poc.thirdparty.resource.demand.ai.function;
+import cn.com.poc.common.utils.DocumentLoad;
 import cn.com.poc.thirdparty.resource.demand.ai.function.html_reader.HtmlReaderFunction;
 import cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction;
 import cn.com.poc.thirdparty.resource.demand.ai.function.top_search.WeiboTopSearchFunction;
@@ -11,6 +12,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
 import org.springframework.test.context.web.WebAppConfiguration;
 import javax.annotation.Resource;
+import java.io.File;
 /**
 * @author alex.yao
@@ -45,4 +47,10 @@ public class ImageOCRFunctionTest {
    public void weibo() {
        System.out.println(weiboTopSearchFunction.getLLMConfig());
    }
+    /**
+     * Writes a small workbook to a temporary file and checks that
+     * {@link DocumentLoad#excelToMarkdown(File)} renders its cell values, so the test does not
+     * depend on a file that exists only on one developer's machine.
+     */
+    @Test
+    public void excelToMarkdown() throws Exception {
+        File file = File.createTempFile("excel-to-markdown", ".xlsx");
+        try {
+            try (org.apache.poi.ss.usermodel.Workbook workbook = new org.apache.poi.xssf.usermodel.XSSFWorkbook();
+                 java.io.OutputStream out = new java.io.FileOutputStream(file)) {
+                org.apache.poi.ss.usermodel.Sheet sheet = workbook.createSheet("intents");
+                org.apache.poi.ss.usermodel.Row header = sheet.createRow(0);
+                header.createCell(0).setCellValue("question");
+                header.createCell(1).setCellValue("answer");
+                org.apache.poi.ss.usermodel.Row first = sheet.createRow(1);
+                first.createCell(0).setCellValue("how to renew");
+                first.createCell(1).setCellValue("visit the counter");
+                workbook.write(out);
+            }
+            String markdown = DocumentLoad.excelToMarkdown(file);
+            System.out.println(markdown);
+            org.junit.Assert.assertTrue(markdown.contains("question"));
+            org.junit.Assert.assertTrue(markdown.contains("answer"));
+            org.junit.Assert.assertTrue(markdown.contains("how to renew"));
+            org.junit.Assert.assertTrue(markdown.contains("visit the counter"));
+        } finally {
+            // Best-effort cleanup of the temporary workbook
+            file.delete();
+        }
+    }
 }