Commit ca6cdc13 authored by alex yao

feat:Agent插件文档读取支持excel文件

parent ea801c10
......@@ -8,6 +8,8 @@ import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.util.Assert;
......@@ -16,6 +18,7 @@ import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.util.Iterator;
public class DocumentLoad {
......@@ -26,6 +29,43 @@ public class DocumentLoad {
.build();
// Shared CopyDown converter instance, constructed once from {@code options}.
final static CopyDown converter = new CopyDown(options);
/**
 * Renders the first sheet of an Excel workbook as a Markdown table.
 *
 * <p>The first row found in the sheet is treated as the table header and is followed by a
 * separator line with one {@code --} column per header cell. String and numeric cells are
 * written with their values; cells of any other type, and cells missing from a row, are
 * written as empty columns so that later values stay in their own column.
 *
 * <p>The workbook is opened through {@link WorkbookFactory}, which detects the container
 * format from the file content, so both {@code .xls} and {@code .xlsx} inputs are accepted.
 * NOTE(review): plain-text {@code .csv} files are not a workbook format and cannot be parsed
 * here, although the loader's type dispatch routes "csv" to this method - confirm the
 * intended handling.
 *
 * @param file the workbook file to read
 * @return the Markdown table text, or an empty string when the sheet contains no rows
 * @throws I18nMessageException if the file cannot be opened or read
 */
public static String excelToMarkdown(File file) {
    // try-with-resources closes the workbook and the stream even when parsing fails midway.
    try (FileInputStream inputStream = new FileInputStream(file);
         Workbook workbook = WorkbookFactory.create(inputStream)) {
        Sheet sheet = workbook.getSheetAt(0);
        StringBuilder markdown = new StringBuilder();
        boolean separatorWritten = false;
        for (Row row : sheet) {
            // getLastCellNum() is -1 for a row without cells; clamp so the loops below are no-ops.
            int columnCount = Math.max(row.getLastCellNum(), 0);
            for (int column = 0; column < columnCount; column++) {
                markdown.append("| ");
                // Walk by column index: the row iterator skips missing cells, which would
                // shift the remaining values to the left.
                Cell cell = row.getCell(column);
                if (cell == null) {
                    continue;
                }
                if (cell.getCellType() == CellType.STRING) {
                    markdown.append(cell.getStringCellValue());
                } else if (cell.getCellType() == CellType.NUMERIC) {
                    markdown.append(cell.getNumericCellValue());
                }
            }
            markdown.append("|\n");
            if (!separatorWritten) {
                // Add the Markdown table separator line directly below the header row,
                // with as many columns as the header has.
                for (int column = 0; column < columnCount; column++) {
                    markdown.append("|--");
                }
                markdown.append("|\n");
                separatorWritten = true;
            }
        }
        return markdown.toString();
    } catch (IOException e) {
        throw new I18nMessageException("exception/file.load.error");
    }
}
/**
* Html To Markdown
*/
......@@ -81,6 +121,10 @@ public class DocumentLoad {
return loadPDF(file);
case "txt":
return loadTxt(file);
case "xlsx":
case "xls":
case "csv":
return excelToMarkdown(file);
default:
throw new I18nMessageException(type + " format is not yet supported");
}
......
......@@ -47,7 +47,7 @@ public class DocumentUnderstandIngFunction extends AbstractLargeModelFunction {
.description(DESC)
.parameters(new Parameters("object")
.addProperties("question", new Properties("string", "提炼用户的问题"))
.addProperties("file_url", new Properties("string", "doc、docx、pdf、txt、md文件地址"))
.addProperties("file_url", new Properties("string", "doc、docx、pdf、txt、md、xlsx、csv、xls文件地址"))
).build();
......
......@@ -76,6 +76,7 @@ public class ImageOCRFunction extends AbstractLargeModelFunction {
response.setModel(model);
response.setMessages(messages);
response.setStream(false);
response.setUser("Image_OCR");
LargeModelDemandResult largeModelDemandResult = llmService.chat(response);
return largeModelDemandResult.getMessage();
}
......
package cn.com.poc.thirdparty.resource.demand.ai.function;
import cn.com.poc.common.utils.DocumentLoad;
import cn.com.poc.thirdparty.resource.demand.ai.function.html_reader.HtmlReaderFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.top_search.WeiboTopSearchFunction;
......@@ -11,6 +12,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource;
import java.io.File;
/**
* @author alex.yao
......@@ -45,4 +47,10 @@ public class ImageOCRFunctionTest {
/**
 * Prints the value returned by {@code weiboTopSearchFunction.getLLMConfig()} to standard
 * output. Makes no assertions; the output is meant to be inspected manually.
 */
public void weibo() {
System.out.println(weiboTopSearchFunction.getLLMConfig());
}
/**
 * Manual check that prints the Markdown produced from a local Excel workbook.
 *
 * <p>The sample workbook lives at a developer-specific absolute path, so the test is
 * skipped (rather than failed) on machines where that file does not exist.
 */
@Test
public void excelToMarkdown() {
    File file = new File("C:\\Users\\52747\\Desktop\\List of Question Intents and Standard Answers (IDP&DL) (Dec2024).xlsx");
    // A failed assumption marks the test as ignored instead of raising a load error.
    org.junit.Assume.assumeTrue(file.exists());
    System.out.println(DocumentLoad.excelToMarkdown(file));
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment