Commit ca6cdc13 authored by alex yao

feat:Agent插件文档读取支持excel文件

parent ea801c10
...@@ -8,6 +8,8 @@ import org.apache.pdfbox.pdfparser.PDFParser; ...@@ -8,6 +8,8 @@ import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.util.Assert; import org.springframework.util.Assert;
...@@ -16,6 +18,7 @@ import java.io.*; ...@@ -16,6 +18,7 @@ import java.io.*;
import java.net.URL; import java.net.URL;
import java.net.URLConnection; import java.net.URLConnection;
import java.nio.file.Files; import java.nio.file.Files;
import java.util.Iterator;
public class DocumentLoad { public class DocumentLoad {
...@@ -26,6 +29,43 @@ public class DocumentLoad { ...@@ -26,6 +29,43 @@ public class DocumentLoad {
.build(); .build();
final static CopyDown converter = new CopyDown(options); final static CopyDown converter = new CopyDown(options);
/**
 * Converts the first sheet of an Excel workbook into a Markdown table.
 *
 * <p>The first row of the sheet is emitted as the table header, followed by a
 * separator row sized to the widest row of the sheet, followed by the
 * remaining rows. Rows shorter than the widest row are padded with empty
 * cells so that every line has the same number of columns. Only the first
 * sheet is read; other sheets are ignored.
 *
 * <p>The workbook format is detected from the file content via
 * {@link WorkbookFactory}, so both {@code .xlsx} and {@code .xls} are accepted.
 * NOTE(review): the loader switch also routes {@code csv} to this method; CSV
 * is plain text rather than a workbook, so it presumably needs its own
 * loader - confirm and handle separately.
 *
 * <p>Cell values are rendered with {@link DataFormatter}, i.e. as the text
 * Excel would display (numbers, dates and booleans included). No formula
 * evaluator is supplied, so formula cells are rendered as DataFormatter
 * renders them without one.
 *
 * @param file the workbook file to read
 * @return the Markdown table, or an empty string when the first sheet has no cells
 * @throws I18nMessageException with key {@code exception/file.load.error} when the file cannot be read
 */
public static String excelToMarkdown(File file) {
    // try-with-resources closes both the workbook and the stream even when parsing fails midway.
    try (InputStream inputStream = new FileInputStream(file);
         Workbook workbook = WorkbookFactory.create(inputStream)) {
        Sheet sheet = workbook.getSheetAt(0);

        // The table width is the widest row; getLastCellNum() is -1 for a row without cells.
        int columnCount = 0;
        for (Row row : sheet) {
            columnCount = Math.max(columnCount, row.getLastCellNum());
        }
        if (columnCount <= 0) {
            return "";
        }

        DataFormatter formatter = new DataFormatter();
        StringBuilder markdown = new StringBuilder();
        boolean headerWritten = false;
        for (Row row : sheet) {
            markdown.append('|');
            // Index-based access keeps columns aligned: a missing cell yields null,
            // which DataFormatter renders as an empty string.
            for (int column = 0; column < columnCount; column++) {
                String text = formatter.formatCellValue(row.getCell(column));
                markdown.append(' ').append(escapeMarkdownCell(text)).append(" |");
            }
            markdown.append('\n');

            // The Markdown separator line must directly follow the header row.
            if (!headerWritten) {
                markdown.append('|');
                for (int column = 0; column < columnCount; column++) {
                    markdown.append(" --- |");
                }
                markdown.append('\n');
                headerWritten = true;
            }
        }
        return markdown.toString();
    } catch (IOException e) {
        throw new I18nMessageException("exception/file.load.error");
    }
}

/**
 * Makes cell text safe to embed in a single Markdown table cell: pipes are
 * escaped and line breaks are flattened to spaces.
 */
private static String escapeMarkdownCell(String text) {
    return text.replace("|", "\\|")
            .replace("\r\n", " ")
            .replace('\n', ' ')
            .replace('\r', ' ');
}
/** /**
* Html To Markdown * Html To Markdown
*/ */
...@@ -81,6 +121,10 @@ public class DocumentLoad { ...@@ -81,6 +121,10 @@ public class DocumentLoad {
return loadPDF(file); return loadPDF(file);
case "txt": case "txt":
return loadTxt(file); return loadTxt(file);
case "xlsx":
case "xls":
case "csv":
return excelToMarkdown(file);
default: default:
throw new I18nMessageException(type + " format is not yet supported"); throw new I18nMessageException(type + " format is not yet supported");
} }
......
...@@ -47,7 +47,7 @@ public class DocumentUnderstandIngFunction extends AbstractLargeModelFunction { ...@@ -47,7 +47,7 @@ public class DocumentUnderstandIngFunction extends AbstractLargeModelFunction {
.description(DESC) .description(DESC)
.parameters(new Parameters("object") .parameters(new Parameters("object")
.addProperties("question", new Properties("string", "提炼用户的问题")) .addProperties("question", new Properties("string", "提炼用户的问题"))
.addProperties("file_url", new Properties("string", "doc、docx、pdf、txt、md文件地址")) .addProperties("file_url", new Properties("string", "doc、docx、pdf、txt、md、xlsx、csv、xls文件地址"))
).build(); ).build();
......
...@@ -76,6 +76,7 @@ public class ImageOCRFunction extends AbstractLargeModelFunction { ...@@ -76,6 +76,7 @@ public class ImageOCRFunction extends AbstractLargeModelFunction {
response.setModel(model); response.setModel(model);
response.setMessages(messages); response.setMessages(messages);
response.setStream(false); response.setStream(false);
response.setUser("Image_OCR");
LargeModelDemandResult largeModelDemandResult = llmService.chat(response); LargeModelDemandResult largeModelDemandResult = llmService.chat(response);
return largeModelDemandResult.getMessage(); return largeModelDemandResult.getMessage();
} }
......
package cn.com.poc.thirdparty.resource.demand.ai.function; package cn.com.poc.thirdparty.resource.demand.ai.function;
import cn.com.poc.common.utils.DocumentLoad;
import cn.com.poc.thirdparty.resource.demand.ai.function.html_reader.HtmlReaderFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.html_reader.HtmlReaderFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.top_search.WeiboTopSearchFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.top_search.WeiboTopSearchFunction;
...@@ -11,6 +12,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; ...@@ -11,6 +12,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration; import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource; import javax.annotation.Resource;
import java.io.File;
/** /**
* @author alex.yao * @author alex.yao
...@@ -45,4 +47,10 @@ public class ImageOCRFunctionTest { ...@@ -45,4 +47,10 @@ public class ImageOCRFunctionTest {
public void weibo() { public void weibo() {
System.out.println(weiboTopSearchFunction.getLLMConfig()); System.out.println(weiboTopSearchFunction.getLLMConfig());
} }
/**
 * Checks that DocumentLoad.excelToMarkdown renders the text of workbook cells.
 *
 * The workbook is generated into a temporary file so the test does not rely
 * on a document that only exists on one developer's machine.
 */
@Test
public void excelToMarkdown() throws Exception {
    File file = File.createTempFile("excel-to-markdown", ".xlsx");
    try {
        // Fully qualified names keep this method independent of the file's import block.
        try (org.apache.poi.ss.usermodel.Workbook workbook = new org.apache.poi.xssf.usermodel.XSSFWorkbook();
             java.io.OutputStream out = new java.io.FileOutputStream(file)) {
            org.apache.poi.ss.usermodel.Row header = workbook.createSheet("Sheet1").createRow(0);
            header.createCell(0).setCellValue("question");
            header.createCell(1).setCellValue("answer");
            workbook.write(out);
        }

        String markdown = DocumentLoad.excelToMarkdown(file);
        System.out.println(markdown);
        org.junit.Assert.assertTrue(markdown.contains("question"));
        org.junit.Assert.assertTrue(markdown.contains("answer"));
    } finally {
        // Best-effort cleanup of the temporary workbook.
        file.delete();
    }
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment