Commit d1d8257c authored by alex yao

feat:长文档理解插件

parent 2c98d9d0
...@@ -6,6 +6,7 @@ import cn.com.poc.thirdparty.resource.demand.ai.function.document_reader.Documen ...@@ -6,6 +6,7 @@ import cn.com.poc.thirdparty.resource.demand.ai.function.document_reader.Documen
import cn.com.poc.thirdparty.resource.demand.ai.function.document_understanding.DocumentUnderstandIngFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.document_understanding.DocumentUnderstandIngFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.html_reader.HtmlReaderFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.html_reader.HtmlReaderFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.long_document_reader.LongDocumentReaderFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.long_memory.SetLongMemoryFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.long_memory.SetLongMemoryFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.memory_variable_writer.MemoryVariableWriterFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.memory_variable_writer.MemoryVariableWriterFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.notification_reminder.NotificationReminderFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.notification_reminder.NotificationReminderFunction;
...@@ -43,6 +44,8 @@ public enum LargeModelFunctionEnum { ...@@ -43,6 +44,8 @@ public enum LargeModelFunctionEnum {
pdf_to_md(PdfToMDFunction.class), pdf_to_md(PdfToMDFunction.class),
long_document_reader(LongDocumentReaderFunction.class),
; ;
private Class<? extends AbstractLargeModelFunction> function; private Class<? extends AbstractLargeModelFunction> function;
......
package cn.com.poc.thirdparty.resource.demand.ai.function.long_document_reader;
import cn.com.poc.agent_application.entity.Variable;
import cn.com.poc.common.utils.DocumentLoad;
import cn.com.poc.common.utils.JsonUtils;
import cn.com.poc.common.utils.StringUtils;
import cn.com.poc.thirdparty.resource.demand.ai.entity.dialogue.Message;
import cn.com.poc.thirdparty.resource.demand.ai.entity.largemodel.LargeModelDemandResult;
import cn.com.poc.thirdparty.resource.demand.ai.entity.largemodel.LargeModelResponse;
import cn.com.poc.thirdparty.resource.demand.ai.function.AbstractFunctionResult;
import cn.com.poc.thirdparty.resource.demand.ai.function.AbstractLargeModelFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.FunctionLLMConfig;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Parameters;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Properties;
import cn.com.poc.thirdparty.service.LLMService;
import cn.hutool.core.collection.ListUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.io.File;
import java.util.List;
/**
* @author alex.yao
* @date 2025/5/12
*/
@Component
public class LongDocumentReaderFunction extends AbstractLargeModelFunction {
@Resource
private LLMService llmService;
private final String MODEL = "qwen-long";
private final String DESC = "长文档理解,适合用于文档内容长、文件大的文档,结合用户问题与文档理解的插件,仅支持文档doc、docx、pdf、txt、md、xlsx、csv、xls";
private final FunctionLLMConfig functionLLMConfig = new FunctionLLMConfig
.FunctionLLMConfigBuilder()
.name("long_document_reader")
.description(DESC)
.parameters(new Parameters("object")
.addProperties("question", new Properties("string", "用户的问题"))
.addProperties("file_url", new Properties("string", "doc、docx、pdf、txt、md、xlsx、csv、xls文件地址"))
).build();
@Override
public AbstractFunctionResult<String> doFunction(String content, String identifier) {
AbstractFunctionResult<String> result = new AbstractFunctionResult<>();
if (StringUtils.isBlank(content)) {
result.setFunctionResult(StringUtils.EMPTY);
result.setPromptContent(StringUtils.EMPTY);
return result;
}
JSONObject jsonObject = JSON.parseObject(content);
if (!jsonObject.containsKey("question") || !jsonObject.containsKey("file_url")) {
result.setFunctionResult(StringUtils.EMPTY);
result.setPromptContent(StringUtils.EMPTY);
return result;
}
String question = jsonObject.getString("question");
String fileUrl = jsonObject.getString("file_url");
File file = DocumentLoad.downloadURLDocument(fileUrl);
String documentContent;
try {
documentContent = DocumentLoad.documentToText(file);
} catch (Exception e) {
documentContent = StringUtils.EMPTY;
}
Message systemMessage = new Message();
systemMessage.setRole("system");
systemMessage.setContent("任务是对文档和用户问题进行分析");
Message documentMessage = new Message();
documentMessage.setRole("system");
documentMessage.setContent(documentContent);
Message userMessage = new Message();
userMessage.setContent(question);
userMessage.setRole("user");
Message[] messages = new Message[]{systemMessage, documentMessage, userMessage};
LargeModelResponse largeModelResponse = new LargeModelResponse();
largeModelResponse.setModel(MODEL);
largeModelResponse.setMessages(messages);
largeModelResponse.setStream(false);
largeModelResponse.setUser("Document_Understanding");
LargeModelDemandResult largeModelDemandResult = llmService.chat(largeModelResponse);
if (largeModelDemandResult == null) {
result.setFunctionResult(StringUtils.EMPTY);
result.setPromptContent(StringUtils.EMPTY);
return result;
}
result.setFunctionResult(largeModelDemandResult.getMessage());
result.setPromptContent(largeModelDemandResult.getMessage());
return result;
}
@Override
public String getDesc() {
return DESC;
}
@Override
public List<String> getLLMConfig() {
return ListUtil.toList(JsonUtils.serialize(this.functionLLMConfig));
}
@Override
public List<String> getLLMConfig(List<Variable> variableStructure) {
return this.getLLMConfig();
}
}
package cn.com.poc.thirdparty.resource.demand.ai.function;
import cn.com.poc.thirdparty.resource.demand.ai.function.long_document_reader.LongDocumentReaderFunction;
import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import org.junit.runner.RunWith;
import org.junit.Test;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource;
import java.util.UUID;
/**
* @author alex.yao
* @date 2025/5/12
*/
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(initializers = SingleContextInitializer.class)
@WebAppConfiguration
public class LongDocumentReaderFunctionTest {
@Resource
LongDocumentReaderFunction longDocumentReaderFunction;
@Test
public void test_function(){
String content = "{\"file_url\": \"https://gsst-poe-sit.gz.bcebos.com/data/20250410/1744277235901.pdf\",\"question\":\"Can a registered Grade C electrical worker work on the electrical work of a Grade A electrical worker?\"}";
String identifier = UUID.randomUUID().toString();
AbstractFunctionResult<String> result = longDocumentReaderFunction.doFunction(content, identifier);
System.out.println(result.getFunctionResult());
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment