Commit 7600e3ae authored by alex yao

feat: OCR插件

parent c611a9fa
...@@ -2,13 +2,24 @@ package cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr; ...@@ -2,13 +2,24 @@ package cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr;
import cn.com.poc.agent_application.entity.Variable; import cn.com.poc.agent_application.entity.Variable;
import cn.com.poc.common.utils.JsonUtils; import cn.com.poc.common.utils.JsonUtils;
import cn.com.poc.thirdparty.common.constant.MessageRoleConstant;
import cn.com.poc.thirdparty.resource.demand.ai.entity.dialogue.ImageUrl;
import cn.com.poc.thirdparty.resource.demand.ai.entity.dialogue.Message;
import cn.com.poc.thirdparty.resource.demand.ai.entity.dialogue.MultiContent;
import cn.com.poc.thirdparty.resource.demand.ai.entity.largemodel.LargeModelDemandResult;
import cn.com.poc.thirdparty.resource.demand.ai.entity.largemodel.LargeModelResponse;
import cn.com.poc.thirdparty.resource.demand.ai.function.AbstractLargeModelFunction; import cn.com.poc.thirdparty.resource.demand.ai.function.AbstractLargeModelFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.FunctionLLMConfig; import cn.com.poc.thirdparty.resource.demand.ai.function.entity.FunctionLLMConfig;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Parameters; import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Parameters;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Properties; import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Properties;
import cn.com.poc.thirdparty.service.LLMService;
import cn.hutool.core.collection.ListUtil; import cn.hutool.core.collection.ListUtil;
import com.alibaba.fastjson.JSONObject;
import org.apache.poi.hssf.record.DVALRecord;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.List; import java.util.List;
/** /**
...@@ -28,11 +39,45 @@ public class ImageOCRFunction extends AbstractLargeModelFunction { ...@@ -28,11 +39,45 @@ public class ImageOCRFunction extends AbstractLargeModelFunction {
.addProperties("image_url", new Properties("string", "图片地址"))) .addProperties("image_url", new Properties("string", "图片地址")))
.build(); .build();
// System-role instruction sent ahead of the user's question and image
// (roughly: "give a reply based on the question and the picture").
private String prompt = "帮我根据问题和图片给出回复";
// Name of the model set on every request built by doFunction.
private String model = "Doubao-vision-lite-32k";
// Chat service used to send the assembled request; injected by the container.
@Resource
private LLMService llmService;
@Override @Override
public String doFunction(String content, String identifier) { public String doFunction(String content, String identifier) {
JSONObject jsonObject = JSONObject.parseObject(content);
Message systemMessage = new Message();
systemMessage.setRole(MessageRoleConstant.system);
systemMessage.setContent(prompt);
Message userMessage = new Message();
List<MultiContent> multiContents = new ArrayList<>();
MultiContent textmultiContent = new MultiContent();
textmultiContent.setType("text");
textmultiContent.setText(jsonObject.getString("query"));
multiContents.add(textmultiContent);
MultiContent imageMultiContent = new MultiContent();
imageMultiContent.setType("image_url");
ImageUrl imageUrl = new ImageUrl();
imageUrl.setUrl(jsonObject.getString("image_url"));
imageMultiContent.setImageUrl(imageUrl);
multiContents.add(imageMultiContent);
userMessage.setRole(MessageRoleConstant.user);
userMessage.setContent(multiContents);
Message[] messages = new Message[]{systemMessage, userMessage};
//todo 对接豆包多模态大模型 LargeModelResponse response = new LargeModelResponse();
return null; response.setModel(model);
response.setMessages(messages);
response.setStream(false);
LargeModelDemandResult largeModelDemandResult = llmService.chat(response);
return largeModelDemandResult.getMessage();
} }
@Override @Override
......
package cn.com.poc.thirdparty.resource.demand.ai.function;
import cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction;
import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import org.junit.runner.RunWith;
import org.junit.Test;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource;
/**
 * Integration test for {@link ImageOCRFunction}.
 *
 * <p>Starts the Spring context through {@link SingleContextInitializer}, then calls the
 * function with a question and a publicly hosted sample image.
 * NOTE(review): presumably needs network access and a configured model backend - confirm
 * before enabling in CI.
 *
 * @author alex.yao
 * @date 2025/1/15
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(initializers = SingleContextInitializer.class)
@WebAppConfiguration
public class ImageOCRFunctionTest {

    @Resource
    private ImageOCRFunction imageOCRFunction;

    /**
     * Sends one question/image pair and requires a non-blank answer, so the test fails
     * when the function returns nothing instead of merely printing {@code null}.
     */
    @Test
    public void testOCR() {
        String content = "{\"query\":\"图片有什么?\",\"image_url\":\"https://ark-project.tos-cn-beijing.volces.com/images/view.jpeg\"}";
        String answer = imageOCRFunction.doFunction(content, "test");
        // Kept for manual inspection of the model's reply.
        System.out.println(answer);
        if (answer == null || answer.trim().isEmpty()) {
            throw new AssertionError("ImageOCRFunction returned no answer for the sample image");
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment