package cn.com.poc.thirdparty.resource.demand.ai.function.text_in_pdf2md;

import cn.com.poc.agent_application.entity.KnowledgeContentResult;
import cn.com.poc.agent_application.entity.Variable;
import cn.com.poc.common.utils.JsonUtils;
import cn.com.poc.thirdparty.resource.demand.ai.entity.dbchain.DBChainResult;
import cn.com.poc.thirdparty.resource.demand.ai.function.AbstractFunctionResult;
import cn.com.poc.thirdparty.resource.demand.ai.function.AbstractLargeModelFunction;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.FunctionLLMConfig;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Parameters;
import cn.com.poc.thirdparty.resource.demand.ai.function.entity.Properties;
import cn.com.poc.thirdparty.resource.text_in.api.TextInClient;
import cn.hutool.core.collection.ListUtil;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;

/**
 * Large-model function {@code pdf_to_md}: sends the {@code file_url} argument supplied by the
 * model to {@link TextInClient#OCR} and returns the {@code result.markdown} field of the
 * response.
 *
 * <p>Failures inside the OCR/parse step are not thrown; they are reported by setting the prompt
 * content of the result to {@code "FAIL"}.
 *
 * @author alex.yao
 * @date 2025/5/7
 */
@Component
public class PdfToMDFunction extends AbstractLargeModelFunction {

    private static final Logger logger = LoggerFactory.getLogger(PdfToMDFunction.class);

    /** Function description exposed to the model (runtime text, kept verbatim). */
    private static final String DESC = "该方法是通过OCR获取PDF或者图片的表格内容提取并转为Markdown格式。";

    /** Name of the single argument the model is expected to supply. */
    private static final String FILE_URL_KEY = "file_url";

    /** Marker written to the prompt content when the conversion could not be completed. */
    private static final String FAIL = "FAIL";

    /** Shared mapper; avoids building a new ObjectMapper on every invocation. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Function-call schema: one string property, {@code file_url}. */
    private final FunctionLLMConfig functionLLMConfig = new FunctionLLMConfig.FunctionLLMConfigBuilder()
            .name("pdf_to_md")
            .description(DESC)
            .parameters(new Parameters("object")
                    .addProperties(FILE_URL_KEY, new Properties("string", "文件地址")))
            .build();

    /**
     * Runs the OCR conversion for the file referenced by {@code content}.
     *
     * <ul>
     *   <li>If {@code content} is null/empty or carries no {@code file_url} key, {@code content}
     *       is echoed back unchanged as both prompt content and function result.</li>
     *   <li>If {@code file_url} is present but null/blank, the result is marked {@code "FAIL"}
     *       without calling the OCR service.</li>
     *   <li>If the OCR response contains {@code result.markdown}, that text is returned as both
     *       prompt content and function result.</li>
     *   <li>Otherwise the prompt content is {@code "FAIL"} and the function result holds the raw
     *       response (or the exception message when the call/parsing threw).</li>
     * </ul>
     *
     * <p>Malformed JSON in {@code content} is not caught here and propagates from
     * {@code JSONObject.parseObject}.
     *
     * @param content                 JSON arguments produced by the model
     * @param identifier              not used by this function
     * @param dbChainResults          not used by this function
     * @param knowledgeContentResults not used by this function
     * @return the populated result; never {@code null}
     */
    @Override
    public AbstractFunctionResult<String> doFunction(String content, String identifier, List<DBChainResult> dbChainResults, List<KnowledgeContentResult> knowledgeContentResults) {
        AbstractFunctionResult<String> result = new AbstractFunctionResult<>();

        // parseObject yields null for null/empty input, so guard before dereferencing.
        JSONObject arguments = JSONObject.parseObject(content);
        if (arguments == null || !arguments.containsKey(FILE_URL_KEY)) {
            result.setPromptContent(content);
            result.setFunctionResult(content);
            return result;
        }

        // The key may be present with a JSON null (or blank) value; fail cleanly instead of
        // throwing a NullPointerException or sending an empty body to the OCR service.
        String url = arguments.getString(FILE_URL_KEY);
        if (url == null || url.trim().isEmpty()) {
            logger.warn("pdf_to_md called with an empty file_url");
            result.setPromptContent(FAIL);
            result.setFunctionResult("file_url is empty");
            return result;
        }

        // NOTE(review): the payload is the UTF-8 encoding of the URL string itself, not the
        // downloaded file. Presumably TextInClient/TextIn accepts a URL as the request body --
        // confirm against the client implementation.
        byte[] fileContent = url.getBytes(StandardCharsets.UTF_8);
        HashMap<String, Object> options = buildOcrOptions();

        try {
            TextInClient textInClient = new TextInClient();
            String response = textInClient.OCR(fileContent, options);
            JsonNode root = MAPPER.readTree(response);
            if (root.has("result") && root.get("result").has("markdown")) {
                String markdown = root.get("result").get("markdown").asText();
                result.setPromptContent(markdown);
                result.setFunctionResult(markdown);
            } else {
                logger.warn("text in 文档信息提取异常:{}", response);
                result.setFunctionResult(response);
                result.setPromptContent(FAIL);
            }
            return result;
        } catch (Exception e) {
            // Boundary catch: any client/parsing failure is reported through the result object.
            logger.error("Error occurred during PDF to MD conversion", e);
            result.setPromptContent(FAIL);
            result.setFunctionResult(e.getMessage());
            return result;
        }
    }

    /**
     * Builds the fixed option set handed to {@link TextInClient#OCR}.
     *
     * <p>NOTE(review): keys and values are forwarded as-is; they are presumably TextIn request
     * parameters (e.g. {@code page_start}/{@code page_count} of 1 would limit work to the first
     * page) -- verify against the TextIn API documentation.
     */
    private static HashMap<String, Object> buildOcrOptions() {
        HashMap<String, Object> options = new HashMap<>();
        options.put("apply_document_tree", 1);
        options.put("catalog_details", 1);
        options.put("dpi", 144);
        options.put("get_excel", 1);
        options.put("get_image", "objects");
        options.put("markdown_details", 1);
        options.put("page_start", 1);
        options.put("page_count", 1);
        options.put("page_details", 1);
        options.put("paratext_mode", "annotation");
        options.put("parse_mode", "auto");
        options.put("table_flavor", "md");
        return options;
    }

    /** Returns the description of this function shown to the model. */
    @Override
    public String getDesc() {
        return DESC;
    }

    /** Returns the serialized function-call schema as a single-element list. */
    @Override
    public List<String> getLLMConfig() {
        return ListUtil.toList(JsonUtils.serialize(functionLLMConfig));
    }

    /**
     * Variant accepting a variable structure; the variables are ignored and the static schema
     * from {@link #getLLMConfig()} is returned.
     */
    @Override
    public List<String> getLLMConfig(List<Variable> variableStructure) {
        return this.getLLMConfig();
    }
}
