Commit c1348e99 authored by alex yao

feat:MinerU Api

parent 0513442d
package cn.com.poc.thirdparty.resource.minerU;
import java.io.File;
/**
 * Parameter holder for a MinerU {@code file_parse} call.
 *
 * <p>Despite the "Response" suffix, this bean carries request-side values: the file to upload
 * and the option fields that {@code MinerUAPI#parsePDF} copies into its multipart form. Field
 * and accessor names mirror the snake_case form keys used there.
 *
 * @author alex.yao
 * @date 2025/10/30
 */
public class MinerUResponse {

    // ---- file to upload (no default) ----
    private File files;

    // ---- string-valued options ----
    private String output_dir = "./output";
    private String lang_list = "ch";
    private String backend = "vlm-vllm-async-engine";
    private String parse_method = "auto";
    private String server_url = "";

    // ---- *_enable switches ----
    private boolean formula_enable = true;
    private boolean table_enable = true;

    // ---- return_* / response_* switches ----
    private boolean return_md = true;
    private boolean return_middle_json = false;
    private boolean return_model_output = false;
    private boolean return_content_list = false;
    private boolean return_images = false;
    private boolean response_format_zip = false;

    // ---- start/end page ids ----
    private Integer start_page_id = 0;
    private Integer end_page_id = 200;

    /** @return the file to upload, or {@code null} if none has been set */
    public File getFiles() {
        return this.files;
    }

    /** @param files the file to upload */
    public void setFiles(File files) {
        this.files = files;
    }

    /** @return value for the {@code output_dir} field (initially {@code "./output"}) */
    public String getOutput_dir() {
        return this.output_dir;
    }

    /** @param output_dir value for the {@code output_dir} field */
    public void setOutput_dir(String output_dir) {
        this.output_dir = output_dir;
    }

    /** @return value for the {@code lang_list} field (initially {@code "ch"}) */
    public String getLang_list() {
        return this.lang_list;
    }

    /** @param lang_list value for the {@code lang_list} field */
    public void setLang_list(String lang_list) {
        this.lang_list = lang_list;
    }

    /** @return value for the {@code backend} field (initially {@code "vlm-vllm-async-engine"}) */
    public String getBackend() {
        return this.backend;
    }

    /** @param backend value for the {@code backend} field */
    public void setBackend(String backend) {
        this.backend = backend;
    }

    /** @return value for the {@code parse_method} field (initially {@code "auto"}) */
    public String getParse_method() {
        return this.parse_method;
    }

    /** @param parse_method value for the {@code parse_method} field */
    public void setParse_method(String parse_method) {
        this.parse_method = parse_method;
    }

    /** @return value for the {@code formula_enable} field (initially {@code true}) */
    public boolean getFormula_enable() {
        return this.formula_enable;
    }

    /** @param formula_enable value for the {@code formula_enable} field */
    public void setFormula_enable(boolean formula_enable) {
        this.formula_enable = formula_enable;
    }

    /** @return value for the {@code table_enable} field (initially {@code true}) */
    public boolean getTable_enable() {
        return this.table_enable;
    }

    /** @param table_enable value for the {@code table_enable} field */
    public void setTable_enable(boolean table_enable) {
        this.table_enable = table_enable;
    }

    /** @return value for the {@code server_url} field (initially the empty string) */
    public String getServer_url() {
        return this.server_url;
    }

    /** @param server_url value for the {@code server_url} field */
    public void setServer_url(String server_url) {
        this.server_url = server_url;
    }

    /** @return value for the {@code return_md} field (initially {@code true}) */
    public boolean getReturn_md() {
        return this.return_md;
    }

    /** @param return_md value for the {@code return_md} field */
    public void setReturn_md(boolean return_md) {
        this.return_md = return_md;
    }

    /** @return value for the {@code return_middle_json} field (initially {@code false}) */
    public boolean getReturn_middle_json() {
        return this.return_middle_json;
    }

    /** @param return_middle_json value for the {@code return_middle_json} field */
    public void setReturn_middle_json(boolean return_middle_json) {
        this.return_middle_json = return_middle_json;
    }

    /** @return value for the {@code return_model_output} field (initially {@code false}) */
    public boolean getReturn_model_output() {
        return this.return_model_output;
    }

    /** @param return_model_output value for the {@code return_model_output} field */
    public void setReturn_model_output(boolean return_model_output) {
        this.return_model_output = return_model_output;
    }

    /** @return value for the {@code return_content_list} field (initially {@code false}) */
    public boolean getReturn_content_list() {
        return this.return_content_list;
    }

    /** @param return_content_list value for the {@code return_content_list} field */
    public void setReturn_content_list(boolean return_content_list) {
        this.return_content_list = return_content_list;
    }

    /** @return value for the {@code return_images} field (initially {@code false}) */
    public boolean getReturn_images() {
        return this.return_images;
    }

    /** @param return_images value for the {@code return_images} field */
    public void setReturn_images(boolean return_images) {
        this.return_images = return_images;
    }

    /** @return value for the {@code response_format_zip} field (initially {@code false}) */
    public boolean getResponse_format_zip() {
        return this.response_format_zip;
    }

    /** @param response_format_zip value for the {@code response_format_zip} field */
    public void setResponse_format_zip(boolean response_format_zip) {
        this.response_format_zip = response_format_zip;
    }

    /** @return value for the {@code start_page_id} field (initially {@code 0}) */
    public Integer getStart_page_id() {
        return this.start_page_id;
    }

    /** @param start_page_id value for the {@code start_page_id} field */
    public void setStart_page_id(Integer start_page_id) {
        this.start_page_id = start_page_id;
    }

    /** @return value for the {@code end_page_id} field (initially {@code 200}) */
    public Integer getEnd_page_id() {
        return this.end_page_id;
    }

    /** @param end_page_id value for the {@code end_page_id} field */
    public void setEnd_page_id(Integer end_page_id) {
        this.end_page_id = end_page_id;
    }
}
package cn.com.poc.thirdparty.resource.minerU.api;
import cn.com.poc.common.utils.StringUtils;
import cn.com.poc.thirdparty.resource.baidu.api.BaiduTextDiffApi;
import cn.com.poc.thirdparty.resource.baidu.entity.result.BaiduTextDiffResult;
import cn.com.poc.thirdparty.resource.minerU.MinerUResponse;
import cn.com.yict.framemax.core.exception.BusinessException;
import com.google.gson.Gson;
import okhttp3.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
/**
 * Thin HTTP client for the MinerU {@code file_parse} endpoint.
 *
 * <p>Builds a multipart form from a {@link MinerUResponse} (which, despite its name, holds the
 * request parameters), POSTs it and hands back the raw response body.
 *
 * @author alex.yao
 * @date 2025/10/29
 */
@Component
public class MinerUAPI {

    /** Content type used for the uploaded file when the platform cannot detect one. */
    private static final String FALLBACK_CONTENT_TYPE = "application/octet-stream";

    private final Logger logger = LoggerFactory.getLogger(MinerUAPI.class);

    /** Client used for every call; reads may block for up to 300 seconds. */
    public final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder().readTimeout(300, TimeUnit.SECONDS).build();

    // NOTE(review): endpoint is hard-coded; consider moving it to configuration.
    private String MinerU_URL = "http://i-1.gpushare.com:53412/file_parse";

    /**
     * Uploads the file held by {@code minerUResponse} together with its option fields and
     * returns the response body.
     *
     * <p>The HTTP status code is not inspected: whatever body the server sends is returned.
     * Option values that are {@code null} are left out of the form instead of failing.
     *
     * @param minerUResponse request parameters; must be non-null and carry a non-null file
     * @return the response body as a {@link String}
     * @throws BusinessException if the file is missing or the HTTP exchange fails with an
     *                           {@link IOException}
     */
    public Object parsePDF(MinerUResponse minerUResponse) {
        // Fail with a domain error instead of a bare NullPointerException.
        if (minerUResponse == null || minerUResponse.getFiles() == null) {
            throw new BusinessException("MinerU 调用失败：待解析文件不能为空");
        }
        File file = minerUResponse.getFiles();
        try {
            // probeContentType returns null when the type cannot be determined; fall back to
            // a generic binary type rather than aborting the upload.
            String contentType = Files.probeContentType(file.toPath());
            if (contentType == null) {
                contentType = FALLBACK_CONTENT_TYPE;
            }
            RequestBody files = RequestBody.create(MediaType.parse(contentType), file);

            MultipartBody.Builder builder = new MultipartBody.Builder().setType(MultipartBody.FORM);
            builder.addFormDataPart("files", file.getName(), files);
            addTextPart(builder, "output_dir", minerUResponse.getOutput_dir());
            addTextPart(builder, "lang_list", minerUResponse.getLang_list());
            addTextPart(builder, "backend", minerUResponse.getBackend());
            addTextPart(builder, "parse_method", minerUResponse.getParse_method());
            addTextPart(builder, "formula_enable", minerUResponse.getFormula_enable());
            addTextPart(builder, "table_enable", minerUResponse.getTable_enable());
            // Take the value from the request object so that setServer_url has an effect;
            // its default is the empty string.
            addTextPart(builder, "server_url", minerUResponse.getServer_url());
            addTextPart(builder, "return_md", minerUResponse.getReturn_md());
            addTextPart(builder, "return_middle_json", minerUResponse.getReturn_middle_json());
            addTextPart(builder, "return_model_output", minerUResponse.getReturn_model_output());
            addTextPart(builder, "return_content_list", minerUResponse.getReturn_content_list());
            addTextPart(builder, "return_images", minerUResponse.getReturn_images());
            addTextPart(builder, "response_format_zip", minerUResponse.getResponse_format_zip());
            addTextPart(builder, "start_page_id", minerUResponse.getStart_page_id());
            addTextPart(builder, "end_page_id", minerUResponse.getEnd_page_id());
            MultipartBody body = builder.build();

            // No explicit Content-Type header: OkHttp sets it from the multipart body,
            // including the boundary parameter.
            Request request = new Request.Builder()
                    .url(MinerU_URL)
                    .method("POST", body)
                    .build();

            // string() reads the whole body into memory and closes it.
            return Objects.requireNonNull(HTTP_CLIENT.newCall(request).execute().body()).string();
        } catch (IOException e) {
            logger.error("MinerU 调用失败", e);
            throw new BusinessException("MinerU 调用失败");
        }
    }

    /** Adds {@code value} as a text form field, skipping it entirely when it is {@code null}. */
    private static void addTextPart(MultipartBody.Builder form, String name, Object value) {
        if (value != null) {
            form.addFormDataPart(name, String.valueOf(value));
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment