Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
P
poc-api
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
poc
poc-api
Commits
333130e4
Commit
333130e4
authored
May 19, 2025
by
alex yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
style: Text In Client
parent
c0df8f2d
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
37 additions
and
19 deletions
+37
-19
ContractExtractionFunction.java
...nd/ai/function/extraction/ContractExtractionFunction.java
+6
-4
PdfToMDFunction.java
...ce/demand/ai/function/text_in_pdf2md/PdfToMDFunction.java
+3
-2
TextInClient.java
.../com/poc/thirdparty/resource/textin/api/TextInClient.java
+16
-9
PdfToMDResponse.java
...oc/thirdparty/resource/textin/entity/PdfToMDResponse.java
+1
-1
PdfToMDResult.java
.../poc/thirdparty/resource/textin/entity/PdfToMDResult.java
+8
-0
PdfToMdFunctionTest.java
...arty/resource/demand/ai/function/PdfToMdFunctionTest.java
+3
-3
No files found.
src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/extraction/ContractExtractionFunction.java
View file @
333130e4
...
...
@@ -8,7 +8,7 @@ import cn.com.poc.thirdparty.resource.demand.ai.function.entity.FunctionLLMConfi
import
cn.com.poc.thirdparty.resource.demand.ai.function.entity.Parameters
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.entity.Properties
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.extraction.entity.KeyInfo
;
import
cn.com.poc.thirdparty.resource.
demand.ai.function.text_in_pdf2md
.api.TextInClient
;
import
cn.com.poc.thirdparty.resource.
textin
.api.TextInClient
;
import
cn.hutool.core.collection.ListUtil
;
import
cn.hutool.json.JSONException
;
import
com.alibaba.fastjson.JSONArray
;
...
...
@@ -19,13 +19,17 @@ import java.util.ArrayList;
import
java.util.List
;
/**
* 合同关键信息抽取-要素提取
*
* @author alex.yao
* @date 2025/5/12
*/
@Component
public
class
ContractExtractionFunction
extends
AbstractLargeModelFunction
{
private
String
DESC
=
"合同关键信息抽取"
;
private
final
String
DESC
=
"合同关键信息抽取"
;
private
final
TextInClient
textInClient
=
new
TextInClient
();
private
final
FunctionLLMConfig
functionLLMConfig
=
new
FunctionLLMConfig
.
FunctionLLMConfigBuilder
()
.
name
(
"contract_extraction"
)
...
...
@@ -85,8 +89,6 @@ public class ContractExtractionFunction extends AbstractLargeModelFunction {
}
keyInfos
.
add
(
keyInfo
);
}
TextInClient
textInClient
=
new
TextInClient
();
String
extraction
=
textInClient
.
extraction
(
fileUrl
,
keyInfos
);
result
.
setFunctionResult
(
extraction
);
result
.
setPromptContent
(
extraction
);
...
...
src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/text_in_pdf2md/PdfToMDFunction.java
View file @
333130e4
...
...
@@ -7,7 +7,7 @@ import cn.com.poc.thirdparty.resource.demand.ai.function.AbstractLargeModelFunct
import
cn.com.poc.thirdparty.resource.demand.ai.function.entity.FunctionLLMConfig
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.entity.Parameters
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.entity.Properties
;
import
cn.com.poc.thirdparty.resource.
demand.ai.function.text_in_pdf2md
.api.TextInClient
;
import
cn.com.poc.thirdparty.resource.
textin
.api.TextInClient
;
import
cn.hutool.core.collection.ListUtil
;
import
com.alibaba.fastjson.JSONObject
;
import
com.fasterxml.jackson.databind.JsonNode
;
...
...
@@ -64,7 +64,8 @@ public class PdfToMDFunction extends AbstractLargeModelFunction {
options
.
put
(
"parse_mode"
,
"auto"
);
options
.
put
(
"table_flavor"
,
"md"
);
try
{
String
response
=
TextInClient
.
recognize
(
fileContent
,
options
);
TextInClient
textInClient
=
new
TextInClient
();
String
response
=
textInClient
.
OCR
(
fileContent
,
options
);
ObjectMapper
mapper
=
new
ObjectMapper
();
JsonNode
jsonNode
=
mapper
.
readTree
(
response
);
if
(
jsonNode
.
has
(
"result"
)
&&
jsonNode
.
get
(
"result"
).
has
(
"markdown"
))
{
...
...
src/main/java/cn/com/poc/thirdparty/resource/
demand/ai/function/text_in_pdf2md
/api/TextInClient.java
→
src/main/java/cn/com/poc/thirdparty/resource/
textin
/api/TextInClient.java
View file @
333130e4
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
demand
.
ai
.
function
.
text_in_pdf2md
.
api
;
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
textin
.
api
;
/**
* @author alex.yao
...
...
@@ -32,20 +32,27 @@ import java.util.Map;
public
class
TextInClient
{
// NOTE(review): logger, appId and secretCode are each declared twice in this
// listing - first as static members, then as instance fields. This looks like
// the removed and added sides of a diff shown together; confirm which set is
// the live one before relying on either.
//
// NOTE(review): appId and secretCode are initialised from string literals, so
// what appear to be service credentials are readable by anyone with access to
// the source or its history. Consider supplying them from external
// configuration and rotating the values shown here.
final
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
TextInClient
.
class
);
private
static
final
String
appId
=
"dafd04a574230c00ccba61132160de0c"
;
private
static
final
String
secretCode
=
"3bc03c7e6f9402963e6e71d16d786a9c"
;
// Instance-level variants of the three declarations above. These two String
// fields are not final; the two-argument constructor assigns to them.
final
private
Logger
logger
=
LoggerFactory
.
getLogger
(
TextInClient
.
class
);
private
String
appId
=
"dafd04a574230c00ccba61132160de0c"
;
private
String
secretCode
=
"3bc03c7e6f9402963e6e71d16d786a9c"
;
/**
 * Creates a client that keeps the {@code appId} and {@code secretCode} values
 * assigned by the field initializers; the constructor itself sets nothing.
 */
public TextInClient() {
}
/**
 * Creates a client whose {@code appId} and {@code secretCode} fields are set
 * from the arguments, replacing the values assigned by the field initializers.
 * The arguments are stored as given; no validation is performed here.
 *
 * @param appId      value to store in this client's {@code appId} field
 * @param secretCode value to store in this client's {@code secretCode} field
 */
public TextInClient(String appId, String secretCode) {
    // The two assignments are independent of each other.
    this.secretCode = secretCode;
    this.appId = appId;
}
/**
* ocr
*
* @param fileContent
* @param options
* @return
* @throws IOException
*/
public
static
String
recognize
(
byte
[]
fileContent
,
HashMap
<
String
,
Object
>
options
)
throws
IOException
{
public
String
OCR
(
byte
[]
fileContent
,
HashMap
<
String
,
Object
>
options
)
throws
IOException
{
StringBuilder
queryParams
=
new
StringBuilder
();
for
(
Map
.
Entry
<
String
,
Object
>
entry
:
options
.
entrySet
())
{
if
(
queryParams
.
length
()
>
0
)
{
...
...
@@ -55,7 +62,7 @@ public class TextInClient {
.
append
(
"="
)
.
append
(
URLEncoder
.
encode
(
entry
.
getValue
().
toString
(),
"UTF-8"
));
}
HttpURLConnection
connection
=
get
RecoGinze
HttpURLConnection
(
queryParams
);
HttpURLConnection
connection
=
get
OCR
HttpURLConnection
(
queryParams
);
try
(
OutputStream
os
=
connection
.
getOutputStream
())
{
os
.
write
(
fileContent
);
os
.
flush
();
...
...
@@ -79,7 +86,7 @@ public class TextInClient {
}
private
static
HttpURLConnection
getRecoGinze
HttpURLConnection
(
StringBuilder
queryParams
)
throws
IOException
{
private
HttpURLConnection
getOCR
HttpURLConnection
(
StringBuilder
queryParams
)
throws
IOException
{
String
baseUrl
=
"https://api.textin.com/ai/service/v1/pdf_to_markdown"
;
String
fullUrl
=
baseUrl
+
(
queryParams
.
length
()
>
0
?
"?"
+
queryParams
:
""
);
URL
url
=
new
URL
(
fullUrl
);
...
...
@@ -102,7 +109,7 @@ public class TextInClient {
* @return
*/
public
static
String
extraction
(
String
fileUrl
,
List
<
KeyInfo
>
keyInfoList
)
{
public
String
extraction
(
String
fileUrl
,
List
<
KeyInfo
>
keyInfoList
)
{
try
{
// 读取文件并将其转换为Base64编码
File
file
=
DocumentLoad
.
downloadURLDocument
(
fileUrl
);
...
...
@@ -166,7 +173,7 @@ public class TextInClient {
* @param taskId
* @return
*/
private
static
String
extractedResults
(
String
taskId
)
{
private
String
extractedResults
(
String
taskId
)
{
String
baseUrl
=
"https://doc-compare.intsig.com/doc_extraction/keyinfo/extracted_results?format=json&task_id="
+
taskId
;
HttpUriRequest
httpUriRequest
=
RequestBuilder
.
post
()
.
setUri
(
baseUrl
)
...
...
src/main/java/cn/com/poc/thirdparty/resource/
demand/ai/function/text_in_pdf2md
/entity/PdfToMDResponse.java
→
src/main/java/cn/com/poc/thirdparty/resource/
textin
/entity/PdfToMDResponse.java
View file @
333130e4
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
demand
.
ai
.
function
.
text_in_pdf2md
.
entity
;
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
textin
.
entity
;
/**
* @author alex.yao
...
...
src/main/java/cn/com/poc/thirdparty/resource/
demand/ai/function/text_in_pdf2md
/entity/PdfToMDResult.java
→
src/main/java/cn/com/poc/thirdparty/resource/
textin
/entity/PdfToMDResult.java
View file @
333130e4
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
demand
.
ai
.
function
.
text_in_pdf2md
.
entity
;
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
textin
.
entity
;
/**
* @author alex.yao
...
...
src/test/java/cn/com/poc/thirdparty/resource/demand/ai/function/PdfToMdFunctionTest.java
View file @
333130e4
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
demand
.
ai
.
function
;
import
cn.com.poc.common.utils.JsonUtils
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.extraction.ContractExtractionFunction
;
import
cn.com.poc.thirdparty.resource.
demand.ai.function.text_in_pdf2md
.api.TextInClient
;
import
cn.com.poc.thirdparty.resource.
textin
.api.TextInClient
;
import
cn.com.yict.framemax.core.spring.SingleContextInitializer
;
import
com.fasterxml.jackson.databind.JsonNode
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
...
...
@@ -44,7 +43,8 @@ public class PdfToMdFunctionTest {
options
.
put
(
"parse_mode"
,
"auto"
);
options
.
put
(
"table_flavor"
,
"md"
);
try
{
String
response
=
TextInClient
.
recognize
(
fileContent
,
options
);
TextInClient
textInClient
=
new
TextInClient
();
String
response
=
textInClient
.
OCR
(
fileContent
,
options
);
ObjectMapper
mapper
=
new
ObjectMapper
();
JsonNode
jsonNode
=
mapper
.
readTree
(
response
);
if
(
jsonNode
.
has
(
"result"
)
&&
jsonNode
.
get
(
"result"
).
has
(
"markdown"
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment