Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
P
poc-api
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
poc
poc-api
Commits
ca6cdc13
Commit
ca6cdc13
authored
Jan 17, 2025
by
alex yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat:Agent插件文档读取支持excel文件
parent
ea801c10
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
54 additions
and
1 deletion
+54
-1
DocumentLoad.java
src/main/java/cn/com/poc/common/utils/DocumentLoad.java
+44
-0
DocumentUnderstandIngFunction.java
...document_understanding/DocumentUnderstandIngFunction.java
+1
-1
ImageOCRFunction.java
...source/demand/ai/function/image_ocr/ImageOCRFunction.java
+1
-0
ImageOCRFunctionTest.java
...rty/resource/demand/ai/function/ImageOCRFunctionTest.java
+8
-0
No files found.
src/main/java/cn/com/poc/common/utils/DocumentLoad.java
View file @
ca6cdc13
...
...
@@ -8,6 +8,8 @@ import org.apache.pdfbox.pdfparser.PDFParser;
import
org.apache.pdfbox.pdmodel.PDDocument
;
import
org.apache.pdfbox.text.PDFTextStripper
;
import
org.apache.poi.hwpf.extractor.WordExtractor
;
import
org.apache.poi.ss.usermodel.*
;
import
org.apache.poi.xssf.usermodel.XSSFWorkbook
;
import
org.apache.poi.xwpf.extractor.XWPFWordExtractor
;
import
org.apache.poi.xwpf.usermodel.XWPFDocument
;
import
org.springframework.util.Assert
;
...
...
@@ -16,6 +18,7 @@ import java.io.*;
import
java.net.URL
;
import
java.net.URLConnection
;
import
java.nio.file.Files
;
import
java.util.Iterator
;
public
class
DocumentLoad
{
...
...
@@ -26,6 +29,43 @@ public class DocumentLoad {
.
build
();
final
static
CopyDown
converter
=
new
CopyDown
(
options
);
/**
 * Converts the first sheet of a spreadsheet file into a Markdown table.
 *
 * <p>The first physical row of the sheet is used as the table header and is
 * followed by a separator line that has one segment per column. Every row is
 * rendered with the same number of columns (the widest row of the sheet), and
 * cells that are undefined or blank are emitted as empty cells so that values
 * stay in their own column.
 *
 * <p>Only STRING and NUMERIC cells contribute text; every other cell type is
 * rendered as an empty cell.
 *
 * <p>NOTE(review): the workbook is opened through {@code WorkbookFactory}, which
 * handles both the binary {@code .xls} and the OOXML {@code .xlsx} formats. Plain
 * {@code csv} files are not workbooks and are expected to end in the load error
 * below - confirm whether the caller should route csv elsewhere.
 *
 * @param file spreadsheet file to read
 * @return the Markdown table, or an empty string when the sheet holds no cells
 * @throws I18nMessageException with key {@code exception/file.load.error} when the
 *                              file cannot be read or is not a supported workbook
 */
public static String excelToMarkdown(File file) {
    // try-with-resources closes the workbook and the stream on every path,
    // including when parsing fails half way through.
    try (InputStream inputStream = new FileInputStream(file);
         Workbook workbook = WorkbookFactory.create(inputStream)) {
        if (workbook.getNumberOfSheets() == 0) {
            return "";
        }
        Sheet sheet = workbook.getSheetAt(0);

        // First pass: find the widest row so every rendered row has the same width.
        int columnCount = 0;
        for (Row row : sheet) {
            // getLastCellNum() is -1 for a row without cells.
            columnCount = Math.max(columnCount, row.getLastCellNum());
        }
        if (columnCount <= 0) {
            return "";
        }

        StringBuilder markdown = new StringBuilder();
        boolean separatorWritten = false;
        for (Row row : sheet) {
            // Address cells by index: the row iterator skips undefined cells,
            // which would shift the remaining values into the wrong columns.
            for (int column = 0; column < columnCount; column++) {
                Cell cell = row.getCell(column, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
                markdown.append("| ").append(markdownCellText(cell));
            }
            markdown.append("|\n");

            // The Markdown separator line belongs directly below the header row.
            if (!separatorWritten) {
                for (int column = 0; column < columnCount; column++) {
                    markdown.append("|--");
                }
                markdown.append("|\n");
                separatorWritten = true;
            }
        }
        return markdown.toString();
    } catch (IOException e) {
        I18nMessageException loadError = new I18nMessageException("exception/file.load.error");
        // Keep the root cause reachable for diagnostics without relying on a
        // (message, cause) constructor that is not visible from here.
        loadError.addSuppressed(e);
        throw loadError;
    }
}

/**
 * Returns the text of a single cell, made safe for use inside a Markdown table cell.
 *
 * @param cell the cell to render, may be {@code null} for an undefined cell
 * @return the escaped cell text; empty for {@code null} and for cell types other
 *         than STRING and NUMERIC
 */
private static String markdownCellText(Cell cell) {
    if (cell == null) {
        return "";
    }
    String text;
    if (cell.getCellType() == CellType.STRING) {
        text = cell.getStringCellValue();
    } else if (cell.getCellType() == CellType.NUMERIC) {
        text = String.valueOf(cell.getNumericCellValue());
    } else {
        return "";
    }
    // A raw pipe would start a new column and a raw line break would end the row.
    return text.replace("|", "\\|")
            .replace("\r\n", "<br>")
            .replace("\n", "<br>")
            .replace("\r", "<br>");
}
/**
* Html To Markdown
*/
...
...
@@ -81,6 +121,10 @@ public class DocumentLoad {
return
loadPDF
(
file
);
case
"txt"
:
return
loadTxt
(
file
);
case
"xlsx"
:
case
"xls"
:
case
"csv"
:
return
excelToMarkdown
(
file
);
default
:
throw
new
I18nMessageException
(
type
+
" format is not yet supported"
);
}
...
...
src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/document_understanding/DocumentUnderstandIngFunction.java
View file @
ca6cdc13
...
...
@@ -47,7 +47,7 @@ public class DocumentUnderstandIngFunction extends AbstractLargeModelFunction {
.
description
(
DESC
)
.
parameters
(
new
Parameters
(
"object"
)
.
addProperties
(
"question"
,
new
Properties
(
"string"
,
"提炼用户的问题"
))
.
addProperties
(
"file_url"
,
new
Properties
(
"string"
,
"doc、docx、pdf、txt、md文件地址"
))
.
addProperties
(
"file_url"
,
new
Properties
(
"string"
,
"doc、docx、pdf、txt、md
、xlsx、csv、xls
文件地址"
))
).
build
();
...
...
src/main/java/cn/com/poc/thirdparty/resource/demand/ai/function/image_ocr/ImageOCRFunction.java
View file @
ca6cdc13
...
...
@@ -76,6 +76,7 @@ public class ImageOCRFunction extends AbstractLargeModelFunction {
response
.
setModel
(
model
);
response
.
setMessages
(
messages
);
response
.
setStream
(
false
);
response
.
setUser
(
"Image_OCR"
);
LargeModelDemandResult
largeModelDemandResult
=
llmService
.
chat
(
response
);
return
largeModelDemandResult
.
getMessage
();
}
...
...
src/test/java/cn/com/poc/thirdparty/resource/demand/ai/function/ImageOCRFunctionTest.java
View file @
ca6cdc13
package
cn
.
com
.
poc
.
thirdparty
.
resource
.
demand
.
ai
.
function
;
import
cn.com.poc.common.utils.DocumentLoad
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.html_reader.HtmlReaderFunction
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.image_ocr.ImageOCRFunction
;
import
cn.com.poc.thirdparty.resource.demand.ai.function.top_search.WeiboTopSearchFunction
;
...
...
@@ -11,6 +12,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import
org.springframework.test.context.web.WebAppConfiguration
;
import
javax.annotation.Resource
;
import
java.io.File
;
/**
* @author alex.yao
...
...
@@ -45,4 +47,10 @@ public class ImageOCRFunctionTest {
public
void
weibo
()
{
System
.
out
.
println
(
weiboTopSearchFunction
.
getLLMConfig
());
}
/**
 * Converts a sample workbook with {@code DocumentLoad.excelToMarkdown} and prints the result.
 *
 * <p>The sample location can be overridden with the {@code excelToMarkdown.sample}
 * system property; it defaults to the original developer-local path. The test is
 * skipped, instead of failing, on machines where the sample file does not exist.
 */
@Test
public void excelToMarkdown() {
    String samplePath = System.getProperty(
            "excelToMarkdown.sample",
            "C:\\Users\\52747\\Desktop\\List of Question Intents and Standard Answers (IDP&DL) (Dec2024).xlsx");
    File file = new File(samplePath);

    // A machine-specific fixture must not break the build elsewhere.
    org.junit.Assume.assumeTrue("sample workbook not available: " + samplePath, file.isFile());

    String markdown = DocumentLoad.excelToMarkdown(file);
    org.junit.Assert.assertNotNull(markdown);
    System.out.println(markdown);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment