Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in
Toggle navigation
P
poc-api
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
poc
poc-api
Commits
9be410a5
Commit
9be410a5
authored
Sep 04, 2025
by
alex yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: 关键词提取
parent
72890a4b
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
280 additions
and
20 deletions
+280
-20
LongTextDialoguesServiceImpl.java
...document/aggregate/impl/LongTextDialoguesServiceImpl.java
+121
-17
Concepts.java
...ava/cn/com/poc/long_document/domain/keyword/Concepts.java
+8
-0
KeyWord.java
...java/cn/com/poc/long_document/domain/keyword/KeyWord.java
+49
-0
MentionFrequency.java
...om/poc/long_document/domain/keyword/MentionFrequency.java
+49
-0
LongTextSummaryDto.java
...java/cn/com/poc/long_document/dto/LongTextSummaryDto.java
+4
-3
KeyWordCounter.java
src/main/java/cn/com/poc/meeting/cache/KeyWordCounter.java
+23
-0
MeetingTest.java
src/test/java/cn/com/poc/meeting/MeetingTest.java
+26
-0
No files found.
src/main/java/cn/com/poc/long_document/aggregate/impl/LongTextDialoguesServiceImpl.java
View file @
9be410a5
...
...
@@ -14,6 +14,7 @@ import cn.com.poc.common.model.BizFileUploadRecordModel;
import
cn.com.poc.common.service.BizFileUploadRecordService
;
import
cn.com.poc.common.utils.DocumentLoad
;
import
cn.com.poc.common.utils.JsonUtils
;
import
cn.com.poc.common.utils.ListUtils
;
import
cn.com.poc.common.utils.SSEUtil
;
import
cn.com.poc.knowledge.aggregate.KnowledgeService
;
import
cn.com.poc.knowledge.constant.KnowledgeConstant
;
...
...
@@ -23,12 +24,15 @@ import cn.com.poc.knowledge.service.BizKnowledgeDocumentService;
import
cn.com.poc.long_document.aggregate.LongTextDialoguesService
;
import
cn.com.poc.long_document.domain.LongtextDialoguesResult
;
import
cn.com.poc.long_document.domain.MindMap
;
import
cn.com.poc.long_document.domain.keyword.KeyWord
;
import
cn.com.poc.long_document.domain.keyword.MentionFrequency
;
import
cn.com.poc.long_document.dto.LongTextExampleDto
;
import
cn.com.poc.long_document.dto.LongTextSummaryDto
;
import
cn.com.poc.long_document.entity.BizLongTextSummaryCacheEntity
;
import
cn.com.poc.long_document.model.BizLongTextExampleModel
;
import
cn.com.poc.long_document.service.BizLongTextExampleService
;
import
cn.com.poc.long_document.service.BizLongTextSummaryCacheService
;
import
cn.com.poc.meeting.cache.KeyWordCounter
;
import
cn.com.poc.thirdparty.resource.demand.ai.aggregate.DemandKnowledgeService
;
import
cn.com.poc.thirdparty.resource.demand.ai.constants.KnowledgeSearchTypeEnum
;
import
cn.com.poc.thirdparty.resource.demand.ai.constants.LLMRoleEnum
;
...
...
@@ -62,6 +66,7 @@ import java.io.IOException;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.CompletableFuture
;
import
java.util.stream.Collectors
;
...
...
@@ -240,7 +245,7 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
String
summary
=
StringUtils
.
EMPTY
;
String
corePoint
=
StringUtils
.
EMPTY
;
String
mindMap
=
StringUtils
.
EMPTY
;
String
keyWord
=
StringUtils
.
EMPTY
;
KeyWord
keyWord
=
null
;
boolean
summaryNeedGenerate
=
false
;
boolean
corePointNeedGenerate
=
false
;
...
...
@@ -259,7 +264,8 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
BizLongTextSummaryCacheEntity
longTextSummaryCacheEntity
=
bizLongTextSummaryCacheEntities
.
get
(
0
);
summary
=
longTextSummaryCacheEntity
.
getSummary
();
corePoint
=
longTextSummaryCacheEntity
.
getCorePoint
();
keyWord
=
longTextSummaryCacheEntity
.
getKeyword
();
keyWord
=
StringUtils
.
isNotBlank
(
longTextSummaryCacheEntity
.
getKeyword
())
?
JsonUtils
.
deSerialize
(
longTextSummaryCacheEntity
.
getKeyword
(),
KeyWord
.
class
)
:
null
;
mindMap
=
longTextSummaryCacheEntity
.
getMindmap
();
}
...
...
@@ -267,7 +273,7 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
summaryNeedGenerate
=
StringUtils
.
isBlank
(
summary
);
corePointNeedGenerate
=
StringUtils
.
isBlank
(
corePoint
);
mindMapNeedGenerate
=
StringUtils
.
isBlank
(
mindMap
);
keyWordNeedGenerate
=
StringUtils
.
isBlank
(
keyWord
);
keyWordNeedGenerate
=
ObjectUtil
.
isEmpty
(
keyWord
);
String
document
=
DocumentLoad
.
documentToText
(
file
);
...
...
@@ -306,8 +312,8 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
});
boolean
finalKeyWordNeedGenerate
=
keyWordNeedGenerate
;
String
finalKeyWord
=
keyWord
;
CompletableFuture
<
String
>
keyWordFuture
=
CompletableFuture
.
supplyAsync
(()
->
KeyWord
finalKeyWord
=
keyWord
;
CompletableFuture
<
KeyWord
>
keyWordFuture
=
CompletableFuture
.
supplyAsync
(()
->
finalKeyWordNeedGenerate
?
createKeyWord
(
document
)
:
finalKeyWord
).
exceptionally
(
throwable
->
{
logger
.
error
(
"生成关键词失败"
,
throwable
);
throw
new
BusinessException
(
throwable
);
...
...
@@ -329,7 +335,8 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
updateCacheEntity
.
setCorePoint
(
corePoint
);
updateCacheEntity
.
setSummary
(
summary
);
updateCacheEntity
.
setMindmap
(
mindMap
);
updateCacheEntity
.
setKeyword
(
keyWord
);
updateCacheEntity
.
setKeyword
(
ObjectUtil
.
isNotEmpty
(
keyWord
)
?
JsonUtils
.
serialize
(
keyWord
)
:
StringUtils
.
EMPTY
);
bizLongTextSummaryCacheService
.
update
(
updateCacheEntity
);
}
else
{
BizLongTextSummaryCacheEntity
saveCache
=
new
BizLongTextSummaryCacheEntity
();
...
...
@@ -337,29 +344,28 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
saveCache
.
setSummary
(
summary
);
saveCache
.
setFileUrl
(
fileUrl
);
saveCache
.
setMindmap
(
mindMap
);
saveCache
.
setKeyword
(
keyWord
);
saveCache
.
setKeyword
(
ObjectUtil
.
isNotEmpty
(
keyWord
)
?
JsonUtils
.
serialize
(
keyWord
)
:
StringUtils
.
EMPTY
);
saveCache
.
setFileMd5
(
fileMD5
);
saveCache
.
setIsDeleted
(
CommonConstant
.
IsDeleted
.
N
);
bizLongTextSummaryCacheService
.
save
(
saveCache
);
}
}
if
(
StringUtils
.
isBlank
(
summary
)){
if
(
StringUtils
.
isBlank
(
summary
))
{
throw
new
BusinessException
(
"获取/生成摘要失败"
);
}
if
(
StringUtils
.
isBlank
(
keyWord
))
{
if
(
ObjectUtil
.
isEmpty
(
keyWord
))
{
throw
new
BusinessException
(
"获取/生成关键词失败"
);
}
if
(
StringUtils
.
isBlank
(
corePoint
)){
if
(
StringUtils
.
isBlank
(
corePoint
))
{
throw
new
BusinessException
(
"获取/生成核心观点失败"
);
}
if
(
StringUtils
.
isBlank
(
mindMap
)){
if
(
StringUtils
.
isBlank
(
mindMap
))
{
throw
new
BusinessException
(
"获取/生成思维导图失败"
);
}
// 返回摘要和核心观点
// 转换格式 json to list
int
startKeyWord
=
keyWord
.
lastIndexOf
(
"["
);
int
endKeyWord
=
keyWord
.
lastIndexOf
(
"]"
);
int
startMindMap
=
mindMap
.
indexOf
(
"{"
);
int
endMindMap
=
mindMap
.
lastIndexOf
(
"}"
);
...
...
@@ -367,8 +373,7 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
LongTextSummaryDto
longTextSummaryDto
=
new
LongTextSummaryDto
();
longTextSummaryDto
.
setSummary
(
summary
);
longTextSummaryDto
.
setCorePoint
(
corePoint
);
longTextSummaryDto
.
setKeyword
(
JsonUtils
.
deSerialize
(
keyWord
.
substring
(
startKeyWord
,
endKeyWord
+
1
),
new
TypeReference
<
List
<
String
>>()
{
}.
getType
()));
longTextSummaryDto
.
setKeyword
(
keyWord
);
longTextSummaryDto
.
setMindMap
(
JsonUtils
.
deSerialize
(
mindMap
.
substring
(
startMindMap
,
endMindMap
+
1
),
MindMap
.
class
));
return
longTextSummaryDto
;
}
...
...
@@ -699,7 +704,7 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
}
}
private
String
createKeyWord
(
String
document
)
{
private
KeyWord
createKeyWord
(
String
document
)
{
// 获取对话提示词
String
promptCode
=
"CreateKeyWordPrompt"
;
BizAgentApplicationGcConfigEntity
documentDialoguePrompt
=
bizAgentApplicationGcConfigService
.
getByConfigCode
(
promptCode
);
...
...
@@ -750,13 +755,112 @@ public class LongTextDialoguesServiceImpl implements LongTextDialoguesService {
}
}
bufferedReader
.
close
();
return
summary
.
toString
();
if
(
StringUtils
.
isBlank
(
summary
.
toString
()))
{
logger
.
error
(
"------------ summary is blank , check llm config -------------"
);
throw
new
BusinessException
(
"关键词提取失败"
);
}
String
keywordStr
=
summary
.
toString
();
int
start
=
keywordStr
.
lastIndexOf
(
"["
);
int
end
=
keywordStr
.
lastIndexOf
(
"]"
);
List
<
String
>
keywords
=
JsonUtils
.
deSerialize
(
keywordStr
.
substring
(
start
,
end
+
1
),
new
TypeReference
<
List
<
String
>>()
{
}.
getType
());
if
(
CollectionUtils
.
isEmpty
(
keywords
))
{
logger
.
error
(
"------------ keywords is blank , check llm config -------------"
);
throw
new
BusinessException
(
"关键词提取失败"
);
}
//匹配关键词在文章中句子[按标点符号切分]/计算关键词频率
Map
<
String
,
Integer
>
keywordCountMap
=
KeyWordCounter
.
keywordCount
(
document
,
keywords
);
Map
<
String
,
List
<
String
>>
matchSentenceMap
=
KeyWordCounter
.
keywordMatchSentence
(
document
,
keywords
);
List
<
MentionFrequency
>
mentionFrequencies
=
new
ArrayList
<>();
for
(
String
keyword
:
keywordCountMap
.
keySet
())
{
MentionFrequency
mentionFrequency
=
new
MentionFrequency
();
mentionFrequency
.
setKeyword
(
keyword
);
mentionFrequency
.
setSentences
(
matchSentenceMap
.
get
(
keyword
).
stream
().
distinct
().
collect
(
Collectors
.
toList
()));
mentionFrequency
.
setFrequency
(
keywordCountMap
.
get
(
keyword
));
mentionFrequencies
.
add
(
mentionFrequency
);
}
// 相关概念
List
<
String
>
concepts
=
createConcepts
(
document
);
KeyWord
keyWord
=
new
KeyWord
();
keyWord
.
setKeyWord
(
new
ArrayList
<>(
keywordCountMap
.
keySet
()));
keyWord
.
setMentionFrequency
(
mentionFrequencies
);
keyWord
.
setConcepts
(
concepts
);
return
keyWord
;
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取关键词失败"
,
e
);
throw
new
BusinessException
(
"获取关键词失败"
);
}
}
/**
 * Asks the LLM for the concepts related to {@code document} and parses the reply as a JSON
 * array of strings.
 *
 * <p>The prompt is loaded from the configuration entry {@code CreateConceptsPrompt}; its
 * {@code {document}} placeholder is replaced with the document text. The streamed reply is
 * concatenated, and the text between the first {@code [} and the last {@code ]} is deserialized.
 *
 * <p>Concept generation is best-effort: any failure while calling the LLM or parsing its reply
 * is logged and yields an empty list instead of propagating.
 *
 * @param document full text of the document
 * @return the related concepts, or {@code ListUtils.EMPTY_LIST} when the reply is blank, holds
 *         no JSON array, or the LLM call / parsing fails
 * @throws BusinessException if the prompt configuration is missing or its system prompt is blank
 */
private List<String> createConcepts(String document) {
    // Load the prompt configuration.
    String promptCode = "CreateConceptsPrompt";
    BizAgentApplicationGcConfigEntity documentDialoguePrompt =
            bizAgentApplicationGcConfigService.getByConfigCode(promptCode);
    if (documentDialoguePrompt == null || StringUtils.isBlank(documentDialoguePrompt.getConfigSystem())) {
        logger.error("获取对话提示词失败 , configCode:{}", promptCode);
        throw new BusinessException("获取对话提示词失败");
    }
    String largeModel = documentDialoguePrompt.getLargeModel();
    String prompt = documentDialoguePrompt.getConfigSystem().replace("{document}", document);

    // Build the messages: system prompt followed by the user request.
    List<Message> messages = new ArrayList<>();
    Message systemMessage = new Message();
    systemMessage.setContent(prompt);
    systemMessage.setRole(LLMRoleEnum.SYSTEM.getRole());
    messages.add(systemMessage);

    Message questionMessage = new Message();
    questionMessage.setContent("生成相关概念");
    questionMessage.setRole(LLMRoleEnum.USER.getRole());
    messages.add(questionMessage);

    // Build the streaming LLM request.
    LargeModelResponse response = new LargeModelResponse();
    response.setModel(largeModel);
    response.setMessages(messages.toArray(new Message[0]));
    response.setStream(true);
    // NOTE(review): "CreateMindMap" looks copied from createMindMap; kept unchanged because the
    // value may be relied on downstream - confirm whether it should identify concept generation.
    response.setUser("CreateMindMap");

    try {
        StringBuilder replyBuilder = new StringBuilder();
        // try-with-resources closes the reader even when a chunk fails to parse.
        try (BufferedReader bufferedReader = llmService.chatChunk(response)) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                if (StringUtils.isEmpty(line)) {
                    continue;
                }
                line = line.replace("data: ", StringUtils.EMPTY);
                LargeModelDemandResult chunk = JsonUtils.deSerialize(line, LargeModelDemandResult.class);
                if (ObjectUtil.isEmpty(chunk) || !"0".equals(chunk.getCode())) {
                    // chunk may be null here, so it must not be dereferenced unconditionally.
                    Object code = chunk == null ? null : chunk.getCode();
                    logger.error("LLM Error,code:{}", code);
                    // Caught by the outer catch below, which turns it into an empty result.
                    throw new BusinessException("生成相关概念失败");
                }
                if (StringUtils.isNotBlank(chunk.getMessage())) {
                    replyBuilder.append(chunk.getMessage());
                }
            }
        }

        String reply = replyBuilder.toString();
        if (StringUtils.isBlank(reply)) {
            return ListUtils.EMPTY_LIST;
        }

        // The model may wrap the JSON array in extra text; keep only the outermost [...] span.
        int start = reply.indexOf("[");
        int end = reply.lastIndexOf("]");
        if (start < 0 || end < start) {
            logger.error("-----------生成相关概念失败------------ no JSON array in LLM reply");
            return ListUtils.EMPTY_LIST;
        }

        List<String> concepts = JsonUtils.deSerialize(reply.substring(start, end + 1),
                new TypeReference<List<String>>() {
                }.getType());
        if (concepts == null) {
            return ListUtils.EMPTY_LIST;
        }
        return concepts;
    } catch (Exception e) {
        logger.error("-----------生成相关概念失败------------", e);
        return ListUtils.EMPTY_LIST;
    }
}
private
String
createMindMap
(
String
document
)
{
// 获取对话提示词
...
...
src/main/java/cn/com/poc/long_document/domain/keyword/Concepts.java
0 → 100644
View file @
9be410a5
package
cn
.
com
.
poc
.
long_document
.
domain
.
keyword
;
/**
* Empty class in the keyword domain package: it declares no fields or methods.
* NOTE(review): presumably a placeholder for a future related-concepts model - confirm or remove;
* {@code KeyWord} currently stores its concepts as a {@code List<String>}.
*
* @author alex.yao
* @date 2025/9/4
*/
public
class
Concepts
{
}
src/main/java/cn/com/poc/long_document/domain/keyword/KeyWord.java
0 → 100644
View file @
9be410a5
package
cn
.
com
.
poc
.
long_document
.
domain
.
keyword
;
import
java.util.List
;
/**
 * Keyword-extraction result for a long document: the keyword list, the per-keyword mention
 * frequency, and the related concepts.
 *
 * @author alex.yao
 * @date 2025/9/4
 */
public class KeyWord {

    /**
     * Keywords for the keyword cloud (intended to hold at most 10).
     */
    private List<String> keyWord;

    /**
     * Mention frequency of each keyword (intended to hold at most 10).
     */
    private List<MentionFrequency> mentionFrequency;

    /**
     * Related concepts (intended to hold at most 10).
     * Private like the other fields; access goes through the getter and setter.
     */
    private List<String> concepts;

    /**
     * @return the keyword list, or {@code null} if never set
     */
    public List<String> getKeyWord() {
        return keyWord;
    }

    /**
     * @param keyWord the keyword list to store
     */
    public void setKeyWord(List<String> keyWord) {
        this.keyWord = keyWord;
    }

    /**
     * @return the per-keyword mention frequencies, or {@code null} if never set
     */
    public List<MentionFrequency> getMentionFrequency() {
        return mentionFrequency;
    }

    /**
     * @param mentionFrequency the per-keyword mention frequencies to store
     */
    public void setMentionFrequency(List<MentionFrequency> mentionFrequency) {
        this.mentionFrequency = mentionFrequency;
    }

    /**
     * @return the related concepts, or {@code null} if never set
     */
    public List<String> getConcepts() {
        return concepts;
    }

    /**
     * @param concepts the related concepts to store
     */
    public void setConcepts(List<String> concepts) {
        this.concepts = concepts;
    }
}
src/main/java/cn/com/poc/long_document/domain/keyword/MentionFrequency.java
0 → 100644
View file @
9be410a5
package
cn
.
com
.
poc
.
long_document
.
domain
.
keyword
;
import
java.util.List
;
/**
 * One keyword together with the sentences that mention it and how often it occurs.
 *
 * @author alex.yao
 * @date 2025/9/4
 */
public class MentionFrequency {

    /**
     * The keyword.
     */
    private String keyword;

    /**
     * Sentences associated with the keyword.
     */
    private List<String> sentences;

    /**
     * Number of occurrences of the keyword.
     */
    private Integer frequency;

    /**
     * @return the keyword, or {@code null} if never set
     */
    public String getKeyword() {
        return this.keyword;
    }

    /**
     * @param keyword the keyword to store
     */
    public void setKeyword(final String keyword) {
        this.keyword = keyword;
    }

    /**
     * @return the associated sentences, or {@code null} if never set
     */
    public List<String> getSentences() {
        return this.sentences;
    }

    /**
     * @param sentences the associated sentences to store
     */
    public void setSentences(final List<String> sentences) {
        this.sentences = sentences;
    }

    /**
     * @return the occurrence count, or {@code null} if never set
     */
    public Integer getFrequency() {
        return this.frequency;
    }

    /**
     * @param frequency the occurrence count to store
     */
    public void setFrequency(final Integer frequency) {
        this.frequency = frequency;
    }
}
src/main/java/cn/com/poc/long_document/dto/LongTextSummaryDto.java
View file @
9be410a5
package
cn
.
com
.
poc
.
long_document
.
dto
;
import
cn.com.poc.long_document.domain.MindMap
;
import
cn.com.poc.long_document.domain.keyword.KeyWord
;
import
com.fasterxml.jackson.annotation.JsonIgnoreProperties
;
import
com.fasterxml.jackson.annotation.JsonInclude
;
...
...
@@ -18,15 +19,15 @@ public class LongTextSummaryDto {
private
String
corePoint
;
private
List
<
String
>
keyword
;
private
KeyWord
keyword
;
private
MindMap
mindMap
;
public
List
<
String
>
getKeyword
()
{
public
KeyWord
getKeyword
()
{
return
keyword
;
}
public
void
setKeyword
(
List
<
String
>
keyword
)
{
public
void
setKeyword
(
KeyWord
keyword
)
{
this
.
keyword
=
keyword
;
}
...
...
src/main/java/cn/com/poc/meeting/cache/KeyWordCounter.java
View file @
9be410a5
...
...
@@ -38,6 +38,29 @@ public class KeyWordCounter {
return
sortedMap
;
}
/**
 * Finds, for every keyword, the sentences of {@code content} that contain it.
 *
 * <p>A sentence is a run of text without any delimiter, followed either by one delimiter or by
 * the end of the text (so a final sentence without closing punctuation is still returned).
 * Delimiters are comma, colon, semicolon, full stop, exclamation mark and question mark in both
 * their ASCII and full-width forms, plus the newline. Matching is case-insensitive; the keyword
 * is quoted, so regex metacharacters in it are taken literally. Each returned sentence includes
 * its closing delimiter (if any) and is trimmed.
 *
 * @param content  text to search; {@code null} yields an empty sentence list for every keyword
 * @param keywords keywords to look for; {@code null} yields an empty map
 * @return map from keyword to its matching sentences, in the iteration order of
 *         {@code keywords}; a {@code null} or empty keyword maps to an empty list
 */
public static Map<String, List<String>> keywordMatchSentence(String content, List<String> keywords) {
    Map<String, List<String>> result = new LinkedHashMap<>();
    if (keywords == null) {
        return result;
    }

    // ASCII and full-width sentence delimiters plus newline, as the body of a character class.
    // Unicode escapes keep the full-width characters safe from source-encoding damage.
    final String delimiters = ",\uFF0C:\uFF1A;\uFF1B.\u3002!\uFF01?\uFF1F\\n";
    final String sentenceBody = "[^" + delimiters + "]*";
    // A sentence ends at a delimiter or at the very end of the input.
    final String sentenceEnd = "(?:[" + delimiters + "]|\\z)";

    for (String keyword : keywords) {
        List<String> sentences = new ArrayList<>();
        if (content != null && keyword != null && !keyword.isEmpty()) {
            // Quote the keyword so that special characters are matched literally.
            String sentenceRegex = sentenceBody + Pattern.quote(keyword) + sentenceBody + sentenceEnd;
            Matcher matcher = Pattern.compile(sentenceRegex, Pattern.CASE_INSENSITIVE).matcher(content);
            while (matcher.find()) {
                sentences.add(matcher.group().trim());
            }
        }
        result.put(keyword, sentences);
    }
    return result;
}
/**
* 缓存关键词计数
...
...
src/test/java/cn/com/poc/meeting/MeetingTest.java
View file @
9be410a5
package
cn
.
com
.
poc
.
meeting
;
import
cn.com.yict.framemax.core.spring.SingleContextInitializer
;
import
org.junit.Test
;
import
org.junit.runner.RunWith
;
import
org.springframework.test.context.ContextConfiguration
;
import
org.springframework.test.context.junit4.SpringJUnit4ClassRunner
;
import
org.springframework.test.context.web.WebAppConfiguration
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
@RunWith
(
SpringJUnit4ClassRunner
.
class
)
@ContextConfiguration
(
initializers
=
SingleContextInitializer
.
class
)
@WebAppConfiguration
public
class
MeetingTest
{
@Test
public
void
test
()
{
String
keyword
=
"an example"
;
String
text
=
"This is an example sentence. Another example? Yes, this is an example."
;
// 将关键词转换为正则表达式模式(注意转义特殊字符)
String
regexPattern
=
Pattern
.
quote
(
keyword
);
// 构建匹配句子的正则表达式
// 匹配以任意标点符号或换行结尾的句子,包含关键词
// 支持中英文混合关键词匹配
String
sentenceRegex
=
"[^,,::;;.。!?!?\\n]*"
+
regexPattern
+
"[^,,::;;.。!?!?\\n]*[,,::;;.。!?!?\\n]"
;
Pattern
pattern
=
Pattern
.
compile
(
sentenceRegex
,
Pattern
.
CASE_INSENSITIVE
);
Matcher
matcher
=
pattern
.
matcher
(
text
);
// 查找并输出匹配的句子
while
(
matcher
.
find
())
{
System
.
out
.
println
(
"\""
+
matcher
.
group
().
trim
()
+
"\""
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment