Commit b6c56139 authored by alex yao's avatar alex yao

feat: QA知識庫文件校驗

parent 0b33fb3c
...@@ -31,6 +31,8 @@ import org.apache.poi.ss.usermodel.Cell; ...@@ -31,6 +31,8 @@ import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.Workbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
...@@ -47,6 +49,7 @@ import java.util.stream.Collectors; ...@@ -47,6 +49,7 @@ import java.util.stream.Collectors;
@Service @Service
public class KnowledgeServiceImpl implements KnowledgeService { public class KnowledgeServiceImpl implements KnowledgeService {
// Shared, immutable logger bound to the implementing class so log categories point at this class.
private static final Logger logger = LoggerFactory.getLogger(KnowledgeServiceImpl.class);
@Resource @Resource
private BizKnowledgeDocumentService bizKnowledgeDocumentService; private BizKnowledgeDocumentService bizKnowledgeDocumentService;
...@@ -78,31 +81,22 @@ public class KnowledgeServiceImpl implements KnowledgeService { ...@@ -78,31 +81,22 @@ public class KnowledgeServiceImpl implements KnowledgeService {
//记录成功 //记录成功
List<BizKnowledgeDocumentEntity> result = new ArrayList<>(); List<BizKnowledgeDocumentEntity> result = new ArrayList<>();
for (MultipartFile documentFile : documentFiles) { for (MultipartFile documentFile : documentFiles) {
//todo 根据知识库类型,对文件内容进行校验
//获取文件名 //获取文件名
String documentName = documentFile.getOriginalFilename(); String documentName = documentFile.getOriginalFilename();
// 统计文件字符数
String type = documentFile.getOriginalFilename().substring(documentFile.getOriginalFilename().lastIndexOf(".") + 1, documentFile.getOriginalFilename().length()); String type = documentFile.getOriginalFilename().substring(documentFile.getOriginalFilename().lastIndexOf(".") + 1, documentFile.getOriginalFilename().length());
File file = File.createTempFile(UUIDTool.getUUID(), "." + type); File file = File.createTempFile(UUIDTool.getUUID(), "." + type);
documentFile.transferTo(file); // 文件大小
// 文件大小不能超过10M
long fileSizeInBytes = file.length(); long fileSizeInBytes = file.length();
double fileSizeInMB = (double) fileSizeInBytes / (1024 * 1024); documentFile.transferTo(file);
if (fileSizeInMB > 10) { // 统计文件字符数
throw new I18nMessageException("exception/upload.more.than.10m");
}
String fileContent = DocumentLoad.documentToText(file).replaceAll(StringUtils.LF, StringUtils.EMPTY).replaceAll(StringUtils.CR, StringUtils.EMPTY);//文件内容 String fileContent = DocumentLoad.documentToText(file).replaceAll(StringUtils.LF, StringUtils.EMPTY).replaceAll(StringUtils.CR, StringUtils.EMPTY);//文件内容
if (StringUtils.isBlank(fileContent)) {
throw new I18nMessageException("exception/error.file.content.is.null");
}
//获取文件字符数
long charCount = fileContent.length(); long charCount = fileContent.length();
//文件字符数不能超过100w boolean check = knowledgeType.equals(KnowledgeConstant.KnowledgeType.BASE) ? checkBaseKnowledgeDocument(documentFile) : checkQAKnowledgeDocument(documentFile);
if (charCount > 100 * 10000) { if (!check) {
throw new I18nMessageException("exception/file.content.more.than.100w"); throw new I18nMessageException("knowledge.document.check.fail");
} }
//文件上传 //文件上传
String documentUrl = bosConfigService.upload(Files.newInputStream(file.toPath()), type, documentFile.getContentType()); String documentUrl = bosConfigService.upload(Files.newInputStream(file.toPath()), type, documentFile.getContentType());
...@@ -390,4 +384,92 @@ public class KnowledgeServiceImpl implements KnowledgeService { ...@@ -390,4 +384,92 @@ public class KnowledgeServiceImpl implements KnowledgeService {
} }
return kdIdList; return kdIdList;
} }
/**
 * Validates a document uploaded to a base knowledge library.
 *
 * <p>The upload is copied to a temporary file, then checked against three rules:
 * the file must not be larger than 10 MB, its extracted text must not be blank,
 * and the extracted text (line breaks removed) must not exceed 1,000,000 characters.
 * The temporary file is always removed before returning.
 *
 * @param multipartFile the uploaded document
 * @return {@code true} when every rule passes; {@code false} when the upload has no
 *         original filename or cannot be read because of an I/O error
 * @throws I18nMessageException when one of the validation rules is violated
 */
private boolean checkBaseKnowledgeDocument(MultipartFile multipartFile) {
    String originalFilename = multipartFile.getOriginalFilename();
    if (originalFilename == null) {
        // Without a name the extension cannot be derived; report a failed check instead of an NPE.
        logger.warn("checkBaseKnowledgeDocument: upload has no original filename");
        return false;
    }
    File file = null;
    try {
        String type = originalFilename.substring(originalFilename.lastIndexOf(".") + 1);
        file = File.createTempFile(UUIDTool.getUUID(), "." + type);
        // NOTE(review): transferTo may move the upload's backing storage - confirm callers
        // have not already consumed this MultipartFile before invoking the check.
        multipartFile.transferTo(file);

        // File size must not exceed 10 MB.
        if (file.length() > 10L * 1024 * 1024) {
            throw new I18nMessageException("exception/upload.more.than.10m");
        }

        // Extracted text (with line feeds and carriage returns stripped) must not be blank.
        String fileContent = DocumentLoad.documentToText(file)
                .replaceAll(StringUtils.LF, StringUtils.EMPTY)
                .replaceAll(StringUtils.CR, StringUtils.EMPTY);
        if (StringUtils.isBlank(fileContent)) {
            throw new I18nMessageException("exception/error.file.content.is.null");
        }

        // Extracted text must not exceed 1,000,000 characters.
        if (fileContent.length() > 100 * 10000) {
            throw new I18nMessageException("exception/file.content.more.than.100w");
        }
    } catch (IOException e) {
        // Pass the exception as the trailing argument (no placeholder) so SLF4J logs the stack trace.
        logger.error("checkBaseKnowledgeDocument error", e);
        return false;
    } finally {
        // The temp copy is only needed for validation; remove it so uploads do not pile up on disk.
        if (file != null && !file.delete()) {
            logger.warn("checkBaseKnowledgeDocument could not delete temp file: {}", file.getAbsolutePath());
        }
    }
    return true;
}
/**
 * Validates a spreadsheet uploaded to a QA knowledge library.
 *
 * <p>The upload is copied to a temporary file and the first sheet is inspected.
 * Rows at index 0 and 1 are skipped; rows from index 2 onward are treated as data rows.
 * Rules enforced:
 * <ol>
 *   <li>the file must not be larger than 10 MB;</li>
 *   <li>there must be at least one data row and no more than 1500 data rows;</li>
 *   <li>no data row may span more than 10 columns;</li>
 *   <li>no single cell may hold more than 1000 characters;</li>
 *   <li>the non-blank cells of one row may not total more than 3000 characters.</li>
 * </ol>
 * The workbook reader is closed and the temporary file removed before returning.
 *
 * @param multipartFile the uploaded spreadsheet
 * @return {@code true} when every rule passes; {@code false} when the upload has no
 *         original filename or cannot be read because of an I/O error
 * @throws I18nMessageException when one of the validation rules is violated
 */
private boolean checkQAKnowledgeDocument(MultipartFile multipartFile) {
    String originalFilename = multipartFile.getOriginalFilename();
    if (originalFilename == null) {
        // Without a name the extension cannot be derived; report a failed check instead of an NPE.
        logger.warn("checkQAKnowledgeDocument: upload has no original filename");
        return false;
    }
    File file = null;
    try {
        String type = originalFilename.substring(originalFilename.lastIndexOf(".") + 1);
        file = File.createTempFile(UUIDTool.getUUID(), "." + type);
        // NOTE(review): transferTo may move the upload's backing storage - confirm callers
        // have not already consumed this MultipartFile before invoking the check.
        multipartFile.transferTo(file);

        // File size must not exceed 10 MB.
        if (file.length() > 10L * 1024 * 1024) {
            throw new I18nMessageException("exception/upload.more.than.10m");
        }

        // try-with-resources releases the workbook even when a rule throws.
        try (ExcelReader excelReader = ExcelUtil.getReader(file)) {
            Sheet sheetAt = excelReader.getWorkbook().getSheetAt(0);
            int lastRowNum = sheetAt.getLastRowNum(); // zero-based index of the last row

            // No row at index 2 or beyond means there is nothing to import.
            if (lastRowNum < 2) {
                throw new I18nMessageException("exception/error.file.content.is.null");
            }

            // Data rows occupy indices 2..lastRowNum inclusive, i.e. lastRowNum - 1 rows.
            int rowNum = lastRowNum - 1;
            if (rowNum > 1500) {
                throw new I18nMessageException("exception/file.rows.more.than.1500");
            }

            for (int i = 2; i <= lastRowNum; i++) {
                Row row = sheetAt.getRow(i);
                if (row == null) {
                    // getRow returns null for rows that were never written; nothing to validate.
                    continue;
                }
                // Column limit applies to every data row, not just the first one.
                if (row.getLastCellNum() > 10) {
                    throw new I18nMessageException("exception/file.columns.more.than.10");
                }

                int totalWordCount = 0;
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    String cellText = cellIterator.next().toString();
                    if (StringUtils.isNotBlank(cellText)) {
                        // A single cell must not exceed 1000 characters.
                        if (cellText.length() > 1000) {
                            throw new I18nMessageException("exception/file.cell.content.more.than.1000");
                        }
                        totalWordCount += cellText.length();
                    }
                }
                // The cells of one row must not total more than 3000 characters.
                if (totalWordCount > 3000) {
                    throw new I18nMessageException("exception/file.rows.content.more.than.3000");
                }
            }
        }
    } catch (IOException e) {
        // Pass the exception as the trailing argument (no placeholder) so SLF4J logs the stack trace.
        logger.error("checkQAKnowledgeDocument error", e);
        return false;
    } finally {
        // The temp copy is only needed for validation; remove it so uploads do not pile up on disk.
        if (file != null && !file.delete()) {
            logger.warn("checkQAKnowledgeDocument could not delete temp file: {}", file.getAbsolutePath());
        }
    }
    return true;
}
} }
...@@ -82,3 +82,7 @@ payment.package.configuration.not.exist=The equity packet configuration does not ...@@ -82,3 +82,7 @@ payment.package.configuration.not.exist=The equity packet configuration does not
no.permission=No permission no.permission=No permission
create.num.limit=Creating has reached its maximum limit create.num.limit=Creating has reached its maximum limit
qa.knowledge.document.struct.not.consistent=Inconsistent question and answer structure qa.knowledge.document.struct.not.consistent=Inconsistent question and answer structure
file.rows.more.than.1500=The document exceeds 1500 lines
file.columns.more.than.10=The document exceeds 10 columns
file.cell.content.more.than.1000=Cell exceeds 1000 characters
file.rows.content.more.than.3000=A row in the document exceeds 3000 characters
\ No newline at end of file
...@@ -82,3 +82,7 @@ payment.package.configuration.not.exist=\u6743\u76CA\u5305\u914D\u7F6E\u4E0D\u5B ...@@ -82,3 +82,7 @@ payment.package.configuration.not.exist=\u6743\u76CA\u5305\u914D\u7F6E\u4E0D\u5B
no.permission=\u6682\u65E0\u6743\u9650 no.permission=\u6682\u65E0\u6743\u9650
create.num.limit=\u521B\u5EFA\u5DF2\u8FBE\u6700\u5927\u4E0A\u9650 create.num.limit=\u521B\u5EFA\u5DF2\u8FBE\u6700\u5927\u4E0A\u9650
qa.knowledge.document.struct.not.consistent=文档结构不一致 qa.knowledge.document.struct.not.consistent=文档结构不一致
file.rows.more.than.1500=文档超出1500行
file.columns.more.than.10=文档超出10列
file.cell.content.more.than.1000=单元格超出1000字符数
file.rows.content.more.than.3000=文档行超出3000字符数
\ No newline at end of file
...@@ -82,3 +82,7 @@ payment.package.configuration.not.exist=\u6B0A\u76CA\u5305\u914D\u7F6E\u4E0D\u5B ...@@ -82,3 +82,7 @@ payment.package.configuration.not.exist=\u6B0A\u76CA\u5305\u914D\u7F6E\u4E0D\u5B
no.permission=\u66AB\u7121\u6B0A\u9650 no.permission=\u66AB\u7121\u6B0A\u9650
create.num.limit=\u5275\u5EFA\u5DF2\u9054\u6700\u5927\u4E0A\u9650 create.num.limit=\u5275\u5EFA\u5DF2\u9054\u6700\u5927\u4E0A\u9650
qa.knowledge.document.struct.not.consistent=文檔結構不一致 qa.knowledge.document.struct.not.consistent=文檔結構不一致
file.rows.more.than.1500=文檔超出1500行
file.columns.more.than.10=文檔超出10列
file.cell.content.more.than.1000=單元格超出1000字符數
file.rows.content.more.than.3000=文檔行超出3000字符數
\ No newline at end of file
package cn.com.poc.knowledge; package cn.com.poc.knowledge;
import cn.com.poc.common.utils.StringUtils;
import cn.com.poc.common.utils.UUIDTool;
import cn.com.poc.knowledge.aggregate.KnowledgeService; import cn.com.poc.knowledge.aggregate.KnowledgeService;
import cn.com.yict.framemax.core.i18n.I18nMessageException;
import cn.com.yict.framemax.core.spring.SingleContextInitializer; import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import cn.hutool.poi.excel.ExcelReader;
import cn.hutool.poi.excel.ExcelUtil;
import com.vladsch.flexmark.html.HtmlRenderer; import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser; import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document; import com.vladsch.flexmark.util.ast.Document;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
...@@ -18,10 +27,12 @@ import org.springframework.test.context.web.WebAppConfiguration; ...@@ -18,10 +27,12 @@ import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource; import javax.annotation.Resource;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.Iterator;
/** /**
* @author alex.yao * @author alex.yao
...@@ -46,6 +57,60 @@ public class KnowledgeServiceTest { ...@@ -46,6 +57,60 @@ public class KnowledgeServiceTest {
knowledgeService.delDocument(138, 1071); knowledgeService.delDocument(138, 1071);
} }
/**
 * Manual check of the QA spreadsheet validation rules against a local template file.
 *
 * <p>Rules exercised: file size at most 10 MB, at least one and at most 1500 data rows
 * (rows from index 2 onward), at most 10 columns per data row, at most 1000 characters
 * per cell and at most 3000 characters per row. Prints {@code true} only when every
 * rule passes; otherwise prints the error message.
 *
 * <p>NOTE(review): the input is a hard-coded path on a developer machine and the method
 * makes no assertions, so it cannot fail in CI - consider a classpath fixture plus asserts.
 */
@Test
public void test_checkQAKnowledgeDocument() {
    File file = new File("C:\\Users\\52747\\Desktop\\1823模板.xlsx");
    try {
        // File size must not exceed 10 MB.
        if (file.length() > 10L * 1024 * 1024) {
            throw new I18nMessageException("exception/upload.more.than.10m");
        }

        // try-with-resources releases the workbook even when a rule throws.
        try (ExcelReader excelReader = ExcelUtil.getReader(file)) {
            Sheet sheetAt = excelReader.getWorkbook().getSheetAt(0);
            int lastRowNum = sheetAt.getLastRowNum(); // zero-based index of the last row

            // No row at index 2 or beyond means there is nothing to import.
            if (lastRowNum < 2) {
                throw new I18nMessageException("exception/error.file.content.is.null");
            }

            // Data rows occupy indices 2..lastRowNum inclusive, i.e. lastRowNum - 1 rows.
            int rowNum = lastRowNum - 1;
            if (rowNum > 1500) {
                throw new I18nMessageException("exception/file.rows.more.than.1500");
            }

            for (int i = 2; i <= lastRowNum; i++) {
                Row row = sheetAt.getRow(i);
                if (row == null) {
                    // getRow returns null for rows that were never written; nothing to validate.
                    continue;
                }
                // Column limit applies to every data row.
                if (row.getLastCellNum() > 10) {
                    throw new I18nMessageException("exception/file.columns.more.than.10");
                }

                int totalWordCount = 0;
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    String cellText = cellIterator.next().toString();
                    if (StringUtils.isNotBlank(cellText)) {
                        // A single cell must not exceed 1000 characters.
                        if (cellText.length() > 1000) {
                            throw new I18nMessageException("exception/file.cell.content.more.than.1000");
                        }
                        totalWordCount += cellText.length();
                    }
                }
                // The cells of one row must not total more than 3000 characters.
                if (totalWordCount > 3000) {
                    throw new I18nMessageException("exception/file.rows.content.more.than.3000");
                }
            }
        }
        // Reached only when no rule was violated and no error occurred.
        System.out.println(true);
    } catch (Exception e) {
        System.out.println("error" + e.getMessage());
    }
}
// @Test // @Test
public static void main(String[] args) throws IOException, InvalidFormatException { public static void main(String[] args) throws IOException, InvalidFormatException {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment