Commit b6c56139 authored by alex yao

feat: QA知識庫文件校驗

parent 0b33fb3c
......@@ -31,6 +31,8 @@ import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
......@@ -47,6 +49,7 @@ import java.util.stream.Collectors;
@Service
public class KnowledgeServiceImpl implements KnowledgeService {
private Logger logger = LoggerFactory.getLogger(KnowledgeService.class);
@Resource
private BizKnowledgeDocumentService bizKnowledgeDocumentService;
......@@ -78,31 +81,22 @@ public class KnowledgeServiceImpl implements KnowledgeService {
//记录成功
List<BizKnowledgeDocumentEntity> result = new ArrayList<>();
for (MultipartFile documentFile : documentFiles) {
//todo 根据知识库类型,对文件内容进行校验
//获取文件名
String documentName = documentFile.getOriginalFilename();
// 统计文件字符数
String type = documentFile.getOriginalFilename().substring(documentFile.getOriginalFilename().lastIndexOf(".") + 1, documentFile.getOriginalFilename().length());
File file = File.createTempFile(UUIDTool.getUUID(), "." + type);
documentFile.transferTo(file);
// 文件大小不能超过10M
// 文件大小
long fileSizeInBytes = file.length();
double fileSizeInMB = (double) fileSizeInBytes / (1024 * 1024);
if (fileSizeInMB > 10) {
throw new I18nMessageException("exception/upload.more.than.10m");
}
documentFile.transferTo(file);
// 统计文件字符数
String fileContent = DocumentLoad.documentToText(file).replaceAll(StringUtils.LF, StringUtils.EMPTY).replaceAll(StringUtils.CR, StringUtils.EMPTY);//文件内容
if (StringUtils.isBlank(fileContent)) {
throw new I18nMessageException("exception/error.file.content.is.null");
}
//获取文件字符数
long charCount = fileContent.length();
//文件字符数不能超过100w
if (charCount > 100 * 10000) {
throw new I18nMessageException("exception/file.content.more.than.100w");
boolean check = knowledgeType.equals(KnowledgeConstant.KnowledgeType.BASE) ? checkBaseKnowledgeDocument(documentFile) : checkQAKnowledgeDocument(documentFile);
if (!check) {
throw new I18nMessageException("knowledge.document.check.fail");
}
//文件上传
String documentUrl = bosConfigService.upload(Files.newInputStream(file.toPath()), type, documentFile.getContentType());
......@@ -390,4 +384,92 @@ public class KnowledgeServiceImpl implements KnowledgeService {
}
return kdIdList;
}
/**
 * Validates an uploaded file for a base (non-QA) knowledge base.
 *
 * <p>The upload is copied to a temporary file, then checked against three rules:
 * the file must not exceed 10 MB, its extracted text must not be blank, and the
 * extracted text (with CR/LF removed) must not exceed 1,000,000 characters.
 * The temporary file is always removed before returning.
 *
 * <p>NOTE(review): some {@code MultipartFile} implementations move their backing
 * storage on {@code transferTo}; confirm callers do not need to call
 * {@code transferTo} on the same upload again afterwards.
 *
 * @param multipartFile the uploaded file to validate
 * @return {@code true} when every rule passes; {@code false} when the upload has no
 *         original filename or an I/O error occurs while staging the file
 * @throws I18nMessageException when one of the size/content rules is violated
 */
private boolean checkBaseKnowledgeDocument(MultipartFile multipartFile) {
    String originalFilename = multipartFile.getOriginalFilename();
    if (originalFilename == null) {
        // No name means no extension to give the temp copy; fail the check instead of hitting an NPE.
        logger.error("checkBaseKnowledgeDocument error: original filename is null");
        return false;
    }
    File file = null;
    try {
        String type = originalFilename.substring(originalFilename.lastIndexOf(".") + 1);
        file = File.createTempFile(UUIDTool.getUUID(), "." + type);
        multipartFile.transferTo(file);
        // File size must not exceed 10 MB
        long fileSizeInBytes = file.length();
        double fileSizeInMB = (double) fileSizeInBytes / (1024 * 1024);
        if (fileSizeInMB > 10) {
            throw new I18nMessageException("exception/upload.more.than.10m");
        }
        // File content must not be empty (line breaks are stripped before counting)
        String fileContent = DocumentLoad.documentToText(file)
                .replaceAll(StringUtils.LF, StringUtils.EMPTY)
                .replaceAll(StringUtils.CR, StringUtils.EMPTY);
        if (StringUtils.isBlank(fileContent)) {
            throw new I18nMessageException("exception/error.file.content.is.null");
        }
        // Character count must not exceed 1,000,000
        long charCount = fileContent.length();
        if (charCount > 100 * 10000) {
            throw new I18nMessageException("exception/file.content.more.than.100w");
        }
    } catch (IOException e) {
        // Pass the exception as the trailing argument so SLF4J logs the stack trace.
        logger.error("checkBaseKnowledgeDocument error", e);
        return false;
    } finally {
        // The temp copy is only needed for validation; remove it on every exit path.
        if (file != null && !file.delete()) {
            logger.warn("checkBaseKnowledgeDocument could not delete temp file: {}", file.getAbsolutePath());
        }
    }
    return true;
}
/**
 * Validates an uploaded spreadsheet for a QA knowledge base.
 *
 * <p>Rules enforced (data rows start at sheet row index 2 of the first sheet):
 * <ol>
 *   <li>the file must not exceed 10 MB;</li>
 *   <li>there must be at most 1500 data rows;</li>
 *   <li>the first data row must have at most 10 columns;</li>
 *   <li>no single cell may exceed 1000 characters;</li>
 *   <li>the non-blank cells of a single row must not total more than 3000 characters.</li>
 * </ol>
 * The workbook is closed and the temporary file removed before returning.
 *
 * <p>NOTE(review): some {@code MultipartFile} implementations move their backing
 * storage on {@code transferTo}; confirm callers do not need to call
 * {@code transferTo} on the same upload again afterwards.
 *
 * @param multipartFile the uploaded spreadsheet to validate
 * @return {@code true} when every rule passes; {@code false} when the upload has no
 *         original filename or an I/O error occurs while staging the file
 * @throws I18nMessageException when one of the rules above is violated
 */
private boolean checkQAKnowledgeDocument(MultipartFile multipartFile) {
    String originalFilename = multipartFile.getOriginalFilename();
    if (originalFilename == null) {
        // No name means no extension to give the temp copy; fail the check instead of hitting an NPE.
        logger.error("checkQAKnowledgeDocument error: original filename is null");
        return false;
    }
    File file = null;
    try {
        String type = originalFilename.substring(originalFilename.lastIndexOf(".") + 1);
        file = File.createTempFile(UUIDTool.getUUID(), "." + type);
        multipartFile.transferTo(file);
        // File size must not exceed 10 MB
        long fileSizeInBytes = file.length();
        double fileSizeInMB = (double) fileSizeInBytes / (1024 * 1024);
        if (fileSizeInMB > 10) {
            throw new I18nMessageException("exception/upload.more.than.10m");
        }
        // Close the reader (and its workbook) before the temp file is deleted in finally.
        try (ExcelReader excelReader = ExcelUtil.getReader(file)) {
            Workbook workbook = excelReader.getWorkbook();
            Sheet sheetAt = workbook.getSheetAt(0);
            int lastRowNum = sheetAt.getLastRowNum(); // zero-based index of the last row
            // Data rows occupy indexes 2..lastRowNum inclusive, i.e. lastRowNum - 1 rows.
            int rowNum = lastRowNum - 1;
            if (rowNum > 1500) {
                throw new I18nMessageException("exception/file.rows.more.than.1500");
            }
            // Column count is taken from the first data row; the sheet may not have one.
            Row firstDataRow = sheetAt.getRow(2);
            if (firstDataRow != null && firstDataRow.getLastCellNum() > 10) {
                throw new I18nMessageException("exception/file.columns.more.than.10");
            }
            for (int i = 2; i <= lastRowNum; i++) {
                Row row = sheetAt.getRow(i);
                if (row == null) {
                    // POI returns null for rows that were never written; nothing to count.
                    continue;
                }
                int totalWordCount = 0;
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    String cellText = cellIterator.next().toString();
                    if (StringUtils.isNotBlank(cellText)) {
                        // A single cell must not exceed 1000 characters
                        if (cellText.length() > 1000) {
                            throw new I18nMessageException("exception/file.cell.content.more.than.1000");
                        }
                        totalWordCount += cellText.length();
                    }
                }
                // A single row must not exceed 3000 characters in total
                if (totalWordCount > 3000) {
                    throw new I18nMessageException("exception/file.rows.content.more.than.3000");
                }
            }
        }
    } catch (IOException e) {
        // Pass the exception as the trailing argument so SLF4J logs the stack trace.
        logger.error("checkQAKnowledgeDocument error", e);
        return false;
    } finally {
        // The temp copy is only needed for validation; remove it on every exit path.
        if (file != null && !file.delete()) {
            logger.warn("checkQAKnowledgeDocument could not delete temp file: {}", file.getAbsolutePath());
        }
    }
    return true;
}
}
......@@ -81,4 +81,8 @@ file.content.empty=The file content cannot be empty
payment.package.configuration.not.exist=The equity packet configuration does not exist
no.permission=No permission
create.num.limit=Creating has reached its maximum limit
qa.knowledge.document.struct.not.consistent=Inconsistent question and answer structure
\ No newline at end of file
qa.knowledge.document.struct.not.consistent=Inconsistent question and answer structure
file.rows.more.than.1500=The document exceeds 1500 lines
file.columns.more.than.10=The document exceeds 10 columns
file.cell.content.more.than.1000=Cell exceeds 1000 characters
file.rows.content.more.than.3000=The document line exceeds 3000 characters
\ No newline at end of file
......@@ -81,4 +81,8 @@ file.content.empty=\u6587\u6863\u5185\u5BB9\u4E0D\u80FD\u4E3A\u7A7A
payment.package.configuration.not.exist=\u6743\u76CA\u5305\u914D\u7F6E\u4E0D\u5B58\u5728
no.permission=\u6682\u65E0\u6743\u9650
create.num.limit=\u521B\u5EFA\u5DF2\u8FBE\u6700\u5927\u4E0A\u9650
qa.knowledge.document.struct.not.consistent=文档结构不一致
\ No newline at end of file
qa.knowledge.document.struct.not.consistent=文档结构不一致
file.rows.more.than.1500=文档超出1500行
file.columns.more.than.10=文档超出10列
file.cell.content.more.than.1000=单元格超出1000字符数
file.rows.content.more.than.3000=文档行超出3000字符数
\ No newline at end of file
......@@ -81,4 +81,8 @@ file.content.empty=\u6587\u4EF6\u5185\u5BB9\u4E0D\u80FD\u7232\u7A7A
payment.package.configuration.not.exist=\u6B0A\u76CA\u5305\u914D\u7F6E\u4E0D\u5B58\u5728
no.permission=\u66AB\u7121\u6B0A\u9650
create.num.limit=\u5275\u5EFA\u5DF2\u9054\u6700\u5927\u4E0A\u9650
qa.knowledge.document.struct.not.consistent=文檔結構不一致
\ No newline at end of file
qa.knowledge.document.struct.not.consistent=文檔結構不一致
file.rows.more.than.1500=文檔超出1500行
file.columns.more.than.10=文檔超出10列
file.cell.content.more.than.1000=單元格超出1000字符數
file.rows.content.more.than.3000=文檔行超出3000字符數
\ No newline at end of file
package cn.com.poc.knowledge;
import cn.com.poc.common.utils.StringUtils;
import cn.com.poc.common.utils.UUIDTool;
import cn.com.poc.knowledge.aggregate.KnowledgeService;
import cn.com.yict.framemax.core.i18n.I18nMessageException;
import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import cn.hutool.poi.excel.ExcelReader;
import cn.hutool.poi.excel.ExcelUtil;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
......@@ -18,10 +27,12 @@ import org.springframework.test.context.web.WebAppConfiguration;
import javax.annotation.Resource;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Iterator;
/**
* @author alex.yao
......@@ -46,6 +57,60 @@ public class KnowledgeServiceTest {
knowledgeService.delDocument(138, 1071);
}
/**
 * Manual check of the QA spreadsheet rules against a local sample file.
 *
 * <p>Rules exercised (data rows start at sheet row index 2 of the first sheet):
 * file size at most 10 MB, at most 1500 data rows, at most 10 columns in the first
 * data row, at most 1000 characters per cell, at most 3000 characters per row.
 * Prints {@code true} only when the sample passes every rule; otherwise prints the
 * error message.
 *
 * <p>NOTE(review): the sample path is machine-specific, so this test only gives a
 * meaningful result on a machine that has that file.
 */
@Test
public void test_checkQAKnowledgeDocument() {
    try {
        File file = new File("C:\\Users\\52747\\Desktop\\1823模板.xlsx");
        // File size must not exceed 10 MB
        long fileSizeInBytes = file.length();
        double fileSizeInMB = (double) fileSizeInBytes / (1024 * 1024);
        if (fileSizeInMB > 10) {
            throw new I18nMessageException("exception/upload.more.than.10m");
        }
        // Release the workbook handle once the checks are done.
        try (ExcelReader excelReader = ExcelUtil.getReader(file)) {
            Workbook workbook = excelReader.getWorkbook();
            Sheet sheetAt = workbook.getSheetAt(0);
            int lastRowNum = sheetAt.getLastRowNum(); // zero-based index of the last row
            // Data rows occupy indexes 2..lastRowNum inclusive, i.e. lastRowNum - 1 rows.
            int rowNum = lastRowNum - 1;
            if (rowNum > 1500) {
                throw new I18nMessageException("exception/file.rows.more.than.1500");
            }
            // Column count is taken from the first data row; the sheet may not have one.
            Row firstDataRow = sheetAt.getRow(2);
            if (firstDataRow != null && firstDataRow.getLastCellNum() > 10) {
                throw new I18nMessageException("exception/file.columns.more.than.10");
            }
            for (int i = 2; i <= lastRowNum; i++) {
                Row row = sheetAt.getRow(i);
                if (row == null) {
                    // POI returns null for rows that were never written; nothing to count.
                    continue;
                }
                int totalWordCount = 0;
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    String cellText = cellIterator.next().toString();
                    if (StringUtils.isNotBlank(cellText)) {
                        // A single cell must not exceed 1000 characters
                        if (cellText.length() > 1000) {
                            throw new I18nMessageException("exception/file.cell.content.more.than.1000");
                        }
                        totalWordCount += cellText.length();
                    }
                }
                // A single row must not exceed 3000 characters in total
                if (totalWordCount > 3000) {
                    throw new I18nMessageException("exception/file.rows.content.more.than.3000");
                }
            }
        }
        // Reached only when no rule was violated.
        System.out.println(true);
    } catch (Exception e) {
        System.out.println("error" + e.getMessage());
    }
}
// @Test
public static void main(String[] args) throws IOException, InvalidFormatException {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment