package cn.com.poc.common.utils;

import cn.com.yict.framemax.core.i18n.I18nMessageException;
import cn.hutool.core.io.FileUtil;
import io.github.furstenheim.*;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.hslf.usermodel.HSLFSlide;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.Slide;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
import org.springframework.util.Assert;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Static helpers that load documents (Excel, PowerPoint, Word, PDF, Markdown, plain text,
 * HTML pages) and return their content as plain text or Markdown.
 *
 * <p>Unless stated otherwise, read failures are reported as {@link I18nMessageException}.
 */
public class DocumentLoad {

    /** Message key used whenever a document cannot be read or downloaded. */
    private static final String FILE_LOAD_ERROR = "exception/file.load.error";

    /** Connect timeout for remote resources, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 5000;

    /** Read timeout for remote resources, in milliseconds. */
    private static final int READ_TIMEOUT_MILLIS = 15000;

    /**
     * Matches the whole {@code <head>} element. DOTALL is required because the element
     * normally spans several lines; without it nothing would ever be stripped.
     */
    private static final Pattern HTML_HEAD =
            Pattern.compile("<head\\b[^>]*>.*?</head>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    static final OptionsBuilder optionsBuilder = OptionsBuilder.anOptions();
    static final Options options = optionsBuilder.withBr("-")
            .withLinkStyle(LinkStyle.REFERENCED)
            .withLinkReferenceStyle(LinkReferenceStyle.SHORTCUT)
            .build();
    // NOTE(review): this single instance is shared by every caller of htmlToMarkdown;
    // confirm CopyDown.convert is safe for concurrent use.
    static final CopyDown converter = new CopyDown(options);

    /**
     * Renders the first sheet of a workbook as a Markdown table.
     *
     * <p>The first row is treated as the table header and is followed by a separator line
     * with one column per header cell. String cells are written as-is, numeric cells as
     * their {@code double} value, and every other cell type as an empty cell.
     *
     * <p>NOTE(review): {@link #documentToText(File)} also routes {@code xls} and {@code csv}
     * files here, but the workbook is always opened as {@link XSSFWorkbook} - confirm those
     * formats are actually readable this way.
     *
     * @param file the workbook file
     * @return the Markdown table, or an empty string when the sheet has no rows
     * @throws I18nMessageException if the file cannot be read
     */
    public static String excelToMarkdown(File file) {
        try (InputStream inputStream = new FileInputStream(file);
             Workbook workbook = new XSSFWorkbook(inputStream)) {
            Sheet sheet = workbook.getSheetAt(0);
            StringBuilder markdown = new StringBuilder();
            boolean headerWritten = false;

            for (Row row : sheet) {
                int columns = 0;
                for (Cell cell : row) {
                    markdown.append("| ").append(cellText(cell));
                    columns++;
                }
                markdown.append("|\n");

                // Markdown requires the separator directly after the header row.
                if (!headerWritten) {
                    appendSeparator(markdown, columns);
                    headerWritten = true;
                }
            }
            return markdown.toString();
        } catch (IOException e) {
            throw loadError(FILE_LOAD_ERROR, e);
        }
    }

    /** Returns the text written for one cell: string or numeric value, otherwise empty. */
    private static String cellText(Cell cell) {
        if (cell.getCellType() == CellType.STRING) {
            return cell.getStringCellValue();
        }
        if (cell.getCellType() == CellType.NUMERIC) {
            return String.valueOf(cell.getNumericCellValue());
        }
        return "";
    }

    /** Appends a Markdown header separator with {@code columns} columns (at least one). */
    private static void appendSeparator(StringBuilder markdown, int columns) {
        markdown.append('|');
        for (int i = 0; i < Math.max(columns, 1); i++) {
            markdown.append("--|");
        }
        markdown.append('\n');
    }

    /**
     * Extracts the text of a PowerPoint file, one {@code Page:<n>} header per slide.
     *
     * <p>The file is first opened as a binary {@code .ppt}; when POI reports that it is an
     * OOXML file instead, it is re-opened as {@code .pptx}.
     *
     * @param file the presentation file
     * @return the extracted text
     * @throws I18nMessageException if the file cannot be read
     */
    public static String loadPPT(File file) {
        try {
            try {
                return readBinarySlides(file);
            } catch (OfficeXmlFileException notBinaryFormat) {
                // The constructor rejects OOXML input before any text is produced,
                // so it is safe to start over with the pptx reader.
                return readXmlSlides(file);
            }
        } catch (IOException e) {
            throw loadError(FILE_LOAD_ERROR, e);
        }
    }

    /** Reads a binary (.ppt) slide show through POI's slide show extractor. */
    // Raw SlideShowExtractor: naming its type arguments would need HSLF types this file does not import.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static String readBinarySlides(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        try (InputStream in = FileUtil.getInputStream(file)) {
            HSLFSlideShow slideShow = new HSLFSlideShow(in);
            try (SlideShowExtractor extractor = new SlideShowExtractor(slideShow)) {
                for (HSLFSlide slide : slideShow.getSlides()) {
                    text.append("Page:").append(slide.getSlideNumber()).append(StringUtils.LF)
                            .append(extractor.getText(slide)).append(StringUtils.LF);
                }
            }
        }
        return text.toString();
    }

    /**
     * Reads an OOXML (.pptx) slide show by walking the top-level shapes of every slide.
     * The page header is written once per slide and the runs of a paragraph are joined
     * onto one line.
     */
    private static String readXmlSlides(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        try (InputStream in = FileUtil.getInputStream(file);
             XMLSlideShow slideShow = new XMLSlideShow(in)) {
            for (XSLFSlide slide : slideShow.getSlides()) {
                text.append("Page:").append(slide.getSlideNumber()).append(StringUtils.LF);
                CTSlide rawSlide = slide.getXmlObject();
                CTGroupShape shapeTree = rawSlide.getCSld().getSpTree();
                for (CTShape shape : shapeTree.getSpList()) {
                    CTTextBody textBody = shape.getTxBody();
                    if (textBody == null) {
                        continue;
                    }
                    for (CTTextParagraph paragraph : textBody.getPList()) {
                        boolean hasText = false;
                        for (CTRegularTextRun run : paragraph.getRList()) {
                            text.append(run.getT());
                            hasText = true;
                        }
                        if (hasText) {
                            text.append(StringUtils.LF);
                        }
                    }
                }
            }
        }
        return text.toString();
    }

    /**
     * Downloads an HTML page and converts it to Markdown.
     *
     * <p>The response is decoded as UTF-8 and the {@code <head>} element is removed before
     * conversion.
     *
     * @param url the page address
     * @return the Markdown text, or an empty string when the page cannot be fetched
     */
    public static String htmlToMarkdown(String url) {
        try {
            URLConnection conn = new URL(url).openConnection();
            conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            conn.setReadTimeout(READ_TIMEOUT_MILLIS);

            StringBuilder html = new StringBuilder();
            try (Reader reader = new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8)) {
                // 64K characters per read
                char[] buffer = new char[1024 * 64];
                int len;
                while ((len = reader.read(buffer)) != -1) {
                    html.append(buffer, 0, len);
                }
            }
            String body = HTML_HEAD.matcher(html).replaceAll("");
            return converter.convert(body);
        } catch (IOException e) {
            // Best effort by design: an unreachable page yields empty content.
            return "";
        }
    }

    /**
     * Reads a document and returns its text, choosing the loader from the file extension
     * (matched case-insensitively).
     *
     * @param file the document to read; must not be {@code null}
     * @return the extracted text
     * @throws I18nMessageException if the extension is not supported or the file cannot be read
     */
    public static String documentToText(File file) {
        Assert.notNull(file, "file must not be null");
        String fileName = FileUtil.getName(file);
        String extension = fileName.substring(fileName.lastIndexOf('.') + 1);
        try {
            switch (extension.toLowerCase(Locale.ROOT)) {
                case "docx":
                    return loadWordDocx(file);
                case "doc":
                    return loadWordDoc(file);
                case "md":
                    return loadMarkDown(file);
                case "pdf":
                    return loadPDF(file);
                case "txt":
                    return loadTxt(file);
                case "ppt":
                case "pptx":
                    return loadPPT(file);
                case "xlsx":
                case "xls":
                case "csv":
                    return excelToMarkdown(file);
                default:
                    throw new I18nMessageException(extension + " format is not yet supported");
            }
        } catch (IOException e) {
            throw loadError(e.getMessage(), e);
        }
    }

    /**
     * Reads a Markdown file as UTF-8 text, keeping its line breaks.
     *
     * @param file the Markdown file
     * @return the file content
     * @throws IOException if the file cannot be read
     */
    public static String loadMarkDown(File file) throws IOException {
        return readUtf8(file);
    }

    /**
     * Extracts the text of a {@code .docx} document.
     *
     * @param file the Word document
     * @return the extracted text
     * @throws IOException if the file cannot be read
     */
    public static String loadWordDocx(File file) throws IOException {
        try (InputStream in = Files.newInputStream(file.toPath());
             XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(in))) {
            return extractor.getText();
        }
    }

    /**
     * Extracts the text of a binary {@code .doc} document.
     *
     * @param file the Word document
     * @return the extracted text
     * @throws IOException if the file cannot be read
     */
    public static String loadWordDoc(File file) throws IOException {
        try (InputStream in = new FileInputStream(file);
             WordExtractor extractor = new WordExtractor(in)) {
            return extractor.getText();
        }
    }

    /**
     * Reads a plain text file as UTF-8 text, keeping its line breaks.
     *
     * @param file the text file
     * @return the file content
     * @throws IOException if the file cannot be read
     */
    public static String loadTxt(File file) throws IOException {
        return readUtf8(file);
    }

    /**
     * Extracts the text of all pages of a PDF, sorted by position on the page.
     *
     * @param file the PDF file
     * @return the extracted text
     * @throws IOException if the file cannot be read or parsed
     */
    public static String loadPDF(File file) throws IOException {
        PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(file));
        parser.parse();
        try (PDDocument doc = parser.getPDDocument()) {
            PDFTextStripper textStripper = new PDFTextStripper();
            textStripper.setStartPage(1);
            textStripper.setEndPage(doc.getNumberOfPages());
            textStripper.setSortByPosition(true);
            return textStripper.getText(doc);
        }
    }

    /**
     * Downloads a remote document into a temporary file whose extension is taken from the
     * URL path (the query string is ignored; {@code tmp} is used when there is none).
     *
     * <p>The caller owns the returned file. On failure the partially written file is deleted.
     *
     * @param path the address of the document
     * @return the temporary file holding the downloaded content
     * @throws I18nMessageException if the download fails
     */
    public static File downloadURLDocument(String path) {
        File tempFile = null;
        try {
            URL url = new URL(path);
            URLConnection conn = url.openConnection();
            conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            conn.setReadTimeout(READ_TIMEOUT_MILLIS);

            tempFile = File.createTempFile(UUIDTool.getUUID(), "." + extensionOf(url));
            try (InputStream in = conn.getInputStream();
                 OutputStream out = new FileOutputStream(tempFile)) {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
            return tempFile;
        } catch (IOException e) {
            if (tempFile != null) {
                // Best-effort cleanup; the download error below is what matters to the caller.
                tempFile.delete();
            }
            throw loadError(FILE_LOAD_ERROR, e);
        }
    }

    /** Returns the extension of the last path segment of {@code url}, or {@code tmp} if absent. */
    private static String extensionOf(URL url) {
        String urlPath = url.getPath();
        String name = urlPath.substring(urlPath.lastIndexOf('/') + 1);
        int dot = name.lastIndexOf('.');
        boolean hasExtension = dot >= 0 && dot < name.length() - 1;
        return hasExtension ? name.substring(dot + 1) : "tmp";
    }

    /** Reads the whole file as UTF-8 text without altering its content. */
    private static String readUtf8(File file) throws IOException {
        return new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
    }

    /**
     * Builds the exception reported to callers while keeping the original failure attached,
     * so the root cause is not lost from stack traces.
     */
    private static I18nMessageException loadError(String message, Throwable cause) {
        I18nMessageException error = new I18nMessageException(message);
        try {
            error.initCause(cause);
        } catch (IllegalStateException causeAlreadySet) {
            // The exception type fixed its cause in the constructor; attach it another way.
            error.addSuppressed(cause);
        }
        return error;
    }
}
