package cn.com.poc.thirdparty.resource.demand.ai.function;

import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import io.github.furstenheim.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.junit.runner.RunWith;
import org.junit.Test;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

/**
 * Exploratory tests that download a live web page and print it either as plain text
 * (via jsoup) or as Markdown (via CopyDown).
 *
 * <p>Both tests require network access to {@code ARTICLE_URL}. They make no assertions:
 * a test fails only if fetching or converting the page throws.
 *
 * @author alex.yao
 * @date 2025/3/5
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(initializers = SingleContextInitializer.class)
@WebAppConfiguration
public class HtmlReaderFunctionTest {

    /** Page fetched by both tests. */
    private static final String ARTICLE_URL = "https://juejin.cn/post/7115639885457063966";

    /** Connect timeout for the raw {@link URLConnection}, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 5000;

    /** Read timeout for the raw {@link URLConnection}, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 15000;

    /** Size of the character buffer used while draining the response (64K chars). */
    private static final int BUFFER_CHARS = 1024 * 64;

    /**
     * Matches a whole {@code <head>} element. DOTALL is required because a real head section
     * spans many lines and {@code .} does not match line terminators by default; the tag may
     * also carry attributes or be written in upper case.
     */
    private static final Pattern HEAD_ELEMENT = Pattern.compile(
            "<head(\\s[^>]*)?>.*?</head>",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Builder the shared CopyDown {@link #options} are created from. */
    static final OptionsBuilder optionsBuilder = OptionsBuilder.anOptions();

    /** CopyDown options: {@code "-"} for br, referenced link style, shortcut link references. */
    static final Options options = optionsBuilder.withBr("-")
            .withLinkStyle(LinkStyle.REFERENCED)
            .withLinkReferenceStyle(LinkReferenceStyle.SHORTCUT)
            .build();

    /** HTML-to-Markdown converter built once from {@link #options} and used by the tests. */
    static final CopyDown converter = new CopyDown(options);

    /**
     * Fetches the article with jsoup and prints the text content of its body.
     *
     * @throws IOException if the page cannot be fetched
     */
    @Test
    public void test_jsoup() throws IOException {
        Element body = Jsoup.connect(ARTICLE_URL)
                .get().body();
        System.out.println(body.text());
    }

    /**
     * Downloads the article with a plain {@link URLConnection}, removes the {@code <head>}
     * element and prints the remainder converted to Markdown.
     *
     * @throws IOException if the page cannot be fetched or read
     */
    @Test
    public void test_html2md() throws IOException {
        String html = fetchHtml(ARTICLE_URL);
        String htmlStr = HEAD_ELEMENT.matcher(html).replaceAll("");
        System.out.println(converter.convert(htmlStr));
    }

    /**
     * Reads the full response body of {@code url} as text.
     *
     * @param url address to download
     * @return the response body decoded as UTF-8
     * @throws IOException if connecting or reading fails
     */
    private static String fetchHtml(String url) throws IOException {
        URLConnection conn = new URL(url).openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);

        StringBuilder sb = new StringBuilder();
        // try-with-resources closes both the reader and the underlying stream, including when
        // a read fails part-way. The charset is explicit so decoding does not depend on the
        // platform default.
        try (InputStream inputStream = conn.getInputStream();
             InputStreamReader isr = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) {
            char[] buffer = new char[BUFFER_CHARS];
            int len;
            while ((len = isr.read(buffer)) != -1) {
                sb.append(buffer, 0, len);
            }
        }
        return sb.toString();
    }

}
