package cn.com.poc.common;

import cn.com.poc.common.utils.MD2Json;
import cn.com.yict.framemax.core.spring.SingleContextInitializer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;

/**
 * Smoke tests that feed Markdown text to {@link MD2Json#md2json} and print the result.
 *
 * <p>The class runs under {@link SpringJUnit4ClassRunner} with a web application context
 * bootstrapped through {@link SingleContextInitializer}.
 *
 * <p>NOTE(review): neither test asserts anything about the converted output; each one only
 * prints it to standard output, so a test fails only when the conversion throws. Consider
 * adding assertions once the expected JSON is pinned down.
 *
 * <p>NOTE(review): {@code @date} is not a standard Javadoc tag and may be reported by doclint.
 *
 * @author alex.yao
 * @date 2025/10/30
 */

@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(initializers = SingleContextInitializer.class)
@WebAppConfiguration
public class UtilTest {

    /**
     * Converts a long Chinese-language article and prints the conversion result.
     *
     * <p>The input consists of plain paragraphs and numbered section titles separated by
     * blank lines; it contains no {@code #} heading markers.
     *
     * <p>NOTE(review): the method name suggests this covers a case where the input is not
     * treated as a "paper" — presumably a mode of {@code MD2Json}; confirm against that class.
     *
     * <p>NOTE(review): the fixture text carries a notice that republication requires the
     * author's permission; consider replacing it with synthetic text.
     */
    @Test
    public void test_md2jsonToPaperIsFalse() {
        String md = "OCR技术的演进，正在进入一场“终局之战”：一边是DeepSeek以大模型为武器的激进突围，另一边是百度以生态与数据壁垒构筑的稳固防线。这不仅是技术路线的分歧，更是AI时代对“认知边界”的重新定义。本文将从技术架构、产品策略与行业格局三方面，解析这场OCR领域的关键博弈。\n" +
                "\n" +
                "\n" +
                "在AI技术上，OCR（光学字符识别）一度是个传统甚至有点无聊的赛道。它就像一个勤勤恳恳的“数字档案员”，埋头从图片里提取文本。但现在，这个赛道突然变得无比“性感”。\n" +
                "\n" +
                "因为它的核心任务已经变了。\n" +
                "\n" +
                "一场关于文档智能的“范式革命”已经打响。近期，DeepSeek和百度相继亮出的“王牌”，不仅仅是技术迭代，更是两种截然不同的AI发展哲学的正面碰撞。\n" +
                "\n" +
                "作为产品经理，我们必须看懂：这场对决的表象之下，隐藏着文档AI市场未来的两条“生死线”。这关乎我们未来是选择一个“革命性”的标准，还是一个“极致好用”的工具。\n" +
                "\n" +
                "1. DeepSeek的豪赌：一场“AI为AI”的标准之战\n" +
                "DeepSeek-OCR瞄准的痛点，不是你的业务流程，而是AI自己。\n" +
                "\n" +
                "大型语言模型（LLM，Large Language Model）非常强大，但它们有两个“致命”缺陷：\n" +
                "\n" +
                "成本高昂：处理海量文档（即“长上下文”）时，Token（代币）消耗是天文数字。\n" +
                "理解肤浅：传统的OCR“喂”给LLM的是一长串纯文本。这等于把一本图文并茂、布局精良的杂志撕碎，只保留文字，完全“丢失”了版式、表格、图表等关键的「二维空间信息」。\n" +
                "DeepSeek的解决方案叫“光学压缩”（Contexts Optical Compression），这是一个极具“革命性”甚至“攻击性”的思路。\n" +
                "\n" +
                "它不提取文本，而是将整个文档页面——包含所有布局、表格、公式和图片——直接“压缩”成一种高密度的“视觉Token”（视觉代币）。\n" +
                "\n" +
                "打个比方，它不是在“复述”这本书的内容，而是给LLM提供了一个「高保真的缩略图」。一个原本需要数千个文本Token才能表示的页面，现在可能只需要一百到几百个视觉Token。\n" +
                "\n" +
                "这带来的好处是双重的：\n" +
                "\n" +
                "成本骤降：Token数量级减少，处理效率（吞吐量）暴增。\n" +
                "理解保真：AI第一次能“看”到原始的版式，它知道这是一个表格，那是一个分栏。\n" +
                "DeepSeek的战略意图是「AI for AI」。它真正的用户是“AI模型本身”。它在赌自己的这套“视觉压缩标准”能成为下一代AI处理海量知识的基石，成为AI世界的“PDF”。\n" +
                "\n" +
                "但这正是一场豪赌。作为PM，我们能立刻看穿它的GTM（Go-to-Market，推向市场）难题：它在试图建立一个“专有标准”。它如何说服全世界的AI开发者（比如OpenAI、Anthropic，乃至百度自己）放弃他们苦心经营的视觉编码器，转而“适配”它的压缩标准？\n" +
                "\n" +
                "这就像一场操作系统之战。DeepSeek的成功，取决于它能否快速建立一个“开发者生态”，让它的“视觉Token”成为AI-Native（AI原生）知识库的“新事实标准”。如果赌赢了，它将掌握下一代AI的“数据入口”；如果赌输了，它就只是一个“屠龙之技”。\n" +
                "\n" +
                "2. 百度的堡垒：一座“AI为商业”的工程巅峰\n" +
                "与DeepSeek的“未来主义”豪赌不同，百度的哲学完全相反，它立足于“当下”。\n" +
                "\n" +
                "百度PaddleOCR-VL的目标用户不是未来的AI，而是「今天」的企业。它的用户是正在灯下审核那张「手写金额模糊不清」的发票的财务、是正在处理「跨页合并单元格」的金融分析师、是正在录入「印刷体与手写体混合」保单的运营。\n" +
                "\n" +
                "这些用户不需要“光学压缩”这么性感的概念，他们需要的是“稳定”、“精准”，以及“立刻能用”。\n" +
                "\n" +
                "因此，百度走的是一条“工程即SOTA（State-of-the-Art，顶尖水平）”的务实路线。它推出的VLM（多模态视觉语言模型），就是为了攻克企业文档里那些最硬的骨头。\n" +
                "\n" +
                "百度的战略是构建「AI for Business」的坚固堡垒。\n" +
                "\n" +
                "它通过在权威基准测试上“屠榜”，向市场证明：在解决企业真实、复杂、棘手的文档解析问题上，我是最可靠的。这份“可靠性”就是它最深的护城河。\n" +
                "\n" +
                "同时，百度强大的生态（飞桨PaddlePaddle深度学习平台）和成熟的商业化部署方案（公有云、私有化部署、离线SDK），使其能快速将这种SOTA能力转化为“企业级解决方案”。它不是在“卖模型”，它是在“卖集成”，卖一个可以直接「嵌入」到你现有ERP（Enterprise Resource Planning，企业资源计划）、财务软件和业务流中的“即战力”。\n" +
                "\n" +
                "但百度同样面临一个“SOTA陷阱”。这个陷阱源于经典的“创新者窘境”。百度今天引以为傲的“极限精度”（比如比通用模型高5%的准确率），是它投入巨大工程资源“卷”出来的。\n" +
                "\n" +
                "但当3-5年后，通用的GPT-6或文心N代模型「开箱即用」就能解决95%的文档任务时，企业是否还愿意为百度那“额外5%”的极限精度，支付高昂的定制和私有化部署费用？\n" +
                "\n" +
                "当“足够好”的通用AI变得触手可及时，专业工具的生存空间就会被严重挤压。百度的“堡垒”要想不被攻破，就必须在通用AI彻底成熟前，完成从“卖工具”到“锁定工作流”的转型。\n" +
                "\n" +
                "3. PM的战场：从“技术边界”到“用户习惯”\n" +
                "作为产品经理，在技术选型之外，我们还有两个更重要的战场：\n" +
                "\n" +
                "战场一：厘清“技术边界”\n" +
                "我们必须停止将所有“看图”的任务都称为OCR。这是最容易犯的认知错误。\n" +
                "\n" +
                "皮肤诊断、衣料识别：这是典型的CV（ComputerVision，计算机视觉）任务。它的核心是“模式识别”和“图像分类”，与读取字符无关。\n" +
                "辅助视障人士：这是一个“混合应用”的绝佳范例。当用户需要“阅读”菜单时，调用OCR；当用户需要“避开”障碍物时，调用CV的“物体检测”。\n" +
                "PM的职责，就是精准定义“用户要解决的问题”，然后匹配正确的AI能力。把一个CV问题交给OCR团队，无异于缘木求鱼。\n" +
                "\n" +
                "战场二：攻克“用户习惯”的最后一公里\n" +
                "这可能是比技术本身更难的挑战。VLM的终局是“自然语言交互”，但我们当下的用户，是被“字段框”驯化了二十年的财务和法务。\n" +
                "\n" +
                "我们如何将一个习惯了“在‘金额’字段框里审核数字”的财务人员，平稳过渡到“在一个聊天框里向AI提问”的全新交互模式？\n" +
                "\n" +
                "「用户的操作习惯」是比技术更难攻克的堡垒。\n" +
                "\n" +
                "我们的解决方案不该是“革命”式的替换，而应是“渐进式”的引导。\n" +
                "\n" +
                "“可信UI”（TrustableUI）：这是转型的第一步。当VLM分析完一份合同，它不应只给出一个“答案”。它必须在原始文档上「高亮」它引用的关键证据，并给出“置信度”打分。这就像一个“Diff视图”（差异对比），它帮助用户建立信任，将角色从“数据录入员”平滑过渡到「AI审核员」。\n" +
                "“混合式交互”（HybridInteraction）：不要强迫用户二选一。保留用户熟悉的“字段框”，但同时在旁边提供一个“AI助手”聊天框。让用户可以继续点击字段，也可以随时输入：“帮我找出这家供应商过去6个月的所有发票”。让新旧两种交互模式并存，用“体验优势”自然引导用户迁移。\n" +
                "聚焦“JTBD”（Jobs-to-be-Done）：我们的终极目标不是“识别”。用户的“待办任务”不是“OCR一张发票”，而是“在月底前合规地关闭账目”。这意味着我们的产品必须超越“提取”，深入到“校验”、“审批”、“归档”乃至“支付”的整个「工作流」。\n" +
                "4. 未来的“三步曲”：谁将定义OCR的终局？\n" +
                "这场对决的真正走向，将关乎我们未来如何与信息交互。\n" +
                "\n" +
                "第一阶段：「混合并行期」 (2025-2026)市场将明显分化。百度的“工程派”VLM将继续主导对“可靠性”要求极高的企业级结构化工作流（如财税、保单、物流单据）。 与此同时，DeepSeek的“革命派”方案将在非结构化领域（如科研、法律电子取证、R&D）爆发。在这些场景下，「理解500页的PDF技术文档」远比“提取3个字段”更重要。\n" +
                "\n" +
                "第二阶段：「通用模型侵蚀期」 (2027-2028)通用VLM（如GPT-6或同等模型）将变得极其强大，它们的“泛用性”将严重“挤压”纯粹靠模型精度获利的服务商。 百度的生存空间，取决于它是否已成功转型为“深度嵌入业务的工作流SaaS”。它的护城河将不再是“模型精度”，而是「流程锁定」和“数据合规”。\n" +
                "\n" +
                "第三阶段：「感知即智能期」 (2029年以后)“OCR”这个词汇将逐渐消失。就像我们今天不再谈论“上网”一样，AI处理文档将成为一种“本能”，一种像“水电煤”一样的基础感知能力。\n" +
                "\n" +
                "这背后是一个更宏大的叙事：这一切都是在为“AI Agents”制造“眼睛”。\n" +
                "\n" +
                "一个“全自动财务审计Agent”必须具备“阅读”财报和发票的能力。我们今天所分析的，正是这些未来智能体的“感知引擎”。\n" +
                "\n" +
                "对于产品经理而言，这场对决的启示是：我们必须停止只关注“提取”的准确率。我们真正要设计的，是一个全新的、基于“智能感知”的工作流。\n" +
                "\n" +
                "在这场关乎“AI之眼”的终局之战中，DeepSeek和百度，刚刚分别从“未来”和“现在”两个方向，同时吹响了号角。\n" +
                "\n" +
                "本文由 @靠谱瓦叔 原创发布于人人都是产品经理。未经作者许可，禁止转载\n" +
                "\n" +
                "题图来自Unsplash，基于CC0协议";
        // Output is only printed, not asserted; inspect the console to judge the result.
        System.out.println(MD2Json.md2json(md));
    }

    /**
     * Converts a Markdown document with {@code #} section headings and prints the result.
     *
     * <p>Besides headings and short paragraphs, the input contains two Markdown image
     * links, hard line breaks (two trailing spaces before the newline), an escaped
     * asterisk, and numbered lines such as {@code "1. WORK PROGRAMME *"}.
     *
     * <p>NOTE(review): the text looks like machine-extracted content of a work-permit web
     * form (field labels followed by values) — presumably OCR/PDF-extraction output; confirm
     * the origin of the fixture.
     *
     * <p>NOTE(review): the fixture includes what appear to be real names, e-mail addresses
     * and phone numbers; consider anonymizing them.
     */
    @Test
    public void test_md2json() {
        String md = "# Work Permit Extension Details\n" +
                "\n" +
                "REQ250219010\n" +
                "\n" +
                "New\n" +
                "\n" +
                "WPO Validation\n" +
                "\n" +
                "Pending Approval\n" +
                "\n" +
                "Approved/Closed\n" +
                "\n" +
                "Application Details\n" +
                "\n" +
                "Review & Comment\n" +
                "\n" +
                "WPO Notes\n" +
                "\n" +
                "Extension History\n" +
                "\n" +
                "# Parent Work Permit\n" +
                "\n" +
                "PARENT WORK PERMIT NO.\n" +
                "\n" +
                "WP056183\n" +
                "\n" +
                "# Contract Information\n" +
                "\n" +
                "CONTRACT TYPE*\n" +
                "\n" +
                "Airport Authority Hong Kong\n" +
                "\n" +
                "CONTRACT NO.\n" +
                "\n" +
                "CR13/68/1\n" +
                "\n" +
                "CONTRACT TITLE\n" +
                "\n" +
                "Design, Supply and Installation of Smart Passenger Security Screening System at HKIA\n" +
                "\n" +
                "# Sponsoring Organization\n" +
                "\n" +
                "ORGANIZATION*\n" +
                "\n" +
                "Airport Authority Hong Kong\n" +
                "\n" +
                "REPRESENTATIVE 1*\n" +
                "\n" +
                "Tony S Y Chan\n" +
                "\n" +
                "MOBILE NO.\n" +
                "\n" +
                "OFFICE NO.\n" +
                "\n" +
                "63431672\n" +
                "\n" +
                "21836157\n" +
                "\n" +
                "EMAIL ADDRESS\n" +
                "\n" +
                "tony.sy.chan@hkairport.com\n" +
                "\n" +
                "More Sponsor\n" +
                "\n" +
                "# Contractor Information\n" +
                "\n" +
                "ORGANIZATION*\n" +
                "\n" +
                "Nuctech Hong Kong Company Limited\n" +
                "\n" +
                "REPRESENTATIVE\n" +
                "\n" +
                "Liu Chao\n" +
                "\n" +
                "OFFICE NO.\n" +
                "\n" +
                "31534542\n" +
                "\n" +
                "MOBILE NO.\n" +
                "\n" +
                "68815681\n" +
                "\n" +
                "EMAIL ADDRESS\n" +
                "\n" +
                "liuchao1@nuctech.com\n" +
                "\n" +
                "# Safety Representative Information\n" +
                "\n" +
                "REPRESENTATIVE\n" +
                "\n" +
                "Cheung Ka Yuen\n" +
                "\n" +
                "OFFICE NO. \\*\n" +
                "\n" +
                "2877 1933\n" +
                "\n" +
                "MOBILE NO. *\n" +
                "\n" +
                "62850087\n" +
                "\n" +
                "EMAIL ADDRESS\n" +
                "\n" +
                "cheungkayuen@nuctech.com\n" +
                "\n" +
                "# Work Details\n" +
                "\n" +
                "DESCRIPTION*\n" +
                "\n" +
                "Approve\n" +
                "\n" +
                "Reject\n" +
                "\n" +
                "Cancel Application\n" +
                "\n" +
                "Endorsers\n" +
                "\n" +
                "+\n" +
                "\n" +
                "Claire W N Chiu\n" +
                "\n" +
                "CHIUCWN@hkairport.com\n" +
                "\n" +
                "<\n" +
                "\n" +
                "Cher S Y Lau\n" +
                "\n" +
                "cher.lau@hkairport.com\n" +
                "\n" +
                "<\n" +
                "\n" +
                "Tommy C F Lee\n" +
                "\n" +
                "tommy.lee@hkairport.com\n" +
                "\n" +
                "<\n" +
                "\n" +
                "Approval Status\n" +
                "\n" +
                "No approvals requested yet.\n" +
                "\n" +
                "Sponsor Confirmation Status\n" +
                "\n" +
                "Tony S Y Chan\n" +
                "\n" +
                "Approved at 2025-02-19\n" +
                "\n" +
                "14:49:15\n" +
                "\n" +
                "Attachments\n" +
                "\n" +
                "Download All\n" +
                "\n" +
                "1. Work Programme - Work Progra\n" +
                "\n" +
                "mme for L7DIH Phase 2b.pdf\n" +
                "\n" +
                "Dismantle the existing X-ray Machines and Archway Metal Detectors in South Departure Immigration Hall and North Departure Immigration Hall. Delivery, installation, T&C for the new CT X-ray Machines, Archway Metal Detectors, Full Body Scanners, Explosives Trace Detectors, and Bottle Liquid Scanners.\n" +
                "\n" +
                "NO. OF WORKERS\n" +
                "\n" +
                "20\n" +
                "\n" +
                "# Proposed Working Date\n" +
                "\n" +
                "START DATE\n" +
                "\n" +
                "23-Feb-2025\n" +
                "\n" +
                "END DATE\n" +
                "\n" +
                "X\n" +
                "\n" +
                "30-Jun-2025\n" +
                "\n" +
                "X\n" +
                "\n" +
                "# Proposed Working Hours\n" +
                "\n" +
                "START TIME*\n" +
                "\n" +
                "00:00\n" +
                "\n" +
                "X\n" +
                "\n" +
                "23:59\n" +
                "\n" +
                "X\n" +
                "\n" +
                "# Work Method Statement\n" +
                "\n" +
                "TITLE\n" +
                "\n" +
                "Method Statement For L7DIH Phase 2\n" +
                "\n" +
                "DOCUMENT NO.\n" +
                "\n" +
                "REVISION (IF ANY)\n" +
                "\n" +
                "DATE\n" +
                "\n" +
                "22-Jul-2024\n" +
                "\n" +
                "![](https://cdn-mineru.openxlab.org.cn/result/2025-10-29/749571bc-546a-49c9-a42b-deb9e5c924ce/666dca006937e9b45b4498102698cf75834a712a2ceb200b05bef151bd446959.jpg)\n" +
                "\n" +
                "# Safety Plan / Safety Method Statement\n" +
                "\n" +
                "TITLE\n" +
                "\n" +
                "Safety Method Statement for DIH PHASE II\n" +
                "\n" +
                "DOCUMENT NO.\n" +
                "\n" +
                "REVISION (IF ANY)\n" +
                "\n" +
                "DATE\n" +
                "\n" +
                "6-Aug-2024\n" +
                "\n" +
                "×\n" +
                "\n" +
                "# Risk Assessment\n" +
                "\n" +
                "TITLE*\n" +
                "\n" +
                "Risk Assessment Report for DIH PHASE II\n" +
                "\n" +
                "REVISION (IF ANY)\n" +
                "\n" +
                "APPROVAL DATE\n" +
                "\n" +
                "6-Aug-2024\n" +
                "\n" +
                "X\n" +
                "\n" +
                "# Work Permit Processing Fee\n" +
                "\n" +
                "CONTRACT SUM*\n" +
                "\n" +
                "$ 808714200\n" +
                "\n" +
                "WORK PERMIT PROCESSING FEE\n" +
                "\n" +
                "$ 0\n" +
                "\n" +
                "# Insurance\n" +
                "\n" +
                "OCWIP / SELF-ARRANGED *\n" +
                "\n" +
                "OCWIP\n" +
                "\n" +
                "Self-arranged\n" +
                "\n" +
                "DECLARATION NO.\n" +
                "\n" +
                "# FRTMO Equipment\n" +
                "\n" +
                "REQUEST TO BORROW FRTMO EQUIPMENT*\n" +
                "\n" +
                "Yes  \n" +
                "No\n" +
                "\n" +
                "# Proposed Work Locations\n" +
                "\n" +
                "MAP OF PROPOSED WORK LOCATIONS *\n" +
                "\n" +
                "![](https://cdn-mineru.openxlab.org.cn/result/2025-10-29/749571bc-546a-49c9-a42b-deb9e5c924ce/77c73c104a853bbb2a62ac64dece4506ee48c85ed4c4d67d64aa5f5aad26bb66.jpg)\n" +
                "\n" +
                "Select Work Areas\n" +
                "\n" +
                "INDOOR ZONES\n" +
                "\n" +
                "Terminal 1/7/06,10\n" +
                "\n" +
                "LOCATION DESCRIPTION *\n" +
                "\n" +
                "South Departure Immigration Hall Zone10\n" +
                "\n" +
                "North Departure Immigration Hall Zone 06\n" +
                "\n" +
                "AREAS*\n" +
                "\n" +
                "AIRFIELD AREA  \n" +
                "TERMINAL (NON-RESTRICTED AREA)  \n" +
                "CARRIAGEWAY\n" +
                "\n" +
                "LANDSIDE AREA  \n" +
                "LANDSCAPE  \n" +
                "BAGGAGE HALL\n" +
                "\n" +
                "TERMINAL (RESTRICTED AREA)  \n" +
                "FOOTPATH  \n" +
                "APM AREA\n" +
                "\n" +
                "# High Risk Activities\n" +
                "\n" +
                "INVOLVED HIGH RISK ACTIVITIES\n" +
                "\n" +
                "Yes\n" +
                "\n" +
                "No\n" +
                "\n" +
                "ELECTRICAL HAZARD WORK  \n" +
                "HANDLING OF HAZARDOUS SUBSTANCES  \n" +
                "WORK IN CONFINED SPACE\n" +
                "\n" +
                "EXCAVATION WORK  \n" +
                "HOT WORK  \n" +
                "WORKING AT HEIGHT\n" +
                "\n" +
                "FIRE ISOLATION  \n" +
                "ROAD WORK AT 80KM/H SPEED LIMIT  \n" +
                "OTHERS\n" +
                "\n" +
                "PLEASE SPECIFY *\n" +
                "\n" +
                "Exposure to X-ray\n" +
                "\n" +
                "# Fire Warden\n" +
                "\n" +
                "NAME OF FIRE WARDEN\n" +
                "\n" +
                "MOBILE NO.\n" +
                "\n" +
                "# System Change Request\n" +
                "\n" +
                "INVOLVED SYSTEM CHANGE\n" +
                "\n" +
                "Yes  \n" +
                "No\n" +
                "\n" +
                "# Document Submission\n" +
                "\n" +
                "1. WORK PROGRAMME *\n" +
                "\n" +
                "1. Work Programme - Work Programme for L7DIH Phase 2b.pdf\n" +
                "\n" +
                "Delete\n" +
                "\n" +
                "2. SAFETY ORGANISATION CHART\n" +
                "\n" +
                "Choose File No file chosen\n" +
                "\n" +
                "3. RISK ASSESSMENT\n" +
                "\n" +
                "Choose File No file chosen\n" +
                "\n" +
                "4. TEMPORARY TRAFFIC MANAGEMENT PLAN\n" +
                "\n" +
                "Choose File No file chosen\n" +
                "\n" +
                "5. OTHER RELATED DOCUMENTS\n" +
                "\n" +
                "Choose File No file chosen";
        // Output is only printed, not asserted; inspect the console to judge the result.
        System.out.println(MD2Json.md2json(md));
    }
}
