# data/iot/dongli_SocketServer.git

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
将Markdown文档转换为Word格式
"""
 
import os
from pathlib import Path
 
# Make sure python-docx is importable, installing it on first use if needed.
try:
    import docx  # noqa: F401  -- availability probe; real imports follow below
except ImportError:
    import importlib
    import subprocess
    import sys

    print("正在安装必要的库...")
    # Run pip through the interpreter that executes this script so that the
    # package is installed into the same environment that will import it.
    # The exit status is deliberately ignored (as before): if installation
    # failed, the imports below raise ImportError.
    subprocess.call(
        [sys.executable, "-m", "pip", "install", "python-docx", "-q"]
    )
    # The import system caches directory listings; refresh them so the
    # freshly installed package can be found in this process.
    importlib.invalidate_caches()

from docx import Document
from docx.shared import Pt, RGBColor, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
 
import re
from datetime import datetime
 
class MarkdownToWord:
    """Convert a Markdown file into a Word document using python-docx.

    Recognised constructs are ATX headings (``#`` to ``######``), fenced
    code blocks, pipe tables and plain paragraphs.  Inline markup is not
    interpreted; it is copied into the document as literal text.
    """

    # Font used for the Normal style and for table cell text.
    _BODY_FONT = '宋体'

    def __init__(self):
        self.doc = Document()
        self.setup_styles()

    @staticmethod
    def _set_east_asian_font(r_pr, font_name):
        """Set the East Asian font of a ``w:rPr`` element to *font_name*.

        Assigning ``font.name`` in python-docx does not write the
        ``w:eastAsia`` attribute of ``w:rFonts``, which is the one used for
        CJK characters, so it is set explicitly here.
        """
        r_pr.get_or_add_rFonts().set(qn('w:eastAsia'), font_name)

    @staticmethod
    def _split_table_row(line):
        """Return the stripped cell texts of one pipe-delimited table row.

        The leading pipe and, when present, the trailing pipe are removed
        before splitting, so a row written without a closing ``|`` keeps
        its last cell.
        """
        text = line.strip()
        if text.startswith('|'):
            text = text[1:]
        if text.endswith('|'):
            text = text[:-1]
        return [cell.strip() for cell in text.split('|')]

    @staticmethod
    def _parse_heading(line):
        """Return ``(level, text)`` for an ATX heading line, else ``None``."""
        # Try the longest prefix first so '## x' is not matched as level 1.
        for level in range(6, 0, -1):
            prefix = '#' * level + ' '
            if line.startswith(prefix):
                return level, line[len(prefix):]
        return None

    def _fill_cell(self, cell, text, bold=False):
        """Write *text* into a table cell and apply the body font to it."""
        cell.text = text
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                if bold:
                    run.font.bold = True
                run.font.name = self._BODY_FONT
                self._set_east_asian_font(
                    run._element.get_or_add_rPr(), self._BODY_FONT
                )

    def setup_styles(self):
        """Configure the document's Normal style (font and size)."""
        style = self.doc.styles['Normal']
        style.font.name = self._BODY_FONT
        style.font.size = Pt(11)
        self._set_east_asian_font(
            style.element.get_or_add_rPr(), self._BODY_FONT
        )

    def add_heading(self, text, level=1):
        """Append a heading with the given text and level."""
        self.doc.add_heading(text, level=level)

    def add_paragraph(self, text=''):
        """Append a body paragraph; does nothing when *text* is empty."""
        if text:
            p = self.doc.add_paragraph(text)
            p_format = p.paragraph_format
            p_format.space_before = Pt(6)
            p_format.space_after = Pt(6)
            p_format.line_spacing = 1.5

    def add_table_from_markdown(self, lines, index):
        """Create a Word table from the Markdown table starting at *index*.

        Args:
            lines: All lines of the Markdown document.
            index: Position in *lines* of the first table line.

        Returns:
            The index of the first line after the table when a table was
            created, or *index* unchanged when the run of pipe-prefixed
            lines is shorter than three lines (header, separator, data)
            and therefore no table was created.
        """
        table_lines = []
        end = index

        # Collect the consecutive lines that start with a pipe.
        while end < len(lines):
            line = lines[end].strip()
            if not line.startswith('|'):
                break
            table_lines.append(line)
            end += 1

        if len(table_lines) < 3:
            return index

        header = self._split_table_row(table_lines[0])

        table = self.doc.add_table(rows=1, cols=len(header))
        table.style = 'Light Grid Accent 1'

        # Header row, rendered in bold.
        for cell, cell_text in zip(table.rows[0].cells, header):
            self._fill_cell(cell, cell_text, bold=True)

        # Data rows; table_lines[1] is the separator row and is skipped.
        for line in table_lines[2:]:
            row_cells = table.add_row().cells
            # zip() stops at the shorter side, so a row with more cells
            # than the header is truncated instead of raising IndexError.
            for cell, cell_text in zip(row_cells, self._split_table_row(line)):
                self._fill_cell(cell, cell_text)

        return end

    def add_code_block(self, language, code):
        """Append a shaded, monospaced paragraph containing *code*.

        When *language* is non-empty, a ``# <language>`` line in italics is
        placed before the code.
        """
        p = self.doc.add_paragraph()

        # Grey background.  The shading element is added before indent and
        # spacing are set so that it precedes them inside w:pPr, and it
        # carries w:val, which the WordprocessingML schema requires.
        shading_elm = OxmlElement('w:shd')
        shading_elm.set(qn('w:val'), 'clear')
        shading_elm.set(qn('w:color'), 'auto')
        shading_elm.set(qn('w:fill'), 'E8E8E8')
        p._element.get_or_add_pPr().append(shading_elm)

        p.paragraph_format.left_indent = Inches(0.25)
        p.paragraph_format.space_before = Pt(6)
        p.paragraph_format.space_after = Pt(6)

        # Language marker.
        if language:
            run = p.add_run(f'# {language}\n')
            run.font.italic = True
            run.font.size = Pt(9)

        # The code itself.
        run = p.add_run(code)
        run.font.name = 'Courier New'
        run.font.size = Pt(9)

    def convert_markdown_file(self, input_path, output_path):
        """Convert the Markdown file at *input_path* and save it as Word.

        Args:
            input_path: Path of the UTF-8 encoded Markdown file to read.
            output_path: Path of the .docx file to write.

        Returns:
            *output_path* if the file exists after saving, otherwise None.
        """
        print(f"正在读取: {input_path}")

        with open(input_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        print(f"文件包含 {len(lines)} 行")

        i = 0
        in_code = False
        code_language = ''
        code_lines = []

        while i < len(lines):
            line = lines[i].rstrip()

            # Code fences toggle code-block mode.
            if line.startswith('```'):
                if in_code:
                    self.add_code_block(code_language, '\n'.join(code_lines))
                    in_code = False
                    code_language = ''
                    code_lines = []
                else:
                    in_code = True
                    code_language = line[3:].strip()
                i += 1
                continue

            if in_code:
                code_lines.append(line)
                i += 1
                continue

            # Tables: a run of lines starting with a pipe.
            if line.strip().startswith('|'):
                rendered = self.add_table_from_markdown(lines, i) != i
                while i < len(lines) and lines[i].strip().startswith('|'):
                    if not rendered:
                        # Too short to be a table: keep the text as plain
                        # paragraphs rather than dropping it.
                        self.add_paragraph(lines[i].rstrip())
                    i += 1
                continue

            heading = self._parse_heading(line)
            if heading is not None:
                level, text = heading
                self.add_heading(text, level)
            elif line.strip() in ('---', '***', '___'):
                # Horizontal rule: add_paragraph() without text adds nothing.
                self.add_paragraph()
            elif line.strip():
                self.add_paragraph(line)
            else:
                # Blank line: add_paragraph() without text adds nothing.
                self.add_paragraph()

            i += 1

        # A fence left open at end of file still holds collected code.
        if in_code:
            self.add_code_block(code_language, '\n'.join(code_lines))

        # Save the Word document.
        print(f"正在保存到: {output_path}")
        self.doc.save(output_path)
        print("✓ 转换完成！")

        # Report the size of the written file.
        if os.path.exists(output_path):
            size = os.path.getsize(output_path) / 1024  # KB
            print(f"✓ 文件大小: {size:.1f} KB")
            return output_path

        return None
 
def main(workspace=r"d:\gitData\dongli_SocketServer"):
    """Convert the HTTP API integration guide found in *workspace*.

    Reads ``HTTP_API_INTEGRATION_GUIDE.md`` from *workspace* and writes
    ``HTTP_API_INTEGRATION_GUIDE.docx`` next to it.

    Args:
        workspace: Directory containing the Markdown file.  Defaults to the
            original project location.

    Returns:
        The value returned by ``MarkdownToWord.convert_markdown_file``, or
        None when the Markdown file does not exist.
    """
    input_file = os.path.join(workspace, "HTTP_API_INTEGRATION_GUIDE.md")
    output_file = os.path.join(workspace, "HTTP_API_INTEGRATION_GUIDE.docx")

    # Check the input first so no document is built when there is nothing
    # to convert.
    if not os.path.exists(input_file):
        print(f"错误: 未找到文件 {input_file}")
        return None

    converter = MarkdownToWord()
    return converter.convert_markdown_file(input_file, output_file)


if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the workspace.
    main(*sys.argv[1:2])