#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a Markdown document to Word (.docx) format."""

import importlib
import os
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path

try:
    import docx  # noqa: F401 -- availability probe only
except ImportError:
    print("正在安装必要的库...")
    # Install into the interpreter that is running this script: a bare
    # "pip" on PATH may belong to a different Python. The exit status is
    # deliberately not checked; if the install failed, the imports below
    # raise ImportError exactly as before.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "python-docx", "-q"],
        check=False,
    )
    # Let the import system notice the freshly installed package.
    importlib.invalidate_caches()

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Inches, Pt, RGBColor


class MarkdownToWord:
    """Convert a small subset of Markdown into a python-docx document.

    Handled constructs: ATX headings (``#`` to ``######``), fenced code
    blocks, pipe tables and plain paragraphs. Inline markup (bold, links,
    inline code, ...) is copied through verbatim. Blank lines and
    horizontal rules produce no output.

    All content is appended to ``self.doc``; converting several files with
    the same instance therefore concatenates them into one document.
    """

    # Font used for body text and table cells.
    BODY_FONT = '宋体'
    # Monospace font used for code blocks.
    CODE_FONT = 'Courier New'

    def __init__(self):
        self.doc = Document()
        self.setup_styles()

    @staticmethod
    def _apply_font(target, font_name):
        """Set *font_name* on a run or style, including the East Asian slot.

        ``font.name`` only writes the ``w:ascii`` / ``w:hAnsi`` attributes
        of ``w:rFonts``; CJK text is rendered with the ``w:eastAsia`` font,
        so that attribute has to be set explicitly.
        """
        target.font.name = font_name
        # font.name has just created rPr/rFonts, so both exist here.
        target._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)

    def setup_styles(self):
        """Configure the document's default ('Normal') style."""
        normal = self.doc.styles['Normal']
        self._apply_font(normal, self.BODY_FONT)
        normal.font.size = Pt(11)

    def add_heading(self, text, level=1):
        """Append a heading with the given text and outline level."""
        self.doc.add_heading(text, level=level)

    def add_paragraph(self, text=''):
        """Append a body paragraph; empty text is ignored (nothing is added)."""
        if not text:
            return
        paragraph = self.doc.add_paragraph(text)
        fmt = paragraph.paragraph_format
        fmt.space_before = Pt(6)
        fmt.space_after = Pt(6)
        fmt.line_spacing = 1.5

    @staticmethod
    def _collect_table_lines(lines, index):
        """Return ``(stripped_lines, end)`` for the pipe block at *index*.

        ``end`` is the index of the first line after the run of
        consecutive lines that start with ``|``.
        """
        end = index
        collected = []
        while end < len(lines):
            stripped = lines[end].strip()
            if not stripped.startswith('|'):
                break
            collected.append(stripped)
            end += 1
        return collected, end

    @staticmethod
    def _split_table_row(line):
        """Split one pipe-table line into stripped cell texts.

        The leading pipe and an optional trailing pipe are dropped, so
        ``| a | b`` and ``| a | b |`` both yield ``['a', 'b']``.
        """
        row = line.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    @staticmethod
    def _is_separator_row(cells):
        """Return True if *cells* form a delimiter row (``---``, ``:--:``)."""
        return bool(cells) and all(
            re.fullmatch(r':?-+:?', cell) for cell in cells
        )

    @staticmethod
    def _parse_heading(line):
        """Return ``(level, text)`` for an ATX heading line, else ``None``.

        A heading is one to six ``#`` characters followed by a space.
        """
        level = len(line) - len(line.lstrip('#'))
        if 1 <= level <= 6 and line[level:level + 1] == ' ':
            return level, line[level + 1:].strip()
        return None

    def _fill_cell(self, cell, text, bold=False):
        """Write *text* into a table cell using the body font."""
        cell.text = text
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                if bold:
                    run.font.bold = True
                self._apply_font(run, self.BODY_FONT)

    def add_table_from_markdown(self, lines, index):
        """Create a Word table from the Markdown pipe table at *index*.

        Args:
            lines: All lines of the Markdown document.
            index: Index of the first line of the table (starts with ``|``).

        Returns:
            The index of the first line after the consumed block, so the
            caller can continue from there.

        A block whose second line is not a delimiter row is not a table;
        its lines are emitted as plain paragraphs instead of being dropped.
        """
        table_lines, end = self._collect_table_lines(lines, index)
        rows = [self._split_table_row(raw) for raw in table_lines]

        if len(rows) < 2 or not self._is_separator_row(rows[1]):
            for raw in table_lines:
                self.add_paragraph(raw)
            return end

        header, body = rows[0], rows[2:]
        # Size the table for the widest row so that a data row with more
        # cells than the header neither crashes nor loses content.
        col_count = max(len(row) for row in [header, *body])

        table = self.doc.add_table(rows=1, cols=col_count)
        table.style = 'Light Grid Accent 1'

        for cell, text in zip(table.rows[0].cells, header):
            self._fill_cell(cell, text, bold=True)

        for row in body:
            for cell, text in zip(table.add_row().cells, row):
                self._fill_cell(cell, text)

        return end

    def add_code_block(self, language, code):
        """Append a shaded, monospace paragraph containing *code*.

        Args:
            language: Info string of the fence; when non-empty it is shown
                as an italic ``# language`` line above the code.
            code: The code text; newlines become line breaks in the run.
        """
        paragraph = self.doc.add_paragraph()

        # Background shading. It is added while w:pPr is still empty because
        # w:shd must precede w:spacing / w:ind in the schema, and python-docx
        # places those after existing children. w:val is a required
        # attribute of w:shd.
        shading = OxmlElement('w:shd')
        shading.set(qn('w:val'), 'clear')
        shading.set(qn('w:color'), 'auto')
        shading.set(qn('w:fill'), 'E8E8E8')
        paragraph._element.get_or_add_pPr().append(shading)

        fmt = paragraph.paragraph_format
        fmt.left_indent = Inches(0.25)
        fmt.space_before = Pt(6)
        fmt.space_after = Pt(6)

        if language:
            label = paragraph.add_run(f'# {language}\n')
            label.font.italic = True
            label.font.size = Pt(9)

        body = paragraph.add_run(code)
        body.font.name = self.CODE_FONT
        body.font.size = Pt(9)

    def convert_markdown_file(self, input_path, output_path):
        """Convert the Markdown file at *input_path* to a .docx file.

        Args:
            input_path: Path of the UTF-8 Markdown file to read.
            output_path: Path of the Word document to write.

        Returns:
            *output_path* if the file exists after saving, otherwise None.
        """
        print(f"正在读取: {input_path}")

        # utf-8-sig reads plain UTF-8 as well and drops a leading BOM, which
        # would otherwise keep the first heading from being recognised.
        with open(input_path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()

        print(f"文件包含 {len(lines)} 行")

        i = 0
        in_code = False
        code_language = ''
        code_lines = []

        while i < len(lines):
            line = lines[i].rstrip()

            # Fence line: toggles code mode, flushing the block on close.
            if line.startswith('```'):
                if in_code:
                    self.add_code_block(code_language, '\n'.join(code_lines))
                    code_language = ''
                    code_lines = []
                else:
                    code_language = line[3:].strip()
                in_code = not in_code
                i += 1
                continue

            if in_code:
                code_lines.append(line)
                i += 1
                continue

            # Table: the helper consumes the whole block and reports where
            # to continue (always past the current line).
            if line.strip().startswith('|'):
                i = self.add_table_from_markdown(lines, i)
                continue

            heading = self._parse_heading(line)
            if heading is not None:
                level, text = heading
                self.add_heading(text, level)
            elif line.strip() and line.strip() not in ('---', '***', '___'):
                self.add_paragraph(line)
            # Blank lines and horizontal rules intentionally emit nothing.

            i += 1

        # An unterminated fence at end of file still holds collected code.
        if in_code:
            self.add_code_block(code_language, '\n'.join(code_lines))

        print(f"正在保存到: {output_path}")
        self.doc.save(output_path)
        print("✓ 转换完成!")

        if os.path.exists(output_path):
            size = os.path.getsize(output_path) / 1024  # KB
            print(f"✓ 文件大小: {size:.1f} KB")
            return output_path

        return None


def main(workspace=r"d:\gitData\dongli_SocketServer"):
    """Convert the HTTP API integration guide in *workspace* to Word.

    Args:
        workspace: Directory that contains
            ``HTTP_API_INTEGRATION_GUIDE.md``; the ``.docx`` file is written
            next to it. Defaults to the original project directory.

    Prints an error message and returns if the Markdown file is missing.
    """
    input_file = os.path.join(workspace, "HTTP_API_INTEGRATION_GUIDE.md")
    output_file = os.path.join(workspace, "HTTP_API_INTEGRATION_GUIDE.docx")

    # isfile (not exists): a directory with that name cannot be converted.
    if not os.path.isfile(input_file):
        print(f"错误: 未找到文件 {input_file}")
        return

    # Build the converter only once there is something to convert.
    converter = MarkdownToWord()
    converter.convert_markdown_file(input_file, output_file)


if __name__ == '__main__':
    main()