添加excel转word表格

xml转word
9 months ago · 471e2701af
2 changed files with 117 additions and 0 deletions
--- a/excel2word_table.py
+++ b/excel2word_table.py
@ -0,0 +1,50 @@
 from docx import Document
 import pandas as pd
 def _cell_text(value):
    """Return *value* as text for a Word cell, mapping missing values to ''.

    Blank Excel cells are read by pandas as NaN (a float), while python-docx
    only accepts ``str`` when assigning ``cell.text``.
    """
    return "" if pd.isnull(value) else str(value)


 # Convert every sheet of the workbook into headings and per-table column
 # listings appended to the template document, then save the result as
 # out2.docx.  The template must provide the 'tablename', 'table1' and
 # 'table_raw' styles used below.
 doc = Document('temp.docx')
 xls = pd.ExcelFile("数据湖和大数据相关数据库信息.xlsx")
 head_lines = ["字段", "名称", "数据类型", "主键", "非空", "数据条数", "备注"]
 # Process the workbook sheet by sheet.
 for sheet_name in xls.sheet_names:
    df = xls.parse(sheet_name)
    last_basename = ""
    last_tablename = ""
    doc_table = None
    for index, row in df.iterrows():
        # The row labelled 0 is skipped, exactly as before.
        # NOTE(review): presumably a second header line in the sheet - confirm.
        if index == 0:
            continue
        # Explicit positional access; plain row[n] on a string-labelled Series
        # relies on a deprecated pandas fallback.
        base_name = row.iloc[0]
        table_name = row.iloc[1]
        table_title = row.iloc[2]

        # Sheet column 0 drives the level-2 headings.
        base_changed = base_name != last_basename
        if base_changed:
            doc.add_heading(base_name, 2)
            last_basename = base_name

        # Start a new Word table when the table name changes, and also when
        # the heading changed: otherwise a table with the same name under the
        # next heading would be appended to the previous table.
        if base_changed or table_name != last_tablename:
            if pd.isnull(table_title):
                caption = "表：" + str(table_name) + "列清单"
            else:
                caption = "表：" + str(table_title) + "(" + str(table_name) + ")列清单"
            doc.add_paragraph(caption, style='tablename')
            last_tablename = table_name
            doc_table = doc.add_table(rows=1, cols=7, style="table1")
            for header_cell, head_line in zip(doc_table.rows[0].cells, head_lines):
                header_cell.text = head_line

        # Fill one data row; target cells follow the order of head_lines.
        # The seventh cell is left empty, as in the original script.
        doc_row = doc_table.add_row().cells
        doc_row[0].text = _cell_text(row.iloc[4])
        if not pd.isnull(row.iloc[5]):
            doc_row[1].text = str(row.iloc[5])
        if pd.isnull(row.iloc[7]):
            doc_row[2].text = _cell_text(row.iloc[6])
        else:
            # Sheet column 7 is rendered without decimals, e.g. "varchar(255)".
            doc_row[2].text = str(row.iloc[6]) + "(" + f"{row.iloc[7]:.0f}" + ")"
        doc_row[3].text = _cell_text(row.iloc[9])
        doc_row[4].text = _cell_text(row.iloc[8])
        doc_row[5].text = _cell_text(row.iloc[3])

 # Apply the 'table_raw' paragraph style to every cell of every table in the
 # document (this includes any tables already present in the template).
 style = doc.styles['table_raw']
 for table in doc.tables:
    for row in table.rows:
        for cell in row.cells:
            for paragraph in cell.paragraphs:
                paragraph.style = style
 doc.save("out2.docx")
--- a/xml2word.py
+++ b/xml2word.py
@ -0,0 +1,67 @@
 import xml.etree.ElementTree as ET
 import glob
 from docx import Document
 def unify_response(input_str):
    """Normalize a yes/no style flag to the text 'YES' or 'NO'.

    The comparison is case-insensitive: 'yes' and 'true' map to 'YES',
    every other string maps to 'NO'.
    """
    is_affirmative = input_str.lower() in ('yes', 'true')
    return 'YES' if is_affirmative else "NO"
 # Collect the XML files to convert.  The raw string keeps the Windows-style
 # pattern byte-for-byte while avoiding the invalid "\9" and "\*" escape
 # sequences; sorting makes the section order reproducible because glob
 # returns matches in arbitrary order.
 xml_files = sorted(glob.glob(r'.\9-datastructure\*.xml'))
 # To debug a single file, use for example:
 # xml_files = [r".\9-datastructure\dsp_service.xml"]
 doc = Document('temp.docx')
 head_lines = ["字段", "名称", "数据类型", "主键", "非空", "外键", "备注"]
 for xml_file in xml_files:
    print("处理文件：" + xml_file)
    root = ET.parse(xml_file).getroot()
    database = root.find('database')
    if database is None:
        # Without a <database> child there is nothing to title the section
        # with; skip the file visibly instead of failing on None.get().
        print("跳过文件（未找到 database 元素）：" + xml_file)
        continue
    database_name = database.get('name') or ""
    doc.add_heading(database_name, 2)
    doc.add_paragraph("表：" + database_name, style='tablename')

    # Overview: one two-column table listing id and name of every <table>.
    tables = root.findall(".//table")
    doc_table = doc.add_table(rows=1, cols=2, style="table1")
    overview_header = doc_table.rows[0].cells
    overview_header[0].text = "id"
    overview_header[1].text = "name"
    for table in tables:
        overview_row = doc_table.add_row().cells
        # Missing attributes become empty cells; python-docx rejects None.
        overview_row[0].text = table.get("id") or ""
        overview_row[1].text = table.get("name") or ""

    # Detail: one seven-column table per <table>, one row per child element.
    for table in tables:
        table_id = table.get("id") or ""
        table_name = table.get("name") or ""
        doc.add_paragraph("表：" + table_name + "(" + table_id + ")列清单", style='tablename')
        # Initialize the header row.
        doc_table = doc.add_table(rows=1, cols=7, style="table1")
        for header_cell, head_line in zip(doc_table.rows[0].cells, head_lines):
            header_cell.text = head_line
        # Insert the data rows.
        for column in table:
            row = doc_table.add_row().cells
            row[0].text = column.get('id', '')
            column_name = column.get('name')
            if column_name is not None:
                row[1].text = column_name
            column_type = column.get('type', '')
            if "size" in column.attrib:
                row[2].text = column_type + "(" + column.attrib['size'] + ")"
            else:
                row[2].text = column_type
            # A column is marked as primary key when its name text contains
            # the marker "主键"; no other attribute is consulted.
            if column_name is not None and "主键" in column_name:
                row[3].text = "PRI"
            # An absent 'required' attribute is reported as "NO".
            row[4].text = unify_response(column.get('required', ''))
            # The foreign-key cell is always "NO"; the last cell stays empty.
            row[5].text = "NO"

 # Apply the 'table_raw' paragraph style to every cell of every table in the
 # document (this includes any tables already present in the template).
 style = doc.styles['table_raw']
 for table in doc.tables:
    for row in table.rows:
        for cell in row.cells:
            for paragraph in cell.paragraphs:
                paragraph.style = style
 doc.save("out.docx")