From 471e2701af0289ace210cf06b8229c13528eb16d Mon Sep 17 00:00:00 2001
From: liuqingwen
Date: Thu, 25 Jul 2024 15:15:19 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0excel=E8=BD=ACword=E8=A1=A8?=
 =?UTF-8?q?=E6=A0=BC=20xml=E8=BD=ACword?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 excel2word_table.py | 69 ++++++++++++++++++++++++++++++++++++++++
 xml2word.py         | 77 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+)
 create mode 100644 excel2word_table.py
 create mode 100644 xml2word.py

diff --git a/excel2word_table.py b/excel2word_table.py
new file mode 100644
index 0000000..6e0a005
--- /dev/null
+++ b/excel2word_table.py
@@ -0,0 +1,69 @@
+"""Render the column inventory of an Excel workbook as Word tables.
+
+Every sheet is walked row by row and its cells are read by position:
+0 = heading text, 1 = table id, 2 = table display name, 3 = row count,
+4 = field, 5 = field name, 6 = data type, 7 = type size, 8 = not-null
+flag, 9 = primary-key flag. Output is appended to the template
+temp.docx and saved as out2.docx.
+"""
+from docx import Document
+import pandas as pd
+
+
+def fill_cell(cell, value):
+    """Write *value* into *cell* as text; a null value leaves it empty."""
+    if not pd.isnull(value):
+        cell.text = str(value)
+
+
+doc = Document('temp.docx')
+xls = pd.ExcelFile("数据湖和大数据相关数据库信息.xlsx")
+head_lines = ["字段", "名称", "数据类型", "主键", "非空", "数据条数", "备注"]
+# Process the workbook one sheet at a time.
+for sheet_name in xls.sheet_names:
+    df = xls.parse(sheet_name)
+    last_basename = ""
+    last_tablename = ""
+    doc_table = None
+    for index, row in df.iterrows():
+        # The row labelled 0 (first data row of the sheet) is not rendered.
+        if index == 0:
+            continue
+        # Cells are addressed by position, so use .iloc; on a label-indexed
+        # row, plain row[n] only works through a deprecated pandas fallback.
+        base_name, table_id, table_name = row.iloc[0], row.iloc[1], row.iloc[2]
+        if base_name != last_basename:
+            doc.add_heading(base_name, 2)
+            last_basename = base_name
+        if table_id != last_tablename:
+            # A new table id starts a new caption and a new Word table.
+            if pd.isnull(table_name):
+                doc.add_paragraph("表:" + str(table_id) + "列清单", style='tablename')
+            else:
+                doc.add_paragraph("表:" + str(table_name) + "(" + str(table_id) + ")列清单", style='tablename')
+            last_tablename = table_id
+            doc_table = doc.add_table(rows=1, cols=7, style="table1")
+            for cell, head_line in zip(doc_table.rows[0].cells, head_lines):
+                cell.text = head_line
+
+        # Data row, in head_lines order; the last column (备注) stays empty.
+        doc_row = doc_table.add_row().cells
+        fill_cell(doc_row[0], row.iloc[4])
+        fill_cell(doc_row[1], row.iloc[5])
+        type_size = row.iloc[7]
+        if pd.isnull(type_size):
+            fill_cell(doc_row[2], row.iloc[6])
+        else:
+            doc_row[2].text = f"{row.iloc[6]!s}({type_size:.0f})"
+        fill_cell(doc_row[3], row.iloc[9])
+        fill_cell(doc_row[4], row.iloc[8])
+        doc_row[5].text = str(row.iloc[3])
+
+# Apply the 'table_raw' paragraph style to every cell of every table.
+style = doc.styles['table_raw']
+for table in doc.tables:
+    for row in table.rows:
+        for cell in row.cells:
+            for paragraph in cell.paragraphs:
+                paragraph.style = style
+doc.save("out2.docx")
diff --git a/xml2word.py b/xml2word.py
new file mode 100644
index 0000000..b7632ce
--- /dev/null
+++ b/xml2word.py
@@ -0,0 +1,77 @@
+"""Render database-structure XML files as Word tables.
+
+For every *.xml file in the 9-datastructure directory this appends a
+heading, an id/name overview table and one column-list table per
+<table> element to the template temp.docx, then saves out.docx.
+"""
+import glob
+import os
+import xml.etree.ElementTree as ET
+
+from docx import Document
+
+
+def unify_response(input_str):
+    """Map a boolean-like string to 'YES' or 'NO'.
+
+    'yes' and 'true' in any letter case give 'YES'; anything else 'NO'.
+    """
+    return 'YES' if input_str.lower() in ('yes', 'true') else "NO"
+
+
+# Collect the XML files. os.path.join builds the pattern without the
+# invalid "\9" and "\*" escapes a plain backslash literal would contain.
+xml_files = glob.glob(os.path.join('.', '9-datastructure', '*.xml'))
+doc = Document('temp.docx')
+head_lines = ["字段", "名称", "数据类型", "主键", "非空", "外键", "备注"]
+for xml_file in xml_files:
+    print("处理文件:" + xml_file)
+    root = ET.parse(xml_file).getroot()
+
+    database = root.find('database')
+    doc.add_heading(database.get('name'), 2)
+
+    # Overview table: one id/name row per <table> element in the file.
+    doc.add_paragraph("表:" + database.get('name'), style='tablename')
+    tables = root.findall(".//table")
+    doc_table = doc.add_table(rows=1, cols=2, style="table1")
+    row = doc_table.rows[0].cells
+    row[0].text = "id"
+    row[1].text = "name"
+    for table in tables:
+        row = doc_table.add_row().cells
+        row[0].text = table.get("id")
+        row[1].text = table.get("name")
+
+    for table in tables:
+        doc.add_paragraph("表:" + table.get("name") + "(" + table.get("id") + ")列清单", style='tablename')
+        # Header row.
+        doc_table = doc.add_table(rows=1, cols=7, style="table1")
+        for cell, head_line in zip(doc_table.rows[0].cells, head_lines):
+            cell.text = head_line
+        # One data row per child element of <table>.
+        for column in table:
+            attrs = column.attrib
+            row = doc_table.add_row().cells
+            row[0].text = attrs['id']
+            if "name" in attrs:
+                row[1].text = attrs['name']
+            if "size" in attrs:
+                row[2].text = attrs['type'] + "(" + attrs['size'] + ")"
+            else:
+                row[2].text = attrs['type']
+            # A name containing "主键" marks the column as primary key.
+            if "主键" in attrs.get('name', ''):
+                row[3].text = "PRI"
+            # A missing "required" attribute is reported as "NO".
+            row[4].text = unify_response(attrs.get('required', ''))
+            row[5].text = "NO"
+
+# Apply the 'table_raw' paragraph style to every cell of every table.
+style = doc.styles['table_raw']
+for table in doc.tables:
+    for row in table.rows:
+        for cell in row.cells:
+            for paragraph in cell.paragraphs:
+                paragraph.style = style
+doc.save("out.docx")