From 471e2701af0289ace210cf06b8229c13528eb16d Mon Sep 17 00:00:00 2001
From: liuqingwen <liuqingwen@inspur.com>
Date: Thu, 25 Jul 2024 15:15:19 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0excel=E8=BD=ACword=E8=A1=A8?=
 =?UTF-8?q?=E6=A0=BC=20xml=E8=BD=ACword?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 excel2word_table.py | 50 +++++++++++++++++++++++++++++++++
 xml2word.py         | 67 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 excel2word_table.py
 create mode 100644 xml2word.py

diff --git a/excel2word_table.py b/excel2word_table.py
new file mode 100644
index 0000000..6e0a005
--- /dev/null
+++ b/excel2word_table.py
@@ -0,0 +1,110 @@
+"""Convert the column inventory kept in an Excel workbook into Word tables.
+
+Each sheet is read row by row, one row per database column. A level-2
+heading is written when the database name (1st column) changes, and a
+caption plus a new 7-column table when the database or the table name
+(2nd column) changes. The Word template has to define the styles
+``tablename``, ``table1`` and ``table_raw``.
+"""
+from docx import Document
+import pandas as pd
+
+TEMPLATE_PATH = 'temp.docx'
+WORKBOOK_PATH = "数据湖和大数据相关数据库信息.xlsx"
+OUTPUT_PATH = "out2.docx"
+head_lines = ["字段", "名称", "数据类型", "主键", "非空", "数据条数", "备注"]
+
+
+def _cell_text(value):
+    """Return *value* as text for a Word cell; empty (NaN) cells become ''."""
+    # Cell text must be a str: assigning a NaN or a number raises in python-docx.
+    return "" if pd.isnull(value) else str(value)
+
+
+def _size_text(size):
+    """Format a size: numbers are rounded to an integer, other values kept."""
+    try:
+        return f"{size:.0f}"
+    except (TypeError, ValueError):
+        # A textual size (e.g. "10,2") does not support the ".0f" format.
+        return str(size)
+
+
+def _add_column_table(doc):
+    """Append a table holding only the header row to *doc* and return it."""
+    doc_table = doc.add_table(rows=1, cols=len(head_lines), style="table1")
+    for cell, head_line in zip(doc_table.rows[0].cells, head_lines):
+        cell.text = head_line
+    return doc_table
+
+
+def add_sheet(doc, df):
+    """Append the headings, captions and column tables of one sheet to *doc*.
+
+    *df* is read by column position: 0 database name (heading), 1 table
+    name and 2 optional table title (caption), 3 -> "数据条数", 4 -> "字段",
+    5 -> "名称", 6 -> "数据类型" with 7 appended in parentheses when present,
+    8 -> "非空", 9 -> "主键". The "备注" cell is left empty.
+    """
+    last_basename = None
+    last_tablename = None
+    doc_table = None
+    for index, row in df.iterrows():
+        # NOTE(review): skips the row labelled 0, i.e. the first row after the
+        # header row consumed by pandas - presumably a second header; confirm.
+        if index == 0:
+            continue
+        # Use .iloc: integer keys on a label-indexed Series are deprecated.
+        basename = _cell_text(row.iloc[0])
+        tablename = _cell_text(row.iloc[1])
+        new_database = basename != last_basename
+        if new_database:
+            doc.add_heading(basename, 2)
+            last_basename = basename
+        # A new database starts a new table even when the table name repeats.
+        if new_database or tablename != last_tablename:
+            if pd.isnull(row.iloc[2]):
+                caption = "表：" + tablename + "列清单"
+            else:
+                caption = "表：" + str(row.iloc[2]) + "(" + tablename + ")列清单"
+            doc.add_paragraph(caption, style='tablename')
+            last_tablename = tablename
+            doc_table = _add_column_table(doc)
+
+        data_type = _cell_text(row.iloc[6])
+        if not pd.isnull(row.iloc[7]):
+            data_type += "(" + _size_text(row.iloc[7]) + ")"
+        # Fill one data row, in the column order of head_lines.
+        doc_row = doc_table.add_row().cells
+        doc_row[0].text = _cell_text(row.iloc[4])
+        doc_row[1].text = _cell_text(row.iloc[5])
+        doc_row[2].text = data_type
+        doc_row[3].text = _cell_text(row.iloc[9])
+        doc_row[4].text = _cell_text(row.iloc[8])
+        doc_row[5].text = _cell_text(row.iloc[3])
+
+
+def _apply_cell_style(doc, style_name):
+    """Give every paragraph in every table cell of *doc* the named style."""
+    style = doc.styles[style_name]
+    for table in doc.tables:
+        for row in table.rows:
+            for cell in row.cells:
+                for paragraph in cell.paragraphs:
+                    paragraph.style = style
+
+
+def main(template_path=TEMPLATE_PATH, workbook_path=WORKBOOK_PATH,
+         output_path=OUTPUT_PATH):
+    """Build the Word document from the workbook and save it."""
+    doc = Document(template_path)
+    # The context manager closes the workbook once every sheet is processed.
+    with pd.ExcelFile(workbook_path) as xls:
+        for sheet_name in xls.sheet_names:
+            add_sheet(doc, xls.parse(sheet_name))
+    _apply_cell_style(doc, 'table_raw')
+    doc.save(output_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/xml2word.py b/xml2word.py
new file mode 100644
index 0000000..b7632ce
--- /dev/null
+++ b/xml2word.py
@@ -0,0 +1,112 @@
+"""Convert XML schema descriptions into Word tables.
+
+For every XML file matched by ``XML_PATTERN`` the document receives a level-2
+heading with the database name, an overview table listing the id and name of
+each ``<table>`` element, and one 7-column table per ``<table>`` with a row
+for each of its child elements. The Word template has to define the styles
+``tablename``, ``table1`` and ``table_raw``.
+"""
+import glob
+import os
+import xml.etree.ElementTree as ET
+
+from docx import Document
+
+# os.path.join keeps the pattern portable; a backslash-separated literal
+# would need a raw string and would only match with Windows separators.
+XML_PATTERN = os.path.join(".", "9-datastructure", "*.xml")
+head_lines = ["字段", "名称", "数据类型", "主键", "非空", "外键", "备注"]
+
+
+def unify_response(input_str):
+    """Map a yes/no style value to ``'YES'`` or ``'NO'``.
+
+    ``'yes'`` and ``'true'`` (any letter case, surrounding blanks ignored)
+    give ``'YES'``; every other value, including ``None``, gives ``'NO'``.
+    """
+    if input_str is None:
+        return "NO"
+    return 'YES' if input_str.strip().lower() in ('yes', 'true') else "NO"
+
+
+def _fill_row(cells, values):
+    """Write *values* into *cells* from left to right."""
+    for cell, value in zip(cells, values):
+        cell.text = value
+
+
+def _add_column_table(doc, table):
+    """Append the caption and the column table of one ``<table>`` element."""
+    caption = "表：" + table.get("name", "") + "(" + table.get("id", "") + ")列清单"
+    doc.add_paragraph(caption, style='tablename')
+    doc_table = doc.add_table(rows=1, cols=len(head_lines), style="table1")
+    _fill_row(doc_table.rows[0].cells, head_lines)
+    for column in table:
+        # .get() with a default: a missing attribute yields an empty cell
+        # instead of a KeyError that aborts the whole run.
+        name = column.get('name', '')
+        data_type = column.get('type', '')
+        if "size" in column.attrib:
+            data_type += "(" + column.attrib['size'] + ")"
+        row = doc_table.add_row().cells
+        row[0].text = column.get('id', '')
+        row[1].text = name
+        row[2].text = data_type
+        # The primary key is recognised by the "主键" marker in the name.
+        if "主键" in name:
+            row[3].text = "PRI"
+        row[4].text = unify_response(column.get('required'))
+        # Foreign keys are not evaluated; the cell always says "NO".
+        row[5].text = "NO"
+
+
+def add_database(doc, xml_file):
+    """Append everything generated from one XML file to *doc*.
+
+    Raises:
+        ValueError: if the root element has no ``<database>`` child.
+    """
+    print("处理文件：" + xml_file)
+    root = ET.parse(xml_file).getroot()
+    database = root.find('database')
+    if database is None:
+        raise ValueError("no <database> element below the root of " + xml_file)
+    database_name = database.get('name', '')
+    doc.add_heading(database_name, 2)
+    doc.add_paragraph("表：" + database_name, style='tablename')
+
+    # Overview: one row with id and name per <table> found anywhere in the file.
+    tables = root.findall(".//table")
+    overview = doc.add_table(rows=1, cols=2, style="table1")
+    _fill_row(overview.rows[0].cells, ["id", "name"])
+    for table in tables:
+        row = overview.add_row().cells
+        _fill_row(row, [table.get("id", ""), table.get("name", "")])
+
+    for table in tables:
+        _add_column_table(doc, table)
+
+
+def _apply_cell_style(doc, style_name):
+    """Give every paragraph in every table cell of *doc* the named style."""
+    style = doc.styles[style_name]
+    for table in doc.tables:
+        for row in table.rows:
+            for cell in row.cells:
+                for paragraph in cell.paragraphs:
+                    paragraph.style = style
+
+
+def main(pattern=XML_PATTERN, template_path='temp.docx', output_path="out.docx"):
+    """Convert every XML file matching *pattern* and save the Word document."""
+    doc = Document(template_path)
+    # sorted(): glob returns files in arbitrary order; sorting keeps the
+    # chapter order of the output reproducible.
+    for xml_file in sorted(glob.glob(pattern)):
+        add_database(doc, xml_file)
+    _apply_cell_style(doc, 'table_raw')
+    doc.save(output_path)
+
+
+if __name__ == "__main__":
+    main()