from docx import Document
import pandas as pd

# Build a Word document that lists, for every database/table found in the
# Excel workbook, a 7-column table describing the table's columns.
#
# Excel columns are read by POSITION (0-based):
#   0: database name (used as a level-2 heading)
#   1: table name
#   2: optional second table name, printed in front of column 1 when present
#   3: value written to the "数据条数" column
#   4: value written to the "字段" column
#   5: value written to the "名称" column
#   6: value written to the "数据类型" column
#   7: optional number appended in parentheses after column 6
#   8: value written to the "非空" column
#   9: value written to the "主键" column


def _cell_text(value):
    """Return *value* as text that can safely be assigned to a Word cell.

    Empty Excel cells (NaN/None/NaT) become an empty string instead of raising
    or printing "nan"; integer-valued floats (which pandas produces for
    numeric columns containing blanks) are printed without a trailing ".0".
    """
    if pd.isnull(value):
        return ""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _type_text(data_type, type_arg):
    """Return the data-type cell text, e.g. ``varchar(32)``.

    *type_arg* is appended in parentheses when it is not null. Numeric values
    are rendered without decimals; anything else is appended verbatim.
    """
    base = _cell_text(data_type)
    if pd.isnull(type_arg):
        return base
    try:
        return f"{base}({type_arg:.0f})"
    except (TypeError, ValueError):
        # Non-numeric argument (e.g. the string "10,2"): keep it as written.
        return f"{base}({type_arg})"


doc = Document('temp.docx')
head_lines = ["字段", "名称", "数据类型", "主键", "非空", "数据条数", "备注"]

# Process the workbook sheet by sheet; the context manager closes the file
# handle once every sheet has been read.
with pd.ExcelFile("数据湖和大数据相关数据库信息.xlsx") as xls:
    for sheet_name in xls.sheet_names:
        df = xls.parse(sheet_name)
        last_basename = ""
        last_tablename = ""
        doc_table = None
        # NOTE(review): the first data row of every sheet is skipped, exactly
        # as in the original script — presumably a second header row; confirm.
        # itertuples(name=None) yields plain tuples, so the columns are
        # accessed purely by position regardless of the sheet's header labels.
        for values in df.iloc[1:].itertuples(index=False, name=None):
            (base, table, table_alias, count, field,
             name, data_type, type_arg, not_null, primary_key) = values[:10]

            if base != last_basename:
                doc.add_heading(base, 2)
                last_basename = base
                # A new database always starts a new table, even when its
                # first table has the same name as the previous database's
                # last table.
                last_tablename = ""

            if table != last_tablename:
                if pd.isnull(table_alias):
                    title = f"表：{table}列清单"
                else:
                    title = f"表：{table_alias}({table})列清单"
                doc.add_paragraph(title, style='tablename')
                last_tablename = table
                doc_table = doc.add_table(rows=1, cols=7, style="table1")
                for cell, head_line in zip(doc_table.rows[0].cells, head_lines):
                    cell.text = head_line

            # Fill one data row; cell order follows head_lines. The 7th cell
            # ("备注") is intentionally left empty, as in the original script.
            doc_row = doc_table.add_row().cells
            doc_row[0].text = _cell_text(field)
            doc_row[1].text = _cell_text(name)
            doc_row[2].text = _type_text(data_type, type_arg)
            doc_row[3].text = _cell_text(primary_key)
            doc_row[4].text = _cell_text(not_null)
            doc_row[5].text = _cell_text(count)

# Apply the 'table_raw' paragraph style to every paragraph of every table
# cell in the document (this includes any tables already in temp.docx).
style = doc.styles['table_raw']
for table in doc.tables:
    for row in table.rows:
        for cell in row.cells:
            for paragraph in cell.paragraphs:
                paragraph.style = style
doc.save("out2.docx")