From 7ea977268a8f09d3af048f7d9de9fdeea9278aac Mon Sep 17 00:00:00 2001 From: liuqingwen Date: Thu, 25 Jul 2024 15:09:42 +0800 Subject: [PATCH] init --- excel2word.py | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ excel2word2.py | 80 +++++++++++++++++++++++++++++++++++++++++++ execl.py | 53 +++++++++++++++++++++++++++++ temp.docx | Bin 0 -> 18724 bytes wordformat.py | 31 +++++++++++++++++ 5 files changed, 254 insertions(+) create mode 100644 excel2word.py create mode 100644 excel2word2.py create mode 100644 execl.py create mode 100644 temp.docx create mode 100644 wordformat.py diff --git a/excel2word.py b/excel2word.py new file mode 100644 index 0000000..d8e46f0 --- /dev/null +++ b/excel2word.py @@ -0,0 +1,90 @@ +import glob +import pandas as pd +from docx import Document + +# 目录级别后移,最小为0 +level_offset = 1 +name = False +header = False +headline = [] +# sheet页黑名单,当前所有表格中均不含退役军人项,该项为excel模版中带有的sheet页,如有例外需特殊处理。 +sheet_name_black_list = ['填表说明', '数据资源目录结构', '新增需求填报', '退役军人'] +column_black_list = ['期望更新周期'] + +# 获取全部excel表格 +# xlsx_files = glob.glob('./附件2*.xlsx') + +# sheet_name_black_list = ['填表说明', '数据资源目录结构', '新增需求填报'] +xlsx_files = ['附件2:自治区文化旅游主题共享资源库需求对接表_3932232206984868.xlsx'] + +for file in xlsx_files: + # 打开模板word文件 + doc = Document('temp.docx') + print(f"正在处理文件: {file}") + + # 读取Excel文件 + xls = pd.ExcelFile(file) + # 逐sheet处理 + for sheet_name in xls.sheet_names: + level_offset = 1 + name = False + header = False + if sheet_name in sheet_name_black_list: + continue + print(f"正在处理工作表: {sheet_name}") + # 读取当前工作表到DataFrame + df = xls.parse(sheet_name) + doc.add_heading(sheet_name, 1 + level_offset) + initialized_header = False + + third_index = 1 + items_index = 1 + for index, row in df.iterrows(): + # 检查当前行的E列是否为空,为空则说明为新表,重置表名及表头标记 + if pd.isnull(row[4]): + level_offset = 1 + name = False + header = False + # 检查当前行的B列是否为空,为空说明整行为空要跳过 + if pd.isnull(row[1]): + continue + # 只要符合模式,新表第一行B列一定为表名 + if not name: + # 去除表名开头的“中文数字、” + table_name = str.split(row[1], '、', 1)[1] + name = True + if table_name == sheet_name: + level_offset -= 1 + continue + else: + doc.add_heading(table_name, 2 + level_offset) + continue + # 只要符合模式,新表第二行一定为表头 + if not header: + if not initialized_header: + initialized_header = True + headline.clear() + new_row = row[4:] + for cell in new_row: + headline.append(cell) + header = True + continue + # 只要符合模式,新表从第三行开始,非空的D列一定为子表名 + if not pd.isnull(row[3]): + if sheet_name == str(row[3]).replace(' ', '') or table_name == str(row[3]).replace(' ', ''): + level_offset -= 1 + else: + doc.add_heading(str(row[3]).replace(' ', ''), 3 + level_offset) + items_index = 1 + # 默认正常的E列(非表中间间隔)不为空,为空会出错 + doc.add_heading(row[4], 4 + level_offset) + for head, cell in zip(headline, row[4:]): + if head in column_black_list: + continue + doc.add_paragraph(head + ":" + str(cell).replace('\n', '').replace('nan', '无')) + items_index += 1 + # 文件另存为 + out_file_name = file.split(':')[1].split('_')[0] + doc.save(out_file_name + ".docx") + xls.close() +print("处理完成") diff --git a/excel2word2.py b/excel2word2.py new file mode 100644 index 0000000..237de15 --- /dev/null +++ b/excel2word2.py @@ -0,0 +1,80 @@ +import glob +import pandas as pd +from docx import Document + +# 目录级别后移,最小为0 +level_offset = 1 +name = False +header = False +headline = [] +# sheet页黑名单,当前所有表格中均不含退役军人项,该项为excel模版中带有的sheet页,如有例外需特殊处理。 +sheet_name_black_list = ['指标体系', '资源目录'] +column_black_list = ['期望更新周期'] + +# 获取全部excel表格 +xlsx_files = glob.glob('./附件2*.xlsx') + +# sheet_name_black_list = ['填表说明', '数据资源目录结构', '新增需求填报'] +# xlsx_files = ['附件2:自治区文化旅游主题共享资源库需求对接表_3932232206984868.xlsx'] + +for file in xlsx_files: + # 打开模板word文件 + doc = Document('temp.docx') + print(f"正在处理文件: {file}") + + # 读取Excel文件 + xls = pd.ExcelFile(file) + # 逐sheet处理 + for sheet_name in xls.sheet_names: + name = False + header = False + if sheet_name in sheet_name_black_list: + continue + print(f"正在处理工作表: {sheet_name}") + # 读取当前工作表到DataFrame + df = xls.parse(sheet_name) + doc.add_heading(sheet_name, 1 + level_offset) + initialized_header = False + + third_index = 1 + items_index = 1 + for index, row in df.iterrows(): + # 检查当前行的E列是否为空,为空则说明为新表,重置表名及表头标记 + if pd.isnull(row[4]): + name = False + header = False + # 检查当前行的B列是否为空,为空说明整行为空要跳过 + if pd.isnull(row[1]): + continue + # 只要符合模式,新表第一行B列一定为表名 + if not name: + # 去除表名开头的“中文数字、” + doc.add_heading(str.split(row[1], '、', 1)[1], 2 + level_offset) + name = True + continue + # 只要符合模式,新表第二行一定为表头 + if not header: + if not initialized_header: + initialized_header = True + headline.clear() + new_row = row[4:] + for cell in new_row: + headline.append(cell) + header = True + continue + # 只要符合模式,新表从第三行开始,非空的D列一定为子表名 + if not pd.isnull(row[3]): + doc.add_heading(str(row[3]).replace(' ', ''), 3 + level_offset) + items_index = 1 + # 默认正常的E列(非表中间间隔)不为空,为空会出错 + doc.add_heading(row[4], 4 + level_offset) + for head, cell in zip(headline, row[4:]): + if head in column_black_list: + continue + doc.add_paragraph(head + ":" + str(cell).replace('\n', '').replace('nan', '无')) + items_index += 1 + # 文件另存为 + out_file_name = file.split(':')[1].split('_')[0] + doc.save(out_file_name + ".docx") + xls.close() +print("处理完成") diff --git a/execl.py b/execl.py new file mode 100644 index 0000000..c4b8b86 --- /dev/null +++ b/execl.py @@ -0,0 +1,53 @@ +import glob +import pandas as pd + +name = False +header = False +headline = [] +sheet_name_black_list = ['填表说明', '数据资源目录结构', '新增需求填报'] + +xlsx_files = glob.glob('./附件2*.xlsx') +for file in xlsx_files: + print(f"正在处理文件: {file}") + out_file_name = file.split(':')[1].split('_')[0] + # 读取Excel文件 + xls = pd.ExcelFile(file) + # 打开一个文件用于写入 + with open(out_file_name + '.txt', 'w', encoding='utf-8') as file: + for sheet_name in xls.sheet_names: + name = False + header = False + if sheet_name in sheet_name_black_list: + continue + print(f"正在处理工作表: {sheet_name}") + # 读取当前工作表到DataFrame + df = xls.parse(sheet_name) + file.write(sheet_name + "\n") + initialized_header = False + for index, row in df.iterrows(): + # 检查当前行的B列是否为空 + if pd.isnull(row[1]): + file.write("\n\n") + name = False + header = False + continue + if not name: + file.write(row[1] + "\n") + name = True + continue + if not header: + if not initialized_header: + initialized_header = True + new_row = [row[1]] + new_row.extend(row[4:]) + for cell in new_row: + headline.append(cell) + header = True + continue + new_row = [row[1]] + new_row.extend(row[4:]) + for head, cell in zip(headline, new_row): + file.write(head + ":" + str(cell).replace('\n', '') + "\n") + file.write("\n") + xls.close() +print("处理完成") diff --git a/temp.docx b/temp.docx new file mode 100644 index 0000000000000000000000000000000000000000..f89bdb8181daabf197cc091bcaf94345c0f8a06e GIT binary patch literal 18724 zcmeHvWmq1|vhIhwy99!}LvRc3?(XjH?(P!Y3GNQT-6gn7f;+)4S$plXvUAowSAL$e z=Xqv6n(4Q?XS%wox~pnrB|tz?0pI{g002M$2od*Y$^!xbDxd%W6aXZ!hJcN=qmi|v zj-s2bk%JbktCi*ZJWycr902fJ{eK_-gWo`3@|t-7A7aoMj2FbxO5UdeS@E!8As^Zm zzFCOH<7m0hZKJ2VXD6Hkg5qL%%yFxbtqv3S#;9pqu3Ed@$QhH7(Q3ShHGq0%%Gs+_ zFR6_l5p-8n^{`db&?Rs#oEU4H;MpiJ{8;tJ)WXo)Fb6sX^1zr#Fe$PyyCA++CkN}G zCv=C+0PQnBiF_pHU{&|(>2omTnjr>Pt=XOMKK@g+b4|2lH3pQ0{WMh31JBa=>_poX z#5jA6o!PMmPDfRJs_e3iau-QA`L-&`I<_2~&u_P3e^5UDG=lqlB7unAO=8OKl z$Jg0K(!@t~j{){9ayjsZS0}YB4{sCr^%Vpl`!7=)FAlru?2TWg-)1h%+tk*vH?nk~ zqy2gP_muu0Jdb~Q^z!&_^Im#bzO#Vmfawmow%f3u_HT`! zM~K8IMgk(#(84FN!Q*~}rjCC>*`pM^-7AJMBczy|Ga#HMS)^9Z5!sVSwIr;9Kqg*++fSxO5;rYS0~NC&+hg=*VbZ ztkfumw2EyI)aEBaQ!x*by%VccSjR+W+L24V8HcVcjpenMHj56K*w08@q`CNr2(Gl@ z!72=HGCJ0Hp~bNm{tf;DlHjt2*_t{h+POw07^SV>lZv&P!^NT&dF4J|VsjLEPJ?{a z5hvgbZcRixQG4m8%#lO1EM?{vwcq;wEvGp!qhhLro~-?76SO&XPWj6TB7p%1dRM=rQzUxLGPxbB@NTq>RIJzgh1>54Vz$?xiUVg&)A38G6Xy8Sph z*d>GlF{mN8B?24oWntNRUi(VdR8}5q*j-u;^geJ{Ieb^os$v9@v7iX|lf*=5vT*Wk zylAwc8so=l@k3y^JT`L^afy#nKpruw)HP6?xNf}B%BJnWMi97!@&;DlACr-?VX<;i zNnQhfc$>=00fiLK;cBQJLfe%ez&HVE2GM+~30`7(pMQi$bY+_>v_L@np^`0WtZfFa zBbaBPQU%Lp&ybh#wXf*I2NJsHH%X71ik7W-qIQ_Q7cHaB!Z4!NFtS@Kn4_{tV=BN{ z=B;(}0Dkdi23<@kd3wk}Lz$*@Ib`yxAdD<1zJ-g6Ho6AKp<2$^ER2QJw}oS1|;o(?wbR;)liaOkpWI{u?Tq(%=|$Vx~F^Fy-&@ zJxa3`P|xzKu2=YC7(9NQ&|xa%h4mtvv%NEqKblAHMBp15mXcx+G!_%^UEP#Ao`DEl z>$cgPbO`97u8wv;!^kS}R!cM|o!1Za>S?fzf5slcBa>^(5VIiJz+4)!k~+bvY^VRx3gl!ytn!h9nfT2X3QF~8|po?2vb1v1GP;cWiU^asqD=W@wr+@Iee;3K!St+;6CC84WUIS~DCg3u9RkuP)>{APiRjPmgM-4G9&t;b6K5u1XU`+w8*G>ONn)f?GG1FL|%f&hS z3~fk6XEKwlzo9uxu4Wn*nvtYkS_xXF0T9$%;y-yw@9CITtKP;wG)c|LU%#@Dws4*B zl{+h8`5GL%6ej8)Jh|v2$CQZj9itggdqypB#s!uUoM+2B#I728<8E$4=x#0Ag}Q}& zv8D^bPhpPvAxvTTeblWC}csxB4i5e zrB-2=L|f%}jR_GPL$_1y&ZLbzLBalU$@{kh&udXLIsE&;8{6188rd`cW8k%90_ahL z&!nCZ3hp%c*Axk_dV~zNgD!R;=H!J9x@OQ9Upk6m2j=?dT?BIXye3v0mK_}0k_kVj zIcb)`lFVWXNy=q9G>l9=t~~ET3r(OTq2!{CP6e|&wy|-yn9cO8K+cU6M&)8rcCbEiiydi8e&~qoA%+BT1{r_LQ#@0sXZhb3Rsi$$76;48#3) z5c(|@BLG~3E~-LQjRPN)1Xn>JVEtlDgDsD-^)O;hIKS7V^p0K9x?X@u7rig+RU|GVLb^xFfRWI z5xLHs@_XneP@0^$^b)aLpaVLr@SV{sxA@h3Z}VYjd8n?Y8Okgx&i02WfUG)7908*M z+n9Y)uVQz_l?)g0wkKW)+Uob7MV?$gy3YUCVQ2XFu>VgS_CLk^zi0b@!`c2{X!k#b z{J*vPe@pE?*Ef>oi<8)5cRTK}!qg5aXhQ0ayuWkb^&P zC8puW6XffsgF(wE;G>ceQ>bvVZ$mV#E8fqwr3RIZV9_gIh!UeL#*r1-iVY%ZsG19$ zX~CFu30xe&kp;3~2JGNSdts2{oeSXTtt(<&WahKf=XMtLuxXg;Zztev!_@erc0%Hi zwBf?eUD~}7;txvw_D5{?b%~w4&7D={uhhS%9he{XMUV$^x)ZBq(#PhElr)Qsfm1|D ztcrbaO9)9rZgLRwQ;MhhD%=;Gq76c>0m+?F`b0f$gHAT|B9H8$mUV3Yj@G6P(M%-? ziA~5`M_kQ$gbqc3F_#cd|2&m&Ya^A6Bp#Xt{AF&soqd(Q6~3Acq@2179_a?sGREE$ zyJ3Azc`qBBl6$7lsvhEoqdNWd5L($sV|`uM<5Oy}zxit-Nd&$#G^`gz2cgd6mlQfv zDn#ZgP^-ZW<`M&!x%Dx7DL5zHSRG_Fe`7t1;^(>7g{3o4r1(k|Vv;DKw#3T}pS*Rqp08Pvz@38&)WDEau=KzR^MyvuyjUpz~73~`lD0*4xlF!dU zS?gp1FDkV7pIc+UvNf*!{TkrG5-|9aS(x6K1^J(AfS;z|FWfS2z4IP%@Qmt&Q*{-^ z7S*IqIRnqU;d6b6d7Bp?+%D1_6O>JQ-TT6%Rtk+|T#s(x%b3jz6G-`~b1iJ+swxmA z|5jp-FBN#2>?hEU7q?VuIo)?kSTy_>peHRtE=Ok@%1#*|Y(Ke$={Ihn{DWH*{^AxU zy$h{B8Ub<*d+JQ8JxVzOp)qSv%NI7eAn=KT1*#SEd+ z11oZG6<)PXx=;i~TQy@7nbA+=M;z3XP8|$Nu04to5*>FujQ5U^Rxe`bRuwl$?)#7tM*=TzGW<$)cG0X|8d3mrv>?M zo$}vOr@UmG88g0Z6C`2-0BHXd6ON`vRz`GxT>lvTV>PJ=99GOuj0gPS7EZhsrWe(1 zxZ6eP<2qtsWP)a^~u3 z2KcdoCFK_zO=6c)oOxfq5iV{qNam0nD>Q;EJCEHP7Hb;&nSvh!r+rbpj)If|U?^cH z2|z~&ZGfhfMJv#e0SX(F`~{mkc(8K;>OLJmo^S4CUOdl(e$=^KF+8X~Nw6ot0Tn^m zgO*@XTC!CRA$&04DOy1KVx<9#5x*ng+G7O+G9VA>1+3c{eZ=~3!gDOc6$WzCSDv@&%C3uh1? zv@jhk`}*YB2s0GANhRz$>HraSh}$Wc1XD&;Cv#uaknGzQ2oK6ff?=De?)kxw2oZ9% zEStyYW(9u%FI>Ool=MA`WIczH$>v<2&;8NocxcE)zZ~5R7BI^Sv6&iGzUjcG6PX~D zHh|mxcK`R@gJJ|9Zm;Wa*IScUHD6CEeVHkjJMOp2e6o0|UtadwSYx_A?5H7CNsU7u zU{}~=b-2GA?e^;SK=MXgJLJ_@E1- zfYuIbf;&HeU}D;8l?+~dKr`0^*3Md=hs5u59Ek&RWszpyea19rw$Y8VN9;zmm$;2%2n4U=gfz2T69FQLjBIv- zP}A~_Bw!a5uwtQ!s>~7GH8Lh7zpUster=p5S={Y}h;b}f!~uNL{8%1z!I)|vi-{Eg ziV^4GJ4hN2!GvuJWTI)v;h(DdjL2wQ+ahg(y;jCIl+1AQ=$iaZbYyxla1|sFHIAF{ z)*x0gGD8j6=X7h?_GH0Co`pP)-7eH6*6YQ zvo6cKFU%c4{n8A4yd}2~^Di4^w?)tGUKQFSb*%#jDNcK&ORf(11d#|oV0*cX0yD$D z2Z2nNP>z^h)_rgIA>UKnC0SOaccLj%H zsjH|q#ZqN1e2k)=8|0$We^ul2Zrf_XBC1&j1m}(^7-yTf9Hp(JN_s6T^3|J+qQLAx%9&>?uLRr z&B1d-#sfJiR|CdHFjQ0)>3=%Xvoi7Veu9&Y#QE!;Y*}ZKYmmjTik?3gM z_y*_R`PCBcbY&q9xGgjNY~gWSzGM55r0G=jVO+U%OS$Y_)3n*`+QhS4dH>keBv*U* z*VDhZMT0qbrS-wy6b!;I3l|3?M@KVjlRp+N^(tyMD{P3~I=Zg`xd%?=vlxe{m${@u zGw5=ykCGDg<8VUI#72W^$37bb6a(yYgeQ42Mf;y3dbb8I+V1YqPEKlK1MAW-GVZkz z>ikmztxVRxu9WlMy2}$N6OLeIxrtDB+#Phi-k#DYsZz)$aNBn8)>9gZuKdDvK7MGnKODmjsl$#b{$xNK zztGQzN9B2jEB25aulg;#91OW}YuR0~|Jr`Ena=|HJD-+&bwk$h1ZN1X&u07KyCN2J zFkBW*q>VdpY*hhTTbtx36csljP#aY@Kct~fD3gpb^x&PXPnrZl-7Vz|h5G|&s6?WN z^lQ+H-CB0Y1`6A7<|VB{N17MdiMNoqcp+9F3;7^yFZp9HWvEdp5XUVPeTPcHhpUxH zYKy+Pc7wTXN)ohkEmiu$MNjm!G(Jq6G}Q}~X&uM%fgRVwb@?LtD-;)=b^0zm%~XG6 z*#}o|)1tV1_xWPxR3WQtN1+t#WWdOVxBt!-yqOrUh!p3)JX`l^NV4dLdvo72GEhnwxv^0bi3TGpwvhUiRU7Y09qr&z3BGW_P$m- z9R79hvIhQ@G^-;=@a-wXDT_to)H0%=JcgtMh>Y4xqGNwwkrh$i?Cv|{FOniJu=VyH z<#xeHib!}pIw)fqYQkb(QobMhNT~c=z18Szmvn)EAZ4dnG_eSObk8l>FTF-_x-Os5 z8zt_GANIp(R*0lu42s2a7|(x$y2#%mx4t_E7W22`IQ!`DiaI$fO&lTID};eeoA>_s z8l6OrP1A`W3_-TIi5N~<&17Mi5GqgllN@H@=mc9nV!8UMx1V6|`yq}vV%UJ>xNM@0 z${RK)rY@!?F=Vnev`on-r3EV{gakQ{nrxy{{DI^;9fAlptbVN_uBhaEJB(UPgcR?& zAnHK@pQV=BJ;t&9HBofotr8fu`+T-zuAq6wRL~+9@E9#^sT;u(ij7KUqZ>+#8oqgT z#JRQwrX4x3u8Ac|Pkgm;5fNQXXkO}jx zIfHMWr)lu)rPzbe)Xq3pA=Ch6!hJrjpEFmu8sVLtPgSnW_8^b@7(Jhj^{LV)E+R`A z2KZe%e%X!LEoWVr0^t_PXFW+pO}1l%05Aqvntl>ZTGb^U91}GTbu~&DWF~FOREKa* zwmY2mG@`IkbcdoamS>ZAYBDqA!DquHl4n;&na3+rq#KO1Gn4ajL+|wGm356fpWCOu z_Y);JFNk5jZ4U6gdB{=!Y4y7p=_~vqlM|=A#X%Sl!8hABIRjT$EF)-Y9W@Y3U6J^` ze3>(?0@5Uh3apQ2IV+Wr=uT)740-lVj3c z@IwKBpEdr7GjMQpv;4=7+-l~e!;UE0;F;nDZ;7XWujTflv~kgb1W8t)7(>FiG~;m# zO@o0Ial>@Xu;HYl9I6PsEGoq27pP{t-7Y<~+pZj+Ve5lZqYh;o=j2gI>hE52hi4vD zkKYdldEK%}yE)S_7)FZ%wnQESe2obrwvUZDTs^#QcfY?U7mpVr*2Bt#KQ&|cF-*q$ z)II1j8F9&zEA$0O?Sc!gIKW7)GVAOFZ7aU0;74eM>3|BYYF2#w>zK{0eEf!T85x0u zNRr1B>5;GSqQy1zOWp)@d zx$^*Nlzs`Zj0%Qc20)=*YMnpLNX(D!Z|GvEtoWy+T{DItsha=@fUT-5u6`bpN57Z& z27*7cr?+=PO~x3C^mYj9xiT0s*N?z5teSH$tyoG5mx-!St$@^G3&(XxQ z-G1tdo-Nw+7nFSio)_e!k%k9;C z=#!74e57Mu2Eje>9OJQ*1P$GpOsidUo-C|FqQmg+I6~_p$Yzr6Ds^x5qBy{0^16pA zeSmI0lGgB2|mSq#D$FLVOusfzS&&dQ!aTIg=ZnMGq^mx zTjT8bLzR=mz5E@wSNS`!v8I-Mwuo;YXrZ>OzO4}kVZzLV$d;zjB*{~_5+yn8$)bD? z)X~xVs!y2XHO&jksB@HEl6_h@fciffI+bO%l-;8zpJD_w^S$wC1pn ze>|M6dFD@G%jZ`vbU^5AA!J=wWscx-(UH22M_quI`fN|(Pkg&MM*-!q&}A0sI*L%W zo#&$iC2A{A0(H~ZdcLC{Ki~BsHua^@ukX4Fmbl1APuFz6EP{DlLB=e-7=D&n=DFx; zR;BB$;QdFz?D_G8gBB%idt7a>`mv==t*6iIWc{Q$+=j7-4DcL+<2AwDOw8UgJZz2? zS~oJTdX-x9$gVf-WqV>EsgdA0epDDS_Csz^ZN5vO$+XkoV*CnK+ajwNpmFsOvS`60 zg^p66;(eG_X0H7Kyn@&XdYLL-(fs2I@3QF9M(+n8Tl9mVCVJBW_w!8{hDvlWtcHGD^mQLQnq*b=R=9P>H2GWB3zv1h7nLVAK|f*YzAANn{OzBy(*H9!(n& zPxex<4?mjWiLCd<((8zerqW>%kEMVk?<@JfxO~H`erfvtcDXwwPc5eK1-jZZ(GKyE zQE`J!rQ=PY*YPIR>wJ>z@w{L9Cd7vU!vl58+J`Bs!(;OP8ID;^Y!+=)%=-+=Eqw}S zdkeHQPixA*~WQ{-S<6f090&8V1&MDt|tTJk5g z%j7+|pM~OgG)5WLYK{H9F!387IC|thy_Q>WMO?jWaGPFkwrjGAu5Yxo@6P`~w+x-M z$p+g$4_qSMlv=am0)yAaO&WfTD#^A)k-0MxZ3?}Ex_^7eL4RqO=A?Pxa zk*y?|*|gERIrp4f>A+(*0*%dVFT6&lX=Q}v!Rsu|>MVHBZXn_)&F3swENRFhQb^2M z=wMw-#6d>E37lE68R|Q#JcXB1k<%g3=#QFWId6cjE8>_1zlijWF9^~x=QfpM5vha^ zY+u?Bn&Kb-`UuJ)y9El1Q8I=1cCM4i67;{FVDCy0{ooZt`5-6-b3o7tr~D4tnwru% z!6*yT=Ldaa_jYOlqseQ&ol|~4Z~ScPCtU*IpJbpFbx%=CV*USvoD#SB zH@yE-d(eN=&%OSh|BnXi?~6MH&DzGjLwS$PIt;oV3#ryhSoiZ+NzLcpDK(8u9KKNZ zKp8ag?*xPpso6LN^j#4fU!peb)wT{NH3{a>dU|tM`1A}G+;ICry^l1 zZM3?7Swup9IbfA#t?sXO&`k4~o~(u;bh$&glb-n(1&$66p&$16psilCkra3zALb8ViIeEK_$#^@0hk`98qf4KL9UY%mNrCUaDpl>SDUpXoa@8{Q3LA(D&Mn7>*upd=wB-v;qH^Q~DG8nwc= za)tcP=R)ODyZLx6bIpc1VwOsE^Y~?M_7y1Uf8LQVn`<^7rv2m28;nIPsw_FZ;Lj)e z8}vWxl>gTR-Wvbe?jO`kmCPj?>Rp;{fJnQOH^_EyQtqfI*SoE_=ut{tVh+8!`n|{XoE!yS=j=b@^ z=G_~i9TfFi8<&!rQ=c0+s=8O!?5udsk-j#l8_GOx@(JrBA%Qg*oV${*-vHGTKaQ7P zzAsry^k|5yV0ebur~M#!Xmx$RrP)3hfj=eNQ4&Kfo?hj1GM7@yO3~robV3x9yQKNd zsQ_Wa9L(lc^OlFdBln1Jj8oR`8xg0mW}I((nq<_{1e*}xr&EXQdU*>rR*&% zh)#6Aa8Mwv_%j->0rAv35U;X@qd1g^Euow9gYk#~Wx5$G%Mgrtm%<+xc{@w4_607- z--H^jK4~i!FCp|sm}kFyVVv{JBDC=WSEO~gsR)~Iy7rRg^eoGwdc3HZ#ppHZP0;RC z3JLtKaVgsgx8WbH(01XH>^(O&H!k#%=ZrMh``zaX=S2uSgZml3e!6Q^7Y8zfx=)K5 zYT@Cf6+yP{9XC8mpSsLL*5>KgBi-^0_k1!&kL`;UBFuHKwIQH&c=N?B(@QFrMmkG} zWXw-&bvh3ZyB}KIE$ZfZQuaE~s+-m@Z|c^t=3Fway^0$vgRfQ|=Bv3qus#inUMWVj z)gJQn)MQr{zrck>erv8_ZRtkA8>@mRtqY?iaD1wgCcJ-D=V>YZAs^;lbCxpe=YD`a zDM|=8EXSV#y;y)1C&e7x)=7PT|H>kfK`jf^rr&Oihd}1;$kG@$QM11L-Bjm;Je%J> z8kU+CliN)r4>s7++>$$QE4RjyRf1QHyba@)UICQgNm9iiCT7T?b%yr2+VmiAv> zUoH_$A?6gy&sKrj@a_?#pI`51t2*yT&jJW;7jLM)a9Gu=RBpT~2x+xFepz0FSUg>D z@)$h*AkqFBlE9?J>1lX!j?~zDVlW!Az2QBf>UgqT(_k^USlJ*m*Gbp8S{!+{yoF*B zB2)d&8owx=W3_yX_xQ5urR>M9&)+UNeYRsc=Rg2}Mao}1`_@iY`bPF|`>8+UK#tW{ zthebAy>!f8frLg3KC$hFrz`WND5;dqi=TM=LV@QENbd%&Y*c09NJ$U`S0hmIDwYm1 z4Vnzx;(qkFtL~Sxi893==u`PpQF)CBwXG(K-g$XoWWPh%!5m-+o}c<&XDL_Ft@-s~ z&zk8y6_-#V`zJ$ycj&;Z4oEk!N77?lAc<`)0Vty$}L)-3?=Z}dl^gbvr^0c; zzWh#;pH)*yAhptn6Ry?MGrn>fORIryY;n2++<=E?xOs!bU>=q9L_LQ;Jw_&;_EgiQ zKuIFC=>Q9E{jGM>Jh#m}dJ#O|hNbN!9NF-S3PBae_7mKo_2ofu!c(Tjg^Jb%s^sPe zh0Wjuie?o_{+qtK>T=`3VK|7|DeU+mz34-dGsqtI;oTW#;Z4hq>&Rh5&#M6^q{Gh= z9hL$rhlO(zr77}sh1%X({YlsH8`O3k%-jgRa-A>i?q(bVuAt@eHXF)XhekN9@^r6E z9gjN-2cP z!KYm))5b1m_`G|0W}-$(HhC59*{IXf_Yz1as)G05QPJf@@DJf_#gVvnK54kHIWNg* z1-2f_3$9GP)wMG6c{K|8*@nDeVQ3ab9x;ZD{BujfCv0)`imDXbT|U)mKZIZf$#JsXznK(8OH#a^mTZty{9DlfXK^YoY1+iITb&95+jx9wzbBrY2Nwt)xwCI^t- z60Y7dTV(BRY#rzfZ0wDGih#GY#D8a5y@}7jcq!>#dc>eR(N%)WXrV)58ZaR1paZvC{qFwQziIkXF`g>%nG)pj8G%CQ_)Ju90j!@MFT^K z22#bb$ZNHM)%|uw^7@vaNmY?Kh)}-x9XQ!R2h;0A(t>KKQROud=Qk8&log=rq6z?0 zK<3wMTwWxz4_H$ zDgB^3x-z^CCF@Du)ij{r55rOOYzi(~SeuhN*+oTi`P&9`(Gk>((s8S|$u3x~&z#~a zq@pp)tJM?S-!U@2>Nq*l+b*>0TMWuCVQSWnihBB%e{k|@!nFCy&zuDa4~X2yf!fRh z?GlCM`rz(w{mVVpXhMzs)Wf3UGh983dszlvzTJzXme(xq)GH^ZjLEA}OPHgtah>^B zkKcB$rIk(lqtMg~*>DjfoAuoZ6yvs6J{^;H4{>}(ub&AocI~C%$?WuYs?FL}XOGbB zs6`o;*W^%<@&8gAFJTVRx)1H&@*$q%2eZ)o();G}nABtzCw*GVtq=TyLzpg@bqGQS zExJ!1>k4;A?r;3I&=4DH3rnnIG|mJxEb2<4j91l5)RbvtfiLhP8`F-^PY zl?0ymew9Jk@OI5qK80q6tT0X5hN%ok@TbIO?oOw)a$AHDQ+P~8MDA^CTL~*K7`P<$ z%TA5dU@w+e{G?qdQOsFsTYkKU#*(+>gj1X6`(rb3> z=PH$udLxc`^!IB#RF&A*USZtr4JY7*AZyqNetemeN-wo91^ZAf+e$21s*@T-Y4*#2i0_BR zTrTiT>C8FIo`6Qz&K$ZSB}6C0PWA*MP)-WiN;4O2C={ecRv?S7?s{U;aA@B<1~7D5 zr7(IAhGgvPxA}{W4}y+6o`!p)RTX!=>FE(|WqgQIr8AAKq7IM7&V(^GI(?EoKV+j{ zXI?#Qn;va-od4w{0s^LabFKaTw?Y0nCjb2Xn=ggPO8hH4gSKds3U%p51 zJMdpqvwsCXzKPubDpC7)O24Pn{zWV5?TaV>DZ%!4`0r^8f5F3H|AhaNqWC-d_sq7x z&;f7Ww?7*HCC}~u9_{{?ANdRVk8sETd$iq~%K8QUN7l{n=>Pq5o4&bT|3Lrax&KJs z_#ORw%>Q4{t%v?6`qvDA-{HSUf&T@M#{8x0{}B`ZJN|#)`S!Pf(O)|Mw=eGZ;L^WZ zZ@~Q%{d=hC?;ZF(bm%V<*hGJl_%)E|clhs)?!Vw=5 zaQFTW{(Y(b7ub~TPw>Ai+<(XatC{~78UV250s#J#(f>RAcjM|W0xWNF0e>>?FQ(S- n_YXZ~NB|5MY-N`Spdl%D|wsDNuw003L?XX$?d+dL0* literal 0 HcmV?d00001 diff --git a/wordformat.py b/wordformat.py new file mode 100644 index 0000000..41446e2 --- /dev/null +++ b/wordformat.py @@ -0,0 +1,31 @@ +from docx import Document + +# 打开目标Word文档 +document = Document('数据库概要设计说明书.docx') +# 打开样板文件 +doc = Document('temp.docx') +database_index = 0 +table_index = 1 +empty_table = False + +# 遍历文档中的所有段落 +for paragraph in document.paragraphs: + if paragraph.text.startswith('数据库:'): + database_index += 1 + doc.add_heading(str(database_index) + "." + paragraph.text, 1) + table_index = 1 + elif paragraph.text.startswith('表名:'): + if len(paragraph.text) == 3: + empty_table = True + continue + else: + empty_table = False + doc.add_heading(str(database_index) + "." + str(table_index) + "." + paragraph.text, 2) + table_index += 1 + elif empty_table: + continue + else: + doc.add_paragraph(paragraph.text) + +# 保存修改后的文档 +doc.save('modified_document.docx')