You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.9 KiB
54 lines
1.9 KiB
9 months ago
|
import glob
import os

import pandas as pd


# Sheets that are skipped entirely when converting a workbook.
sheet_name_black_list = ['填表说明', '数据资源目录结构', '新增需求填报']


def output_stem(path):
    """Return the output file name (without extension) for workbook *path*.

    The name is the text between the first ':' in *path* and the next '_'.
    When *path* contains no ':', the workbook's own file name without its
    extension is used instead of raising ``IndexError``.
    """
    parts = path.split(':')
    if len(parts) < 2:
        return os.path.splitext(os.path.basename(path))[0]
    return parts[1].split('_')[0]


def write_sheet(df, out):
    """Write the tables contained in one parsed sheet to *out*.

    Column B (position 1) drives the layout:

    * an empty cell in column B ends the current table and writes a blank
      separator;
    * the first non-empty row after that is written as the table title;
    * the next row is treated as the table's column-header row;
    * every following row is a record, written as ``header:value`` lines
      built from column B plus columns E onwards (position 4+), followed
      by a blank line.

    The header labels are captured once per sheet, from the first header
    row encountered, and reused for every table in that sheet.

    Args:
        df: DataFrame returned by ``ExcelFile.parse`` for one sheet.
        out: Text stream with a ``write`` method.
    """
    if df.shape[1] < 2:
        # No column B at all: nothing can be recognised as a table.
        return

    # Local to this call so labels from a previous sheet never leak in.
    headline = []
    have_title = False
    have_header = False

    for _, row in df.iterrows():
        # .iloc keeps the lookup positional regardless of column labels.
        key = row.iloc[1]

        # Empty column B marks the gap between two tables.
        if pd.isnull(key):
            out.write("\n\n")
            have_title = False
            have_header = False
            continue

        if not have_title:
            out.write(f"{key}\n")
            have_title = True
            continue

        cells = [key, *row.iloc[4:]]

        if not have_header:
            if not headline:
                # str() so that numeric or empty header cells can still be
                # used as labels when the records are written.
                headline = [str(cell) for cell in cells]
            have_header = True
            continue

        for head, cell in zip(headline, cells):
            # Embedded line breaks are dropped to keep one value per line.
            out.write(head + ":" + str(cell).replace('\n', '') + "\n")
        out.write("\n")


def convert_workbook(path):
    """Convert the workbook at *path* into ``<output_stem(path)>.txt``.

    Sheets listed in ``sheet_name_black_list`` are skipped. For every other
    sheet the sheet name is written on its own line, followed by the output
    of ``write_sheet``. The workbook is closed even if conversion fails.
    """
    print(f"正在处理文件: {path}")
    out_file_name = output_stem(path)

    # Open the workbook first so that no output file is created when the
    # workbook itself cannot be read.
    with pd.ExcelFile(path) as xls:
        with open(out_file_name + '.txt', 'w', encoding='utf-8') as out:
            for sheet_name in xls.sheet_names:
                if sheet_name in sheet_name_black_list:
                    continue
                print(f"正在处理工作表: {sheet_name}")
                # Load the current sheet into a DataFrame.
                df = xls.parse(sheet_name)
                out.write(sheet_name + "\n")
                write_sheet(df, out)


xlsx_files = glob.glob('./附件2*.xlsx')
for xlsx_path in xlsx_files:
    convert_workbook(xlsx_path)

print("处理完成")