Python实现对比两个Excel表某个范围的内容并提取出差异

# pip install openpyxl
from openpyxl import load_workbook, Workbook
from openpyxl.utils.cell import coordinate_from_string, column_index_from_string, get_column_letter
from openpyxl.styles import Font, PatternFill
def _normalize_range(start_cell: str, end_cell: str) -> str:
 """把两个单元格规范化成合法的范围字符串,例如 'A2', 'A10' -> 'A2:A10'"""
 col1, row1 = coordinate_from_string(start_cell)
 col2, row2 = coordinate_from_string(end_cell)
 c1 = column_index_from_string(col1)
 c2 = column_index_from_string(col2)
 min_col = min(c1, c2)
 max_col = max(c1, c2)
 min_row = min(row1, row2)
 max_row = max(row1, row2)
 return f"{get_column_letter(min_col)}{min_row}:{get_column_letter(max_col)}{max_row}"
def _read_range_values(ws, ranges):
 """
 从 worksheet 中的多个范围读取值。
 返回 (原始值, 规范化值) 的列表。
 - 规范化值:去掉前后空格,字符串转小写。
 """
 values = []
 for start_cell, end_cell in ranges:
 range_str = _normalize_range(start_cell, end_cell)
 cells = ws[range_str]
 for row in cells:
 for cell in row:
 if cell.value is not None:
 raw = cell.value
 if isinstance(raw, str):
 norm = raw.strip().lower() # 忽略大小写 + 去掉前后空格
 else:
 norm = raw
 values.append((raw, norm))
 return values
def _ordered_set_difference(list_a, list_b):
 """
 返回 list_a 独有的元素(基于规范化值比较)。
 保持顺序 + 去重,输出原始值。
 """
 set_b_norm = {norm for _, norm in list_b}
 seen_norm = set()
 result = []
 for raw, norm in list_a:
 if norm in seen_norm:
 continue
 if norm not in set_b_norm:
 result.append(raw)
 seen_norm.add(norm)
 return result
def compare_excel_columns(
 file1, ranges1, file2, ranges2,
 output_file="result.xlsx",
 sheet_name1=None, sheet_name2=None
):
 """
 比较两个 Excel 文件的指定范围,输出差异。
 参数:
 - file1, file2: 文件路径
 - ranges1, ranges2: 列表,每个元素是 (start_cell, end_cell)
 - sheet_name1, sheet_name2: 工作表名称(不传则用 active)
 - output_file: 输出文件名
 """
 # 读取文件1
 wb1 = load_workbook(file1, data_only=True)
 ws1 = wb1[sheet_name1] if sheet_name1 else wb1.active
 vals1 = _read_range_values(ws1, ranges1)
 # 读取文件2
 wb2 = load_workbook(file2, data_only=True)
 ws2 = wb2[sheet_name2] if sheet_name2 else wb2.active
 vals2 = _read_range_values(ws2, ranges2)
 # 求差异
 only_in_file1 = _ordered_set_difference(vals1, vals2)
 only_in_file2 = _ordered_set_difference(vals2, vals1)
 # 输出结果
 wb_out = Workbook()
 ws_out = wb_out.active
 ws_out.title = "Comparison Result"
 # 表头
 ws_out["A1"] = "Only in file1"
 ws_out["B1"] = "Only in file2"
 ws_out["A1"].font = ws_out["B1"].font = Font(bold=True, color="FFFFFF")
 ws_out["A1"].fill = ws_out["B1"].fill = PatternFill("solid", fgColor="4F81BD")
 max_rows = max(len(only_in_file1), len(only_in_file2))
 for i in range(max_rows):
 if i < len(only_in_file1):
 ws_out.cell(row=i+2, column=1, value=only_in_file1[i])
 if i < len(only_in_file2):
 ws_out.cell(row=i+2, column=2, value=only_in_file2[i])
 # 在最后一行之后添加数量统计
 stats_row = max_rows + 3
 ws_out[f"A{stats_row}"] = f"Count: {len(only_in_file1)}"
 ws_out[f"B{stats_row}"] = f"Count: {len(only_in_file2)}"
 ws_out[f"A{stats_row}"].font = ws_out[f"B{stats_row}"].font = Font(bold=True, color="000000")
 wb_out.save(output_file)
 print(f"对比完成,结果已保存到: {output_file}")
 return output_file
# 示例调用
if __name__ == "__main__":
 compare_excel_columns(
 "D:\code\python\pythonProject1\新建 XLSX 工作表.xlsx", [("A1", "A7")], # 支持多个范围,比如 [("A1","A6"), ("B1","B3")]
 "D:\code\python\pythonProject1\新建 XLSX 工作表 (2).xlsx", [("A1", "A4")],
 output_file="result.xlsx",
 sheet_name1="Sheet1",
 sheet_name2="Sheet1"
 )
作者:faucon原文地址:https://www.cnblogs.com/faucon/p/19095980

%s 个评论

要回复文章请先登录注册