"""Report duplicate records in ``issue_receipt_details_full.json``.

Two records are duplicates when they share the same combination of
"发料单号", "行号" and "物料代码". The script prints summary counts and a few
example duplicate keys.
"""
import json
from collections import defaultdict
from itertools import islice

# Fields that together identify one record.
KEY_FIELDS = ("发料单号", "行号", "物料代码")

# How many duplicate keys to print as examples.
MAX_EXAMPLES = 5


def is_missing(value):
    """Return True when a key field is absent (``None``) or an empty string.

    A plain truthiness test is deliberately avoided: a value such as the
    number ``0`` is falsy but is still a real value.
    """
    return value is None or value == ""


def find_duplicates(records):
    """Group records by their key fields and find keys that repeat.

    Args:
        records: Sequence of dict-like items exposing ``.get``.

    Returns:
        A ``(duplicates, null_keys)`` tuple. ``duplicates`` maps each
        repeated key, a tuple of the stringified ``KEY_FIELDS`` values, to
        the list of 0-based indices where it occurs, in input order.
        ``null_keys`` is the number of records skipped because at least one
        key field was missing.
    """
    # Tracks, for every key combination, the list indices where it occurs.
    seen = defaultdict(list)
    null_keys = 0
    for idx, item in enumerate(records):
        parts = [item.get(field) for field in KEY_FIELDS]
        if any(is_missing(part) for part in parts):
            null_keys += 1
            continue
        # A tuple key cannot collide when a field itself contains "_".
        # str() keeps e.g. 1 and "1" in the same group, as string
        # formatting of the key would.
        seen[tuple(str(part) for part in parts)].append(idx)
    duplicates = {key: idxs for key, idxs in seen.items() if len(idxs) > 1}
    return duplicates, null_keys


def print_report(data):
    """Print duplicate statistics and up to ``MAX_EXAMPLES`` examples."""
    duplicates, null_keys = find_duplicates(data)

    print(f"总数据条数: {len(data)}")
    print(f"缺失关键字段的数据条数: {null_keys}")
    print(f"发现重复的组合数: {len(duplicates)}")
    # Every occurrence after the first one of a key is redundant.
    redundant_count = sum(len(v) - 1 for v in duplicates.values())
    print(f"因重复而多出的冗余条数: {redundant_count}")

    # Show the first few duplicated keys with their first two occurrences.
    for key, indices in islice(duplicates.items(), MAX_EXAMPLES):
        k = "_".join(key)
        print(f"\n重复键 (发料单号_行号_物料代码): {k}")
        print(f" 第一次出现在第 {indices[0] + 1} 条,最新状态: {data[indices[0]].get('状态')}")
        print(f" 第二次出现在第 {indices[1] + 1} 条,最新状态: {data[indices[1]].get('状态')}")


def main():
    """Load the JSON export from ``OUTPUT_DIR`` and print the report."""
    # Imported here so the helpers above can be imported without the
    # project's config module.
    from config import OUTPUT_DIR

    filepath = OUTPUT_DIR / "issue_receipt_details_full.json"
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print_report(data)


if __name__ == "__main__":
    main()