From e2dc9c8a8d55eaaa27ce91b92018cd4529586a92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Kucharczyk?= Date: Fri, 29 Dec 2023 11:31:27 +0100 Subject: [PATCH] merge-csv-lists: refactor again --- merge-csv-lists.py | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/merge-csv-lists.py b/merge-csv-lists.py index 71473ae..8dc5b56 100644 --- a/merge-csv-lists.py +++ b/merge-csv-lists.py @@ -2,37 +2,31 @@ import argparse -def split_and_flat(input_string): - if "\n" in input_string: - _without_newlines = input_string.split("\n") - _without_empty = filter(lambda x: x != "", _without_newlines) - input_string = ",".join(_without_empty) - comma_split = input_string.split(",") - flat_list = [] - for item in comma_split: - dash_split = item.split("-") - flat_list.extend([value.strip().replace("\n", "") for value in dash_split]) +def flatten_and_split(input_string): + elements = input_string.replace("\n", "").split(",") + flat_list = [item.strip() for element in elements for item in element.split("-")] return ",".join(flat_list) -def unique_combined_list(*inputs): +def combine_and_uniquify(*inputs): combined_list = [ item.strip().title() for input_list in inputs for item in input_list.split(",") ] - final_list = [] - for name in combined_list: + unique_names = set(combined_list) + + final_set = set() + for name in unique_names: parts = name.split() if len(parts) == 2: first, last = parts reversed_name = f"{last} {first}" - if name not in final_list and reversed_name not in final_list: - final_list.append(name) + # Add the name if its reversed variant is not already in the final set + if reversed_name not in final_set: + final_set.add(name) else: - if name not in final_list: - final_list.append(name) - sorted_list = sorted(final_list) - output = ",".join(sorted_list) - return output + final_set.add(name) + + return ",".join(sorted(final_set)) def main(): @@ -41,13 +35,9 @@ def main(): ) parser.add_argument("lists", nargs="+", type=str, help="Comma-separated lists.") args = parser.parse_args() - if len(args.lists) == 1: - args.lists.append(args.lists[0]) - # Process each list with split_and_flat if it contains a dash - processed_lists = [split_and_flat(lst) if "-" in lst else lst for lst in args.lists] - - result = unique_combined_list(*processed_lists) + processed_lists = [flatten_and_split(lst) for lst in args.lists] + result = combine_and_uniquify(*processed_lists) print(result)