merge-csv-lists: refactor again

This commit is contained in:
Lukáš Kucharczyk 2023-12-29 11:31:27 +01:00
parent a7c1936acf
commit e2dc9c8a8d
Signed by: lukas
SSH Key Fingerprint: SHA256:vMuSwvwAvcT6htVAioMP7rzzwMQNi3roESyhv+nAxeg
1 changed file with 16 additions and 26 deletions

View File

@@ -2,37 +2,31 @@
import argparse import argparse
def split_and_flat(input_string): def flatten_and_split(input_string):
if "\n" in input_string: elements = input_string.replace("\n", "").split(",")
_without_newlines = input_string.split("\n") flat_list = [item.strip() for element in elements for item in element.split("-")]
_without_empty = filter(lambda x: x != "", _without_newlines)
input_string = ",".join(_without_empty)
comma_split = input_string.split(",")
flat_list = []
for item in comma_split:
dash_split = item.split("-")
flat_list.extend([value.strip().replace("\n", "") for value in dash_split])
return ",".join(flat_list) return ",".join(flat_list)
def unique_combined_list(*inputs): def combine_and_uniquify(*inputs):
combined_list = [ combined_list = [
item.strip().title() for input_list in inputs for item in input_list.split(",") item.strip().title() for input_list in inputs for item in input_list.split(",")
] ]
final_list = [] unique_names = set(combined_list)
for name in combined_list:
final_set = set()
for name in unique_names:
parts = name.split() parts = name.split()
if len(parts) == 2: if len(parts) == 2:
first, last = parts first, last = parts
reversed_name = f"{last} {first}" reversed_name = f"{last} {first}"
if name not in final_list and reversed_name not in final_list: # Add the name if its reversed variant is not already in the final set
final_list.append(name) if reversed_name not in final_set:
final_set.add(name)
else: else:
if name not in final_list: final_set.add(name)
final_list.append(name)
sorted_list = sorted(final_list) return ",".join(sorted(final_set))
output = ",".join(sorted_list)
return output
def main(): def main():
@@ -41,13 +35,9 @@ def main():
) )
parser.add_argument("lists", nargs="+", type=str, help="Comma-separated lists.") parser.add_argument("lists", nargs="+", type=str, help="Comma-separated lists.")
args = parser.parse_args() args = parser.parse_args()
if len(args.lists) == 1:
args.lists.append(args.lists[0])
# Process each list with split_and_flat if it contains a dash processed_lists = [flatten_and_split(lst) for lst in args.lists]
processed_lists = [split_and_flat(lst) if "-" in lst else lst for lst in args.lists] result = combine_and_uniquify(*processed_lists)
result = unique_combined_list(*processed_lists)
print(result) print(result)