Compare commits

...

4 Commits

Author SHA1 Message Date
Lukáš Kucharczyk 68a60ea873 Improve name handling
Consider reversed first and last name order for uniqueness
Sort names with same words close to each other
2023-10-22 17:36:49 +02:00
Lukáš Kucharczyk 019813cc30 bullet_list_to_unique_list: add 2023-10-22 17:35:16 +02:00
Lukáš Kucharczyk 52becb8909 split_and_flat: add 2023-10-22 17:35:07 +02:00
Lukáš Kucharczyk 7477c13c34 merge-csv-lists: add 2023-10-22 17:34:48 +02:00
3 changed files with 145 additions and 0 deletions

View File

@ -0,0 +1,39 @@
#!/usr/bin/python
import sys
import io
def extract_unique_values(input_string):
    """Return the unique values of a dash-separated list as a sorted CSV string.

    Every line of *input_string* is split on "-" and each piece is stripped of
    surrounding whitespace. Duplicates are removed and the remaining values are
    sorted and joined with commas.

    Empty pieces are discarded. They appear whenever a line starts with a
    bullet dash ("- value"), ends with a dash, or is blank, and would otherwise
    show up as an empty entry (a stray leading comma) in the result.

    Args:
        input_string: Text with one entry per line, values separated by "-".

    Returns:
        The unique, non-empty values sorted and joined with ",". An input
        without any values yields an empty string.
    """
    unique_values = {
        value
        for line in input_string.split("\n")
        for value in (piece.strip() for piece in line.split("-"))
        if value  # skip artifacts of bullet dashes and blank lines
    }
    return ",".join(sorted(unique_values))
def main():
    """Read a dash-separated list from stdin and print its unique values."""
    # Re-wrap the underlying byte streams so text I/O is always UTF-8,
    # whatever encoding the streams were opened with.
    sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8")
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")

    # Consume all of standard input, reduce it to unique values, emit them.
    raw_text = sys.stdin.read()
    print(extract_unique_values(raw_text))


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

68
merge-csv-lists.py Normal file
View File

@ -0,0 +1,68 @@
#!/usr/bin/python
import argparse
def unique_combined_list(input1, input2):
    """Merge two comma-separated name lists into one de-duplicated CSV string.

    Each item is stripped of surrounding whitespace and title-cased. Empty
    items (from an empty argument, a trailing comma, or ",,") are ignored;
    previously they reached the sort key and raised IndexError.

    A name is treated as a duplicate when the same name was already kept or,
    for two-word names, when its reversed form ("Last First" for
    "First Last") was already kept. The first spelling encountered wins.

    The result is ordered by where the first word of each name first appears
    among all words of the combined input, so names starting with the same
    word end up next to each other. Ties are broken by the position of the
    name in the combined input.

    Args:
        input1: The first comma-separated list of names.
        input2: The second comma-separated list of names.

    Returns:
        The unique names joined with ",". Returns an empty string when neither
        input contains a non-empty name.
    """
    combined = [
        name
        for raw in (input1, input2)
        for name in (item.strip().title() for item in raw.split(","))
        if name
    ]

    # Keep the first occurrence of each name, also rejecting two-word names
    # whose reversed word order has been kept already.
    seen = set()
    unique_names = []
    for name in combined:
        if name in seen:
            continue
        parts = name.split()
        if len(parts) == 2 and f"{parts[1]} {parts[0]}" in seen:
            continue
        seen.add(name)
        unique_names.append(name)

    # Position of the first occurrence of every word across all names
    # (duplicates included), and of every name in the combined input.
    first_word_position = {}
    all_words = (word for name in combined for word in name.split())
    for position, word in enumerate(all_words):
        first_word_position.setdefault(word, position)

    first_name_position = {}
    for position, name in enumerate(combined):
        first_name_position.setdefault(name, position)

    ordered = sorted(
        unique_names,
        key=lambda name: (
            first_word_position[name.split()[0]],
            first_name_position[name],
        ),
    )
    return ",".join(ordered)
def main():
    """Merge the two comma-separated lists given on the command line and print the result."""
    cli = argparse.ArgumentParser(
        description="Combine two comma-separated lists into one unique sorted list."
    )
    # Both positional arguments are plain strings holding one list each.
    for dest, help_text in (
        ("list1", "The first comma-separated list."),
        ("list2", "The second comma-separated list."),
    ):
        cli.add_argument(dest, type=str, help=help_text)

    options = cli.parse_args()
    print(unique_combined_list(options.list1, options.list2))


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

38
split_and_flat.py Normal file
View File

@ -0,0 +1,38 @@
import sys
def split_and_flat(input_string):
    """Flatten a newline-, comma- and dash-separated string into one CSV string.

    Newlines, commas and dashes are all treated as value separators. Every
    value is stripped of surrounding whitespace and the values are joined with
    commas in their original order. Duplicates are kept.

    Empty values are dropped. This extends the original empty-line filter to
    whitespace-only lines, lines ending in a carriage return, and leading,
    trailing or doubled separators, none of which carry a value.

    Args:
        input_string: The text to split.

    Returns:
        The non-empty values joined with ",". Returns an empty string when
        the input contains no values.
    """
    # Collapse every separator into a comma so one split covers all of them.
    unified = input_string.replace("\n", ",").replace("-", ",")
    values = (piece.strip() for piece in unified.split(","))
    return ",".join(value for value in values if value)
if __name__ == "__main__":
    # The script takes exactly one positional argument: the string to flatten.
    cli_arguments = sys.argv[1:]
    if len(cli_arguments) != 1:
        print("Usage: python split_and_flat.py <input_string>")
        sys.exit(1)

    # Flatten the given string and write the result to standard output.
    print(split_and_flat(cli_arguments[0]))