"""extract_unique_values.py

Command-line script that reads a CSV file with pandas and prints the sorted
unique values found in one named column.
"""
import pandas as pd
import argparse
def extract_unique_values(csv_file, column_name):
    """Return the sorted unique values of one column of a CSV file.

    Missing entries (empty cells, which pandas loads as NaN) are ignored:
    NaN cannot be ordered against strings and is not collapsed by a plain
    ``set``, so keeping it would either break the sort or produce
    duplicate, arbitrarily placed entries.

    Args:
        csv_file (str): Path to the CSV file.
        column_name (str): Name of the column to extract unique values from.

    Returns:
        list | None: The distinct non-missing values in ascending order, or
        ``None`` when the file cannot be read or the column does not exist.
        On failure an ``Error: ...`` line describing the problem is printed.
    """
    # Only the read itself is guarded; everything after it works on an
    # in-memory DataFrame.
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        print(f"Error: File '{csv_file}' not found.")
        return None
    except Exception as e:  # empty file, parse error, permission problem, ...
        print(f"Error: {str(e)}")
        return None

    if column_name not in df.columns:
        available = ', '.join(map(str, df.columns))
        print(
            f"Error: Column '{column_name}' not found in the CSV file. "
            f"Available columns: {available}"
        )
        return None

    # tolist() converts to plain Python scalars, matching what iterating the
    # column directly would yield.
    values = df[column_name].dropna().drop_duplicates().tolist()
    try:
        return sorted(values)
    except TypeError:
        # Mixed-type column (e.g. numbers and text): the values are not
        # mutually comparable, so group by type name and order by text form.
        return sorted(values, key=lambda v: (type(v).__name__, str(v)))
def main(argv=None):
    """Command-line entry point: print the unique values of a CSV column.

    Args:
        argv (list[str] | None): Arguments to parse instead of the process
            command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``.

    Returns:
        int: Exit status -- 0 when the values were printed, 1 when
        extraction failed.
    """
    # Set up command-line argument parsing
    parser = argparse.ArgumentParser(description='Extract unique values from a column in a CSV file.')
    parser.add_argument('csv_file', help='Path to the CSV file')
    parser.add_argument('column_name', help='Name of the column to extract unique values from')
    args = parser.parse_args(argv)

    unique_values = extract_unique_values(args.csv_file, args.column_name)
    if unique_values is None:
        # The extractor has already printed the reason; just signal failure
        # so shell callers do not mistake it for success.
        return 1

    # Display the results
    print(f"\nUnique values in column '{args.column_name}':")
    for value in unique_values:
        print(f" - {value}")
    print(f"\nTotal number of unique values: {len(unique_values)}")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status to the shell as the process exit code.
    raise SystemExit(main())