-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter_csv.py
More file actions
68 lines (55 loc) · 2.43 KB
/
filter_csv.py
File metadata and controls
68 lines (55 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import argparse
import csv
import os
import sys
def filter_csv(input_file, output_file, is_correct):
    """
    Copy the rows of a CSV file whose isCorrect column equals a given value.

    The input is read completely and closed before the output file is
    opened. The output keeps the input's header row. A one-line summary is
    printed to stdout on success; error messages are printed to stderr.

    Args:
        input_file (str): Path to the input CSV file
        output_file (str): Path to the output CSV file
        is_correct (str): Filter value for isCorrect column ('0' or '1');
            compared to the cell text with exact string equality

    Raises:
        SystemExit: With exit code 1 when the input has no 'isCorrect'
            column (including an empty file with no header row), when a
            file path cannot be found, or when any other error occurs
            while reading or writing.
    """
    try:
        with open(input_file, 'r', newline='') as infile:
            reader = csv.DictReader(infile)
            fieldnames = reader.fieldnames

            # fieldnames is None when the file has no header row at all
            # (e.g. an empty file); treat that the same as a missing column
            # instead of letting `in None` raise a confusing TypeError.
            if not fieldnames or 'isCorrect' not in fieldnames:
                print(f"Error: 'isCorrect' column not found in {input_file}",
                      file=sys.stderr)
                sys.exit(1)

            # Keep only the rows whose isCorrect cell matches exactly.
            filtered_rows = [row for row in reader
                             if row['isCorrect'] == is_correct]

        # Write the header followed by the matching rows.
        with open(output_file, 'w', newline='') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(filtered_rows)

        print(f"Filtered CSV created with {len(filtered_rows)} rows (plus header) where isCorrect = {is_correct}")
    except FileNotFoundError as e:
        # The missing path may be the output location (e.g. a directory that
        # does not exist), so report the path the OS actually complained about.
        missing = e.filename if e.filename is not None else input_file
        print(f"Error: File '{missing}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Command-line boundary: report any other failure and exit non-zero.
        # SystemExit is not an Exception subclass, so the exit above is not
        # intercepted here.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
def main(argv=None):
    """
    Parse command-line arguments and run the isCorrect filter.

    Args:
        argv (list[str] | None): Argument strings to parse. When None,
            argparse reads them from sys.argv[1:], so calling main() with
            no arguments behaves as a normal command-line entry point.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(description='Filter CSV file based on isCorrect column')
    parser.add_argument('is_correct', choices=['0', '1'],
                        help='Filter value for isCorrect column (1 for True, 0 for False)')
    parser.add_argument('--input', '-i', required=True,
                        help='Input CSV file path')
    parser.add_argument('--output', '-o',
                        help='Output CSV file path (default: based on input filename and filter value)')

    # Parse arguments
    args = parser.parse_args(argv)

    # Set default output filename if not provided
    if not args.output:
        # splitext only removes an extension from the last path component,
        # so dots in directory names ("./data/file", "runs.v2/results") or a
        # leading dot (".hidden") are left intact. Splitting the whole path
        # on its last '.' would truncate those paths.
        input_name = os.path.splitext(args.input)[0]
        args.output = f"{input_name}_{args.is_correct}.csv"

    # Run the filter function
    filter_csv(args.input, args.output, args.is_correct)
# Run the command-line interface only when this file is executed as a
# script; importing it as a module does not call main().
if __name__ == "__main__":
    main()