-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter.py
More file actions
51 lines (46 loc) · 1014 Bytes
/
filter.py
File metadata and controls
51 lines (46 loc) · 1014 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
Filter the data to only include properties in Sydney
"""
import polars as pl
# Council names that this script treats as being part of Sydney. Values are
# matched exactly (including case) against the council_name column below.
sydney_councils = [
    "BAYSIDE",
    "BLACKTOWN",
    "BLUE MOUNTAINS",
    "BURWOOD",
    "CAMDEN",
    "CAMPBELLTOWN",
    "CANADA BAY",
    "CANTERBURY-BANKSTOWN",
    "CITY OF PARRAMATTA",
    "CITY OF SYDNEY",
    "CUMBERLAND",
    "FAIRFIELD",
    "GEORGES RIVER",
    "HAWKESBURY",
    "HORNSBY",
    "HUNTERS HILL",
    "INNER WEST",
    "KU-RING-GAI",
    "LANE COVE",
    "LIVERPOOL",
    "MOSMAN",
    "NORTH SYDNEY",
    "NORTHERN BEACHES",
    "PENRITH",
    "RANDWICK",
    "RYDE",
    "STRATHFIELD",
    "SUTHERLAND",
    "THE HILLS SHIRE",
    "UNINCORPORATED SYDNEY HARBOUR",
    "WAVERLEY",
    "WILLOUGHBY",
    "WOLLONDILLY",
    "WOOLLAHRA",
]

# Scan the CSV lazily instead of reading it eagerly: the input lives under
# "large-files/", and a lazy scan lets polars apply the council filter while
# the file is being read rather than materialising every row first. The
# collected result is the same filtered DataFrame as read_csv + filter.
#
# Rows are kept when council_name is one of the names listed above.
# NOTE(review): an earlier comment described council_name as holding the
# suburb name; the values compared here are council names - confirm against
# the data.
df = (
    pl.scan_csv("large-files/nsw_property_data.csv")
    .filter(pl.col("council_name").is_in(sydney_councils))
    .collect()
)
print(df.head())

# Save the filtered data.
# NOTE(review): the output is tab-separated even though the file name ends in
# ".csv"; both are left unchanged here in case downstream readers depend on
# them.
df.write_csv("sydney_property_data.csv", separator="\t")