-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinal1.csv
More file actions
We can make this file beautiful and searchable if this error is corrected: Illegal quoting in line 3.
230 lines (165 loc) · 6.11 KB
/
final1.csv
File metadata and controls
230 lines (165 loc) · 6.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#capture prevents the output of the wget from showing
%%capture
!wget "http://www.csc.calpoly.edu/~dekhtyar/466-Fall2019/data/babynames/NationalNames.csv"
!wget "http://www.csc.calpoly.edu/~dekhtyar/466-Fall2019/data/babynames/StateNames.csv"
!wget "https://raw.githubusercontent.com/maciverlab/ME224_public/main/homework/data_forensics/found_on_phone_cleaned.csv"
!wget "https://raw.githubusercontent.com/Aokehua23132/Testing/main/
reading files
def read_csv(filename):
    """Read a CSV file into a list of dictionaries, one per data row.

    The first row of the file supplies the keys. Every value is kept as a
    string; no type conversion is performed.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of ``{header: value}`` dicts in file order. A row with fewer
        values than headers yields a dict with only the leading keys; values
        beyond the last header are ignored. Blank lines are skipped. An empty
        file yields an empty list.
    """
    # Imported under an alias so the name cannot collide with a module-level
    # variable called ``csv`` elsewhere in the notebook.
    import csv as csv_module

    # newline="" lets the csv module handle line endings and quoted fields
    # (including commas inside quotes) itself.
    with open(filename, "r", newline="") as file:
        reader = csv_module.reader(file)
        headers = next(reader, None)
        if headers is None:
            return []
        # zip() stops at the shorter sequence, so over-long rows no longer
        # raise IndexError and short rows keep only the keys they have.
        return [dict(zip(headers, values)) for values in reader if values]
csv = read_csv('/content/NationalNames.csv')
pandas (not specific for numbers)
# We start with importing the package
import pandas as pd
# load the national baby names database
df = pd.read_csv("NationalNames.csv")
# show first 5 rows
df.head(5)
# load the state baby names database
state_df = pd.read_csv("StateNames.csv")
# Show the last 5 rows
state_df.tail(5)
# The columns are the labels across the top
print(state_df.columns[1])
print(type(state_df.columns[1]))
# The indexes run down the side (rows)
print(state_df.index)
# Show the basic info of your data: how many rows and columns, and what are they?
df.info()
# use sort_values to sort by name, ascending
sorted_df = df.sort_values(by="Name", ascending=True)
sorted_df.head(5)
or
#use sort_values to sort by year and count descending
two_key_sorted_df= df.sort_values(by=["Year","Count"], ascending=False)
two_key_sorted_df.head(5)
# We can access a column with the . notation shown here
df.Count  # gives us the entire index with counts and length
or
# Or multiple columns using a list to index
# note: make sure you use double square brackets [["column_1","column_2"]]
df[["Name","Count"]]
# We can slice like in a list. Remember, dataframe is like a 2D list,
df[10:11]
# so the code below will get all the columns
# Notice how the row indexes are numeric and that's what we slice on
# The operation order doesn't alter the result
# Reverse the column selection and slicing operations, then check the results
df[:10][["Year", "Name"]]
# Filter all male names from this century (2000 onward) in the state of Illinois.
# The three conditions are combined into one boolean mask with `&` instead of
# chaining [...][...][...]: chaining applies a full-length mask to an
# already-filtered frame, which makes pandas reindex the mask and emit a
# "Boolean Series key will be reindexed to match DataFrame index" warning.
state_df[
    (state_df.Gender == "M")
    & (state_df.Year >= 2000)
    & (state_df.State == "IL")
]
# print loc[0]
print(sorted_df.loc[0])
# print iloc[0]
print()
print(sorted_df.iloc[0])
loc is strictly label-based; iloc is integer-based and works on positional indexes
df.info() gives us info on the data frames
# NOTE(review): `df` here presumably holds the phone GPS log
# (found_on_phone_cleaned.csv is downloaded earlier) rather than the
# baby-names table — confirm it has been loaded into `df` before this runs.
# Keep just the two coordinate columns as a smaller DataFrame.
path = df[['latitude', 'longitude']]
kms = df['kms']
vel = df['gps_currKPH'] # velocity in kph
# 10/36 == 1000 m / 3600 s, i.e. the km/h -> m/s conversion factor.
vel_m_per_s = vel*(10/36)
import matplotlib.pyplot as plt
plt.plot(df['currBPM'],color='green')
plt.plot(df['gps_currKPH'],color='blue')
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("Raw Data.csv")
df.columns
# how do we see the difference in time between samples?
# hint: do you recall a matlab function that does this?
# Pull the time base and the three linear-acceleration axes out of the frame.
rawt, rawx, rawy, rawz = (
    df[column]
    for column in (
        'Time (s)',
        'Linear Acceleration x (m/s^2)',
        'Linear Acceleration y (m/s^2)',
        'Linear Acceleration z (m/s^2)',
    )
)
# One panel per axis in a 1x4 grid: x in red, y in green, z in blue.
axis_traces = ((rawx, 'r-'), (rawy, 'g-'), (rawz, 'b-'))
for panel, (trace, line_style) in enumerate(axis_traces, start=1):
    plt.subplot(1, 4, panel)
    plt.plot(rawt, trace, line_style)
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("Raw Data.csv")
rawt = df['Time (s)']
raw_out = df['Linear Acceleration y (m/s^2)'] # out axis is y
raw_up = df['Linear Acceleration z (m/s^2)'] #
raw_right = df['Linear Acceleration x (m/s^2)']
# Wide figure so the side-by-side panels stay readable.
plt.figure(figsize=(12,5))
# Panel number -> (signal, line format): out in red, up in blue, right in green.
panel_layout = {
    1: (raw_out, 'r-'),
    2: (raw_up, 'b-'),
    3: (raw_right, 'g-'),
}
for slot, (signal, fmt) in panel_layout.items():
    plt.subplot(1, 4, slot)
    plt.plot(rawt, signal, fmt)
--------------------
# Pandas, and install the folium package, which lets you work with
# maps, and numpy
%%capture
!pip install folium
# use sort_values to sort by name, ascending
sorted_df = df.sort_values(by="Name", ascending=True)
sorted_df.head(5)
import folium
token = "pk.eyJ1IjoiZ2VybWFuZXNwaW5vc2EiLCJhIjoiY2x0NHVmcG1hMDZucjJpcXE1MzQ0Ym55eCJ9.byXqX6eUYcwDbOEZ1d05-g"
# Tile URL template for Mapbox satellite imagery, with the access token
# appended as a query parameter.
# NOTE(review): `token` is hard-coded on the line above; consider loading it
# from configuration instead of keeping it in the notebook.
tileurl = 'https://api.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}@2x.png?access_token=' + str(token)
# Coordinate columns that make up the path to draw.
path = df[['latitude', 'longitude']]
# Centre the map on the mean latitude/longitude of the path.
# NOTE(review): the name `map` shadows the Python builtin of the same name.
map = folium.Map(location=[path.latitude.mean(), path.longitude.mean()],
zoom_start = 10,
tiles=tileurl,
control_scale=True,
attr='Mapbox')
# Draw the whole path as a polyline on the map.
path_taken = folium.PolyLine(path).add_to(map)
# Green circle from the first row and red circle from the last row
# (presumably the start and end of the route).
folium.Circle(path[0:1], color="green").add_to(map)
folium.Circle(path[-1:], color="red").add_to(map)
# Bare expression: presumably renders the map as the notebook cell's output.
map
----------
import folium
# New York City as a latitude/longitude pair
latitude, longitude = 40.7128, -74.0060
# Base map centered on the city
map_nyc = folium.Map(zoom_start=10, location=[latitude, longitude])
# Build the city marker, then attach it to the map
nyc_marker = folium.Marker([latitude, longitude], popup='New York City')
nyc_marker.add_to(map_nyc)
# Display the map
map_nyc
-------------------
import folium
# Create a map centered around the USA
map_usa = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# City name -> (latitude, longitude)
cities = {
    "New York City": (40.7128, -74.0060),
    "Los Angeles": (34.0522, -118.2437),
    "Chicago": (41.8781, -87.6298),
}

# Add one cloud-icon marker per city; the popup shows the city name.
# The loop body must be indented under the `for`, otherwise this is a
# syntax error.
for city, coords in cities.items():
    folium.Marker(
        location=coords,
        popup=city,
        icon=folium.Icon(icon='cloud'),
    ).add_to(map_usa)

# Display the map
map_usa
------------
import folium
from folium.plugins import HeatMap
# Sample (latitude, longitude) points in San Francisco
locations = [
    (37.7749, -122.4194),  # San Francisco
    (37.7786, -122.4892),  # Golden Gate Park
    (37.7975, -122.4143),  # Fisherman's Wharf
]
# Base map centered around San Francisco
map_sf = folium.Map(zoom_start=13, location=[37.7749, -122.4194])
# Build the heat layer from the sample points, then attach it to the map
heat_layer = HeatMap(locations)
heat_layer.add_to(map_sf)
# Display the map
map_sf