-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse.rb
More file actions
47 lines (32 loc) · 803 Bytes
/
parse.rb
File metadata and controls
47 lines (32 loc) · 803 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
require 'nokogiri'
require 'pry'
require 'csv'
# Shared accumulator: html_parser appends one hash per matched element here,
# and the CSV-writing step at the end of the script reads it back.
$tweets = []

# Collect the .htm files in the current working directory. Dir.glob already
# returns an Array, so no element-by-element copy is needed.
file_list = Dir.glob("*.htm")
p "Grabbed file list..."
# Parses one HTML file and records the text of every element matching the
# CSS selector ".started".
#
# For each match, a hash of the form
#   { timestamp: <stripped element text>, tag: <file> }
# is appended to the global $tweets array, and a confirmation line is printed.
#
# @param file [String] path of the HTML file to read
# @return the collection of matched elements returned by page.css(".started")
def html_parser(file)
  # File.open with a block closes the handle as soon as parsing finishes and,
  # unlike Kernel#open, never treats a name starting with "|" as a command.
  page = File.open(file) { |io| Nokogiri::HTML(io) }
  tags = page.css(".started")
  tags.each do |tag|
    $tweets << { timestamp: tag.text.strip, tag: file }
  end
  p "Successfully parsed #{file}"
  tags
end
# Parse every collected file; html_parser appends its results to $tweets.
file_list.each { |file| html_parser(file) }

# Write one CSV row per collected entry: the timestamp text followed by the
# source file's name with its directory and extension stripped.
CSV.open("test.csv", "wb") do |csv|
  $tweets.each do |tweet|
    source = tweet[:tag]
    csv << [tweet[:timestamp], File.basename(source, File.extname(source))]
  end
end
# Report success only after CSV.open has flushed and closed the file.
p "Successfully output CSV."