-
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathjson2csv.py
More file actions
executable file
·100 lines (89 loc) · 2.88 KB
/
json2csv.py
File metadata and controls
executable file
·100 lines (89 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
import codecs
import cStringIO
import csv
import io
import json
# Script to convert the data.json file, scraped using the Scrapy spider
# from the EU Whoiswho website, to a CSV stored in data.csv
# Write CSV with correct encoding
# Taken from: https://docs.python.org/2/library/csv.html#examples
class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    Each row is first rendered by the csv module into an in-memory buffer,
    then re-encoded into the target encoding and written to "f". "f" always
    receives encoded byte strings, so on Python 3 it must be a binary
    stream (e.g. a file opened with 'wb').

    Parameters:
        f:        object with a write() method that accepts byte strings.
        dialect:  csv dialect, passed through to csv.writer.
        encoding: name of the encoding used for the data written to "f".
        **kwds:   extra formatting parameters, passed through to csv.writer.

    Cells may be text, UTF-8 encoded byte strings, or any other value the
    csv module can render itself (None becomes an empty cell, numbers are
    converted with str()).
    """

    # True when the csv module of the running interpreter works on byte
    # strings (Python 2) rather than on text (Python 3).
    _BYTES_CSV = str is bytes
    # The text type of the running interpreter (unicode on 2, str on 3).
    _TEXT_TYPE = type(u"")

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to a queue whose type matches what csv.writer
        # emits on this interpreter
        if self._BYTES_CSV:
            self.queue = io.BytesIO()
        else:
            # newline="" keeps the dialect's line terminator untouched
            self.queue = io.StringIO(newline="")
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def _coerce(self, cell):
        """Convert one cell to the string type the csv module expects."""
        if self._BYTES_CSV:
            # Only text needs encoding; calling .encode() on anything else
            # would fail (None, numbers) or trigger an implicit ASCII
            # decode (byte strings)
            if isinstance(cell, self._TEXT_TYPE):
                return cell.encode("utf-8")
            return cell
        # Byte strings are taken to be UTF-8, mirroring the Python 2 path
        if isinstance(cell, bytes):
            return cell.decode("utf-8")
        return cell

    def writerow(self, row):
        """Write a single row (an iterable of cells) to the target stream."""
        self.writer.writerow([self._coerce(cell) for cell in row])
        # Fetch the rendered row from the queue ...
        data = self.queue.getvalue()
        if self._BYTES_CSV:
            # ... which holds UTF-8 bytes when csv works on byte strings
            data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue; io buffers keep their position on truncate, so
        # rewind first
        self.queue.seek(0)
        self.queue.truncate()

    def writerows(self, rows):
        """Write every row of the iterable "rows", in order."""
        for row in rows:
            self.writerow(row)
# Column names in output order; each one is also the key that is looked up
# on every scraped item
FIELDS = [
    'hierarchy',
    'title',
    'name',
    'email',
    'telephone',
    'fax',
    'url',
    'source'
]


def _cell(item, field):
    """Return the CSV cell for key `field` of one scraped `item`."""
    value = item[field]
    # The hierarchy list is always flattened into a single ' | '-separated
    # string so that it fits in one CSV cell
    if field == 'hierarchy':
        return ' | '.join(value)
    # Any other falsy value is written as an empty cell
    if not value:
        return ''
    # Multiple telephone numbers are combined with a comma
    if field == 'telephone':
        return ', '.join(value)
    return value


# Read the scraped items from data.json
with open('data.json') as IN:
    data = json.load(IN)

# Write the header, then one row per item (rows sorted), to data.csv
with open('data.csv', 'w') as OUT:
    wr = UnicodeWriter(OUT, quoting=csv.QUOTE_ALL, lineterminator='\n')
    wr.writerow(FIELDS)
    rows = [[_cell(item, field) for field in FIELDS] for item in data]
    rows.sort()
    wr.writerows(rows)