This repository was archived by the owner on Sep 7, 2023. It is now read-only.
forked from shaunagm/followthemoney
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv2sqlite.py
More file actions
executable file
·82 lines (71 loc) · 2.82 KB
/
csv2sqlite.py
File metadata and controls
executable file
·82 lines (71 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/python
"""Import a CSV file into a (SQLite) database.
This can be run as a command-line tool; give on the command line the name
of the CSV file, the name of the SQLite database file, and the name of
the SQLite database table.
This can also be invoked programmatically. Call csv2sqlite.do_import()
with the file-like object for the input file, the database connection
object, and the name of the database table. This should work for any
database sufficiently similar to SQLite for the Python database API to
work."""
import csv
import optparse
import sqlite3
import sys
def do_import(f, db, table, drop=False, create=True, progress=None):
"""Import content from CSV file-like object 'f' into database 'db'
using table name 'table'. By default the table will be CREATE TABLE'd,
but if the optional 'create' parameter is set to False it will not be
and data will only be added to an existing table. If the 'drop'
parameter is set to true then the table will be dropped before
recreating it. If 'progress' is given, it is a function called with
the total number of processed rows, every 10,000 rows."""
cur = db.cursor()
if drop:
create = True # this makes no sense otherwise
try:
cur.execute("DROP TABLE {0}".format(table))
except sqlite3.OperationalError, e:
pass # no such table, ignore
pass
csvr = csv.DictReader(f)
# DictReader will read the list of field names from the first line
columns = [ "{0} VARCHAR(1024)".format(c) for c in csvr.fieldnames ]
# TODO: Different database drivers use different syntax
qmarks = [ "?" for c in csvr.fieldnames ]
insert = "INSERT INTO {0} VALUES ({1})".format(table, ', '.join(qmarks))
if create:
query = "CREATE TABLE {0} ({1})".format(table, ', '.join(columns))
cur.execute(query)
pass
count = 0
for row in csvr:
count += 1
if progress is not None and (count % 10000) == 0:
progress(count)
pass
values = []
for f in csvr.fieldnames:
values.append(row[f])
pass
cur.execute(insert, values)
pass
db.commit()
if progress is not None:
progress(count)
pass
if __name__ == '__main__':
parser = optparse.OptionParser(usage='%prog file.csv file.sqlite table')
parser.add_option("-d", "--drop", action="store_true",
help="Drop the table before recreating it")
(options,args) = parser.parse_args()
if len(args) < 3:
parser.error("must provide CSV, database file, and table names")
pass
f = open(args[0], 'r')
db = sqlite3.connect(args[1])
table = args[2]
def progress(n): print "...{0}".format(n)
do_import(f, db, table, drop=options.drop, progress=progress)
f.close()
pass