Skip to content

Commit d1822fb

Browse files
author
RamanjaneyuluIdavalapati
committed
strarray
1 parent 72b3f56 commit d1822fb

File tree

6 files changed

+71
-11
lines changed

6 files changed

+71
-11
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ deploy:
99
- provider: releases
1010
api_key:
1111
secure: KuV+GjVaFNKvhpI3rgBjolmdhtRWbnJSOgy2iDT4+GQMEz+ypXF3XGR8Opx8NDDeFoBzRxcLcfqRo0Og/i06n5IZ/GcqppErJxYGb984qAJVymukm7pUO4+tls10pDrzZH0+4tTp3SNHukUlcUFjk/+bCTrD67uCZsQGCob3aflLBNx+uL+q3TinF/gbLKdf6wLQqVzkye//ZC20zjZWLRQpyQPRAH1CgGKtRETo5BgSq9w4LbGZd0pGc3S3b33wf3MVzfVlepuXHtwEpviXlXYImRX8/giw6SIx/EJN5IZFkeyGFBetdPsN6dCcOiWAlaFAlrsUSb/YtlrNWZOizkUpmzlAmPTgpl/rW1kS2UUxjLMV1w3oaBt8bhRhX97C0SI0gO2cMWO4E2NIqUFG+rz7Y9VBb/ZpWTlaT5odU+paIBYT0ii6m79YYVu53ajyB6e26zN1Mw12fmRlzBTWsZopxVa22P1+zuIEqtN9meMu5KKONuQ7FL3iNphA8RGguj9X6NKVy4PbbO/25fGScy1oTxsAVCDsiq9x3M+tFg8+9g1fJJ1Ry30wq2cqe1L9o3AaEcuoIvBhf2cIj2ZO1NQAFr9/pkr7t4w/HfJsrRGmlK4hLFkNwZUPdufIS/1s/66lHIiaXacM069xz47zpuxNftjjF3DoZX5Ge/wjKn8=
12-
name: diskarray-0.1.6
13-
tag_name: 0.1.6
12+
name: diskarray-0.1.7
13+
tag_name: 0.1.7
1414
on:
1515
repo: deep-compute/diskarray
1616
- provider: pypi

diskarray/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .command import main
22
from .diskarray import DiskArray
33
from .vararray import DiskVarArray
4+
from .strarray import DiskStringArray

diskarray/diskarray.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,22 @@
88
from .exception import AppendNotSupported
99

1010
class DiskArray(object):
11+
'''
12+
Stores binary data on disk as a memory mapped file
13+
using numpy.memmap. Allows for growing the disk data
14+
by appending and extending.
15+
16+
Links:
17+
* https://en.wikipedia.org/wiki/Memory-mapped_file
18+
19+
# FIXME:
20+
1. Explain capacity and actual shape
21+
2. Explain growby
22+
3. Explain not having to specify shape for 1d arrays
23+
4. Explain using structured arrays
24+
5. Why memory mapping? What does it provide?
25+
6. Why not use np.save and np.load?
26+
'''
1127
GROWBY = 10000
1228

1329
def __init__(self, fpath, dtype, mode='r+', shape=None,
@@ -69,10 +85,13 @@ def _shape_bytes(self, shape, dtype_bytes):
6985

7086
def _truncate_if_needed(self):
7187
fd = os.open(self._fpath, os.O_RDWR|os.O_CREAT)
72-
dtype_bytes = np.dtype(self._dtype).itemsize
73-
nbytes = self._shape_bytes(self._shape, dtype_bytes)
74-
os.ftruncate(fd, nbytes)
75-
self._capacity_shape = self._shape
88+
try:
89+
dtype_bytes = np.dtype(self._dtype).itemsize
90+
nbytes = self._shape_bytes(self._shape, dtype_bytes)
91+
os.ftruncate(fd, nbytes)
92+
self._capacity_shape = self._shape
93+
finally:
94+
os.close(fd)
7695
self._create_ndarray()
7796

7897
@property
@@ -184,6 +203,11 @@ def grow(self, n):
184203
# FIXME: code
185204
pass
186205

206+
def close(self):
    '''
    Release the memory map backing this array.

    Closes the mmap object held by `self.data` and then removes
    the `data` and `_fpath` attributes, so the instance must not
    be used afterwards. Calling close() again on an instance that
    is already closed is a no-op instead of an AttributeError.
    '''
    data = getattr(self, 'data', None)
    if data is None:
        # Already closed: `data` was deleted by an earlier call.
        return

    # NOTE(review): `_mmap` is a private attribute of the array
    # object (presumably a numpy.memmap, which offers no public
    # close method) - confirm it stays available on the numpy
    # versions this package supports.
    data._mmap.close()
    del self.data
    del self._fpath
210+
187211
def truncate(self, n):
188212
# FIXME: code
189213
pass

diskarray/strarray.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from logging import Logger
2+
3+
import numpy as np
4+
5+
from .vararray import DiskVarArray
6+
7+
class DiskStringArray(DiskVarArray):
    '''
    A DiskVarArray whose items are strings.

    Every item is stored as a run of uint8 values (with uint64
    index entries) and is handed back as a byte string when read.
    '''

    def __init__(self, dpath, mode='r+',
                 growby=DiskVarArray.GROWBY,
                 log=Logger):
        # NOTE(review): the default for `log` is the Logger class
        # itself, not a logger instance - confirm DiskVarArray
        # copes with that before relying on the default.
        super(DiskStringArray, self).__init__(
            dpath,
            dtype=np.uint8, dtype_index=np.uint64,
            mode=mode, growby=growby, log=log)

    @staticmethod
    def _to_uint8(v):
        '''
        Convert one string-like value to a 1-d uint8 array.

        Byte strings are used as they are, text is stored as its
        UTF-8 encoding, and any other iterable is taken to hold
        integer byte values already.
        '''
        if isinstance(v, bytearray):
            v = bytes(v)
        elif not isinstance(v, bytes) and hasattr(v, 'encode'):
            # Text has no byte values of its own; np.array() on a
            # list of characters raises ValueError.
            v = v.encode('utf-8')

        if isinstance(v, bytes):
            # copy() yields a fresh writable array, like the
            # np.array(list(v)) call this replaces.
            return np.frombuffer(v, dtype=np.uint8).copy()

        return np.array(list(v), dtype=np.uint8)

    def __getitem__(self, idx):
        '''Return item `idx` as a byte string.'''
        data = super(DiskStringArray, self).__getitem__(idx)
        # tobytes() returns the same bytes as the deprecated
        # tostring() alias.
        return data.tobytes()

    def append(self, v):
        '''Append the single string `v` as one new item.'''
        return super(DiskStringArray, self).append(self._to_uint8(v))

    def extend(self, v):
        '''Append every string in the iterable `v` as its own item.'''
        encoded = [self._to_uint8(x) for x in v]
        return super(DiskStringArray, self).extend(encoded)

diskarray/vararray.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ def num_lists(self):
109109
'''
110110
return len(self.index)
111111

112+
def __len__(self):
113+
return self.num_lists
114+
112115
def append(self, v):
113116
'''
114117
>>> d = DiskVarArray('/tmp/test3', dtype='uint32')
@@ -124,10 +127,12 @@ def append(self, v):
124127
self.data.extend(v)
125128

126129
def extend(self, v):
    '''
    Append several lists in one call.

    `v` is an iterable of array-likes; each element becomes one
    new entry. The start offset of every new entry is added to
    `self.index` and the concatenated values are added to
    `self.data` with a single extend. An empty `v` is a no-op.
    '''
    v = list(v)
    if not v:
        # np.concatenate raises ValueError on an empty sequence,
        # and no index entry may be written for zero lists.
        return

    # Offsets are positions inside `self.data`, so the first new
    # entry starts at the current end of the data, not at 0.
    base = len(self.data)
    ends = np.cumsum([len(x) for x in v])
    starts = base + np.concatenate(([0], ends[:-1]))

    self.index.extend(starts)
    self.data.extend(np.concatenate(v))
131136

132137
def destroy(self):
133138
'''
@@ -144,3 +149,7 @@ def destroy(self):
144149

145150
self.index.destroy()
146151
self.index = None
152+
153+
def close(self):
    '''
    Close both underlying arrays, `data` and `index`.

    `index` is closed even when closing `data` raises, so a
    failure in the first close does not leave the second array
    open. The exception from `data.close()` still reaches the
    caller.
    '''
    try:
        self.data.close()
    finally:
        self.index.close()

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from setuptools import setup, find_packages
22

3-
version = '0.1.6'
3+
version = '0.1.7'
44
setup(
55
name="diskarray",
66
version=version,

0 commit comments

Comments
 (0)