Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "alignmentrs"
version = "0.10.0"
version = "0.10.3"
authors = ["Kent Kawashima <kentkawashima@gmail.com>"]
edition = "2018"

Expand Down
2 changes: 1 addition & 1 deletion alignmentrs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


__author__ = 'Kent Kawashima'
__version__ = '0.10.0'
__version__ = '0.10.5'
__all__ = [
# From dynamic library
'librs',
Expand Down
18 changes: 8 additions & 10 deletions alignmentrs/aln/alignment.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
""" Alignment class. """

from collections import Counter
from copy import copy, deepcopy
import os
import inspect
# from copy import copy, deepcopy
# import os
# import inspect
import warnings

import pandas
Expand Down Expand Up @@ -183,14 +185,10 @@ def _make_row_meta(self, data=None, ids=None, descriptions=None):
# If descriptions is NOT specified but ids is specified,
# use ids as index and return an empty DataFrame.
elif (descriptions is None) and (ids is not None):
df = pandas.DataFrame([], index=ids)
df['description'] = [''] * len(ids)
return df
return pandas.DataFrame(None, index=ids)
# If both descriptions and ids are not specified,
# use default integer indexing and return an empty DataFrame.
df = pandas.DataFrame([], index=range(self.nrows))
df['description'] = [''] * self.nrows
return df
return pandas.DataFrame(None, index=range(self.nrows))

def _make_col_meta(self, data=None, ids=None, descriptions=None):
# Constructs column metadata using data, or
Expand Down Expand Up @@ -296,7 +294,7 @@ def ncols(self):
@property
def ids(self):
"""list of str: Returns the list of identifiers."""
return self.row_metadata.index.to_list()
return self.row_metadata.index.tolist()

@property
def sequences(self):
Expand Down
6 changes: 4 additions & 2 deletions alignmentrs/aln/col.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from copy import deepcopy
""" Classes for retrieving and removing data column by column. """

# from copy import deepcopy
import numbers
import inspect
# import inspect
import itertools

import pandas
Expand Down
84 changes: 75 additions & 9 deletions alignmentrs/aln/mixins/serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,14 @@ class FastaSerdeMixin:
from a FASTA formatted file.
"""
@classmethod
def from_fasta(cls, path, name=None, parse_row_metadata=True, parse_description=True, column_metadata_decoders=None, column_metadata_regexp='c\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])', column_index_regexp='ci\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])', store_history=True, **kwargs):
def from_fasta(
cls, path, name=None,
# parse_row_metadata=True,
parse_description=True,
# column_metadata_decoders=None,
column_metadata_regexp='c\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])',
column_index_regexp='ci\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])',
store_history=True, **kwargs):
"""Create an Alignment object from a FASTA-formatted file.

Parameters
Expand Down Expand Up @@ -65,8 +72,9 @@ def from_fasta(cls, path, name=None, parse_row_metadata=True, parse_description
match = re.search(column_index_regexp, metadata['descriptions'][0])
if match:
key, value = match.groups()
# Convert text into a list with _parse_str_to_list (no eval)
try:
value = eval(value)
value = cls._parse_str_to_list(value, 'infer')
except SyntaxError:
raise ValueError('Cannot construct Alignment from the given FASTA file: column index is malformed'.format(key))
# Put key-value pair into the dictionary
Expand All @@ -78,10 +86,8 @@ def from_fasta(cls, path, name=None, parse_row_metadata=True, parse_description
match_locations.append(match.span())
key, value = match.groups()
# Convert text into a list with _parse_str_to_list (no eval)
# This is DANGEROUS and could open to exploitation.
# TODO: Add a prelimenary regex check to lessen vulnerability
try:
value = eval(value)
value = cls._parse_str_to_list(value, 'infer')
except SyntaxError:
raise ValueError('Cannot construct Alignment from the given FASTA file: column metadata {} is malformed'.format(key))
# Put key-value pair into the dictionary
Expand Down Expand Up @@ -159,6 +165,66 @@ def to_fasta(self, path=None, include_column_metadata=None, column_metadata_enco
with open(path, 'w') as writer:
print(fasta_str, file=writer)

@staticmethod
def _parse_str_to_list(string: str, item_type: 'type | str' = 'infer'):
    """Return a list parsed from a string written in Python list syntax.

    This is a restricted replacement for calling ``eval`` on the text:
    it only splits the text on commas and converts each item, so no
    code embedded in the input is ever executed.

    Parameters
    ----------
    string : str
        Text to convert, e.g. "[1, 2, 3]". It has to start with "[",
        end with "]", and separate its items with ",". Nested lists
        and items that themselves contain "," are not supported.
    item_type : type or 'infer' (default: 'infer')
        Constructor applied to every item, e.g. passing int turns
        "[1, 2, 3]" into [1, 2, 3] rather than ["1", "2", "3"].
        With 'infer', each item is converted to int if possible, then
        to float, and otherwise kept as a str with surrounding
        whitespace and one pair of matching quotes removed.

    Returns
    -------
    list
        A list version of the input string. "[]" (optionally with
        whitespace between the brackets) gives an empty list.

    Raises
    ------
    TypeError
        If `string` is not a str, or `item_type` is neither 'infer'
        nor a type.
    SyntaxError
        If `string` does not start with "[" or does not end with "]".
    ValueError
        If an explicit `item_type` cannot convert one of the items
        (raised by the constructor itself).

    """
    # Check if item_type variable is "type" type
    if item_type != 'infer' and not isinstance(item_type, type):
        raise TypeError('Invalid type: object constructor type should be '
                        'passed to "item_type" variable.')

    # Check if string is str
    if not isinstance(string, str):
        raise TypeError('Invalid type: "string" variable has to be str type.')

    # Check string format. The f prefix has to be on the literal that
    # holds the placeholder, otherwise "{string}" is emitted verbatim.
    if not string.startswith('['):
        raise SyntaxError('Invalid syntax for conversion to a list. '
                          f'{string} does not start with "[".')
    if not string.endswith(']'):
        raise SyntaxError('Invalid syntax for conversion to a list. '
                          f'{string} does not end with "]".')

    # Text between the opening bracket and the first closing bracket.
    inner = string.split('[')[1].split(']')[0]
    if not inner.strip():
        # ''.split(',') would give [''], i.e. one bogus empty item.
        return []
    items = inner.split(',')

    if item_type != 'infer':
        return [item_type(item) for item in items]

    def _infer(item):
        # Try the narrowest numeric type first; each conversion needs its
        # own try block because an exception raised inside an ``except``
        # handler is not caught by a sibling ``except`` clause.
        for caster in (int, float):
            try:
                return caster(item)
            except ValueError:
                continue
        # Not a number: keep it as text. Dropping the padding and one
        # pair of matching quotes mirrors what evaluating the list
        # literal would have produced for "'a'" or '"a"'.
        text = item.strip()
        if len(text) >= 2 and text[0] == text[-1] and text[0] in '\'"':
            return text[1:-1]
        return text

    return [_infer(item) for item in items]

@staticmethod
def _fasta_entry_formatter(sid, desc, seq, col_meta):
# Formats the ID, description, stringed metadata, and sequence
Expand Down Expand Up @@ -237,10 +303,10 @@ def to_dict(self, row_metadata=True, column_metadata=True):
}
if row_metadata:
d['row_metadata'] = self.row_metadata.to_dict(orient='list')
d['row_metadata_index'] = self.row_metadata.index.to_list()
d['row_metadata_index'] = self.row_metadata.index.tolist()
if column_metadata:
d['column_metadata'] = self.column_metadata.to_dict(orient='list')
d['column_metadata_index'] = self.column_metadata.index.to_list()
d['column_metadata_index'] = self.column_metadata.index.tolist()
return d

class JsonSerdeMixin(DictSerdeMixin):
Expand Down Expand Up @@ -366,7 +432,7 @@ def to_pickle(self, path=None, **kwargs):
if not os.path.isdir(dirpath):
raise OSError('{} does not exist'.format(dirpath))
with open(path, 'wb') as writer:
print(pickled, file=writer)
writer.write(pickled)

def __getstate__(self):
# This method gets called when the Alignment object
Expand Down Expand Up @@ -435,7 +501,7 @@ def col_metadata_to_str(column_metadata, included_keys, encoders=None, template=
for k, v in included_values
]
str_index = [col_metadata_str_formatter(
'index', column_metadata.index.to_list(),
'index', column_metadata.index.tolist(),
encoders['index'] if 'index' in encoders.keys() else None,
index_template)
]
Expand Down
12 changes: 6 additions & 6 deletions alignmentrs/aln/mixins/tests/test_dict_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def test_to_dict_with_row_col_meta(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'row_metadata': self.row_metadata.to_dict(orient='list'),
'row_metadata_index': self.row_metadata.index.to_list(),
'row_metadata_index': self.row_metadata.index.tolist(),
'column_metadata': self.column_metadata.to_dict(orient='list'),
'column_metadata_index': self.column_metadata.index.to_list(),
'column_metadata_index': self.column_metadata.index.tolist(),
}
assert exp_dict == test_dict, \
"expected and test dictionaries are not the same: {} != {}".format(
Expand All @@ -74,7 +74,7 @@ def test_to_dict_with_row_meta(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'row_metadata': self.row_metadata.to_dict(orient='list'),
'row_metadata_index': self.row_metadata.index.to_list(),
'row_metadata_index': self.row_metadata.index.tolist(),
}
assert exp_dict == test_dict, \
"expected and test dictionaries are not the same: {} != {}".format(
Expand All @@ -89,7 +89,7 @@ def test_to_dict_with_col_meta(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'column_metadata': self.column_metadata.to_dict(orient='list'),
'column_metadata_index': self.column_metadata.index.to_list(),
'column_metadata_index': self.column_metadata.index.tolist(),
}
assert exp_dict == test_dict, \
"expected and test dictionaries are not the same: {} != {}".format(
Expand All @@ -102,9 +102,9 @@ def test_from_dict(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'row_metadata': self.row_metadata.to_dict(orient='list'),
'row_metadata_index': self.row_metadata.index.to_list(),
'row_metadata_index': self.row_metadata.index.tolist(),
'column_metadata': self.column_metadata.to_dict(orient='list'),
'column_metadata_index': self.column_metadata.index.to_list(),
'column_metadata_index': self.column_metadata.index.tolist(),
}
test_class = MockAlignment.from_dict(test_dict)
exp_class = MockAlignment(
Expand Down
12 changes: 6 additions & 6 deletions alignmentrs/aln/mixins/tests/test_json_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ def test_to_json_with_row_col_meta(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'row_metadata': self.row_metadata.to_dict(orient='list'),
'row_metadata_index': self.row_metadata.index.to_list(),
'row_metadata_index': self.row_metadata.index.tolist(),
'column_metadata': self.column_metadata.to_dict(orient='list'),
'column_metadata_index': self.column_metadata.index.to_list(),
'column_metadata_index': self.column_metadata.index.tolist(),
})
assert exp_json == test_json, \
"expected and test json are not the same: {} != {}".format(
Expand All @@ -76,7 +76,7 @@ def test_to_dict_with_row_meta(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'row_metadata': self.row_metadata.to_dict(orient='list'),
'row_metadata_index': self.row_metadata.index.to_list(),
'row_metadata_index': self.row_metadata.index.tolist(),
})
assert exp_json == test_json, \
"expected and test dictionaries are not the same: {} != {}".format(
Expand All @@ -91,7 +91,7 @@ def test_to_dict_with_col_meta(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'column_metadata': self.column_metadata.to_dict(orient='list'),
'column_metadata_index': self.column_metadata.index.to_list(),
'column_metadata_index': self.column_metadata.index.tolist(),
})
assert exp_json == test_json, \
"expected and test dictionaries are not the same: {} != {}".format(
Expand All @@ -104,9 +104,9 @@ def test_from_json(self):
'data': self.matrix.data,
'alignment_metadata': self.alignment_metadata,
'row_metadata': self.row_metadata.to_dict(orient='list'),
'row_metadata_index': self.row_metadata.index.to_list(),
'row_metadata_index': self.row_metadata.index.tolist(),
'column_metadata': self.column_metadata.to_dict(orient='list'),
'column_metadata_index': self.column_metadata.index.to_list(),
'column_metadata_index': self.column_metadata.index.tolist(),
})
with tempfile.TemporaryFile(mode='r+') as f:
f.write(exp_json)
Expand Down
2 changes: 2 additions & 0 deletions alignmentrs/aln/record.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
""" Class for formatting sequence and metadata. """

import pandas

class Record:
Expand Down
10 changes: 6 additions & 4 deletions alignmentrs/aln/row.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from copy import deepcopy
import numbers
import inspect
import itertools
""" Classes for retrieving and removing data row by row. """

# from copy import deepcopy
# import numbers
# import inspect
# import itertools

import pandas

Expand Down
Loading