From 9c2bef3e87c56c6f153503a5057850407252c984 Mon Sep 17 00:00:00 2001 From: kent Date: Wed, 15 May 2019 17:35:23 +0900 Subject: [PATCH 01/12] Fixes saving pickle to disk --- alignmentrs/aln/mixins/serde.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alignmentrs/aln/mixins/serde.py b/alignmentrs/aln/mixins/serde.py index f1d6f1c..bf21147 100644 --- a/alignmentrs/aln/mixins/serde.py +++ b/alignmentrs/aln/mixins/serde.py @@ -366,7 +366,7 @@ def to_pickle(self, path=None, **kwargs): if not os.path.isdir(dirpath): raise OSError('{} does not exist'.format(dirpath)) with open(path, 'wb') as writer: - print(pickled, file=writer) + writer.write(pickled) def __getstate__(self): # This method gets called when the Alignment object From f31c371f9f45b19450b2d98686135e4b16319112 Mon Sep 17 00:00:00 2001 From: kent Date: Tue, 28 May 2019 18:45:36 +0900 Subject: [PATCH 02/12] Fixes wrong method to_list to tolist --- Cargo.toml | 2 +- alignmentrs/__init__.py | 2 +- alignmentrs/aln/alignment.py | 2 +- alignmentrs/aln/mixins/serde.py | 6 +++--- alignmentrs/aln/mixins/tests/test_dict_serde.py | 12 ++++++------ alignmentrs/aln/mixins/tests/test_json_serde.py | 12 ++++++------ alignmentrs/utils.py | 1 + setup.py | 2 +- 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 14e36e4..37753c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "alignmentrs" -version = "0.10.0" +version = "0.10.2" authors = ["Kent Kawashima "] edition = "2018" diff --git a/alignmentrs/__init__.py b/alignmentrs/__init__.py index 6f091e0..d867e3e 100644 --- a/alignmentrs/__init__.py +++ b/alignmentrs/__init__.py @@ -5,7 +5,7 @@ __author__ = 'Kent Kawashima' -__version__ = '0.10.0' +__version__ = '0.10.2' __all__ = [ # From dynamic library 'librs', diff --git a/alignmentrs/aln/alignment.py b/alignmentrs/aln/alignment.py index f48477d..f8bf0e5 100644 --- a/alignmentrs/aln/alignment.py +++ b/alignmentrs/aln/alignment.py @@ -296,7 +296,7 @@ def ncols(self): @property def ids(self): """list of str: Returns the list of identifiers.""" - return self.row_metadata.index.to_list() + return self.row_metadata.index.tolist() @property def sequences(self): diff --git a/alignmentrs/aln/mixins/serde.py b/alignmentrs/aln/mixins/serde.py index bf21147..49cbef0 100644 --- a/alignmentrs/aln/mixins/serde.py +++ b/alignmentrs/aln/mixins/serde.py @@ -237,10 +237,10 @@ def to_dict(self, row_metadata=True, column_metadata=True): } if row_metadata: d['row_metadata'] = self.row_metadata.to_dict(orient='list') - d['row_metadata_index'] = self.row_metadata.index.to_list() + d['row_metadata_index'] = self.row_metadata.index.tolist() if column_metadata: d['column_metadata'] = self.column_metadata.to_dict(orient='list') - d['column_metadata_index'] = self.column_metadata.index.to_list() + d['column_metadata_index'] = self.column_metadata.index.tolist() return d class JsonSerdeMixin(DictSerdeMixin): @@ -435,7 +435,7 @@ def col_metadata_to_str(column_metadata, included_keys, encoders=None, template= for k, v in included_values ] str_index = [col_metadata_str_formatter( - 'index', column_metadata.index.to_list(), + 'index', column_metadata.index.tolist(), encoders['index'] if 'index' in encoders.keys() else None, index_template) ] diff --git a/alignmentrs/aln/mixins/tests/test_dict_serde.py b/alignmentrs/aln/mixins/tests/test_dict_serde.py index 2cd5a8a..1b7e512 100644 --- a/alignmentrs/aln/mixins/tests/test_dict_serde.py +++ b/alignmentrs/aln/mixins/tests/test_dict_serde.py @@ -57,9 +57,9 @@ def test_to_dict_with_row_col_meta(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'row_metadata': self.row_metadata.to_dict(orient='list'), - 'row_metadata_index': self.row_metadata.index.to_list(), + 'row_metadata_index': self.row_metadata.index.tolist(), 'column_metadata': self.column_metadata.to_dict(orient='list'), - 'column_metadata_index': self.column_metadata.index.to_list(), + 'column_metadata_index': self.column_metadata.index.tolist(), } assert exp_dict == test_dict, \ "expected and test dictionaries are not the same: {} != {}".format( @@ -74,7 +74,7 @@ def test_to_dict_with_row_meta(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'row_metadata': self.row_metadata.to_dict(orient='list'), - 'row_metadata_index': self.row_metadata.index.to_list(), + 'row_metadata_index': self.row_metadata.index.tolist(), } assert exp_dict == test_dict, \ "expected and test dictionaries are not the same: {} != {}".format( @@ -89,7 +89,7 @@ def test_to_dict_with_col_meta(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'column_metadata': self.column_metadata.to_dict(orient='list'), - 'column_metadata_index': self.column_metadata.index.to_list(), + 'column_metadata_index': self.column_metadata.index.tolist(), } assert exp_dict == test_dict, \ "expected and test dictionaries are not the same: {} != {}".format( @@ -102,9 +102,9 @@ def test_from_dict(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'row_metadata': self.row_metadata.to_dict(orient='list'), - 'row_metadata_index': self.row_metadata.index.to_list(), + 'row_metadata_index': self.row_metadata.index.tolist(), 'column_metadata': self.column_metadata.to_dict(orient='list'), - 'column_metadata_index': self.column_metadata.index.to_list(), + 'column_metadata_index': self.column_metadata.index.tolist(), } test_class = MockAlignment.from_dict(test_dict) exp_class = MockAlignment( diff --git a/alignmentrs/aln/mixins/tests/test_json_serde.py b/alignmentrs/aln/mixins/tests/test_json_serde.py index eabb41a..1036a25 100644 --- a/alignmentrs/aln/mixins/tests/test_json_serde.py +++ b/alignmentrs/aln/mixins/tests/test_json_serde.py @@ -59,9 +59,9 @@ def test_to_json_with_row_col_meta(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'row_metadata': self.row_metadata.to_dict(orient='list'), - 'row_metadata_index': self.row_metadata.index.to_list(), + 'row_metadata_index': self.row_metadata.index.tolist(), 'column_metadata': self.column_metadata.to_dict(orient='list'), - 'column_metadata_index': self.column_metadata.index.to_list(), + 'column_metadata_index': self.column_metadata.index.tolist(), }) assert exp_json == test_json, \ "expected and test json are not the same: {} != {}".format( @@ -76,7 +76,7 @@ def test_to_dict_with_row_meta(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'row_metadata': self.row_metadata.to_dict(orient='list'), - 'row_metadata_index': self.row_metadata.index.to_list(), + 'row_metadata_index': self.row_metadata.index.tolist(), }) assert exp_json == test_json, \ "expected and test dictionaries are not the same: {} != {}".format( @@ -91,7 +91,7 @@ def test_to_dict_with_col_meta(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'column_metadata': self.column_metadata.to_dict(orient='list'), - 'column_metadata_index': self.column_metadata.index.to_list(), + 'column_metadata_index': self.column_metadata.index.tolist(), }) assert exp_json == test_json, \ "expected and test dictionaries are not the same: {} != {}".format( @@ -104,9 +104,9 @@ def test_from_json(self): 'data': self.matrix.data, 'alignment_metadata': self.alignment_metadata, 'row_metadata': self.row_metadata.to_dict(orient='list'), - 'row_metadata_index': self.row_metadata.index.to_list(), + 'row_metadata_index': self.row_metadata.index.tolist(), 'column_metadata': self.column_metadata.to_dict(orient='list'), - 'column_metadata_index': self.column_metadata.index.to_list(), + 'column_metadata_index': self.column_metadata.index.tolist(), }) with tempfile.TemporaryFile(mode='r+') as f: f.write(exp_json) diff --git a/alignmentrs/utils.py b/alignmentrs/utils.py index a559623..fa03765 100644 --- a/alignmentrs/utils.py +++ b/alignmentrs/utils.py @@ -11,6 +11,7 @@ def idseq_to_display(ids, chunked_sequences, template='{name} {seq}', max_length=20, id_width=15, sequence_width=55): + ids = list(map(str, ids)) if not len(ids): return '' def chunked_fn(x): diff --git a/setup.py b/setup.py index 57844a9..23693c8 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='alignmentrs', author='Kent Kawashima', - version='0.10.0', + version='0.10.2', author_email='kentkawashima@gmail.com', description='Quickly read and manipulate multiple sequence alignments in Python', long_description=long_description, From 238205f6da64fda9f7c5aa8825e5a482d8a0e76d Mon Sep 17 00:00:00 2001 From: kent Date: Thu, 18 Jul 2019 12:35:05 +0900 Subject: [PATCH 03/12] Fixes behavior of retain_rows and retain_cols when 0 indexes are passed --- Cargo.toml | 2 +- alignmentrs/__init__.py | 2 +- setup.py | 2 +- src/alignment.rs | 12 ++++++++++-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 37753c4..896cf93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "alignmentrs" -version = "0.10.2" +version = "0.10.3" authors = ["Kent Kawashima "] edition = "2018" diff --git a/alignmentrs/__init__.py b/alignmentrs/__init__.py index d867e3e..a35c5b3 100644 --- a/alignmentrs/__init__.py +++ b/alignmentrs/__init__.py @@ -5,7 +5,7 @@ __author__ = 'Kent Kawashima' -__version__ = '0.10.2' +__version__ = '0.10.3' __all__ = [ # From dynamic library 'librs', diff --git a/setup.py b/setup.py index 23693c8..66b5881 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='alignmentrs', author='Kent Kawashima', - version='0.10.2', + version='0.10.3', author_email='kentkawashima@gmail.com', description='Quickly read and manipulate multiple sequence alignments in Python', long_description=long_description, diff --git a/src/alignment.rs b/src/alignment.rs index e281bc6..cdf5539 100644 --- a/src/alignment.rs +++ b/src/alignment.rs @@ -125,7 +125,11 @@ impl SeqMatrix { /// Keep rows matching the specified row indices, and removes everything else. pub fn _retain_rows(&mut self, ids: Vec) -> Result<(), String> { - self._drop_rows(ids, true) + if ids.len() == 0 { + let ids: Vec = (0..self._nrows()).map(|i| i as i32).collect(); + return self._drop_rows(ids, false) + } + return self._drop_rows(ids, true) } /// Generalized method used to remove rows from the sequence matrix. @@ -255,7 +259,11 @@ impl SeqMatrix { /// Keep columns matching the specified columns indices, and removes everything else. pub fn _retain_cols(&mut self, ids: Vec) -> Result<(), String> { - self._drop_cols(ids, true) + if ids.len() == 0 { + let ids: Vec = (0..self._ncols()).map(|i| i as i32).collect(); + return self._drop_cols(ids, false) + } + return self._drop_cols(ids, true) } /// Generalized method used to remove columns from the sequence matrix. From 958b724c45f070feac9647a0e0a7ed13d9c77415 Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Wed, 11 Sep 2019 17:47:21 +0900 Subject: [PATCH 04/12] Fix nosetests fails in test_none and test_with_ids_no_descriptions for _make_row_meta method in Alignment --- alignmentrs/aln/alignment.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/alignmentrs/aln/alignment.py b/alignmentrs/aln/alignment.py index f8bf0e5..2459216 100644 --- a/alignmentrs/aln/alignment.py +++ b/alignmentrs/aln/alignment.py @@ -183,14 +183,10 @@ def _make_row_meta(self, data=None, ids=None, descriptions=None): # If descriptions is NOT specified but ids is specified, # use ids as index and return an empty DataFrame. elif (descriptions is None) and (ids is not None): - df = pandas.DataFrame([], index=ids) - df['description'] = [''] * len(ids) - return df + return pandas.DataFrame(None, index=ids) # If both descriptions and ids are not specified, # use default integer indexing and return an empty DataFrame. - df = pandas.DataFrame([], index=range(self.nrows)) - df['description'] = [''] * self.nrows - return df + return pandas.DataFrame(None, index=range(self.nrows)) def _make_col_meta(self, data=None, ids=None, descriptions=None): # Constructs column metadata using data, or From 870c67a0d0dec4a9ed63f2a480c6d5fb32c6a9a3 Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Wed, 11 Sep 2019 17:48:36 +0900 Subject: [PATCH 05/12] Update to 0.10.4 --- alignmentrs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alignmentrs/__init__.py b/alignmentrs/__init__.py index a35c5b3..a12c931 100644 --- a/alignmentrs/__init__.py +++ b/alignmentrs/__init__.py @@ -5,7 +5,7 @@ __author__ = 'Kent Kawashima' -__version__ = '0.10.3' +__version__ = '0.10.4' __all__ = [ # From dynamic library 'librs', From f4116d9ad8e5a56a56611f579d78773a90fb826a Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Wed, 11 Sep 2019 18:11:06 +0900 Subject: [PATCH 06/12] Add module docstrings and commented out unnecessary imports. --- alignmentrs/aln/alignment.py | 8 +++++--- alignmentrs/aln/col.py | 6 ++++-- alignmentrs/aln/record.py | 2 ++ alignmentrs/aln/row.py | 10 ++++++---- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/alignmentrs/aln/alignment.py b/alignmentrs/aln/alignment.py index 2459216..ddaee5e 100644 --- a/alignmentrs/aln/alignment.py +++ b/alignmentrs/aln/alignment.py @@ -1,7 +1,9 @@ +""" Alignment class. """ + from collections import Counter -from copy import copy, deepcopy -import os -import inspect +# from copy import copy, deepcopy +# import os +# import inspect import warnings import pandas diff --git a/alignmentrs/aln/col.py b/alignmentrs/aln/col.py index f1bc5e4..2e6c75f 100644 --- a/alignmentrs/aln/col.py +++ b/alignmentrs/aln/col.py @@ -1,6 +1,8 @@ -from copy import deepcopy +""" Classes for retrieving and removing data column by column. """ + +# from copy import deepcopy import numbers -import inspect +# import inspect import itertools import pandas diff --git a/alignmentrs/aln/record.py b/alignmentrs/aln/record.py index 2b2a7dd..6d8708a 100644 --- a/alignmentrs/aln/record.py +++ b/alignmentrs/aln/record.py @@ -1,3 +1,5 @@ +""" Class for formatting sequence and metadata. """ + import pandas class Record: diff --git a/alignmentrs/aln/row.py b/alignmentrs/aln/row.py index 4493958..83313eb 100644 --- a/alignmentrs/aln/row.py +++ b/alignmentrs/aln/row.py @@ -1,7 +1,9 @@ -from copy import deepcopy -import numbers -import inspect -import itertools +""" Classes for retrieving and removing data row by row. """ + +# from copy import deepcopy +# import numbers +# import inspect +# import itertools import pandas From 05e7ed10771f3c2d4bf32cd8fa2b671f559062c1 Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Wed, 11 Sep 2019 18:49:52 +0900 Subject: [PATCH 07/12] Initial commit --- alignmentrs/tests/test_utils.py | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 alignmentrs/tests/test_utils.py diff --git a/alignmentrs/tests/test_utils.py b/alignmentrs/tests/test_utils.py new file mode 100644 index 0000000..f05ca09 --- /dev/null +++ b/alignmentrs/tests/test_utils.py @@ -0,0 +1,52 @@ +""" Unit test for utils functions. """ + +import tempfile + +from alignmentrs.utils import fasta_file_to_lists + +class TestFastaFileReader: + """ Unit tests for fasta_file_to_lists """ + def setup(self): + self.fp = tempfile.NamedTemporaryFile(mode='w+') + lines = [ + '>seq1 here_is_description1', + 'ATGCATGCATGC', + '>seq2 here_is_description2', + 'ATGCAAN-ATGC', + '>marker1 here_is_description_for_marker1', + 'CCCCCCCCCCNN', + '>marker2 here_is_description_for_marker2', + '000011110011' + ] + self.fp.write('\n'.join(lines)) + self.fp.seek(0) + self.path = self.fp.name + + def teardown(self): + self.fp.close() + + def test_fasta_file_to_lists(self): + """ Tests if fasta_file_to_lists function returns contents of + a FASTA file as expected. """ + fasta_d = fasta_file_to_lists(self.path, marker_kw='marker') + + exp_sample = { + 'ids': ['seq1', 'seq2'], + 'descriptions': ['here_is_description1', 'here_is_description2'], + 'sequences': ['ATGCATGCATGC', 'ATGCAAN-ATGC'] + } + assert fasta_d['sample'] == exp_sample, \ + 'Sequence data read from file is not the same as expected: {}'\ + .foramt(fasta_d['sample']) + + exp_marker = { + 'ids': ['marker1', 'marker2'], + 'descriptions': [ + 'here_is_description_for_marker1', + 'here_is_description_for_marker2'], + 'sequences': ['CCCCCCCCCCNN', '000011110011'] + } + assert fasta_d['marker'] == exp_marker, \ + 'Marker data read from file is not the same as expected: {}'\ + .foramt(fasta_d['marker']) + From cb35d3672fd8559e9b61a1d00b948b12d3a55963 Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Wed, 11 Sep 2019 19:46:18 +0900 Subject: [PATCH 08/12] Add a specific function to read alignment file with assertion for the same sequence lengths --- alignmentrs/utils.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/alignmentrs/utils.py b/alignmentrs/utils.py index fa03765..8fe38c9 100644 --- a/alignmentrs/utils.py +++ b/alignmentrs/utils.py @@ -130,6 +130,34 @@ def fasta_file_to_lists(path, marker_kw=None): } } +def alignment_file_to_lists(path, marker_kw=None): + """ Reads a FASTA formatted text file to a list. Nucleotide or amino acid + sequences and marker sequences should be the same lengths. + + Parameters + ---------- + path : str + Location of FASTA file. + marker_kw : str + Keyword indicating the sample is a marker. + + Returns + ------- + dict + Contains list of ids, descriptions, and sequences for sample + and marker categories. + + """ + fasta = fasta_file_to_lists(path, marker_kw) + + # Check if lengths of sequences are the same + sequences = fasta['sample']['sequences'] + fasta['marker']['sequences'] + len_array = [len(seq) for seq in sequences] + assert min(len_array) == max(len_array), \ + 'Wrong sequence lengths: different lengths of sequences exist. '\ + '{} <= seq_len <= {}.'.format(min(len_array), max(len_array)) + + return fasta def parse_comment_list(comment_list: list): comments_d = dict() From 1a117fd007308152c4dc93a6b03ce5a65eb62fd1 Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Wed, 11 Sep 2019 19:46:48 +0900 Subject: [PATCH 09/12] Add test for alignment_file_to_lists function --- alignmentrs/tests/test_utils.py | 68 +++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/alignmentrs/tests/test_utils.py b/alignmentrs/tests/test_utils.py index f05ca09..e11bb80 100644 --- a/alignmentrs/tests/test_utils.py +++ b/alignmentrs/tests/test_utils.py @@ -1,20 +1,21 @@ """ Unit test for utils functions. """ import tempfile +from nose.tools import * -from alignmentrs.utils import fasta_file_to_lists +from alignmentrs.utils import fasta_file_to_lists, alignment_file_to_lists class TestFastaFileReader: - """ Unit tests for fasta_file_to_lists """ + """ Unit tests for reading normal FASTA formatted text file. """ def setup(self): self.fp = tempfile.NamedTemporaryFile(mode='w+') lines = [ '>seq1 here_is_description1', 'ATGCATGCATGC', '>seq2 here_is_description2', - 'ATGCAAN-ATGC', + 'ATGCAAN-ATGCAAA', '>marker1 here_is_description_for_marker1', - 'CCCCCCCCCCNN', + 'CCCCCCCCCCNNCCC', '>marker2 here_is_description_for_marker2', '000011110011' ] @@ -30,6 +31,60 @@ def test_fasta_file_to_lists(self): a FASTA file as expected. """ fasta_d = fasta_file_to_lists(self.path, marker_kw='marker') + exp_sample = { + 'ids': ['seq1', 'seq2'], + 'descriptions': ['here_is_description1', 'here_is_description2'], + 'sequences': ['ATGCATGCATGC', 'ATGCAAN-ATGCAAA'] + } + assert fasta_d['sample'] == exp_sample, \ + 'Sequence data read from file is not the same as expected: {}'\ + .format(fasta_d['sample']) + + exp_marker = { + 'ids': ['marker1', 'marker2'], + 'descriptions': [ + 'here_is_description_for_marker1', + 'here_is_description_for_marker2'], + 'sequences': ['CCCCCCCCCCNNCCC', '000011110011'] + } + assert fasta_d['marker'] == exp_marker, \ + 'Marker data read from file is not the same as expected: {}'\ + .format(fasta_d['marker']) + + @raises(AssertionError) + def test_alignment_file_assertion(self): + """ Tests if alignment_file_to_lists function detects error in sequence + lengths. """ + fasta_d = alignment_file_to_lists(self.path, marker_kw='marker') + +class TestAlignmentFileReader: + """ Unit tests for functions for reading alignment file. """ + def setup(self): + """ Create a dummy alignment file. """ + self.fp = tempfile.NamedTemporaryFile(mode='w+') + lines = [ + '>seq1 here_is_description1', + 'ATGCATGCATGC', + '>seq2 here_is_description2', + 'ATGCAAN-ATGC', + '>marker1 here_is_description_for_marker1', + 'CCCCCCCCCCNN', + '>marker2 here_is_description_for_marker2', + '000011110011' + ] + self.fp.write('\n'.join(lines)) + self.fp.seek(0) + self.path = self.fp.name + + + def teardown(self): + self.fp.close() + + def test_alignment_file_to_lists(self): + """ Tests if alignment_file_to_lists function returns contents of + a FASTA file as expected. """ + fasta_d = alignment_file_to_lists(self.path, marker_kw='marker') + exp_sample = { 'ids': ['seq1', 'seq2'], 'descriptions': ['here_is_description1', 'here_is_description2'], @@ -37,7 +92,7 @@ def test_fasta_file_to_lists(self): } assert fasta_d['sample'] == exp_sample, \ 'Sequence data read from file is not the same as expected: {}'\ - .foramt(fasta_d['sample']) + .format(fasta_d['sample']) exp_marker = { 'ids': ['marker1', 'marker2'], @@ -48,5 +103,4 @@ def test_fasta_file_to_lists(self): } assert fasta_d['marker'] == exp_marker, \ 'Marker data read from file is not the same as expected: {}'\ - .foramt(fasta_d['marker']) - + .format(fasta_d['marker']) From bba033d2b50ddab6ca47f1465cda7d4b897eec20 Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Thu, 12 Sep 2019 17:08:57 +0900 Subject: [PATCH 10/12] Update ipython version to 3.7.4 --- examples/01_Reading_alignments.ipynb | 2 +- examples/02a_Removing_rows.ipynb | 2 +- examples/02b_Removing_columns.ipynb | 2 +- examples/03a_Filtering_rows.ipynb | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/01_Reading_alignments.ipynb b/examples/01_Reading_alignments.ipynb index 36d5319..154a95c 100644 --- a/examples/01_Reading_alignments.ipynb +++ b/examples/01_Reading_alignments.ipynb @@ -766,7 +766,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/examples/02a_Removing_rows.ipynb b/examples/02a_Removing_rows.ipynb index f630b84..94e0313 100644 --- a/examples/02a_Removing_rows.ipynb +++ b/examples/02a_Removing_rows.ipynb @@ -1127,7 +1127,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/examples/02b_Removing_columns.ipynb b/examples/02b_Removing_columns.ipynb index 55a9dc6..038ef2c 100644 --- a/examples/02b_Removing_columns.ipynb +++ b/examples/02b_Removing_columns.ipynb @@ -1266,7 +1266,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/examples/03a_Filtering_rows.ipynb b/examples/03a_Filtering_rows.ipynb index d777fb0..4e8ffe4 100644 --- a/examples/03a_Filtering_rows.ipynb +++ b/examples/03a_Filtering_rows.ipynb @@ -1110,7 +1110,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.7.4" } }, "nbformat": 4, From 116ee0941898e5f2d320104d6f11045d6f5decfe Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Fri, 13 Sep 2019 14:39:12 +0900 Subject: [PATCH 11/12] Add _parse_str_to_list staticmethod and replaced eval() with it --- alignmentrs/aln/mixins/serde.py | 76 ++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/alignmentrs/aln/mixins/serde.py b/alignmentrs/aln/mixins/serde.py index 49cbef0..f74bf87 100644 --- a/alignmentrs/aln/mixins/serde.py +++ b/alignmentrs/aln/mixins/serde.py @@ -29,7 +29,14 @@ class FastaSerdeMixin: from a FASTA formatted file. """ @classmethod - def from_fasta(cls, path, name=None, parse_row_metadata=True, parse_description=True, column_metadata_decoders=None, column_metadata_regexp='c\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])', column_index_regexp='ci\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])', store_history=True, **kwargs): + def from_fasta( + cls, path, name=None, + # parse_row_metadata=True, + parse_description=True, + # column_metadata_decoders=None, + column_metadata_regexp='c\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])', + column_index_regexp='ci\|([A-Za-z0-9\s\.]+)=(\[[A-Za-z0-9\.\s,\"\']+\])', + store_history=True, **kwargs): """Create an Alignment object from a FASTA-formatted file. Parameters @@ -65,8 +72,9 @@ def from_fasta(cls, path, name=None, parse_row_metadata=True, parse_description match = re.search(column_index_regexp, metadata['descriptions'][0]) if match: key, value = match.groups() + # Convert text into a list using eval try: - value = eval(value) + value = cls._parse_str_to_list(value, 'infer') except SyntaxError: raise ValueError('Cannot construct Alignment from the given FASTA file: column index is malformed'.format(key)) # Put key-value pair into the dictionary @@ -78,10 +86,8 @@ def from_fasta(cls, path, name=None, parse_row_metadata=True, parse_description match_locations.append(match.span()) key, value = match.groups() # Convert text into a list using eval - # This is DANGEROUS and could open to exploitation. - # TODO: Add a prelimenary regex check to lessen vulnerability try: - value = eval(value) + value = cls._parse_str_to_list(value, 'infer') except SyntaxError: raise ValueError('Cannot construct Alignment from the given FASTA file: column metadata {} is malformed'.format(key)) # Put key-value pair into the dictionary @@ -159,6 +165,66 @@ def to_fasta(self, path=None, include_column_metadata=None, column_metadata_enco with open(path, 'w') as writer: print(fasta_str, file=writer) + @staticmethod + def _parse_str_to_list(string: str, item_type: type = 'infer'): + """ Returns a list by parsing a given string. The input string has to + expressed as like Python list syntax. + + Parameters + ---------- + string: str + A string to be converted into a list. Format should be Python + syntax of list object like "[1, 2, 3]". It has to starts with "[" + and ends with "]" and items have to be separated by ",". + item_type: type (default: str) + Type in which items in str-like list will be converted. For example, + "[1, 2, 3]" and int are passed to string and item_type variables + respectively, "[1, 2, 3]" will converted into [1, 2, 3] not + ["1", "2", "3"]. + + Return + ------ + A list version of the input string. + + """ + # Check if item_type variable is "type" type + if item_type != 'infer' and not isinstance(item_type, type): + raise TypeError('Invalid type: object constructor type should be '\ + 'passed to "item_type" variable.') + + # Check if sring is str + if not isinstance(string, str): + raise TypeError('Invalid type: "string" variable has to be str type.') + + # Check string format + if not string.startswith('['): + raise SyntaxError(f'Invalid syntax for conversion to a list. '\ + '{string} does not start with "[".') + if not string.endswith(']'): + raise SyntaxError(f'Invalid syntax for conversion to a list. '\ + '{string} does not end with "]".') + + # Convert into a list + if item_type == 'infer': + out_l = [] + for item in string.split('[')[1].split(']')[0].split(','): + try: + dat = int(item) + # e.g. int('1.1') gives "ValueError: invalid literal for int() + # with base 10: '1.1'" + except ValueError: + dat = float(item) + # e.g. float('a') gives "ValueError: could not convert string + # to float: 'a'" + except: + dat = item + + out_l.append(dat) + return out_l + + return [item_type(item) for item + in string.split('[')[1].split(']')[0].split(',')] + @staticmethod def _fasta_entry_formatter(sid, desc, seq, col_meta): # Formats the ID, description, stringed metadata, and sequence From 5f963d13ac2db72f4ef23b462de0836526f590b7 Mon Sep 17 00:00:00 2001 From: Haruka Yamashita Date: Fri, 13 Sep 2019 15:19:42 +0900 Subject: [PATCH 12/12] Update to 0.10.5 --- alignmentrs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alignmentrs/__init__.py b/alignmentrs/__init__.py index a12c931..20d2028 100644 --- a/alignmentrs/__init__.py +++ b/alignmentrs/__init__.py @@ -5,7 +5,7 @@ __author__ = 'Kent Kawashima' -__version__ = '0.10.4' +__version__ = '0.10.5' __all__ = [ # From dynamic library 'librs',