diff --git a/Bio_Files_Processor.py b/Bio_Files_Processor.py
new file mode 100644
index 0000000..3872474
--- /dev/null
+++ b/Bio_Files_Processor.py
@@ -0,0 +1,126 @@
+import os
+from typing import List
+
+def convert_multiline_fasta_to_oneline(input_fasta: str, output_fasta: str = None) -> str:
+    """
+    Convert a multiline FASTA file into a FASTA file with one sequence line per record.
+
+    Every record is written as its header line (the line starting with '>') followed by
+    a single line holding the whole sequence. The result is saved into the folder
+    "Converted_data" in the current working directory; the folder is created when it
+    does not exist yet. Lines that precede the first header are ignored, and a repeated
+    header starts its record from scratch.
+
+    :param input_fasta: path to the input FASTA file; the name must end with '.fasta'
+    :type input_fasta: str
+    :param output_fasta: name of the output file (with extension), default value = None,
+        which means 'one_line_' + the file name of the input
+    :type output_fasta: str
+    :rtype: str
+    :return: script completion message
+    :raises ValueError: if the input file name does not end with '.fasta'
+    """
+    if not input_fasta.endswith('.fasta'):
+        raise ValueError('Wrong file format in input!')
+    out_dir = os.path.join('.', 'Converted_data')
+    os.makedirs(out_dir, exist_ok=True)
+    if output_fasta is None:
+        # Only the file name is used, so the result always lands inside out_dir.
+        out_name = 'one_line_' + os.path.basename(input_fasta)
+    else:
+        out_name = output_fasta
+
+    # header line -> list of sequence chunks, in file order
+    records = {}
+    chunks = None
+    with open(input_fasta) as seq_fasta:
+        for line in seq_fasta:
+            line = line.strip('\n')
+            if line.startswith('>'):
+                chunks = records[line] = []
+            elif chunks is not None:
+                chunks.append(line)
+
+    with open(os.path.join(out_dir, out_name), mode='w') as new_seq_fasta:
+        for header, chunks in records.items():
+            new_seq_fasta.write(header + '\n')
+            new_seq_fasta.write(''.join(chunks) + '\n')
+    return 'All sequences processed!'
+
+
+def select_genes_from_gbk_to_fasta(input_gbk: str, genes: List[str], n_before: int = 1, n_after: int = 1,
+                                   output_fasta: str = None) -> str:
+    """
+    Find the neighbours of genes of interest (GOI) in a GenBank file and save their proteins.
+
+    The file is read twice: first to collect the values of all ``/gene=`` qualifiers in
+    file order, then to collect the ``/translation=`` that follows each selected
+    neighbour. A gene from the file is treated as a GOI when one of the given names is a
+    substring of its ``/gene`` value. The gene list is treated as circular, so the
+    neighbours of the first/last gene wrap around to the other end of the list.
+    Neighbours are written as FASTA records (name line, protein line) into the folder
+    "Analyzed_data" in the current working directory; the folder is created when it does
+    not exist yet. A neighbour without a translation gets an empty sequence line.
+
+    :param input_gbk: path to the GenBank file; the name must end with '.gbk'
+    :type input_gbk: str
+    :param genes: gene of interest names
+    :type genes: List[str]
+    :param n_before: number of genes before GOI (>0), default value = 1
+    :type n_before: int
+    :param n_after: number of genes after GOI (>0), default value = 1
+    :type n_after: int
+    :param output_fasta: name of the FASTA file with neighbours of GOI (names and seqs),
+        default value = None, which means 'output_for_gbk.fasta'
+    :type output_fasta: str
+    :rtype: str
+    :return: script completion message
+    :raises ValueError: if the input file name does not end with '.gbk'
+    """
+    if not input_gbk.endswith('.gbk'):
+        raise ValueError('Wrong file format in input!')
+    out_dir = os.path.join('.', 'Analyzed_data')
+    os.makedirs(out_dir, exist_ok=True)
+    if output_fasta is None:
+        output_fasta = 'output_for_gbk.fasta'
+
+    # First pass: gene names in file order. Matching '/gene=' exactly keeps other
+    # qualifiers such as /gene_synonym out of the list.
+    genes_gbk = []
+    with open(input_gbk) as gbk:
+        for line in gbk:
+            stripped = line.strip()
+            if stripped.startswith('/gene='):
+                genes_gbk.append(stripped.split('=', 1)[1])
+
+    goi_names = [name for wanted in genes for name in genes_gbk if wanted in name]
+    total = len(genes_gbk)
+    # Closest "before" neighbour first, then the "after" neighbours.
+    offsets = [-step for step in range(1, n_before + 1)] + list(range(1, n_after + 1))
+    neighbour_genes = {}
+    for goi in goi_names:
+        goi_index = genes_gbk.index(goi)
+        for offset in offsets:
+            # Modulo implements the wrap-around at both ends of the gene list.
+            neighbour_genes[genes_gbk[(goi_index + offset) % total]] = ''
+
+    # Second pass: attach the translation that follows each neighbour's /gene line.
+    with open(input_gbk) as gbk:
+        current = None  # neighbour whose translation is awaited
+        chunks = None   # parts of a translation that spans several lines
+        for line in gbk:
+            stripped = line.strip()
+            if chunks is not None:
+                chunks.append(stripped)
+                if '"' in stripped:  # closing quote ends the translation
+                    neighbour_genes[current] = ''.join(chunks)
+                    current, chunks = None, None
+            elif stripped.startswith('/gene='):
+                name = stripped.split('=', 1)[1]
+                # Reset on every gene so a translation is never stored under a stale name.
+                current = name if name in neighbour_genes else None
+            elif current is not None and stripped.startswith('/translation='):
+                first_part = stripped.split('=', 1)[1]
+                if first_part.count('"') >= 2:
+                    # Whole translation fits on one line.
+                    neighbour_genes[current] = first_part
+                    current = None
+                else:
+                    chunks = [first_part]
+
+    with open(os.path.join(out_dir, output_fasta), mode='w') as fasta:
+        for name, seq in neighbour_genes.items():
+            fasta.write('>' + name.replace('"', '') + '\n')
+            fasta.write(seq.replace('"', '') + '\n')
+    return 'All sequences processed!'
+
diff --git a/Bio_Seq_Analysis_Tool.py b/Bio_Seq_Analysis_Tool.py
new file mode 100644
index 0000000..bac8f59
--- /dev/null
+++ b/Bio_Seq_Analysis_Tool.py
@@ -0,0 +1,348 @@
+import os
+from abc import ABC, abstractmethod
+from typing import Dict, Iterable, Tuple, Union
+
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqUtils import gc_fraction
+
+
+
+def analyse_gc(records: Iterable['SeqRecord'], min_gc: Union[int, float],
+               max_gc: Union[int, float]) -> Dict[str, 'SeqRecord']:
+    """
+    Keep the records whose GC-content lies within the given bounds (inclusive).
+
+    :param records: records to filter; each must provide ``id`` and ``seq``
+    :type records: Iterable[SeqRecord]
+    :param min_gc: lower boundary for GC-content, in percent
+    :type min_gc: Union[int, float]
+    :param max_gc: upper boundary for GC-content, in percent
+    :type max_gc: Union[int, float]
+    :rtype: Dict[str, SeqRecord]
+    :return: records that passed the filter, keyed by record id
+    """
+    # The bounds are given in percent and divided by 100 before comparing with gc_fraction.
+    low, high = min_gc / 100, max_gc / 100
+    return {record.id: record for record in records if low <= gc_fraction(record.seq) <= high}
+
+
+def filter_by_length(records: Iterable['SeqRecord'], min_length: Union[int, float],
+                     max_length: Union[int, float]) -> Dict[str, 'SeqRecord']:
+    """
+    Keep the records whose sequence length lies within the given bounds (inclusive).
+
+    :param records: records to filter; each must provide ``id`` and ``seq``
+    :type records: Iterable[SeqRecord]
+    :param min_length: lower boundary for sequence length
+    :type min_length: Union[int, float]
+    :param max_length: upper boundary for sequence length
+    :type max_length: Union[int, float]
+    :rtype: Dict[str, SeqRecord]
+    :return: records that passed the filter, keyed by record id
+    """
+    return {record.id: record for record in records if min_length <= len(record.seq) <= max_length}
+
+
+def filter_by_quality(records: Iterable['SeqRecord'], quality_threshold: Union[int, float]) -> Dict[str, 'SeqRecord']:
+    """
+    Keep the records in which every base has a quality of at least ``quality_threshold``.
+
+    A record with no quality values at all (an empty read) is kept instead of raising.
+
+    :param records: records to filter; each must provide ``id`` and
+        ``letter_annotations["phred_quality"]``
+    :type records: Iterable[SeqRecord]
+    :param quality_threshold: lowest accepted per-base quality
+    :type quality_threshold: Union[int, float]
+    :rtype: Dict[str, SeqRecord]
+    :return: records that passed the filter, keyed by record id
+    """
+    return {
+        record.id: record
+        for record in records
+        # all() equals "minimum >= threshold" for non-empty reads and is safe for empty ones.
+        if all(quality >= quality_threshold for quality in record.letter_annotations["phred_quality"])
+    }
+
+
+
+def write_filtered_sequences_to_fastq(filtered_sequences: Dict[str, str], unfiltered_sequences: Dict[str, str],
+                                      output_file: str, folder_path: str = 'fastq_filtrator_results') -> None:
+    """
+    Write the reads that passed and the reads that failed the filters into two files.
+
+    Both files are written in FASTA format (sequence only) into ``folder_path``, which is
+    created when it does not exist yet. The reads that passed go to ``output_file``; the
+    reads that failed go to 'unfiltered_sequences.fasta'.
+
+    :param filtered_sequences: reads that passed the filters, record id -> sequence
+    :type filtered_sequences: Dict[str, str]
+    :param unfiltered_sequences: reads that did not pass the filters, record id -> sequence
+    :type unfiltered_sequences: Dict[str, str]
+    :param output_file: name of the file for the reads that passed
+    :type output_file: str
+    :param folder_path: name of the result folder, default = 'fastq_filtrator_results'
+    :type folder_path: str
+    :rtype: None
+    :return: None
+    """
+    os.makedirs(folder_path, exist_ok=True)
+
+    targets = (
+        (os.path.join(folder_path, output_file), filtered_sequences),
+        (os.path.join(folder_path, 'unfiltered_sequences.fasta'), unfiltered_sequences),
+    )
+    for file_path, sequences in targets:
+        # Build the complete record list first, then write it in a single call.
+        records = [SeqRecord(Seq(sequence), id=name, description="") for name, sequence in sequences.items()]
+        with open(file_path, "w") as output_handle:
+            SeqIO.write(records, output_handle, "fasta")
+
+
+def filter_fastq(input_path: str,
+                 gc_bounds: Union[int, float, Tuple[int], Tuple[float]] = (0, 100),
+                 length_bounds: Union[int, Tuple[int]] = (0, 2**32),
+                 quality_threshold: float = 0.0, filtered_file_name: Union[None, str] = None) -> str:
+    """
+    Filter the reads of a FASTQ file by GC-content, length and per-base quality.
+
+    The reads that pass all three filters and the reads that fail any of them are
+    written into two FASTA files by ``write_filtered_sequences_to_fastq``.
+
+    :param input_path:
+        Path to the FASTQ file.
+    :type input_path: str
+
+    :param gc_bounds:
+        Boundaries for GC-content in percent. A single number is used as the upper boundary
+        with 0 as the lower one. Lower boundary cannot be less than 0 and upper boundary
+        cannot be greater than 100. gc_bounds default value is (0, 100).
+    :type gc_bounds: Union[int, float, Tuple[int], Tuple[float]]
+
+    :param length_bounds:
+        Boundaries for read length. Works the same as gc_bounds. Lower boundary cannot be
+        less than 0 and upper boundary cannot be greater than 2^32. length_bounds default
+        value is (0, 2^32).
+    :type length_bounds: Union[int, Tuple[int]]
+
+    :param quality_threshold:
+        Lowest accepted quality of each nucleotide in a read. The threshold cannot be more
+        than 40. quality_threshold default value is 0.
+    :type quality_threshold: float
+
+    :param filtered_file_name:
+        Name of the file for the reads that passed the filters, default value = None,
+        which means 'filtered_sequences.fasta'.
+    :type filtered_file_name: Union[None, str]
+
+    :return: completion message
+    :rtype: str
+
+    :raises ValueError: if the argument values are outside the allowed ones
+    """
+    # isinstance also accepts subclasses of int/float, which `type(x) == ...` rejected.
+    if isinstance(gc_bounds, (int, float)):
+        gc_bounds = (0, gc_bounds)
+    min_gc, max_gc = gc_bounds
+    if min_gc < 0 or max_gc > 100:
+        raise ValueError('Wrong boundaries!')
+
+    if isinstance(length_bounds, int):
+        length_bounds = (0, length_bounds)
+    min_length, max_length = length_bounds
+    if min_length < 0 or max_length > 2**32:
+        raise ValueError('Wrong boundaries!')
+
+    if quality_threshold > 40:
+        raise ValueError('Wrong quality threshold!')
+
+    records = list(SeqIO.parse(input_path, "fastq"))
+
+    # Each filter only sees the records that passed the previous one.
+    passed = analyse_gc(records, min_gc, max_gc)
+    passed = filter_by_length(passed.values(), min_length, max_length)
+    passed = filter_by_quality(passed.values(), quality_threshold)
+
+    filtered_seq = {record_id: str(record.seq) for record_id, record in passed.items()}
+    unfiltered_seq = {record.id: str(record.seq) for record in records if record.id not in filtered_seq}
+
+    new_file_name = "filtered_sequences.fasta" if filtered_file_name is None else filtered_file_name
+    write_filtered_sequences_to_fastq(filtered_seq, unfiltered_seq, new_file_name)
+
+    return 'Sequences are filtered!'
+
+
+class BiologicalSequence(ABC):
+    '''
+    Abstract base class for biological sequences.
+
+    A concrete subclass must implement every method declared here; a class that leaves
+    any of them out cannot be instantiated.
+    '''
+
+    @abstractmethod
+    def __len__(self):
+        '''
+        Return the length of the sequence (support for the built-in len).
+        '''
+
+    @abstractmethod
+    def __getitem__(self, item):
+        '''
+        Return the element or slice of the sequence selected by ``item``.
+        '''
+
+    @abstractmethod
+    def __str__(self):
+        '''
+        Return the sequence as a string.
+        '''
+
+    @abstractmethod
+    def is_alphabet_correct(self):
+        '''
+        Check that the sequence is written correctly.
+        '''
+
+
+class NucleicAcidSequnce(BiologicalSequence):
+    '''
+    Class for DNA or RNA molecules.
+
+    It is meant to be used through its subclasses DNASequence and RNASequence:
+    ``is_alphabet_correct`` accepts a sequence only for instances of one of them.
+    '''
+
+    def __init__(self, seq) -> None:
+        self.seq = seq
+        self.dna_alphabet = set('AaTtGgCc')
+        self.rna_alphabet = set('AaUuGgCc')
+        # DNA-style pairing (A pairs with T); complement() replaces it with A-U for RNA.
+        self.complement_dict = {'A': 'T', 'C': 'G',
+                                'G': 'C', 'T': 'A', 'U': 'A', 'a': 't',
+                                'c': 'g', 'g': 'c', 't': 'a', 'u': 'a'}
+
+    def __len__(self) -> int:
+        return len(self.seq)
+
+    def __getitem__(self, item) -> str:
+        return self.seq[item]
+
+    def __str__(self) -> str:
+        return self.seq
+
+    def is_alphabet_correct(self) -> bool:
+        '''
+        Check that the sequence contains only the nucleotides allowed for its type.
+
+        :rtype: bool
+        :return: True for a DNASequence made of A/T/G/C or an RNASequence made of A/U/G/C
+            (any case)
+        :raises TypeError: in every other case
+        '''
+        letters = set(self.seq)
+        is_dna = isinstance(self, DNASequence) and letters.issubset(self.dna_alphabet)
+        is_rna = isinstance(self, RNASequence) and letters.issubset(self.rna_alphabet)
+        if is_dna or is_rna:
+            return True
+        raise TypeError(f'{self.seq} is not correct nucleic acid')
+
+    def complement(self):
+        """
+        Return the complement sequence as a new object of the same kind (DNA or RNA).
+
+        :rtype: DNASequence or RNASequence
+        :return: complement sequence, with the case of every nucleotide preserved
+        :raises TypeError: if the sequence fails ``is_alphabet_correct``
+        """
+        self.is_alphabet_correct()
+        pairing = dict(self.complement_dict)
+        if isinstance(self, RNASequence):
+            # In RNA adenine pairs with uracil, not thymine.
+            pairing.update({'A': 'U', 'a': 'u'})
+        complement_seq = ''.join(pairing[nucleotide] for nucleotide in self.seq)
+        if isinstance(self, DNASequence):
+            return DNASequence(complement_seq)
+        return RNASequence(complement_seq)
+
+    def gc_calculate(self) -> float:
+        """
+        Return the GC-content of the sequence in percent, rounded to two decimals.
+
+        :rtype: float
+        :return: GC-content in percent; 0.0 for an empty sequence
+        """
+        length = len(self.seq)
+        if length == 0:
+            # Avoid division by zero for an empty sequence.
+            return 0.0
+        seq_up = self.seq.upper()
+        gc_count = seq_up.count("G") + seq_up.count("C")
+        return round(gc_count / length * 100, 2)
+
+
+class DNASequence(NucleicAcidSequnce):
+    '''
+    DNA molecule; adds transcription to the shared nucleic acid behaviour.
+    '''
+
+    def transcribe(self):
+        '''
+        Return the transcribed sequence: every T/t of the DNA replaced by U/u.
+
+        :rtype: RNASequence
+        :return: transcribed sequence
+        '''
+        if not super().is_alphabet_correct():
+            return None
+        thymine_to_uracil = str.maketrans('Tt', 'Uu')
+        return RNASequence(self.seq.translate(thymine_to_uracil))
+
+
+class RNASequence(NucleicAcidSequnce):
+    '''
+    RNA molecule; construction and all operations come from NucleicAcidSequnce.
+    '''
+
+
+class AminoAcidSequence(BiologicalSequence):
+    '''
+    Class for protein sequence written in the 1-letter code (upper case).
+    '''
+
+    def __init__(self, seq):
+        self.seq = seq
+        self.protein_alphabet = set('ACDEFGHIKLMNPQRSTVWY')
+
+    def __len__(self):
+        return len(self.seq)
+
+    def __getitem__(self, item):
+        return self.seq[item]
+
+    def __str__(self):
+        return self.seq
+
+    def is_alphabet_correct(self):
+        '''
+        Check that the sequence contains only the 20 residues of ``protein_alphabet``.
+
+        :rtype: bool
+        :return: True if every residue is allowed
+        :raises TypeError: if the sequence contains any other character
+        '''
+        if set(self.seq).issubset(self.protein_alphabet):
+            return True
+        raise TypeError(f'{self.seq} is not a protein')
+
+    def calculate_protein_mass(self):
+        '''
+        Return the sum of the residue masses of the sequence in Da.
+
+        :rtype: float
+        :return: sum of the per-residue values from the weight table below
+        :raises TypeError: if the sequence fails ``is_alphabet_correct``
+        '''
+        # The method must be called: the bare attribute is always truthy and
+        # would skip the validation.
+        self.is_alphabet_correct()
+        weights = {'A': 89.09, 'R': 174.20, 'N': 132.12, 'D': 133.10, 'C': 121.15,
+                   'E': 147.13, 'Q': 146.15, 'G': 75.07, 'H': 155.16, 'I': 131.17,
+                   'L': 131.17, 'K': 146.19, 'M': 149.21, 'F': 165.19, 'P': 115.13,
+                   'S': 105.09, 'T': 119.12, 'W': 204.23, 'Y': 181.19, 'V': 117.15}
+        return sum(weights[aa] for aa in self.seq)
diff --git a/README.md b/README.md
index 602c1e1..f81dc14 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,564 @@
-# BSAT
-BSAT - Biological Sequences Analysis Toolbox. Repo contains tools which can help work with nucleic acid or protein sequences and with NGS-reads
+# BSAT - Biological Sequences Analysis Toolbox
+
+This repository contains tools that help you work with nucleic acid or protein sequences and with NGS reads. It can process multiple sequences at once, which makes analysis faster.
+
+## Installation
+
+To use this toolbox, clone the repository:
+
+```shell
+git clone git@github.com:grishchenkoira/BSAT.git
+cd BSAT
+```
+
+### System requirements:
+
+Key packages and programs:
+- [Python](https://www.python.org/downloads/) (version >= 3.9)
+- [Biopython](https://biopython.org/) (imported by `Bio_Seq_Analysis_Tool.py`)
+
+## Usage
+
+```python
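+# import the FASTQ filter and the sequence classes from Bio_Seq_Analysis_Tool
+from Bio_Seq_Analysis_Tool import filter_fastq, DNASequence, RNASequence, AminoAcidSequence
+
+# import the file-processing functions from Bio_Files_Processor
+from Bio_Files_Processor import convert_multiline_fasta_to_oneline, select_genes_from_gbk_to_fasta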
+```
+
+## Working with the main functions
+
+This section describes the two main scripts: Bio_Files_Processor and Bio_Seq_Analysis_Tool.
+
+## Bio_Files_Processor
+
+The functions in this script help you analyze genomic data downloaded from different databases and saved in their standard file formats.
+
+### convert_multiline_fasta_to_oneline(input_fasta: str, output_fasta: str = None) -> str
+
+The function converts a multiline FASTA file into a FASTA file in which every record consists of a name line marked by '>' and a single line with the whole sequence. The result is saved as a new file in the folder "Converted_data". If the folder already exists, the new data is written to it.
+
+**Parameters:**
+
+-**input_fasta**: *str*
+
+Path to the FASTA file with your sequences. !! The name must be given together with its extension (.fasta) !! The function checks that the format is correct.
+
+-**output_fasta**: *str*
+
+Name of new fasta-file with your seqs in one line, default value = None. It is necessary to indicate the name along with extensions (.fasta)
+
+**Returns:**
+
+Script completion message
+
+**Example**
+
+```python
+convert_multiline_fasta_to_oneline('./data_example/example_multiline_fasta.fasta') # 'All sequences processed!'
+```
+Structure of new file with FASTA-seq:
+'>_name:
+sequence'
+
+### select_genes_from_gbk_to_fasta(input_gbk: str, genes: List[str], n_before: int = 1, n_after: int = 1, output_fasta: str = None) -> str
+
+The function searches for the neighbours of a GOI (gene of interest) and writes them to a new FASTA file as: name of gene, protein sequence. The results are saved in the folder "Analyzed_data". If the folder already exists, the new data is written to it.
+
+**Parameters:**
+
+-**input_gbk**: *str*
+
+Path to the gbk file with your sequences. !! The name must be given together with its extension (.gbk) !! The function checks that the format is correct.
+
+-**genes**: *List[str]*
+
+Gene of interest names.
+
+Example of input:
+```python
+['pndA']
+```
+-**n_before**: *int*
+
+Number of genes before GOI (>0), default value = 1
+
+-**n_after**: *int*
+
+number of genes after GOI (>0), default value = 1
+
+-**output_fasta**: *str*
+
+Name of new fasta-file with your seqs, default value = None. It is necessary to indicate the name along with extensions (.fasta)
+
+**Returns:**
+Script completion message
+
+**Example**
+```python
+select_genes_from_gbk_to_fasta('.\\example_data\\example_gbk.gbk', ['pndA'], 2, 2) # 'All sequences processed!'
+```
+Structure of new file with FASTA-seq:
+'> gene_name:
+protein_sequence'
+
+## Bio_Seq_Analysis_Tool
+
+Main functions from this script help you to analyze different types of bio sequences: DNA, RNA, NGS-reads, protein
+
+### dna_rna_analysis(*args: str, operation: str)
+
+This function performs a number of operations on DNA or RNA.
+Operations supported by this functions:
+- transcribe - return transcribed sequence
+- reverse - return reverse sequence
+- complement - return complement sequence
+- reverse_complement - return reverse complement sequence
+- gc_calculate - return sequence GC-content in percent
+
+**Parameters:**
+- **args**: *str*
+
+Nucleic acid sequence
+- **operation**: *str*
+
+Type of operation required
+
+**Returns**:
+- **analysis**: *str*
+
+Analysis of nucleic acid sequence
+
+### analyse_fastq(seqs, gc_bounds, length_bounds, quality_threshold)
+
+Apply one of the operations described below to fastq sequences.
+
+**Parameters:**
+- **seqs**: *dict*
+
+A dictionary consisting of fastq sequences. The structure is as follows: Key - string, sequence name. The value is a tuple of two strings: sequence and quality. The sequence is RNA or DNA.
+
+- **gc_bounds**: *Union[int, float, Tuple [int], Tuple [float]]*
+
+Boundary parameters for filtering sequences by GC-content. Save only reads with a GC-content between boundaries or lower than one boundary. Lower boundary cannot be less than 0 and upper boundary cannot be greater than 100. gc_bounds default value is (0,100).
+
+- **length_bounds** : *Union[int, Tuple [int]]*
+
+Boundary parameters for filtering sequences by length. Works the same as gc_bounds. Lower boundary cannot be less than 0 and upper boundary cannot be greater than 2^32. length_bounds default value is (0,2^32).
+
+- **quality_threshold** : *float*
+
+Threshold for the quality of each nucleotide in a read. Quality is encoded with ASCII characters. The threshold cannot be more than 40. quality_threshold default value is 0.
+
+**Returns**:
+- **analysed_seq**: *Dict[str]*
+
+New dictionary with FASTQ sequences. This one consists of the sequences that passed the filters.
+
+- **analysed_seq**: *Dict[str, str]*
+
+New dictionary with fastq sequence. This one consists of sequences that did not pass filters.
+
+### run_protein_analysis(*args: str)
+
+Apply operations described below to any number of sequences with any case.
+
+**Parameters:**
+**\*args**:
+- **sequences**: *str*
+
+input comma-separated sequences in 1-letter or 3-letter code with any case (as many as you wish)
+- **add_arg**: *str*
+
+necessary parameter for certain functions (for example, specify target protein site)
+- **procedure** : *str*
+
+specify procedure you want to apply
+
+**Returns**:
+- **operation_result**: str or list
+
+result of function work in list or str format (dependent on number of input sequences)
+
+**Note!**
+- Operation name always must be the last argument
+- Additional argument must be always before operation name
+
+## Modules
+
+This section describes the modules used by the main functions of the library.
+
+- [DNA & RNA analysis](#dna--rna-analysis)
+- [FASTQ analysis tool](#fastq-analysis-tool)
+- [Amino acid sequences analysis tool](#amino-acid-sequences-analysis-tool)
+
+### DNA & RNA analysis
+
+This module performs a number of operations on DNA or RNA.
+
+#### Operations
+
+##### transcribe(seq)
+
+The function returns the transcribed sequence.
+
+**Parameters:**
+- **seq**: *str*
+
+DNA sequence
+
+**Returns:**
+- **rna_seq**: *str*
+
+**Example**
+```python
+run_dna_rna_tools('ATG', 'transcribe') # 'AUG'
+```
+
+
+##### reverse(seq)
+
+The function returns the reversed sequence.
+
+**Parameters:**
+- **seq**: *str*
+
+DNA or RNA sequence
+
+**Returns:**
+- **reverse_seq**: *str*
+
+**Example**
+```python
+run_dna_rna_tools('ATG', 'reverse') # 'GTA'
+```
+
+##### complement(seq)
+
+The function returns the complement sequence.
+
+**Parameters:**
+- **seq**: *str*
+
+DNA or RNA sequence
+
+**Returns:**
+- **complement_seq**: *str*
+
+**Example**
+```python
+run_dna_rna_tools('AtG', 'complement') # 'TaC'
+```
+
+##### reverse_complement(seq)
+
+The function returns the reverse complement sequence.
+
+**Parameters:**
+- **seq**: *str*
+
+DNA or RNA sequence
+
+**Returns:**
+- **reverse_complement_seq**: *str*
+
+**Example**
+```python
+run_dna_rna_tools('ATg', 'reverse_complement') # 'cAT'
+```
+
+##### gc_calculate(seq)
+
+Function return sequence GC-content in percent.
+
+**Parameters:**
+- **seq**: *str*
+
+DNA or RNA sequence
+
+**Returns:**
+- **gc_content**: *str*
+
+**Example**
+```python
+run_dna_rna_tools ('GTAccca','gc_calculate') # '57.14'
+```
+
+
+### FASTQ analysis tool
+
+This module contains functions for filtering FASTQ sequences. The functions allow you to select the desired reads according to three parameters: GC-content, length and read quality.
+
+#### Operations
+
+##### analyse_gc(seq)
+
+Return GC-content of DNA/RNA sequence.
+
+**Parameters:**
+
+- **seq**: *str*
+
+DNA/RNA sequence
+
+**Returns:**
+- **gc_content**: *float*
+
+##### analyse_length(seq)
+
+Return length of DNA/RNA sequence
+
+**Parameters:**
+
+- **seq**: *str*
+
+DNA/RNA sequence
+
+**Returns:**
+- **length**: *int*
+
+##### analyse_quality(seq)
+
+Return the quality score of a read, which is encoded with ASCII characters
+
+**Parameters:**
+
+- **seq**: *str*
+
+quality symbols for each nucleotide
+
+**Returns:**
+- **q_score_sum**: *float*
+
+
+### Amino acid sequences analysis tool
+
+This module contains functions for protein sequences analysis. You can reencode peptides sequences: 1-letter to 3-letter code and vice versa, calculate physical features, find specific sites, get predicted mRNA that coding your protein.
+
+#### Operations
+
+##### change_residues_encoding(seq, query='one')
+
+Transfer amino acids from 3-letter to 1-letter code and vice versa.
+
+**Parameters:**
+
+- **seq**: *str*
+
+Input protein seq in any encoding and case. If the input is a sequence of amino acids written in a three-letter code, then the amino acids must be separated by a space. If the input is a sequence of amino acids written in a single-letter code, then the amino acids may not be separated by a space.
+
+- **encoding**: {'one', 'three'}, default: 'one'
+
+specify target encoding
+
+**Returns:**
+- **encode_seq_registered**: *str*
+
+same protein seq in another encoding
+
+**Example**
+```python
+seq = 'AAA'
+change_residues_encoding(seq, 'one', 'change_residues_encoding') # 'AAA'
+
+seq = 'ALA ALA ALA'
+change_residues_encoding(seq, 'one', 'change_residues_encoding') # 'AAA'
+
+seq = 'AAA'
+change_residues_encoding(seq, 'three', 'change_residues_encoding') # 'ALA ALA ALA'
+```
+
+##### is_protein(seq)
+
+Check whether a sequence is a protein by looking for invalid elements, i.e. elements that are not among the known amino acid residues.
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+**Returns:**
+- **verification_result**: *bool*
+
+if seq is correct protein seq or not
+
+**Example**
+```python
+seq = 'AAA'
+is_protein(seq) #True
+```
+
+##### get_seq_characteristic(seq)
+
+Count entry of each residue type in your sequence
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+**Returns:**
+- **res_count**: *dict*
+
+each residue type in seq in 3-letter code and its amount in current seq
+
+**Example**
+```python
+seq = 'AAA'
+get_seq_characteristic(seq) #{'ALA': 3}
+```
+
+##### find_res(seq, res_of_interest)
+
+Find all positions of certain residue in your seq
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+- **res_of_interest**: *str*
+
+residue of interest in 1-letter encoding and upper case
+
+**Returns:**
+- **res_positions**: *str*
+
+positions of specified residue in your seq
+
+**Example**
+```python
+seq = 'AAA'
+res = 'A'
+find_res(seq, res) # 'A positions: [1, 2, 3]'
+```
+
+##### find_site(seq, site)
+
+Find if seq contains certain site and get positions of its site
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+- **site**: *str*
+
+specify site of interest
+
+**Returns:**
+- **site_positions**: *str*
+
+the range of values for amino acid positions of specified site in your seq in which the last number is excluded
+
+**Example**
+```python
+seq = 'AAADDDF'
+site = 'AAA'
+find_site(seq, site) # "Site entry in sequence = 1. Site residues can be found at positions: ['1:4']"
+```
+
+##### calculate_protein_mass(seq)
+
+Get sum of residues masses in your seq in Da
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+**Returns:**
+- **total_mass**: *float*
+
+mass of all residues in seq in Da
+
+**Example**
+```python
+seq = 'AAA'
+calculate_protein_mass(seq) #267
+```
+
+##### calculate_average_hydrophobicity(seq)
+
+Get average hydrophobicity index for protein seq as sum of index for each residue in your seq divided by its length
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+**Returns:**
+- **average_hydrophobicity_idx**: *float*
+
+average hydrophobicity index for your seq
+
+**Example**
+```python
+seq = 'AAA'
+calculate_average_hydrophobicity(seq) #1.8
+```
+
+##### get_mrna(seq)
+
+Get encoding mRNA nucleotides for your seq
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+**Returns:**
+- **mrna_seq**: *str*
+
+potential encoding mRNA sequences with multiple choice for some positions
+
+**Example**
+```python
+seq = 'AAA'
+get_mrna(seq) # ['GCN', 'GCN', 'GCN']
+```
+
+##### calculate_isoelectric_point(seq)
+
+Find the isoelectric point as the sum of the known pI values of the residues in your seq
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+**Returns:**
+- **pi**: *float*
+
+isoelectric point for your seq
+
+**Example**
+```python
+seq = 'AAA'
+calculate_isoelectric_point(seq) # 6.01
+```
+##### analyze_secondary_structure(seq)
+
+Calculates the percentage of amino acids found in the three main types of protein secondary structure: beta-turn, beta-sheet and alpha-helix in your seq
+
+**Parameters:**
+- **seq**: *str*
+
+input protein seq in 1-letter encoding and upper case
+
+**Returns:**
+- **result**: *list*
+
+percentage of amino acids belonging to three types of secondary structure for seq
+
+**Example**
+```python
+seq = 'AAA'
+analyze_secondary_structure(seq) # [0.0, 0.0, 100.0]
+```
+
+## Contact
+
+*This is the repo for the 5th homework of the BI Python 2023 course*
+
+Author:
+- *Grishenko Irina*
\ No newline at end of file