From 8eabf4010c5ee9816338c46be24c8ab826025718 Mon Sep 17 00:00:00 2001
From: shradhasaraf <shradha@ebi.ac.uk>
Date: Thu, 10 Jul 2025 15:46:53 +0100
Subject: [PATCH 1/3] Added script for gene merging

---
 merge_genes.py | 234 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 merge_genes.py

diff --git a/merge_genes.py b/merge_genes.py
new file mode 100644
index 000000000..896436463
--- /dev/null
+++ b/merge_genes.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+# See the NOTICE file distributed with this work for additional information
+# regarding copyright ownership.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Merge Genes script.
+
+This script aims to merge the genes from Nipponbare 3 annotation sources (RAPDB, MSU and Gramene).
+
+Below are some of the criteria to consider:
+1. (plus) and (minus) strand genes are considered separately 
+2. The genes are considered as overlapping if the overlap length of the sequence is >=50% of the length of the smallest line.
+
+"""
+import argparse
+import logging
+from pathlib import Path
+
+import pandas
+
+from ensembl.utils import StrPath
+from ensembl.utils.argparse import ArgumentParser
+from ensembl.utils.logging import init_logging_with_args
+
+
+def update_gene_info(
+    update_gene: dict,
+    id_list: list,
+    merged_chromosome: str,
+    merged_gene_start: str,
+    merged_gene_end: str,
+    merged_strand: str,
+) -> dict:
+    """
+    Takes the id_list, other parameters from main, update the gene information and returns to the main
+
+    Args:
+        update_gene (dict): column names as keys and corresponding parameters are values in teh form of list
+        id_list (list): A list of ids from genes to merge
+        merged_chromosome: Chromosome number from the merged gene
+        merged_gene_start: New gene start coordinate after merging the gene
+        merged_gene_end: New gene end coordinate after merging the gene
+        merged_strand: Strand from the merged genes
+
+    Returns:
+        The updated gene.
+    """
+    update_gene["chr"].append(merged_chromosome)
+    update_gene["source"].append("panoryza")
+    update_gene["gene"].append("gene")
+    update_gene["start"].append(merged_gene_start)
+    update_gene["end"].append(merged_gene_end)
+    update_gene["score"].append(".")
+    update_gene["strand"].append(merged_strand)
+    update_gene["frame"].append(".")
+    merge_ids = ",".join(id_list)
+    update_gene["attribute"].append(merge_ids)
+    return update_gene
+
+
+def sort_genes(output_file: dict) -> pandas.DataFrame:
+    """Sort genes.
+
+    Args:
+        output_file: gene information in a dictionary
+    Returns:
+        A sorted gene matrix.
+
+    """
+    gene_matrix = pandas.DataFrame.from_dict(output_file)
+    gene_matrix["chr_no"] = gene_matrix["chr"].str.extract("(\d+)", expand=False).astype(int)
+    gene_matrix.sort_values(by=["chr_no", "start"], inplace=True)
+    gene_matrix.drop("chr_no", axis=1, inplace=True)
+    return gene_matrix
+
+
+def merge_genes(input_gff_file: StrPath) -> dict:
+    """Merge genes from the input GFF file.
+
+    Args:
+        input_gff_file: Path to the GFF file.
+
+    Returns:
+        The merged genes in a dictionary.
+    """
+
+    list_id = []
+    dict_merged_gene = {
+        "chr": [],
+        "source": [],
+        "gene": [],
+        "start": [],
+        "end": [],
+        "score": [],
+        "strand": [],
+        "frame": [],
+        "attribute": [],
+    }
+
+    logging.info("The input file is " + str(input_gff_file))
+
+    with open(input_gff_file) as gff_file:
+        lines = gff_file.readlines()
+        ## if line is starting with 'Chr' then split it and assign to different variables
+        for line in lines:
+            if line.startswith("#"):
+                continue
+            chromo, _, _, start, end, _, strand, _, description = line.split("\t")
+            gene_id = description.split(";Name=")[0].split(";biotype=")[0]
+            start = int(start)
+            end = int(end)
+
+            ### Assign the values for the first gene in the gff
+            if line == lines[0]:
+                gene_start = start
+                gene_end = end
+                current_chromosome = str(line.split("\t")[0])
+                current_strand = str(line.split("\t")[6])
+
+            ## Checking if the strand and chromosome are same between the previous and current line; otherwise print and start with the new gene
+            if strand != current_strand or chromo != current_chromosome:
+                dict_merged_gene = update_gene_info(
+                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
+                )
+                current_strand = strand
+                gene_start = start
+                gene_end = end
+                current_chromosome = chromo
+                list_id = []
+
+            ## if start of the next gene is lesser than the current gene, raise exception
+            if start < gene_start:
+                raise Exception("Error: File is not sorted properly")
+
+            ## if start coordinate of a gene is equal or greater than the previous AND if start coordinate is lesser than the gene end then start merging the genes
+            elif (start == gene_start or start > gene_start) and start < gene_end:
+                ## if end coord of current gene is lesser than the previous, then merge the gene into the previous one
+                if end <= gene_end:
+                    list_id.append(gene_id)
+
+                ## if end coord of current gene is greater than the previous, then calculate the length of both genes and check overlap criteria
+                elif end > gene_end:
+                    gene_length = gene_end - gene_start
+                    length = end - start
+                    min_length = min(length, gene_length)
+                    overlap_length = gene_end - start
+
+                    ## Checking the overlap criteria
+                    if int(overlap_length) * 2 >= int(min_length):
+                        gene_end = end
+                        list_id.append(gene_id)
+                        logging.info("overlap\n")
+                    else:
+                        dict_merged_gene = update_gene_info(
+                            dict_merged_gene,
+                            list_id,
+                            current_chromosome,
+                            gene_start,
+                            gene_end,
+                            current_strand,
+                        )
+                        list_id = []
+                        gene_start = start
+                        gene_end = end
+                        list_id.append(gene_id)
+
+            ## if start coord of a gene is greater than the end coord of the previous gene then print the previous gene (current gene will not merge into that)
+            if start >= gene_end:
+                dict_merged_gene = update_gene_info(
+                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
+                )
+                list_id = []
+                gene_start = start
+                gene_end = end
+                list_id.append(gene_id)
+
+            ## If last line in the file, print the gene(s)
+            if line == lines[-1]:
+                dict_merged_gene = update_gene_info(
+                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
+                )
+    return dict_merged_gene
+
+
+def parse_args(arg_list: list[str] | None) -> argparse.Namespace:
+    """Parse the aruments from the command line.
+
+    Args:
+        arg_list: List of arguments to parse. If `None`, grab them from the command line.
+
+    Returns:
+        Parsed arguments.
+
+    """
+    parser = ArgumentParser(description="Merge genes script")
+    parser.add_argument_src_path(
+        "--input_gff_file",
+        default="sorted_all_line.gff",
+        help="Input file with gene info from RAPDB, MSU and Gramene",
+    )
+    parser.add_argument_dst_path(
+        "--output_dir", default=".", help="Output directory for the merged genes file"
+    )
+    parser.add_log_arguments()
+    return parser.parse_args(arg_list)
+
+
+def main(arg_list: list[str] | None = None) -> None:
+    """Main script entry point.
+
+    Args:
+        arg_list: Arguments to parse passing list to `parse_args()`.
+    """
+    args = parse_args(arg_list)
+    init_logging_with_args(args)
+
+    merge_genes_metadata = merge_genes(args.input_gff_file)
+    sorted_genes = sort_genes(merge_genes_metadata)
+    sorted_genes.to_csv(args.output_dir / "merged_genes.tsv", sep="\t", index=False)
+
+
+if __name__ == "__main__":
+    main()

From 8cd5b5cf084087cc79998d0b3f605024e32fd168 Mon Sep 17 00:00:00 2001
From: shradhasaraf <shradha@ebi.ac.uk>
Date: Thu, 10 Jul 2025 15:53:36 +0100
Subject: [PATCH 2/3] Added script for gene merging

---
 scripts/merge_genes.py | 234 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 scripts/merge_genes.py

diff --git a/scripts/merge_genes.py b/scripts/merge_genes.py
new file mode 100644
index 000000000..896436463
--- /dev/null
+++ b/scripts/merge_genes.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+# See the NOTICE file distributed with this work for additional information
+# regarding copyright ownership.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Merge Genes script.
+
+This script aims to merge the genes from Nipponbare 3 annotation sources (RAPDB, MSU and Gramene).
+
+Below are some of the criteria to consider:
+1. (plus) and (minus) strand genes are considered separately 
+2. The genes are considered as overlapping if the overlap length of the sequence is >=50% of the length of the smallest line.
+
+"""
+import argparse
+import logging
+from pathlib import Path
+
+import pandas
+
+from ensembl.utils import StrPath
+from ensembl.utils.argparse import ArgumentParser
+from ensembl.utils.logging import init_logging_with_args
+
+
+def update_gene_info(
+    update_gene: dict,
+    id_list: list,
+    merged_chromosome: str,
+    merged_gene_start: str,
+    merged_gene_end: str,
+    merged_strand: str,
+) -> dict:
+    """
+    Takes the id_list, other parameters from main, update the gene information and returns to the main
+
+    Args:
+        update_gene (dict): column names as keys and corresponding parameters are values in teh form of list
+        id_list (list): A list of ids from genes to merge
+        merged_chromosome: Chromosome number from the merged gene
+        merged_gene_start: New gene start coordinate after merging the gene
+        merged_gene_end: New gene end coordinate after merging the gene
+        merged_strand: Strand from the merged genes
+
+    Returns:
+        The updated gene.
+    """
+    update_gene["chr"].append(merged_chromosome)
+    update_gene["source"].append("panoryza")
+    update_gene["gene"].append("gene")
+    update_gene["start"].append(merged_gene_start)
+    update_gene["end"].append(merged_gene_end)
+    update_gene["score"].append(".")
+    update_gene["strand"].append(merged_strand)
+    update_gene["frame"].append(".")
+    merge_ids = ",".join(id_list)
+    update_gene["attribute"].append(merge_ids)
+    return update_gene
+
+
+def sort_genes(output_file: dict) -> pandas.DataFrame:
+    """Sort genes.
+
+    Args:
+        output_file: gene information in a dictionary
+    Returns:
+        A sorted gene matrix.
+
+    """
+    gene_matrix = pandas.DataFrame.from_dict(output_file)
+    gene_matrix["chr_no"] = gene_matrix["chr"].str.extract("(\d+)", expand=False).astype(int)
+    gene_matrix.sort_values(by=["chr_no", "start"], inplace=True)
+    gene_matrix.drop("chr_no", axis=1, inplace=True)
+    return gene_matrix
+
+
+def merge_genes(input_gff_file: StrPath) -> dict:
+    """Merge genes from the input GFF file.
+
+    Args:
+        input_gff_file: Path to the GFF file.
+
+    Returns:
+        The merged genes in a dictionary.
+    """
+
+    list_id = []
+    dict_merged_gene = {
+        "chr": [],
+        "source": [],
+        "gene": [],
+        "start": [],
+        "end": [],
+        "score": [],
+        "strand": [],
+        "frame": [],
+        "attribute": [],
+    }
+
+    logging.info("The input file is " + str(input_gff_file))
+
+    with open(input_gff_file) as gff_file:
+        lines = gff_file.readlines()
+        ## if line is starting with 'Chr' then split it and assign to different variables
+        for line in lines:
+            if line.startswith("#"):
+                continue
+            chromo, _, _, start, end, _, strand, _, description = line.split("\t")
+            gene_id = description.split(";Name=")[0].split(";biotype=")[0]
+            start = int(start)
+            end = int(end)
+
+            ### Assign the values for the first gene in the gff
+            if line == lines[0]:
+                gene_start = start
+                gene_end = end
+                current_chromosome = str(line.split("\t")[0])
+                current_strand = str(line.split("\t")[6])
+
+            ## Checking if the strand and chromosome are same between the previous and current line; otherwise print and start with the new gene
+            if strand != current_strand or chromo != current_chromosome:
+                dict_merged_gene = update_gene_info(
+                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
+                )
+                current_strand = strand
+                gene_start = start
+                gene_end = end
+                current_chromosome = chromo
+                list_id = []
+
+            ## if start of the next gene is lesser than the current gene, raise exception
+            if start < gene_start:
+                raise Exception("Error: File is not sorted properly")
+
+            ## if start coordinate of a gene is equal or greater than the previous AND if start coordinate is lesser than the gene end then start merging the genes
+            elif (start == gene_start or start > gene_start) and start < gene_end:
+                ## if end coord of current gene is lesser than the previous, then merge the gene into the previous one
+                if end <= gene_end:
+                    list_id.append(gene_id)
+
+                ## if end coord of current gene is greater than the previous, then calculate the length of both genes and check overlap criteria
+                elif end > gene_end:
+                    gene_length = gene_end - gene_start
+                    length = end - start
+                    min_length = min(length, gene_length)
+                    overlap_length = gene_end - start
+
+                    ## Checking the overlap criteria
+                    if int(overlap_length) * 2 >= int(min_length):
+                        gene_end = end
+                        list_id.append(gene_id)
+                        logging.info("overlap\n")
+                    else:
+                        dict_merged_gene = update_gene_info(
+                            dict_merged_gene,
+                            list_id,
+                            current_chromosome,
+                            gene_start,
+                            gene_end,
+                            current_strand,
+                        )
+                        list_id = []
+                        gene_start = start
+                        gene_end = end
+                        list_id.append(gene_id)
+
+            ## if start coord of a gene is greater than the end coord of the previous gene then print the previous gene (current gene will not merge into that)
+            if start >= gene_end:
+                dict_merged_gene = update_gene_info(
+                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
+                )
+                list_id = []
+                gene_start = start
+                gene_end = end
+                list_id.append(gene_id)
+
+            ## If last line in the file, print the gene(s)
+            if line == lines[-1]:
+                dict_merged_gene = update_gene_info(
+                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
+                )
+    return dict_merged_gene
+
+
+def parse_args(arg_list: list[str] | None) -> argparse.Namespace:
+    """Parse the aruments from the command line.
+
+    Args:
+        arg_list: List of arguments to parse. If `None`, grab them from the command line.
+
+    Returns:
+        Parsed arguments.
+
+    """
+    parser = ArgumentParser(description="Merge genes script")
+    parser.add_argument_src_path(
+        "--input_gff_file",
+        default="sorted_all_line.gff",
+        help="Input file with gene info from RAPDB, MSU and Gramene",
+    )
+    parser.add_argument_dst_path(
+        "--output_dir", default=".", help="Output directory for the merged genes file"
+    )
+    parser.add_log_arguments()
+    return parser.parse_args(arg_list)
+
+
+def main(arg_list: list[str] | None = None) -> None:
+    """Main script entry point.
+
+    Args:
+        arg_list: Arguments to parse passing list to `parse_args()`.
+    """
+    args = parse_args(arg_list)
+    init_logging_with_args(args)
+
+    merge_genes_metadata = merge_genes(args.input_gff_file)
+    sorted_genes = sort_genes(merge_genes_metadata)
+    sorted_genes.to_csv(args.output_dir / "merged_genes.tsv", sep="\t", index=False)
+
+
+if __name__ == "__main__":
+    main()

From fc96508a114fcdb8c784c1d80d85c6927296e621 Mon Sep 17 00:00:00 2001
From: shradhasaraf <shradha@ebi.ac.uk>
Date: Fri, 11 Jul 2025 09:57:13 +0100
Subject: [PATCH 3/3] removed script from parent folder

---
 merge_genes.py | 234 -------------------------------------------------
 1 file changed, 234 deletions(-)
 delete mode 100644 merge_genes.py

diff --git a/merge_genes.py b/merge_genes.py
deleted file mode 100644
index 896436463..000000000
--- a/merge_genes.py
+++ /dev/null
@@ -1,234 +0,0 @@
-#!/usr/bin/env python
-# See the NOTICE file distributed with this work for additional information
-# regarding copyright ownership.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Merge Genes script.
-
-This script aims to merge the genes from Nipponbare 3 annotation sources (RAPDB, MSU and Gramene).
-
-Below are some of the criteria to consider:
-1. (plus) and (minus) strand genes are considered separately 
-2. The genes are considered as overlapping if the overlap length of the sequence is >=50% of the length of the smallest line.
-
-"""
-import argparse
-import logging
-from pathlib import Path
-
-import pandas
-
-from ensembl.utils import StrPath
-from ensembl.utils.argparse import ArgumentParser
-from ensembl.utils.logging import init_logging_with_args
-
-
-def update_gene_info(
-    update_gene: dict,
-    id_list: list,
-    merged_chromosome: str,
-    merged_gene_start: str,
-    merged_gene_end: str,
-    merged_strand: str,
-) -> dict:
-    """
-    Takes the id_list, other parameters from main, update the gene information and returns to the main
-
-    Args:
-        update_gene (dict): column names as keys and corresponding parameters are values in teh form of list
-        id_list (list): A list of ids from genes to merge
-        merged_chromosome: Chromosome number from the merged gene
-        merged_gene_start: New gene start coordinate after merging the gene
-        merged_gene_end: New gene end coordinate after merging the gene
-        merged_strand: Strand from the merged genes
-
-    Returns:
-        The updated gene.
-    """
-    update_gene["chr"].append(merged_chromosome)
-    update_gene["source"].append("panoryza")
-    update_gene["gene"].append("gene")
-    update_gene["start"].append(merged_gene_start)
-    update_gene["end"].append(merged_gene_end)
-    update_gene["score"].append(".")
-    update_gene["strand"].append(merged_strand)
-    update_gene["frame"].append(".")
-    merge_ids = ",".join(id_list)
-    update_gene["attribute"].append(merge_ids)
-    return update_gene
-
-
-def sort_genes(output_file: dict) -> pandas.DataFrame:
-    """Sort genes.
-
-    Args:
-        output_file: gene information in a dictionary
-    Returns:
-        A sorted gene matrix.
-
-    """
-    gene_matrix = pandas.DataFrame.from_dict(output_file)
-    gene_matrix["chr_no"] = gene_matrix["chr"].str.extract("(\d+)", expand=False).astype(int)
-    gene_matrix.sort_values(by=["chr_no", "start"], inplace=True)
-    gene_matrix.drop("chr_no", axis=1, inplace=True)
-    return gene_matrix
-
-
-def merge_genes(input_gff_file: StrPath) -> dict:
-    """Merge genes from the input GFF file.
-
-    Args:
-        input_gff_file: Path to the GFF file.
-
-    Returns:
-        The merged genes in a dictionary.
-    """
-
-    list_id = []
-    dict_merged_gene = {
-        "chr": [],
-        "source": [],
-        "gene": [],
-        "start": [],
-        "end": [],
-        "score": [],
-        "strand": [],
-        "frame": [],
-        "attribute": [],
-    }
-
-    logging.info("The input file is " + str(input_gff_file))
-
-    with open(input_gff_file) as gff_file:
-        lines = gff_file.readlines()
-        ## if line is starting with 'Chr' then split it and assign to different variables
-        for line in lines:
-            if line.startswith("#"):
-                continue
-            chromo, _, _, start, end, _, strand, _, description = line.split("\t")
-            gene_id = description.split(";Name=")[0].split(";biotype=")[0]
-            start = int(start)
-            end = int(end)
-
-            ### Assign the values for the first gene in the gff
-            if line == lines[0]:
-                gene_start = start
-                gene_end = end
-                current_chromosome = str(line.split("\t")[0])
-                current_strand = str(line.split("\t")[6])
-
-            ## Checking if the strand and chromosome are same between the previous and current line; otherwise print and start with the new gene
-            if strand != current_strand or chromo != current_chromosome:
-                dict_merged_gene = update_gene_info(
-                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
-                )
-                current_strand = strand
-                gene_start = start
-                gene_end = end
-                current_chromosome = chromo
-                list_id = []
-
-            ## if start of the next gene is lesser than the current gene, raise exception
-            if start < gene_start:
-                raise Exception("Error: File is not sorted properly")
-
-            ## if start coordinate of a gene is equal or greater than the previous AND if start coordinate is lesser than the gene end then start merging the genes
-            elif (start == gene_start or start > gene_start) and start < gene_end:
-                ## if end coord of current gene is lesser than the previous, then merge the gene into the previous one
-                if end <= gene_end:
-                    list_id.append(gene_id)
-
-                ## if end coord of current gene is greater than the previous, then calculate the length of both genes and check overlap criteria
-                elif end > gene_end:
-                    gene_length = gene_end - gene_start
-                    length = end - start
-                    min_length = min(length, gene_length)
-                    overlap_length = gene_end - start
-
-                    ## Checking the overlap criteria
-                    if int(overlap_length) * 2 >= int(min_length):
-                        gene_end = end
-                        list_id.append(gene_id)
-                        logging.info("overlap\n")
-                    else:
-                        dict_merged_gene = update_gene_info(
-                            dict_merged_gene,
-                            list_id,
-                            current_chromosome,
-                            gene_start,
-                            gene_end,
-                            current_strand,
-                        )
-                        list_id = []
-                        gene_start = start
-                        gene_end = end
-                        list_id.append(gene_id)
-
-            ## if start coord of a gene is greater than the end coord of the previous gene then print the previous gene (current gene will not merge into that)
-            if start >= gene_end:
-                dict_merged_gene = update_gene_info(
-                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
-                )
-                list_id = []
-                gene_start = start
-                gene_end = end
-                list_id.append(gene_id)
-
-            ## If last line in the file, print the gene(s)
-            if line == lines[-1]:
-                dict_merged_gene = update_gene_info(
-                    dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
-                )
-    return dict_merged_gene
-
-
-def parse_args(arg_list: list[str] | None) -> argparse.Namespace:
-    """Parse the aruments from the command line.
-
-    Args:
-        arg_list: List of arguments to parse. If `None`, grab them from the command line.
-
-    Returns:
-        Parsed arguments.
-
-    """
-    parser = ArgumentParser(description="Merge genes script")
-    parser.add_argument_src_path(
-        "--input_gff_file",
-        default="sorted_all_line.gff",
-        help="Input file with gene info from RAPDB, MSU and Gramene",
-    )
-    parser.add_argument_dst_path(
-        "--output_dir", default=".", help="Output directory for the merged genes file"
-    )
-    parser.add_log_arguments()
-    return parser.parse_args(arg_list)
-
-
-def main(arg_list: list[str] | None = None) -> None:
-    """Main script entry point.
-
-    Args:
-        arg_list: Arguments to parse passing list to `parse_args()`.
-    """
-    args = parse_args(arg_list)
-    init_logging_with_args(args)
-
-    merge_genes_metadata = merge_genes(args.input_gff_file)
-    sorted_genes = sort_genes(merge_genes_metadata)
-    sorted_genes.to_csv(args.output_dir / "merged_genes.tsv", sep="\t", index=False)
-
-
-if __name__ == "__main__":
-    main()