From db3cca27a0097344ba3a2c606eec9ae914d45c0c Mon Sep 17 00:00:00 2001 From: Simon <41160238+That-Thing@users.noreply.github.com> Date: Fri, 12 Sep 2025 10:41:41 -0400 Subject: [PATCH 1/4] Adds a /strains path which allows retrieving all strains present in the VCF file. --- main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/main.py b/main.py index 2dd74b7..7a618f9 100644 --- a/main.py +++ b/main.py @@ -291,3 +291,11 @@ def alignment_lengths(reference: str , url: str): except OSError as e: send_400_resp(f"Unable to open file: {e}") + +# Return all strains present in VCF file +@app.get("/strains/{url:path}") +def strains(url: str): + try: + return { "strains": list(pysam.VariantFile(check_url(url)).header.samples) } + except OSError as e: + send_400_resp(f"Unable to open file: {e}") \ No newline at end of file From f3957353044d57b5a73f398951e7b022bdaf45bb Mon Sep 17 00:00:00 2001 From: Simon <41160238+That-Thing@users.noreply.github.com> Date: Tue, 2 Dec 2025 08:46:59 -0500 Subject: [PATCH 2/4] Adds sample support for /vcf/fetch endpoint. --- main.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/main.py b/main.py index 7a618f9..1a8f036 100644 --- a/main.py +++ b/main.py @@ -5,11 +5,24 @@ import urllib import itertools import os +from starlette.middleware.cors import CORSMiddleware ALLOWED_URLS = os.environ.get("ALLOWED_URLS", "").split(",") +ALLOWED_ORIGINS_ENV = os.environ.get("ALLOWED_ORIGINS", "*") app = FastAPI() +# Configure CORS +_origins = [o.strip() for o in ALLOWED_ORIGINS_ENV.split(",") if o.strip()] +_allow_origins = ["*"] if not _origins or _origins == ["*"] else _origins +app.add_middleware( + CORSMiddleware, + allow_origins=_allow_origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + def check_url(url): url = urllib.parse.unquote(url) if not any(url.startswith(allowed_url) for allowed_url in ALLOWED_URLS): @@ -149,6 +162,31 @@ def vcf_contigs(url: str): except OSError as e: send_400_resp(f"Unable to open file: {e}") +@app.get("/vcf/fetch/{seqid}:{start}-{end}/{strains}/{url:path}") +def vcf_features(url: str, seqid: str, start: int, end: int, strains: str): + try: + samples = [s.strip() for s in strains.split(",") if s.strip()] + vf = pysam.VariantFile(check_url(url)) + if samples: + vf.subset_samples(samples) + return [ {"chrom": feature.chrom, + "pos": feature.pos, + "id": feature.id, + "ref": feature.ref, + "alts": feature.alts, + "qual": feature.qual, + "filter": list(feature.filter), + "info": list(feature.info), + "format": list(feature.format), + "samples": list(feature.samples), + "alleles": feature.alleles} + for feature + in vf.fetch(seqid, start, end) ] + except OSError as e: + send_400_resp(f"Unable to open file: {e}") + except KeyError as e: + send_400_resp(f"Unable to find feature: {e}") + @app.get("/vcf/fetch/{seqid}:{start}-{end}/{url:path}") def vcf_features(url: str, seqid: str, start: int, end: int): try: From c79d29d4a42b05a29d1b3c57afbe22fe852c3990 Mon Sep 17 00:00:00 2001 From: Simon <41160238+That-Thing@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:10:58 -0500 Subject: [PATCH 3/4] Remove separate endpoint for samples filtering and just add it as a url parameter to existing endpoints. --- main.py | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/main.py b/main.py index 1a8f036..6c7341c 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ # https://pysam.readthedocs.io/en/latest/api.html#fasta-files import json import pysam -from fastapi import FastAPI, HTTPException, Request, status +from fastapi import FastAPI, HTTPException, Request, status, Query import urllib import itertools import os @@ -162,10 +162,9 @@ def vcf_contigs(url: str): except OSError as e: send_400_resp(f"Unable to open file: {e}") -@app.get("/vcf/fetch/{seqid}:{start}-{end}/{strains}/{url:path}") -def vcf_features(url: str, seqid: str, start: int, end: int, strains: str): +@app.get("/vcf/fetch/{seqid}:{start}-{end}/{url:path}") +def vcf_features(url: str, seqid: str, start: int, end: int, samples: list[str] = Query(default=[])): try: - samples = [s.strip() for s in strains.split(",") if s.strip()] vf = pysam.VariantFile(check_url(url)) if samples: vf.subset_samples(samples) @@ -186,31 +185,13 @@ def vcf_features(url: str, seqid: str, start: int, end: int, strains: str): send_400_resp(f"Unable to open file: {e}") except KeyError as e: send_400_resp(f"Unable to find feature: {e}") - -@app.get("/vcf/fetch/{seqid}:{start}-{end}/{url:path}") -def vcf_features(url: str, seqid: str, start: int, end: int): - try: - return [ {"chrom": feature.chrom, - "pos": feature.pos, - "id": feature.id, - "ref": feature.ref, - "alts": feature.alts, - "qual": feature.qual, - "filter": list(feature.filter), - "info": list(feature.info), - "format": list(feature.format), - "samples": list(feature.samples), - "alleles": feature.alleles} - for feature - in pysam.VariantFile(check_url(url)).fetch(seqid, start, end) ] - except OSError as e: - send_400_resp(f"Unable to open file: {e}") - except KeyError as e: - send_400_resp(f"Unable to find feature: {e}") @app.get("/vcf/fetch/{seqid}/{url:path}") -def vcf_features(url: str, seqid: str): +def vcf_features(url: str, seqid: str, samples: list[str] = Query(default=[])): try: + vf = pysam.VariantFile(check_url(url)) + if samples: + vf.subset_samples(samples) return [ {"chrom": feature.chrom, "pos": feature.pos, "id": feature.id, @@ -223,7 +204,7 @@ def vcf_features(url: str, seqid: str): "samples": list(feature.samples), "alleles": feature.alleles} for feature - in pysam.VariantFile(check_url(url)).fetch(seqid) ] + in vf.fetch(seqid) ] except OSError as e: send_400_resp(f"Unable to open file: {e}") except KeyError as e: From 5fc7ad659292cd8feccd07cb131ba3b2a06f801e Mon Sep 17 00:00:00 2001 From: Simon <41160238+That-Thing@users.noreply.github.com> Date: Mon, 15 Dec 2025 10:24:54 -0500 Subject: [PATCH 4/4] Rename strains endpoint for consistency. --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 6c7341c..369329d 100644 --- a/main.py +++ b/main.py @@ -312,7 +312,7 @@ def alignment_lengths(reference: str , url: str): # Return all strains present in VCF file -@app.get("/strains/{url:path}") +@app.get("/vcf/samples/{url:path}") def strains(url: str): try: return { "strains": list(pysam.VariantFile(check_url(url)).header.samples) }