From bba9fccbf32f7e5b1743fd480c80dcd712b22652 Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sun, 25 Jan 2026 18:57:28 -0500 Subject: [PATCH] Fix FASTQ barcode parsing for non-standard header formats Fixes #15 The barcode extraction from FASTQ headers was failing with non-standard formats like SRA headers (e.g., "@SRR20318439.1 ... length=111") where the extracted "barcode" contained spaces, breaking downstream shell commands. Changes: - Refactored barcode extraction to sample first 10k reads and return the most frequent valid barcode (avoids single-read sequencing errors) - Validate barcodes against pattern ^[ACGTN+-]+$ (nucleotides with optional dual-index separator) - Fall back to "unknown" for files without valid barcodes - Extracted shared function to eliminate code duplication between paired-end and single-end processes - Added test case with SRA-style headers to verify the fix Co-Authored-By: Claude Opus 4.5 --- fastq_to_ubam.nf | 37 ++++++++++++++----- tests/fastq_to_ubam.nf.test | 25 +++++++++++++ tests/fastq_to_ubam.nf.test.snap | 26 +++++++++++++ tests/fixtures/fastq_sra/sra-test.1.fastq.gz | Bin 0 -> 9503 bytes tests/fixtures/fastq_sra/sra-test.2.fastq.gz | Bin 0 -> 10968 bytes 5 files changed, 78 insertions(+), 10 deletions(-) create mode 100644 tests/fixtures/fastq_sra/sra-test.1.fastq.gz create mode 100644 tests/fixtures/fastq_sra/sra-test.2.fastq.gz diff --git a/fastq_to_ubam.nf b/fastq_to_ubam.nf index 006d7a7..b6d8cdd 100644 --- a/fastq_to_ubam.nf +++ b/fastq_to_ubam.nf @@ -5,23 +5,41 @@ input_glob = params.input_glob ?: ['*.{1,2}.fastq.gz'] read_format = params.read_format ?: 'paired-end' params.outdir = './ubam' +// Shared shell function to extract and validate barcode from FASTQ header +// Samples first 10k reads and returns the most frequent valid barcode +def extractBarcodeFunction = ''' +extract_barcode() { + local fastq_file="$1" + + # Extract last colon-field from comment (after space) of first 10k read headers + # Filter to valid barcodes (nucleotides with optional +), count occurrences, return most frequent + barcode=$(zcat "$fastq_file" \ + | head -n 40000 \ + | awk 'NR % 4 == 1 {sub(/.*[[:space:]]/, ""); n=split($0,a,":"); print a[n]}' \ + | grep -E '^[ACGTN+-]+$' \ + | sort | uniq -c | sort -rn | head -1 | awk '{print $2}') + + # Fallback to unknown if no valid barcode found + echo "${barcode:-unknown}" +} +''' + process FastqToBamPaired { conda "bioconda::picard=3.3.0 bioconda::samtools=1.21" publishDir "${params.outdir}", mode: 'copy' memory { params.max_memory ?: 300.GB } - + input: tuple val(library), path(read1), path(read2) - + output: path('*.bam') script: """ set +o pipefail - - barcode=\$(zcat ${read1} | head -n 1 | cut -d ":" -f 10) - + ${extractBarcodeFunction} + barcode=\$(extract_barcode "${read1}") set -o pipefail picard FastqToSam TMP_DIR=/state/partition1/sge_tmp F1=${read1} F2=${read2} OUTPUT=temp.bam SM=${library} LB=${library} CN="New England Biolabs" PU=Illumina QUIET=true @@ -35,19 +53,18 @@ process FastqToBamSingle { conda "bioconda::picard=3.3.0 bioconda::samtools=1.21" publishDir "${params.outdir}", mode: 'copy' memory { params.max_memory ?: 300.GB } - + input: tuple val(library), path(read1) - + output: path('*.bam') script: """ set +o pipefail - - barcode=\$(zcat ${read1} | head -n 1 | cut -d ":" -f 10) - + ${extractBarcodeFunction} + barcode=\$(extract_barcode "${read1}") set -o pipefail picard FastqToSam F1=${read1} OUTPUT=temp.bam SM=${library} LB=${library} CN="New England Biolabs" PU=Illumina QUIET=true diff --git a/tests/fastq_to_ubam.nf.test b/tests/fastq_to_ubam.nf.test index 1f77994..58f0a3e 100644 --- a/tests/fastq_to_ubam.nf.test +++ b/tests/fastq_to_ubam.nf.test @@ -40,4 +40,29 @@ nextflow_pipeline { ) } } + + test("fastq to uBam workflow - non-standard SRA headers (issue #15)") { + // Test that FASTQ files with SRA-style headers (e.g., @SRR... length=111) + // are handled gracefully without breaking due to spaces in extracted barcode + when { + params.input_glob = "$projectDir/tests/fixtures/fastq_sra/sra-test{.1,.2}.fastq.gz" + } + + then { + def ubam = bam("${launchDir}/ubam/sra-test.bam").getStatistics() + // Read the BAM header to verify barcode is set to 'unknown' (not containing spaces) + def bamHeader = bam("${launchDir}/ubam/sra-test.bam").getHeader() + def rgLine = bamHeader.find { it.startsWith('@RG') } + + assertAll( + { assert workflow.success }, + // Verify the barcode field doesn't contain spaces (which would break commands) + { assert rgLine.contains('BC:unknown') : "Expected BC:unknown for non-standard headers, got: ${rgLine}" }, + { assert snapshot(workflow.trace, + ["ubam", ubam], + ).match() + } + ) + } + } } diff --git a/tests/fastq_to_ubam.nf.test.snap b/tests/fastq_to_ubam.nf.test.snap index f79aa9f..2460220 100644 --- a/tests/fastq_to_ubam.nf.test.snap +++ b/tests/fastq_to_ubam.nf.test.snap @@ -25,6 +25,32 @@ }, "timestamp": "2025-08-20T09:23:01.410361919" }, + "fastq to uBam workflow - non-standard SRA headers (issue #15)": { + "content": [ + { + "tasksFailed": 0, + "tasksCount": 1, + "tasksSucceeded": 1 + }, + [ + "ubam", + { + "maxReadLength": 150, + "minReadLength": 150, + "meanReadLength": 150, + "maxQuality": 0, + "minQuality": 0, + "meanQuality": 0, + "readCount": 200 + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-25T16:55:47.816983" + }, "fastq to uBam workflow - paired-end": { "content": [ { diff --git a/tests/fixtures/fastq_sra/sra-test.1.fastq.gz b/tests/fixtures/fastq_sra/sra-test.1.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..c6654d983d22c271f37378b0302b0868e818e84f GIT binary patch literal 9503 zcmX9^1w)iw6QvZ84hiWFX=!PsV*%+}bP4H_Sh`yp7Kx?1yFogZZjg|c&ZXny_4@;N z&fJ+f=iGT7sz@{htyot}1mr#=Sn{Tz)5^8ZNVkR8A-L(B-At4xE)gXTr75MWTl^1* z)2r?LnF<;2`Za>$u({i~yY7ldETPt0wB;rNT8Z{&IH&I-=>B5mWEsKuuP|$CK z+lB`&o^M^ml*ZWr#;RS$cE)Sl@3(i)J4Lo|w#E;{z?Bv|5b0)*r;V;{aWSmz$Pix4 z<3jO=tw1|&4o72~1W2|>6s(W&9@HdqiqTL6#QQC}E!mj4YT&$P&vWf$1w7$D7hzXmjko!cjGN_jlsB z6+UTTKw0d!9s!O;u2g>_n$B;g;{SP{&E1&mw&Por+dd=TM=ZKhzo6K*9Imt}5+hDR z2F|~b303UJmuB|;J|>7Af$7@pzAqQB0}V_ycq(m(G~aXVUO#_MryBd3D;SyrykR+s z;vR{7u15I)HQpyV9}4v=CP=pIqB%P$Zi8~^Y@B;X(@=^RtS-l-jXW--kWB+E+Lc`^ zqwS81F z-QOxhU+@>hkMlT+pk>%U{}!PjBZRiFWsr3IzG;Xe@{^4bsxSEbFc(>*(yodamG0vb z{8P#KIk53y$7j{_cz101P5GG4JW3ueGKq@!N!NZEk#t{{F!JAeuIkO;Q<{_YePhIs zKe>oA#GB7Z&d<+LA@4k~+na$eVe@g4kV!(Ui-Miv5&ge&;dY|sZR{%Y4;KjLxXvFG zqt8B2v)#7b1bnA+?Ov|^+1eNHP*F#}%H>8T>wHRp0!@`x=MakVkTnxaXiGoj+0Jt^ zZ*U2xeeZn2|I>3mRPl#KFWyA^mS|IuFlSa-r~b;Xt;QS!kksel{dn@k;6!0XJ!KIx63 zu`|tkO5CmI>Qh^O`|fW~20`zyR`b|SD7BXjCZ<6<39ka?!J?nya$}?PBsAn2AS5!R zT%tnl5uc~?q#A!nAbt0uEmVNqc#r-K#kep8gMTM~DY37;+6vuaFm9GGthl>ov;@y9 zz%+lLFG^`hn%HBPmMduKiw@D`+#`wFoZ8@n+p#}@6uV1rG%*hnz4#{U_wn&R+T%mq#=YA)cq3zkg2{3XG9^7JHw&T~0{`UeJ-GU$5(*A!=X}QX>~NgrBJFpI2x> z0_9faEioZ()07fc)B0bvZM z8~Ka(SXl%Q7EDIJX$J~_hkwBfysvK;7=pEG8`J)wxvOUpudmG|U8-aI^U=`?Gv##? z{uJY2m7S8yF5PWgyxt)3p|e&d(s1Ery3lfx3MZf%BvA26ko0kQHSKbHr6Uqz`aO%3 zaLvyF#xIm0$Qt4GmOC#hwKrt=)A2GvzJXuGTXa(b+jm8wOn2Agzwe1{I`@U?uzcz( zY88O1zRiPu`cs6(dTNgLb##-3T4R9$RjHfGdmp9nCQ}PZz0mU9(+&MmQ!!#Yq9N~z z+qIh(O-K3ki&(&jU`JgFlb=kA-BxBNDtG5kQb4OOY5HlRbsjtuI@&rXAoSR&7GFaG@$e>0OqXWz1r#+_G#BY2GFHhp zPEiKk@`Z&bI@$xqqObwU?gXEot_&y50pjI$e})*n!Os($0{KcwA-}9YiEl|uEM&>* z#X3|t0z~-Ig`0`$t;y{M!blN5Llka@q7JX^M|2-;YmDZP+F$NT{OV>@qAzUEAb5y4 z>&K<)01{FFbA0Ld+0(#yCCRVc#|pbOjFC` zdbKw{IOu!*+l{I8v8hM82|?GOI_Od1{xVNF%@{8_!{AIOblEYD6fmW3kQ#cj9oym{DtSply&u;I%a z4L=P|qDg6a$W6ZsaiVqJem?ugGzPmLDM|x+84i|mCyF)LQ)XP! zwM1oBF6F3mAA?21Sj3gq4Ddu?j+HC6eeHl3LYji$7y)epED8`sVlO4rolc>_fwH7} zbH9a#Cx8h_@d?9V>93y^pOm(6FwIYHo!c+oH@ZI_>GJqJw?wU-3!+HF!7M7NAhg2k zMy=gH|ABSrJhx9~ABE9?CN_2`JXgps>Wj=ga^0BsNF;PO3FQlRC4L5O!r16_?;}5p zF?(1*7jGPA?bgKj_LIg@^sk4jFJ`O%pjK{N|99Hy#I-k*{pm_!GtAXeLzx8YD3*}ewBjI4Nt>?4p$3^cO{0megvNU)Y|-gFg>D6((dD3R*Z&*EY|wc{)-B=aN%=S4 z?MkSVm*{Rx{~6b~bh^%qba7JMt?Is(ML=g1x%<^sEZIS-MCJ`*nMrlQAO4Ih!V*Z; zf-__Jnab;8<@~9k<6&Fki7Y!4o>BVrGTUNh2N1ri@E)up4Uh6=E{p|Fq4tH`2?;`f z396Yh)L=??@N_UzPi32z+6?4wOrgyas%4>eXKBap6NbeIzhtHdbX%c=)z3@g+t{Pg zFUO7y^+b%uip10Y; zNQ3DTTN_l;-r-yI$A^?V_`_Rgl(aq(FUL|(k09#Ymm+1gUQAgYid^HTa_$B-3OoEA z?@~*aURV25C|Z_Vb%@b4SoQxSFX2i!)rcVf&tz4)%!lXX$dw#6&5zFM4jdO0K7v zE>PUSo8Jv`nx_cJ%0t3oZ4ATyYqe)x0PB>@U(N#$HzEr77)9jTpEs-ujNTr@-ahy& zzf`5OY~L~*hWp3V`$)+dFjj4ZI~ML#)m-W(%bJ#M^TAyr)%1tf66t_7NCLhz+}XF$ zxB4YXJX`PY^_OblcIv8{m{+*8A2*ORNG&A?rq58<&Nh`qdeu5dU`d7Nji(28_X97& zb7nUivz(WILP%*39_u&Fdv$&xsCIj_?@?*_&pT!>wsM}co8jyH7t9Z!CC{gSH9Uk z^35;8yvg!3Qcy5Ik#y-fl9vOKm^@l%mEGz^mwBD`x3P?jkwm=0Xj2i7(GMJEQ-m8# z$%>EULo_`}K3o4K=vaRlPnkRcwThc&L;LPvi2O~oUzkQB&$>aoFk}On5C69To1&)n z*ELi4p4G`v)T3t(JCg32K=(@I6<`Z;61UqYe?Mn&3FL53SiB^w-MGJAr!Q;N4ZL>D5(^ zn;KuibI2ITx+mIZw@6WrE`OndoE9W511(16omeIBzoi~n>5bt$qxsg>cKM$-IJ?cE zTGT>LU7HYmMsB{o>`L$tQ!c=7FDYWvrwgSOwo6!aUpcT_jf0$5r@ zwGAz+ip~`)AV=Qm`Kw$Qa?y1+;NrKdQhDQjH-#9|z=dmizDP8(VVj>|$>Lr45NZeA z#dtHJC9*T8gbuIe^7HCwLL{s4^ohe|T!Fc6a><`{<8h;cTd*Nn-lHY9KU;$C^L050 zvziFykM|{VUIK1+F=nH`2#I3*t3K1bAQ`X#&EY$>m-#!{nbC-Ux;+Z^V;`oF0zkNexz(M2R^;;i)nXesX$<<6dxVdRtUXHWL{-{kfwV0$6u9~_Vc^<>#smKW&3rE5fW9jz^p6j`Lh>O>E&_wNapgD=WR&K);F#B8c>Mj z(!y+1v-Rs=$wrQeGmmd!T|7o#Lu>mqzxlO1)tpLx9q>r2B>4^_N;Q7`hCyPNZb13K z{-62*9`BSwwLTUF7^uza^HS?@{b1@RxZ;%5x1dpC+tsnf@>^Nv(94Q{Ee z^pSYetBkm3jsGUDu&mMLG@YFddqqKMb6?$p5tqj~dnZVe5SaF_P?PNq^ZJ=%G6 zTXX#CspA2?ZfR|978KB_(PeB~GT2j4`$A0X;>O*Zy`#wL9iB|EURr(l6sw(Gv z&r2Gd7bOzaG#8qOBK6D8@G1i8`(H;HYs2d_5xa6Frc*c<#kK>IhXT|p?SDD5UV&^) zXia#-iMPc@@?rCW1t7n*FB*ZN_$c|A%8zUlW4wKOYbe;izVgQlB*?%0T%3Rv6_o>% zi*dS1l>4^jr}Fh;d##SuR$=)6i{w{?fX|k?h zzRpje*fhS=lIMKMVRai6+0`kt$7)bU6iA|P;~DfrdZuIDf!_jqXO`rB8(ZG)=<3b| zB!M_n)=^%=(7{LVK&`{;&B5K+^b1AQ0k5S6&C%0RTY6YcShMtZ954GR%P1TX&u{|Q z^Vqx9meuzR+DEg&(Nlc>z5ui7=uFlRL;V+M|eR9QV#I%|U40SC1NR?Ys(8=jQt2B}K2sF<$|`P}3x5 z#>((N^wj*wB2@?iRXH2i$|AEiNt>(^_B-VusiY(;YPff2%j?qJECvFiaq14PcYN?3mrJ?bSl4KbK5YwoIn=_tMP8{%HT0ybyM&9pKE&?JVEaA$dpR z#mEHoZL$|C`hh>JD&G4Bz%^hgNW^o-*9kUW4cWe*w3u*5P+j(ZCXzOHm&+8rIT8(Y z){QPCrw`r69Vf}S|NBhu_&KWg+p3Ib0Pc3+g}wz_m&`m|^Y zaZsbx#PQu@GGR#7VY64cR6uPS0{C5qkE-8}zCdC|QnLH7!P?~j&ohNVD3)?u39N#K zu{*B(N5CIPo}%L{Ix=@-;HQqd#GK#+wn&a&Q5Q}B1nS{?S0@X5bu>vLBFay6J6xim zM#U^WKewp*A9XAaQc1$P>|^JAdE74UaQo>pPHow0R=v5SdRmyK3Yfv(>G0~6o*{nA zhkX-y{A>f0q~|x4klU_*z;APa1T7WWqbl9AUyh_`;{r(p3f9#=H7Lr|z_f&Mmfec+ zBnp_=$Ao(bHrJN$CYSP3z3W;9ir8Xe|IDp&y)V}rjxX*D$P|1o+65qnll2XO7h<~KY9B+eKUx=nps z!2)deV(!>$`o93X-SN;+ls;L!TQ>igg@|sDJ#~|*2Gs@9(d=eTPR>>sRt>>Fhi^)s zD&~dmtjPeOU)3So*q1UYP>_0zq#Y#}KvHuZ%?vzY38dfDwfiK#rm}sd!_az8xOy?I z$H8-d&O_;=e9McMDBZF2oy}fsSgOwQT_{L!3jpN-{;Y}hWepTpS@#ZR;(VDhC0}2v zg~>_OuCYXyst$Q_`DiVaDs1ys<=FXd$&Gy$cRn%`0wx9IQyxs0A>3nu9tWYyU{!Be zau_b!19}rTt^G@f)bQ{ZVj?(%J1i3R0%PVwcWyfM{!Pj>P!FHx3 zsgEpy|dvO-V0yHdS5Z z+{SvXOGvFW`T3{WR};ATNWnm8k<=@#)~vL|ssYyRB;SNI<{I+e`Q)2{DzBkA&i?z< z+`ThzQj2x_zK$A%{aBSM!U^bWL^Qs_ z=ArTC=89abqwMh?f|C>EFb6JV76%?xv;Z_yf0m@cnJ*@z^VN9@U!;wC(m@7{SM~Vw zkbE*Rcl>8jYEd5fcz|M?dYqm-`EjE`=?PZb%(=Y*+8n>C?NQ>_*W&!Lt7&C63<}iy z=U&OlVgE!Dq6(x%DtS^fJ=032E78jcNR;q(wf=(--=u`YEOk};zsC&l?*86gyaLwsI+kD5Uv7>fd^?j;yUD3AP~4NEl2zH=d{U$@ z4~e6k=AjPyj1X_{?jIFPYP!%Tl2n8eHwgQf2;mwe|07$c3P;S*9-L2G6Zo&jV7)VY zNSR1_ilXAOWfIjI)a(_F!@!gL{etiJ$F3P4nm+^J^RDyVtcbHR0rDABts&I0DcFUf zhoczLw#$^r#}l46hZt!eeNEHd32G^m#l_y#M5Yco&cjqN0kxUL?R~>}Qeq#5Z{jkV zy>>GJfxIIjYz3-3mIW`%*q5=X@CoOV=`-4Uy_|y5(()0Rq2$M@*@eY&+K^LJR4^ke z{9<+Pp;mID`6nGRPxrjc+U|tBi19cU2x0s%MF;12us?vuf-Q_ry#BHlLQIsl6H{`rgFXvnZZwHf2&(Y_)Lt# zRcKkbEthz??)4RFDr;Dh{;Um*e7 zS$jSpw*B^keu(LAGlkFBG6a{%I8~*atpDIh4!!Fh-;Qc${vkzSQ)mQ4GVkTkr~o`Z z=D3TSuf|57+h5x6D9|BaX})DUp8kSAYhd6rI;g9-@&P?-(*_w8$=)OM5w~yC>j2t@ z>_LET{rcWuHYwySL3}tyivqp8aJm4i2$t}dX#Nb`92g{{@-p-Ah3{6p*D4i0y3^s| zMml^co0+axO_DKSBHDj5B;N$+ch`@d^HG7rWi@M{Iv=8ZwAQ$t3?XD}kl!(>K}L-u z*L%cAyv9ZwJNEq#zZ^nhVE5L?FNYer`DUh4($K4INni*k_(C-&^afC32X9kD{ZZ%h zgLm2)MUKd^I+Mk?;w*1OSvS#yH+v2}+L+3p@!p#Y{XVWB2QWk3^-n4q2i%nnDhen9X+fg5P|o5~prP|>|8J$>bX_y`F^Pn}A!Tudky@ZE2yp^G7$rNTUjjmA z0LThD)Cc`U!hzEH8@4PN2&2^|#IS-3Zcu6L8H$J3EYO3GDHOkQ_QcFj)o{iFpFW~) zdw?OFl?5jZLbT{rr^%#$cqwItC{^>{@b(!=2-w|rx5*6)Z_>n!3I13OeOVa1sKPS* zI_A|P^|AI4J6q5EuR(I6S-Opa#&yR)cqxr3enW7BH4=vAz((yT(ZSO9GJ$X6=WJMM zkz0*$IX&~Od|n|2VQ(SS_vkTkSS#wwYXct7zmPx0t@01TI29qaw7%rR30(@Fo#RfN zh1p(5PC#QE;zy>|m(&~eef8!wQgcvVdscvW=fCto6T?#Gz5l`*h$cD9t%yV`1k!0eK5vJ=tmv@WJdU!%<}RRaez&>T%slZPl{^XcI3srvo}g|8bP=g$cQt&0j_@R5h# zV0P);7N0rl&ZlyH>M$M;Y*%Syq8~{I_ zT4a>}D+(8|Z1?0K$b2bbRh~0ClCK7BR>(RyH|sXo_vP7s@AJrTxK(ujD}p_X4~$jn zr(RWhC{7>!hx=z$fiEovZIpD_9}HU@q_I0 zsP3jESh&BYrY6srvEfzmCrC_mTdvAp2=mHyzBnz;nR*LbKi-t)Y^wpRzy`BsP#<0(i zLF+7X#=cdsCKmI1ah{?eP%Ax>x>FQf!_foMhts9RzN;b>v%K3{w!*TzQ;G==P~oo4 zi`pvOI|Kfc_Up$JU`^&!VosbW2Z|)LZYNPW9ki#aVMiO&VGavlC`cY;ICcs2cfDZPeg^_eq~m z?3G@5t@1Pr@OQ@UwV>r>^GNhRXY)8M44e(%;tGNxT2~O3(plFj8O|fjeBm_UDVd6n z@y3wD`qaVV7(0RCLsh7Nww`C&V#l-vaJ|$|creSD(X%7vfc>f+h!#ITz9vQQZW82c z^Xk^oA?_Tz;d%MNgeU6W#zdS7QknARHfZ4eZPz2CceAfC{GPrR*&4?X>X%>aalN9Q z@p0!V9WnIG*6H2*)mpL&jJPIB2dD(n2JldHKY-=NEof`<9g@iB=@LoB?o)@iY=sXu zG}r0Fp-Qw5f(dS&fSNo-qF%t%sTw7tp06t_L?rnjDS>Ct3$~ky@yN;i`8?^Lvfo26 zp8GpHUCmQH`Onp(Lh~99+TzV#Njoy)*7FY6uVCNRKm(SkwKHNV^t~1h-|>h!XO)2G;iGBKiI4-m?iM_^|98p3ldicY&81C9U_D7-?XjMLjUjRvcHdbMpT7~u+|dJ;EZ~Hw@|FgGA46o;Mo+$l5ILpU z^)0=%f>@1PX0oTs_qWzZN-HMXi{cc+@e9s-+048H6vGsl1)F&`yRDh4W<&ZuRAO&F ztTp3sU)}ee?D;Ixa>4YxyGuqnwAMg0S??CLWmwLX{(MjduxMZoS{}Ti%xh7P>o1}) zI#>+)_&t-I%9aQv;6V(I9UA3U^qyP%ZZl!|3mqGwWO8%4*l&^H&~~j|cZ?cG(#{Xk UT!g7&kC(pWws?@H7zBj>0VfPHnE(I) literal 0 HcmV?d00001 diff --git a/tests/fixtures/fastq_sra/sra-test.2.fastq.gz b/tests/fixtures/fastq_sra/sra-test.2.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..f94cfd73399a918ad45be929d6b0dc992507627b GIT binary patch literal 10968 zcmXAugI^tf7sqR>wrtz(W-Qw_7M7RowY+TIv~1hU-E1ton~j^EetZ6b@9VtIIiL6E zobx4({0yZT=WY%4vC|KfK*^V|*V&aPUBJ#x$wAqhb!L!7h}86r-*tyon3dfHFMam8 z8wslo%A!D8SIZ5Rqc@8ah|hr)QG8y?@aM_@`Kg-}eED*_F*iDTXXo*^gH*{>SZd{U z$lss8GuzO}$WS@M&->tZZ|`>F_5STa`0e`bwR&{xfu}H8jqPRn%i9ziLzWTab9_u; zsv`=~mh_i`t!`Irf#}E2LW;NpzC0~ZN~#Z>FH`7~QI0cl_JT+ZFC;f{*@NU|&?p6z z{!cp&FT8tUlc0Hm8X2Hvve%$bkrZRSz9RnpuXPb>@YkRHDUYruW@$z>85aF%Fg6kH4{8Cv_CZIuin_bB@J*m(eI=1j3H>lO zK1;no89%lb-um_2cju3dV2e54lEg$Pi?2dY3rTigPp$mKBZ&cFhb(RR$w`=P+Z4=3 z(PHXm@yVU>*mP*oZE@V$2CR*gXP?20t_SbO{W-Hfo3?q!+aEX0t*sl{=}(>P9SIjCo41Lyfr;W1WT$-@LgonH_TG5c+=t`2GL3r%zof}HM5Gv7Fy!_{`$)3xF>(Mi!XmL9L? zBjDwC_2ZRo1ZYzh9~TJd+Z52EP#t?GYkf#|h}bP|u52ej)T5rMf@541@ zQ#ZiYQh&GdG_YRem^Bd&;I0&GwA`WLu%qQ)9YmjKTVG$zc-t~Wt-7>ZJj$^C@sNQ} zxWRWY=K46JnANqiQh9zpTPSO_uwK~~yRYp-fO}_H$Y^9_slkddiY7&Sf>s(D1x-48 zrEuvh^IN_{f-w_;;i(Hwf@b)DP~s8#oIEr2X)D%03*nU!wcAqVSz>aVbu>5o^-*G@UTu89Kb6_~7KKcr}b zQn^d50d|f2=xP4+I82;*JuK5woTk1c177qzK~W?nk{TW$|JHr_H|^U=kBRnAF-r^F9_1tk7Dxr($OW^=s|fpbawy5y7G)o3Ah=Uso!jE)L@I z?9Rc+tYGLP(2+(qYnDp6oltMd_Wd|F9VhkLZqJ47Q%A!+gWF@9(UoXkzaIR+FD5ql zGQRRp2`Wrs7rCm7>R9svyk@%Jn9}O8Np$&|DMBP6*g*Eb3-89_=id%sy1wwVh0X1MMpUzbm;RYukAI6n!yH z{Oh~BQNFu(Y*Mwanvx%XNe|Z;;Os8JAU6aeZ7D+u#TqJVVP6xE!`avdQ5?=OIq9)|(mRDZODUM=&Op9t3p+3$zXl6Sv zlX6$POv(q&n^`@G{SYVytIbTos7!E7{e!>h{wfSLr|+RHOd2nJX;8QN?-hFPuvQwE z{Es@_LI97vpYD#`7K$0;wog=h=7a@Fe%uy3DfxFb>Opj4&MYEHmG3c_z#Ab&ik5F@ z8x;!~A$B%ehBJQ4oSRANaZEYV{lkkIIWEAphjIP^KL{dgeldV6Xm26+FAxIUe>yAg^NQ>4t>1g7xjx5e*$8CfI@^;TFj z3rG$Eu&kiWA~Rzo-B*Pp@MU0Tf`H0vgeGiqvSOwqcl#HjPxVY{Mo1SGrNqpFT;6Xo zCUHKnOl9j`5l9tejV>8CIQn2Eg?71)&=)L=aWXrqeDcE4_bNe^BCSsQ;d{{E;$nGX zv6k(3pxo^3`)SFWenf*YrjF`}o(`UqGZ&2sj)&HR4~aUt9HX{|lk^hPJ{UUlAJ@}7QnAtCwULk0=DoPhCMXDOu{llwkt-S1ya))H~z(Z32HDShlIoBF~ zBdUta#+nmvkt<#N;3*fe!+#tE@SsA}V5kTcO5i@ShjPlMs~}cippO2CC{<<3|Y%=-UYU466m^#2I9NLzLUquet$r|G*KHf%Zoer zOJ6`$jh0F%wjVylz+Tglz3Sy}NemT9k3na{{>|Ek=5#T0`g0=MO96y#CcGXGO?I76 zT7;EK-qe$ybFib+W996FhN0}&cEeE0Q5=WjX@UqbyU8cu@zXZpy7k}r)3wTUOFeFl z6PDJ3aWzypBm>J4^1;vrJBB@6XIT^%01}%K>4kGzg2Tt{p-)hGeCYm*a5>3(NlTCG zZ4xB%qBiNE1s)wYFv2;Z+YT|eh!=z*Q`=EZ@ahdpk}D-{+NM-h3H=rjlQ4zd%|gCi zx-Pnurn`<^uOrNV6ww)YS%r19cW4ST5jQ)j-r&adSMVt7z56+4QzHuao|^IuvWv{P zR`6#Tevts#K4Mlq!f7HPt^t7zFk95uqgb6k6U#QGU4-1LJgnM^{oPP&rtq0W#j?2% zzE>VR@NUH^*x%@sxbki4L6LmSQZsJud79CNHic#5>dbWkK&=kdWJg+eL zJ#sIm#*em|(J21?GU@HE74U7mA}|Y6nUO6;MHJP5bSCq3J9o-ioS9ho`)kRmpsEme z;h4W^4Tuk3)v8UswjguOAZ7uxA*eL?Q@5 zAjdQHok6&MM6EJjMXVux*BuO$GFQ6-{&=--1j0j@_f&?j&*qBe+3nA>Hcl$zCnd?3 ze<=U|n#1U`$CXA^>Eu#VdaXF>jGyeJwQ?eI#O4k`?C^36OAs`=lV=uHs7 z!hxl1ukwWN4_;cM)67)mSoHLob;17{%-1K!gTf|vJ^mvuIbym3u5WU@@r-tez(QVT zaJ+OYmof@A^({pXNCRU6=tgnynt@gzIi-%P^c#)fLob5RIKs?Mb>>|kNe=k|+E+04SBXJg+Pu}^x?(GjW98nyUiE|_W zpCl`FKf(h5b$#FCqj!q}@sm!AykR`7<%ax_k0bcGIm$`1`8XOO5`p9&E(~-blPC9& z8hvj!2}kLxoAEgn@1?IlTbjwzx;{@Jt1yeryZ;=CSVLzon5#9e(_We2Dd3azk($4# z1?THZ`_8&hf5M2nyFHNv9Dw*XTip0FRhS%qEVA=ZVX~#0X{>;NINIj&KftY22;xLu ztd^;F)R#fUA!uL2B2on{t(by@HJg&_=lguT;V8`&;WMvuHa8}@cI)d&jOI*@?b6BQ z1>c(FjJn2e%#Do}<}UkQhqtP~_ib60ur)3vARyR&^Ho;w`3gU8Dp2}FE@4Q|GFt2Z z)_ia~dQgNgIq^^$Qp)lS!lrRFgwko>)>{=b>UziaIp7)+Gv=cZa2R1UY3Z+_35c+W zm&pA3jh^{Kgo`yXiJokjGI1 zcw}#8O^;B|b4!ea=8?S-Ug>5E*25uj9~!j8Ya@kxKpW>6eJmUU@5EwyZd-Hj)b5Z8 zJ~J(EQ2pevY8ASGlCK#F*?W2y^6QtyYhW6H96){gt8A`RP_FUG=9N}Q>lGM8!wdGcqiAP5{B~$<&Xtq4 zmdR*%3~@TVqrN@1TjA?>$VT}ml=JJ?C5lr7hS}Z4WIS>goA}6H5;l%eF7M!nGN2S6 z1_@0}r^Z&4MskX`O4GB^(77AbTN;+9nhZzm5+J9$=KeigAtNu)SAkV;d@t3cLq%&! z3+32AjCcW!MGc@LvOwaHI+Z3Hd&UJ!ug$1aGWASR6+%KfwZWtJtzElRvbuY;$)Csv$3B|kqg2#Rf3}81 zQvdk69KT?lPzzraUf0j=s%e2}IC+drU%^)}vp^-3iMbA*7O~77pYuMn3*+Jr68VAn zcOy4qRfJ^fK&o+~!7(;?wgy;y?Xl6sBYgKN^d&K=NSpf)gM}xLtpY+tDr=`QXiWTz zYiQuj%zu9VRQIE_6>OW5OEWg58D#tNOJk$pTaZ>EX1Ru+XQe(ivD^!j^Q*ihK6Sz) z>g(zEWelszSz4gELKW{GgO5_72xQWTqC(Vn<$x@5^fTL}lKMZVVA2jg$#+j0KiB08 z3D;h&USo`St<$dpCuyv^(GgWew%(nk#mron4^ib3zdoy>)qHFLo`e(_go zTFE*oP|Yi{IzFi!P_#5qlb$$o?T5G%aqNl|pcIAK|3zE}o0c9lzenL-1wh2X<&ZW+ zH#kZ@xs>ZjyC@(a8~5nLMwLK@pMi&P%>y`N2^&LFmlLZVlTgJZq393>_y8@e^2R&M zUU(SwwF@INo>zL8)_O#uGN}3c9GS}WDKtJFvYX;A=u!sFW7>A_?S5*Z3b1I_gQ?RI zh^Z*`(!t^Kc5>T~OHjxq=&X>LzD3$(B|`j|hxEttHZ=RE#?Qd3GAomV3bkO^i)>R! z;9`bhO&_#SyWB>Sohktl!6>OjL&ym3KT)oPbnQxw(ya!_*3R^z>tcm-C4ck7Ivz-m z;D|tH?5{9gDq8BTrhAq{finH2l92<>Qg++J@DtyURZbC|22n5}yS2whf5IDN-0g6d z&)wsW8oIS!c^{OXeghbxWzHC|Kh$Z#Qbj;CT}8=Mw}Br?U{zUQH1P>&5;M{BGnqN? zh#JR~Lw^7V=|Ne+O%uEGYU?QCSC=S^UrpIDPqtYb{OV#Z!?-gbmDkIO#Z5e~GEkV>^|UC*4t-UVg>{orB`SB(>Hb$VHlM4{-h)ftwq< zcDFyk^7kMyN2fIqbqzntd9VcL#fg8{NYQ8xUS}6%Z4E?hI}jonOaF=+QQlpd;E3WVN@MJ>qT#ThYG!TO5V>y1 z{km)VuQ{4e-eMGzC`Y{pn1N>ON<(!w#inD-zl@k-Q0H z-y~*AswgIJH}GRN<2#y(BZzgGtQ=*C`rJr5nC5d65|uf)n?dG(KV+<>2`Qpv6&_$H zzt6X)uesfF+G9Xn0uwRWGgVbXY1tMv1g@C~Iz!LE@n2ki{X3F+YMy&XG_Z(&@5zXL z4y*Gl;JC|FBA^%LYFBYh0iIrPIX~|vWb9fe(hUS@k*ry)7=Fg^q=|5klbJrD;1z+& ze*9ab+LzX)Yk<@-P^~q#+(g#6EIK#{^vQ5-QNMv1QG7#^YHFXo!7B_O3*Qr-`1T`C z@a=gAT)DgXk1%%+Q*X@KgI$CN&+((OYIZM8E8JxPu?LPF{vixw#X$wr=W_C)T5+?q#!%+ZSKl zZr1kq)gM;UU&uKj5c_27uvD8T%i^(@&fBwCeGfbPBU}6CAQC05$5@`zrfT{Y!>$|_ zBYR65ps4Z+AhL7$J&|jpb=?sP^qe(VO(kAhpJ5AC)Azx^gbPBZ(2_^R77HexEzHKu zui-N?-o^77M1VI|7JyV*)&1pB2KSDvZ~^!%R=AhTfy%L&mJDs<_VfX{r2qKr+sVb= zU-A6pZao+4jC~Y4Z9fcld)zx_iTU%VodG{Y?~bfcEDMhwVuSoZYaVVQjSQj1)vTAx zdC&V-=nP4w|JZq!aw`wIiKGx!9+hvCH&!p}V4ff;XXI$4TV_=9Qq3sRpu{^^?#FVZ ztkzS_tR~8ro2hNH8RSGi(KujcqvQTmnxJZLRx(C=UEn{}@euc}kb7c|7C>Qci~ z%BmaO>ikN~=zo)^>uz&M^3Yr7#mytTKb|%Je2fc!MIrq=X*H${#|0p8fMD2b;6#M2LIaOLn0B9sc`B>JkyTvxf z>}3`?zqd;_3wMmX*3x}KW_V|RBoyT$-rjDHDUSF=U&45@xJgKubR8}q?06}o?b~7m zQ0aW!6Ur{lOe9I{G}H2PxXP3%VN*OOn{Ri$;r)Z~huq3Cicpl3uUXrX8}F`S-+pqBu7Dmrj+v1d(&1Z;^O8mq zn}9|y&|Z{QhSyw{o(sX+#ZXeRv*RLdh*^nY0BCmJzrz+Yt=uJr)G$w%-=2KJx4DjE*S|L zjdSE>{@SR)JX83Lrn@@ioZevWv>YIVvFWj*JPY-N8pHCq-zXeVkpqdHP>vJ3T!F(; zD!ue=WbkUnZzMEwE1&RccD*olzHn;T`mtUJwbaV$>!AKD=#bp%-OkbFCwyX{a!$+f zGgUe3OHp&R&l6lbNs$+8$5(sOE&Fu`=G_nfaP4soF;8EM?C2I@%B}del54%>!MBA4 z+i2auiRo~r5rgfe%I?dqVc@t>|AJskkL#lcKJYqCvgC_Bid%%!Uv`+ylje^IiDCcQ z;3vHhA-*P2KIl8;0P0t|9ENN<5VNmAE8961;VpNC3=*fpW0fbE5~X8{?C2v>)2riWn+9!6mY0}ht! zuy!^&2vSFCYE_xU2w84Z2|m0u6>`Z{2xj8R;iO(F4U4MjZT zlt#EQ!El*uYV^uyWtQgB`-6qzVwA2=f363As$g2Fb!1d+oqd@Vb=wqJ=I#iy@CZc% z_qrlVuWDW({JDBE7A~qp-nVsMn+jaw%|*|AwHx}0Uvo?j2- zS6(~4@*((Z(tdOP^4`lvzJTzQ29M?Xp9_cmdCQ30ztX6lt0H+_ol=VAoRo@!p}qJy z(1F@5Te*3_gNxL~;ul;Y9o)-D$Yqmf8GumQjq!&Igs&G z1sX)V?x^4x`+ixgMiCmiSgo$v!p}>-O#rroV|EYaOBb|ZFZFo@7_+3Eg-2Bp-W#*L zCU=tSn}ciW>de8p%cb~nZ`x)lKBm(r6ni_d43+>Xf43t1M5wIHyF?}SjV8iGnMvvP z%c_;XcVz?~T^^&5=O6C}FyV@B4Y|6Sq%C74dU9kZ%PKH+f%8&-&9H$qgyF*O%q#!1 z+@oHAXN!qpVFmk}l5Y*@)ejQCJZRSb8?#i)L?Emq*8^AN?BQa;&EXXHK85(gS-F<8 zBU@OcHw^@i?d5q1Sd>%z3l-L#diybW)<$=6Jt8s%7eyc!n%}Sxvdk$j>32V zJ8nn($)4kEhc1@DA1C6+%FU%V8~Pr1vtb*dO;4To_xpVz``>z7g$dr)(-)t5h+i94 zVrdQ+YoErA#=x+3Mzr0!zL;bGrEl^mm8j6FhgJ0CkJZ<3DFjOIDZI>`^ zTl!BnK>4R=^~KW8ukB|m00^^U;j4Me>))oue-8N4A*?0;zBUr)!6K%?NYO;OYMFhG z=oiE(Q&&wPlbY~%3XCspjuU39+c)?_`ywP9nf~MQ1l-zk#GCX&+=_Xr%iBcV{N&YP z@UG8D2(@@9gJ&*PGcgB;#mUWW-wiZS}M%_s)Ukj_FcaE1i=zFd|BNzDW z=oRL}x^MGhe=VTrv^W*Co&hdDe>)K(Sc&Sg{#&b9)R1piF+W80*th4$+X~R)Ss@=! zdI5h`EMC9gdPzN)p?eS#NXL z+PM!#+u{n|!86>1Ib?YBX*)x!Iv_o@SbV%b?~a8JysI83DIbJS>~e43O1(MpKDE8O zy3F=OyK%GY658#)fA`=YTHR*rIIVyPJux(>mM--+Xg=ha%Vh3bQQyVDJ0Km|fpL1X zqe*eLW-ym(sKVR8%d$nBs<9Y1Mae~QBr5iPOIdgNOx(gnJLWA+%WCjm>f=%$gP&L0 z`FUSux*Tp08?>2eE%G#f>rc>hZaZl1wSK1LXrLL41<4y zS&M8p3;OZwS`SlZ2Vb}scUF`me7wOM>0*GYXm;qP1N`YQZH+Lo;4LXBB_q^q4%J`T zqY)4`_~f37_gGVpg8@r39dZYZlhvR1StD?2t2SwRs`=XNiE(422ErEEs`X=2e?mBW zn6bJn_TQmGVC<%l2DOL*n1sK~K*h#ITLSiFy0fC!w$l9dOKg-q@}Px|FL1a|r*d~! zNQr30@B(q&k+FaxphqY1E7bUEPOBVJ(mn{Nx3U&(hH^+WFf$ z)Z7CALCNKg6TI!--?_2Dtf!?6CZ`m@F7fEU83=WSnsg4xEbdsj%d^ZAQDuUd;GZ}2 z-I~2e0v==3nQ?rTIR68ABgU|HW_w}Y;mrEge80KAG=5NBxwpCJ2lHGi9jH3<)}-;5 zR4>t@^Dq&{*`I+?0YWHUoAWKcXA&E{HSfQveqK5!=jucia=UDoIN4%mM3KmIJQWP9 zJq0;fOxT*!aw|n-D9H?X^1RDB>}Cvd-IJVrCySggWJ^VSdl9;r745s(pIl!Go?Qki z$A$iipR@mzvYIVis^Y?)eX%ZZ}n-ZZsFE_ID{)qzWeOSV_I zCDsG%acqV{_X3e=pueA;)efe~5+|T;hicSp6e$up5NvsoekreBi0(n@X%MRIE0BnN z>T&0@LG0mjl37~KS7$gMH<3o|ajF6T#y^Ma`c%(fnCLKN!}XYGxNG`fSxp{qQ_bW+ zIcRw52D@^>E%byrk$+%Nl2W1*hKM-D8xfuHkN&#&&txr-xu14FfLY#ZE}KtJ$axHlW86- zcpiJ=?{t@$$+@bJ7wj$(n7zH-0Ky_Mqq8;pUQIgNj0HEVi)q`q?gM+wQExw~_`D94 z>fzU0HqHZ;eK7#sz_+UoW7?sMsJh7J1 z&XD8_{Y;pur{zI5fBrRCTi?KJ(r-44W0_!|{VRGudoK6}S?6BMWdmT-lq(-K;-bL| z3f%i!kp62zf_Cf56A~~685j5L4Z3s7ta#T^%R4A6pN76$OvEE{7>5LmQSGsX9!5~- z`RBz-8Oj+A#y**su8v(Wg=5n@s7NVHlW1MYod~kr#}063ZhqwWb|Nt789tQ_zFFGN zBML@PuPl-iCST#*Y$EZxR>;9^>iLk+0abJPHJdHb{t8GUQ`&;X9QOY$?$R!s)J$}E z3=P)8$tof`Vn#IzpjzAR!XhUVHKP|rR1PN^ip}l(m6>HrO!IxO!fZl?bcBL>v@c;+ z;d*t37zFVm6MRPyR@ZwBX)y;Gx$!L+W8&^UJX;x?8E1KX;t!@hP-1nxHS03-@#yjq+I8%~6Qhf-JOSdLPM-aow_YW$h72PwjBL1t*@ zeZ&gR^0H<^lxoD~Ukb+D8W~O2sWlV-6#FI>Who(yl46xB!ITq)4Xl4Wo7}%*aZgJO z9D^?r;<@8HhpE(V3OYwsaLkrz3Q8&)2bDJ6GC1HMMiRxeEcf6k)yUngf~iuXv=?$Z zg{dmmjolGa_2Qy}ut#RRY6TsMOmm0AKD&Sp4pQA5o+BP1cSH#Z4Om83iD#e%46vxr%KmBNPF3uB)W zK3z1H@HaNE@P~D3b+^=y&?leLcaE7$^NPZXAaVMG0jNMG;9!B0x?@f5xzu75s`tTv z6~w)jpQv}f3PkEbM%fMq%3~-jViyAsU&OGm*Bey?1NZY37>$bAcIJE}e44!)=Xk{) z2g&3A&=83meccjwV2`hVkW__RG4CAM<0eTrVG&En#ODaeZMuMO7hq-{5@o0U(@cJ+d#<%R=D(J(AXp)3Q~wBIT-!3!F$AE zUTmWJxZi-6bCeNzPUxpg$+}hXh}%mCc26~+2h`I<${;Ja&}g#p!!QRgqO8vR^rCy! zYL{uI&5LF9r2|nfGUR>1GqHrSw}cYHyhDUjBc-bzZEH9=Q2IV1