Skip to content
This repository was archived by the owner on Oct 6, 2025. It is now read-only.
Open
213 changes: 160 additions & 53 deletions src/AmyrisBio/primercore2.fs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,22 @@ module primercore =

g3Final > -9.0

// find longest tail to tail pair that is self complementary
// 5' tgtCTTTAAAG 3'
// 3' GAAATTTCtgtg 5'
let longestTailTailOverlap (s1:char array) (s2:char array) =
let suffix (d:char array) l = d.[d.Length-l..]

let maxOverlap = min s1.Length s2.Length
seq {
Comment thread
chrismacklin marked this conversation as resolved.
for i in 0..maxOverlap do
let suffix1 = suffix s1 i
let suffix2 = suffix s2 i
if suffix1 = Amyris.Bio.biolib.revComp suffix2 then
yield i
} |> Seq.fold (max) 0


/// Test if an oligo is self complementary
let selfComp (oligo:char array) length =
let rec checkOne l r =
Expand Down Expand Up @@ -363,58 +379,144 @@ module primercore =
let _,_,x = _temp p oligo N
x

/// check longest self complementary tail for
/// oligo defined by f'->t' inclusive
let localLongestTailTailOverlap (s:char []) primerFrom primerTo =
let l = primerTo-primerFrom+1
seq {
for i in l-1..-1..1 do
if seq { 0..i-1 } |> Seq.forall (fun j -> s.[primerTo-i+j] = biolib.rcBase (s.[primerTo-j])) then
yield (i+1)
}
|> Seq.tryFind(fun x -> x<> 0)
|> fun x -> defaultArg x 0

/// disrupt self primer dimers
/// Tweak the stopping point of a primer if it would help avoid a
/// situation where last N bases of the primer are self complementary and
/// can lead to formation of a primer dimer
/// 5'-------CTTTAAAG-3'
/// 3'GAAATTTC--------5'
/// primerLeft and primerRight delineate the range of bases in the underlying template s that are currently in the primer
let disruptPrimerDimers (debug:bool) (existingTemp : float<C>) (p:PrimerParams) (s:char[]) primerLeft primerRight offset =
if debug then
printfn "disruptPrimerDimers: checking for problems"

let initialTail = localLongestTailTailOverlap s primerLeft primerRight
if debug then printfn "disruptPrimerDimers: initialTail of length %d for %s" initialTail (arr2seq s.[primerLeft..primerRight])
// no-op
if initialTail < 3 then
{ tag = ""; oligo = s.[primerLeft..primerRight] ; temp = existingTemp ; offset = primerLeft+offset }
else
if debug then printfn "disruptPrimerDimers: found problematic initialTail of length %d initial f=%d t=%d" initialTail primerLeft primerRight
seq { for i in [1 ; -1 ; 2 ; -2 ; 3 ; -3 ; 4 ; -4; 5; -5 ; 6 ; -6] do
let primerRightNew = primerRight+i
let primerLenNew = primerRightNew-primerLeft+1
if debug then printfn "disruptPrimerDimers: considering f=%d t=%d len=%d min=%d max=%d templateLen=%d" primerLeft primerRightNew primerLenNew p.minLength p.maxLength s.Length
if primerLenNew >= p.minLength && primerLenNew <= p.maxLength && primerRightNew < s.Length && primerRightNew > primerLeft then
let tail' = localLongestTailTailOverlap s primerLeft primerRightNew
if debug then printfn "disruptPrimerDimers: considering tail of length %d for %d" tail' primerRightNew
yield tail',primerRightNew
}
|> Seq.fold (min) (initialTail,primerRight)
|> fun (finalTail,finalT) ->
if debug then printfn "disruptPrimerDimers: finalTail of length %d deltaT=%d f=%d finalT=%d" finalTail (finalT-primerRight) primerLeft finalT
let finalTemp = temp p (s.[primerLeft..finalT]) (finalT-primerLeft+1)
{ tag = ""; oligo = s.[primerLeft..finalT] ; temp = finalTemp; offset = primerLeft+offset }


/// Cut out the region from fr -> to and extend
// Given oligo array from to gcInit offset , return oligo, temp, offset
let cutToGC (debug:bool) (existingTemp : float<C>) (p:PrimerParams) (s:char[]) f t _ (*startingGC*) offset =
//let startingN = t-f + 1
let rec findGCFwd t' (*gc' *) =
if t' < 0 || t' >= s.Length then
failwithf "ERROR: primercord findGC array bounds exception t'=%d\n" t'
match s.[t'] with
|'G' | 'g' | 'C' | 'c' when threePrimeStable false (s.[..t'])->
(t', temp p (s.[f..t']) (t'-f+1)) // end on G/C
/// Given oligo array from to gcInit offset , return oligo, temp, offset
let cutToGC (debug:bool) (existingTemp : float<C>) (p:PrimerParams) (s:char[]) primerFrom primerTo _startingGC offset =
if debug then
printfn "cutToGC: starting design f=%d t=%d oligo=%s"
primerFrom
primerTo
(arr2seq s.[primerFrom..primerTo])
let rec findGCFwd newRight =
if newRight < 0 || newRight >= s.Length then
failwithf "ERROR: primercord findGC array bounds exception t'=%d\n" newRight
match s.[newRight] with
|'G' | 'g' | 'C' | 'c' when threePrimeStable false (s.[..newRight])->
Some (newRight, temp p (s.[primerFrom..newRight]) (newRight-primerFrom+1)) // end on G/C

| _ when t' < (s.Length-1) && t'-f+1 < p.maxLength -> findGCFwd (t'+1)
| _ -> (t, (temp p s.[f..t] (t-f+1))) // fall back on original oligo if we run out of S without finding G

let rec findGCRev t' =
if t' < 0 || t' >= s.Length then
failwithf "ERROR: primercord findGC array bounds exception t'=%d\n" t'
match s.[t'] with
|'G' | 'g' | 'C' | 'c' when threePrimeStable false (s.[..t']) ->
(t', temp p (s.[f..t']) (t'-f+1)) // end on G/C
| _ when newRight < (s.Length-1) && newRight-primerFrom+1 < p.maxLength -> findGCFwd (newRight+1)
| _ -> None

let rec findGCRev newRight =
if newRight < 0 || newRight >= s.Length then
failwithf "ERROR: primercord findGC array bounds exception newRight=%d\n" newRight
match s.[newRight] with
|'G' | 'g' | 'C' | 'c' when threePrimeStable false (s.[..newRight]) ->
Some (newRight, temp p (s.[primerFrom..newRight]) (newRight-primerFrom+1))

| _ when t' > 1 && t'-f+1 > p.minLength -> findGCRev (t'-1)
| _ -> (t, (temp p s.[f..t] (t-f+1))) // fall back on original oligo if we run out of S without finding G
| _ when newRight > 1 && newRight-primerFrom+1 > p.minLength -> findGCRev (newRight-1)
| _ -> None

if p.ATPenalty < 0.0001<C> then
// did we already end on a G or a C
let endsWithGC = s.[primerTo] |> base2GC = 1
if endsWithGC || p.ATPenalty < 0.0001<C> then
// No GC optimization
if debug then printfn "cutToGC: no atPenalty, done"
{ tag = ""; oligo = s.[f..t] ; temp = temp p (s.[f..t]) (t-f+1); offset = f+offset }
if debug then printfn "cutToGC: no atPenalty or endsWithGC already, done len=%d" (primerTo-primerFrom+1)
{ tag = ""; oligo = s.[primerFrom..primerTo] ; temp = temp p (s.[primerFrom..primerTo]) (primerTo-primerFrom+1); offset = primerFrom+offset }
else
// Is there an alternative starting point that would end on a G or C that's not too far away?
let altTFwd,altTempFwd = findGCFwd t
let altTRev,altTempRev = findGCRev t

assert(altTempFwd > 10.0<C>)
assert(altTempRev > 10.0<C>)
if debug then
printfn "cutToGC: altTempFwd=%A altTFwd=%d altTempRev=%A altRFwd=%d"
altTempFwd altTFwd altTempRev altTRev

// Choose the direction to a GC that was least perturbative
let altT,altTemp =
if abs (altTempFwd-existingTemp) < abs(altTempRev-existingTemp) then
altTFwd,altTempFwd
else altTRev,altTempRev

if abs (altTemp-existingTemp) <= p.ATPenalty then
if debug then printfn "cutToGC: using altGC option temp=%A f=%d t=%d" altTemp f altT

{ tag = ""; oligo = s.[f..altT] ; temp = altTemp ; offset = f+offset}
else
if debug then printfn "cutToGC: ignore altGC option temp=%A f=%d t=%d" temp f t
{ tag = ""; oligo = s.[f..t] ; temp = temp p (s.[f..t]) (t-f+1); offset = f+offset }
let altBest =
match findGCFwd primerTo,findGCRev primerTo with
| None,None -> None // no better option
| Some (altPos,altTemp),None ->
if debug then
printfn "cutToGC: altTempFwd=%A altTFwd=%d altRev=None"
altTemp altPos
Some (altPos,altTemp) // only one worked
| None, Some (altPos,altTemp) ->
if debug then
printfn "cutToGC: altTempRev=%A altTRev=%d altFwd=None"
altTemp
altPos
Some (altPos,altTemp) // only one worked
| Some (altPosFwd,altTempFwd),Some(altPosRev,altTempRev) ->
if debug then
printfn "cutToGC: altTempFwd=%A alTFwd=%d altTempRev=%A altTRev=%d"
altTempFwd altPosFwd altTempRev altPosRev
if abs (altTempFwd-existingTemp) < abs(altTempRev-existingTemp) then
altPosFwd,altTempFwd
else altPosRev,altTempRev
|> Some

match altBest with
| Some (_,temperature) ->
if temperature < 10.0<C> then
failwithf "wikitemp1000: fail design temperature of %A < 10C" temperature // ensure selected temp isn't crazy
| None -> ()

match altBest with
| None ->
if debug then
printfn "cutToGC: no better altGC version, going with original f=%d t=%d oligo=%s"
primerFrom
primerTo
(arr2seq s.[primerFrom..primerTo])
{ tag = ""; oligo = s.[primerFrom..primerTo] ; temp = temp p (s.[primerFrom..primerTo]) (primerTo-primerFrom+1); offset = primerFrom+offset }

| Some(_,altTemp) when (altTemp-existingTemp) > p.ATPenalty ->
// stick with original design, compromise too big
if debug then
printfn "cutToGC: ignore altGC option (compromise not worth it) temp=%A f=%d t=%d oligo=%s"
temp
primerFrom
primerTo
(arr2seq s.[primerFrom..primerTo])
{ tag = ""; oligo = s.[primerFrom..primerTo] ; temp = temp p (s.[primerFrom..primerTo]) (primerTo-primerFrom+1); offset = primerFrom+offset }
| Some(altPos,altTemp) ->
if debug then
printfn "cutToGC: using altGC option temp=%A f=%d t=%d oligo=%s"
altTemp
primerFrom
altPos
(arr2seq s.[primerFrom..altPos])

{ tag = ""; oligo = s.[primerFrom..altPos] ; temp = altTemp ; offset = primerFrom+offset}

let upper (s:char array) =
[| for x in s ->
Expand Down Expand Up @@ -461,7 +563,11 @@ module primercore =
if debug then printfn "designLeft, stopping at nextBase-1=%d thisTemp=%f" (nextBase-1) (thisTemp/1.0<C>)
Some(cutToGC debug lastTemp p s 0 (nextBase-1) gcCount offset)

designLeftInternal sInitUpper nextBase gcCount (0.0<C>)
designLeftInternal sInitUpper nextBase gcCount (0.0<C>)
|> Option.map (fun prePDCheck ->
// possible final tweaks if we have made a primer-dimer with the tail
disruptPrimerDimers debug prePDCheck.temp p sInitUpper 0 (prePDCheck.oligo.Length-1) offset
)

/// Check for the presence of mono- or dinucleotide repeats longer than N
let hasPolyrun n (s:char[]) =
Expand Down Expand Up @@ -538,12 +644,16 @@ module primercore =
let threeP = if r-l+1 > 5 then threePrimeStable false (s.[l..r]) else false
// get average of all nucleotide specific penalties
let seqP = Array.average seqPen.[l..r]
// check ability to self dimerize
let primerDimerPotential = longestTailTailOverlap (s.[l..r]) (s.[l..r])
let p = posCloseness +
tempCloseness +
lenCloseness +
(if threeP then 0.0 else pen.threePrimeUnstablePenalty) +
(if hasPoly then pen.polyPenalty else 0.0) +
seqP
seqP +
(if primerDimerPotential > 2 then (float primerDimerPotential)*3.0 else 0.0)

if (abs(t - tTemp) <= pen.tmMaxDifference) && (not polyC) then
yield
{l = l ;
Expand Down Expand Up @@ -655,16 +765,13 @@ module primercore =
printf "No oligo in required length, so make max allowable max=%d templateLen=%d\n"
pen.maxLength s'.Length
let oligo = s'.[..(min s'.Length pen.maxLength)-1] // longest allowed
Some( { tag=o.tag; oligo = oligo ; temp = temp pen oligo pen.maxLength; offset = o.offset } ) // calc temperature and offset is 0 plus their supplied offset
let oligoTemp = temp pen oligo oligo.Length
let oligo' = disruptPrimerDimers debug oligoTemp pen s' 0 (oligo.Length-1) o.offset
Some( { oligo' with tag=o.tag ; temp = temp pen oligo pen.maxLength; offset = o.offset } ) // calc temperature and offset is 0 plus their supplied offset
| x -> x //

| RIGHT -> failwith "should not get right here"
| CENTERLEFT -> designCenter debug pen s' seqPen offFn ( (float o.targetTemp)*1.0<C>)

(*with
| Some(a,b,c,d) -> Some( { tag=o.tag; oligo = a ; temp = b ; offset = c} )
| None -> None *)

| CENTERRIGHT -> failwith "should not get centerright here"

(*
Expand Down
Loading