diff --git a/boot.prg b/boot.prg
new file mode 100644
index 0000000..668001e
Binary files /dev/null and b/boot.prg differ
diff --git a/cmd/multitonycrunch/main.go b/cmd/multitonycrunch/main.go
new file mode 100644
index 0000000..e5b3b9a
--- /dev/null
+++ b/cmd/multitonycrunch/main.go
@@ -0,0 +1,110 @@
+/*
+TonyCrunch is a fork of Antonio Savona's TSCrunch.
+Refactoring, including fast mode and multi-hack by burg.
+*/
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"runtime/pprof"
+	"sync"
+	"time"
+
+	"github.com/staD020/TSCrunch"
+)
+
+// usage prints the banner and the supported command line flags to stdout.
+func usage() {
+	fmt.Printf("TSCrunch %s - binary cruncher, by Antonio Savona\n", TSCrunch.Version)
+	fmt.Print("Multi-hack and fast mode by burg, quickly compile multiple files\n" +
+		"Usage: tscrunch [-p] [-i] [-f] [-q] infile infile infile\n" +
+		" -p  : input file is a prg, first 2 bytes are discarded.\n" +
+		" -i  : inplace crunching (forces -p)\n" +
+		" -q  : quiet mode\n" + " -f  : fast mode\n")
+}
+
+func main() {
+	if err := run(); err != nil {
+		log.Printf("error: %v\n", err)
+		usage()
+		os.Exit(1) // report failure to the caller; run has returned, so its deferred cleanup already ran
+	}
+}
+
+// run parses the command line, optionally records a CPU profile and crunches
+// every input file. It returns an error for setup problems or missing arguments.
+func run() error {
+	started := time.Now()
+	opt := TSCrunch.Options{STATS: true}
+	profilePath := flag.String("cpuprofile", "", "write cpu profile to `file`")
+	flag.BoolVar(&opt.PRG, "p", false, "")
+	flag.BoolVar(&opt.QUIET, "q", false, "")
+	flag.BoolVar(&opt.INPLACE, "i", false, "")
+	flag.BoolVar(&opt.Fast, "f", false, "")
+	flag.Usage = usage
+	flag.Parse()
+
+	if *profilePath != "" {
+		profile, err := os.Create(*profilePath)
+		if err != nil {
+			return fmt.Errorf("could not create CPU profile %q: %w", *profilePath, err)
+		}
+		defer profile.Close()
+		if err = pprof.StartCPUProfile(profile); err != nil {
+			return fmt.Errorf("could not start CPU profile: %w", err)
+		}
+		defer pprof.StopCPUProfile()
+	}
+
+	if flag.NArg() < 1 {
+		return fmt.Errorf("not enough args")
+	}
+	crunchFiles(opt, flag.Args())
+
+	if opt.QUIET {
+		return nil
+	}
+	fmt.Printf("elapsed: %s\n", time.Since(started))
+	return nil
+}
+
+// crunchFiles crunches every file in ff concurrently and logs per-file errors.
+func crunchFiles(opt TSCrunch.Options, ff []string) {
+	var wg sync.WaitGroup
+	wg.Add(len(ff))
+	for _, file := range ff {
+		go func(name string) {
+			defer wg.Done()
+			began := time.Now()
+			if err := crunchFile(opt, name); err != nil {
+				log.Printf("error: %v\n", err)
+			} else if !opt.QUIET {
+				fmt.Printf("crunching %q took %s\n\n", name, time.Since(began))
+			}
+		}(file)
+	}
+	wg.Wait()
+}
+
+// crunchFile crunches the file called name into name+".lz".
+func crunchFile(opt TSCrunch.Options, name string) error {
+	in, err := os.Open(name)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+	cruncher, err := TSCrunch.New(opt, in)
+	if err != nil {
+		return err
+	}
+	out, err := os.Create(name + ".lz")
+	if err != nil {
+		return err
+	}
+	defer out.Close()
+	_, err = cruncher.WriteTo(out)
+	return err
+}
diff --git a/cmd/tonycrunch/main.go b/cmd/tonycrunch/main.go
new file mode 100644
index 0000000..73db5da
--- /dev/null
+++ b/cmd/tonycrunch/main.go
@@ -0,0 +1,90 @@
+/*
+TonyCrunch is a fork of Antonio Savona's TSCrunch.
+Refactoring, including fast mode by burg.
+*/
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"runtime/pprof"
+	"time"
+
+	"github.com/staD020/TSCrunch"
+)
+
+// usage prints the banner and the supported command line flags to stdout.
+func usage() {
+	fmt.Printf("TSCrunch %s - binary cruncher, by Antonio Savona\n", TSCrunch.Version)
+	fmt.Print("Fast mode by burg\n" +
+		"Usage: tscrunch [-p] [-i] [-f] [-q] [-x $081f|0x081f|2079] infile outfile\n" +
+		" -p  : input file is a prg, first 2 bytes are discarded.\n" +
+		" -x  $addr: creates a self extracting file (forces -p)\n" +
+		" -i  : inplace crunching (forces -p)\n" +
+		" -q  : quiet mode\n" + " -f  : fast mode\n")
+}
+
+func main() {
+	if err := run(); err != nil {
+		log.Printf("error: %v\n", err)
+		usage()
+		os.Exit(1) // report failure to the caller; run has returned, so its deferred cleanup already ran
+	}
+}
+
+// run parses the command line, optionally records a CPU profile and crunches
+// infile into outfile. Every failure, including closing outfile, is returned.
+func run() error {
+	t0 := time.Now()
+	opt := TSCrunch.Options{STATS: true}
+	cpuProfile := flag.String("cpuprofile", "", "write cpu profile to `file`")
+	flag.BoolVar(&opt.PRG, "p", false, "")
+	flag.BoolVar(&opt.QUIET, "q", false, "")
+	flag.BoolVar(&opt.INPLACE, "i", false, "")
+	flag.BoolVar(&opt.Fast, "f", false, "")
+	flag.StringVar(&opt.JumpTo, "x", "", "")
+	flag.Usage = usage
+	flag.Parse()
+
+	if *cpuProfile != "" {
+		f, err := os.Create(*cpuProfile)
+		if err != nil {
+			return fmt.Errorf("could not create CPU profile %q: %w", *cpuProfile, err)
+		}
+		defer f.Close()
+		if err := pprof.StartCPUProfile(f); err != nil {
+			return fmt.Errorf("could not start CPU profile: %w", err)
+		}
+		defer pprof.StopCPUProfile()
+	}
+
+	if flag.NArg() != 2 {
+		return fmt.Errorf("expected infile and outfile, got %d args", flag.NArg())
+	}
+	in, err := os.Open(flag.Arg(0))
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+	t, err := TSCrunch.New(opt, in)
+	if err != nil {
+		return err
+	}
+	out, err := os.Create(flag.Arg(1))
+	if err != nil {
+		return err
+	}
+	_, err = t.WriteTo(out)
+	if cerr := out.Close(); err == nil {
+		err = cerr // a failed close can mean lost output, so report it
+	}
+	if err != nil {
+		return err
+	}
+	if !opt.QUIET {
+		fmt.Printf("elapsed: %s\n", time.Since(t0))
+	}
+	return nil
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..23bcc3b
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module github.com/staD020/TSCrunch
+
+go 1.17
+
+require github.com/RyanCarrier/dijkstra v1.1.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..c210741
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,8 @@
+github.com/RyanCarrier/dijkstra v1.1.0 h1:/NDihjfJA3CxFaZz8EdzTwdFKFZDvvB881OVLdakRcI=
+github.com/RyanCarrier/dijkstra v1.1.0/go.mod h1:5agGUBNEtUAGIANmbw09fuO3a2htPEkc1jNH01qxCWA=
+github.com/RyanCarrier/dijkstra-1 v0.0.0-20170512020943-0e5801a26345 h1:fgSpoKViTSqRb4hjDNj10ig5wUvO0CayCzFdLf6fuRM=
+github.com/RyanCarrier/dijkstra-1 v0.0.0-20170512020943-0e5801a26345/go.mod h1:OK4EvWJ441LQqGzed5NGB6vKBAE34n3z7iayPcEwr30=
+github.com/albertorestifo/dijkstra v0.0.0-20160910063646-aba76f725f72 h1:uGeGZl8PxSq8VZGG4QK5njJTFA4/G/x5CYORvQVXtAE=
+github.com/albertorestifo/dijkstra v0.0.0-20160910063646-aba76f725f72/go.mod h1:o+JdB7VetTHjLhU0N57x18B9voDBQe0paApdEAEoEfw=
+github.com/mattomatic/dijkstra v0.0.0-20130617153013-6f6d134eb237 h1:acuCHBjzG7MFTugvx3buC4m5rLDLaKC9J8C9jtlraRc=
+github.com/mattomatic/dijkstra v0.0.0-20130617153013-6f6d134eb237/go.mod h1:UOnLAUmVG5paym8pD3C4B9BQylUDC2vXFJJpT7JrlEA=
diff --git a/tscrunch.go b/tscrunch.go
index 150d890..2a8639e 100644
--- a/tscrunch.go
+++ b/tscrunch.go
@@ -2,39 +2,44 @@
 TSCrunch binary cruncher, by Antonio Savona
 */
 
-package main
+package TSCrunch
 
 import (
 	"bytes"
-	"flag"
+	_ "embed"
 	"fmt"
+	"io"
 	"math"
 	"os"
 	"sort"
 	"strconv"
-	"sync"
 	"time"
 
 	"github.com/RyanCarrier/dijkstra"
 )
 
-type tokenGraph struct {
-	wg           sync.WaitGroup
-	mg, ms, me   sync.Mutex
-	starts, ends map[int]bool
-	graph        map[edge]token
+const Version = "1.3" // TSCrunch version shown in the command line usage text
+
+type Options struct {
+	QUIET      bool   // suppress progress and summary output on stdout
+	PRG        bool   // input starts with a 2 byte load address that is stripped before crunching
+	SFX        bool   // produce a self extracting file; New sets it when JumpTo is given
+	INPLACE    bool   // inplace crunching; New forces PRG when set
+	STATS      bool   // print the duration of each crunch phase unless QUIET
+	Fast       bool   // skipping RLE ranges drastically improves crunch time at the cost of pack-ratio.
+	JumpTo     string // jump address for self extracting output: "$081f", "0x081f" or "2079"
+	jmp        uint16 // JumpTo parsed by New
+	decrunchTo uint16 // address the data is decrunched to, read from the PRG load address
+	loadTo     uint16 // load address of the crunched output
+	addr       []byte // the 2 byte load address taken from PRG input
 }
 
-type crunchCtx struct {
-	QUIET          bool
-	STATS          bool
-	PRG            bool
-	SFX            bool
-	INPLACE        bool
-	jmp            uint16
-	decrunchTo     uint16
-	loadTo         uint16
-	addr           []byte
+type tsc struct {
+	options        Options
+	src            []byte
+	starts         map[int]bool
+	ends           map[int]bool
+	graph          map[edge]token
 	optimalRun     int
 	crunchedSize   int
 	sourceLen      int
@@ -79,6 +84,100 @@ const LITERALID = 4
 const LONGLZID = 5
 const ZERORUNID = 6
 
+// New reads all of r and returns a cruncher for it, configured by opt. JumpTo
+// ($081f, 0x081f or 2079) selects self extracting output; it and INPLACE force
+// PRG input. It fails on a bad jump address, a read error or a truncated PRG.
+func New(opt Options, r io.Reader) (*tsc, error) {
+	if opt.JumpTo != "" {
+		opt.SFX = true
+		opt.loadTo = 0x0801
+		opt.PRG = true
+	}
+	if opt.INPLACE {
+		opt.PRG = true
+	}
+	if opt.SFX {
+		// ParseUint with bitSize 16 rejects out of range addresses instead of truncating.
+		digits, base := opt.JumpTo, 10
+		if len(digits) > 0 && digits[0] == '$' {
+			digits, base = digits[1:], 16
+		} else if len(digits) > 1 && digits[0] == '0' && digits[1] == 'x' {
+			digits, base = digits[2:], 16
+		}
+		jmp, err := strconv.ParseUint(digits, base, 16)
+		if err != nil {
+			return nil, fmt.Errorf("unable to parse jump address %q: %w", opt.JumpTo, err)
+		}
+		opt.jmp = uint16(jmp)
+		if opt.jmp == 0 {
+			return nil, fmt.Errorf("incorrect jump address %q", opt.JumpTo)
+		}
+	}
+	src, err := io.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("ReadAll failed: %w", err)
+	}
+	if opt.PRG {
+		if len(src) < 2 {
+			return nil, fmt.Errorf("prg input too short: %d bytes, need a 2 byte load address", len(src))
+		}
+		// Cap addr at 2 bytes so later appends to it cannot overwrite src.
+		opt.addr = src[:2:2]
+		src = src[2:]
+		opt.decrunchTo = uint16(opt.addr[0]) + 256*uint16(opt.addr[1])
+	}
+
+	t := &tsc{
+		options: opt,
+		src:     src,
+		starts:  make(map[int]bool, 0xffff),
+		ends:    make(map[int]bool, 0xffff),
+		graph:   make(map[edge]token, 0xffff),
+		// prefix arrays improve crunch performance 3x
+		// 19 prgs sequential, usePrefixArray:
+		// true:  0.89 sec
+		// false: 2.97 sec
+		usePrefixArray: true,
+	}
+	return t, nil
+}
+
+// WriteTo crunches the source and writes the result to w, returning the number
+// of bytes written. Unless QUIET is set, a size summary is printed to stdout.
+func (t *tsc) WriteTo(w io.Writer) (int64, error) {
+	buf, err := t.crunch()
+	if err != nil {
+		return 0, fmt.Errorf("t.crunch failed: %w", err)
+	}
+	// crunch already sets loadTo and prepends the load address for inplace
+	// output, so the buffer must not be given a second header here.
+	decrunchEnd := uint16(int(t.options.decrunchTo) + len(t.src) - 1)
+
+	n, err := w.Write(buf)
+	if err != nil {
+		return int64(n), err
+	}
+
+	if !t.options.QUIET {
+		ratio := float32(len(buf)) * 100.0 / float32(len(t.src))
+		srcPrg := "RAW"
+		destPrg := "RAW"
+		if t.options.PRG {
+			srcPrg = "PRG"
+		}
+		if t.options.SFX || t.options.INPLACE {
+			destPrg = "PRG"
+		}
+		fmt.Printf("input file  %s, $%04x - $%04x : %d bytes\n",
+			srcPrg, t.options.decrunchTo, decrunchEnd, len(t.src))
+		fmt.Printf("output file %s, $%04x - $%04x : %d bytes\n",
+			destPrg, t.options.loadTo, len(buf)+int(t.options.loadTo)-1, len(buf))
+		fmt.Printf("crunched to %.2f%% of original size\n", ratio)
+	}
+
+	return int64(n), nil
+}
+
 func min(x, y int) int {
 	if x < y {
 		return x
@@ -93,34 +192,20 @@ func max(x, y int) int {
 	return y
 }
 
-func load_raw(f string) []byte {
-	data, err := os.ReadFile(f)
-	if err == nil {
-		return data
-	} else {
-		fmt.Println("can't read data")
-		return nil
-	}
-}
-
-func save_raw(f string, data []byte) {
-	os.WriteFile(f, data, 0666)
-}
-
-func fillPrefixArray(data []byte, ctx *crunchCtx) {
-	ctx.prefixArray = make(map[[MINLZ]byte][]int)
-	for i := 0; i < len(data)-MINLZ; i++ {
-		ctx.prefixArray[*(*[MINLZ]byte)(data[i:])] = append(ctx.prefixArray[*(*[MINLZ]byte)(data[i:])], i)
+func (t *tsc) fillPrefixArray() {
+	t.prefixArray = make(map[[MINLZ]byte][]int) // MINLZ byte sequence -> ascending offsets where it starts
+	for i := 0; i < len(t.src)-MINLZ; i++ {
+		t.prefixArray[*(*[MINLZ]byte)(t.src[i:])] = append(t.prefixArray[*(*[MINLZ]byte)(t.src[i:])], i)
 	}
 }
 
-func findall(data []byte, prefix []byte, i int, minlz int, ctx *crunchCtx) <-chan int {
+func (t *tsc) findall(prefix []byte, i int, minlz int) <-chan int {
 	c := make(chan int)
 	x0 := max(0, i-LZOFFSET)
-	x1 := min(i+minlz-1, len(data))
+	x1 := min(i+minlz-1, len(t.src))
 
-	if ctx.usePrefixArray {
-		parray := ctx.prefixArray[*(*[MINLZ]byte)(prefix[:MINLZ])]
+	if t.usePrefixArray {
+		parray := t.prefixArray[*(*[MINLZ]byte)(prefix[:MINLZ])]
 		go func() {
 			//binary search to the closest entry on the left
 			l := 0
@@ -139,36 +224,36 @@ func findall(data []byte, prefix []byte, i int, minlz int, ctx *crunchCtx) <-cha
 			}
 
 			for o := mid; len(parray) > 0 && o >= 0 && parray[o] > x0; o-- {
-				if parray[o] < i && bytes.Equal(data[parray[o]:parray[o]+minlz], prefix) {
+				if parray[o] < i && bytes.Equal(t.src[parray[o]:parray[o]+minlz], prefix) {
 					c <- parray[o]
 				}
 			}
 			close(c)
 		}()
-	} else {
+		return c
+	}
+
+	go func() {
 		f := 1
-		go func() {
-			for f >= 0 {
-				f = bytes.LastIndex(data[x0:x1], prefix)
-				if f >= 0 {
-					c <- f + x0
-					x1 = x0 + f + minlz - 1
-				}
+		for f >= 0 {
+			f = bytes.LastIndex(t.src[x0:x1], prefix)
+			if f >= 0 {
+				c <- f + x0
+				x1 = x0 + f + minlz - 1
 			}
-			close(c)
-		}()
-	}
+		}
+		close(c)
+	}()
 	return c
 }
 
-func findOptimalZeroRun(src []byte) int {
+func (t *tsc) findOptimalZeroRun() int {
 	zeroruns := make(map[int]int)
-	var i = 0
-	var j = 0
-	for i < len(src)-1 {
-		if src[i] == 0 {
+	var i, j int
+	for i < len(t.src)-1 {
+		if t.src[i] == 0 {
 			j = i + 1
-			for j < len(src) && src[j] == 0 && j-i < 256 {
+			for j < len(t.src) && t.src[j] == 0 && j-i < 256 {
 				j += 1
 			}
 			if j-i >= MINRLE {
@@ -179,20 +264,19 @@ func findOptimalZeroRun(src []byte) int {
 			i += 1
 		}
 	}
-	if len(zeroruns) > 0 {
-		bestrun := 0
-		bestvalue := 0.0
-		for key, amount := range zeroruns {
-			currentvalue := float64(key) * math.Pow(float64(amount), 1.1)
-			if currentvalue > bestvalue {
-				bestrun = key
-				bestvalue = currentvalue
-			}
-		}
-		return bestrun
-	} else {
+	if len(zeroruns) < 1 {
 		return LONGESTRLE
 	}
+	bestrun := 0
+	bestvalue := 0.0
+	for key, amount := range zeroruns {
+		currentvalue := float64(key) * math.Pow(float64(amount), 1.1)
+		if currentvalue > bestvalue {
+			bestrun = key
+			bestvalue = currentvalue
+		}
+	}
+	return bestrun
 }
 
 func tokenCost(n0, n1 int, t byte) int64 {
@@ -217,41 +301,43 @@ func tokenCost(n0, n1 int, t byte) int64 {
 	return 0
 }
 
-func tokenPayload(src []byte, t token) []byte {
-
+func (ts *tsc) tokenPayload(t token) []byte {
 	n0 := t.i
 	n1 := t.i + t.size
 
-	if t.tokentype == LZID {
+	switch t.tokentype {
+	case LZID:
 		return []byte{byte(LZMASK | (((t.size - 1) << 2) & 0x7f) | 2), byte(t.offset & 0xff)}
-	} else if t.tokentype == LONGLZID {
+	case LONGLZID:
 		negoffset := (0 - t.offset)
 		return []byte{byte(LZMASK | (((t.size-1)>>1)<<2)&0x7f), byte(negoffset & 0xff), byte(((negoffset >> 8) & 0x7f) | (((t.size - 1) & 1) << 7))}
-	} else if t.tokentype == RLEID {
+	case RLEID:
 		return []byte{RLEMASK | byte(((t.size-1)<<1)&0x7f), t.rlebyte}
-	} else if t.tokentype == ZERORUNID {
+	case ZERORUNID: // a single byte; the run length is not part of the payload
 		return []byte{RLEMASK}
-	} else if t.tokentype == LZ2ID {
+	case LZ2ID:
 		return []byte{LZ2MASK | byte(0x7f-t.offset)}
-	} else {
-		return append([]byte{byte(LITERALMASK | t.size)}, src[n0:n1]...)
+	default: // any other token type is emitted as a literal: header byte followed by the raw source bytes
+		return append([]byte{byte(LITERALMASK | t.size)}, ts.src[n0:n1]...)
 	}
 }
 
-func LZ(src []byte, i int, size int, offset int, minlz int, ctx *crunchCtx) token {
-	var lz token
-	lz.tokentype = LZID
-	lz.i = i
+func (t *tsc) LZ(i int, size int, offset int, minlz int) token {
+	lz := token{
+		tokentype: LZID,
+		i:         i,
+		size:      size,
+		offset:    offset,
+	}
 	if i >= 0 {
-
 		bestpos := i - 1
 		bestlen := 0
 
-		if len(src)-i >= minlz {
-			prefixes := findall(src, src[i:i+minlz], i, minlz, ctx)
+		if len(t.src)-i >= minlz {
+			prefixes := t.findall(t.src[i:i+minlz], i, minlz)
 			for j := range prefixes {
 				l := minlz
-				for i+l < len(src) && l < LONGESTLONGLZ && src[j+l] == src[i+l] {
+				for i+l < len(t.src) && l < LONGESTLONGLZ && t.src[j+l] == t.src[i+l] {
 					l++
 				}
 				if l > bestlen {
@@ -262,9 +348,6 @@ func LZ(src []byte, i int, size int, offset int, minlz int, ctx *crunchCtx) toke
 		}
 		lz.size = bestlen
 		lz.offset = i - bestpos
-	} else {
-		lz.size = size
-		lz.offset = offset
 	}
 	if lz.size > LONGESTLZ || lz.offset >= 256 {
 		lz.tokentype = LONGLZID
@@ -272,330 +355,270 @@ func LZ(src []byte, i int, size int, offset int, minlz int, ctx *crunchCtx) toke
 	return lz
 }
 
-func RLE(src []byte, i int, size int, rlebyte byte) token {
-	var rle token
-	rle.tokentype = RLEID
-	rle.i = i
-	if i >= 0 {
-		rle.rlebyte = src[i]
-		x := 0
-		for i+x < len(src) && x < LONGESTRLE && src[i+x] == src[i] {
-			x++
-		}
-		rle.size = x
-	} else {
+func (t *tsc) RLE(i int, size int, rlebyte byte) token {
+	rle := token{
+		tokentype: RLEID,
+		i:         i,
+	}
+	if i < 0 { // negative i: build the token from the given size and rlebyte
 		rle.size = size
 		rle.rlebyte = rlebyte
+		return rle
 	}
+	rle.rlebyte = t.src[i]
+	x := 0 // length of the run of t.src[i] starting at i, capped at LONGESTRLE
+	for i+x < len(t.src) && x < LONGESTRLE && t.src[i+x] == t.src[i] {
+		x++
+	}
+	rle.size = x
 	return rle
 }
 
-func ZERORUN(src []byte, i int, optimalRun int) token {
-	var zero token
-	zero.tokentype = ZERORUNID
-
-	zero.i = i
-	zero.rlebyte = 0
-	zero.size = 0
-
+func (t *tsc) ZERORUN(i int) token {
+	zero := token{
+		tokentype: ZERORUNID,
+		i:         i,
+		rlebyte:   0,
+		size:      0,
+	}
 	if i >= 0 {
 		var x int
-		for x = 0; x < optimalRun && i+x < len(src) && src[i+x] == 0; x++ {
+		for x = 0; x < t.optimalRun && i+x < len(t.src) && t.src[i+x] == 0; x++ {
 		}
-		if x == optimalRun {
-			zero.size = optimalRun
+		if x == t.optimalRun { // size stays 0 unless optimalRun zero bytes start at i
+			zero.size = t.optimalRun
 		}
 	}
 	return zero
 }
 
-func LZ2(src []byte, i int, size int, offset int) token {
-	var lz2 token
-	lz2.tokentype = LZ2ID
-
-	lz2.offset = -1
-	lz2.size = -1
-	lz2.i = i
-
-	if i >= 0 {
-		if i+2 < len(src) {
-			leftbound := max(0, i-LZ2OFFSET)
-			lpart := src[leftbound : i+1]
-			o := bytes.LastIndex(lpart, src[i:i+2])
-			if o >= 0 {
-				lz2.offset = i - (o + leftbound)
-				lz2.size = 2
-			}
-		}
-	} else {
+func (t *tsc) LZ2(i int, size int, offset int) token {
+	lz2 := token{
+		tokentype: LZ2ID,
+		offset:    -1,
+		size:      -1,
+		i:         i,
+	}
+	if i < 0 { // negative i: build the token from the given size and offset
 		lz2.size = size
 		lz2.offset = offset
+		return lz2
+	}
+	if i+2 < len(t.src) {
+		leftbound := max(0, i-LZ2OFFSET)
+		lpart := t.src[leftbound : i+1]
+		o := bytes.LastIndex(lpart, t.src[i:i+2]) // last occurrence of the 2 bytes at i that starts before i, at most LZ2OFFSET back
+		if o >= 0 {
+			lz2.offset = i - (o + leftbound)
+			lz2.size = 2
+		}
 	}
 	return lz2
 }
 
 func LIT(i int, size int) token {
-	var lit token
-	lit.tokentype = LITERALID
-	lit.size = size
-	lit.i = i
-	return lit
+	return token{ // literal token covering size bytes starting at offset i
+		tokentype: LITERALID,
+		size:      size,
+		i:         i,
+	}
 }
 
-func crunchAtByte(src []byte, i int, tg *tokenGraph, ctx *crunchCtx) {
-	rle := RLE(src, i, 0, 0)
+// crunchAtByte adds the candidate tokens starting at offset i to the graph and returns how many following offsets the caller may skip (non-zero only in Fast mode).
+func (t *tsc) crunchAtByte(i int) int {
+	rle := t.RLE(i, 0, 0)
 	//don't compute prefix for same bytes or this will explode
 	//start computing for prefixes larger than RLE size
 	var lz token
 	if rle.size < LONGESTLONGLZ-1 {
-		lz = LZ(src, i, 0, 0, rle.size+1, ctx)
+		lz = t.LZ(i, 0, 0, rle.size+1)
 	} else {
-		lz = LZ(src, -1, -1, -1, -1, ctx) // start with a dummy lz
+		lz = t.LZ(-1, -1, -1, -1) // start with a dummy lz
 	}
 
 	if lz.size >= MINLZ || rle.size >= MINRLE {
-		tg.ms.Lock()
-		tg.starts[i] = true
-		tg.ms.Unlock()
+		t.starts[i] = true
 	}
 
 	for size := lz.size; size >= MINLZ && size > rle.size; size-- {
-		tg.me.Lock()
-		tg.ends[i+size] = true
-		tg.me.Unlock()
-
-		tg.mg.Lock()
-		tg.graph[edge{i, i + size}] = LZ(src, -1, size, lz.offset, MINLZ, ctx)
-		tg.mg.Unlock()
-	}
-
-	for size := rle.size; size >= MINRLE; size-- {
-		tg.me.Lock()
-		tg.ends[i+size] = true
-		tg.me.Unlock()
-
-		tg.mg.Lock()
-		tg.graph[edge{i, i + size}] = RLE(src, -1, size, src[i])
-		tg.mg.Unlock()
+		t.graph[edge{i, i + size}] = t.LZ(-1, size, lz.offset, MINLZ)
+		t.ends[i+size] = true
+	}
+
+	skip := 0
+	if t.options.Fast {
+		// using this more efficient one-shot, it looks like we use a couple bytes more in resulting .prg
+		// skipping identical bytes in this RLE block improves crunchtime, but impact on file size is big
+		// worst case was 200 bytes extra for me
+		if rle.size >= MINRLE {
+			t.graph[edge{i, i + rle.size}] = t.RLE(-1, rle.size, t.src[i])
+			t.ends[i+rle.size] = true
+			t.graph[edge{i, i + MINRLE}] = t.RLE(-1, MINRLE, t.src[i])
+			t.ends[i+MINRLE] = true
+			skip = rle.size - 1
+		}
+	} else {
+		// the original RLE implementation consumes tons of RAM and CPU, but is more efficient in packratio
+		for size := rle.size; size >= MINRLE; size-- {
+			t.graph[edge{i, i + size}] = t.RLE(-1, size, t.src[i])
+			t.ends[i+size] = true
+		}
 	}
 
-	if len(src)-i > 2 {
-		lz2 := LZ2(src, i, 0, 0)
+	if len(t.src)-i > 2 {
+		lz2 := t.LZ2(i, 0, 0)
 		if lz2.size == 2 {
-			tg.mg.Lock()
-			tg.graph[edge{i, i + 2}] = lz2 //LZ2ID
-			tg.mg.Unlock()
-
-			tg.ms.Lock()
-			tg.starts[i] = true
-			tg.ms.Unlock()
-
-			tg.me.Lock()
-			tg.ends[i+2] = true
-			tg.me.Unlock()
+			t.graph[edge{i, i + 2}] = lz2 //LZ2ID
+			t.starts[i] = true
+			t.ends[i+2] = true
 		}
 	}
 
-	zero := ZERORUN(src, i, ctx.optimalRun)
+	zero := t.ZERORUN(i)
 	if zero.size != 0 {
-		tg.mg.Lock()
-		tg.graph[edge{i, i + ctx.optimalRun}] = zero
-		tg.mg.Unlock()
-
-		tg.ms.Lock()
-		tg.starts[i] = true
-		tg.ms.Unlock()
-
-		tg.me.Lock()
-		tg.ends[i+ctx.optimalRun] = true
-		tg.me.Unlock()
+		t.graph[edge{i, i + t.optimalRun}] = zero
+		t.starts[i] = true
+		t.ends[i+t.optimalRun] = true
 	}
-
-	tg.wg.Done()
+	return skip
 }
 
-func crunch(src []byte, ctx *crunchCtx) []byte {
-
-	var boot = []byte{
-
-		0x01, 0x08, 0x0B, 0x08, 0x0A, 0x00, 0x9E, 0x32, 0x30, 0x36, 0x31, 0x00,
-		0x00, 0x00, 0x78, 0xA2, 0xC9, 0xBD, 0x1A, 0x08, 0x95, 0x00, 0xCA, 0xD0,
-		0xF8, 0x4C, 0x02, 0x00, 0x34, 0xBD, 0x00, 0x10, 0x9D, 0x00, 0xFF, 0xE8,
-		0xD0, 0xF7, 0xC6, 0x04, 0xC6, 0x07, 0xA5, 0x04, 0xC9, 0x07, 0xB0, 0xED,
-		0xA0, 0x00, 0xB3, 0x21, 0x30, 0x21, 0xC9, 0x20, 0xB0, 0x3F, 0xA8, 0xB9,
-		0xFF, 0xFF, 0x88, 0x99, 0xFF, 0xFF, 0xD0, 0xF7, 0x8A, 0xE8, 0x65, 0x25,
-		0x85, 0x25, 0xB0, 0x77, 0x8A, 0x65, 0x21, 0x85, 0x21, 0x90, 0xDF, 0xE6,
-		0x22, 0xB0, 0xDB, 0x4B, 0x7F, 0x90, 0x3A, 0xF0, 0x6B, 0xA2, 0x02, 0x85,
-		0x53, 0xC8, 0xB1, 0x21, 0xA4, 0x53, 0x91, 0x25, 0x88, 0x91, 0x25, 0xD0,
-		0xFB, 0xA9, 0x00, 0xB0, 0xD5, 0xA9, 0x37, 0x85, 0x01, 0x58, 0x4C, 0x5B,
-		0x00, 0xF0, 0xF6, 0x09, 0x80, 0x65, 0x25, 0x85, 0x9B, 0xA5, 0x26, 0xE9,
-		0x00, 0x85, 0x9C, 0xB1, 0x9B, 0x91, 0x25, 0xC8, 0xB1, 0x9B, 0x91, 0x25,
-		0x98, 0xAA, 0x88, 0xF0, 0xB1, 0x4A, 0x85, 0xA0, 0xC8, 0xA5, 0x25, 0x90,
-		0x33, 0xF1, 0x21, 0x85, 0x9B, 0xA5, 0x26, 0xE9, 0x00, 0x85, 0x9C, 0xA2,
-		0x02, 0xA0, 0x00, 0xB1, 0x9B, 0x91, 0x25, 0xC8, 0xB1, 0x9B, 0x91, 0x25,
-		0xC8, 0xB9, 0x9B, 0x00, 0x91, 0x25, 0xC0, 0x00, 0xD0, 0xF6, 0x98, 0xA0,
-		0x00, 0xB0, 0x83, 0xE6, 0x26, 0x18, 0x90, 0x84, 0xA0, 0xFF, 0x84, 0x53,
-		0xA2, 0x01, 0xD0, 0x96, 0x71, 0x21, 0x85, 0x9B, 0xC8, 0xB3, 0x21, 0x09,
-		0x80, 0x65, 0x26, 0x85, 0x9C, 0xE0, 0x80, 0x26, 0xA0, 0xA2, 0x03, 0xD0,
-		0xC4,
-	}
-
-	tgraph := tokenGraph{
-		wg:     sync.WaitGroup{},
-		mg:     sync.Mutex{},
-		ms:     sync.Mutex{},
-		me:     sync.Mutex{},
-		starts: make(map[int]bool),
-		ends:   make(map[int]bool),
-		graph:  make(map[edge]token),
-	}
-
-	ctx.sourceLen = len(src)
-
-	remainder := []byte{}
-	var G = dijkstra.NewGraph()
-
-	if ctx.PRG {
-		ctx.addr = src[:2]
-		src = src[2:]
-		ctx.decrunchTo = uint16(ctx.addr[0]) + 256*uint16(ctx.addr[1])
-	}
-
-	for i := 0; i < len(src)+1; i++ {
+func (t *tsc) crunch() ([]byte, error) {
+	t.sourceLen = len(t.src)
+	G := dijkstra.NewGraph()
+	for i := 0; i < len(t.src)+1; i++ {
 		G.AddVertex(i)
 	}
 
-	if ctx.INPLACE {
-		remainder = src[len(src)-1:]
-		src = src[:len(src)-1]
+	remainder := []byte{}
+	if t.options.INPLACE {
+		remainder = t.src[len(t.src)-1:]
+		t.src = t.src[:len(t.src)-1]
 	}
 
-	ctx.optimalRun = findOptimalZeroRun(src)
+	t.optimalRun = t.findOptimalZeroRun()
 
-	if ctx.usePrefixArray {
-		fillPrefixArray(src, ctx)
+	if t.usePrefixArray {
+		t.fillPrefixArray()
 	}
 
-	if !ctx.QUIET {
+	if !t.options.QUIET {
 		fmt.Print("Populating LZ layer")
 	}
-
 	tm := time.Now()
 
-	for i := 0; i < len(src); i++ {
-		tgraph.wg.Add(1)
-		go crunchAtByte(src, i, &tgraph, ctx)
+	for i := 0; i < len(t.src); i++ {
+		i += t.crunchAtByte(i)
 	}
-	tgraph.wg.Wait()
 
-	if !ctx.QUIET {
-		if ctx.STATS {
-			fmt.Println(" ...", time.Since(tm))
-		} else {
-			fmt.Println()
+	if !t.options.QUIET {
+		if t.options.STATS {
+			fmt.Print(" ...", time.Since(tm))
 		}
+		fmt.Println()
 	}
 
-	tgraph.starts[len(src)] = true
-	tgraph.ends[0] = true
-	starts_ := make([]int, 0, len(tgraph.starts))
-	ends_ := make([]int, 0, len(tgraph.ends))
-	for k := range tgraph.starts {
+	t.starts[len(t.src)] = true
+	t.ends[0] = true
+	starts_ := make([]int, 0, len(t.starts))
+	ends_ := make([]int, 0, len(t.ends))
+	for k := range t.starts {
 		starts_ = append(starts_, k)
 	}
-	for k := range tgraph.ends {
+	for k := range t.ends {
 		ends_ = append(ends_, k)
 	}
 
 	sort.Ints(starts_)
 	sort.Ints(ends_)
 
-	if !ctx.QUIET {
+	if !t.options.QUIET {
 		fmt.Print("Closing Gaps")
 	}
 
 	e, s := 0, 0
 	for e < len(ends_) && s < len(starts_) {
 		end := ends_[e]
-		if end < starts_[s] {
-			//bridge
-			for starts_[s]-end >= LONGESTLITERAL {
-				key := edge{end, end + LONGESTLITERAL}
-				_, haskey := tgraph.graph[key]
-				if !haskey {
-					lit := LIT(end, LONGESTLITERAL)
-					lit.size = LONGESTLITERAL
-					tgraph.graph[key] = lit
-				}
-				end += LONGESTLITERAL
+		if end >= starts_[s] {
+			s++
+			continue
+		}
+		//bridge
+		for starts_[s]-end >= LONGESTLITERAL {
+			key := edge{end, end + LONGESTLITERAL}
+			_, haskey := t.graph[key]
+			if !haskey {
+				lit := LIT(end, LONGESTLITERAL)
+				lit.size = LONGESTLITERAL
+				t.graph[key] = lit
 			}
+			end += LONGESTLITERAL
+		}
 
-			for s0 := s; s0 < len(starts_) && starts_[s0]-end < LONGESTLITERAL; s0++ {
-				key := edge{end, starts_[s0]}
-				_, haskey := tgraph.graph[key]
-				if !haskey {
-					lit := LIT(end, starts_[s0]-end)
-					lit.size = starts_[s0] - end
-					tgraph.graph[key] = lit
-				}
+		for s0 := s; s0 < len(starts_) && starts_[s0]-end < LONGESTLITERAL; s0++ {
+			key := edge{end, starts_[s0]}
+			_, haskey := t.graph[key]
+			if !haskey {
+				lit := LIT(end, starts_[s0]-end)
+				lit.size = starts_[s0] - end
+				t.graph[key] = lit
 			}
-			e++
-		} else {
-			s++
 		}
+		e++
 	}
 
-	if !ctx.QUIET {
-		if ctx.STATS {
-			fmt.Println(" ...", time.Since(tm))
-		} else {
-			fmt.Println()
+	if !t.options.QUIET {
+		if t.options.STATS {
+			fmt.Print(" ...", time.Since(tm))
 		}
+		fmt.Println()
 		fmt.Print("Populating Graph")
 	}
 
 	tm = time.Now()
 
-	for k, t := range tgraph.graph {
-		G.AddArc(k.n0, k.n1, tokenCost(k.n0, k.n1, t.tokentype))
+	for k, t := range t.graph {
+		if err := G.AddArc(k.n0, k.n1, tokenCost(k.n0, k.n1, t.tokentype)); err != nil {
+			return nil, fmt.Errorf("G.AddArc failed: %w", err)
+		}
 	}
 
-	if !ctx.QUIET {
-		if ctx.STATS {
-			fmt.Println(" ...", time.Since(tm))
-		} else {
-			fmt.Println()
+	if !t.options.QUIET {
+		if t.options.STATS {
+			fmt.Print(" ...", time.Since(tm))
 		}
+		fmt.Println()
 		fmt.Print("Computing shortest path")
 	}
 
 	tm = time.Now()
+	best, err := G.Shortest(0, len(t.src))
+	if err != nil {
+		return nil, fmt.Errorf("G.Shortest failed: %w", err)
+	}
 
-	best, _ := G.Shortest(0, len(src))
-
-	if !ctx.QUIET {
-		if ctx.STATS {
-			fmt.Println(" ...", time.Since(tm))
-		} else {
-			fmt.Println()
+	if !t.options.QUIET {
+		if t.options.STATS {
+			fmt.Print(" ...", time.Since(tm))
 		}
+		fmt.Println()
 	}
 	crunched := make([]byte, 0)
 	token_list := make([]token, 0)
 
 	for i := 0; i < len(best.Path)-1; i++ {
 		e := edge{best.Path[i], best.Path[i+1]}
-		token_list = append(token_list, tgraph.graph[e])
+		token_list = append(token_list, t.graph[e])
 	}
 
-	if ctx.INPLACE {
+	if t.options.INPLACE {
 		safety := len(token_list)
 		segment_uncrunched_size := 0
 		segment_crunched_size := 0
 		total_uncrunched_size := 0
 		for i := len(token_list) - 1; i >= 0; i-- {
-			segment_crunched_size += len(tokenPayload(src, token_list[i])) //token size
-			segment_uncrunched_size += token_list[i].size                  //decrunched token raw size
+			segment_crunched_size += len(t.tokenPayload(token_list[i])) //token size
+			segment_uncrunched_size += token_list[i].size               //decrunched token raw size
 			if segment_uncrunched_size <= segment_crunched_size+0 {
 				safety = i
 				total_uncrunched_size += segment_uncrunched_size
@@ -603,31 +626,32 @@ func crunch(src []byte, ctx *crunchCtx) []byte {
 				segment_crunched_size = 0
 			}
 		}
-		for _, t := range token_list[:safety] {
-			crunched = append(crunched, tokenPayload(src, t)...)
+		for _, v := range token_list[:safety] {
+			crunched = append(crunched, t.tokenPayload(v)...)
 		}
 		if total_uncrunched_size > 0 {
-			remainder = append(src[len(src)-total_uncrunched_size:], remainder...)
+			remainder = append(t.src[len(t.src)-total_uncrunched_size:], remainder...)
 		}
 		crunched = append(crunched, TERMINATOR)
 		crunched = append(crunched, remainder[1:]...)
 		crunched = append(remainder[:1], crunched...)
-		crunched = append([]byte{byte(ctx.optimalRun - 1)}, crunched...)
-		crunched = append(ctx.addr, crunched...)
+		crunched = append([]byte{byte(t.optimalRun - 1)}, crunched...)
+		crunched = append(t.options.addr, crunched...)
 
 	} else {
-		for _, t := range token_list {
-			crunched = append(crunched, tokenPayload(src, t)...)
+		for _, v := range token_list {
+			crunched = append(crunched, t.tokenPayload(v)...)
 		}
 		crunched = append(crunched, TERMINATOR)
-		if !ctx.SFX {
-			crunched = append([]byte{byte(ctx.optimalRun - 1)}, crunched...)
+		if !t.options.SFX {
+			crunched = append([]byte{byte(t.optimalRun - 1)}, crunched...)
 		}
 	}
 
-	ctx.crunchedSize = len(crunched)
+	t.crunchedSize = len(crunched)
 
-	if ctx.SFX {
+	if t.options.SFX {
+		boot := newBoot()
 		fileLen := len(boot) + len(crunched)
 		startAddress := 0x10000 - len(crunched)
 		transfAddress := fileLen + 0x6ff
@@ -638,105 +662,35 @@ func crunch(src []byte, ctx *crunchCtx) []byte {
 		boot[0x3c] = byte(startAddress & 0xff) //Depack from..
 		boot[0x3d] = byte(startAddress >> 8)
 
-		boot[0x40] = byte(ctx.decrunchTo & 0xff) //decrunch to..
-		boot[0x41] = byte(ctx.decrunchTo >> 8)
+		boot[0x40] = byte(t.options.decrunchTo & 0xff) //decrunch to..
+		boot[0x41] = byte(t.options.decrunchTo >> 8)
 
-		boot[0x77] = byte(ctx.jmp & 0xff) // Jump to..
-		boot[0x78] = byte(ctx.jmp >> 8)
+		boot[0x77] = byte(t.options.jmp & 0xff) // Jump to..
+		boot[0x78] = byte(t.options.jmp >> 8)
 
-		boot[0xc9] = byte(ctx.optimalRun - 1)
+		boot[0xc9] = byte(t.optimalRun - 1)
 
 		crunched = append(boot, crunched...)
 
-		ctx.crunchedSize += len(boot)
-		ctx.loadTo = 0x0801
+		t.crunchedSize += len(boot)
+		t.options.loadTo = 0x0801
 	}
 
-	ctx.decrunchEnd = uint16(int(ctx.decrunchTo) + len(src) - 1)
+	t.decrunchEnd = uint16(int(t.options.decrunchTo) + len(t.src) - 1)
 
-	if ctx.INPLACE {
-		ctx.loadTo = ctx.decrunchEnd - uint16(len(crunched)) + 1
-		crunched = append([]byte{byte(ctx.loadTo & 255), byte(ctx.loadTo >> 8)}, crunched...)
+	if t.options.INPLACE {
+		t.options.loadTo = t.decrunchEnd - uint16(len(crunched)) + 1
+		crunched = append([]byte{byte(t.options.loadTo & 255), byte(t.options.loadTo >> 8)}, crunched...)
 	}
 
-	return crunched
-}
-
-func usage() {
-	fmt.Println("TSCrunch 1.3 - binary cruncher, by Antonio Savona")
-	fmt.Println("Usage: tscrunch [-p] [-i] [-q] [-x $addr] infile outfile")
-	fmt.Println(" -p  : input file is a prg, first 2 bytes are discarded.")
-	fmt.Println(" -x  $addr: creates a self extracting file (forces -p)")
-	fmt.Println(" -i  : inplace crunching (forces -p)")
-	fmt.Println(" -q  : quiet mode")
+	return crunched, nil
 }
 
-func main() {
-	ctx := crunchCtx{
-		//prefix arrays for efficient prefix search don't really improve performance, here
-		//due to the small search window.
-		usePrefixArray: true,
-		STATS:          true,
-	}
-
-	var jmp_str string
-	flag.BoolVar(&ctx.PRG, "p", false, "")
-	flag.BoolVar(&ctx.QUIET, "q", false, "")
-	flag.BoolVar(&ctx.INPLACE, "i", false, "")
-	flag.StringVar(&jmp_str, "x", "", "")
-	flag.Usage = usage
-	flag.Parse()
-
-	if jmp_str != "" {
-		ctx.SFX = true
-		ctx.PRG = true
-	}
+//go:embed "boot.prg"
+var bootPrg []byte
 
-	if ctx.INPLACE {
-		ctx.PRG = true
-	}
-
-	if flag.NArg() != 2 {
-		usage()
-		os.Exit(2)
-	}
-
-	if ctx.SFX {
-		if jmp_str[0] == '$' {
-			jmp, err := strconv.ParseUint(jmp_str[1:], 16, 16)
-			if err == nil {
-				ctx.jmp = uint16(jmp)
-			}
-		}
-		if ctx.jmp == 0 {
-			usage()
-			os.Exit(2)
-		}
-	}
-
-	ifidx := flag.NArg() - 2
-	ofidx := flag.NArg() - 1
-
-	src := load_raw(flag.Args()[ifidx])
-
-	crunched := crunch(src, &ctx)
-
-	save_raw(flag.Args()[ofidx], crunched)
-
-	if !ctx.QUIET {
-		ratio := (float32(ctx.crunchedSize) * 100.0 / float32(ctx.sourceLen))
-		prg := "RAW"
-		dest_prg := "RAW"
-		if ctx.PRG {
-			prg = "PRG"
-		}
-		if ctx.SFX || ctx.INPLACE {
-			dest_prg = "prg"
-		}
-		fmt.Printf("Input file  %s: %s, $%04x - $%04x : %d bytes\n",
-			prg, flag.Args()[ifidx], ctx.decrunchTo, ctx.decrunchEnd, ctx.sourceLen)
-		fmt.Printf("Output file %s: %s, $%04x - $%04x : %d bytes\n",
-			dest_prg, flag.Args()[ofidx], ctx.loadTo, ctx.crunchedSize+int(ctx.loadTo)-1, ctx.crunchedSize)
-		fmt.Printf("Crunched to %.2f%% of original size\n", ratio)
-	}
+// newBoot returns a private copy of the embedded boot.prg, so callers can
+// patch bytes in it without modifying bootPrg itself.
+func newBoot() []byte {
+	return append(make([]byte, 0, len(bootPrg)), bootPrg...)
 }