From 76b6dd7b52163940c18bc4f7128544747c3d24aa Mon Sep 17 00:00:00 2001 From: Deven Panchal Date: Mon, 8 Jan 2018 10:08:17 -0800 Subject: [PATCH 1/2] + Introducing Hilbert packing on top of bulk load. --- pom.xml | 6 + .../com/github/davidmoten/rtree/PackAlgo.java | 12 ++ .../com/github/davidmoten/rtree/RTree.java | 92 ++++++++- .../davidmoten/rtree/BenchmarksRTree.java | 5 +- .../github/davidmoten/rtree/RTreeTest.java | 188 ++++++++++++++++-- 5 files changed, 279 insertions(+), 24 deletions(-) create mode 100644 src/main/java/com/github/davidmoten/rtree/PackAlgo.java diff --git a/pom.xml b/pom.xml index f9b31611..fb8d1880 100644 --- a/pom.xml +++ b/pom.xml @@ -168,6 +168,12 @@ true + + com.github.davidmoten + hilbert-curve + 0.1.3 + + diff --git a/src/main/java/com/github/davidmoten/rtree/PackAlgo.java b/src/main/java/com/github/davidmoten/rtree/PackAlgo.java new file mode 100644 index 00000000..26e4dc49 --- /dev/null +++ b/src/main/java/com/github/davidmoten/rtree/PackAlgo.java @@ -0,0 +1,12 @@ +package com.github.davidmoten.rtree; + +/** + * @author Deven Panchal + * @since 11/27/2017 + */ +public enum PackAlgo { + + STR, + HILBERT; + +} diff --git a/src/main/java/com/github/davidmoten/rtree/RTree.java b/src/main/java/com/github/davidmoten/rtree/RTree.java index b489b026..860f1ffd 100644 --- a/src/main/java/com/github/davidmoten/rtree/RTree.java +++ b/src/main/java/com/github/davidmoten/rtree/RTree.java @@ -4,6 +4,7 @@ import static com.github.davidmoten.guavamini.Optional.of; import static com.github.davidmoten.rtree.geometry.Geometries.rectangle; +import java.math.BigInteger; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -23,6 +24,7 @@ import com.github.davidmoten.rtree.internal.NodeAndEntries; import com.github.davidmoten.rtree.internal.operators.OperatorBoundedPriorityQueue; +import org.davidmoten.hilbert.HilbertCurve; import rx.Observable; import rx.functions.Func1; import rx.functions.Func2; @@ -119,8 +121,8 @@ public static RTree create() { * the geometry type of the entries in the tree * @return a new RTree instance */ - public static RTree create(List> entries) { - return new Builder().create(entries); + public static RTree create(List> entries, PackAlgo packAlgo) { + return new Builder().create(entries, packAlgo); } /** @@ -333,25 +335,35 @@ public RTree create() { } /** - * Create an RTree by bulk loading, using the STR method. STR: a simple and - * efficient algorithm for R-tree packing + * Create an RTree by bulk loading, using {@link PackAlgo}. Supported pack methods. + * 1. STR method. STR: a simple and efficient algorithm for R-tree packing * http://ieeexplore.ieee.org/abstract/document/582015/ + * 2. Hilbert method. + * http://repository.cmu.edu/cgi/viewcontent.cgi?article=1586&context=compsci + * *

* Note: this method mutates the input entries, the internal order of the List * may be changed. *

* - * @param entries - * entries to be added to the r-tree + * @param entries entries to be added to the r-tree * @return a loaded RTree */ @SuppressWarnings("unchecked") - public RTree create(List> entries) { + public RTree create(List> entries, PackAlgo packAlgo) { setDefaultCapacity(); Context context = new Context(minChildren.get(), maxChildren.get(), selector, splitter, (Factory) factory); - return packingSTR(entries, true, entries.size(), context); + + switch (packAlgo) { + case STR: + return packingSTR(entries, true, entries.size(), context); + case HILBERT: + return packingHilbert(entries, true, entries.size(), context); + default: + return packingSTR(entries, true, entries.size(), context); + } } private void setDefaultCapacity() { @@ -411,6 +423,48 @@ private RTree packingSTR(List RTree packingHilbert(List objects, + boolean isLeaf, int size, + Context context) { + int capacity = (int) Math.round(maxChildren.get() * loadingFactor); + //int capacity = maxChildren.get(); // apply loading factor for better balance? + int nodeCount = (int) Math.ceil(1.0 * objects.size() / capacity); + + if (nodeCount == 0) { + return create(); + } else if (nodeCount == 1) { + Node root; + if (isLeaf) { + root = context.factory().createLeaf((List>) objects, context); + } else { + root = context.factory().createNonLeaf((List>) objects, context); + } + return new RTree(of(root), size, context); + } + + if (isLeaf) { + Collections.sort(objects, new HilbertComparator((short)2)); + } + + List> nodes = new ArrayList>(nodeCount); + for (int i = 0; i < objects.size(); i += capacity) { + if (isLeaf) { + List> entries = (List>) objects.subList(i, Math.min(objects.size(), i + capacity)); + Node leaf = context.factory().createLeaf(entries, context); + nodes.add(leaf); + } else { + List> children = (List>) objects.subList(i, Math.min(objects.size(), i + capacity)); + Node nonleaf = context.factory().createNonLeaf(children, context); + nodes.add(nonleaf); + } + } + + return packingHilbert(nodes, false, size, context); + } + + + private static final class MidComparator implements Comparator { private final short dimension; // leave space for multiple dimensions, 0 for x, 1 for y, // ... @@ -433,6 +487,28 @@ private double mid(HasGeometry o) { } } + private static final class HilbertComparator implements Comparator { + private final short dimension; // supporting N dimensions. + + public HilbertComparator(short dim) { + dimension = dim; + } + + @Override + public int compare(HasGeometry o1, HasGeometry o2) { + return Float.compare(twoDCHilbertIndex(o1), twoDCHilbertIndex(o2)); + } + + private float twoDCHilbertIndex(HasGeometry o) { + Rectangle mbr = o.geometry().mbr(); + HilbertCurve c = HilbertCurve.bits(31).dimensions(dimension); //31 + int centerX = (int) ((mbr.x1() + mbr.x2()) / 2); + int centerY = (int) ((mbr.y1() + mbr.y2()) / 2); + BigInteger index = c.index(centerX, centerY); + return index.floatValue(); + } + } + } /** diff --git a/src/test/java/com/github/davidmoten/rtree/BenchmarksRTree.java b/src/test/java/com/github/davidmoten/rtree/BenchmarksRTree.java index 8cc2fef4..97a845c2 100644 --- a/src/test/java/com/github/davidmoten/rtree/BenchmarksRTree.java +++ b/src/test/java/com/github/davidmoten/rtree/BenchmarksRTree.java @@ -1,5 +1,6 @@ package com.github.davidmoten.rtree; +import static com.github.davidmoten.rtree.PackAlgo.STR; import static com.github.davidmoten.rtree.Utilities.entries1000; import java.io.ByteArrayInputStream; @@ -154,12 +155,12 @@ public RTree flatBufferRTreeCreation010() { @Benchmark public RTree bulkLoadingRTreeCreation010() { - return RTree.maxChildren(10). create(entries); + return RTree.maxChildren(10).create(entries, STR); } @Benchmark public RTree bulkLoadingFullRTreeCreation010() { - return RTree.maxChildren(10).loadingFactor(1.0). create(entries); + return RTree.maxChildren(10).loadingFactor(1.0).create(entries, STR); } @Benchmark diff --git a/src/test/java/com/github/davidmoten/rtree/RTreeTest.java b/src/test/java/com/github/davidmoten/rtree/RTreeTest.java index 624604c9..15b6de5e 100644 --- a/src/test/java/com/github/davidmoten/rtree/RTreeTest.java +++ b/src/test/java/com/github/davidmoten/rtree/RTreeTest.java @@ -1,6 +1,8 @@ package com.github.davidmoten.rtree; import static com.github.davidmoten.rtree.Entries.entry; +import static com.github.davidmoten.rtree.PackAlgo.HILBERT; +import static com.github.davidmoten.rtree.PackAlgo.STR; import static com.github.davidmoten.rtree.geometry.Geometries.circle; import static com.github.davidmoten.rtree.geometry.Geometries.line; import static com.github.davidmoten.rtree.geometry.Geometries.point; @@ -27,6 +29,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import org.junit.Assert; import org.junit.Test; import com.github.davidmoten.guavamini.Lists; @@ -110,14 +113,14 @@ public void testVisualizerWithEmptyTree() { @Test public void testBulkLoadingEmpty() { - RTree tree = RTree.create(new ArrayList>()); + RTree tree = RTree.create(new ArrayList>(), STR); assertTrue(tree.entries().isEmpty().toBlocking().single()); } @Test public void testBulkLoadingWithOneItemIsNotEmpty() { @SuppressWarnings("unchecked") - RTree tree = RTree.create(Arrays.asList(e(1))); + RTree tree = RTree.create(Arrays.asList(e(1)), STR); assertFalse(tree.isEmpty()); } @@ -130,17 +133,18 @@ public void testBulkLoadingEntryCount() { // "),"); entries.add(new EntryDefault(i, point)); } - RTree tree = RTree.create(entries); + RTree tree = RTree.create(entries, STR); int entrySize = tree.entries().count().toBlocking().single(); System.out.println("entry count: " + entrySize); assertEquals(entrySize, entries.size()); + tree.visualize(2000, 2000).save("target/check.png"); } @SuppressWarnings("unchecked") @Test public void testSearchOnOneItemOnBulkLoadingRTree() { Entry entry = e(1); - RTree tree = RTree.create(Arrays.asList(entry)); + RTree tree = RTree.create(Arrays.asList(entry), STR); assertEquals(Arrays.asList(entry), tree.search(r(1)).toList().toBlocking().single()); } @@ -570,8 +574,11 @@ public void testVisualizer() { RTree tree2 = RTree.star().maxChildren(maxChildren).create().add(entries); tree2.visualize(600, 600).save("target/tree2.png"); - RTree tree3 = RTree.maxChildren(maxChildren).create(entries); + RTree tree3 = RTree.maxChildren(maxChildren).create(entries, STR); tree3.visualize(600, 600).save("target/tree3.png"); + + RTree tree4 = RTree.maxChildren(maxChildren).create(entries, HILBERT); + tree4.visualize(600, 600).save("target/tree4.png"); } @Test(expected = RuntimeException.class) @@ -613,7 +620,7 @@ public Object call(byte[] t) { .add(entries); tree2.visualize(2000, 2000).save("target/greek2.png"); - RTree tree3 = RTree.maxChildren(maxChildren).create(entries); + RTree tree3 = RTree.maxChildren(maxChildren).create(entries, STR); tree3.visualize(2000, 2000).save("target/greek3.png"); } @@ -758,10 +765,15 @@ public void testBulkLoadingTreeAndStarTreeReturnsSameAsStandardRTree() { RTree tree1 = RTree.create(); RTree tree2 = RTree.star().create(); - Rectangle[] testRects = { rectangle(0, 0, 0, 0), rectangle(0, 0, 100, 100), - rectangle(0, 0, 10, 10), rectangle(0.12, 0.25, 50.356, 50.756), - rectangle(1, 0.252, 50, 69.23), rectangle(13.12, 23.123, 50.45, 80.9), - rectangle(10, 10, 50, 50) }; + Rectangle[] testRects = { + rectangle(0, 0, 0, 0), + rectangle(0, 0, 100, 100), + rectangle(0, 0, 10, 10), + rectangle(0.12, 0.25, 50.356, 50.756), + rectangle(1, 0.252, 50, 69.23), + rectangle(13.12, 23.123, 50.45, 80.9), + rectangle(10, 10, 50, 50) + }; List> entries = new ArrayList>(10000); for (int i = 1; i <= 10000; i++) { @@ -772,11 +784,15 @@ public void testBulkLoadingTreeAndStarTreeReturnsSameAsStandardRTree() { tree2 = tree2.add(i, point); entries.add(new EntryDefault(i, point)); } - RTree tree3 = RTree.create(entries); + RTree tree3 = RTree.create(entries, STR); + + List> entries2 = new ArrayList>(entries); + RTree tree4 = RTree.create(entries2, HILBERT); + // tree1.visualize(2000, 2000).save("target/tree11.png"); // tree2.visualize(2000, 2000).save("target/tree22.png"); // tree3.visualize(2000, 2000).save("target/tree33.png"); - + // tree4.visualize(2000, 2000).save("target/tree44.png"); for (Rectangle r : testRects) { Set res1 = new HashSet(tree1.search(r) .map(RTreeTest.toValue()).toList().toBlocking().single()); @@ -784,13 +800,157 @@ public void testBulkLoadingTreeAndStarTreeReturnsSameAsStandardRTree() { .map(RTreeTest.toValue()).toList().toBlocking().single()); Set res3 = new HashSet(tree3.search(r) .map(RTreeTest.toValue()).toList().toBlocking().single()); + Set res4 = new HashSet(tree4.search(r) + .map(RTreeTest.toValue()).toList().toBlocking().single()); System.out.println("searchRect= rectangle(" + r.x1() + "," + r.y1() + "," + r.x2() + "," + r.y2() + ")"); System.out.println("res1.size=" + res1.size() + ",res2.size=" + res2.size() - + ",res3.size=" + res3.size()); - // System.out.println("res1=" + res1 + ",res2=" + res2 + ",res3=" + res3); + + ",res3.size=" + res3.size() + ",res4.size=" + res4.size()); + //System.out.println("res1=" + res1 + "\nres2=" + res2 + "\nres3=" + res3 + "\nres4=" + res4); + + assertEquals(res1.size(), res2.size()); + assertEquals(res1.size(), res3.size()); + assertEquals(res1.size(), res4.size()); + } + } + + /** + * This one is very similar to previous one, except it tests radial search results. + */ + @Test + public void testBulkLoadingWithRadialSearchReturnsSameAsStandardRTree() { + + RTree tree1 = RTree.create(); + RTree tree2 = RTree.star().create(); + + Point[] testPoints = { + point(0, 0), + point(100, 100), + point(10, 10), + point(50.356, 50.756), + point(50, 69.23), + point(50.45, 80.9), + point(50, 50) + }; + + List> entries = new ArrayList>(10000); + for (int i = 1; i <= 10000; i++) { + Point point = nextPoint(); + // System.out.println("point(" + point.x() + "," + point.y() + + // "),"); + tree1 = tree1.add(i, point); + tree2 = tree2.add(i, point); + entries.add(new EntryDefault(i, point)); + } + System.out.println("tree1 created."); + System.out.println("tree2 created."); + + RTree tree3 = RTree.create(entries, STR); + System.out.println("tree3 created."); + + List> entries2 = new ArrayList>(entries); + RTree tree4 = RTree.create(entries2, HILBERT); + System.out.println("tree4 created."); + + tree1.visualize(2000, 2000).save("target/tree11.png"); + System.out.println("tree1 visualization created."); + tree2.visualize(2000, 2000).save("target/tree22.png"); + System.out.println("tree2 visualization created."); + tree3.visualize(2000, 2000).save("target/tree33.png"); + System.out.println("tree3 visualization created."); + tree4.visualize(2000, 2000).save("target/tree44.png"); + System.out.println("tree4 visualization created."); + + for (Point p : testPoints) { + System.out.println("searchRect= point(" + p.x() + "," + p.y() + ")"); + + Set res1 = new HashSet(tree1.search(p, 10).map(RTreeTest. toValue()).toList().toBlocking().single()); + System.out.println("res1.size=" + res1.size()); + Set res2 = new HashSet(tree2.search(p, 10).map(RTreeTest. toValue()).toList().toBlocking().single()); + System.out.println("res2.size=" + res2.size()); + Set res3 = new HashSet(tree3.search(p, 10).map(RTreeTest. toValue()).toList().toBlocking().single()); + System.out.println("res3.size=" + res3.size()); + Set res4 = new HashSet(tree4.search(p, 10).map(RTreeTest. toValue()).toList().toBlocking().single()); + System.out.println("res4.size=" + res4.size()); + + //System.out.println("res1.size=" + res1.size() + ",res2.size=" + res2.size() + ",res3.size=" + res3.size() + ",res4.size=" + res4.size()); + //System.out.println("res1=" + res1 + "\nres2=" + res2 + "\nres3=" + res3 + "\nres4=" + res4); + assertEquals(res1.size(), res2.size()); assertEquals(res1.size(), res3.size()); + assertEquals(res1.size(), res4.size()); + } + } + + @Test + public void testHilbertPack() { + Rectangle[] testRects = { + rectangle(0, 0, 0, 0), + rectangle(0, 0, 100, 100), + rectangle(0, 0, 10, 10), + rectangle(0.12, 0.25, 50.356, 50.756), + rectangle(1, 0.252, 50, 69.23), + rectangle(13.12, 23.123, 50.45, 80.9), + rectangle(10, 10, 50, 50) + }; + + List> entries = new ArrayList>(10); + entries.add(new EntryDefault(1, Geometries.rectangle(7, 8, 13, 37))); + entries.add(new EntryDefault(2, Geometries.rectangle(9, 7, 27, 13))); + entries.add(new EntryDefault(3, Geometries.rectangle(20, 16, 35, 25))); + entries.add(new EntryDefault(4, Geometries.rectangle(20, 60, 30, 75))); + entries.add(new EntryDefault(5, Geometries.rectangle(35, 65, 53, 75))); + entries.add(new EntryDefault(6, Geometries.rectangle(45, 35, 60, 50))); + entries.add(new EntryDefault(7, Geometries.rectangle(65, 30, 72, 40))); + entries.add(new EntryDefault(8, Geometries.rectangle(55, 15, 74, 20))); + entries.add(new EntryDefault(9, Geometries.rectangle(70, 10, 80, 26))); + + RTree tree = RTree.create(entries, HILBERT); + System.out.println("tree created."); + + tree.visualize(600, 600 ).save("target/hrtree.png"); + System.out.println("tree visualization created."); + + Set res0 = new HashSet(tree.search(testRects[0]).map(RTreeTest. toValue()).toList().toBlocking().single()); + Assert.assertEquals(0, res0.size()); + + Set res1 = new HashSet(tree.search(testRects[1]).map(RTreeTest. toValue()).toList().toBlocking().single()); + Assert.assertEquals(9, res1.size()); + + Set res2 = new HashSet(tree.search(testRects[2]).map(RTreeTest. toValue()).toList().toBlocking().single()); + Assert.assertEquals(2, res2.size()); + Assert.assertTrue(res2.contains(1)); + Assert.assertTrue(res2.contains(2)); + + Set res3 = new HashSet(tree.search(testRects[3]).map(RTreeTest. toValue()).toList().toBlocking().single()); + Assert.assertEquals(4, res3.size()); + Assert.assertTrue(res3.contains(1)); + Assert.assertTrue(res3.contains(2)); + Assert.assertTrue(res3.contains(3)); + Assert.assertTrue(res3.contains(6)); + + Set res4 = new HashSet(tree.search(testRects[4]).map(RTreeTest. toValue()).toList().toBlocking().single()); + Assert.assertEquals(6, res4.size()); + + Set res5 = new HashSet(tree.search(testRects[5]).map(RTreeTest. toValue()).toList().toBlocking().single()); + Assert.assertEquals(4, res5.size()); + Assert.assertTrue(res5.contains(3)); + Assert.assertTrue(res5.contains(4)); + Assert.assertTrue(res5.contains(5)); + Assert.assertTrue(res5.contains(6)); + + Set res6 = new HashSet(tree.search(testRects[6]).map(RTreeTest. toValue()).toList().toBlocking().single()); + Assert.assertEquals(4, res6.size()); + + for (Rectangle r : testRects) { + System.out.println("searchRect= rectangle(" + r.x1() + "," + r.y1() + "," + r.x2() + "," + r.y2()+ ")"); + + Set res = new HashSet(tree.search(r).map(RTreeTest. toValue()).toList().toBlocking().single()); + System.out.println("res.size=" + res.size()); + for (Integer i : res) { + System.out.print(i + " "); + } + System.out.println(); } } From 9d18cac7db612eab9bff25d5b1e5757cd81d5d16 Mon Sep 17 00:00:00 2001 From: Deven Panchal Date: Mon, 5 Feb 2018 13:58:46 -0800 Subject: [PATCH 2/2] + Capture and Sort Hilbert values with higher bit precision. 32 >> 64. + Switch to 52bit Hilbert sort precision for Hilbert pack. Default to 24bit. --- .../com/github/davidmoten/rtree/RTree.java | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/github/davidmoten/rtree/RTree.java b/src/main/java/com/github/davidmoten/rtree/RTree.java index 860f1ffd..be2d3f30 100644 --- a/src/main/java/com/github/davidmoten/rtree/RTree.java +++ b/src/main/java/com/github/davidmoten/rtree/RTree.java @@ -378,7 +378,7 @@ private void setDefaultCapacity() { @SuppressWarnings("unchecked") private RTree packingSTR(List objects, - boolean isLeaf, int size, Context context) { + boolean isLeaf, int size, Context context) { int capacity = (int) Math.round(maxChildren.get() * loadingFactor); int nodeCount = (int) Math.ceil(1.0 * objects.size() / capacity); @@ -425,10 +425,8 @@ private RTree packingSTR(List RTree packingHilbert(List objects, - boolean isLeaf, int size, - Context context) { + boolean isLeaf, int size, Context context) { int capacity = (int) Math.round(maxChildren.get() * loadingFactor); - //int capacity = maxChildren.get(); // apply loading factor for better balance? int nodeCount = (int) Math.ceil(1.0 * objects.size() / capacity); if (nodeCount == 0) { @@ -444,7 +442,8 @@ private RTree packingHilbert(List> nodes = new ArrayList>(nodeCount); @@ -489,23 +488,32 @@ private double mid(HasGeometry o) { private static final class HilbertComparator implements Comparator { private final short dimension; // supporting N dimensions. + private final int bitPrecision; // maps this precision in distance. - public HilbertComparator(short dim) { - dimension = dim; + public HilbertComparator(short dim, int bitPrecision) { + this.dimension = dim; + if (bitPrecision >= 0 && bitPrecision < 64) { + this.bitPrecision = bitPrecision; + } else { + this.bitPrecision = 24; // default to ~10000 meters + } } @Override public int compare(HasGeometry o1, HasGeometry o2) { - return Float.compare(twoDCHilbertIndex(o1), twoDCHilbertIndex(o2)); + Long h1 = twoDCHilbertIndex(o1); + Long h2 = twoDCHilbertIndex(o2); + //System.out.println("h1: " + h1 + ", h2: " + h2); + return Long.compare(h1, h2); } - private float twoDCHilbertIndex(HasGeometry o) { + private Long twoDCHilbertIndex(HasGeometry o) { Rectangle mbr = o.geometry().mbr(); - HilbertCurve c = HilbertCurve.bits(31).dimensions(dimension); //31 + HilbertCurve c = HilbertCurve.bits(bitPrecision).dimensions(dimension); int centerX = (int) ((mbr.x1() + mbr.x2()) / 2); int centerY = (int) ((mbr.y1() + mbr.y2()) / 2); BigInteger index = c.index(centerX, centerY); - return index.floatValue(); + return index.longValue(); } }