diff --git a/.gitignore b/.gitignore index 433ea98..8d6a179 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ classes /.lein-* .nrepl-port out + +*iml +.idea/* diff --git a/loom.iml b/loom.iml new file mode 100644 index 0000000..12b3e5d --- /dev/null +++ b/loom.iml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/loom/alg.cljc b/src/loom/alg.cljc index b3008b1..8f25f19 100644 --- a/src/loom/alg.cljc +++ b/src/loom/alg.cljc @@ -10,9 +10,11 @@ can use these functions." out-degree in-degree weighted? directed? graph digraph transpose] :as graph] [loom.alg-generic :refer [trace-path preds->span]] - #?(:clj [clojure.data.priority-map :as pm] - :cljs [tailrecursion.priority-map :as pm]) - [clojure.set :as clj.set])) + #?(:clj + [clojure.data.priority-map :as pm] + :cljs [tailrecursion.priority-map :as pm]) + [clojure.set :as clj.set] + [clojure.set :as set])) ;;; ;;; Convenience wrappers for loom.alg-generic functions @@ -792,3 +794,32 @@ can use these functions." (edges g1)))))) ;; ;; Todo: MST, coloring, matching, etc etc + +(defn clustering-coefficient + "Computes clustering coefficient as described in Watts and Strogatz (1998). + When g and node are supplied as arguments, returns the local clustering + coefficient for node in g. When only g is supplied as an argument, returns + the average over all clustering coefficients. For details see + Watts, Duncan J., and Steven H. Strogatz. “Collective Dynamics of ‘Small-World’ + Networks.” Nature 393, no. 6684 (June 1998): 440–42. https://doi.org/10.1038/30918." + ([g node] + (let [neighbours (successors g node) + potential-connections (/ (* (count neighbours) (dec (count neighbours))) 2)] + (if (> (count neighbours) 1) + ;; how many connections between the neighbours + ;; of node are there? + (let [neighbour-overlaps (for [n neighbours] + (let [potential (clj.set/difference neighbours #{n}) + actual (successors g n) + overlap (clj.set/intersection potential actual)] + (count overlap))) + sum-overlaps (reduce + 0 neighbour-overlaps)] + (/ sum-overlaps potential-connections 2)) + ;; if there are not at least 2 neighbours, + ;; clustering-coefficient is set to zero + 0))) + ([g] + (let [nodeset (nodes g) + sum-coeffs (reduce #(+ %1 (clustering-coefficient g %2)) 0 nodeset)] + (/ sum-coeffs (count nodeset))))) + diff --git a/src/loom/gen.clj b/src/loom/gen.clj index 662c8cd..188c2ba 100644 --- a/src/loom/gen.clj +++ b/src/loom/gen.clj @@ -1,7 +1,7 @@ (ns ^{:doc "Graph-generating functions" :author "Justin Kramer"} loom.gen - (:require [loom.graph :refer [weighted? directed? add-nodes* add-edges*]])) + (:require [loom.graph :refer [weighted? directed? add-nodes* add-edges* nodes]])) (defn gen-rand "Adds num-nodes nodes and approximately num-edges edges to graph g. Nodes @@ -53,3 +53,40 @@ (-> g (add-nodes* nodes) (add-edges* edges)))) + +(defn gen-barabasi-albert + "Generate a preferential attachment graph as described in Barabasi + and Albert (1999)." + ([g num-nodes num-edges seed] + (let [rnd (java.util.Random. seed) + ;; initialize graph with two connected nodes + ;; with equal probability, a new node will attach to + ;; either one + g-0 (loom.graph/add-edges g [0 1]) + ;; predicate for deciding wether a node + ;; should be connected to a new node + connect? (fn [g node] + (let [degree-node (count (loom.graph/successors g node)) + degree-sum (reduce #(+ %1 (count (loom.graph/successors g %2))) 0 (nodes g))] + (<= (/ degree-node degree-sum) (.nextDouble rnd)))) + ;; go through all nodes in g and decide whether + ;; they connect to new + new-edges (fn [g new] + (for [n (nodes g) + :when (connect? g n)] + [new n])) + ;; compute num-edges edges for new in graph g + get-new-edges-and-connect (fn [g new num-edges] + (as-> g v + (new-edges v new) + (take num-edges v) + (filter #(= 2 (count %)) v) + (apply loom.graph/add-edges g v))) + ;; two nodes are already in the initialized graph + ;; the remaining notes will be added + remaining-nodes (range 2 num-nodes) + ] + + (reduce #(get-new-edges-and-connect %1 %2 num-edges) g-0 remaining-nodes))) + ([g num-nodes num-edges] + (gen-barabasi-albert g num-nodes num-edges (System/nanoTime)))) diff --git a/test/loom/test/alg.cljc b/test/loom/test/alg.cljc index 25d9565..56a8bdd 100644 --- a/test/loom/test/alg.cljc +++ b/test/loom/test/alg.cljc @@ -13,7 +13,8 @@ coloring? greedy-coloring prim-mst-edges prim-mst-edges prim-mst astar-path astar-dist degeneracy-ordering maximal-cliques - subgraph? eql? isomorphism?]] + subgraph? eql? isomorphism? + clustering-coefficient]] [loom.derived :refer [mapped-by]] clojure.walk #?@(:clj [[clojure.test :refer :all]] @@ -627,3 +628,24 @@ false (isomorphism? g7 (mapped-by inc g7) dec) false (isomorphism? (digraph) (graph) identity) false(isomorphism? (digraph [1 2]) (graph [1 2]) identity))) + +(deftest clustering-coefficient-test + (let [g1 (graph {0 #{}, 1 #{}, 2 #{}, 3 #{}}) ; empty graph + g2 (graph {0 #{1 2 3}, 1 #{0 2 3}, 2 #{0 1 3}, 3 #{0 1 2}}) ; fully connected + g3 (graph {0 #{1 4}, 1 #{2 0}, 2 #{3 1}, 3 #{4 2}, 4 #{0 3}}) ; circle + g4 (graph {0 #{1 2 3}, 1 #{0 3}, 2 #{0}, 3 #{0 1}}) + g5 (graph [0 1] [0 2] [0 3] [1 3])] + (testing "local clustering coefficients" + (are [expected got] (= expected got) + (clustering-coefficient g1 0) 0 + (clustering-coefficient g2 1) 1 + (clustering-coefficient g3 0) 0 + (clustering-coefficient g4 0) (/ 1 3) + (clustering-coefficient g5 0) (/ 1 3))) + (testing "global clustering coefficients" + (are [expected got] (= expected got) + (clustering-coefficient g1) 0 + (clustering-coefficient g2) 1 + (clustering-coefficient g3) 0 + (clustering-coefficient g4) (/ 7 12) + (clustering-coefficient g4) (clustering-coefficient g5))))) diff --git a/test/loom/test/gen.clj b/test/loom/test/gen.clj new file mode 100644 index 0000000..3502c7c --- /dev/null +++ b/test/loom/test/gen.clj @@ -0,0 +1,32 @@ +(ns loom.test.gen + (:require [clojure.test :refer (deftest testing is are)] + [loom.graph :refer (graph digraph weighted-graph weighted-digraph graph? nodes edges)] + [loom.alg :refer (clustering-coefficient)] + [loom.gen :refer (gen-circle gen-newman-watts gen-barabasi-albert)])) + +(deftest gen-barabasi-albert-test + (let [g (graph) + percentage (fn [num percent] + (* (/ percent 100) num)) + expected-edge-count (fn [num-nodes] + (+ 2 (* 2 (dec num-nodes))))] + (testing "Construction" + (are [graphs] loom.graph/graph? + (gen-barabasi-albert g 10 1) + (gen-barabasi-albert g 20 2) + (gen-barabasi-albert g 42 5))) + (testing "Node Count" + ;; Because creating the graph involves probabilistic decisions + ;; the actual number of nodes may be a bit lower than the expected count + (are [num-nodes degree] (< (percentage num-nodes 95) (count (nodes (gen-barabasi-albert g num-nodes degree)))) + 200 3 + 100 1 + 567 7 + 980 20)) + (testing "Edge Count" + ;; same problem as with node count + (are [num-nodes degree] (< (percentage (expected-edge-count num-nodes) 95) (count (edges (gen-barabasi-albert g num-nodes degree)))) + 200 3 + 100 1 + 567 7 + 980 20)))) \ No newline at end of file