From 96995daf9f667a58d5298fe463130e6adf388bc0 Mon Sep 17 00:00:00 2001 From: Sasha Kolpakov Date: Fri, 21 Nov 2025 01:51:16 +0100 Subject: [PATCH 1/3] Enhance Quick Start with Colab badge and cleanup Updated the Quick Start section to include a badge for Google Colab and removed the separate Google Colab Notebook section. --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index a36a23d..1e3326e 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ From source: pip install git+https://github.com/igorrivin/graphem.git ``` -## Quick Start +## Quick Start [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/igorrivin/graphem/blob/main/examples/graphem_notebook.ipynb) ### Graph Embedding @@ -118,10 +118,6 @@ ge.report_full_correlation_matrix( results['node_load'] ) ``` -### Google Colab Notebook - -You can check out most of the graphem features in Google Colab by running the demo notebook [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/igorrivin/graphem/blob/main/examples/graphem_notebook.ipynb) - ## Key Components From f17afeb73f7b13a5fac8e4d832c124601ede5ccb Mon Sep 17 00:00:00 2001 From: Sasha Kolpakov Date: Tue, 24 Feb 2026 14:56:42 -0600 Subject: [PATCH 2/3] Add files via upload --- examples/graphem_jax_notebook.ipynb | 15670 ++++++++++++++++++++++++++ 1 file changed, 15670 insertions(+) create mode 100644 examples/graphem_jax_notebook.ipynb diff --git a/examples/graphem_jax_notebook.ipynb b/examples/graphem_jax_notebook.ipynb new file mode 100644 index 0000000..45a63e3 --- /dev/null +++ b/examples/graphem_jax_notebook.ipynb @@ -0,0 +1,15670 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "17QZz7os6Yyr" + }, + "source": [ + "### GraphEm installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": 
"ZQeUMXoeWlpx", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5c18c3f6-90a0-43a0-c956-25e9edebff25" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'graphem'...\n", + "remote: Enumerating objects: 1492, done.\u001b[K\n", + "remote: Counting objects: 100% (231/231), done.\u001b[K\n", + "remote: Compressing objects: 100% (107/107), done.\u001b[K\n", + "remote: Total 1492 (delta 165), reused 128 (delta 124), pack-reused 1261 (from 2)\u001b[K\n", + "Receiving objects: 100% (1492/1492), 29.90 MiB | 38.46 MiB/s, done.\n", + "Resolving deltas: 100% (945/945), done.\n", + "/content/graphem\n", + "Obtaining file:///content/graphem\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n", + " Preparing editable metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: jax>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (0.7.2)\n", + "Requirement already satisfied: jaxlib>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (0.7.2)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (2.0.2)\n", + "Requirement already satisfied: matplotlib>=3.5.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (3.10.0)\n", + "Requirement already satisfied: networkx>=2.6.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (3.6.1)\n", + "Requirement already satisfied: pandas>=1.3.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (2.2.2)\n", + "Requirement already satisfied: plotly>=5.5.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (5.24.1)\n", + "Requirement already satisfied: scipy>=1.7.0 in 
/usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (1.16.3)\n", + "Collecting ndlib>=5.1.0 (from graphem-jax==0.2.0)\n", + " Downloading ndlib-5.1.1-py3-none-any.whl.metadata (5.5 kB)\n", + "Collecting loguru>=0.6.0 (from graphem-jax==0.2.0)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: requests>=2.25.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (2.32.4)\n", + "Collecting line_profiler>=4.0.0 (from graphem-jax==0.2.0)\n", + " Downloading line_profiler-5.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (30 kB)\n", + "Collecting snakeviz>=2.2.0 (from graphem-jax==0.2.0)\n", + " Downloading snakeviz-2.2.2-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting tensorboard>=2.10.0 (from graphem-jax==0.2.0)\n", + " Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: tqdm>=4.66.0 in /usr/local/lib/python3.12/dist-packages (from graphem-jax==0.2.0) (4.67.1)\n", + "Collecting pyinstrument>=5.0.0 (from graphem-jax==0.2.0)\n", + " Downloading pyinstrument-5.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (26 kB)\n", + "Collecting tabulate>=0.9.0 (from graphem-jax==0.2.0)\n", + " Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)\n", + "Requirement already satisfied: ml_dtypes>=0.5.0 in /usr/local/lib/python3.12/dist-packages (from jax>=0.3.0->graphem-jax==0.2.0) (0.5.4)\n", + "Requirement already satisfied: opt_einsum in /usr/local/lib/python3.12/dist-packages (from jax>=0.3.0->graphem-jax==0.2.0) (3.4.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (1.3.3)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (0.12.1)\n", + "Requirement already satisfied: 
fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (4.61.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (1.4.9)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (25.0)\n", + "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (12.0.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (3.2.5)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.5.0->graphem-jax==0.2.0) (2.9.0.post0)\n", + "Collecting netdispatch (from ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading netdispatch-0.1.0-py3-none-any.whl.metadata (744 bytes)\n", + "Collecting python-igraph (from ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading python_igraph-1.0.0-py3-none-any.whl.metadata (3.1 kB)\n", + "Collecting dynetx (from ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading dynetx-0.3.2-py3-none-any.whl.metadata (2.9 kB)\n", + "Collecting bokeh (from ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading bokeh-3.8.1-py3-none-any.whl.metadata (10 kB)\n", + "Collecting future (from ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas>=1.3.0->graphem-jax==0.2.0) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas>=1.3.0->graphem-jax==0.2.0) (2025.3)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.12/dist-packages (from plotly>=5.5.0->graphem-jax==0.2.0) 
(9.1.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->graphem-jax==0.2.0) (3.4.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->graphem-jax==0.2.0) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->graphem-jax==0.2.0) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->graphem-jax==0.2.0) (2025.11.12)\n", + "Requirement already satisfied: tornado>=2.0 in /usr/local/lib/python3.12/dist-packages (from snakeviz>=2.2.0->graphem-jax==0.2.0) (6.5.1)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.12/dist-packages (from tensorboard>=2.10.0->graphem-jax==0.2.0) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.12/dist-packages (from tensorboard>=2.10.0->graphem-jax==0.2.0) (1.76.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/lib/python3/dist-packages (from tensorboard>=2.10.0->graphem-jax==0.2.0) (3.3.6)\n", + "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.12/dist-packages (from tensorboard>=2.10.0->graphem-jax==0.2.0) (6.33.2)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard>=2.10.0->graphem-jax==0.2.0) (75.2.0)\n", + "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard>=2.10.0->graphem-jax==0.2.0)\n", + " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", + "Collecting werkzeug>=1.0.1 (from tensorboard>=2.10.0->graphem-jax==0.2.0)\n", + " Downloading werkzeug-3.1.4-py3-none-any.whl.metadata (4.0 kB)\n", + "Requirement already satisfied: typing-extensions~=4.12 in /usr/local/lib/python3.12/dist-packages (from 
grpcio>=1.48.2->tensorboard>=2.10.0->graphem-jax==0.2.0) (4.15.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.7->matplotlib>=3.5.0->graphem-jax==0.2.0) (1.17.0)\n", + "Requirement already satisfied: markupsafe>=2.1.1 in /usr/local/lib/python3.12/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.10.0->graphem-jax==0.2.0) (3.0.3)\n", + "Requirement already satisfied: Jinja2>=2.9 in /usr/local/lib/python3.12/dist-packages (from bokeh->ndlib>=5.1.0->graphem-jax==0.2.0) (3.1.6)\n", + "Requirement already satisfied: narwhals>=1.13 in /usr/local/lib/python3.12/dist-packages (from bokeh->ndlib>=5.1.0->graphem-jax==0.2.0) (2.13.0)\n", + "Requirement already satisfied: PyYAML>=3.10 in /usr/local/lib/python3.12/dist-packages (from bokeh->ndlib>=5.1.0->graphem-jax==0.2.0) (6.0.3)\n", + "Collecting xyzservices>=2021.09.1 (from bokeh->ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading xyzservices-2025.11.0-py3-none-any.whl.metadata (4.3 kB)\n", + "Requirement already satisfied: decorator in /usr/local/lib/python3.12/dist-packages (from dynetx->ndlib>=5.1.0->graphem-jax==0.2.0) (5.2.1)\n", + "Collecting igraph==1.0.0 (from python-igraph->ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading igraph-1.0.0-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (4.4 kB)\n", + "Collecting texttable>=1.6.2 (from igraph==1.0.0->python-igraph->ndlib>=5.1.0->graphem-jax==0.2.0)\n", + " Downloading texttable-1.7.0-py2.py3-none-any.whl.metadata (9.8 kB)\n", + "Downloading line_profiler-5.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m33.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading ndlib-5.1.1-py3-none-any.whl (110 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.2/110.2 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyinstrument-5.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (146 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m146.8/146.8 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading snakeviz-2.2.2-py3-none-any.whl (183 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.5/183.5 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tabulate-0.9.0-py3-none-any.whl (35 kB)\n", + "Downloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m132.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m116.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading werkzeug-3.1.4-py3-none-any.whl (224 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.0/225.0 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading bokeh-3.8.1-py3-none-any.whl (7.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m148.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dynetx-0.3.2-py3-none-any.whl (39 kB)\n", + "Downloading future-1.0.0-py3-none-any.whl (491 
kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m491.3/491.3 kB\u001b[0m \u001b[31m39.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading netdispatch-0.1.0-py3-none-any.whl (3.3 kB)\n", + "Downloading python_igraph-1.0.0-py3-none-any.whl (9.2 kB)\n", + "Downloading igraph-1.0.0-cp39-abi3-manylinux_2_28_x86_64.whl (5.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m149.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xyzservices-2025.11.0-py3-none-any.whl (93 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.9/93.9 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading texttable-1.7.0-py2.py3-none-any.whl (10 kB)\n", + "Building wheels for collected packages: graphem-jax\n", + " Building editable for graphem-jax (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for graphem-jax: filename=graphem_jax-0.2.0-0.editable-py3-none-any.whl size=7082 sha256=5d15c43f6beb9e7e3f417479284f12a030264d2cc61ab816a89705170cabe37f\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ssrqjfhl/wheels/5b/32/b9/b3eff49f778f780fe592cbbb73e257f54675245e07cebf7825\n", + "Successfully built graphem-jax\n", + "Installing collected packages: texttable, xyzservices, werkzeug, tensorboard-data-server, tabulate, snakeviz, pyinstrument, loguru, line_profiler, igraph, future, tensorboard, python-igraph, dynetx, netdispatch, bokeh, ndlib, graphem-jax\n", + "Successfully installed bokeh-3.8.1 dynetx-0.3.2 future-1.0.0 graphem-jax-0.2.0 igraph-1.0.0 line_profiler-5.0.0 loguru-0.7.3 ndlib-5.1.1 netdispatch-0.1.0 pyinstrument-5.1.1 python-igraph-1.0.0 snakeviz-2.2.2 tabulate-0.9.0 tensorboard-2.20.0 tensorboard-data-server-0.7.2 texttable-1.7.0 werkzeug-3.1.4 xyzservices-2025.11.0\n" + ] + } + ], + "source": [ + "!git clone 
https://github.com/sashakolpakov/graphem.git\n", + "# need to be in the repo root\n", + "%cd graphem\n", + "%pip install -e ." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dy9ScQH_tKM4", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4f51ad36-7b84-4fe1-9deb-30ff36e2265d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting umap-learn\n", + " Downloading umap_learn-0.5.9.post2-py3-none-any.whl.metadata (25 kB)\n", + "Collecting trimap\n", + " Downloading trimap-1.1.5-py3-none-any.whl.metadata (9.4 kB)\n", + "Collecting pacmap\n", + " Downloading pacmap-0.8.0-py3-none-any.whl.metadata (14 kB)\n", + "Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.12/dist-packages (from umap-learn) (2.0.2)\n", + "Requirement already satisfied: scipy>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from umap-learn) (1.16.3)\n", + "Requirement already satisfied: scikit-learn>=1.6 in /usr/local/lib/python3.12/dist-packages (from umap-learn) (1.6.1)\n", + "Requirement already satisfied: numba>=0.51.2 in /usr/local/lib/python3.12/dist-packages (from umap-learn) (0.63.1)\n", + "Collecting pynndescent>=0.5 (from umap-learn)\n", + " Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from umap-learn) (4.67.1)\n", + "Collecting annoy>=1.11 (from trimap)\n", + " Downloading annoy-1.17.3.tar.gz (647 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m647.5/647.5 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: llvmlite<0.47,>=0.46.0dev0 in /usr/local/lib/python3.12/dist-packages (from numba>=0.51.2->umap-learn) (0.46.0)\n", + "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.12/dist-packages (from pynndescent>=0.5->umap-learn) (1.5.3)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn>=1.6->umap-learn) (3.6.0)\n", + "Downloading umap_learn-0.5.9.post2-py3-none-any.whl (90 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.1/90.1 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading trimap-1.1.5-py3-none-any.whl (15 kB)\n", + "Downloading pacmap-0.8.0-py3-none-any.whl (21 kB)\n", + "Downloading pynndescent-0.5.13-py3-none-any.whl (56 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: annoy\n", + " Building wheel for annoy (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for annoy: filename=annoy-1.17.3-cp312-cp312-linux_x86_64.whl size=551628 sha256=62303a6f79d6102571584e8229fe704131924684656203475695923fdf30acbb\n", + " Stored in directory: /root/.cache/pip/wheels/db/b9/53/a3b2d1fe1743abadddec6aa541294b24fdbc39d7800bc57311\n", + "Successfully built annoy\n", + "Installing collected packages: annoy, trimap, pynndescent, pacmap, umap-learn\n", + "Successfully installed annoy-1.17.3 pacmap-0.8.0 pynndescent-0.5.13 trimap-1.1.5 umap-learn-0.5.9.post2\n" + ] + } + ], + "source": [ + "# Installs necessary for comparison and benchmarking\n", + "!pip install umap-learn trimap pacmap" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kr0FOYya6mMG" + }, + "source": [ + "### Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g-qhZYfpF0pS" + }, + "outputs": [], + "source": [ + "import plotly.io as pio\n", + "# Plotly settings: either interactive images or stills\n", + "pio.renderers.default = 'colab' # interactive plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o4a04ODvka-Y", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "15187688-2f8f-43bd-a6c8-397d9dbb67f0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/jax/_src/cloud_tpu_init.py:86: UserWarning:\n", + "\n", + "Transparent hugepages are not enabled. TPU runtime startup and shutdown time should be significantly improved on TPU v5e and newer. 
If not already set, you may need to enable transparent hugepages in your VM image (sudo sh -c \"echo always > /sys/kernel/mm/transparent_hugepage/enabled\")\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Level(name='DEBUG', no=10, color='', icon='🐞')" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "import numpy as np\n", + "import networkx as nx\n", + "from time import time\n", + "\n", + "import graphem as ge\n", + "from graphem import GraphEmbedder\n", + "from graphem.benchmark import benchmark_correlations\n", + "from graphem.visualization import report_full_correlation_matrix\n", + "\n", + "import loguru\n", + "\n", + "logger = loguru.logger\n", + "loguru.logger.level(\"DEBUG\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "My3Ow0SM6zzZ" + }, + "source": [ + "### Testing the GraphEm layout" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2L2D83JA5Oii" + }, + "outputs": [], + "source": [
+ "def graph_test(graph_generator,\n",
+ "               graph_params,\n",
+ "               dim=3,\n",
+ "               num_iterations=20,\n",
+ "               L_min=10.0,\n",
+ "               k_attr=0.5,\n",
+ "               k_inter=0.1,\n",
+ "               n_neighbors=15,\n",
+ "               edge_width=0.5,\n",
+ "               node_size=8,\n",
+ "               sample_size=512,\n",
+ "               batch_size=512,\n",
+ "               logger=None):\n",
+ "    \"\"\"Embed a generated graph and display its layout before and after optimisation.\n",
+ "\n",
+ "    ``graph_generator(**graph_params)`` must return an adjacency matrix exposing\n",
+ "    ``shape``, ``sum(axis=1)`` and ``nnz``. ``dim`` becomes ``n_components``,\n",
+ "    ``num_iterations`` goes to ``run_layout``, ``edge_width``/``node_size`` go to\n",
+ "    ``display_layout``; the remaining options are forwarded to ``GraphEmbedder``.\n",
+ "    ``logger`` is optional: when falsy, this function logs nothing. Returns None.\n",
+ "    \"\"\"\n",
+ "    adj = graph_generator(**graph_params)\n",
+ "    n = adj.shape[0]\n",
+ "\n",
+ "    # Degrees are row sums, min-max scaled to [0, 1] for node colouring. A constant-degree\n",
+ "    # graph has zero range, so use a uniform colour instead of dividing by zero.\n",
+ "    deg = np.array(adj.sum(axis=1)).flatten()\n",
+ "    deg_range = np.max(deg) - np.min(deg)\n",
+ "    deg_normalized = (deg - np.min(deg)) / deg_range if deg_range > 0 else np.zeros(n)\n",
+ "\n",
+ "    # NOTE(review): nnz counts stored entries; for a symmetric adjacency matrix this\n",
+ "    # is twice the number of undirected edges - confirm against the generators.\n",
+ "    edges_count = adj.nnz\n",
+ "    if logger:\n",
+ "        logger.debug(f\"Vertices {n}, edges {edges_count}\")\n",
+ "\n",
+ "    gm = GraphEmbedder(\n",
+ "        adjacency=adj,\n",
+ "        n_components=dim,\n",
+ "        L_min=L_min,\n",
+ "        k_attr=k_attr,\n",
+ "        k_inter=k_inter,\n",
+ "        n_neighbors=n_neighbors,\n",
+ "        sample_size=sample_size,\n",
+ "        batch_size=batch_size,\n",
+ "        verbose=True,\n",
+ "        logger_instance=logger\n",
+ "    )\n",
+ "\n",
+ "    if logger:\n",
+ "        logger.info(\"Initial layout\")\n",
+ "    gm.display_layout(edge_width=edge_width, node_size=node_size, node_colors=deg_normalized)\n",
+ "\n",
+ "    _ = gm.run_layout(num_iterations)\n",
+ "\n",
+ "    if logger:\n",
+ "        logger.info(\"Final layout\")\n",
+ "    gm.display_layout(edge_width=edge_width, node_size=node_size, node_colors=deg_normalized)"
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1RgnB8fw7Akv" + }, + "source": [ + "#### Bipartite graph (complete)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bkyTyFKN7_aW", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "7f56f907-c5b4-43f6-91b8-8196a84bf58d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:14.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:14.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:14.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", +
"\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:14.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/10 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Bipartite graph\n", + "\"\"\"\n", + "graph_test(ge.generate_complete_bipartite_graph, {\"n_top\": 50, \"n_bottom\": 100}, dim=2, L_min=10, num_iterations=10)\n", + "#" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oG9zgp9-7MPC" + }, + "source": [ + "#### Balanced tree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3kX0n53_ouf", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "d491a33f-391a-4582-fab5-3025af16c93f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:20.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:22.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:22.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:22.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/10 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Balanced tree\n", + "\"\"\"\n", + "graph_test(ge.generate_balanced_tree, {\"r\": 2, \"h\": 10}, dim=2, sample_size=2048, num_iterations=10)\n", + "#" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zF9cj19h7QDD" + }, + "source": [ + "#### Grid graph / road network graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RYAL4ObrEe13", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "29c038d7-59e1-4ebb-fc4b-802de30cbd20" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:35.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:35.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:35.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:35.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/20 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Grid (road network) graph\n", + "\"\"\"\n", + "graph_test(ge.generate_road_network, {\"width\": 30, \"height\": 40}, dim=2, num_iterations=20)\n", + "#" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uc6TF8Ht7VjH" + }, + "source": [ + "#### Power law cluster graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AdbYVGboFUdj", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "6f9b4e91-1b57-4fe9-8470-0cf7f90cfd2f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:47.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:47.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:16:47.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:16:47.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/20 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Test power cluster graph\n", + "\"\"\"\n", + "graph_test(ge.generate_power_cluster, {\"n\": 1000, \"m\": 5, \"p\": 0.75}, dim=2, sample_size=2048, num_iterations=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w0F7oyGI7f88" + }, + "source": [ + "#### Barabási–Albert graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eLkGhQCwGdAD", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "54df4c05-8fa9-42b4-a340-32e9915b0749" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:05.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:05.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:05.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:06.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/20 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Test Barabási–Albert graph\n", + "\"\"\"\n", + "graph_test(ge.generate_ba, {\"n\": 1000, \"m\": 5}, dim=2, L_min=20, sample_size=2048, num_iterations=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Kr-fpB1V7lc-" + }, + "source": [ + "#### Stochastic Block Model graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hsm65x1_G6yA", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "a80995f0-b0b0-4e47-ad45-56cbd04c40f7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:26.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:26.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:26.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:26.716\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/20 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Test SBM graph\n", + "\"\"\"\n", + "graph_test(ge.generate_sbm, {\"n_per_block\": 100, \"num_blocks\": 4, \"p_in\": 0.15, \"p_out\": 0.01}, dim=2, L_min=60, sample_size=1024, num_iterations=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yc_wdI1p7rOC" + }, + "source": [ + "#### Watts-Strogatz \"small world\" graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eAab1ctXHeUW", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "59302219-54ae-4dd8-8fd7-74492df1fbd5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:42.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:42.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:42.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:42.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/20 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Watts-Strogaz graph\n", + "\"\"\"\n", + "graph_test(ge.generate_ws, {\"n\": 1000, \"k\": 6, \"p\": 0.3}, dim=2, L_min=60, sample_size=1024, num_iterations=20)\n", + "#" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jpTJWGcO7yrP" + }, + "source": [ + "#### Erdös-Renyi random graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tIczAhL6IEhD", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "a3f5ece6-ed30-45bc-eb1f-f902b066295b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:51.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:51.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:17:51.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mdisplay_layout\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mDisplaying layout\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:17:52.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/20 [00:00\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "\"\"\"\n", + "Erdos-Renyi graph\n", + "\"\"\"\n", + "graph_test(ge.generate_er, {\"n\": 1000, \"p\": 0.1}, dim=2, L_min=40, sample_size=2048, num_iterations=20)\n", + "#" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "57FXWmEm8L34" + }, + "source": [ + "### Centrality measures correlation benchmarks" + ] + }, + { + "cell_type": "code", + "source": [ + "benchmark_correlations?" + ], + "metadata": { + "id": "EzNMYU7ISTmi" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E7VjqQmu8Sj_" + }, + "source": [ + "#### Erdös-Renyi random graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XDdIAnRCRWau", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4a3f6bd0-a9dd-46f7-aa4c-ff0149c0312a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2025-12-22 04:20:28.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.benchmark\u001b[0m:\u001b[36mrun_benchmark\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mRunning benchmark with generate_er...\u001b[0m\n", + "\u001b[32m2025-12-22 04:20:28.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.benchmark\u001b[0m:\u001b[36mrun_benchmark\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mGenerated graph with 1000 vertices and 50020 edges\u001b[0m\n", + "\u001b[32m2025-12-22 04:20:28.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.benchmark\u001b[0m:\u001b[36mrun_benchmark\u001b[0m:\u001b[36m53\u001b[0m - \u001b[1mCalculating centrality measures...\u001b[0m\n", + "\u001b[32m2025-12-22 04:20:47.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.benchmark\u001b[0m:\u001b[36mrun_benchmark\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mCreating embedder...\u001b[0m\n", + "\u001b[32m2025-12-22 04:20:47.787\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1mComputing Laplacian embedding\u001b[0m\n", + "\u001b[32m2025-12-22 04:20:47.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36m_laplacian_embedding\u001b[0m:\u001b[36m143\u001b[0m - \u001b[1mLaplacian embedding done\u001b[0m\n", + "\u001b[32m2025-12-22 04:20:47.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.benchmark\u001b[0m:\u001b[36mrun_benchmark\u001b[0m:\u001b[36m104\u001b[0m - \u001b[1mRunning layout for 30 iterations...\u001b[0m\n", + "\u001b[32m2025-12-22 04:20:47.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mgraphem.embedder\u001b[0m:\u001b[36mrun_layout\u001b[0m:\u001b[36m270\u001b[0m - \u001b[1mRunning layout\u001b[0m\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/30 [00:00\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0manalyze_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'snap-ca-GrQc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_iterations\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/content/graphem/examples/real_world_datasets_example.py\u001b[0m in \u001b[0;36manalyze_dataset\u001b[0;34m(dataset_name, sample_size, dim, num_iterations)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0mcols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medges\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 89\u001b[0;31m \u001b[0madjacency\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcsr_matrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mrows\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcols\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_vertices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_vertices\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0madjacency\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madjacency\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0madjacency\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m \u001b[0;31m# Make symmetric\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/scipy/sparse/_compressed.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, arg1, shape, dtype, copy, maxprint)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;31m# (data, ij) format\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0mcoo\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_coo_container\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0marrays\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcoo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_coo_to_compressed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_swap\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindptr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_shape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marrays\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/scipy/sparse/_coo.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, arg1, shape, dtype, copy, maxprint)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoords\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint64\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0midx\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/scipy/sparse/_coo.py\u001b[0m in \u001b[0;36m_check\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m raise ValueError(f'axis {i} index {idx.max()} exceeds '\n\u001b[0m\u001b[1;32m 223\u001b[0m f'matrix dimension {self.shape[i]}')\n\u001b[1;32m 224\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: axis 0 index 26058 exceeds matrix dimension 
5241" + ] + } + ], + "source": [ + "analyze_dataset('snap-ca-GrQc', sample_size=None, dim=3, num_iterations=30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vSoASBY54LjY" + }, + "outputs": [], + "source": [ + "try:\n", + " analyze_dataset('snap-ca-GrQc', sample_size=None, dim=4, num_iterations=30)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ze2mVf2FBX7O" + }, + "outputs": [], + "source": [ + "try:\n", + " analyze_dataset('snap-ca-GrQc', sample_size=None, dim=6, num_iterations=30)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pEBi3Xcr-p8o" + }, + "source": [ + "#### Social Circles: Facebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uBkDxs8uO2Zb" + }, + "outputs": [], + "source": [ + "analyze_dataset('snap-facebook_combined', sample_size=None, dim=3, num_iterations=30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GFmahCvb4joT" + }, + "outputs": [], + "source": [ + "try:\n", + " analyze_dataset('snap-facebook_combined', sample_size=None, dim=4, num_iterations=30)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PN0Wp4-uBep8" + }, + "outputs": [], + "source": [ + "try:\n", + " analyze_dataset('snap-facebook_combined', sample_size=None, dim=6, num_iterations=30)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aOlswPDm-wSA" + }, + "source": [ + "#### Wikipedia vote network" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZY0YnyCqYqSD" + }, + "outputs": [], + "source": [ + "analyze_dataset('snap-wiki-vote', sample_size=None, dim=3, num_iterations=30)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "IiQVMBj4B8IT" + }, + "outputs": [], + "source": [ + "try:\n", + " analyze_dataset('snap-wiki-vote', sample_size=None, dim=4, num_iterations=30)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rcAFoebX56Sp" + }, + "outputs": [], + "source": [ + "try:\n", + " analyze_dataset('snap-wiki-vote', sample_size=None, dim=6, num_iterations=30)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G1dyZQYm-354" + }, + "source": [ + "### Node influence maximization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BF82-9GSBvEV" + }, + "outputs": [], + "source": [ + "from time import time\n", + "from graphem_rapids.influence import graphem_seed_selection, greedy_seed_selection, ndlib_estimated_influence" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lhmd7rvPC8yH" + }, + "outputs": [], + "source": [ + "import ndlib.models.ModelConfig as mc\n", + "import ndlib.models.epidemics as ep\n", + "\n", + "def ndlib_estimated_influence(G, seeds, p=0.1, iterations_count=200):\n", + " \"\"\"\n", + " Run NDlib's Independent Cascades model on graph G, starting with the given seeds,\n", + " and return the estimated final influence (number of nodes in state 2) and\n", + " the number of iterations executed.\n", + " \"\"\"\n", + " model = ep.IndependentCascadesModel(G)\n", + " config = mc.Configuration()\n", + " config.add_model_parameter('fraction_infected', 0.1)\n", + " for e in G.edges():\n", + " config.add_edge_configuration(\"threshold\", e, p)\n", + " model.set_initial_status(config)\n", + " sim_iterations = model.iteration_bunch(iterations_count)\n", + " final_count = sim_iterations[-1]['node_count']\n", + " influence = final_count.get(2, 0)\n", + " return influence, len(sim_iterations)\n", + "\n", + "def 
greedy_seed_selection(G, k, p=0.1, iterations_count=200):\n", + " \"\"\"\n", + " Greedy seed selection using NDlib influence estimation.\n", + " For each candidate node evaluation, it calls NDlib's simulation and accumulates\n", + " the total number of iterations used across all evaluations.\n", + "\n", + " Returns:\n", + " seeds: the selected seed set (list of nodes)\n", + " total_iters: the total number of NDlib iterations run during selection.\n", + " \"\"\"\n", + " seeds = []\n", + " candidate_nodes = set(G.nodes())\n", + " total_iters = 0\n", + " for _ in range(k):\n", + " best_candidate = None\n", + " best_spread = -1\n", + " # Evaluate each candidate's marginal gain when added to the current seed set.\n", + " for node in candidate_nodes:\n", + " current_seeds = seeds + [node]\n", + " spread, iters = ndlib_estimated_influence(G, current_seeds, p=p, iterations_count=iterations_count)\n", + " total_iters += iters # accumulate iterations used for this simulation\n", + " if spread > best_spread:\n", + " best_spread = spread\n", + " best_candidate = node\n", + " seeds.append(best_candidate)\n", + " candidate_nodes.remove(best_candidate)\n", + " return seeds, total_iters" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vd42rHJWNpg8" + }, + "source": [ + "#### Synthetic dataset benchmark (Erdös-Renyi random graph)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RGGn5SMk-_5D" + }, + "outputs": [], + "source": [ + "# Benchmark parameters for the Erdos-Renyi graph\n", + "n_nodes = 256 # number of nodes\n", + "p_edge = 0.05 # probability of edge\n", + "ic_prob = 0.1 # node activation probability\n", + "k_seeds = 10 # number of seeds\n", + "sample_size = 10 # sample size for benchmark stats\n", + "\n", + "def run_benchmark_synthetic():\n", + "\n", + " # Create a sample graph (Erdős–Rényi)\n", + " adjacency = gr.generate_er(n_nodes, p_edge, seed=42)\n", + "\n", + " # Get NetworkX graph for influence estimation\n", 
+ " G_nx = nx.from_scipy_sparse_array(adjacency)\n", + "\n", + " # ------------------------------\n", + " # GraphEm Influence Maximization\n", + " # ------------------------------\n", + " embedder = gr.GraphEmbedderPyTorch(\n", + " adjacency=adjacency,\n", + " n_components=2,\n", + " L_min=10.0,\n", + " k_attr=0.5,\n", + " k_inter=0.1,\n", + " n_neighbors=15,\n", + " sample_size=256,\n", + " batch_size=None,\n", + " verbose=True\n", + " )\n", + "\n", + " start_time = time()\n", + " gm_seeds = gr.graphem_seed_selection(embedder, k_seeds, num_iterations=10)\n", + " gm_time = time() - start_time\n", + " gm_influence, gm_iter_count = gr.ndlib_estimated_influence(G_nx, gm_seeds, p=ic_prob, iterations_count=200)\n", + "\n", + " # -----------------------------\n", + " # Greedy Influence Maximization\n", + " # -----------------------------\n", + " start_time = time()\n", + " greedy_seeds, greedy_iters = gr.greedy_seed_selection(G_nx, k_seeds, p=ic_prob, iterations_count=200)\n", + " greedy_time = time() - start_time\n", + " greedy_influence, iters = gr.ndlib_estimated_influence(G_nx, greedy_seeds, p=ic_prob, iterations_count=200)\n", + " greedy_iters += iters # accumulate iterations used for the final simulation\n", + "\n", + " return gm_seeds, gm_influence, gm_iter_count, gm_time, greedy_seeds, greedy_influence, greedy_iters, greedy_time\n", + "\n", + "gm_seeds_stats = []\n", + "gm_influence_stats = []\n", + "gm_iter_count_stats = []\n", + "gm_time_stats = []\n", + "greedy_seeds_stats = []\n", + "greedy_influence_stats = []\n", + "greedy_iters_stats = []\n", + "greedy_time_stats = []\n", + "\n", + "for _ in range(sample_size):\n", + "\n", + " print(\"Iteration\", _+1, \"of\", sample_size)\n", + "\n", + " gm_seeds, gm_influence, gm_iter_count, gm_time, greedy_seeds, greedy_influence, greedy_iters, greedy_time = run_benchmark_synthetic()\n", + "\n", + " gm_seeds_stats.append(gm_seeds)\n", + " gm_influence_stats.append(gm_influence)\n", + " 
gm_iter_count_stats.append(gm_iter_count)\n", + " gm_time_stats.append(gm_time)\n", + "\n", + " greedy_seeds_stats.append(greedy_seeds)\n", + " greedy_influence_stats.append(greedy_influence)\n", + " greedy_iters_stats.append(greedy_iters)\n", + " greedy_time_stats.append(greedy_time)\n", + "\n", + "gm_seeds_stats = np.array(gm_seeds_stats)\n", + "gm_influence_stats = np.array(gm_influence_stats)\n", + "gm_iter_count_stats = np.array(gm_iter_count_stats)\n", + "gm_time_stats = np.array(gm_time_stats)\n", + "\n", + "greedy_seeds_stats = np.array(greedy_seeds_stats)\n", + "greedy_influence_stats = np.array(greedy_influence_stats)\n", + "greedy_iters_stats = np.array(greedy_iters_stats)\n", + "greedy_time_stats = np.array(greedy_time_stats)\n", + "\n", + "print(\"\\nGraphEm Embedding Method:\")\n", + "print(\" Estimated Influence Spread:\", gm_influence_stats.mean(), \"(sigma)\", gm_influence_stats.std())\n", + "print(\" NDlib Iterations:\", gm_iter_count_stats.mean(), \"(sigma)\", gm_iter_count_stats.std())\n", + "print(\" Runtime (s):\", gm_time_stats.mean(), \"(sigma)\", gm_time_stats.std())\n", + "\n", + "print(\"\\nGreedy Influence Maximization Method:\")\n", + "print(\" Estimated Influence Spread:\", greedy_influence_stats.mean(), \"(sigma)\", greedy_influence_stats.std())\n", + "print(\" NDlib Iterations:\", greedy_iters_stats.mean(), \"(sigma)\", greedy_iters_stats.std())\n", + "print(\" Runtime (s):\", greedy_time_stats.mean(), \"(sigma)\", greedy_time_stats.std())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BHtGdY2RN5zI" + }, + "source": [ + "#### Real-world dataset benchmark (SNAP arXiv collaboration network)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "poJR3L63ODVb" + }, + "outputs": [], + "source": [ + "# Benchmark parameters for the dataset\n", + "ic_prob = 0.1 # node activation probability\n", + "k_seeds = 10 # number of seeds\n", + "sample_size = 10 # sample size for benchmark stats\n", 
+ "subsample_nodes = 256 # subsample to this many nodes for faster greedy\n", + "\n", + "# subsample before running iterations\n", + "print(\"Loading and subsampling dataset...\")\n", + "vertices, edges = gr.load_dataset('snap-ca-GrQc')\n", + "\n", + "# Create NetworkX graph and take the largest connected component\n", + "G_full = nx.Graph()\n", + "G_full.add_nodes_from(vertices)\n", + "G_full.add_edges_from(edges)\n", + "G_full = G_full.subgraph(max(nx.connected_components(G_full), key=len)).copy()\n", + "G_full = nx.convert_node_labels_to_integers(G_full)\n", + "\n", + "# Better subsampling: Use BFS from a random node to get a connected subgraph\n", + "np.random.seed(42) # Fixed seed for reproducibility\n", + "start_node = np.random.choice(list(G_full.nodes()))\n", + "\n", + "# BFS to get approximately subsample_nodes connected nodes\n", + "visited = set()\n", + "queue = [start_node]\n", + "visited.add(start_node)\n", + "\n", + "while queue and len(visited) < subsample_nodes:\n", + " node = queue.pop(0)\n", + " neighbors = list(G_full.neighbors(node))\n", + " np.random.shuffle(neighbors)\n", + " for neighbor in neighbors:\n", + " if neighbor not in visited and len(visited) < subsample_nodes:\n", + " visited.add(neighbor)\n", + " queue.append(neighbor)\n", + "\n", + "# Create subgraph from visited nodes\n", + "G_sub = G_full.subgraph(visited).copy()\n", + "G_sub = nx.convert_node_labels_to_integers(G_sub)\n", + "\n", + "print(f\"Subsampled graph: {G_sub.number_of_nodes()} nodes, {G_sub.number_of_edges()} edges\")\n", + "print()\n", + "\n", + "def run_benchmark_snap():\n", + " \"\"\"Run one iteration of the benchmark on the fixed subsampled graph\"\"\"\n", + "\n", + " # Use the pre-subsampled graph\n", + " G_nx = G_sub\n", + "\n", + " # Get adjacency matrix for embedder\n", + " adjacency = nx.to_scipy_sparse_array(G_nx, format='csr')\n", + " n_nodes = G_nx.number_of_nodes()\n", + "\n", + " # ------------------------------\n", + " # GraphEm Influence 
Maximization\n", + " # ------------------------------\n", + " embedder = gr.GraphEmbedderPyTorch(\n", + " adjacency=adjacency,\n", + " n_components=6,\n", + " L_min=4.0,\n", + " k_attr=0.5,\n", + " k_inter=0.1,\n", + " n_neighbors=15,\n", + " sample_size=512,\n", + " batch_size=1024,\n", + " verbose=False\n", + " )\n", + "\n", + " start_time = time()\n", + " gm_seeds = gr.graphem_seed_selection(embedder, k_seeds, num_iterations=10)\n", + " gm_time = time() - start_time\n", + " gm_influence, gm_iter_count = gr.ndlib_estimated_influence(G_nx, gm_seeds, p=ic_prob, iterations_count=200)\n", + "\n", + " # -----------------------------\n", + " # Greedy Influence Maximization\n", + " # -----------------------------\n", + " start_time = time()\n", + " greedy_seeds, greedy_iters = gr.greedy_seed_selection(G_nx, k_seeds, p=ic_prob, iterations_count=200)\n", + " greedy_time = time() - start_time\n", + " greedy_influence, iters = gr.ndlib_estimated_influence(G_nx, greedy_seeds, p=ic_prob, iterations_count=200)\n", + " greedy_iters += iters # accumulate iterations used for the final simulation\n", + "\n", + " return gm_seeds, gm_influence, gm_iter_count, gm_time, greedy_seeds, greedy_influence, greedy_iters, greedy_time\n", + "\n", + "gm_seeds_stats = []\n", + "gm_influence_stats = []\n", + "gm_iter_count_stats = []\n", + "gm_time_stats = []\n", + "greedy_seeds_stats = []\n", + "greedy_influence_stats = []\n", + "greedy_iters_stats = []\n", + "greedy_time_stats = []\n", + "\n", + "for _ in range(sample_size):\n", + "\n", + " print(\"Iteration\", _+1, \"of\", sample_size)\n", + "\n", + " gm_seeds, gm_influence, gm_iter_count, gm_time, greedy_seeds, greedy_influence, greedy_iters, greedy_time = run_benchmark_snap()\n", + "\n", + " gm_seeds_stats.append(gm_seeds)\n", + " gm_influence_stats.append(gm_influence)\n", + " gm_iter_count_stats.append(gm_iter_count)\n", + " gm_time_stats.append(gm_time)\n", + "\n", + " greedy_seeds_stats.append(greedy_seeds)\n", + " 
greedy_influence_stats.append(greedy_influence)\n", + " greedy_iters_stats.append(greedy_iters)\n", + " greedy_time_stats.append(greedy_time)\n", + "\n", + "gm_seeds_stats = np.array(gm_seeds_stats)\n", + "gm_influence_stats = np.array(gm_influence_stats)\n", + "gm_iter_count_stats = np.array(gm_iter_count_stats)\n", + "gm_time_stats = np.array(gm_time_stats)\n", + "\n", + "greedy_seeds_stats = np.array(greedy_seeds_stats)\n", + "greedy_influence_stats = np.array(greedy_influence_stats)\n", + "greedy_iters_stats = np.array(greedy_iters_stats)\n", + "greedy_time_stats = np.array(greedy_time_stats)\n", + "\n", + "print(\"\\nGraphEm Embedding Method:\")\n", + "print(\" Estimated Influence Spread:\", gm_influence_stats.mean(), \"(sigma)\", gm_influence_stats.std())\n", + "print(\" NDlib Iterations:\", gm_iter_count_stats.mean(), \"(sigma)\", gm_iter_count_stats.std())\n", + "print(\" Runtime (s):\", gm_time_stats.mean(), \"(sigma)\", gm_time_stats.std())\n", + "\n", + "print(\"\\nGreedy Influence Maximization Method:\")\n", + "print(\" Estimated Influence Spread:\", greedy_influence_stats.mean(), \"(sigma)\", greedy_influence_stats.std())\n", + "print(\" NDlib Iterations:\", greedy_iters_stats.mean(), \"(sigma)\", greedy_iters_stats.std())\n", + "print(\" Runtime (s):\", greedy_time_stats.mean(), \"(sigma)\", greedy_time_stats.std())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bfFARRmcrpjL" + }, + "source": [ + "### Comparison to other embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ol5uqOhgvKIa" + }, + "outputs": [], + "source": [ + "import scipy.sparse.linalg as spla\n", + "import umap, trimap, pacmap\n", + "from scipy.stats import spearmanr\n", + "from graphem_rapids.datasets import load_dataset_as_networkx\n", + "import plotly.graph_objects as go" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V8UeDIH9s4j9" + }, + "outputs": [], + "source": [ + 
"#\n", + "# Plot the graph embedding\n", + "#\n", + "def plot_graph_embedding(positions, G, title=\"Embedding\"):\n", + " \"\"\"\n", + " Interactive 2D scatter plot of node embedding using Plotly,\n", + " colored by node degree.\n", + "\n", + " Args:\n", + " positions: np.ndarray (n_nodes, dim)\n", + " G: networkx.Graph\n", + " \"\"\"\n", + " degree = np.array([d for _, d in G.degree()])\n", + " norm_deg = (degree - degree.min()) / (degree.max() - degree.min() + 1e-9)\n", + "\n", + " fig = go.Figure(\n", + " data=go.Scattergl(\n", + " x=positions[:, 0],\n", + " y=positions[:, 1],\n", + " mode='markers',\n", + " marker=dict(\n", + " size=6,\n", + " color=norm_deg,\n", + " colorscale='Bluered',\n", + " colorbar=dict(title='Degree'),\n", + " showscale=True,\n", + " line=dict(width=0)\n", + " ),\n", + " text=[f\"Node {i}<br>Degree: {deg}\" for i, deg in enumerate(degree)],\n", + " hoverinfo='text'\n", + " )\n", + " )\n", + "\n", + " fig.update_layout(\n", + " title=title,\n", + " width=800,\n", + " height=700,\n", + " xaxis=dict(visible=False),\n", + " yaxis=dict(visible=False),\n", + " plot_bgcolor='white',\n", + " margin=dict(l=10, r=10, t=40, b=10)\n", + " )\n", + "\n", + " fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fPszcNQqwaIS" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import networkx as nx\n", + "from scipy.stats import spearmanr\n", + "\n", + "def bootstrap_ci(x, y, n_boot=1000, ci=95):\n", + " \"\"\"\n", + " Bootstraps Spearman correlation confidence interval.\n", + " \"\"\"\n", + " n = len(x)\n", + " corrs = []\n", + " for _ in range(n_boot):\n", + " idx = np.random.choice(n, size=n, replace=True)\n", + " corr, _ = spearmanr(x[idx], y[idx])\n", + " corrs.append(corr)\n", + " corrs = np.sort(corrs)\n", + " alpha = 100 - ci\n", + " lower = np.percentile(corrs, alpha / 2)\n", + " upper = np.percentile(corrs, 100 - alpha / 2)\n", + " return np.mean(corrs), (lower, upper)\n", + "\n", + 
"def compute_and_display_correlations(G, positions, bootstrap=True, n_boot=1000):\n", + " \"\"\"\n", + " Compute radial distances and correlate with various centrality measures.\n", + " Args:\n", + " G: networkx.Graph\n", + " positions: np.ndarray (n_nodes, dim)\n", + " \"\"\"\n", + " positions = np.array(positions)\n", + " positions -= np.mean(positions, axis=0)\n", + " radii = np.linalg.norm(positions, axis=1)\n", + "\n", + " degree = np.array([d for _, d in G.degree()])\n", + " betweenness = np.array(list(nx.betweenness_centrality(G).values())) if G.number_of_nodes() < 5000 else np.zeros_like(radii)\n", + " try:\n", + " eigenvector = np.array(list(nx.eigenvector_centrality_numpy(G).values()))\n", + " except:\n", + " eigenvector = np.zeros_like(radii)\n", + " pagerank = np.array(list(nx.pagerank(G).values()))\n", + " 
closeness = np.array(list(nx.closeness_centrality(G).values()))\n", + " try:\n", + " node_load = np.array(list(nx.load_centrality(G).values()))\n", + " except:\n", + " node_load = np.zeros_like(radii)\n", + "\n", + " measures = {\n", + " \"Degree\": degree,\n", + " \"Betweenness\": betweenness,\n", + " \"Eigenvector\": eigenvector,\n", + " \"PageRank\": pagerank,\n", + " \"Closeness\": closeness,\n", + " \"Node Load\": node_load\n", + " }\n", + "\n", + " print(\"\\nCorrelation of Radial Distance with Centralities:\")\n", + " print(\"-\" * 80)\n", + " print(f\"{'Centrality':<15} {'ρ':>7} {'p-value':>12} {'Confidence Interval':>25}\")\n", + " print(\"-\" * 80)\n", + "\n", + " for name, values in measures.items():\n", + " if np.all(values == 0):\n", + " print(f\"{name:<15} {'N/A (skipped)':>50}\")\n", + " continue\n", + " corr, pval = spearmanr(radii, values)\n", + " if bootstrap:\n", + " mean_corr, (low, high) = bootstrap_ci(radii, values, n_boot=n_boot)\n", + " print(f\"{name:<15} {corr:7.3f} {pval:12.3g} {'':5} [{low:6.3f}, {high:6.3f}]\")\n", + " else:\n", + " print(f\"{name:<15} {corr:7.3f} {pval:12.3g}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mui8LK0Dr6lk" + }, + "source": [ + "#### Laplacian eigenmaps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Yk0nu2CKr5ty" + }, + "outputs": [], + "source": [ + "#\n", + "# Laplacian embedding\n", + "#\n", + "def laplacian_embedding(G, dim=2):\n", + " \"\"\"\n", + " Compute a Laplacian eigenmap embedding from a NetworkX graph.\n", + " Args:\n", + " G: networkx.Graph\n", + " dim: target embedding dimension, i.e. the number of\n", + " non-trivial Laplacian eigenvectors to keep\n", + " Returns:\n", + " np.ndarray: shape (n_nodes, dim)\n", + " \"\"\"\n", + " # Adjacency and Laplacian\n", + " A = nx.adjacency_matrix(G)\n", + " L = nx.normalized_laplacian_matrix(G)\n", + "\n", + " # Eigen-decomposition (smallest eigenvalues)\n", + " k = min(dim + 1, 
G.number_of_nodes() - 1)\n", + " eigvals, eigvecs = spla.eigsh(L, k=k, which='SM')\n", + "\n", + " # Skip trivial first eigenvector (constant)\n", + " return eigvecs[:, 1:dim+1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LpVmvtnKw8E7" + }, + "outputs": [], + "source": [ + "# Generate a random Erdos-Renyi graph\n", + "G = nx.erdos_renyi_graph(1000, 0.05, seed=0)\n", + "\n", + "# Embedding\n", + "positions = laplacian_embedding(G, dim=2) # 2D for visualization\n", + "\n", + "# Correlation analysis\n", + "compute_and_display_correlations(G, positions, bootstrap=True, n_boot=500)\n", + "\n", + "# Plot the result\n", + "plot_graph_embedding(positions, G, title=\"Laplacian embedding\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1eQayVXmr_zf" + }, + "source": [ + "#### UMAP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eIXxRgJpzsg8" + }, + "outputs": [], + "source": [ + "# UMAP embedding\n", + "reducer = umap.UMAP(\n", + " n_components=2,\n", + " metric='euclidean',\n", + " verbose=False\n", + " )\n", + "umap_embedding = reducer.fit_transform(positions)\n", + "\n", + "# Correlation analysis\n", + "compute_and_display_correlations(G, umap_embedding, bootstrap=True, n_boot=500)\n", + "\n", + "# Plot the result\n", + "plot_graph_embedding(umap_embedding, G, title=\"UMAP embedding\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rm0YG9ifsUxu" + }, + "source": [ + "#### TriMAP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sx6hT_I4r1x_" + }, + "outputs": [], + "source": [ + "# TriMAP embedding\n", + "reducer = trimap.TRIMAP(\n", + " n_dims=2,\n", + " distance='euclidean',\n", + " verbose=False\n", + " )\n", + "trimap_embedding = reducer.fit_transform(positions)\n", + "\n", + "# Correlation analysis\n", + "compute_and_display_correlations(G, trimap_embedding, bootstrap=True, n_boot=500)\n", + "\n", + "# 
Plot the result\n", + "plot_graph_embedding(trimap_embedding, G, title=\"TriMAP embedding\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYxMUmRBsZbs" + }, + "source": [ + "#### PaCMAP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N4RNz7N-scEH" + }, + "outputs": [], + "source": [ + "# PaCMAP embedding\n", + "reducer = pacmap.PaCMAP(\n", + " n_components=2,\n", + " distance='euclidean',\n", + " verbose=False\n", + " )\n", + "pacmap_embedding = reducer.fit_transform(positions)\n", + "\n", + "# Correlation analysis\n", + "compute_and_display_correlations(G, pacmap_embedding, bootstrap=True, n_boot=500)\n", + "\n", + "# Plot the result\n", + "plot_graph_embedding(pacmap_embedding, G, title=\"PaCMAP embedding\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KhJby2aR4ujw" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "TPU", + "colab": { + "gpuType": "V5E1", + "machine_shape": "hm", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From e398bafb66f2d79a1d28f81c19ad7646a3cbc849 Mon Sep 17 00:00:00 2001 From: Sasha Kolpakov Date: Tue, 24 Feb 2026 15:00:20 -0600 Subject: [PATCH 3/3] Rename notebook links in README Updated the README to reflect changes in the notebook file names. 
--- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1e3326e..019ee65 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ From source: pip install git+https://github.com/igorrivin/graphem.git ``` -## Quick Start [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/igorrivin/graphem/blob/main/examples/graphem_notebook.ipynb) +## Quick Start [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/igorrivin/graphem/blob/main/examples/graphem_jax_notebook.ipynb) ### Graph Embedding @@ -143,7 +143,7 @@ The `examples/` directory contains: - `graph_generator_example.py` - Generate and visualize various graph embeddings - `random_regular_example.py` - Random regular graph analysis with GraphEm - `real_world_datasets_example.py` - Work with real world datasets (based on Facebook, arXiv, and Wikipedia data) -- `graphem_notebook.ipynb` - Interactive Jupyter notebook with examples and visualizations +- `graphem_jax_notebook.ipynb` - Interactive Jupyter notebook with examples and visualizations ## Testing