Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ruff.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,7 @@ jobs:
run: |
cd stgraph/dataset/
ruff check .
cd ../../
cd stgraph/graph
ruff check .
cd ../../
228 changes: 166 additions & 62 deletions benchmarking/dynamic-temporal-tgcn/seastar/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,55 +9,116 @@
import os
from stgraph.dataset.LinkPredDataLoader import LinkPredDataLoader
from stgraph.benchmark_tools.table import BenchmarkTable
from stgraph.graph.dynamic.gpma.GPMAGraph import GPMAGraph
from stgraph.graph.dynamic.pcsr.PCSRGraph import PCSRGraph
from stgraph.graph.dynamic.naive.NaiveGraph import NaiveGraph
from stgraph.graph.dynamic.gpma.gpma_graph import GPMAGraph
from stgraph.graph.dynamic.pcsr.pcsr_graph import PCSRGraph
from stgraph.graph.dynamic.naive.naive_graph import NaiveGraph
from model import STGraphTGCN
from utils import to_default_device, get_default_device


def main(args):

if torch.cuda.is_available():
print("🎉 CUDA is available")
else:
print("😔 CUDA is not available")
quit()

# dummy object to account for initial CUDA context object
Graph = None
if args.type == "naive":
Graph = NaiveGraph([[(0,0)]],1)
Graph = NaiveGraph([[(0, 0)]], 1)
elif args.type == "pcsr":
Graph = PCSRGraph([[(0,1)]],2) # PCSRGraph([[(0,1)]],2)
Graph = PCSRGraph([[(0, 1)]], 2) # PCSRGraph([[(0,1)]],2)
elif args.type == "gpma":
Graph = GPMAGraph([[(0,0)]],1)
Graph = GPMAGraph([[(0, 0)]], 1)

if args.dataset == "math":
dataloader = LinkPredDataLoader('dynamic-temporal', f'sx-mathoverflow-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"sx-mathoverflow-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "wikitalk":
dataloader = LinkPredDataLoader('dynamic-temporal', f'wiki-talk-temporal-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"wiki-talk-temporal-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "askubuntu":
dataloader = LinkPredDataLoader('dynamic-temporal', f'sx-askubuntu-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"sx-askubuntu-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "superuser":
dataloader = LinkPredDataLoader('dynamic-temporal', f'sx-superuser-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"sx-superuser-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "stackoverflow":
dataloader = LinkPredDataLoader('dynamic-temporal', f'sx-stackoverflow-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"sx-stackoverflow-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "reddit_title":
dataloader = LinkPredDataLoader('dynamic-temporal', f'reddit-title-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"reddit-title-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "reddit_body":
dataloader = LinkPredDataLoader('dynamic-temporal', f'reddit-body-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"reddit-body-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "email":
dataloader = LinkPredDataLoader('dynamic-temporal', f'email-eu-core-temporal-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"email-eu-core-temporal-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
elif args.dataset == "bitcoin_otc":
dataloader = LinkPredDataLoader('dynamic-temporal', f'bitcoin-otc-data-{args.slide_size}', args.cutoff_time, verbose=True, for_stgraph=True)
dataloader = LinkPredDataLoader(
"dynamic-temporal",
f"bitcoin-otc-data-{args.slide_size}",
args.cutoff_time,
verbose=True,
for_stgraph=True,
)
else:
print("😔 Unrecognized dataset")
quit()

edge_lists = dataloader.get_edges()
pos_neg_edges_lists, pos_neg_targets_lists = dataloader.get_pos_neg_edges()
pos_neg_edges_lists = [to_default_device(torch.from_numpy(pos_neg_edges)) for pos_neg_edges in pos_neg_edges_lists]
pos_neg_targets_lists = [to_default_device(torch.from_numpy(pos_neg_targets).type(torch.float32)) for pos_neg_targets in pos_neg_targets_lists]
pos_neg_edges_lists = [
to_default_device(torch.from_numpy(pos_neg_edges))
for pos_neg_edges in pos_neg_edges_lists
]
pos_neg_targets_lists = [
to_default_device(torch.from_numpy(pos_neg_targets).type(torch.float32))
for pos_neg_targets in pos_neg_targets_lists
]

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
Expand Down Expand Up @@ -89,16 +150,27 @@ def main(args):
backprop_every = args.backprop_every
if backprop_every == 0:
backprop_every = total_timestamps

if total_timestamps % backprop_every == 0:
num_iter = int(total_timestamps/backprop_every)
num_iter = int(total_timestamps / backprop_every)
else:
num_iter = int(total_timestamps/backprop_every) + 1
num_iter = int(total_timestamps / backprop_every) + 1

# metrics
dur = []
max_gpu = []
table = BenchmarkTable(f"(STGraph Dynamic-Temporal) TGCN on {dataloader.name} dataset", ["Epoch", "Time(s)", "MSE", "Used GPU Memory (Max MB)", "Build FWD Graph Time(s)", "Build BWD Graph Time(s)", "Move to GPU Time(s)"])
table = BenchmarkTable(
f"(STGraph Dynamic-Temporal) TGCN on {dataloader.name} dataset",
[
"Epoch",
"Time(s)",
"MSE",
"Used GPU Memory (Max MB)",
"Build FWD Graph Time(s)",
"Build BWD Graph Time(s)",
"Move to GPU Time(s)",
],
)

try:
# train
Expand All @@ -107,7 +179,7 @@ def main(args):
torch.cuda.synchronize()
torch.cuda.reset_peak_memory_stats(0)
model.train()

t0 = time.time()
gpu_mem_arr = []
cost_arr = []
Expand All @@ -117,7 +189,10 @@ def main(args):
optimizer.zero_grad()
cost = 0
hidden_state = None
y_hat = torch.randn((dataloader.max_num_nodes, args.feat_size), device=get_default_device())
y_hat = torch.randn(
(dataloader.max_num_nodes, args.feat_size),
device=get_default_device(),
)
G.get_graph(index * backprop_every)
for k in range(backprop_every):
t = index * backprop_every + k
Expand All @@ -128,7 +203,10 @@ def main(args):

initial_used_gpu_mem = pynvml.nvmlDeviceGetMemoryInfo(handle).used
G.get_graph(t)
graph_mem_delta = pynvml.nvmlDeviceGetMemoryInfo(handle).used - initial_used_gpu_mem
graph_mem_delta = (
pynvml.nvmlDeviceGetMemoryInfo(handle).used
- initial_used_gpu_mem
)
graph_mem = graph_mem + graph_mem_delta

if G.get_ndata("norm") is None:
Expand All @@ -141,11 +219,11 @@ def main(args):
y_hat, hidden_state = model(G, y_hat, None, hidden_state)
out = model.decode(y_hat, pos_neg_edges_lists[t]).view(-1)
cost = cost + criterion(out, pos_neg_targets_lists[t])

if cost == 0:
break
cost = cost / (backprop_every+1)

cost = cost / (backprop_every + 1)
cost.backward()
optimizer.step()
torch.cuda.synchronize()
Expand All @@ -159,61 +237,87 @@ def main(args):
dur.append(run_time_this_epoch)
max_gpu.append(max(gpu_mem_arr))

table.add_row([epoch, "{:.5f}".format(run_time_this_epoch), "{:.4f}".format(sum(cost_arr)/len(cost_arr)), "{:.4f}".format((max(gpu_mem_arr) * 1.0 / (1024**2))), "{:.5f}".format(G.get_fwd_graph_time), "{:.5f}".format(G.get_bwd_graph_time), "{:.5f}".format(G.move_to_gpu_time)])
table.add_row(
[
epoch,
"{:.5f}".format(run_time_this_epoch),
"{:.4f}".format(sum(cost_arr) / len(cost_arr)),
"{:.4f}".format((max(gpu_mem_arr) * 1.0 / (1024**2))),
"{:.5f}".format(G.get_fwd_graph_time),
"{:.5f}".format(G.get_bwd_graph_time),
"{:.5f}".format(G.move_to_gpu_time),
]
)

table.display()
print('Average Time taken: {:6f}'.format(np.mean(dur)))
print("Average Time taken: {:6f}".format(np.mean(dur)))
return np.mean(dur), (max(max_gpu) * 1.0 / (1024**2))

except RuntimeError as e:
if 'out of memory' in str(e):
table.add_row(["OOM", "OOM", "OOM", "OOM", "OOM", "OOM", "OOM"])
if "out of memory" in str(e):
table.add_row(["OOM", "OOM", "OOM", "OOM", "OOM", "OOM", "OOM"])
table.display()
else:
print("😔 Something went wrong")
return "OOM", "OOM"


def write_results(args, time_taken, max_gpu):
cutoff = "whole"
if args.cutoff_time < sys.maxsize:
cutoff = str(args.cutoff_time)
file_name = f"stgraph_{args.type}_{args.dataset}_T{cutoff}_S{args.slide_size}_B{args.backprop_every}_H{args.num_hidden}_F{args.feat_size}"
df_data = pd.DataFrame([{'Filename': file_name, 'Time Taken (s)': time_taken, 'Max GPU Usage (MB)': max_gpu}])

if os.path.exists('../../results/dynamic-temporal.csv'):
df = pd.read_csv('../../results/dynamic-temporal.csv')
df_data = pd.DataFrame(
[
{
"Filename": file_name,
"Time Taken (s)": time_taken,
"Max GPU Usage (MB)": max_gpu,
}
]
)

if os.path.exists("../../results/dynamic-temporal.csv"):
df = pd.read_csv("../../results/dynamic-temporal.csv")
df = pd.concat([df, df_data])
else:
df = df_data

df.to_csv('../../results/dynamic-temporal.csv', sep=',', index=False, encoding='utf-8')

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='STGraph Static TGCN')
df.to_csv(
"../../results/dynamic-temporal.csv", sep=",", index=False, encoding="utf-8"
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="STGraph Static TGCN")
snoop.install(enabled=False)

parser.add_argument("--dataset", type=str, default="math",
help="Name of the Dataset (math, wikitalk, askubuntu, superuser, stackoverflow, email, bitcoin_otc, reddit_title, reddit_body)")
parser.add_argument("--slide-size", type=str, default="1.0",
help="Slide Size")
parser.add_argument("--type", type=str, default="naive",
help="STGraph Type")
parser.add_argument("--backprop-every", type=int, default=0,
help="Feature size of nodes")
parser.add_argument("--feat-size", type=int, default=8,
help="Feature size of nodes")
parser.add_argument("--num-hidden", type=int, default=100,
help="Number of hidden units")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--cutoff-time", type=int, default=sys.maxsize,
help="cutoff time")
parser.add_argument("--num-epochs", type=int, default=1,
help="number of training epochs")
parser.add_argument(
"--dataset",
type=str,
default="math",
help="Name of the Dataset (math, wikitalk, askubuntu, superuser, stackoverflow, email, bitcoin_otc, reddit_title, reddit_body)",
)
parser.add_argument("--slide-size", type=str, default="1.0", help="Slide Size")
parser.add_argument("--type", type=str, default="naive", help="STGraph Type")
parser.add_argument(
"--backprop-every", type=int, default=0, help="Feature size of nodes"
)
parser.add_argument(
"--feat-size", type=int, default=8, help="Feature size of nodes"
)
parser.add_argument(
"--num-hidden", type=int, default=100, help="Number of hidden units"
)
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
parser.add_argument(
"--cutoff-time", type=int, default=sys.maxsize, help="cutoff time"
)
parser.add_argument(
"--num-epochs", type=int, default=1, help="number of training epochs"
)
args = parser.parse_args()

print(args)
time_taken, max_gpu = main(args)
write_results(args, time_taken, max_gpu)


2 changes: 1 addition & 1 deletion benchmarking/gat/seastar/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import torch
import torch.nn.functional as F
import pynvml
from stgraph.graph.static.StaticGraph import StaticGraph
from stgraph.graph.static.static_graph import StaticGraph
from stgraph.dataset.CoraDataLoader import CoraDataLoader
from utils import EarlyStopping, accuracy
import snoop
Expand Down
6 changes: 2 additions & 4 deletions benchmarking/gcn/seastar/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pynvml
import torch.nn as nn
import torch.nn.functional as F
from stgraph.graph.static.StaticGraph import StaticGraph
from stgraph.graph.static.static_graph import StaticGraph
from stgraph.dataset import CoraDataLoader
from utils import to_default_device, accuracy, generate_test_mask, generate_train_mask
from model import GCN
Expand Down Expand Up @@ -47,9 +47,7 @@ def main(args):

# A simple sanity check
print("Measuerd Graph Size (pynvml): ", graph_mem, " B", flush=True)
print(
"Measuerd Graph Size (pynvml): ", (graph_mem) / (1024**2), " MB", flush=True
)
print("Measuerd Graph Size (pynvml): ", (graph_mem) / (1024**2), " MB", flush=True)

# normalization
degs = torch.from_numpy(g.weighted_in_degrees()).type(torch.int32)
Expand Down
Loading