forked from Mazen030/Team43-AWS_Distributed_system_project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtry.py
More file actions
84 lines (64 loc) · 2.43 KB
/
try.py
File metadata and controls
84 lines (64 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import subprocess
import os
import socket
def _ping_host(host):
    """Return True if *host* answers a single ping, False otherwise.

    Runs the system ``ping`` with ``-c 1 -W 1`` (one echo request; ``-W 1``
    is a one-second reply timeout with Linux iputils ping -- other ping
    implementations may interpret ``-W`` differently).
    """
    try:
        # Only the exit status matters, so ping's stdout is discarded.
        subprocess.run(
            ["ping", "-c", "1", "-W", "1", host],
            stdout=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: ping ran but exited non-zero.
        # OSError: ping itself could not be executed (e.g. not installed);
        # treat that as "unreachable" rather than crashing the caller.
        return False
    return True


def check_hosts_alive(hosts, fallback_host="slave11", ping=None):
    """Pings hosts to check if they are reachable.

    Every host in *hosts* is probed in order. If none of them responds
    (this includes the case where *hosts* is empty), *fallback_host* is
    probed as a last resort.

    Args:
        hosts (list): A list of hostnames to ping.
        fallback_host (str): Host to try when no entry of *hosts* responded.
            Pass ``None`` to disable the fallback probe.
        ping (callable): Optional ``ping(host) -> bool`` used to probe a
            host. Defaults to running the system ``ping`` command.

    Returns:
        list: A list of hostnames that responded to pings, in probe order.
    """
    if ping is None:
        ping = _ping_host

    alive_hosts = []
    for host in hosts:
        if ping(host):
            alive_hosts.append(host)
            print(f"{host} is alive!")
        else:
            print(f"{host} is not reachable!")

    # "Every host failed" is the same as "nothing was collected", so no
    # separate failure counter is needed.
    if not alive_hosts and fallback_host is not None:
        if ping(fallback_host):
            alive_hosts.append(fallback_host)
            print(f"{fallback_host} is alive!")
        else:
            print(f"{fallback_host} is also not reachable!")

    return alive_hosts
def launch_mpi_processes(alive_hosts, script_path, mpirun_cmd="mpirun"):
    """Launches MPI processes across the specified hosts.

    Builds and runs ``<mpirun_cmd> -n <N> --host <hosts> python3 <script>``,
    where the host list is the local hostname followed by *alive_hosts* and
    ``N`` is the length of that list. The launcher's exit status and its
    captured output are printed; stderr is printed only on a non-zero exit.

    Args:
        alive_hosts (list): A list of hostnames where MPI processes should be
            launched (the local node is added automatically).
        script_path (str): Path to your Python script. It is converted to an
            absolute path relative to the current working directory.
        mpirun_cmd (str): Name or path of the MPI launcher executable.

    Returns:
        int or None: The launcher's exit status, or ``None`` if the launcher
        could not be started at all.
    """
    # Ensure script_path is an absolute path
    script_path = os.path.abspath(script_path)

    # The local (master) node is listed first, ahead of the worker hosts.
    all_hosts = [socket.gethostname()] + list(alive_hosts)
    command = [
        mpirun_cmd,
        "-n", str(len(all_hosts)),
        "--host", ",".join(all_hosts),
        "python3", script_path,
    ]

    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (OSError, ValueError) as e:
        # OSError: launcher missing or not executable.
        # ValueError: invalid argument (e.g. embedded NUL byte in a hostname).
        print(f"Failed to launch MPI processes: {e}")
        return None

    return_code = completed.returncode
    print(f"mpirun return code: {return_code}")
    # errors="replace" keeps the report printable even if a rank emitted
    # bytes that are not valid UTF-8.
    print(f"Output from mpirun: {completed.stdout.decode(errors='replace')}")
    if return_code != 0:
        print(f"Errors from mpirun: {completed.stderr.decode(errors='replace')}")
    return return_code
# Example usage: probe the candidate worker nodes and start the MPI job on
# whichever of them responded. Guarded so that importing this module does not
# trigger pings or an mpirun launch as a side effect.
if __name__ == "__main__":
    potential_hosts = ["slave1", "slave2", "slave3", "slave4"]
    responsive_hosts = check_hosts_alive(potential_hosts)

    if responsive_hosts:
        script_path = "master-node4.py"
        launch_mpi_processes(responsive_hosts, script_path)
    else:
        print("No alive slave hosts found. Exiting.")