Skip to content

Commit aca650a

Browse files
committed
Merge pull request #132 from High-Performance-IO/hotfix
This commit improves the log output produced when allocating a shared memory segment fails and when ERR_EXIT is called. It also adds a mechanism for highly parallel applications in which every thread except the main one performs ONLY computation. In that case, to avoid exhausting file descriptors, CAPIO will not intercept or handle child threads. To enable this behaviour, export the environment variable `CAPIO_IGNORE_CHILD_THREADS=YES` (the values `ON` and `TRUE` are accepted as well).
1 parent e209845 commit aca650a

2 files changed

Lines changed: 36 additions & 6 deletions

File tree

src/common/capio/shm.hpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
#define SHM_CREATE_CHECK(condition, source) \
2727
if (condition) { \
28-
ERR_EXIT("Unable to open shm: %s", source); \
28+
ERR_EXIT("Unable to open shm: %s: %s", source, strerror(errno)); \
2929
};
3030

3131
#else
@@ -42,7 +42,7 @@
4242
LOG("error while creating %s", source); \
4343
std::cout << CAPIO_SERVER_CLI_LOG_SERVER_ERROR << " [ " << node_name << " ] " \
4444
<< "Unable to create shm: " << source << std::endl; \
45-
ERR_EXIT("Unable to open shm: %s", source); \
45+
ERR_EXIT("Unable to open shm %s: %s", source, strerror(errno)); \
4646
};
4747

4848
#endif
@@ -133,7 +133,12 @@ void *get_shm(const std::string &shm_name) {
133133
}
134134
void *p = mmap(nullptr, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
135135
if (p == MAP_FAILED) {
136-
ERR_EXIT("mmap get_shm %s", shm_name.c_str());
136+
LOG("ERROR MMAP arg dump:");
137+
LOG("mmap-size: %ld", sb.st_size);
138+
LOG("mmap-prot: %ld", PROT_READ | PROT_WRITE);
139+
LOG("mmap-flags: %ld", MAP_SHARED);
140+
LOG("mmap-fd: %ld", fd);
141+
ERR_EXIT("ERROR: mmap failed at get_shm(%s): %s", shm_name.c_str(), strerror(errno));
137142
}
138143
if (close(fd) == -1) {
139144
ERR_EXIT("close");
@@ -151,7 +156,7 @@ void *get_shm_if_exist(const std::string &shm_name) {
151156
if (errno == ENOENT) {
152157
return nullptr;
153158
}
154-
ERR_EXIT("get_shm shm_open %s", shm_name.c_str());
159+
ERR_EXIT("ERROR: unable to open shared memory %s: %s", shm_name.c_str(), strerror(errno));
155160
}
156161
/* Open existing object */
157162
/* Use shared memory object size as length argument for mmap()
@@ -161,7 +166,13 @@ void *get_shm_if_exist(const std::string &shm_name) {
161166
}
162167
void *p = mmap(nullptr, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
163168
if (p == MAP_FAILED) {
164-
ERR_EXIT("mmap get_shm %s", shm_name.c_str());
169+
LOG("ERROR MMAP arg dump:");
170+
LOG("mmap-size: %ld", sb.st_size);
171+
LOG("mmap-prot: %ld", PROT_READ | PROT_WRITE);
172+
LOG("mmap-flags: %ld", MAP_SHARED);
173+
LOG("mmap-fd: %ld", fd);
174+
ERR_EXIT("ERROR: mmap failed at get_shm_if_exist(%s): %s", shm_name.c_str(),
175+
strerror(errno));
165176
}
166177
if (close(fd) == -1) {
167178
ERR_EXIT("close");

src/posix/utils/clone.hpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,17 @@ inline bool is_capio_tid(const pid_t tid) {
1818
}
1919

2020
inline void register_capio_tid(const pid_t tid) {
21+
START_LOG(syscall_no_intercept(SYS_gettid), "call(tid=%ld)", tid);
2122
const std::lock_guard<std::mutex> lg(clone_mutex);
2223
tids->insert(tid);
2324
}
2425

2526
inline void remove_capio_tid(const pid_t tid) {
27+
START_LOG(syscall_no_intercept(SYS_gettid), "call(tid=%ld)", tid);
2628
const std::lock_guard<std::mutex> lg(clone_mutex);
27-
tids->erase(tid);
29+
if (tids->find(tid) != tids->end()) {
30+
tids->erase(tid);
31+
}
2832
}
2933

3034
inline void init_threading_support() { tids = new std::unordered_set<pid_t>{}; }
@@ -59,6 +63,21 @@ inline void hook_clone_child() {
5963

6064
#ifdef __CAPIO_POSIX
6165
syscall_no_intercept_flag = true;
66+
67+
/*
68+
* This piece of code is aimed at addressing issues with applications that spawn several
69+
* thousand threads that only do computations. When this occurs, under some circumstances CAPIO
70+
* might fail to allocate shared memory objects. As such, if child threads ONLY do computation,
71+
* we can safely ignore them with CAPIO.
72+
*/
73+
thread_local char *skip_child = std::getenv("CAPIO_IGNORE_CHILD_THREADS");
74+
if (skip_child != nullptr) {
75+
auto skip_child_str = std::string(skip_child);
76+
if (skip_child_str == "ON" || skip_child_str == "TRUE" || skip_child_str == "YES") {
77+
return;
78+
}
79+
}
80+
6281
#endif
6382
std::unique_lock<std::mutex> lock(clone_mutex);
6483
clone_cv.wait(lock, [&tid] { return tids->find(tid) != tids->end(); });

0 commit comments

Comments
 (0)