From 5ee674c4b09a29b82a0e2d7a4ce064fea3df1f4c Mon Sep 17 00:00:00 2001 From: Henry Zhang Date: Wed, 29 Aug 2018 14:52:03 -0700 Subject: [PATCH 1/4] Add support for python3.7 --- configure.ac | 10 ++++++++-- src/Makefile.am | 7 +++++++ src/frob.cc | 30 +++++++++++++++++++++++++++++- src/frob37.cc | 18 ++++++++++++++++++ src/prober.cc | 8 +++++++- src/pyfrob.cc | 11 +++++++++++ src/symbol.cc | 15 ++++++++++++++- src/symbol.h | 9 +++++++-- 8 files changed, 101 insertions(+), 7 deletions(-) create mode 100644 src/frob37.cc diff --git a/configure.ac b/configure.ac index 0858fc3..5d18782 100644 --- a/configure.ac +++ b/configure.ac @@ -111,7 +111,12 @@ PKG_CHECK_MODULES([PY36], [python-3.6], [enable_py36="yes"], [AC_MSG_WARN([Build AM_CONDITIONAL([ENABLE_PY36], [test x"$enable_py36" = xyes]) AM_COND_IF([ENABLE_PY36], [AC_DEFINE([ENABLE_PY36], [1], [Python 3.6 will be enabled])]) -AS_IF([test x"$enable_py26" = xyes -o x"$enable_py34" = xyes -o x"$enable_py36" = xyes], +enable_py37=no +PKG_CHECK_MODULES([PY37], [python-3.7], [enable_py37="yes"], [AC_MSG_WARN([Building without Python 3.7 support])]) +AM_CONDITIONAL([ENABLE_PY37], [test x"$enable_py37" = xyes]) +AM_COND_IF([ENABLE_PY37], [AC_DEFINE([ENABLE_PY37], [1], [Python 3.7 will be enabled])]) + +AS_IF([test x"$enable_py26" = xyes -o x"$enable_py34" = xyes -o x"$enable_py36" = xyes -o x"$enable_py37" = xyes], [AC_MSG_NOTICE([Found at least one copy of Python.h])], [AC_MSG_ERROR([Failed to find a supported Python.h])] ) @@ -127,7 +132,8 @@ echo echo " with threads = $enable_threads" echo " with Python 2.6/7 = $enable_py26" echo " with Python 3.4/5 = $enable_py34" -echo " with Python 3.6+ = $enable_py36" +echo " with Python 3.6 = $enable_py36" +echo " with Python 3.7+ = $enable_py37" echo echo " CXX = $CXX" echo " CXXFLAGS = $CXXFLAGS" diff --git a/src/Makefile.am b/src/Makefile.am index 11d8ab6..0e1a9ef 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -34,3 +34,10 @@ libfrob36_la_CXXFLAGS = $(PY36_CFLAGS) noinst_LTLIBRARIES += libfrob36.la pyflame_LDADD += libfrob36.la endif + +if ENABLE_PY37 +libfrob37_la_SOURCES = frob37.cc +libfrob37_la_CXXFLAGS = $(PY37_CFLAGS) +noinst_LTLIBRARIES += libfrob37.la +pyflame_LDADD += libfrob37.la +endif diff --git a/src/frob.cc b/src/frob.cc index 6fcff39..337a3e8 100644 --- a/src/frob.cc +++ b/src/frob.cc @@ -89,6 +89,22 @@ unsigned long ByteData(unsigned long addr) { return addr + offsetof(PyBytesObject, ob_sval); } +#elif PYFLAME_PY_VERSION == 37 +namespace py37 { +std::string StringDataPython3(pid_t pid, unsigned long addr); + +unsigned long StringSize(unsigned long addr) { + return addr + offsetof(PyVarObject, ob_size); +} + +std::string StringData(pid_t pid, unsigned long addr) { + return StringDataPython3(pid, addr); +} + +unsigned long ByteData(unsigned long addr) { + return addr + offsetof(PyBytesObject, ob_sval); +} + #else static_assert(false, "uh oh, bad PYFLAME_PY_VERSION"); #endif @@ -256,7 +272,19 @@ std::vector GetThreads(pid_t pid, PyAddresses addrs, // First try to get interpreter state via dereferencing // _PyThreadState_Current. This won't work if the main thread doesn't hold // the GIL (_Current will be null). - unsigned long tstate = PtracePeek(pid, addrs.tstate_addr); + unsigned long tstate = 0; + if (addrs.tstate_addr) { + tstate = PtracePeek(pid, addrs.tstate_addr); + } + + if (tstate == 0 && addrs.tstate_get_addr != 0) { + // If we are Python 3.7, there will be no global reference to current thread + // state, and the gilstate's ThreadState will be null if during memory probing + // the child was not executing Python code. We need to run this function + // to get the current running ThreadState + tstate = PtraceCallFunction(pid, addrs.tstate_get_addr); + } + unsigned long current_tstate = tstate; if (enable_threads) { if (tstate != 0) { diff --git a/src/frob37.cc b/src/frob37.cc new file mode 100644 index 0000000..3bb3bad --- /dev/null +++ b/src/frob37.cc @@ -0,0 +1,18 @@ +// Copyright 2018 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// ABI for Python 3.7 +#define PYFLAME_PY_VERSION 37 + +#include "./frob.cc" diff --git a/src/prober.cc b/src/prober.cc index a691c83..c653357 100644 --- a/src/prober.cc +++ b/src/prober.cc @@ -64,7 +64,7 @@ static const char usage_str[] = " -x, --exclude-idle Exclude idle time from statistics\n" "\n" "Advanced Options:\n" - " --abi Force a particular Python ABI (26, 34, 36)\n" + " --abi Force a particular Python ABI (26, 34, 36, 37)\n" " --flamechart Include timestamps for generating Chrome " "\"flamecharts\"\n"); @@ -79,6 +79,9 @@ static const int build_abis[] = { #ifdef ENABLE_PY36 36, #endif +#ifdef ENABLE_PY37 + 37, +#endif }; static_assert(sizeof(build_abis) > 0, "No Python ABIs detected!"); @@ -221,6 +224,9 @@ int Prober::ParseOpts(int argc, char **argv) { case 36: abi_ = PyABI::Py36; break; + case 37: + abi_ = PyABI::Py37; + break; default: std::cerr << "Unknown or unsupported ABI version: " << abi_version << "\n"; diff --git a/src/pyfrob.cc b/src/pyfrob.cc index 7c0588d..2d8c4f0 100644 --- a/src/pyfrob.cc +++ b/src/pyfrob.cc @@ -126,6 +126,12 @@ FROB_FUNCS } #endif +#ifdef ENABLE_PY37 +namespace py37 { +FROB_FUNCS +} +#endif + // Fill the addrs_ member int PyFrob::set_addrs_(PyABI *abi) { Namespace ns(pid_); @@ -172,6 +178,11 @@ int PyFrob::DetectABI(PyABI abi) { case PyABI::Py36: get_threads_ = py36::GetThreads; break; +#endif +#ifdef ENABLE_PY37 + case PyABI::Py37: + get_threads_ = py37::GetThreads; + break; #endif default: std::ostringstream os; diff --git a/src/symbol.cc b/src/symbol.cc index 39c3e81..115a36d 100644 --- a/src/symbol.cc +++ b/src/symbol.cc @@ -137,8 +137,17 @@ PyABI ELF::WalkTable(int sym, int str, PyAddresses *addrs) { reinterpret_cast(p() + s->sh_offset + i * s->sh_entsize); const char *name = reinterpret_cast(p() + d->sh_offset + sym->st_name); + if (!addrs->tstate_addr && strcmp(name, "_PyThreadState_Current") == 0) { addrs->tstate_addr = static_cast(sym->st_value); + } else if (!addrs->tstate_addr && strcmp(name, "_PyThreadState_UncheckedGet") == 0) { + // In Python 3.7, the _PyThreadState_Current variable is held by _PyRuntime, which + // is defined in a private header. This function allows us to retrieve the pointer + // to the currently running thread. This function can't simply be duplicated + // because the implementation is defined using a macro to an internal header + // See https://github.com/python/cpython/commit/2ebc5ce42a8a9e047e790aefbf9a94811569b2b6 + // (bpo-30860) + addrs->tstate_get_addr = static_cast(sym->st_value); } else if (!addrs->interp_head_addr && strcmp(name, "interp_head") == 0) { addrs->interp_head_addr = static_cast(sym->st_value); } else if (!addrs->interp_head_addr && @@ -158,8 +167,12 @@ PyABI ELF::WalkTable(int sym, int str, PyAddresses *addrs) { strcmp(name, "_PyCode_SetExtra") == 0) { // Symbols added for Python 3.6, see: // https://www.python.org/dev/peps/pep-0523/ - have_abi = true; abi = PyABI::Py36; + } else if (strcmp(name, "Py_UTF8Mode") == 0) { + // Symbol added in Python 3.7 + // See https://www.python.org/dev/peps/pep-0540/ + have_abi = true; + abi = PyABI::Py37; } } } diff --git a/src/symbol.h b/src/symbol.h index bb92b9a..ba50da9 100644 --- a/src/symbol.h +++ b/src/symbol.h @@ -53,12 +53,14 @@ enum class PyABI { Unknown = 0, // Unknown Python ABI Py26 = 26, // ABI for Python 2.6/2.7 Py34 = 34, // ABI for Python 3.4/3.5 - Py36 = 36 // ABI for Python 3.6 + Py36 = 36, // ABI for Python 3.6 + Py37 = 37, // ABI for Python 3.7 }; // Symbols struct PyAddresses { unsigned long tstate_addr; + unsigned long tstate_get_addr; unsigned long interp_head_addr; unsigned long interp_head_fn_addr; unsigned long interp_head_hint; @@ -66,6 +68,7 @@ struct PyAddresses { PyAddresses() : tstate_addr(0), + tstate_get_addr(0), interp_head_addr(0), interp_head_fn_addr(0), interp_head_hint(0), @@ -74,6 +77,7 @@ struct PyAddresses { PyAddresses operator-(const unsigned long base) const { PyAddresses res(*this); res.tstate_addr = this->tstate_addr == 0 ? 0 : this->tstate_addr - base; + res.tstate_get_addr = this->tstate_get_addr == 0 ? 0 : this->tstate_get_addr - base; res.interp_head_addr = this->interp_head_addr == 0 ? 0 : this->interp_head_addr - base; res.interp_head_fn_addr = @@ -84,6 +88,7 @@ struct PyAddresses { PyAddresses operator+(const unsigned long base) const { PyAddresses res(*this); res.tstate_addr = this->tstate_addr == 0 ? 0 : this->tstate_addr + base; + res.tstate_get_addr = this->tstate_get_addr == 0 ? 0 : this->tstate_get_addr + base; res.interp_head_addr = this->interp_head_addr == 0 ? 0 : this->interp_head_addr + base; res.interp_head_fn_addr = @@ -95,7 +100,7 @@ struct PyAddresses { explicit operator bool() const { return !empty(); } // Empty means the struct hasn't been initialized. - inline bool empty() const { return this->tstate_addr == 0; } + inline bool empty() const { return this->tstate_addr == 0 and this->tstate_get_addr == 0; } }; // Representation of an ELF file. From 610b5281502ff6d57471e84071f17a33d30f3bcf Mon Sep 17 00:00:00 2001 From: Henry Zhang Date: Wed, 29 Aug 2018 14:52:15 -0700 Subject: [PATCH 2/4] Add python3.7 to travis test matrix --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 9cc00ec..8a2d6c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,7 @@ env: - PYVERSION=python3.4 - PYVERSION=python3.5 - PYVERSION=python3.6 + - PYVERSION=python3.7 addons: apt: From ec82a43c90da64815a87d4e3fe2a12ec3c93dc38 Mon Sep 17 00:00:00 2001 From: Henry Zhang Date: Wed, 29 Aug 2018 15:08:36 -0700 Subject: [PATCH 3/4] Update ppa and Ubuntu version Trusty with 2.7.6 no longer can download from pypi due to enforced newer TLS --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8a2d6c7..12e2c61 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ sudo: required -dist: trusty +dist: xenial language: cpp compiler: gcc @@ -17,7 +17,7 @@ env: addons: apt: sources: - - sourceline: 'ppa:fkrull/deadsnakes' + - sourceline: 'ppa:deadsnakes/ppa' - autotools-dev - libtool - pkg-config From fb81e40398d6209c38d49d0b6758d9581b3c2bba Mon Sep 17 00:00:00 2001 From: Henry Zhang Date: Wed, 29 Aug 2018 16:29:50 -0700 Subject: [PATCH 4/4] Clang-Format --- src/frob.cc | 6 +++--- src/symbol.cc | 14 ++++++++------ src/symbol.h | 10 +++++++--- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/frob.cc b/src/frob.cc index 337a3e8..c68ddef 100644 --- a/src/frob.cc +++ b/src/frob.cc @@ -279,9 +279,9 @@ std::vector GetThreads(pid_t pid, PyAddresses addrs, if (tstate == 0 && addrs.tstate_get_addr != 0) { // If we are Python 3.7, there will be no global reference to current thread - // state, and the gilstate's ThreadState will be null if during memory probing - // the child was not executing Python code. We need to run this function - // to get the current running ThreadState + // state, and the gilstate's ThreadState will be null if during memory + // probing the child was not executing Python code. We need to run this + // function to get the current running ThreadState tstate = PtraceCallFunction(pid, addrs.tstate_get_addr); } diff --git a/src/symbol.cc b/src/symbol.cc index 115a36d..56fc37e 100644 --- a/src/symbol.cc +++ b/src/symbol.cc @@ -140,12 +140,14 @@ PyABI ELF::WalkTable(int sym, int str, PyAddresses *addrs) { if (!addrs->tstate_addr && strcmp(name, "_PyThreadState_Current") == 0) { addrs->tstate_addr = static_cast(sym->st_value); - } else if (!addrs->tstate_addr && strcmp(name, "_PyThreadState_UncheckedGet") == 0) { - // In Python 3.7, the _PyThreadState_Current variable is held by _PyRuntime, which - // is defined in a private header. This function allows us to retrieve the pointer - // to the currently running thread. This function can't simply be duplicated - // because the implementation is defined using a macro to an internal header - // See https://github.com/python/cpython/commit/2ebc5ce42a8a9e047e790aefbf9a94811569b2b6 + } else if (!addrs->tstate_addr && + strcmp(name, "_PyThreadState_UncheckedGet") == 0) { + // In Python 3.7, the _PyThreadState_Current variable is held by + // _PyRuntime, which is defined in a private header. This function allows + // us to retrieve the pointer to the currently running thread. This + // function can't simply be duplicated because the implementation is + // defined using a macro to an internal header See + // https://github.com/python/cpython/commit/2ebc5ce42a8a9e047e790aefbf9a94811569b2b6 // (bpo-30860) addrs->tstate_get_addr = static_cast(sym->st_value); } else if (!addrs->interp_head_addr && strcmp(name, "interp_head") == 0) { diff --git a/src/symbol.h b/src/symbol.h index ba50da9..23e82bd 100644 --- a/src/symbol.h +++ b/src/symbol.h @@ -77,7 +77,8 @@ struct PyAddresses { PyAddresses operator-(const unsigned long base) const { PyAddresses res(*this); res.tstate_addr = this->tstate_addr == 0 ? 0 : this->tstate_addr - base; - res.tstate_get_addr = this->tstate_get_addr == 0 ? 0 : this->tstate_get_addr - base; + res.tstate_get_addr = + this->tstate_get_addr == 0 ? 0 : this->tstate_get_addr - base; res.interp_head_addr = this->interp_head_addr == 0 ? 0 : this->interp_head_addr - base; res.interp_head_fn_addr = @@ -88,7 +89,8 @@ struct PyAddresses { PyAddresses operator+(const unsigned long base) const { PyAddresses res(*this); res.tstate_addr = this->tstate_addr == 0 ? 0 : this->tstate_addr + base; - res.tstate_get_addr = this->tstate_get_addr == 0 ? 0 : this->tstate_get_addr + base; + res.tstate_get_addr = + this->tstate_get_addr == 0 ? 0 : this->tstate_get_addr + base; res.interp_head_addr = this->interp_head_addr == 0 ? 0 : this->interp_head_addr + base; res.interp_head_fn_addr = @@ -100,7 +102,9 @@ struct PyAddresses { explicit operator bool() const { return !empty(); } // Empty means the struct hasn't been initialized. - inline bool empty() const { return this->tstate_addr == 0 and this->tstate_get_addr == 0; } + inline bool empty() const { + return this->tstate_addr == 0 and this->tstate_get_addr == 0; + } }; // Representation of an ELF file.