causify-ai · gpsaggese · Aug 3, 2025
diff --git a/helpers/hasyncio.py b/helpers/hasyncio.py
@@ -58,7 +58,7 @@ class _EventLoop(async_solipsism.EventLoop):
     #  the replayed time approach and can remove `ReplayedTime` object.
     def __init__(self) -> None:
         super().__init__()
-        self._initial_dt = datetime.datetime.utcnow()
+        self._initial_dt = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
 
     def get_current_time(self) -> datetime.datetime:
         # `loop.time()` returns the number of seconds as `float` from when the event
@@ -100,7 +100,7 @@ def solipsism_context() -> Iterator:
 
 
 async def gather_coroutines_with_wall_clock(
-    event_loop: asyncio.AbstractEventLoop, *coroutines: List[Coroutine]
+    event_loop: asyncio.AbstractEventLoop, *coroutines: Callable[..., Coroutine]
 ) -> List[Any]:
     """
     Inject a wall clock associated to `event_loop` in all the coroutines and
@@ -111,9 +111,9 @@ async def gather_coroutines_with_wall_clock(
     )
     # Construct the coroutines here by passing the `get_wall_clock_time()`
     # function.
-    coroutines = [coro(get_wall_clock_time) for coro in coroutines]
+    coroutine_instances = [coro(get_wall_clock_time) for coro in coroutines]
     #
-    result: List[Any] = await asyncio.gather(*coroutines)
+    result: List[Any] = await asyncio.gather(*coroutine_instances)
     return result
 
 

diff --git a/helpers/hcache.py b/helpers/hcache.py
@@ -182,7 +182,10 @@ def _get_cache_size(path: str, description: str) -> str:
     else:
         if os.path.exists(path):
             size_in_bytes = hsystem.du(path)
-            size_as_str = hintros.format_size(size_in_bytes)
+            if isinstance(size_in_bytes, str):
+                size_as_str = size_in_bytes
+            else:
+                size_as_str = hintros.format_size(size_in_bytes)
         else:
             size_as_str = "nan"
         # TODO(gp): Compute number of files.

diff --git a/helpers/hcache_simple.py b/helpers/hcache_simple.py
@@ -5,7 +5,10 @@
 import os
 import pickle
 import re
-from typing import Any, Callable, Dict, List, Union, cast
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Union, cast
+
+if TYPE_CHECKING:
+    import pandas as pd
 
 import helpers.hdbg as hdbg
 import helpers.hprint as hprint
@@ -406,7 +409,7 @@ def get_cache(func_name: str) -> _CacheType:
 # #############################################################################
 
 
-def cache_stats_to_str(func_name: str = "") -> "pd.DataFrame":  # noqa: F821
+def cache_stats_to_str(func_name: str = "") -> "pd.DataFrame":
     """
     Print the cache stats for a function or for all functions.
 

diff --git a/helpers/hchatgpt.py b/helpers/hchatgpt.py
@@ -9,14 +9,17 @@
 import os
 import sys
 import time
-from typing import Dict, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional
 
 import helpers.hdbg as hdbg
 import helpers.henv as henv
 import helpers.hio as hio
 
-henv.install_module_if_not_present("openai")
-import openai  # noqa: E402
+if TYPE_CHECKING:
+    import openai  # type: ignore
+else:
+    henv.install_module_if_not_present("openai")
+    import openai  # noqa: E402
 
 _LOG = logging.getLogger(__name__)
 
@@ -41,7 +44,7 @@ def create_assistant(
     model: str = "gpt-3.5-turbo-1106",
     use_retrieval: bool = True,
     use_code_interpreter: bool = True,
-    use_function: Dict = None,
+    use_function: Optional[Dict] = None,
 ) -> str:
     """
     Create an OpenAI Assistant for your OpenAI Organization. All configs can
@@ -175,6 +178,7 @@ def _path_to_dict(path: str) -> Dict:
         tree = {d: _path_to_dict(os.path.join(root, d)) for d in dirs}
         tree.update({f: {"name": f} for f in files})
         return tree
+    return {}
 
 
 # TODO(Henry): We use fileIO here to store the directory structure, which may

diff --git a/helpers/hcsv.py b/helpers/hcsv.py
@@ -7,7 +7,7 @@
 import ast
 import logging
 import os
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, cast
 
 import pandas as pd
 
@@ -45,7 +45,7 @@ def _read_csv_range(
     """
     hdbg.dassert_lt(0, from_, msg="Row 0 assumed to be header row")
     hdbg.dassert_lt(from_, to, msg="Empty range requested!")
-    skiprows = range(1, from_)
+    skiprows = list(range(1, from_))
     nrows = to - from_
     df = pd.read_csv(csv_path, skiprows=skiprows, nrows=nrows, **kwargs)
     if df.shape[0] < to:
@@ -236,7 +236,7 @@ def convert_csv_dir_to_pq_dir(
     if hs3.is_s3_path(csv_dir):
         # TODO(gp): Pass aws_profile.
         s3fs = hs3.get_s3fs("am")
-        filenames = s3fs.ls(csv_dir)
+        filenames = cast(Any, s3fs).ls(csv_dir)
     else:
         # Local filesystem.
         hdbg.dassert_dir_exists(csv_dir)

diff --git a/helpers/hdataframe.py b/helpers/hdataframe.py
@@ -236,9 +236,10 @@ def infer_sampling_points_per_year(df: Union[pd.Series, pd.DataFrame]) -> float:
     :return: number of time points per year (approximate)
     """
     hdbg.dassert(hasattr(df.index, "freq") and df.index.freq is not None)
+    assert hasattr(df.index, "freq") and df.index.freq is not None
     freq = df.index.freq
     # TODO(*): Make start, end dates parameters that can be passed in.
-    return compute_points_per_year_for_given_freq(freq)
+    return compute_points_per_year_for_given_freq(cast(Any, freq).freqstr)
 
 
 @functools.lru_cache()
@@ -274,7 +275,7 @@ def compute_count_per_year(df: Union[pd.Series, pd.DataFrame]) -> float:
     assert hasattr(df.index, "freq") and df.index.freq is not None
     freq = df.index.freq
     # Calculate the time span of `df` in years.
-    points_per_year = compute_points_per_year_for_given_freq(freq)
+    points_per_year = compute_points_per_year_for_given_freq(freq.freqstr)
     span_in_years = df.size / points_per_year
     # Determine the number of non-NaN/inf/etc. data points per year.
     count_per_year = df.count() / span_in_years
@@ -302,7 +303,7 @@ def remove_duplicates(
     # Fix maximum value of control column at the bottom.
     if control_column:
         df = df.sort_values(by=control_column)
-    duplicate_columns = duplicate_columns or df.columns
+    duplicate_columns = duplicate_columns or list(df.columns)
     df = df.drop_duplicates(subset=duplicate_columns)
     # Sort by index to return to original view.
     df = df.sort_index()

diff --git a/helpers/hdatetime.py b/helpers/hdatetime.py
@@ -9,7 +9,7 @@
 import datetime
 import logging
 import re
-from typing import Callable, Iterable, Optional, Tuple, Union
+from typing import Any, Callable, Iterable, Optional, Tuple, Union, cast
 
 # TODO(gp): Use hdbg.WARNING
 _WARNING = "\033[33mWARNING\033[0m"
@@ -371,11 +371,12 @@ def get_current_time(
         # We accept only `hasyncio.EventLoop` here. If we are using standard asyncio
         # EventLoop we rely on wall-clock time instead of `loop.time()`.
         hdbg.dassert_isinstance(event_loop, asyncio.AbstractEventLoop)
+        assert isinstance(event_loop, asyncio.AbstractEventLoop)
         hdbg.dassert(hasattr(event_loop, "get_current_time"))
-        timestamp = event_loop.get_current_time()
+        timestamp = cast(Any, event_loop).get_current_time()
     else:
         # Use true real-time.
-        timestamp = datetime.datetime.utcnow()
+        timestamp = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
     # Convert it into the right
     timestamp = pd.Timestamp(timestamp, tz=get_UTC_tz())
     if tz == "UTC":
@@ -616,9 +617,9 @@ def to_generalized_datetime(
     # Handle both scalar and array cases for `pd.isna()`.
     if hasattr(datetime_dates, "all"):
         # datetime_dates is a Series or array-like
-        all_na = pd.isna(datetime_dates).all()
+        all_na = cast(Any, pd.isna(datetime_dates)).all()
         datetime_example = (
-            datetime_dates.tolist()[format_example_index]
+            cast(Any, datetime_dates).tolist()[format_example_index]
             if hasattr(datetime_dates, "tolist")
             else datetime_dates
         )
@@ -632,25 +633,25 @@ def to_generalized_datetime(
             and hasattr(datetime_example, "strftime")
             and datetime_example.strftime("%Y-%m-%d") == date_example
         ):
-            return datetime_dates
+            return cast(Union[pd.Series, pd.Index], datetime_dates)
         shift_func = _shift_to_period_end(date_example)
         if shift_func is not None:
             if hasattr(datetime_dates, "map"):
-                datetime_dates = datetime_dates.map(shift_func)
+                datetime_dates = cast(Any, datetime_dates).map(shift_func)
             else:
                 # For scalar case, apply the shift function directly
                 datetime_dates = shift_func(datetime_dates)
-        return datetime_dates
+        return cast(Union[pd.Series, pd.Index], datetime_dates)
     # If standard conversion fails, attempt our own conversion.
     date_standard = date_standard or "standard"
     format_determination_output = _determine_date_format(
         date_example, date_standard
     )
     if format_determination_output is None:
-        return datetime_dates
+        return cast(Union[pd.Series, pd.Index], datetime_dates)
     format_, date_modification_func = format_determination_output
     dates = dates.map(date_modification_func)
-    return pd.to_datetime(dates, format=format_)
+    return cast(Union[pd.Series, pd.Index], pd.to_datetime(dates, format=format_))
 
 
 def _handle_incorrect_conversions(
@@ -722,7 +723,7 @@ def shift_to_year_end(x: StrictDatetime) -> StrictDatetime:
     # shift the month aliases by one to get the correct order.
     # E.g., `calendar.month_name[1:]` is `['January', 'February', ...]` and
     # `calendar.month_abbr[1:]` is `['Jan', 'Feb', ...]`.
-    month_aliases = calendar.month_name[1:] + calendar.month_abbr[1:]
+    month_aliases = list(calendar.month_name[1:]) + list(calendar.month_abbr[1:])
     pattern = re.compile("|".join(month_aliases), re.IGNORECASE)
     match = pattern.search(date)
     if match is None:

diff --git a/helpers/hdict.py b/helpers/hdict.py
@@ -6,6 +6,7 @@
 
 import logging
 from typing import (
+    TYPE_CHECKING,
     Any,
     Dict,
     Generator,
@@ -23,6 +24,9 @@
 
 import helpers.hdbg as hdbg
 
+if TYPE_CHECKING:
+    from config_root.config.config_ import Config
+
 _LOG = logging.getLogger(__name__)
 
 
@@ -71,7 +75,7 @@ def extract_leaf_values(nested: Dict[Any, Any], key: Any) -> Dict[Any, Any]:
 
 
 def typed_get(
-    dict_: Union[Dict, "Config"],  # noqa: F821
+    dict_: Union[Dict, "Config"],  # type: ignore  # noqa: F821
     key: Any,
     default_value: Optional[Any] = _NO_VALUE_SPECIFIED,
     *,

diff --git a/helpers/hdockerized_executables.py b/helpers/hdockerized_executables.py
@@ -247,18 +247,18 @@ def convert_pandoc_cmd_to_arguments(cmd: str) -> Dict[str, Any]:
     :return: A dictionary with the parsed arguments.
     """
     # Use shlex.split to tokenize the string like a shell would.
-    cmd = shlex.split(cmd)
+    cmd_list = shlex.split(cmd)
     # Remove the newline character that come from multiline commands with `\n`.
-    cmd = [arg for arg in cmd if arg != "\n"]
+    cmd_list = [arg for arg in cmd_list if arg != "\n"]
-    _LOG.debug(hprint.to_str("cmd"))
+    _LOG.debug(hprint.to_str("cmd_list"))
     # The first option is the executable.
-    hdbg.dassert_eq(cmd[0], "pandoc")
+    hdbg.dassert_eq(cmd_list[0], "pandoc")
     # pandoc parser is difficult to emulate with `argparse`, since pandoc allows
     # the input file to be anywhere in the command line options. In our case we
     # don't know all the possible command line options so for simplicity we
     # assume that the first option is always the input file.
-    in_file_path = cmd[1]
-    cmd = cmd[2:]
+    in_file_path = cmd_list[1]
+    cmd_list = cmd_list[2:]
-    _LOG.debug(hprint.to_str("cmd"))
+    _LOG.debug(hprint.to_str("cmd_list"))
     #
     parser = argparse.ArgumentParser()
@@ -267,7 +267,7 @@ def convert_pandoc_cmd_to_arguments(cmd: str) -> Dict[str, Any]:
     parser.add_argument("--template", default=None)
     parser.add_argument("--extract-media", default=None)
     # Parse known arguments and capture the rest.
-    args, unknown_args = parser.parse_known_args(cmd)
+    args, unknown_args = parser.parse_known_args(cmd_list)
     _LOG.debug(hprint.to_str("args unknown_args"))
     # Filter out the option terminator if present.
     # Remove the `--` option terminator to treat `--option-after-terminator` as a regular argument, not as an option.
@@ -707,31 +707,31 @@ def convert_latex_cmd_to_arguments(cmd: str) -> Dict[str, Any]:
     :return: A dictionary with the parsed arguments.
     """
     # Use shlex.split to tokenize the string like a shell would.
-    cmd = shlex.split(cmd)
+    cmd_list = shlex.split(cmd)
     # Remove the newline character that come from multiline commands with `\n`.
-    cmd = [arg for arg in cmd if arg != "\n"]
+    cmd_list = [arg for arg in cmd_list if arg != "\n"]
-    _LOG.debug(hprint.to_str("cmd"))
+    _LOG.debug(hprint.to_str("cmd_list"))
     # The first option is the executable.
-    hdbg.dassert_eq(cmd[0], "pdflatex")
+    hdbg.dassert_eq(cmd_list[0], "pdflatex")
-    # We assume that the first option is always the input file.
+    # We assume that the last option is always the input file.
-    in_file_path = cmd[-1]
+    in_file_path = cmd_list[-1]
     hdbg.dassert(
         not in_file_path.startswith("-"),
         "Invalid input file '%s'",
         in_file_path,
     )
     hdbg.dassert_file_exists(in_file_path)
-    cmd = cmd[1:-1]
+    cmd_list = cmd_list[1:-1]
-    _LOG.debug(hprint.to_str("cmd"))
+    _LOG.debug(hprint.to_str("cmd_list"))
     #
     parser = argparse.ArgumentParser()
     parser.add_argument("--output-directory", required=True)
     # Latex uses options like `-XYZ` which confuse `argparse` so we need to
     # replace `-XYZ` with `--XYZ`.
-    cmd = [re.sub(r"^-", r"--", cmd_opts) for cmd_opts in cmd]
+    cmd_list = [re.sub(r"^-", r"--", cmd_opts) for cmd_opts in cmd_list]
-    _LOG.debug(hprint.to_str("cmd"))
+    _LOG.debug(hprint.to_str("cmd_list"))
     # # Parse known arguments and capture the rest.
-    args, unknown_args = parser.parse_known_args(cmd)
+    args, unknown_args = parser.parse_known_args(cmd_list)
     _LOG.debug(hprint.to_str("args unknown_args"))
     # Return all the arguments in a dictionary with names that match the
     # function signature of `run_dockerized_pandoc()`.

diff --git a/helpers/henv.py b/helpers/henv.py
@@ -453,7 +453,7 @@ def _get_library_version(lib_name: str) -> str:
     return version
 
 
-def _get_package_info() -> Tuple[List[str], int]:
+def _get_package_info() -> Tuple[str, int]:
     """
     Get package version information.
 
@@ -559,8 +559,8 @@ def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]:
     hprint.dassert_one_trailing_newline(txt_tmp)
     txt.append(txt_tmp)
     #
-    txt = hprint.to_info("System signature", txt)
-    return txt, failed_imports
+    txt_str: str = hprint.to_info("System signature", txt)
+    return txt_str, failed_imports
 
 
 # #############################################################################

diff --git a/helpers/hgit.py b/helpers/hgit.py
@@ -11,7 +11,7 @@
 import random
 import re
 import string
-from typing import List, Optional, Tuple
+from typing import Any, List, Optional, Tuple, cast
 
 import helpers.hdbg as hdbg
 import helpers.hprint as hprint
@@ -242,7 +242,8 @@ def find_git_root(path: str = ".") -> str:
         )
         # Update the path to the parent directory for the next iteration.
         path = parent
-    return git_root_dir
+    hdbg.dassert_is_not(git_root_dir, None, "Git root directory should have been found")
+    return str(git_root_dir)
 
 
 # #############################################################################
@@ -853,7 +854,7 @@ def get_path_from_git_root(
         super_module,
         ret,
     )
-    return ret
+    return str(ret)
 
 
 # TODO(gp): Rewrite this function in a better way.
@@ -1394,7 +1395,7 @@ def does_branch_exist(
             exists_tmp = does_branch_exist(
                 branch_name, mode_tmp, dir_name=dir_name
             )
-            exists |= exists_tmp
+            exists = exists or exists_tmp
         return exists
     #
     hdbg.dassert_in(mode, ("git_local", "git_remote", "github"))