Skip to content

Commit e0586ef

Browse files
authored
Add a command type (#118)
This command type splits a command line into an executable (a path) and its arguments (a list). Windows-style commands are detected on a best-effort basis, and their arguments are kept as a single string because Windows executables handle their own argument parsing. (DIS-2977)
1 parent 4a47670 commit e0586ef

4 files changed

Lines changed: 267 additions & 14 deletions

File tree

flow/record/fieldtypes/__init__.py

Lines changed: 120 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@
55
import os
66
import pathlib
77
import re
8+
import shlex
89
import sys
910
import warnings
1011
from binascii import a2b_hex, b2a_hex
1112
from datetime import datetime as _dt
1213
from datetime import timezone
1314
from posixpath import basename, dirname
14-
from typing import Any, Optional, Tuple
15+
from typing import Any, Optional
1516
from urllib.parse import urlparse
1617

1718
try:
@@ -34,8 +35,8 @@
3435
PY_311 = sys.version_info >= (3, 11, 0)
3536
PY_312 = sys.version_info >= (3, 12, 0)
3637

37-
PATH_POSIX = 0
38-
PATH_WINDOWS = 1
38+
TYPE_POSIX = 0
39+
TYPE_WINDOWS = 1
3940

4041
string_type = str
4142
varint_type = int
@@ -694,15 +695,15 @@ def __repr__(self) -> str:
694695
return repr(str(self))
695696

696697
def _pack(self):
697-
path_type = PATH_WINDOWS if isinstance(self, windows_path) else PATH_POSIX
698+
path_type = TYPE_WINDOWS if isinstance(self, windows_path) else TYPE_POSIX
698699
return (str(self), path_type)
699700

700701
@classmethod
701-
def _unpack(cls, data: Tuple[str, str]):
702+
def _unpack(cls, data: tuple[str, str]):
702703
path_, path_type = data
703-
if path_type == PATH_POSIX:
704+
if path_type == TYPE_POSIX:
704705
return posix_path(path_)
705-
elif path_type == PATH_WINDOWS:
706+
elif path_type == TYPE_WINDOWS:
706707
return windows_path(path_)
707708
else:
708709
# Catch all: default to posix_path
@@ -734,3 +735,115 @@ def __repr__(self) -> str:
734735
quote = '"'
735736

736737
return f"{quote}{s}{quote}"
738+
739+
740+
class command(FieldType):
    """A command line split into an executable path and a list of arguments.

    Instantiating ``command(value)`` directly selects a concrete flavour on a
    best-effort basis: values that look like the start of a Windows path
    become a ``windows_command``, everything else a ``posix_command``.
    ``from_posix`` and ``from_windows`` force a flavour explicitly.

    Attributes are left at their class-level ``None`` defaults when the
    command is constructed from ``None``.
    """

    # The executable, wrapped in the flavour's path type (``_path_type``).
    executable: Optional[path] = None
    # The arguments that followed the executable.
    args: Optional[list[str]] = None

    # Both are provided by the concrete subclasses.
    _path_type: type[path] = None
    _posix: bool

    def __new__(cls, value: str) -> command:
        """Create the instance, choosing a flavour when called on ``command`` itself.

        Raises:
            ValueError: if ``command`` is instantiated directly with a non-string.
        """
        # Subclasses were chosen explicitly; no detection (or validation) needed.
        if cls is not command:
            return super().__new__(cls)

        if not isinstance(value, str):
            raise ValueError(f"Expected a value of type 'str' not {type(value)}")

        # Pre-check for values that start like a Windows path:
        #   a '%' for an environment variable
        #   r'\\' for a UNC path
        #   '<drive_letter>:' once leading quotes are stripped
        # Slicing ([1:2]) instead of indexing ([1]) keeps values shorter than
        # two characters (e.g. the command "w") from raising IndexError.
        unquoted = value.lstrip("\"'")
        windows = value.startswith((r"\\", "%")) or unquoted[1:2] == ":"

        if windows:
            cls = windows_command
        else:
            cls = posix_command
        return super().__new__(cls)

    def __init__(self, value: str | tuple[str, tuple[str]] | None):
        """Populate ``executable`` and ``args``.

        Args:
            value: A command line string to split, an ``(executable, args)``
                pair (the packed form), or ``None`` for an empty command.
        """
        if value is None:
            return

        if isinstance(value, str):
            self.executable, self.args = self._split(value)
            return

        executable, args = value
        self.executable = self._path_type(executable)
        self.args = list(args)

    def __repr__(self) -> str:
        return f"(executable={self.executable!r}, args={self.args})"

    def __eq__(self, other: Any) -> bool:
        """Compare with another command, a command line string, or a sequence.

        A tuple/list is interpreted as ``(executable, *args)``. Any other type
        compares unequal.
        """
        if isinstance(other, command):
            return self.executable == other.executable and self.args == other.args
        elif isinstance(other, str):
            # An empty command has no string form; joining would fail on args=None.
            if self.executable is None or self.args is None:
                return False
            return self._join() == other
        elif isinstance(other, (tuple, list)):
            # An empty sequence has no executable to compare against.
            if not other:
                return False
            return self.executable == other[0] and self.args == list(other[1:])

        return False

    def _split(self, value: str) -> tuple[path, list[str]]:
        """Split a command line into ``(executable, args)`` using ``shlex``."""
        executable, *args = shlex.split(value, posix=self._posix)
        # Non-POSIX shlex mode keeps surrounding quotes on tokens; drop them
        # (and stray spaces) from the executable.
        executable = executable.strip("'\" ")

        return self._path_type(executable), args

    def _join(self) -> str:
        """Return the command as a single shell-quoted string."""
        return shlex.join([str(self.executable), *self.args])

    def _pack(self) -> tuple[tuple[str, list] | None, int]:
        """Return ``((executable, args), type)``, or ``(None, type)`` without an executable."""
        command_type = TYPE_WINDOWS if isinstance(self, windows_command) else TYPE_POSIX
        if self.executable:
            # Only the string part of the packed path is stored; the flavour
            # is already recorded in command_type.
            _exec, _ = self.executable._pack()
            return ((_exec, self.args), command_type)
        else:
            return (None, command_type)

    @classmethod
    def _unpack(cls, data: tuple[tuple[str, tuple] | None, int]) -> command:
        """Rebuild a command from ``_pack`` output; unknown types fall back to POSIX."""
        _value, _type = data
        if _type == TYPE_WINDOWS:
            return windows_command(_value)

        return posix_command(_value)

    @classmethod
    def from_posix(cls, value: str) -> command:
        """Create a ``posix_command`` without flavour detection."""
        return posix_command(value)

    @classmethod
    def from_windows(cls, value: str) -> command:
        """Create a ``windows_command`` without flavour detection."""
        return windows_command(value)
824+
825+
826+
class posix_command(command):
    """Command flavour that splits with POSIX shlex rules and uses ``posix_path``."""

    _path_type = posix_path
    _posix = True
829+
830+
831+
class windows_command(command):
    """Command flavour for Windows command lines.

    The executable is a ``windows_path`` and everything after it is kept as a
    single argument string.
    """

    _path_type = windows_path
    _posix = False

    def _split(self, value: str) -> tuple[str, list[str]]:
        """Split ``value`` and merge all arguments back into one string."""
        program, tokens = super()._split(value)
        # An empty argument list is passed through untouched.
        merged = [" ".join(tokens)] if tokens else tokens
        return program, merged

    def _join(self) -> str:
        """Return the command line, single-quoting an executable that contains a space."""
        suffix = f" {self.args[0]}" if self.args else ""
        program = str(self.executable)
        if " " in program:
            program = f"'{program}'"
        return f"{program}{suffix}"

flow/record/jsonpacker.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ def pack_obj(self, obj):
7272
return base64.b64encode(obj).decode()
7373
if isinstance(obj, fieldtypes.path):
7474
return str(obj)
75+
if isinstance(obj, fieldtypes.command):
76+
return {
77+
"executable": obj.executable,
78+
"args": obj.args,
79+
}
7580

7681
raise Exception("Unpackable type " + str(type(obj)))
7782

flow/record/whitelist.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
WHITELIST = [
22
"boolean",
3+
"command",
34
"dynamic",
45
"datetime",
56
"filesize",

tests/test_fieldtypes.py

Lines changed: 141 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# coding: utf-8
2+
from __future__ import annotations
23

34
import hashlib
45
import os
@@ -12,14 +13,22 @@
1213
import flow.record.fieldtypes
1314
from flow.record import RecordDescriptor, RecordReader, RecordWriter
1415
from flow.record.fieldtypes import (
15-
PATH_POSIX,
16-
PATH_WINDOWS,
1716
PY_312,
17+
TYPE_POSIX,
18+
TYPE_WINDOWS,
1819
_is_posixlike_path,
1920
_is_windowslike_path,
21+
command,
2022
)
2123
from flow.record.fieldtypes import datetime as dt
22-
from flow.record.fieldtypes import fieldtype_for_value, net, uri, windows_path
24+
from flow.record.fieldtypes import (
25+
fieldtype_for_value,
26+
net,
27+
posix_command,
28+
uri,
29+
windows_command,
30+
windows_path,
31+
)
2332

2433
UTC = timezone.utc
2534

@@ -639,16 +648,16 @@ def test_path():
639648
assert isinstance(test_path, flow.record.fieldtypes.windows_path)
640649

641650
test_path = flow.record.fieldtypes.path.from_posix(posix_path_str)
642-
assert test_path._pack() == (posix_path_str, PATH_POSIX)
651+
assert test_path._pack() == (posix_path_str, TYPE_POSIX)
643652

644-
test_path = flow.record.fieldtypes.path._unpack((posix_path_str, PATH_POSIX))
653+
test_path = flow.record.fieldtypes.path._unpack((posix_path_str, TYPE_POSIX))
645654
assert str(test_path) == posix_path_str
646655
assert isinstance(test_path, flow.record.fieldtypes.posix_path)
647656

648657
test_path = flow.record.fieldtypes.path.from_windows(windows_path_str)
649-
assert test_path._pack() == (windows_path_str, PATH_WINDOWS)
658+
assert test_path._pack() == (windows_path_str, TYPE_WINDOWS)
650659

651-
test_path = flow.record.fieldtypes.path._unpack((windows_path_str, PATH_WINDOWS))
660+
test_path = flow.record.fieldtypes.path._unpack((windows_path_str, TYPE_WINDOWS))
652661
assert str(test_path) == windows_path_str
653662
assert isinstance(test_path, flow.record.fieldtypes.windows_path)
654663

@@ -998,5 +1007,130 @@ def test_datetime_comparisions():
9981007
assert dt("2023-01-02") != datetime(2023, 3, 4, tzinfo=UTC)
9991008

10001009

1010+
def test_command_record() -> None:
    """Strings assigned to a ``command`` field are parsed into POSIX commands."""
    descriptor = RecordDescriptor("test/command", [("command", "commando")])

    cases = [
        ("help.exe -h", "help.exe", ["-h"]),
        ("something.so -h -q -something", "something.so", ["-h", "-q", "-something"]),
    ]
    for raw, expected_executable, expected_args in cases:
        parsed = descriptor(commando=raw).commando
        assert isinstance(parsed, posix_command)
        assert parsed.executable == expected_executable
        assert parsed.args == expected_args
1027+
1028+
1029+
def test_command_integration(tmp_path: pathlib.Path) -> None:
    """Write a record holding a Windows-style command and read it back."""
    descriptor = RecordDescriptor("test/command", [("command", "commando")])
    record_file = tmp_path / "command_record"

    expected_executable = r"\\.\\?\some_command.exe"
    expected_args = [r"-h,help /d quiet"]

    with RecordWriter(record_file) as writer:
        written = descriptor(commando=r"\\.\\?\some_command.exe -h,help /d quiet")
        writer.write(written)
        assert written.commando.executable == expected_executable
        assert written.commando.args == expected_args

    with RecordReader(record_file) as reader:
        for loaded in reader:
            assert loaded.commando.executable == expected_executable
            assert loaded.commando.args == expected_args
1047+
1048+
1049+
def test_command_integration_none(tmp_path: pathlib.Path) -> None:
    """An empty command written to a record file is read back as empty."""
    descriptor = RecordDescriptor("test/command", [("command", "commando")])
    record_file = tmp_path / "command_record"

    with RecordWriter(record_file) as writer:
        empty = command.from_posix(None)
        writer.write(descriptor(commando=empty))

    with RecordReader(record_file) as reader:
        for loaded in reader:
            assert loaded.commando.executable is None
            assert loaded.commando.args is None
1064+
1065+
1066+
@pytest.mark.parametrize(
    "command_string, expected_executable, expected_argument",
    [
        # Relative executable with comma-separated arguments
        ("windows.exe something,or,somethingelse", "windows.exe", ["something,or,somethingelse"]),
        # A DLL used as the "executable"
        ("windows.dll something,or,somethingelse", "windows.dll", ["something,or,somethingelse"]),
        # Executable path starting with an environment variable
        (r"%WINDIR%\\windows.dll something,or,somethingelse", r"%WINDIR%\\windows.dll", ["something,or,somethingelse"]),
        # Quoted path containing spaces, followed by arguments
        (r"'c:\path to some exe' /d /a", r"c:\path to some exe", [r"/d /a"]),
        # Quoted path containing spaces, without arguments
        (r"'c:\Program Files\hello.exe'", r"c:\Program Files\hello.exe", []),
        # Quoted path containing spaces, with another path as the argument
        (r"'c:\Program Files\hello.exe' c:\startmepls.exe", r"c:\Program Files\hello.exe", [r"c:\startmepls.exe"]),
        # Empty command
        (None, None, None),
    ],
)
def test_command_windows(command_string: str, expected_executable: str, expected_argument: list[str]) -> None:
    """``windows_command`` splits off the executable and keeps the rest as one argument."""
    parsed = windows_command(command_string)

    assert parsed.executable == expected_executable
    assert parsed.args == expected_argument
1089+
1090+
1091+
@pytest.mark.parametrize(
    "command_string, expected_executable, expected_argument",
    [
        # Relative executable followed by several arguments
        ("some_file.so -h asdsad -f asdsadas", "some_file.so", ["-h", "asdsad", "-f", "asdsadas"]),
        # Executable path with a backslash-escaped space
        (r"/bin/hello\ world -h -word", r"/bin/hello world", ["-h", "-word"]),
    ],
)
def test_command_posix(command_string: str, expected_executable: str, expected_argument: list[str]) -> None:
    """``posix_command`` splits the executable and each argument separately."""
    parsed = posix_command(command_string)

    assert parsed.executable == expected_executable
    assert parsed.args == expected_argument
1105+
1106+
1107+
def test_command_equal() -> None:
    """Check ``command`` equality against commands, strings, lists and tuples."""
    # Command compared with command
    assert command("hello.so -h") == command("hello.so -h")
    assert command("hello.so -h") != command("hello.so")

    # Comparison against other types
    for equivalent in (["hello.so", "-h"], ("hello.so", "-h"), "hello.so -h"):
        assert command("hello.so -h") == equivalent
    assert command("c:\\hello.dll -h -b") == "c:\\hello.dll -h -b"

    # Executable paths that contain spaces
    assert command("'/home/some folder/file' -h") == "'/home/some folder/file' -h"
    assert command("'c:\\Program files\\some.dll' -h -q") == "'c:\\Program files\\some.dll' -h -q"
    assert command("'c:\\program files\\some.dll' -h -q") == ["c:\\program files\\some.dll", "-h -q"]
    assert command("'c:\\Program files\\some.dll' -h -q") == ("c:\\Program files\\some.dll", "-h -q")

    # Values that must compare unequal
    assert command("hello.so -h") != 1
    for different in ("hello.so -h", ["hello.so", ""], ("hello.so", "")):
        assert command("hello.so") != different
1128+
1129+
1130+
def test_command_failed() -> None:
    """Passing a non-string value to ``command`` raises ``ValueError``."""
    not_a_string = b"failed"
    with pytest.raises(ValueError):
        command(not_a_string)
1133+
1134+
10011135
if __name__ == "__main__":
10021136
__import__("standalone_test").main(globals())

0 commit comments

Comments
 (0)