Update benchmark configuration files

david-chapela · david-chapela · commit a75c534e7350 · 2024-10-01T16:43:21.000+03:00
diff --git a/resources/config/benchmark/benchmark_diff.yml b/resources/config/benchmark/benchmark_diff.yml
@@ -1,12 +1,12 @@
-version: 4
+version: 5
 ##########
 # Common #
 ##########
 common:
   # Path to the event log in CSV format
-  train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
+  train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
   # Event log to evaluate the discovered BPS model with
-  test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
+  test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
   # Use observed arrival distributions
   use_observed_arrival_distribution: false
   # Specify the name for each of the columns in the CSV file (XES standard by default)
@@ -28,7 +28,7 @@ common:
     - arrival_event_distribution
     - cycle_time_distribution
   # Whether to discover case attributes or not
-  discover_case_attributes: false
+  discover_data_attributes: false
 #################
 # Preprocessing #
 #################
@@ -62,9 +62,7 @@ control_flow:
     - true
     - false
   # Whether to prioritize parallelism over loops or not
-  prioritize_parallelism:
-    - true
-    - false
+  prioritize_parallelism: true
 ##################
 # Resource model #
 ##################
@@ -83,9 +81,7 @@ resource_model:
     # Resource profile discovery type
     discovery_type: differentiated
     # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
-    granularity:
-      - 15
-      - 60
+    granularity: 60
     # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
     confidence:
       - 0.5
diff --git a/resources/config/benchmark/benchmark_diff_data_aware.yml b/resources/config/benchmark/benchmark_diff_data_aware.yml
@@ -0,0 +1,98 @@
+version: 5
+##########
+# Common #
+##########
+common:
+  # Path to the event log in CSV format
+  train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
+  # Event log to evaluate the discovered BPS model with
+  test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
+  # Specify the name for each of the columns in the CSV file (XES standard by default)
+  log_ids:
+    case: "case_id"
+    activity: "activity"
+    resource: "resource"
+    start_time: "start_time"
+    end_time: "end_time"
+  # Number of evaluations of the discovered BPS model
+  num_final_evaluations: 10
+  # Metrics to evaluate the discovered BPS model
+  evaluation_metrics:
+    - 3_gram_distance
+    - 2_gram_distance
+    - absolute_event_distribution
+    - relative_event_distribution
+    - circadian_event_distribution
+    - arrival_event_distribution
+    - cycle_time_distribution
+  # Whether to discover data attributes or not
+  discover_data_attributes: true
+#################
+# Preprocessing #
+#################
+preprocessing:
+  multitasking: false
+  enable_time_concurrency_threshold: 0.5
+################
+# Control-flow #
+################
+control_flow:
+  # Metric to guide the optimization process (loss function to minimize)
+  optimization_metric: two_gram_distance
+  # Number of optimization iterations over the search space
+  num_iterations: 30
+  # Number of times to evaluate each iteration (using the mean of all of them)
+  num_evaluations_per_iteration: 5
+  # Methods for discovering gateway probabilities
+  gateway_probabilities: discovery
+  # Split Miner variant used to discover the process model (NOTE(review): "sm1" presumably selects v1, not v3 - confirm)
+  mining_algorithm: sm1
+  # Number of concurrent relations between events to be captured
+  epsilon:
+    - 0.05
+    - 0.4
+  # Threshold for filtering the incoming and outgoing edges
+  eta:
+    - 0.2
+    - 0.7
+  # Whether to replace non-trivial OR joins or not
+  replace_or_joins:
+    - true
+    - false
+  # Whether to prioritize parallelism over loops or not
+  prioritize_parallelism: true
+  # Discover data-aware branching rules, i.e., BPMN decision points based on the values of data attributes
+  discover_branch_rules: true
+  # Minimum f-score value to consider the discovered data-aware branching rules
+  f_score:
+    - 0.3
+    - 0.9
+##################
+# Resource model #
+##################
+resource_model:
+  # Metric to guide the optimization process (loss function to minimize)
+  optimization_metric: circadian_emd
+  # Number of optimization iterations over the search space
+  num_iterations: 40
+  # Number of times to evaluate each iteration (using the mean of all of them)
+  num_evaluations_per_iteration: 5
+  # Whether to discover prioritization or batching behavior
+  discover_prioritization_rules: false
+  discover_batching_rules: false
+  # Resource profiles configuration
+  resource_profiles:
+    # Resource profile discovery type
+    discovery_type: differentiated
+    # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
+    granularity: 60
+    # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
+    confidence:
+      - 0.5
+      - 0.85
+    # Minimum support of the intervals in the discovered calendar (of a resource or set of resources)
+    support:
+      - 0.05
+      - 0.5
+    # Participation of a resource in the process to discover a calendar for them (gathered together otherwise)
+    participation: 0.4
diff --git a/resources/config/benchmark/benchmark_diff_extr.yml b/resources/config/benchmark/benchmark_diff_extr.yml
@@ -1,14 +1,12 @@
-version: 4
+version: 5
 ##########
 # Common #
 ##########
 common:
   # Path to the event log in CSV format
-  train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
+  train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
   # Event log to evaluate the discovered BPS model with
-  test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
-  # Use observed arrival distributions
-  use_observed_arrival_distribution: false
+  test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
   # Specify the name for each of the columns in the CSV file (XES standard by default)
   log_ids:
     case: "case_id"
@@ -28,7 +26,7 @@ common:
     - arrival_event_distribution
     - cycle_time_distribution
   # Whether to discover case attributes or not
-  discover_case_attributes: false
+  discover_data_attributes: false
 #################
 # Preprocessing #
 #################
@@ -62,9 +60,7 @@ control_flow:
     - true
     - false
   # Whether to prioritize parallelism over loops or not
-  prioritize_parallelism:
-    - true
-    - false
+  prioritize_parallelism: true
 ##################
 # Resource model #
 ##################
@@ -83,9 +79,7 @@ resource_model:
     # Resource profile discovery type
     discovery_type: differentiated
     # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
-    granularity:
-      - 15
-      - 60
+    granularity: 60
     # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
     confidence:
       - 0.5
@@ -100,6 +94,8 @@ resource_model:
 # Extraneous delays #
 #####################
 extraneous_activity_delays:
+  # Method to compute the extraneous delay (naive or eclipse-aware)
+  discovery_method: eclipse-aware
   # Metric to guide the optimization process (loss function to minimize)
   optimization_metric: relative_emd
   # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
diff --git a/resources/config/benchmark/benchmark_fuzz.yml b/resources/config/benchmark/benchmark_fuzz.yml
@@ -1,14 +1,12 @@
-version: 4
+version: 5
 ##########
 # Common #
 ##########
 common:
   # Path to the event log in CSV format
-  train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
+  train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
   # Event log to evaluate the discovered BPS model with
-  test_log_path: ../../event_logs/AcademicCredentials_W_test.csv.gz
-  # Use observed arrival distributions
-  use_observed_arrival_distribution: false
+  test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
   # Specify the name for each of the columns in the CSV file (XES standard by default)
   log_ids:
     case: "case_id"
@@ -28,7 +26,7 @@ common:
     - arrival_event_distribution
     - cycle_time_distribution
   # Whether to discover case attributes or not
-  discover_case_attributes: false
+  discover_data_attributes: false
 #################
 # Preprocessing #
 #################
@@ -62,9 +60,7 @@ control_flow:
     - true
     - false
   # Whether to prioritize parallelism over loops or not
-  prioritize_parallelism:
-    - true
-    - false
+  prioritize_parallelism: true
 ##################
 # Resource model #
 ##################
@@ -83,17 +79,7 @@ resource_model:
     # Resource profile discovery type
     discovery_type: differentiated_fuzzy
     # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
-    granularity:
-      - 60
-      - 120
+    granularity: 60
     fuzzy_angle:
       - 0.1
       - 0.9
-#####################
-# Extraneous delays #
-#####################
-#extraneous_activity_delays:
-  # Metric to guide the optimization process (loss function to minimize)
-#  optimization_metric: relative_emd
-  # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
-#  num_iterations: 20
diff --git a/resources/config/benchmark/benchmark_fuzz_extr.yml b/resources/config/benchmark/benchmark_fuzz_extr.yml
@@ -1,14 +1,12 @@
-version: 4
+version: 5
 ##########
 # Common #
 ##########
 common:
   # Path to the event log in CSV format
-  train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
+  train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
   # Event log to evaluate the discovered BPS model with
-  test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
-  # Use observed arrival distributions
-  use_observed_arrival_distribution: false
+  test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
   # Specify the name for each of the columns in the CSV file (XES standard by default)
   log_ids:
     case: "case_id"
@@ -28,7 +26,7 @@ common:
     - arrival_event_distribution
     - cycle_time_distribution
   # Whether to discover case attributes or not
-  discover_case_attributes: false
+  discover_data_attributes: false
 #################
 # Preprocessing #
 #################
@@ -62,9 +60,7 @@ control_flow:
     - true
     - false
   # Whether to prioritize parallelism over loops or not
-  prioritize_parallelism:
-    - true
-    - false
+  prioritize_parallelism: true
 ##################
 # Resource model #
 ##################
@@ -83,16 +79,16 @@ resource_model:
     # Resource profile discovery type
     discovery_type: differentiated_fuzzy
     # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
-    granularity:
-      - 60
-      - 120
+    granularity: 60
     fuzzy_angle:
       - 0.1
       - 0.9
 #####################
 # Extraneous delays #
 #####################
 extraneous_activity_delays:
+  # Method to compute the extraneous delay (naive or eclipse-aware)
+  discovery_method: eclipse-aware
   # Metric to guide the optimization process (loss function to minimize)
   optimization_metric: relative_emd
   # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
diff --git a/resources/config/benchmark/benchmark_pool.yml b/resources/config/benchmark/benchmark_pool.yml
@@ -1,14 +1,12 @@
-version: 4
+version: 5
 ##########
 # Common #
 ##########
 common:
   # Path to the event log in CSV format
-  train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
+  train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
   # Event log to evaluate the discovered BPS model with
-  test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
-  # Use observed arrival distributions
-  use_observed_arrival_distribution: false
+  test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
   # Specify the name for each of the columns in the CSV file (XES standard by default)
   log_ids:
     case: "case_id"
@@ -28,7 +26,7 @@ common:
     - arrival_event_distribution
     - cycle_time_distribution
   # Whether to discover case attributes or not
-  discover_case_attributes: false
+  discover_data_attributes: false
 #################
 # Preprocessing #
 #################
@@ -62,9 +60,7 @@ control_flow:
     - true
     - false
   # Whether to prioritize parallelism over loops or not
-  prioritize_parallelism:
-    - true
-    - false
+  prioritize_parallelism: true
 ##################
 # Resource model #
 ##################
@@ -83,9 +79,7 @@ resource_model:
     # Resource profile discovery type
     discovery_type: pool
     # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
-    granularity:
-      - 15
-      - 60
+    granularity: 60
     # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
     confidence:
       - 0.5
@@ -95,4 +89,4 @@ resource_model:
       - 0.05
       - 0.5
     # Participation of a resource in the process to discover a calendar for them (gathered together otherwise)
-    participation: 0.4
+    participation: 0.4