diff --git a/src/attribute.jl b/src/attribute.jl index 35e4aa8..b1c847d 100644 --- a/src/attribute.jl +++ b/src/attribute.jl @@ -112,6 +112,49 @@ mutable struct TimeSeries{T} <: VectorAttribute num_dimensions::Int end +mutable struct TimeSeriesRelation{T} <: VectorAttribute + id::String + type::Type{T} + default_value::Union{Missing, T} + not_null::Bool + group_id::String + parent_collection::String + relation_collection::String + relation_type::String + table_where_is_located::String + dimension_names::Vector{String} + num_dimensions::Int + + function TimeSeriesRelation( + id::String, + type::Type{T}, + default_value::Union{Missing, T}, + not_null::Bool, + group_id::String, + parent_collection::String, + relation_collection::String, + relation_type::String, + table_where_is_located::String, + dimension_names::Vector{String}, + num_dimensions::Int, + ) where {T} + _check_valid_relation_name(id, relation_collection) + return new{T}( + id, + type, + default_value, + not_null, + group_id, + parent_collection, + relation_collection, + relation_type, + table_where_is_located, + dimension_names, + num_dimensions, + ) + end +end + mutable struct SetParameter{T} <: SetAttribute id::String type::Type{T} diff --git a/src/collection.jl b/src/collection.jl index c008511..c16c5e7 100644 --- a/src/collection.jl +++ b/src/collection.jl @@ -13,6 +13,7 @@ mutable struct Collection set_parameters::OrderedDict{String, SetParameter} set_relations::OrderedDict{String, SetRelation} time_series::OrderedDict{String, TimeSeries} + time_series_relations::OrderedDict{String, TimeSeriesRelation} time_series_files::OrderedDict{String, TimeSeriesFile} end @@ -32,7 +33,7 @@ function _create_collections_map!( vector_relations = _create_collection_vector_relations(db, collection_id) set_parameters = _create_collection_set_parameters(db, collection_id) set_relations = _create_collection_set_relations(db, collection_id) - time_series = _create_collection_time_series(db, collection_id) + time_series, 
time_series_relations = _create_collection_time_series(db, collection_id) time_series_files = _create_collection_time_series_files(db, collection_id) collection = Collection( collection_id, @@ -43,6 +44,7 @@ function _create_collections_map!( set_parameters, set_relations, time_series, + time_series_relations, time_series_files, ) collections_map[collection_id] = collection @@ -358,11 +360,13 @@ end function _create_collection_time_series(db::SQLite.DB, collection_id::String) time_series_tables = _get_collection_time_series_tables(db, collection_id) time_series = OrderedDict{String, TimeSeries}() + time_series_relations = OrderedDict{String, TimeSeriesRelation}() parent_collection = collection_id for table_name in time_series_tables group_id = _id_of_time_series_group(table_name) table_where_is_located = table_name df_table_infos = table_info(db, table_name) + df_foreign_keys_list = foreign_keys_list(db, table_name) dimension_names = _get_time_series_dimension_names(df_table_infos) for time_series_attribute in eachrow(df_table_infos) id = time_series_attribute.name @@ -387,28 +391,70 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) end continue end - type = _sql_type_to_julia_type(id, time_series_attribute.type) - default_value = _get_default_value(type, time_series_attribute.dflt_value) - not_null = Bool(time_series_attribute.notnull) - if haskey(time_series, id) - psr_database_sqlite_error( - "Duplicated time_series attribute \"$id\" in collection \"$collection_id\"", + + # Check if this is a foreign key (relation) + if id in df_foreign_keys_list.from + # Skip the "id" foreign key (it's just the reference to parent collection) + if id == "id" + continue + end + + # This is a time series relation + _validate_actions_on_foreign_key( + collection_id, + table_name, + df_foreign_keys_list[df_foreign_keys_list.from .== id, :][1, :], + ) + type = _sql_type_to_julia_type(id, time_series_attribute.type) + default_value = 
_get_default_value(type, time_series_attribute.dflt_value) + not_null = Bool(time_series_attribute.notnull) + relation_type = _get_relation_type_from_attribute_id(id) + relation_collection = df_foreign_keys_list[df_foreign_keys_list.from .== id, :table][1] + + if haskey(time_series_relations, id) + psr_database_sqlite_error( + "Duplicated time_series_relation attribute \"$id\" in collection \"$collection_id\"", + ) + end + + time_series_relations[id] = TimeSeriesRelation( + id, + type, + default_value, + not_null, + group_id, + parent_collection, + relation_collection, + relation_type, + table_where_is_located, + dimension_names, + length(dimension_names), + ) + else + # This is a regular time series parameter + type = _sql_type_to_julia_type(id, time_series_attribute.type) + default_value = _get_default_value(type, time_series_attribute.dflt_value) + not_null = Bool(time_series_attribute.notnull) + if haskey(time_series, id) + psr_database_sqlite_error( + "Duplicated time_series attribute \"$id\" in collection \"$collection_id\"", + ) + end + time_series[id] = TimeSeries( + id, + type, + default_value, + not_null, + group_id, + parent_collection, + table_where_is_located, + dimension_names, + length(dimension_names), ) end - time_series[id] = TimeSeries( - id, - type, - default_value, - not_null, - group_id, - parent_collection, - table_where_is_located, - dimension_names, - length(dimension_names), - ) end end - return time_series + return time_series, time_series_relations end function _create_collection_time_series_files(db::SQLite.DB, collection_id::String) @@ -671,6 +717,7 @@ function _relations_do_not_have_null_constraints(collection::Collection) num_errors = 0 scalar_relations = collection.scalar_relations vector_relations = collection.vector_relations + time_series_relations = collection.time_series_relations for (_, scalar_relation) in scalar_relations if scalar_relation.not_null @error( @@ -687,6 +734,14 @@ function 
_relations_do_not_have_null_constraints(collection::Collection) num_errors += 1 end end + for (_, time_series_relation) in time_series_relations + if time_series_relation.not_null + @error( + "Time series relation \"$(time_series_relation.id)\" in collection \"$(collection.id)\" has a not null constraint. This is not allowed." + ) + num_errors += 1 + end + end return num_errors end @@ -694,6 +749,7 @@ function _relations_do_not_have_default_values(collection::Collection) num_errors = 0 scalar_relations = collection.scalar_relations vector_relations = collection.vector_relations + time_series_relations = collection.time_series_relations for (_, scalar_relation) in scalar_relations if !ismissing(scalar_relation.default_value) @error( @@ -710,5 +766,13 @@ function _relations_do_not_have_default_values(collection::Collection) num_errors += 1 end end + for (_, time_series_relation) in time_series_relations + if !ismissing(time_series_relation.default_value) + @error( + "Time series relation \"$(time_series_relation.id)\" in collection \"$(collection.id)\" has a default value." + ) + num_errors += 1 + end + end return num_errors end diff --git a/src/compare_dbs.jl b/src/compare_dbs.jl index 8678d7f..791198e 100644 --- a/src/compare_dbs.jl +++ b/src/compare_dbs.jl @@ -482,6 +482,152 @@ function compare_time_series( return differences end +""" + compare_time_series_relations(db1::DatabaseSQLite, db2::DatabaseSQLite, collection_id::String) + +Compare time series relations between two databases for a specific collection. + +This function iterates through all time series relation attributes in the specified collection, +grouped by their group_id, and compares the relation data for each element between the two databases. 
+For each time series relation, it checks: +- The size of the time series tables (number of rows and columns) +- The column names and their order +- Individual relation references in each cell of the time series data + +The comparison ensures that relation labels match between databases at each time point and dimension. + +# Arguments + + - `db1::DatabaseSQLite`: The first database to compare (used as the reference for reading collection structure and element labels) + - `db2::DatabaseSQLite`: The second database to compare against the first + - `collection_id::String`: The name of the collection (table) to compare time series relation data for + +# Returns + +A vector of strings describing differences found in time series relation data. Each string includes the +collection name, time series relation attribute name, element label, column name, row index, and the +differing relation labels. Returns an empty vector if all time series relation data is identical. + +# Example + +```julia +using DataFrames, Dates + +db1 = create_empty_db_from_schema("db1.sqlite", "schema.sql"; force = true) +db2 = create_empty_db_from_schema("db2.sqlite", "schema.sql"; force = true) + +# Create plants +for db in [db1, db2] + create_element!(db, "Plant"; label = "Plant 1") + create_element!(db, "Plant"; label = "Plant 2") + create_element!(db, "Plant"; label = "Plant 3") +end + +# Create resources with different time series relations +df1 = DataFrame( + date_time = [DateTime(2020), DateTime(2021), DateTime(2022)], + power = [100.0, 200.0, 300.0], + plant_id = ["Plant 1", "Plant 2", "Plant 3"], +) +df2 = DataFrame( + date_time = [DateTime(2020), DateTime(2021), DateTime(2022)], + power = [100.0, 200.0, 300.0], + plant_id = ["Plant 1", "Plant 1", "Plant 3"], +) + +create_element!(db1, "Resource"; label = "Resource1", generation = df1) +create_element!(db2, "Resource"; label = "Resource1", generation = df2) + +differences = compare_time_series_relations(db1, db2, "Resource") +# Returns: 
["Collection 'Resource', time series relation 'plant_id', label 'Resource1', column 'plant_id', row 2: relations differ (db1: Plant 2, db2: Plant 1)"] +``` +""" +function compare_time_series_relations( + db1::DatabaseSQLite, + db2::DatabaseSQLite, + collection_id::String, +) + differences = String[] + collection = _get_collection(db1, collection_id) + + # Get all element labels + num_elements_db1 = number_of_elements(db1, collection_id) + num_elements_db2 = number_of_elements(db2, collection_id) + + # Check if the number of elements is the same + if num_elements_db1 != num_elements_db2 + push!( + differences, + "Collection '$collection_id': different number of elements (db1: $num_elements_db1, db2: $num_elements_db2)", + ) + return differences + end + + labels = read_scalar_parameters(db1, collection_id, "label") + + # Group time series relations by group_id + time_series_relation_groups = Dict{String, Vector{String}}() + for (attr_id, attr) in collection.time_series_relations + group_id = attr.group_id + if !haskey(time_series_relation_groups, group_id) + time_series_relation_groups[group_id] = String[] + end + push!(time_series_relation_groups[group_id], attr_id) + end + + for label in labels + for (group_id, attr_ids) in time_series_relation_groups + for attr_id in attr_ids + df1 = read_time_series_relation_table(db1, collection_id, attr_id, label) + df2 = read_time_series_relation_table(db2, collection_id, attr_id, label) + + if size(df1) != size(df2) + push!( + differences, + "Collection '$collection_id', time series relation '$attr_id', label '$label': different table sizes (db1: $(size(df1)), db2: $(size(df2)))", + ) + continue + end + + if names(df1) != names(df2) + push!( + differences, + "Collection '$collection_id', time series relation '$attr_id', label '$label': different column names (db1: $(names(df1)), db2: $(names(df2)))", + ) + continue + end + + # Compare each column + for col_name in names(df1) + col1 = df1[!, col_name] + col2 = df2[!, col_name] 
+ + for (row_idx, (v1, v2)) in enumerate(zip(col1, col2)) + # Handle missing and empty string cases + if (ismissing(v1) || v1 == "") && (ismissing(v2) || v2 == "") + continue + elseif (ismissing(v1) || v1 == "") || (ismissing(v2) || v2 == "") + push!( + differences, + "Collection '$collection_id', time series relation '$attr_id', label '$label', column '$col_name', row $row_idx: null/empty mismatch (db1: $(v1), db2: $(v2))", + ) + else + if v1 != v2 + push!( + differences, + "Collection '$collection_id', time series relation '$attr_id', label '$label', column '$col_name', row $row_idx: relations differ (db1: $(v1), db2: $(v2))", + ) + end + end + end + end + end + end + end + + return differences +end + """ compare_time_series_files(db1::DatabaseSQLite, db2::DatabaseSQLite, collection_id::String) @@ -842,6 +988,11 @@ function compare_databases( append!(all_differences, compare_time_series(db1, db2, collection_id)) end + # Compare time series relations + if !isempty(_get_collection(db1, collection_id).time_series_relations) + append!(all_differences, compare_time_series_relations(db1, db2, collection_id)) + end + # Compare time series files append!(all_differences, compare_time_series_files(db1, db2, collection_id)) end diff --git a/src/create.jl b/src/create.jl index 7d96fcc..2ed5403 100644 --- a/src/create.jl +++ b/src/create.jl @@ -185,6 +185,8 @@ function _create_time_series!( DataFrames.insertcols!(df, 1, :id => ids) # Convert datetime column to string df[!, :date_time] = string.(df[!, :date_time]) + # Convert time series relation labels to IDs + _replace_time_series_relation_labels_with_ids!(db, collection_id, string(group), df) # Add missing columns missing_names_in_df = setdiff(_attributes_in_time_series_group(db, collection_id, string(group)), string.(names(df))) for missing_attribute in missing_names_in_df @@ -470,6 +472,46 @@ function _replace_set_relation_labels_with_ids!( return nothing end +function _replace_time_series_relation_labels_with_ids!( + 
db::DatabaseSQLite, + collection_id::String, + group_id::String, + df::DataFrame, +) + collection = _get_collection(db, collection_id) + + for (attr_name, attribute) in collection.time_series_relations + if attribute.group_id == group_id && string(attr_name) in names(df) + # Convert labels to IDs + relation_collection = attribute.relation_collection + + # Create a vector of IDs to replace the entire column + ids = Vector{Int}(undef, nrow(df)) + for i in 1:nrow(df) + value = df[i, attr_name] + if ismissing(value) + ids[i] = _PSRDatabase_null_value(Int) + elseif isa(value, String) + if !_is_null_in_db(value) + ids[i] = _get_id(db, relation_collection, value) + else + ids[i] = _PSRDatabase_null_value(Int) + end + elseif isa(value, Int) || isa(value, Integer) + # Already an ID, keep it + ids[i] = value + else + ids[i] = _PSRDatabase_null_value(Int) + end + end + + # Replace the entire column + df[!, attr_name] = ids + end + end + return nothing +end + function _validate_attribute_types_on_creation!( db::DatabaseSQLite, collection_id::String, @@ -604,3 +646,101 @@ function add_time_series_row!( return _add_time_series_row!(db, attribute, id, val, dimensions) end + +""" + add_time_series_relation_row!(db::DatabaseSQLite, collection_id::String, attribute_id::String, label::String, val; dimensions...) + +Add or update a relation value in a time series relation attribute for a specific element and dimension combination. + +This function performs an "upsert" operation for time series relations. Unlike scalar relations which use IDs internally, +this function accepts a label string and converts it to an ID. 
+ +# Arguments + + - `db::DatabaseSQLite`: The database connection + - `collection_id::String`: The identifier of the collection containing the element + - `attribute_id::String`: The identifier of the time series relation attribute + - `label::String`: The label of the element to add/update the time series relation for + - `val`: The label of the related element (String) or an empty string for null relation + - `dimensions...`: Named arguments specifying the dimension values (e.g., `date_time=DateTime(2020, 1, 1)`) + +# Returns + + - `nothing` + +# Throws + + - `DatabaseException` if the attribute is not a time series relation + - `DatabaseException` if the number of dimensions doesn't match the attribute definition + - `DatabaseException` if dimension names don't match the attribute definition + - `DatabaseException` if the related element label doesn't exist + +# Examples + +```julia +# Add time series relation value +PSRDatabase.add_time_series_relation_row!( + db, + "Resource", + "plant_id", + "Resource 1", + "Plant 1"; + date_time = DateTime(2020, 1, 1), +) + +# Add time series relation with multiple dimensions +PSRDatabase.add_time_series_relation_row!( + db, + "Resource", + "plant_id", + "Resource 1", + "Plant 2"; + date_time = DateTime(2020, 1, 1), + block = 1, +) + +# Update existing time series relation (same dimensions) +PSRDatabase.add_time_series_relation_row!( + db, + "Resource", + "plant_id", + "Resource 1", + "Plant 3"; + date_time = DateTime(2020, 1, 1), # This will update the existing value +) +``` +""" +function add_time_series_relation_row!( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, + label::String, + val::String; + dimensions..., +) + _throw_if_attribute_is_not_time_series_relation( + db, + collection_id, + attribute_id, + :create, + ) + attribute = _get_attribute(db, collection_id, attribute_id) + id = _get_id(db, collection_id, label) + _validate_time_series_dimensions(collection_id, attribute, dimensions) + + if 
length(dimensions) != length(attribute.dimension_names) + psr_database_sqlite_error( + "The number of dimensions in the time series does not match the number of dimensions in the attribute. " * + "The attribute has $(attribute.num_dimensions) dimensions: $(join(attribute.dimension_names, ", ")).", + ) + end + + # Convert label to ID + relation_id = if _is_null_in_db(val) + _PSRDatabase_null_value(Int) + else + _get_id(db, attribute.relation_collection, val) + end + + return _add_time_series_row!(db, attribute, id, relation_id, dimensions) +end diff --git a/src/database_sqlite.jl b/src/database_sqlite.jl index daab7e9..61c4181 100644 --- a/src/database_sqlite.jl +++ b/src/database_sqlite.jl @@ -197,6 +197,15 @@ function _is_time_series( return haskey(collection.time_series, attribute_id) end +function _is_time_series_relation( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, +) + collection = _get_collection(db, collection_id) + return haskey(collection.time_series_relations, attribute_id) +end + function _is_time_series_group( db::DatabaseSQLite, collection_id::String, @@ -269,6 +278,8 @@ function _get_attribute( return collection.set_relations[attribute_id] elseif _is_time_series(db, collection_id, attribute_id) return collection.time_series[attribute_id] + elseif _is_time_series_relation(db, collection_id, attribute_id) + return collection.time_series_relations[attribute_id] elseif _is_time_series_file(db, collection_id, attribute_id) return collection.time_series_files[attribute_id] else @@ -327,6 +338,7 @@ function _attribute_exists( _is_vector_relation(db, collection_id, attribute_id) || _is_set_relation(db, collection_id, attribute_id) || _is_time_series(db, collection_id, attribute_id) || + _is_time_series_relation(db, collection_id, attribute_id) || _is_time_series_file(db, collection_id, attribute_id) end @@ -404,6 +416,11 @@ function _attributes_in_time_series_group( push!(attributes_in_time_series_group, attribute.id) end end + for 
(_, attribute) in collection.time_series_relations + if attribute.group_id == group_id + push!(attributes_in_time_series_group, attribute.id) + end + end return attributes_in_time_series_group end diff --git a/src/read.jl b/src/read.jl index 9b9a594..bdaf53a 100644 --- a/src/read.jl +++ b/src/read.jl @@ -1139,6 +1139,130 @@ function read_time_series_table( ) end +""" + read_time_series_relation_table(db::DatabaseSQLite, collection_id::String, attribute_id::String, label::String) + +Read the complete time series relation table for a specific element identified by label. +Returns labels instead of IDs for the relation values. + +# Arguments + + - `db::DatabaseSQLite`: The database connection + - `collection_id::String`: The identifier of the collection + - `attribute_id::String`: The identifier of the time series relation attribute + - `label::String`: The label of the element to read data for + +# Returns + + - `DataFrame`: A DataFrame containing all time series relation data with labels + +# Example + +```julia +relation_table = PSRDatabase.read_time_series_relation_table(db, "Resource", "plant_id", "Resource1") +``` +""" +function read_time_series_relation_table( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, + label::String, +) + _throw_if_attribute_is_not_time_series_relation( + db, + collection_id, + attribute_id, + :read, + ) + attribute = _get_attribute(db, collection_id, attribute_id) + id = _get_id(db, collection_id, label) + + # Query the time series table + df = _read_time_series_table(db, attribute, id) + + # Convert IDs to labels + relation_labels = read_scalar_parameters(db, attribute.relation_collection, "label") + num_elements = number_of_elements(db, attribute.relation_collection) + id_to_label = Dict{Int, String}(i => label for (i, label) in enumerate(relation_labels)) + id_to_label[_PSRDatabase_null_value(Int)] = "" + + # Replace IDs with labels in the DataFrame + df[!, attribute.id] = [get(id_to_label, id_val, "") for 
id_val in df[!, attribute.id]] + + return df +end + +""" + read_time_series_relation_row(db::DatabaseSQLite, collection_id::String, attribute_id::String, ::Type{String}; date_time::DateTime) + +Read a single row of time series relation data with caching (read-only mode). +Returns labels instead of IDs for the relation values. + +# Arguments + + - `db::DatabaseSQLite`: The database connection (must be in read-only mode) + - `collection_id::String`: The identifier of the collection + - `attribute_id::String`: The identifier of the time series relation attribute + - `::Type{String}`: Type parameter (always String for relations) + - `date_time::DateTime`: The date/time for which to read data + +# Returns + + - `Vector{String}`: A vector of labels for all elements at the specified date/time + +# Example + +```julia +db = PSRDatabase.load_db("database.sqlite"; read_only=true) +labels = PSRDatabase.read_time_series_relation_row( + db, "Resource", "plant_id", String; + date_time=DateTime(2020, 1, 1) +) +``` +""" +function read_time_series_relation_row( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, + ::Type{String}; + date_time::DateTime, +) + @assert _is_read_only(db) "Time series mapping only works in read only databases" + + _throw_if_attribute_is_not_time_series_relation( + db, + collection_id, + attribute_id, + :read, + ) + + attribute = _get_attribute(db, collection_id, attribute_id) + + # Read IDs using the TimeController cache (works for Int type) + collection_attribute = _collection_attribute(collection_id, attribute_id) + if !haskey(db._time_controller.cache, collection_attribute) + db._time_controller.cache[collection_attribute] = _start_time_controller_cache( + db, + attribute, + date_time, + Int, # Time series relations store IDs as Int + ) + end + + cache = db._time_controller.cache[collection_attribute] + ids = query_data_in_time_controller(cache, date_time) + + # Convert IDs to labels using cached mapping + id_to_label_mapping = 
_get_id_to_label_mapping(db, attribute.relation_collection) + + labels = Vector{String}(undef, length(ids)) + for (i, id_val) in enumerate(ids) + labels[i] = get(id_to_label_mapping, id_val, "") + end + + return labels +end + """ _treat_query_result(query_results::Vector{Missing}, attribute::Attribute, default::Union{Nothing, Any}) diff --git a/src/script_from_db.jl b/src/script_from_db.jl index cc0b829..1d62e26 100644 --- a/src/script_from_db.jl +++ b/src/script_from_db.jl @@ -183,6 +183,12 @@ function _generate_element_code( append!(relation_lines, vector_relations) end + # Collect time series relations to be set at the end + time_series_relations = _generate_time_series_relations_code(db, collection, element_id, label) + if !isempty(time_series_relations) + append!(relation_lines, time_series_relations) + end + return join(code_lines, "\n"), relation_lines end @@ -345,7 +351,8 @@ end """ _generate_time_series_parameters_code(db::DatabaseSQLite, collection::Collection, element_id::Int) -Generate code for time series data as DataFrames. +Generate code for time series data as DataFrames (excluding time series relations). +Time series relations are handled separately to avoid dependency issues. 
""" function _generate_time_series_parameters_code( db::DatabaseSQLite, @@ -354,7 +361,7 @@ function _generate_time_series_parameters_code( ) code_lines = String[] - # Group time series by group_id + # Group time series by group_id (only parameters, not relations) time_series_groups = Dict{String, Vector{String}}() for (attr_id, attr) in collection.time_series group_id = attr.group_id @@ -364,7 +371,22 @@ function _generate_time_series_parameters_code( push!(time_series_groups[group_id], attr_id) end - for (group_id, attr_ids) in time_series_groups + # Also collect time series relation group IDs to know which columns to exclude + time_series_relation_columns = Set{String}() + for (rel_attr_id, rel_attr) in collection.time_series_relations + push!(time_series_relation_columns, rel_attr_id) + end + + # Process all time series groups (including those with relations) + all_groups = Set{String}() + for (attr_id, attr) in collection.time_series + push!(all_groups, attr.group_id) + end + for (attr_id, attr) in collection.time_series_relations + push!(all_groups, attr.group_id) + end + + for group_id in all_groups # Read time series data for this group df = _read_time_series_group_data(db, collection.id, group_id, element_id) @@ -372,14 +394,42 @@ function _generate_time_series_parameters_code( continue end + # Exclude relation columns from the DataFrame + df_filtered = select(df, Not(collect(intersect(names(df), time_series_relation_columns)))) + + # Only include if there are non-dimension columns left (parameters) + if ncol(df_filtered) <= length(_get_time_series_dimensions(collection, group_id)) + continue + end + # Generate DataFrame code - df_code = _generate_dataframe_code(df, group_id) + df_code = _generate_dataframe_code(df_filtered, group_id) push!(code_lines, " $group_id = $df_code,") end return code_lines end +""" + _get_time_series_dimensions(collection::Collection, group_id::String) + +Get dimension names for a time series group. 
+""" +function _get_time_series_dimensions(collection::Collection, group_id::String) + # Get dimensions from any time series or time series relation in this group + for (attr_id, attr) in collection.time_series + if attr.group_id == group_id + return attr.dimension_names + end + end + for (attr_id, attr) in collection.time_series_relations + if attr.group_id == group_id + return attr.dimension_names + end + end + return String[] +end + """ _generate_scalar_relations_code(db::DatabaseSQLite, collection::Collection, element_id::Int, label::String) @@ -483,6 +533,90 @@ function _generate_vector_relations_code( return code_lines end +""" + _generate_time_series_relations_code(db::DatabaseSQLite, collection::Collection, element_id::Int, label::String) + +Generate code for adding time series relations row by row. +""" +function _generate_time_series_relations_code( + db::DatabaseSQLite, + collection::Collection, + element_id::Int, + label::String, +) + code_lines = String[] + + # Group time series relations by group_id + time_series_relation_groups = Dict{String, Vector{String}}() + for (attr_id, attr) in collection.time_series_relations + group_id = attr.group_id + if !haskey(time_series_relation_groups, group_id) + time_series_relation_groups[group_id] = String[] + end + push!(time_series_relation_groups[group_id], attr_id) + end + + for (group_id, attr_ids) in time_series_relation_groups + # Read time series data for this group + df = _read_time_series_group_data(db, collection.id, group_id, element_id) + + if isempty(df) + continue + end + + # For each relation attribute, generate add_time_series_relation_row! 
calls + for attr_id in attr_ids + attr = collection.time_series_relations[attr_id] + relation_collection = attr.relation_collection + + if !(attr_id in names(df)) + continue + end + + # Generate code for each row + for row_idx in 1:nrow(df) + row = df[row_idx, :] + id_val = row[attr_id] + + # Get related element label + if ismissing(id_val) || isnothing(id_val) + related_label = "" + else + related_label = _get_label_by_id(db, relation_collection, id_val) + end + + # Skip empty relations + if isempty(related_label) + continue + end + + # Build dimensions from the row (only actual dimension columns) + dimension_parts = String[] + for dim_name in attr.dimension_names + if dim_name in names(df) + col_val = row[dim_name] + if dim_name == "date_time" + push!(dimension_parts, "date_time = DateTime(\"$col_val\")") + else + formatted_val = _format_value(col_val, typeof(col_val)) + push!(dimension_parts, "$dim_name = $formatted_val") + end + end + end + + dimensions_str = join(dimension_parts, ", ") + + push!( + code_lines, + "PSRDatabase.add_time_series_relation_row!(db, \"$(collection.id)\", \"$attr_id\", \"$label\", \"$related_label\"; $dimensions_str)", + ) + end + end + end + + return code_lines +end + """ _generate_time_series_files_code(db::DatabaseSQLite, collection::Collection) diff --git a/src/time_controller.jl b/src/time_controller.jl index 128d2f9..025ea21 100644 --- a/src/time_controller.jl +++ b/src/time_controller.jl @@ -28,13 +28,17 @@ end Base.@kwdef mutable struct TimeController cache::Dict{CollectionAttribute, AttributeTimeSeriesCache} = Dict{CollectionAttribute, AttributeTimeSeriesCache}() - # Upon initialization the time controller will ask if a certain - # collection is empty, if the collection is empty it + # Upon initialization the time controller will ask if a certain + # collection is empty, if the collection is empty it # will be added to this cache. This cache will be used to avoid querying # multiple times if a certain collection is empty. 
- # This relies on the fact that the Time Controller only works in + # This relies on the fact that the Time Controller only works in # read only databases. collection_is_empty::Dict{String, Bool} = Dict{String, Bool}() + + # Cache for ID-to-label mappings for time series relations + # Maps collection_id => (element_id => label) + id_to_label_cache::Dict{String, Dict{Int, String}} = Dict{String, Dict{Int, String}}() end function _collection_attribute(collection_id::String, attribute_id::String)::CollectionAttribute @@ -50,6 +54,17 @@ function _time_controller_collection_is_empty(db, collection_id::String)::Bool end end +function _get_id_to_label_mapping(db, collection_id::String) + # Get or create cached ID-to-label mapping for a collection + if !haskey(db._time_controller.id_to_label_cache, collection_id) + labels = read_scalar_parameters(db, collection_id, "label") + mapping = Dict{Int, String}(i => label for (i, label) in enumerate(labels)) + mapping[_PSRDatabase_null_value(Int)] = "" + db._time_controller.id_to_label_cache[collection_id] = mapping + end + return db._time_controller.id_to_label_cache[collection_id] +end + function query_data_in_time_controller( attribute_cache::AttributeTimeSeriesCache, date_time::DateTime, diff --git a/src/update.jl b/src/update.jl index 0f83c66..7ff108b 100644 --- a/src/update.jl +++ b/src/update.jl @@ -11,6 +11,7 @@ const UPDATE_METHODS_BY_CLASS_OF_ATTRIBUTE = Dict( SetParameter => "update_set_parameters!", SetRelation => "set_set_relation!", TimeSeries => "update_time_series_row!", + TimeSeriesRelation => "update_time_series_relation_row!", TimeSeriesFile => "set_time_series_file!", ) @@ -951,3 +952,98 @@ function update_time_series_row!( return _update_time_series_row!(db, attribute, id, val, dimensions) end + +""" + update_time_series_relation_row!(db::DatabaseSQLite, collection_id::String, attribute_id::String, label::String, val; dimensions...) 
"""
    update_time_series_relation_row!(
        db::DatabaseSQLite,
        collection_id::String,
        attribute_id::String,
        label::String,
        val::String;
        dimensions...,
    )

Update an existing relation value in a time series relation attribute for a specific
element and dimension combination.

Unlike `add_time_series_relation_row!`, this function only updates existing rows and
throws if the specified dimension combination does not exist.

# Arguments

  - `db::DatabaseSQLite`: The database connection
  - `collection_id::String`: The identifier of the collection containing the element
  - `attribute_id::String`: The identifier of the time series relation attribute
  - `label::String`: The label of the element whose time series relation is updated
  - `val::String`: The label of the related element, or a value considered null by
    `_is_null_in_db` to store a null relation
  - `dimensions...`: Named arguments with the dimension values identifying the row

# Returns

  - Whatever the internal `_update_time_series_row!` returns (expected to be `nothing`)

# Throws

  - `DatabaseException` if the attribute is not a time series relation
  - `DatabaseException` if the number of dimensions doesn't match the attribute definition
  - `DatabaseException` if dimension names don't match the attribute definition
  - `DatabaseException` if the specified dimension combination doesn't exist
  - `DatabaseException` if the related element label doesn't exist

# Examples

```julia
PSRDatabase.update_time_series_relation_row!(
    db,
    "Resource",
    "plant_id",
    "Resource 1",
    "Plant 2";
    date_time = DateTime(2020, 1, 1),
)

PSRDatabase.update_time_series_relation_row!(
    db,
    "Resource",
    "plant_id",
    "Resource 1",
    "Plant 3";
    date_time = DateTime(2020, 1, 1),
    block = 1,
)
```
"""
function update_time_series_relation_row!(
    db::DatabaseSQLite,
    collection_id::String,
    attribute_id::String,
    label::String,
    val::String;
    dimensions...,
)
    _throw_if_attribute_is_not_time_series_relation(
        db,
        collection_id,
        attribute_id,
        :update,
    )
    attribute = _get_attribute(db, collection_id, attribute_id)
    id = _get_id(db, collection_id, label)
    _validate_time_series_dimensions(collection_id, attribute, dimensions)

    # Validate the shape of the request before touching the database, so that a wrong
    # number of dimensions is reported as such instead of as a "row does not exist".
    if length(dimensions) != length(attribute.dimension_names)
        psr_database_sqlite_error(
            "The number of dimensions in the time series does not match the number of dimensions in the attribute. " *
            "The attribute has $(attribute.num_dimensions) dimensions: $(join(attribute.dimension_names, ", ")).",
        )
    end

    if !_dimension_value_exists(db, attribute, id, dimensions...)
        psr_database_sqlite_error(
            "The chosen values for dimensions $(join(keys(dimensions), ", ")) do not exist in the time series for element $(label) in collection $(collection_id).",
        )
    end

    # The table stores the id of the related element, not its label.
    relation_id = if _is_null_in_db(val)
        _PSRDatabase_null_value(Int)
    else
        _get_id(db, attribute.relation_collection, val)
    end

    return _update_time_series_row!(db, attribute, id, relation_id, dimensions)
end
# Two databases populated with exactly the same time series relations must compare as
# identical (no differences reported).
function test_compare_time_series_relations_identical()
    path_schema = joinpath(@__DIR__, "..", "test_time_series_relations", "test_schema.sql")
    path_db1 = joinpath(@__DIR__, "test_db1.sqlite")
    path_db2 = joinpath(@__DIR__, "test_db2.sqlite")
    db1 = PSRDatabase.create_empty_db_from_schema(path_db1, path_schema; force = true)
    db2 = PSRDatabase.create_empty_db_from_schema(path_db2, path_schema; force = true)

    try
        for db in (db1, db2)
            PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0)
            PSRDatabase.create_element!(db, "Plant"; label = "Plant 1")
            PSRDatabase.create_element!(db, "Plant"; label = "Plant 2")
            PSRDatabase.create_element!(db, "Resource"; label = "Resource 1")

            for (plant, year) in (("Plant 1", 2000), ("Plant 2", 2001))
                PSRDatabase.add_time_series_relation_row!(
                    db,
                    "Resource",
                    "plant_id",
                    "Resource 1",
                    plant;
                    date_time = DateTime(year),
                )
            end
        end

        differences = PSRDatabase.compare_time_series_relations(db1, db2, "Resource")
        @test isempty(differences)
    finally
        # Always release the handles and remove the files, even when the body throws.
        PSRDatabase.close!(db1)
        PSRDatabase.close!(db2)
        rm(path_db1; force = true)
        rm(path_db2; force = true)
    end
    return nothing
end
# When one database holds more time series relation rows than the other for the same
# element, the comparison must report "different table sizes".
function test_compare_time_series_relations_different_sizes()
    path_schema = joinpath(@__DIR__, "..", "test_time_series_relations", "test_schema.sql")
    path_db1 = joinpath(@__DIR__, "test_db1.sqlite")
    path_db2 = joinpath(@__DIR__, "test_db2.sqlite")
    db1 = PSRDatabase.create_empty_db_from_schema(path_db1, path_schema; force = true)
    db2 = PSRDatabase.create_empty_db_from_schema(path_db2, path_schema; force = true)

    # db1 receives three rows, db2 only the first two.
    rows_per_db = (
        (db1, (("Plant 1", 2000), ("Plant 2", 2001), ("Plant 1", 2002))),
        (db2, (("Plant 1", 2000), ("Plant 2", 2001))),
    )

    try
        for (db, rows) in rows_per_db
            PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0)
            PSRDatabase.create_element!(db, "Plant"; label = "Plant 1")
            PSRDatabase.create_element!(db, "Plant"; label = "Plant 2")
            PSRDatabase.create_element!(db, "Resource"; label = "Resource 1")

            for (plant, year) in rows
                PSRDatabase.add_time_series_relation_row!(
                    db,
                    "Resource",
                    "plant_id",
                    "Resource 1",
                    plant;
                    date_time = DateTime(year),
                )
            end
        end

        differences = PSRDatabase.compare_time_series_relations(db1, db2, "Resource")
        @test !isempty(differences)
        @test any(occursin("different table sizes", diff) for diff in differences)
    finally
        # Always release the handles and remove the files, even when the body throws.
        PSRDatabase.close!(db1)
        PSRDatabase.close!(db2)
        rm(path_db1; force = true)
        rm(path_db2; force = true)
    end
    return nothing
end
# With a multi-dimensional time series relation (date_time, block, scenario), a row
# pointing to a different plant must be reported, naming the attribute and both labels.
function test_compare_time_series_relations_multi_dimensions_different_values()
    path_schema = joinpath(@__DIR__, "..", "test_time_series_relations", "test_schema.sql")
    path_db1 = joinpath(@__DIR__, "test_db1.sqlite")
    path_db2 = joinpath(@__DIR__, "test_db2.sqlite")
    db1 = PSRDatabase.create_empty_db_from_schema(path_db1, path_schema; force = true)
    db2 = PSRDatabase.create_empty_db_from_schema(path_db2, path_schema; force = true)

    # Same dimensions in both databases; only the plant of block 2 differs.
    rows_per_db = (
        (db1, (("Plant 1", 1), ("Plant 2", 2))),
        (db2, (("Plant 1", 1), ("Plant 3", 2))),
    )

    try
        for (db, rows) in rows_per_db
            PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0)
            for plant_label in ("Plant 1", "Plant 2", "Plant 3")
                PSRDatabase.create_element!(db, "Plant"; label = plant_label)
            end
            PSRDatabase.create_element!(db, "Resource"; label = "Resource 1")

            for (plant, block) in rows
                PSRDatabase.add_time_series_relation_row!(
                    db,
                    "Resource",
                    "plant_dispatch_id",
                    "Resource 1",
                    plant;
                    date_time = DateTime(2000),
                    block = block,
                    scenario = 1,
                )
            end
        end

        differences = PSRDatabase.compare_time_series_relations(db1, db2, "Resource")
        @test !isempty(differences)
        @test any(occursin("plant_dispatch_id", diff) for diff in differences)
        @test any(
            occursin("Plant 2", diff) && occursin("Plant 3", diff) for diff in differences
        )
    finally
        # Always release the handles and remove the files, even when the body throws.
        PSRDatabase.close!(db1)
        PSRDatabase.close!(db2)
        rm(path_db1; force = true)
        rm(path_db2; force = true)
    end
    return nothing
end
# db1 has a relation for two date_times while db2 has a row for only one of them; the
# comparison must flag the mismatch as "different table sizes".
#
# NOTE(review): despite its name, this test never stores an explicit null relation
# (empty label) - it exercises a missing row, which is the same scenario as
# `test_compare_time_series_relations_different_sizes`. Consider adding a row with
# `""` as the related label to db2 to really cover null vs non-null.
function test_compare_time_series_relations_null_vs_non_null()
    path_schema = joinpath(@__DIR__, "..", "test_time_series_relations", "test_schema.sql")
    path_db1 = joinpath(@__DIR__, "test_db1.sqlite")
    path_db2 = joinpath(@__DIR__, "test_db2.sqlite")
    db1 = PSRDatabase.create_empty_db_from_schema(path_db1, path_schema; force = true)
    db2 = PSRDatabase.create_empty_db_from_schema(path_db2, path_schema; force = true)

    rows_per_db = (
        (db1, (("Plant 1", 2000), ("Plant 2", 2001))),
        (db2, (("Plant 1", 2000),)),
    )

    try
        for (db, rows) in rows_per_db
            PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0)
            PSRDatabase.create_element!(db, "Plant"; label = "Plant 1")
            PSRDatabase.create_element!(db, "Plant"; label = "Plant 2")
            PSRDatabase.create_element!(db, "Resource"; label = "Resource 1")

            for (plant, year) in rows
                PSRDatabase.add_time_series_relation_row!(
                    db,
                    "Resource",
                    "plant_id",
                    "Resource 1",
                    plant;
                    date_time = DateTime(year),
                )
            end
        end

        differences = PSRDatabase.compare_time_series_relations(db1, db2, "Resource")
        @test !isempty(differences)
        @test any(occursin("different table sizes", diff) for diff in differences)
    finally
        # Always release the handles and remove the files, even when the body throws.
        PSRDatabase.close!(db1)
        PSRDatabase.close!(db2)
        rm(path_db1; force = true)
        rm(path_db2; force = true)
    end
    return nothing
end
# Round-trip test: populate a database with single- and multi-dimensional time series
# relations, generate a Julia script from it, run the script to rebuild a second
# database, and check that both databases are equivalent.
function test_generate_code_from_time_series_relations()
    path_schema = joinpath(@__DIR__, "..", "test_time_series_relations", "test_schema.sql")
    db_path = joinpath(@__DIR__, "test_generate_code_time_series_relations.sqlite")
    db_reconstructed_path =
        joinpath(@__DIR__, "test_generate_code_time_series_relations_reconstructed.sqlite")
    code_path = joinpath(@__DIR__, "test_generate_code_time_series_relations_code.jl")

    db = PSRDatabase.create_empty_db_from_schema(db_path, path_schema; force = true)
    # Handles opened later; kept as `nothing` until then so cleanup can skip them.
    db1 = nothing
    db2 = nothing

    try
        PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0)
        for plant_label in ("Plant 1", "Plant 2", "Plant 3")
            PSRDatabase.create_element!(db, "Plant"; label = plant_label)
        end

        # Resource with a single-dimension time series relation
        df_generation = DataFrame(;
            date_time = [DateTime(2000), DateTime(2001), DateTime(2002)],
            power = [100.0, 200.0, 300.0],
            plant_id = ["Plant 1", "Plant 2", "Plant 3"],
        )
        PSRDatabase.create_element!(
            db,
            "Resource";
            label = "Resource 1",
            generation = df_generation,
        )

        # Resource with a multi-dimension time series relation
        df_dispatch = DataFrame(;
            date_time = [DateTime(2000), DateTime(2000), DateTime(2001)],
            block = [1, 2, 1],
            scenario = [1, 1, 1],
            energy = [50.0, 75.0, 100.0],
            plant_dispatch_id = ["Plant 1", "Plant 2", "Plant 1"],
        )
        PSRDatabase.create_element!(
            db,
            "Resource";
            label = "Resource 2",
            dispatch = df_dispatch,
        )

        PSRDatabase.generate_julia_script_from_database(
            db,
            code_path,
            db_reconstructed_path;
            path_schema = path_schema,
        )

        # Running the generated script rebuilds the database at db_reconstructed_path.
        include(code_path)

        db1 = PSRDatabase.load_db(db_path; read_only = true)
        db2 = PSRDatabase.load_db(db_reconstructed_path; read_only = true)

        @test isempty(PSRDatabase.compare_databases(db1, db2))

        df_gen_1 = PSRDatabase.read_time_series_relation_table(
            db2,
            "Resource",
            "plant_id",
            "Resource 1",
        )
        @test nrow(df_gen_1) == 3
        @test df_gen_1[1, :plant_id] == "Plant 1"
        @test df_gen_1[2, :plant_id] == "Plant 2"
        @test df_gen_1[3, :plant_id] == "Plant 3"

        df_dispatch_2 = PSRDatabase.read_time_series_relation_table(
            db2,
            "Resource",
            "plant_dispatch_id",
            "Resource 2",
        )
        @test nrow(df_dispatch_2) == 3
        @test df_dispatch_2[1, :plant_dispatch_id] == "Plant 1"
        @test df_dispatch_2[2, :plant_dispatch_id] == "Plant 2"
        @test df_dispatch_2[3, :plant_dispatch_id] == "Plant 1"
    finally
        # Always release the handles and remove the artifacts, even when the body
        # throws part-way through (some files may not exist yet, hence `force`).
        PSRDatabase.close!(db)
        isnothing(db1) || PSRDatabase.close!(db1)
        isnothing(db2) || PSRDatabase.close!(db2)
        rm(db_path; force = true)
        rm(db_reconstructed_path; force = true)
        rm(code_path; force = true)
    end

    return nothing
end
# Rows added with `add_time_series_relation_row!` must be readable, updatable in place
# with `update_time_series_relation_row!`, and updating a date_time that was never
# added must throw a `DatabaseException`.
function test_add_and_update_time_series_relations()
    path_schema = joinpath(@__DIR__, "test_schema.sql")
    db_path = joinpath(@__DIR__, "test_update.sqlite")
    db = PSRDatabase.create_empty_db_from_schema(db_path, path_schema; force = true)

    read_plant_table() = PSRDatabase.read_time_series_relation_table(
        db,
        "Resource",
        "plant_id",
        "Resource 1",
    )

    try
        PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0)
        for plant_label in ("Plant 1", "Plant 2", "Plant 3")
            PSRDatabase.create_element!(db, "Plant"; label = plant_label)
        end

        # Resource starts without any time series data
        PSRDatabase.create_element!(db, "Resource"; label = "Resource 1")

        for (plant, year) in (("Plant 1", 2000), ("Plant 2", 2001))
            PSRDatabase.add_time_series_relation_row!(
                db,
                "Resource",
                "plant_id",
                "Resource 1",
                plant;
                date_time = DateTime(year),
            )
        end

        df_read = read_plant_table()
        @test nrow(df_read) == 2
        @test df_read[1, :plant_id] == "Plant 1"
        @test df_read[2, :plant_id] == "Plant 2"

        PSRDatabase.update_time_series_relation_row!(
            db,
            "Resource",
            "plant_id",
            "Resource 1",
            "Plant 3";
            date_time = DateTime(2000),
        )

        df_updated = read_plant_table()
        @test nrow(df_updated) == 2
        @test df_updated[1, :plant_id] == "Plant 3" # Updated from Plant 1
        @test df_updated[2, :plant_id] == "Plant 2" # Unchanged

        # Updating a row whose date_time was never added must fail
        @test_throws PSRDatabase.DatabaseException PSRDatabase.update_time_series_relation_row!(
            db,
            "Resource",
            "plant_id",
            "Resource 1",
            "Plant 1";
            date_time = DateTime(2010),
        )
    finally
        # Always release the handle and remove the file, even when the body throws.
        PSRDatabase.close!(db)
        rm(db_path; force = true)
    end
    return nothing
end
"Resource 1") + + # Test add_time_series_relation_row! with multiple dimensions + # Add data for different date_time, block, and scenario combinations + PSRDatabase.add_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 1"; + date_time = DateTime(2000), + block = 1, + scenario = 1, + ) + + PSRDatabase.add_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 2"; + date_time = DateTime(2000), + block = 1, + scenario = 2, + ) + + PSRDatabase.add_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 1"; + date_time = DateTime(2000), + block = 2, + scenario = 1, + ) + + PSRDatabase.add_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 2"; + date_time = DateTime(2001), + block = 1, + scenario = 1, + ) + + # Verify the data was added + df_read = PSRDatabase.read_time_series_relation_table( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + ) + + @test nrow(df_read) == 4 + @test df_read[1, :plant_dispatch_id] == "Plant 1" + @test df_read[1, :date_time] == "2000-01-01T00:00:00" + @test df_read[1, :block] == 1 + @test df_read[1, :scenario] == 1 + + @test df_read[2, :plant_dispatch_id] == "Plant 2" + @test df_read[2, :scenario] == 2 + + @test df_read[3, :plant_dispatch_id] == "Plant 1" + @test df_read[3, :block] == 2 + + @test df_read[4, :plant_dispatch_id] == "Plant 2" + @test df_read[4, :date_time] == "2001-01-01T00:00:00" + + # Test update_time_series_relation_row! 
with multiple dimensions + # Update the first entry to point to Plant 3 + PSRDatabase.update_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 3"; + date_time = DateTime(2000), + block = 1, + scenario = 1, + ) + + # Update entry with block = 2 + PSRDatabase.update_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 3"; + date_time = DateTime(2000), + block = 2, + scenario = 1, + ) + + # Verify the updates + df_updated = PSRDatabase.read_time_series_relation_table( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + ) + + @test nrow(df_updated) == 4 + @test df_updated[1, :plant_dispatch_id] == "Plant 3" # Updated from Plant 1 + @test df_updated[1, :block] == 1 + @test df_updated[1, :scenario] == 1 + + @test df_updated[2, :plant_dispatch_id] == "Plant 2" # Unchanged + @test df_updated[2, :scenario] == 2 + + @test df_updated[3, :plant_dispatch_id] == "Plant 3" # Updated from Plant 1 + @test df_updated[3, :block] == 2 + + @test df_updated[4, :plant_dispatch_id] == "Plant 2" # Unchanged + @test df_updated[4, :date_time] == "2001-01-01T00:00:00" + + # Test that update throws error for non-existent dimension combination + @test_throws PSRDatabase.DatabaseException PSRDatabase.update_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 1"; + date_time = DateTime(2010), # This date doesn't exist + block = 1, + scenario = 1, + ) + + # Test error when missing a dimension + @test_throws PSRDatabase.DatabaseException PSRDatabase.update_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 1"; + date_time = DateTime(2000), + block = 1, + # Missing scenario dimension + ) + + # Test error when providing wrong dimension name + @test_throws PSRDatabase.DatabaseException PSRDatabase.update_time_series_relation_row!( + db, + "Resource", + "plant_dispatch_id", + "Resource 1", + "Plant 1"; + date_time = 
# `_get_id_to_label_mapping` must return a `Dict{Int, String}` from element id to
# label, include the null id mapped to "", and hand back the same cached object on
# subsequent calls.
function test_get_id_to_label_mapping()
    path_schema = joinpath(@__DIR__, "test_schema.sql")
    db_path = joinpath(@__DIR__, "test_id_to_label.sqlite")
    db = PSRDatabase.create_empty_db_from_schema(db_path, path_schema; force = true)

    try
        PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0)
        for plant_label in ("Plant 1", "Plant 2", "Plant 3")
            PSRDatabase.create_element!(db, "Plant"; label = plant_label)
        end

        mapping = PSRDatabase._get_id_to_label_mapping(db, "Plant")
        @test mapping isa Dict{Int, String}

        @test mapping[1] == "Plant 1"
        @test mapping[2] == "Plant 2"
        @test mapping[3] == "Plant 3"

        null_id = PSRDatabase._PSRDatabase_null_value(Int)
        @test haskey(mapping, null_id)
        @test mapping[null_id] == ""

        # Second call must hit the cache and return the very same object
        mapping2 = PSRDatabase._get_id_to_label_mapping(db, "Plant")
        @test mapping === mapping2
    finally
        # Always release the handle and remove the file, even when the body throws.
        PSRDatabase.close!(db)
        rm(db_path; force = true)
    end
    return nothing
end
PSRDatabase._throw_if_attribute_is_not_time_series_relation( + db, + "Resource", + "plant_dispatch_id", + :read, + ) === nothing + + # Test that it THROWS when attribute is a scalar parameter + @test_throws PSRDatabase.DatabaseException PSRDatabase._throw_if_attribute_is_not_time_series_relation( + db, + "Resource", + "label", + :read, + ) + + # Test that it THROWS when attribute is a regular time series parameter (not relation) + @test_throws PSRDatabase.DatabaseException PSRDatabase._throw_if_attribute_is_not_time_series_relation( + db, + "Resource", + "power", + :read, + ) + + # Test that it THROWS when collection doesn't exist + @test_throws PSRDatabase.DatabaseException PSRDatabase._throw_if_attribute_is_not_time_series_relation( + db, + "NonExistentCollection", + "plant_id", + :read, + ) + + # Test that it THROWS when attribute doesn't exist + @test_throws PSRDatabase.DatabaseException PSRDatabase._throw_if_attribute_is_not_time_series_relation( + db, + "Resource", + "non_existent_attribute", + :read, + ) + + PSRDatabase.close!(db) + return rm(db_path) +end + +function test_read_time_series_relation_row() + path_schema = joinpath(@__DIR__, "test_schema.sql") + db_path = joinpath(@__DIR__, "test_read_row.sqlite") + db = PSRDatabase.create_empty_db_from_schema(db_path, path_schema; force = true) + + # Create configuration + PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0) + + # Create plants + PSRDatabase.create_element!(db, "Plant"; label = "Plant 1") + PSRDatabase.create_element!(db, "Plant"; label = "Plant 2") + PSRDatabase.create_element!(db, "Plant"; label = "Plant 3") + + # Create resource with time series data including relations + df_generation = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + power = [100.0, 200.0, 300.0], + plant_id = ["Plant 1", "Plant 2", "Plant 3"], + ) + + PSRDatabase.create_element!( + db, + "Resource"; + label = "Resource 1", + generation = df_generation, + ) + + # Close and reopen in 
read-only mode (required for read_time_series_relation_row) + PSRDatabase.close!(db) + db = PSRDatabase.load_db(db_path; read_only = true) + + # Test reading single dimension time series relation row + labels = PSRDatabase.read_time_series_relation_row( + db, + "Resource", + "plant_id", + String; + date_time = DateTime(2000), + ) + + @test labels isa Vector{String} + @test length(labels) == 1 + @test labels[1] == "Plant 1" + + # Test reading at different date + labels = PSRDatabase.read_time_series_relation_row( + db, + "Resource", + "plant_id", + String; + date_time = DateTime(2001), + ) + + @test labels[1] == "Plant 2" + + # Test reading at DateTime(2002) + labels = PSRDatabase.read_time_series_relation_row( + db, + "Resource", + "plant_id", + String; + date_time = DateTime(2002), + ) + + @test labels[1] == "Plant 3" + + # Test missing value handling - querying future date should return previous value + labels = PSRDatabase.read_time_series_relation_row( + db, + "Resource", + "plant_id", + String; + date_time = DateTime(2003), + ) + + @test labels[1] == "Plant 3" # Should return previous value from 2002 + + PSRDatabase.close!(db) + return rm(db_path) +end + +function test_read_time_series_relation_row_errors() + path_schema = joinpath(@__DIR__, "test_schema.sql") + db_path = joinpath(@__DIR__, "test_read_row_errors.sqlite") + db = PSRDatabase.create_empty_db_from_schema(db_path, path_schema; force = true) + + # Create configuration + PSRDatabase.create_element!(db, "Configuration"; value1 = 1.0) + + # Create plants + PSRDatabase.create_element!(db, "Plant"; label = "Plant 1") + + # Create resource with time series data + df_generation = DataFrame(; + date_time = [DateTime(2000)], + power = [100.0], + plant_id = ["Plant 1"], + ) + + PSRDatabase.create_element!( + db, + "Resource"; + label = "Resource 1", + generation = df_generation, + ) + + # Test error when database is NOT in read-only mode + @test_throws AssertionError PSRDatabase.read_time_series_relation_row( 
+ db, + "Resource", + "plant_id", + String; + date_time = DateTime(2000), + ) + + # Close and reopen in read-only mode + PSRDatabase.close!(db) + db = PSRDatabase.load_db(db_path; read_only = true) + + # Test error when attribute is not a time series relation + @test_throws PSRDatabase.DatabaseException PSRDatabase.read_time_series_relation_row( + db, + "Resource", + "power", # This is a time series parameter, not a relation + String; + date_time = DateTime(2000), + ) + + # Test error when attribute is a scalar parameter + @test_throws PSRDatabase.DatabaseException PSRDatabase.read_time_series_relation_row( + db, + "Resource", + "label", # This is a scalar parameter + String; + date_time = DateTime(2000), + ) + + PSRDatabase.close!(db) + return rm(db_path) +end + +function runtests() + for name in names(@__MODULE__; all = true) + if startswith("$(name)", "test_") + @testset "$(name)" begin + getfield(@__MODULE__, name)() + end + end + end +end + +end # module + +TestTimeSeriesRelationsBasic.runtests() diff --git a/test/test_time_series_relations/test_schema.sql b/test/test_time_series_relations/test_schema.sql new file mode 100644 index 0000000..fbe3ae7 --- /dev/null +++ b/test/test_time_series_relations/test_schema.sql @@ -0,0 +1,38 @@ +PRAGMA user_version = 1; + +CREATE TABLE Configuration ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + value1 REAL NOT NULL DEFAULT 100 +) STRICT; + +CREATE TABLE Resource ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL +) STRICT; + +CREATE TABLE Plant ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL +) STRICT; + +CREATE TABLE Resource_time_series_generation ( + id INTEGER, + date_time TEXT NOT NULL, + power REAL, + plant_id INTEGER, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(plant_id) REFERENCES Plant(id) ON DELETE SET NULL ON UPDATE CASCADE, + PRIMARY KEY (id, date_time) +) STRICT; + +CREATE TABLE Resource_time_series_dispatch ( + id 
INTEGER, + date_time TEXT NOT NULL, + block INTEGER NOT NULL, + scenario INTEGER NOT NULL, + energy REAL, + plant_dispatch_id INTEGER, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY(plant_dispatch_id) REFERENCES Plant(id) ON DELETE SET NULL ON UPDATE CASCADE, + PRIMARY KEY (id, date_time, block, scenario) +) STRICT;