Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions src/attribute.jl
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,49 @@ mutable struct TimeSeries{T} <: VectorAttribute
num_dimensions::Int
end

"""
    TimeSeriesRelation{T} <: VectorAttribute

Description of a time series attribute whose values refer to elements of another
collection.

# Fields

- `id`: name of the attribute.
- `type`: Julia type `T` associated with the attribute.
- `default_value`: default value of the attribute, or `missing` when there is none.
- `not_null`: whether the attribute carries a not-null constraint.
- `group_id`: identifier of the time series group the attribute belongs to.
- `parent_collection`: collection that owns the attribute.
- `relation_collection`: collection the attribute points to.
- `relation_type`: kind of relation (presumably derived from the attribute name by the
  caller; confirm against the code that builds the collection).
- `table_where_is_located`: name of the table that stores the attribute.
- `dimension_names`: names of the dimensions of the time series.
- `num_dimensions`: number of dimensions of the time series.

The inner constructor calls `_check_valid_relation_name(id, relation_collection)` before
the object is created.
"""
mutable struct TimeSeriesRelation{T} <: VectorAttribute
    id::String
    type::Type{T}
    default_value::Union{Missing, T}
    not_null::Bool
    group_id::String
    parent_collection::String
    relation_collection::String
    relation_type::String
    table_where_is_located::String
    dimension_names::Vector{String}
    num_dimensions::Int

    function TimeSeriesRelation(
        id::String,
        type::Type{T},
        default_value::Union{Missing, T},
        not_null::Bool,
        group_id::String,
        parent_collection::String,
        relation_collection::String,
        relation_type::String,
        table_where_is_located::String,
        dimension_names::Vector{String},
        num_dimensions::Int,
    ) where {T}
        # Validate the attribute name against the referenced collection first, so
        # no object is created for an invalid relation name.
        _check_valid_relation_name(id, relation_collection)
        relation = new{T}(
            id, type, default_value, not_null,
            group_id, parent_collection,
            relation_collection, relation_type,
            table_where_is_located, dimension_names, num_dimensions,
        )
        return relation
    end
end

mutable struct SetParameter{T} <: SetAttribute
id::String
type::Type{T}
Expand Down
102 changes: 83 additions & 19 deletions src/collection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mutable struct Collection
set_parameters::OrderedDict{String, SetParameter}
set_relations::OrderedDict{String, SetRelation}
time_series::OrderedDict{String, TimeSeries}
time_series_relations::OrderedDict{String, TimeSeriesRelation}
time_series_files::OrderedDict{String, TimeSeriesFile}
end

Expand All @@ -32,7 +33,7 @@ function _create_collections_map!(
vector_relations = _create_collection_vector_relations(db, collection_id)
set_parameters = _create_collection_set_parameters(db, collection_id)
set_relations = _create_collection_set_relations(db, collection_id)
time_series = _create_collection_time_series(db, collection_id)
time_series, time_series_relations = _create_collection_time_series(db, collection_id)
time_series_files = _create_collection_time_series_files(db, collection_id)
collection = Collection(
collection_id,
Expand All @@ -43,6 +44,7 @@ function _create_collections_map!(
set_parameters,
set_relations,
time_series,
time_series_relations,
time_series_files,
)
collections_map[collection_id] = collection
Expand Down Expand Up @@ -358,11 +360,13 @@ end
function _create_collection_time_series(db::SQLite.DB, collection_id::String)
time_series_tables = _get_collection_time_series_tables(db, collection_id)
time_series = OrderedDict{String, TimeSeries}()
time_series_relations = OrderedDict{String, TimeSeriesRelation}()
parent_collection = collection_id
for table_name in time_series_tables
group_id = _id_of_time_series_group(table_name)
table_where_is_located = table_name
df_table_infos = table_info(db, table_name)
df_foreign_keys_list = foreign_keys_list(db, table_name)
dimension_names = _get_time_series_dimension_names(df_table_infos)
for time_series_attribute in eachrow(df_table_infos)
id = time_series_attribute.name
Expand All @@ -387,28 +391,70 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String)
end
continue
end
type = _sql_type_to_julia_type(id, time_series_attribute.type)
default_value = _get_default_value(type, time_series_attribute.dflt_value)
not_null = Bool(time_series_attribute.notnull)
if haskey(time_series, id)
psr_database_sqlite_error(
"Duplicated time_series attribute \"$id\" in collection \"$collection_id\"",

# Check if this is a foreign key (relation)
if id in df_foreign_keys_list.from
# Skip the "id" foreign key (it's just the reference to parent collection)
if id == "id"
continue
end

# This is a time series relation
_validate_actions_on_foreign_key(
collection_id,
table_name,
df_foreign_keys_list[df_foreign_keys_list.from .== id, :][1, :],
)
type = _sql_type_to_julia_type(id, time_series_attribute.type)
default_value = _get_default_value(type, time_series_attribute.dflt_value)
not_null = Bool(time_series_attribute.notnull)
relation_type = _get_relation_type_from_attribute_id(id)
relation_collection = df_foreign_keys_list[df_foreign_keys_list.from .== id, :table][1]

if haskey(time_series_relations, id)
psr_database_sqlite_error(
"Duplicated time_series_relation attribute \"$id\" in collection \"$collection_id\"",
)
end

time_series_relations[id] = TimeSeriesRelation(
id,
type,
default_value,
not_null,
group_id,
parent_collection,
relation_collection,
relation_type,
table_where_is_located,
dimension_names,
length(dimension_names),
)
else
# This is a regular time series parameter
type = _sql_type_to_julia_type(id, time_series_attribute.type)
default_value = _get_default_value(type, time_series_attribute.dflt_value)
not_null = Bool(time_series_attribute.notnull)
if haskey(time_series, id)
psr_database_sqlite_error(
"Duplicated time_series attribute \"$id\" in collection \"$collection_id\"",
)
end
time_series[id] = TimeSeries(
id,
type,
default_value,
not_null,
group_id,
parent_collection,
table_where_is_located,
dimension_names,
length(dimension_names),
)
end
time_series[id] = TimeSeries(
id,
type,
default_value,
not_null,
group_id,
parent_collection,
table_where_is_located,
dimension_names,
length(dimension_names),
)
end
end
return time_series
return time_series, time_series_relations
end

function _create_collection_time_series_files(db::SQLite.DB, collection_id::String)
Expand Down Expand Up @@ -671,6 +717,7 @@ function _relations_do_not_have_null_constraints(collection::Collection)
num_errors = 0
scalar_relations = collection.scalar_relations
vector_relations = collection.vector_relations
time_series_relations = collection.time_series_relations
for (_, scalar_relation) in scalar_relations
if scalar_relation.not_null
@error(
Expand All @@ -687,13 +734,22 @@ function _relations_do_not_have_null_constraints(collection::Collection)
num_errors += 1
end
end
for (_, time_series_relation) in time_series_relations
if time_series_relation.not_null
@error(
"Time series relation \"$(time_series_relation.id)\" in collection \"$(collection.id)\" has a not null constraint. This is not allowed."
)
num_errors += 1
end
end
return num_errors
end

function _relations_do_not_have_default_values(collection::Collection)
num_errors = 0
scalar_relations = collection.scalar_relations
vector_relations = collection.vector_relations
time_series_relations = collection.time_series_relations
for (_, scalar_relation) in scalar_relations
if !ismissing(scalar_relation.default_value)
@error(
Expand All @@ -710,5 +766,13 @@ function _relations_do_not_have_default_values(collection::Collection)
num_errors += 1
end
end
for (_, time_series_relation) in time_series_relations
if !ismissing(time_series_relation.default_value)
@error(
"Time series relation \"$(time_series_relation.id)\" in collection \"$(collection.id)\" has a default value."
)
num_errors += 1
end
end
return num_errors
end
151 changes: 151 additions & 0 deletions src/compare_dbs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,152 @@ function compare_time_series(
return differences
end

"""
compare_time_series_relations(db1::DatabaseSQLite, db2::DatabaseSQLite, collection_id::String)

Compare time series relations between two databases for a specific collection.

This function iterates through all time series relation attributes in the specified collection,
grouped by their group_id, and compares the relation data for each element between the two databases.
For each time series relation, it checks:
- The size of the time series tables (number of rows and columns)
- The column names and their order
- Individual relation references in each cell of the time series data

The comparison ensures that relation labels match between databases at each time point and dimension.

# Arguments

- `db1::DatabaseSQLite`: The first database to compare (used as the reference for reading collection structure and element labels)
- `db2::DatabaseSQLite`: The second database to compare against the first
- `collection_id::String`: The name of the collection (table) to compare time series relation data for

# Returns

A vector of strings describing differences found in time series relation data. Each string includes the
collection name, time series relation attribute name, element label, column name, row index, and the
differing relation labels. Returns an empty vector if all time series relation data is identical.

# Example

```julia
using DataFrames, Dates

db1 = create_empty_db_from_schema("db1.sqlite", "schema.sql"; force = true)
db2 = create_empty_db_from_schema("db2.sqlite", "schema.sql"; force = true)

# Create plants
for db in [db1, db2]
create_element!(db, "Plant"; label = "Plant 1")
create_element!(db, "Plant"; label = "Plant 2")
create_element!(db, "Plant"; label = "Plant 3")
end

# Create resources with different time series relations
df1 = DataFrame(
date_time = [DateTime(2020), DateTime(2021), DateTime(2022)],
power = [100.0, 200.0, 300.0],
plant_id = ["Plant 1", "Plant 2", "Plant 3"],
)
df2 = DataFrame(
date_time = [DateTime(2020), DateTime(2021), DateTime(2022)],
power = [100.0, 200.0, 300.0],
plant_id = ["Plant 1", "Plant 1", "Plant 3"],
)

create_element!(db1, "Resource"; label = "Resource1", generation = df1)
create_element!(db2, "Resource"; label = "Resource1", generation = df2)

differences = compare_time_series_relations(db1, db2, "Resource")
# Returns: ["Collection 'Resource', time series relation 'plant_id', label 'Resource1', column 'plant_id', row 2: relations differ (db1: Plant 2, db2: Plant 1)"]
```
"""
function compare_time_series_relations(
    db1::DatabaseSQLite,
    db2::DatabaseSQLite,
    collection_id::String,
)
    differences = String[]
    collection = _get_collection(db1, collection_id)

    # Element labels are read from db1 only, so stop early (with a single
    # difference) when the two databases do not hold the same number of elements.
    num_elements_db1 = number_of_elements(db1, collection_id)
    num_elements_db2 = number_of_elements(db2, collection_id)
    if num_elements_db1 != num_elements_db2
        push!(
            differences,
            "Collection '$collection_id': different number of elements (db1: $num_elements_db1, db2: $num_elements_db2)",
        )
        return differences
    end

    labels = read_scalar_parameters(db1, collection_id, "label")

    # Walk the relations in the order they are stored in the collection. The
    # container is an OrderedDict filled table by table, so attributes of the same
    # group stay adjacent and the reported differences come out in a stable,
    # schema-defined order (an intermediate unordered Dict keyed by group_id would
    # make that order depend on hashing, and the group id itself is never needed).
    attr_ids = collect(keys(collection.time_series_relations))

    for label in labels
        for attr_id in attr_ids
            # Common prefix of every message reported for this (attribute, element).
            context = "Collection '$collection_id', time series relation '$attr_id', label '$label'"

            df1 = read_time_series_relation_table(db1, collection_id, attr_id, label)
            df2 = read_time_series_relation_table(db2, collection_id, attr_id, label)

            # A cell-by-cell comparison only makes sense for tables of equal shape.
            if size(df1) != size(df2)
                push!(
                    differences,
                    "$context: different table sizes (db1: $(size(df1)), db2: $(size(df2)))",
                )
                continue
            end

            # Column names must match in both content and order.
            if names(df1) != names(df2)
                push!(
                    differences,
                    "$context: different column names (db1: $(names(df1)), db2: $(names(df2)))",
                )
                continue
            end

            for col_name in names(df1)
                col1 = df1[!, col_name]
                col2 = df2[!, col_name]

                for (row_idx, (v1, v2)) in enumerate(zip(col1, col2))
                    blank1 = _is_blank_relation_value(v1)
                    blank2 = _is_blank_relation_value(v2)

                    if blank1 && blank2
                        # `missing` and "" are treated as the same "no relation" value.
                        continue
                    elseif blank1 || blank2
                        push!(
                            differences,
                            "$context, column '$col_name', row $row_idx: null/empty mismatch (db1: $(v1), db2: $(v2))",
                        )
                    elseif v1 != v2
                        push!(
                            differences,
                            "$context, column '$col_name', row $row_idx: relations differ (db1: $(v1), db2: $(v2))",
                        )
                    end
                end
            end
        end
    end

    return differences
end

# Return `true` when a cell holds no relation, i.e. it is `missing` or an empty string.
_is_blank_relation_value(value) = ismissing(value) || value == ""

"""
compare_time_series_files(db1::DatabaseSQLite, db2::DatabaseSQLite, collection_id::String)

Expand Down Expand Up @@ -842,6 +988,11 @@ function compare_databases(
append!(all_differences, compare_time_series(db1, db2, collection_id))
end

# Compare time series relations
if !isempty(_get_collection(db1, collection_id).time_series_relations)
append!(all_differences, compare_time_series_relations(db1, db2, collection_id))
end

# Compare time series files
append!(all_differences, compare_time_series_files(db1, db2, collection_id))
end
Expand Down
Loading