From ba7ebff5564f5798b0e0fac1f867f0867ab006f8 Mon Sep 17 00:00:00 2001
From: James Hayhurst
Date: Tue, 24 Mar 2026 14:15:23 +0000
Subject: [PATCH] add novelty timeseries to disease and target

change args, remove logs

sort by year

change Novelty to association timeseries

novelty field derived from direct or indirect novelty

update timeseries table name

added ordering by novelty column

add params to timeseries query

added aggregation type and year params to timeseries

formatting

Update Backend.scala
---
 app/esecuele/Functions.scala                  |   2 +
 app/models/Backend.scala                      |  38 +++++-
 .../db/AssociationTimeSeriesQuery.scala       |  86 ++++++++++++++
 app/models/db/CredibleSetQuery.scala          |   1 -
 app/models/db/InteractionSourcesQuery.scala   |   2 -
 app/models/db/QAOTF.scala                     |  26 +++--
 .../entities/AssociationTimeSeries.scala      |  40 +++++++
 app/models/entities/Associations.scala        |   9 +-
 app/models/entities/Configuration.scala       |   1 +
 app/models/gql/Arguments.scala                |  27 ++++-
 app/models/gql/Objects.scala                  | 111 ++++++++++++++++--
 conf/application.conf                         |   4 +
 12 files changed, 313 insertions(+), 34 deletions(-)
 create mode 100644 app/models/db/AssociationTimeSeriesQuery.scala
 create mode 100644 app/models/entities/AssociationTimeSeries.scala

diff --git a/app/esecuele/Functions.scala b/app/esecuele/Functions.scala
index b5e4f35a..3fc8037f 100644
--- a/app/esecuele/Functions.scala
+++ b/app/esecuele/Functions.scala
@@ -36,6 +36,8 @@ object Functions {
 
   def any(col: Column): Column = f("any", col)
 
+  def anyIf(col: Column, cond: Column): Column = f("anyIf", col, cond)
+
   def lower(col: Column): Column = f("lower", col)
 
   def upper(col: Column): Column = f("upper", col)
diff --git a/app/models/Backend.scala b/app/models/Backend.scala
index 92a5846c..217db14f 100644
--- a/app/models/Backend.scala
+++ b/app/models/Backend.scala
@@ -24,13 +24,14 @@ import models.entities.Interactions.*
 import models.entities.Loci.*
 import models.entities.MechanismsOfAction.*
 import models.entities.MousePhenotypes.*
+import models.entities.AssociationTimeSeriesResults.*
 import models.entities.Pharmacogenomics.*
 import models.entities.SearchFacetsResults.*
 import models.entities.Studies.*
 import models.entities.Evidences.*
 import models.entities.SequenceOntologyTerm.*
 import models.entities.*
-import models.gql.{Fetchers, StudyTypeEnum, InteractionSourceEnum}
+import models.gql.{Fetchers, AggregationTypeEnum, StudyTypeEnum, InteractionSourceEnum}
 import models.entities.Violations.{DateFilterError, InputParameterCheckError}
 import org.apache.http.impl.nio.reactor.IOReactorConfig
 import play.api.cache.AsyncCacheApi
@@ -744,6 +745,37 @@ class Backend @Inject() (implicit
     dbRetriever.executeQuery[MechanismsOfAction, Query](query.query)
   }
 
+  def getAssociationTimeSeries(diseaseId: String,
+                               targetId: String,
+                               isDirect: Boolean,
+                               aggregationTypes: Option[Seq[AggregationTypeEnum.Value]],
+                               startYear: Option[Int],
+                               endYear: Option[Int],
+                               pagination: Option[Pagination]
+  ): Future[AssociationTimeSeriesResults] = {
+    val tableName = getTableWithPrefixOrDefault(
+      defaultOTSettings.clickhouse.associationTimeSeries.name
+    )
+    val pag = pagination.getOrElse(Pagination.mkDefault).offsetLimit
+    val query = AssociationTimeSeriesQuery(diseaseId,
+                                           targetId,
+                                           isDirect,
+                                           tableName,
+                                           pag._1,
+                                           pag._2,
+                                           aggregationTypes,
+                                           startYear,
+                                           endYear
+    )
+    dbRetriever.executeQuery[AssociationTimeSeries, Query](query.query).map { timeSeriesSeq =>
+      // Every row carries the query's meta_total count; an empty page therefore reports 0.
+      AssociationTimeSeriesResults(
+        timeSeriesSeq.headOption.fold(0L)(_.meta_total),
+        timeSeriesSeq
+      )
+    }
+  }
+
   def getDrugWarnings(ids: Seq[String]): Future[IndexedSeq[DrugWarnings]] = {
     val tableName = getTableWithPrefixOrDefault(defaultOTSettings.clickhouse.drugWarnings.name)
     val localMarkerContext = markerContext.fromExistingContext(append("table", tableName))
@@ -871,6 +903,7 @@ class Backend @Inject() (implicit
       tableName: String,
       datasources: Option[Seq[DatasourceSettings]],
       fixedEntityId: String,
+      indirect: Boolean,
       indirectIds: Set[String],
       bIds: Set[String],
       columnFilters: Seq[(String, Any)],
@@ -900,6 +933,7 @@ class Backend @Inject() (implicit
       weights,
       _,
       dontPropagate,
+      indirect,
       page.offset,
       page.size
     )
@@ -950,6 +984,7 @@ class Backend @Inject() (implicit
       getTableWithPrefixOrDefault(defaultOTSettings.clickhouse.disease.associations.name),
       datasources,
       disease.id,
+      indirect,
       indirectIDs,
       targetIds,
       Seq.empty,
@@ -996,6 +1031,7 @@ class Backend @Inject() (implicit
       getTableWithPrefixOrDefault(defaultOTSettings.clickhouse.target.associations.name),
       datasources,
       target.id,
+      indirect,
       indirectIDs,
       diseaseIds,
       columnFilters,
diff --git a/app/models/db/AssociationTimeSeriesQuery.scala b/app/models/db/AssociationTimeSeriesQuery.scala
new file mode 100644
index 00000000..45fab553
--- /dev/null
+++ b/app/models/db/AssociationTimeSeriesQuery.scala
@@ -0,0 +1,86 @@
+package models.db
+
+import esecuele.Column.column
+import esecuele.Column.literal
+import esecuele.*
+import utils.OTLogging
+import models.gql.AggregationTypeEnum
+
+/** Query for the time series rows of a single target-disease association.
+  *
+  * Rows are restricted to the given disease, target and `isDirect` flag, optionally narrowed by
+  * aggregation type and an inclusive year range, ordered by ascending year and paginated.
+  *
+  * @param aggregationTypeInclude types to keep; `overall` and `datasourceId` when absent
+  * @param yearFrom inclusive lower bound on `year`, unbounded when absent
+  * @param yearTo inclusive upper bound on `year`, unbounded when absent
+  */
+case class AssociationTimeSeriesQuery(
+    diseaseId: String,
+    targetId: String,
+    isDirect: Boolean,
+    tableName: String,
+    offset: Int,
+    size: Int,
+    aggregationTypeInclude: Option[Seq[AggregationTypeEnum.Value]] = None,
+    yearFrom: Option[Int] = None,
+    yearTo: Option[Int] = None
+) extends Queryable
+    with OTLogging {
+
+  private val aggregationTypes: Seq[AggregationTypeEnum.Value] = aggregationTypeInclude.getOrElse(
+    Seq(AggregationTypeEnum.overall, AggregationTypeEnum.datasourceId)
+  )
+
+  private val aggregationTypeFilter =
+    Functions.in(column("aggregationType"),
+                 Functions.set(aggregationTypes.map(t => literal(t.toString)))
+    )
+
+  private val yearFromFilter: Option[Column] =
+    yearFrom.map(year => Functions.greaterOrEquals(column("year"), literal(year)))
+  private val yearToFilter: Option[Column] =
+    yearTo.map(year => Functions.lessOrEquals(column("year"), literal(year)))
+  private val yearFilter: Column = (yearFromFilter, yearToFilter) match {
+    case (Some(from), Some(to)) => Functions.and(from, to)
+    case (Some(filter), None)   => filter
+    case (None, Some(filter))   => filter
+    case (None, None)           => literal(true)
+  }
+
+  private val positionalQuery =
+    Functions.and(
+      Functions.equals(column("diseaseId"), literal(diseaseId)),
+      Functions.equals(column("targetId"), literal(targetId)),
+      Functions.equals(column("isDirect"), literal(isDirect))
+    )
+
+  private val queryWithFilters = Where(
+    Functions.and(
+      positionalQuery,
+      aggregationTypeFilter,
+      yearFilter
+    )
+  )
+
+  // Unpaginated row count for the same filters. NOTE(review): Backend.getAssociationTimeSeries
+  // in this patch does not use it and reads the total from `meta_total` instead.
+  val totals: Query =
+    Query(
+      Select(Functions.count(Column.star) :: Nil),
+      From(column(tableName)),
+      queryWithFilters
+    )
+
+  override val query: Query =
+    Query(
+      Select(
+        Column.star :: Functions.countOver("meta_total") :: Nil
+      ),
+      From(column(tableName)),
+      queryWithFilters,
+      OrderBy(column("year").asc :: Nil),
+      Limit(offset, size),
+      Format("JSONEachRow")
+    )
+}
diff --git a/app/models/db/CredibleSetQuery.scala b/app/models/db/CredibleSetQuery.scala
index b92082f6..d83a3fe0 100644
--- a/app/models/db/CredibleSetQuery.scala
+++ b/app/models/db/CredibleSetQuery.scala
@@ -4,7 +4,6 @@ import esecuele.Column.column
 import esecuele.Column.literal
 import esecuele._
 import utils.OTLogging
-import models.entities.StudyQueryArgs
 import models.gql.StudyTypeEnum
 import models.entities.CredibleSetQueryArgs
 
diff --git a/app/models/db/InteractionSourcesQuery.scala b/app/models/db/InteractionSourcesQuery.scala
index 50c28019..33bc92cc 100644
--- a/app/models/db/InteractionSourcesQuery.scala
+++ b/app/models/db/InteractionSourcesQuery.scala
@@ -1,10 +1,8 @@
 package models.db
 
 import esecuele.Column.column
-import esecuele.Column.literal
 import esecuele._
 import utils.OTLogging
-import play.libs.F
 import models.gql.InteractionSourceEnum
 
 case class InteractionSourcesQuery(
diff --git a/app/models/db/QAOTF.scala b/app/models/db/QAOTF.scala
index 50dadb73..fabf112d 100644
--- a/app/models/db/QAOTF.scala
+++ b/app/models/db/QAOTF.scala
@@ -47,6 +47,7 @@ case class QAOTF(
     datasourceWeights: Seq[(String, Double)],
     mustIncludeDatasources: Set[String],
     nonPropagatedDatasources: Set[String],
+    indirect: Boolean,
     offset: Int,
     size: Int
 ) extends Queryable
@@ -61,8 +62,7 @@ case class QAOTF(
   val T: Column = column(tableName)
   val RowID: Column = column("rowId")
   val RowScore: Column = column("rowScore")
-  val NoveltyDirect: Column = column("noveltyDirect")
-  val NoveltyIndirect: Column = column("noveltyIndirect")
+  val Novelty: Column = if indirect then column("noveltyIndirect") else column("noveltyDirect")
   val maxHS: Column = literal(Harmonic.maxValue(100000, pExponentDefault, 1.0))
     .as(Some("max_hs_score"))
 
@@ -152,8 +152,9 @@ case class QAOTF(
       .as(Some("weightPair"))
     val DSFieldWC = F.tupleElement(WC.name, literal(1)).as(Some("datasourceId"))
     val WFieldWC = F.toNullable(F.tupleElement(WC.name, literal(2))).as(Some("weight"))
-    val NoveltyDirectAny: Column = F.any(NoveltyDirect).as(Some("noveltyDirectAny"))
-    val NoveltyIndirectAny: Column = F.any(NoveltyIndirect).as(Some("noveltyIndirectAny"))
+    // Novelty must be read from a row where A equals AId, so anyIf is used rather than any.
+    val NoveltyWhereA: Column =
+      F.anyIf(Novelty, F.equals(A, literal(AId))).as(Some("noveltyWhereA"))
 
     // transform weights vector into a table to extract each value of each tuple
     val q = Q(
@@ -163,7 +164,7 @@ case class QAOTF(
     )
     val withDT = With(DSScore :: DTAny :: DSW :: Nil)
     val selectDSScores = Select(
-      B :: DSW.name :: DTAny.name :: DS :: DSScore.name :: NoveltyDirectAny :: NoveltyIndirectAny :: Nil
+      B :: DSW.name :: DTAny.name :: DS :: DSScore.name :: NoveltyWhereA :: Nil
     )
     val fromT = From(T, Some("l"))
     val joinWeights =
@@ -242,8 +243,8 @@ case class QAOTF(
       F.arrayMap("x -> (x.3, x.1)", collectedDScored.name).as(Some("score_datasources"))
     val scoreDTs = F.arrayMap("x -> (x.4, x.1)", collectedDScored.name).as(Some("score_dt"))
     val uniqDTs = F.groupUniqArray(DT).as(Some("datatypes_v"))
-    val NoveltyDirectAny: Column = F.any(NoveltyDirect).as(Some("noveltyDirectAny"))
-    val NoveltyIndirectAny: Column = F.any(NoveltyIndirect).as(Some("noveltyIndirectAny"))
+    val NoveltyWhereA: Column =
+      F.anyIf(Novelty, F.equals(A, literal(AId))).as(Some("noveltyWhereA"))
 
     val mappedDTs = F
       .arrayMap(
@@ -258,8 +259,7 @@ case class QAOTF(
         mappedDTs.name
       )
       .as(Some("score_datatypes"))
-    val noveltyDirectScore: Column = F.any(NoveltyDirectAny).as(Some("noveltyDirect"))
-    val noveltyIndirectScore: Column = F.any(NoveltyIndirectAny).as(Some("noveltyIndirect"))
+    val noveltyScore: Column = F.any(NoveltyWhereA).as(Some("novelty"))
     val orderColumn = orderScoreBy.getOrElse((scoreOverall.name.rep, "desc"))
     val jointColumns = F.concat(scoredDTs.name, scoreDSs.name)
 
@@ -291,12 +291,13 @@ case class QAOTF(
         mappedDTs,
         scoredDTs,
         scoreOverall,
+        noveltyScore,
         jointColumns,
         orderByC
       )
     )
     val selectScores = Select(
-      B :: scoreOverall.name :: scoredDTs.name :: scoreDSs.name :: noveltyDirectScore :: noveltyIndirectScore :: Nil
+      B :: scoreOverall.name :: scoredDTs.name :: scoreDSs.name :: noveltyScore.name :: Nil
     ) // :: scoreDTs.name :: collectedDScored :: Nil)
     val fromAgg = From(queryGroupByDS.toColumn(None))
     val groupByB = GroupBy(B :: Nil)
@@ -306,6 +307,11 @@ case class QAOTF(
           (if (order == "desc") scoreOverall.name.desc
            else scoreOverall.name.asc) :: Nil
         )
+      case ("novelty", order) =>
+        OrderBy(
+          (if (order == "desc") noveltyScore.name.desc
+           else noveltyScore.name.asc) :: Nil
+        )
       case (_, order) =>
         OrderBy(
           (if (order == "desc") orderByC.name.desc
diff --git a/app/models/entities/AssociationTimeSeries.scala b/app/models/entities/AssociationTimeSeries.scala
new file mode 100644
index 00000000..fed8dc37
--- /dev/null
+++ b/app/models/entities/AssociationTimeSeries.scala
@@ -0,0 +1,40 @@
+package models.entities
+
+import play.api.libs.json.{Json, OFormat}
+import slick.jdbc.GetResult
+import utils.db.DbJsonParser.fromPositionedResult
+import models.gql.AggregationTypeEnum
+
+/** One row of the association time series table.
+  *
+  * `meta_total` is filled from the `countOver("meta_total")` column selected by
+  * `AssociationTimeSeriesQuery` and is excluded from the GraphQL type.
+  */
+case class AssociationTimeSeries(
+    diseaseId: String,
+    targetId: String,
+    aggregationType: AggregationTypeEnum.AggregationType,
+    aggregationValue: String,
+    year: Option[Int],
+    associationScore: Double,
+    novelty: Option[Double],
+    yearlyEvidenceCount: Option[Int],
+    isDirect: Boolean,
+    meta_total: Long
+)
+
+/** A page of time series rows together with the total count reported by the query. */
+case class AssociationTimeSeriesResults(
+    count: Long,
+    rows: Vector[AssociationTimeSeries]
+)
+
+object AssociationTimeSeriesResults {
+  val empty: AssociationTimeSeriesResults = AssociationTimeSeriesResults(0, Vector.empty)
+  implicit val getAssociationTimeSeriesRowFromDB: GetResult[AssociationTimeSeries] =
+    GetResult(fromPositionedResult[AssociationTimeSeries])
+  implicit val AssociationTimeSeriesImp: OFormat[AssociationTimeSeries] =
+    Json.format[AssociationTimeSeries]
+  implicit val AssociationTimeSeriesResultsImp: OFormat[AssociationTimeSeriesResults] =
+    Json.format[AssociationTimeSeriesResults]
+}
diff --git a/app/models/entities/Associations.scala b/app/models/entities/Associations.scala
index 1f3de59a..54178383 100644
--- a/app/models/entities/Associations.scala
+++ b/app/models/entities/Associations.scala
@@ -17,8 +17,7 @@ case class Association(
     score: Double,
     datatypeScores: Vector[ScoredComponent],
     datasourceScores: Vector[ScoredComponent],
-    noveltyDirect: Option[Double] = None,
-    noveltyIndirect: Option[Double] = None
+    novelty: Option[Double] = None
 )
 
 case class Associations(
@@ -38,8 +37,7 @@ object Associations {
       val score: Double = r.<<
       val tuples1: String = r.<<
       val tuples2: String = r.<<
-      val noveltyDirect: Option[Double] = r.<<
-      val noveltyIndirect: Option[Double] = r.<<
+      val novelty: Option[Double] = r.<<
 
       Association(
         id,
@@ -62,8 +60,7 @@ object Associations {
             ScoredComponent(left, right)
           }
         ).rep,
-        noveltyDirect,
-        noveltyIndirect
+        novelty
       )
     }
 
diff --git a/app/models/entities/Configuration.scala b/app/models/entities/Configuration.scala
index 332fce6b..b154f09b 100644
--- a/app/models/entities/Configuration.scala
+++ b/app/models/entities/Configuration.scala
@@ -111,6 +111,7 @@ object Configuration {
       clinicalTarget: DbTableSettings,
       mechanismOfAction: DbTableSettings,
       mousePhenotypes: DbTableSettings,
+      associationTimeSeries: DbTableSettings,
       otarProjects: DbTableSettings,
       pharmacogenomics: PharmacogenomicsSettings,
       proteinCodingCoordinates: ProteinCodingCoordinatesSettings,
diff --git a/app/models/gql/Arguments.scala b/app/models/gql/Arguments.scala
index 4544bd9a..de0aa801 100644
--- a/app/models/gql/Arguments.scala
+++ b/app/models/gql/Arguments.scala
@@ -9,6 +9,14 @@ import sangria.marshalling.FromInput
 import sangria.util.tag.@@
 import play.api.libs.json.{Format, Json}
 
+object AggregationTypeEnum extends Enumeration {
+
+  type AggregationType = Value
+  val overall, datasourceId = Value
+
+  implicit val aggregationTypeF: Format[AggregationType] = Json.formatEnum(this)
+}
+
 object StudyTypeEnum extends Enumeration {
 
   type StudyType = Value
@@ -27,6 +35,12 @@ object InteractionSourceEnum extends Enumeration {
 object Arguments {
   import sangria.macros.derive._
 
+  implicit val AggregationType: EnumType[AggregationTypeEnum.Value] =
+    deriveEnumType[AggregationTypeEnum.Value](
+      EnumTypeDescription(
+        "Aggregation type used to group the data"
+      )
+    )
   implicit val StudyType: EnumType[StudyTypeEnum.Value] =
     deriveEnumType[StudyTypeEnum.Value](
       EnumTypeDescription(
@@ -159,10 +173,13 @@ object Arguments {
     Argument("studyId", OptionInputType(StringType), description = "Study ID")
   val studyIds: Argument[Option[Seq[String]]] =
     Argument("studyIds", OptionInputType(ListInputType(StringType)), description = "Study IDs")
-  val diseaseId: Argument[Option[String]] =
-    Argument("diseaseId", OptionInputType(StringType), description = "Disease ID")
   val diseaseIds: Argument[Option[Seq[String]]] =
     Argument("diseaseIds", OptionInputType(ListInputType(StringType)), description = "Disease IDs")
+  val aggregationTypes =
+    Argument("aggregationTypes",
+             OptionInputType(ListInputType(AggregationType)),
+             description = "Aggregation types"
+    )
   val studyTypes =
     Argument("studyTypes", OptionInputType(ListInputType(StudyType)), description = "Study types")
   val regions: Argument[Option[Seq[String]]] =
@@ -177,6 +194,12 @@ object Arguments {
       OptionInputType(ListInputType(StringType)),
       description = "Study-locus IDs"
     )
+  val isDirect: Argument[Boolean] = Argument(
+    "isDirect",
+    BooleanType,
+    description =
+      "Whether to include only direct associations/evidence (true), or also indirect ones (false)."
+  )
   val enableIndirect: Argument[Option[Boolean]] = Argument(
     "enableIndirect",
     OptionInputType(BooleanType),
diff --git a/app/models/gql/Objects.scala b/app/models/gql/Objects.scala
index 04aace9d..e9bce57a 100644
--- a/app/models/gql/Objects.scala
+++ b/app/models/gql/Objects.scala
@@ -443,6 +443,21 @@ object Objects extends OTLogging {
         description = Some(""),
         arguments = Nil,
         resolve = ctx => ctx.ctx.getClinicalTargetsByTarget(ctx.value.id)
+      ),
+      Field(
+        "associationTimeSeries",
+        associationTimeSeriesResultsImp,
+        description = Some("Association time series"),
+        arguments = efoId :: isDirect :: aggregationTypes :: startYear :: endYear :: pageArg :: Nil,
+        resolve = ctx =>
+          ctx.ctx.getAssociationTimeSeries(ctx.arg(efoId),
+                                           ctx.value.id,
+                                           ctx.arg(isDirect),
+                                           ctx.arg(aggregationTypes),
+                                           ctx.arg(startYear),
+                                           ctx.arg(endYear),
+                                           ctx.arg(pageArg)
+          )
       )
     )
   )
@@ -683,6 +698,22 @@ object Objects extends OTLogging {
         ),
         arguments = Nil,
         resolve = ctx => ctx.ctx.getClinicalIndicationsByDisease(ctx.value.id)
+      ),
+      Field(
+        "associationTimeSeries",
+        associationTimeSeriesResultsImp,
+        description = Some("Association time series"),
+        arguments =
+          ensemblId :: isDirect :: aggregationTypes :: startYear :: endYear :: pageArg :: Nil,
+        resolve = ctx =>
+          ctx.ctx.getAssociationTimeSeries(ctx.value.id,
+                                           ctx.arg(ensemblId),
+                                           ctx.arg(isDirect),
+                                           ctx.arg(aggregationTypes),
+                                           ctx.arg(startYear),
+                                           ctx.arg(endYear),
+                                           ctx.arg(pageArg)
+          )
       )
     )
   )
@@ -725,12 +756,8 @@ object Objects extends OTLogging {
         "Association scores computed for every datasource (e.g., IMPC, ChEMBL, Gene2Phenotype)"
       ),
       DocumentField(
-        "noveltyDirect",
-        "A measure of how novel the target–disease association is, calculated based on the accumulation of direct evidence over time"
-      ),
-      DocumentField(
-        "noveltyIndirect",
-        "A measure of how novel the target–disease association is, calculated based on the accumulation of indirect evidence over time"
+        "novelty",
+        "A measure of how novel the target–disease association is, calculated based on the accumulation of evidence over time"
       ),
       ReplaceField(
         "id",
@@ -759,13 +786,9 @@ object Objects extends OTLogging {
         "Association scores computed for every datasource (e.g., IMPC, ChEMBL, Gene2Phenotype)"
       ),
       DocumentField(
-        "noveltyDirect",
-        "A measure of how novel the target–disease association is, calculated based on the accumulation of direct evidence over time"
-      ),
-      DocumentField(
-        "noveltyIndirect",
-        "A measure of how novel the target–disease association is, calculated based on the accumulation of indirect evidence over time"
+        "novelty",
+        "A measure of how novel the target–disease association is, calculated based on the accumulation of evidence over time"
       ),
       ReplaceField(
         "id",
         Field(
@@ -788,6 +811,70 @@ object Objects extends OTLogging {
         "List of credible set entries with their associated statistics and fine-mapping information"
       )
     )
+  implicit val associationTimeSeriesResultsImp: ObjectType[Backend, AssociationTimeSeriesResults] =
+    deriveObjectType[Backend, AssociationTimeSeriesResults](
+      ObjectTypeDescription(
+        "Association time series results for a target-disease association. Provides a temporal view of the association, including the association score, novelty and yearly evidence count."
+      ),
+      DocumentField(
+        "count",
+        "Total number of association time series results matching the query filters"
+      ),
+      DocumentField(
+        "rows",
+        "List of association time series entries"
+      )
+    )
+
+  implicit val associationTimeSeriesImp: ObjectType[Backend, AssociationTimeSeries] =
+    deriveObjectType[Backend, AssociationTimeSeries](
+      ObjectTypeDescription(
+        "Association time series entry for a target-disease association."
+      ),
+      DocumentField("diseaseId", "EFO ID of the disease"),
+      DocumentField(
+        "targetId",
+        "Ensembl ID of the target gene"
+      ),
+      DocumentField(
+        "aggregationType",
+        "Type of aggregation used for novelty calculation"
+      ),
+      DocumentField(
+        "aggregationValue",
+        "Value used for novelty aggregation"
+      ),
+      DocumentField(
+        "year",
+        "Year of the evidence item used for novelty calculation"
+      ),
+      DocumentField(
+        "associationScore",
+        "Association score between the target and disease"
+      ),
+      DocumentField(
+        "novelty",
+        "Novelty score indicating how novel the target-disease association is."
+      ),
+      DocumentField(
+        "yearlyEvidenceCount",
+        "Yearly count of evidence items"
+      ),
+      DocumentField(
+        "isDirect",
+        "Flag indicating whether the novelty calculation is based on direct evidence only or includes indirect evidence"
+      ),
+      ReplaceField(
+        "aggregationType",
+        Field(
+          "aggregationType",
+          AggregationType,
+          Some("Aggregation type used to group the data"),
+          resolve = _.value.aggregationType
+        )
+      ),
+      ExcludeFields("meta_total")
+    )
   implicit val tissueImp: ObjectType[Backend, Tissue] =
     deriveObjectType[Backend, Tissue](
       ObjectTypeDescription(
diff --git a/conf/application.conf b/conf/application.conf
index 6fac8812..f49ba526 100644
--- a/conf/application.conf
+++ b/conf/application.conf
@@ -115,6 +115,10 @@ ot {
       label = "Mouse phenotypes table"
       name = "mouse_phenotypes"
     }
+    associationTimeSeries {
+      label = "Association time series table"
+      name = "association_time_series"
+    }
     otarProjects {
       label = "OTAR projects table"
       name = "otar_projects"