From f53b466fc3e721a011b885904b5adc0150cbd3e4 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Mon, 27 Apr 2026 08:34:36 -0400 Subject: [PATCH 01/11] Add flowerpress functionality to daffodil-sbt Notable improvements over the currently released flowerpress: - Works on XSLT files as well - Only packages files that are referenced by the project's files --- VERSION | 2 +- .../org/apache/daffodil/DaffodilPlugin.scala | 254 +++++++++++++++++- 2 files changed, 254 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index bd8bf88..820fb8d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.0 +1.7.0-SNAPSHOT diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 4592b40..b5a7f3f 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -18,6 +18,13 @@ package org.apache.daffodil import java.io.File +import java.net.URI +import java.nio.charset.Charset +import java.nio.file. { FileSystems, Files, Path, Paths, StandardCopyOption } +import java.util.HashMap +import java.util.zip. 
{ ZipOutputStream, ZipEntry } + +import scala.collection.JavaConverters._ import scala.language.implicitConversions import scala.util.Properties @@ -63,6 +70,18 @@ object DaffodilPlugin extends AutoPlugin { val daffodilTdmlUsesPackageBin = settingKey[Boolean]( "Whether or not TDML files use the saved parsers created by daffodilPackageBin" ) + val flattenTarget = settingKey[File]( + "File to write the flattened schemas package to" + ) + val flattenSchemas = taskKey[File]( + "flatten the directory structure of all schemas and schema dependencies to a single common directory and update 'schemaLocation' paths to match" + ) + val flattenExcludes = settingKey[Seq[Glob]]( + "Globs of paths to exclude from schema flattening" + ) + val flattenIncludes = settingKey[Seq[Glob]]( + "Globs of paths to include for schema flattening, even if the path is listed in flattenExcludes" + ) /** * Class to define daffodilPackageBinInfos, auto-imported to simplify sbt configs @@ -598,7 +617,8 @@ object DaffodilPlugin extends AutoPlugin { inConfig(Compile)(packageDaffodilBinSettings) ++ inConfig(Test)(packageDaffodilBinSettings) ++ inConfig(Compile)(flatLayoutSettings("src")) ++ - inConfig(Test)(flatLayoutSettings("test")) + inConfig(Test)(flatLayoutSettings("test")) ++ + inConfig(Compile)(flattenerSettings) /** * Define the artifacts, products, and packageDaffodilBin task that creates the saved parsers @@ -840,6 +860,238 @@ object DaffodilPlugin extends AutoPlugin { } } + def flattenerSettings: Seq[Setting[_]] = Seq( + flattenTarget := target.value / s"${name.value}-${version.value}-flat.zip", + /* Paths in flattenExcludes/Includes that are not globbed at the start are + * generally only going to match paths within JAR files on the classpath. + * In order to deal with paths on the filesystem we need to glob the start + * of the path to account for different directory structure before the + * schema project path. 
+ */ + flattenExcludes := Seq( + //"**/src/test/resources/**", + //"Log4j*.xsd", + //"xsd/**", // This is coming from XSAT2 + //"IBMdefined/**", + //"org/apache/xml/**", + //"edu/illinois/ncsa/daffodil/**", + "org/apache/daffodil/**", + //"**/*-tests.jar", + //"META-INF/**", + //"com/ibm/icu/**", + //"eclipse-xml-catalog.xml", + //"daffodil-built-in-catalog.xml" + ), + flattenIncludes := Seq( + "org/apache/daffodil/xsd/DFDLGeneralFormat*.dfdl.xsd", + ), + + /** + * Whether or not to publish the flattened schemas zip. Defaults to false. + * + * If projects want to publish flattened schemas then they must explicitly enable it by + * setting 'flattenSchemas / publishArtifact := true'. + * + * If false, flattened schemas will not be created unless you explicitly run the + * flattenSchemas. + */ + flattenSchemas / publishArtifact := false, + + flattenSchemas / artifact := Artifact(name.value, "flat", "zip", Some("flat"), Vector(), None), + flattenSchemas := { + + val logger = streams.value.log + + val extractDir = Paths.get(target.value.getPath(), "flatExtractDir") + if (Files.exists(extractDir)) + IO.delete(extractDir.toFile) + Files.createDirectory(extractDir) + + val flatDir = Paths.get(target.value.getPath(), "flatDir") + if (Files.exists(flatDir)) + IO.delete(flatDir.toFile) + Files.createDirectory(flatDir) + + val projectXsdFiles = (Compile / resourceDirectories).value.flatMap { dir => (dir ** "*.xsd").get }.map(path => Paths.get(path.toString)) + val projectXslFiles = (Compile / resourceDirectories).value.flatMap { dir => (dir ** ("*.xsl" || "*.xslt")).get }.map(path => Paths.get(path.toString)) + val projectXmlFiles = (Compile / resourceDirectories).value.flatMap { dir => (dir ** "*.xml").get }.map(path => Paths.get(path.toString)) + + /* Copy schema files from the current project's src/main/resources + * directory + */ + val filesFromProject = (projectXsdFiles ++ projectXslFiles ++ projectXmlFiles) filterNot { path => + val matchesExcludes = 
flattenExcludes.value.exists(glob => glob.matches(path)) + val matchesIncludes = flattenIncludes.value.exists(glob => glob.matches(path)) + matchesExcludes && !matchesIncludes + } + + val extractedProjectFiles = filesFromProject map { origPath => + val resourcePath = Paths.get((Compile / resourceDirectories).value(0).toString).relativize(origPath).toString + val newPath = Paths.get(extractDir.toString, resourcePath) + Files.createDirectories(newPath.getParent()) + Files.copy(origPath, newPath, StandardCopyOption.REPLACE_EXISTING) + newPath + } + + /* Get all dependency jars and resources specific to this project. Note + * that the "Test" configuration is used for JAR files in order to ensure + * we pull in XSD files from daffodil-lib, as in many schema projects the + * daffodil dependencies are only used for testing, not compiling. Also + * note that we want to use Test/externalDependencyClasspath to get + * dependency jars, and not something like Test/fullClasspath or + * Test/dependencyClasspath, since those could trigger expensive resource + * generators or compilation of internal test jars that we don't need--we + * only need Compile/resources from this project and Test/dependency jars + * from external projects + */ + val projectJarFiles = (Test / externalDependencyClasspath).value.files.flatMap { file => (file ** "*.jar").get } + + projectJarFiles.reverse.map { jar => + val env = new HashMap[String, String] + val fs = FileSystems.newFileSystem(URI.create(s"jar:file:${jar}"), env) + val rootDirs = fs.getRootDirectories().asScala + rootDirs foreach { dir => + val xsdFiles = Files.walk(dir).iterator().asScala.filter(_.toString.endsWith(".xsd")) + + // Includes XML files as they may be used for configuration of the XSLT + val xsltFiles = Files.walk(dir).iterator().asScala.toList.filter(f => f.toString.endsWith(".xslt") || f.toString.endsWith(".xsl") || f.toString.endsWith(".xml")) + + /* For each XSD file we have, we want to extract it from its original + * jar 
while maintaining its path from inside the jar, ex: + * com.owlcyberdefense.whatever.jar:com/owlcyberdefense/whatever/xsd/whatever.xsd + * + * extracts to + * + * target/flatExtractDir/com/owlcyberdefense/whatever/xsd/whatever.xsd + * + * We also want to exclude schemas from paths listed in flattenExcludes + * that are not also listed in flattenIncludes + */ + val filesToExtract = (xsdFiles ++ xsltFiles) filterNot { path => + val matchesExcludes = flattenExcludes.value.exists(glob => glob.matches(path)) + val matchesIncludes = flattenIncludes.value.exists(glob => glob.matches(path)) + matchesExcludes && !matchesIncludes + } + + filesToExtract foreach { origPath => + val newPath = Paths.get(extractDir.toString, origPath.toString) + Files.createDirectories(newPath.getParent()) + Files.copy(origPath, newPath, StandardCopyOption.REPLACE_EXISTING) + } + } + fs.close() + } + + val schemaLocationPattern = """schemaLocation=\"([^\"]*)\"""".r + val hrefPattern = "href=\"([^\"]*)\"".r + val documentPattern = "document[(]'([^']*)'[)]".r + val referenceRegexes = List(schemaLocationPattern, hrefPattern, documentPattern) + val extractedFiles = Files.walk(extractDir).iterator().asScala.filter(Files.isRegularFile(_)) + + val referencedFiles = { + // This annotation simply warns if the compiler cannot enable tail call optimization + @scala.annotation.tailrec + def getReferences(parents: Seq[Path], acc: Seq[Path] = List.empty): Seq[Path] = { + parents match { + case h :: t if (acc.contains(h)) => getReferences(t, acc) // Already processed this file (h), proceed with rest of files (t) + case h :: t => { // Need to process this file (h) + val resourcePath = extractDir.relativize(h) + val newPath = Paths.get(flatDir.toString, extractDir.relativize(h).toString.replaceAll("/", "__")) + val bw = Files.newBufferedWriter(newPath) + val fileAsString = new String(Files.readAllBytes(h), Charset.defaultCharset()) + val references = { + referenceRegexes.flatMap { re => + 
re.findAllIn(fileAsString).matchData.map(_.group(1)) + } + } + + // Resolve the location of all references on the actual file + // system + val resolvedReferences = references.map { ref => + val origLocation = { + if (ref.contains("urn:")) + ref.split(" ")(1) + else + ref + } + + val resolvedPath: Option[Path] = { + if (Files.exists(Paths.get(extractDir.toString, origLocation))) { + // Original schemaLocation is full path, ex: + // com/whatever/schema.xsd + Some(Paths.get(extractDir.toString, origLocation)) + } else if (Files.exists(Paths.get(h.getParent().toString, origLocation))) { + // Original schemaLocation is a relative path to the current + // schema + Some(Paths.get(h.getParent().toString, origLocation).normalize()) + } else if (origLocation.startsWith("http")) { + None + } else { + if (origLocation.contains("DFDLGeneralFormat")) + throw new MessageOnlyException(s"Unable to locate file: $origLocation, required by: $h. Consider adding 'daffodil-lib' to this project's list of dependencies") + else + throw new MessageOnlyException(s"Unable to locate file: $origLocation, required by: $h") + } + } + ref -> resolvedPath + }.toMap.filter(e => e._2.isDefined) + + // For each reference do a search and replace of the entire file + val updatedFileAsString = { + resolvedReferences.foldLeft(fileAsString) { + case (input, (ref, resolvedRef)) => { + // For each reference replace each instance of it in the + // file with the same reference but with "/" changed to "__" + input.replaceAll(ref, extractDir.relativize(resolvedRef.get).toString.replaceAll("/", "__")) + } + } + } + + bw.write(updatedFileAsString) + bw.close() + + /* Have succesfully processed this file (h), call getReferences + * again on the rest of the list (t) + any references from this + * file. 
Add this file to the accumulator list + */ + getReferences(t ++ resolvedReferences.values.collect { case Some(path) => path }.filterNot(t.contains), acc :+ h) + } + case _ => acc // Have processed all referenced files, return acc + } + } + getReferences(extractedProjectFiles) + } + + /* Create zip file containing all flattened schemas */ + val flattenedFiles = Files.list(flatDir).iterator().asScala.filter(Files.isRegularFile(_)) + val zipPath = Paths.get(flattenTarget.value.toString) + val zos = new ZipOutputStream(Files.newOutputStream(zipPath)) + flattenedFiles foreach { file => + zos.putNextEntry(new ZipEntry(flatDir.relativize(file).toString)) + Files.copy(file, zos) + zos.closeEntry() + } + zos.close() + logger.info(s"Generated flattened schema package at ${flattenTarget.value.toString}") + flattenTarget.value + }, + + artifacts ++= { + if ((flattenSchemas / publishArtifact).value) { + Seq((flattenSchemas / artifact).value) + } else { + Seq.empty + } + }, + packagedArtifacts ++= { + if ((flattenSchemas / publishArtifact).value) { + Map((flattenSchemas / artifact).value -> flattenSchemas.value) + } else { + Map.empty[Artifact,File] + } + } + ) + class DaffodilProject(rootProject: Project, crossProjects: Seq[Project]) extends CompositeProject { From 886e412a713df7c79516f274ee635dbada5df7b4 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Tue, 28 Apr 2026 08:30:01 -0400 Subject: [PATCH 02/11] Fix formatting issues --- .../org/apache/daffodil/DaffodilPlugin.scala | 177 +++++++++++------- 1 file changed, 113 insertions(+), 64 deletions(-) diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index b5a7f3f..1d7f765 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -20,10 +20,9 @@ package org.apache.daffodil import java.io.File import java.net.URI import java.nio.charset.Charset -import java.nio.file. 
{ FileSystems, Files, Path, Paths, StandardCopyOption } +import java.nio.file.{ FileSystems, Files, Path, Paths, StandardCopyOption } import java.util.HashMap -import java.util.zip. { ZipOutputStream, ZipEntry } - +import java.util.zip.{ ZipEntry, ZipOutputStream } import scala.collection.JavaConverters._ import scala.language.implicitConversions import scala.util.Properties @@ -869,21 +868,21 @@ object DaffodilPlugin extends AutoPlugin { * schema project path. */ flattenExcludes := Seq( - //"**/src/test/resources/**", - //"Log4j*.xsd", - //"xsd/**", // This is coming from XSAT2 - //"IBMdefined/**", - //"org/apache/xml/**", - //"edu/illinois/ncsa/daffodil/**", - "org/apache/daffodil/**", - //"**/*-tests.jar", - //"META-INF/**", - //"com/ibm/icu/**", - //"eclipse-xml-catalog.xml", - //"daffodil-built-in-catalog.xml" + // "**/src/test/resources/**", + // "Log4j*.xsd", + // "xsd/**", // This is coming from XSAT2 + // "IBMdefined/**", + // "org/apache/xml/**", + // "edu/illinois/ncsa/daffodil/**", + "org/apache/daffodil/**" + // "**/*-tests.jar", + // "META-INF/**", + // "com/ibm/icu/**", + // "eclipse-xml-catalog.xml", + // "daffodil-built-in-catalog.xml" ), flattenIncludes := Seq( - "org/apache/daffodil/xsd/DFDLGeneralFormat*.dfdl.xsd", + "org/apache/daffodil/xsd/DFDLGeneralFormat*.dfdl.xsd" ), /** @@ -897,7 +896,14 @@ object DaffodilPlugin extends AutoPlugin { */ flattenSchemas / publishArtifact := false, - flattenSchemas / artifact := Artifact(name.value, "flat", "zip", Some("flat"), Vector(), None), + flattenSchemas / artifact := Artifact( + name.value, + "flat", + "zip", + Some("flat"), + Vector(), + None + ), flattenSchemas := { val logger = streams.value.log @@ -912,21 +918,31 @@ object DaffodilPlugin extends AutoPlugin { IO.delete(flatDir.toFile) Files.createDirectory(flatDir) - val projectXsdFiles = (Compile / resourceDirectories).value.flatMap { dir => (dir ** "*.xsd").get }.map(path => Paths.get(path.toString)) - val projectXslFiles = (Compile / 
resourceDirectories).value.flatMap { dir => (dir ** ("*.xsl" || "*.xslt")).get }.map(path => Paths.get(path.toString)) - val projectXmlFiles = (Compile / resourceDirectories).value.flatMap { dir => (dir ** "*.xml").get }.map(path => Paths.get(path.toString)) + val projectXsdFiles = (Compile / resourceDirectories).value + .flatMap { dir => (dir ** "*.xsd").get } + .map(path => Paths.get(path.toString)) + val projectXslFiles = (Compile / resourceDirectories).value + .flatMap { dir => (dir ** ("*.xsl" || "*.xslt")).get } + .map(path => Paths.get(path.toString)) + val projectXmlFiles = (Compile / resourceDirectories).value + .flatMap { dir => (dir ** "*.xml").get } + .map(path => Paths.get(path.toString)) /* Copy schema files from the current project's src/main/resources * directory */ - val filesFromProject = (projectXsdFiles ++ projectXslFiles ++ projectXmlFiles) filterNot { path => - val matchesExcludes = flattenExcludes.value.exists(glob => glob.matches(path)) - val matchesIncludes = flattenIncludes.value.exists(glob => glob.matches(path)) - matchesExcludes && !matchesIncludes - } + val filesFromProject = + (projectXsdFiles ++ projectXslFiles ++ projectXmlFiles).filterNot { path => + val matchesExcludes = flattenExcludes.value.exists(glob => glob.matches(path)) + val matchesIncludes = flattenIncludes.value.exists(glob => glob.matches(path)) + matchesExcludes && !matchesIncludes + } - val extractedProjectFiles = filesFromProject map { origPath => - val resourcePath = Paths.get((Compile / resourceDirectories).value(0).toString).relativize(origPath).toString + val extractedProjectFiles = filesFromProject.map { origPath => + val resourcePath = Paths + .get((Compile / resourceDirectories).value(0).toString) + .relativize(origPath) + .toString val newPath = Paths.get(extractDir.toString, resourcePath) Files.createDirectories(newPath.getParent()) Files.copy(origPath, newPath, StandardCopyOption.REPLACE_EXISTING) @@ -944,17 +960,27 @@ object DaffodilPlugin extends 
AutoPlugin { * only need Compile/resources from this project and Test/dependency jars * from external projects */ - val projectJarFiles = (Test / externalDependencyClasspath).value.files.flatMap { file => (file ** "*.jar").get } + val projectJarFiles = (Test / externalDependencyClasspath).value.files.flatMap { file => + (file ** "*.jar").get + } projectJarFiles.reverse.map { jar => val env = new HashMap[String, String] val fs = FileSystems.newFileSystem(URI.create(s"jar:file:${jar}"), env) val rootDirs = fs.getRootDirectories().asScala - rootDirs foreach { dir => + rootDirs.foreach { dir => val xsdFiles = Files.walk(dir).iterator().asScala.filter(_.toString.endsWith(".xsd")) // Includes XML files as they may be used for configuration of the XSLT - val xsltFiles = Files.walk(dir).iterator().asScala.toList.filter(f => f.toString.endsWith(".xslt") || f.toString.endsWith(".xsl") || f.toString.endsWith(".xml")) + val xsltFiles = Files + .walk(dir) + .iterator() + .asScala + .toList + .filter(f => + f.toString.endsWith(".xslt") || f.toString.endsWith(".xsl") || f.toString + .endsWith(".xml") + ) /* For each XSD file we have, we want to extract it from its original * jar while maintaining its path from inside the jar, ex: @@ -967,13 +993,13 @@ object DaffodilPlugin extends AutoPlugin { * We also want to exclude schemas from paths listed in flattenExcludes * that are not also listed in flattenIncludes */ - val filesToExtract = (xsdFiles ++ xsltFiles) filterNot { path => + val filesToExtract = (xsdFiles ++ xsltFiles).filterNot { path => val matchesExcludes = flattenExcludes.value.exists(glob => glob.matches(path)) val matchesIncludes = flattenIncludes.value.exists(glob => glob.matches(path)) matchesExcludes && !matchesIncludes } - filesToExtract foreach { origPath => + filesToExtract.foreach { origPath => val newPath = Paths.get(extractDir.toString, origPath.toString) Files.createDirectories(newPath.getParent()) Files.copy(origPath, newPath, 
StandardCopyOption.REPLACE_EXISTING) @@ -986,17 +1012,25 @@ object DaffodilPlugin extends AutoPlugin { val hrefPattern = "href=\"([^\"]*)\"".r val documentPattern = "document[(]'([^']*)'[)]".r val referenceRegexes = List(schemaLocationPattern, hrefPattern, documentPattern) - val extractedFiles = Files.walk(extractDir).iterator().asScala.filter(Files.isRegularFile(_)) + val extractedFiles = + Files.walk(extractDir).iterator().asScala.filter(Files.isRegularFile(_)) val referencedFiles = { // This annotation simply warns if the compiler cannot enable tail call optimization @scala.annotation.tailrec def getReferences(parents: Seq[Path], acc: Seq[Path] = List.empty): Seq[Path] = { parents match { - case h :: t if (acc.contains(h)) => getReferences(t, acc) // Already processed this file (h), proceed with rest of files (t) + case h :: t if (acc.contains(h)) => + getReferences( + t, + acc + ) // Already processed this file (h), proceed with rest of files (t) case h :: t => { // Need to process this file (h) val resourcePath = extractDir.relativize(h) - val newPath = Paths.get(flatDir.toString, extractDir.relativize(h).toString.replaceAll("/", "__")) + val newPath = Paths.get( + flatDir.toString, + extractDir.relativize(h).toString.replaceAll("/", "__") + ) val bw = Files.newBufferedWriter(newPath) val fileAsString = new String(Files.readAllBytes(h), Charset.defaultCharset()) val references = { @@ -1007,34 +1041,41 @@ object DaffodilPlugin extends AutoPlugin { // Resolve the location of all references on the actual file // system - val resolvedReferences = references.map { ref => - val origLocation = { - if (ref.contains("urn:")) - ref.split(" ")(1) - else - ref - } - - val resolvedPath: Option[Path] = { - if (Files.exists(Paths.get(extractDir.toString, origLocation))) { - // Original schemaLocation is full path, ex: - // com/whatever/schema.xsd - Some(Paths.get(extractDir.toString, origLocation)) - } else if (Files.exists(Paths.get(h.getParent().toString, origLocation))) { 
- // Original schemaLocation is a relative path to the current - // schema - Some(Paths.get(h.getParent().toString, origLocation).normalize()) - } else if (origLocation.startsWith("http")) { - None - } else { - if (origLocation.contains("DFDLGeneralFormat")) - throw new MessageOnlyException(s"Unable to locate file: $origLocation, required by: $h. Consider adding 'daffodil-lib' to this project's list of dependencies") + val resolvedReferences = references + .map { ref => + val origLocation = { + if (ref.contains("urn:")) + ref.split(" ")(1) else - throw new MessageOnlyException(s"Unable to locate file: $origLocation, required by: $h") + ref } + + val resolvedPath: Option[Path] = { + if (Files.exists(Paths.get(extractDir.toString, origLocation))) { + // Original schemaLocation is full path, ex: + // com/whatever/schema.xsd + Some(Paths.get(extractDir.toString, origLocation)) + } else if (Files.exists(Paths.get(h.getParent().toString, origLocation))) { + // Original schemaLocation is a relative path to the current + // schema + Some(Paths.get(h.getParent().toString, origLocation).normalize()) + } else if (origLocation.startsWith("http")) { + None + } else { + if (origLocation.contains("DFDLGeneralFormat")) + throw new MessageOnlyException( + s"Unable to locate file: $origLocation, required by: $h. 
Consider adding 'daffodil-lib' to this project's list of dependencies" + ) + else + throw new MessageOnlyException( + s"Unable to locate file: $origLocation, required by: $h" + ) + } + } + ref -> resolvedPath } - ref -> resolvedPath - }.toMap.filter(e => e._2.isDefined) + .toMap + .filter(e => e._2.isDefined) // For each reference do a search and replace of the entire file val updatedFileAsString = { @@ -1042,7 +1083,10 @@ object DaffodilPlugin extends AutoPlugin { case (input, (ref, resolvedRef)) => { // For each reference replace each instance of it in the // file with the same reference but with "/" changed to "__" - input.replaceAll(ref, extractDir.relativize(resolvedRef.get).toString.replaceAll("/", "__")) + input.replaceAll( + ref, + extractDir.relativize(resolvedRef.get).toString.replaceAll("/", "__") + ) } } } @@ -1054,7 +1098,12 @@ object DaffodilPlugin extends AutoPlugin { * again on the rest of the list (t) + any references from this * file. Add this file to the accumulator list */ - getReferences(t ++ resolvedReferences.values.collect { case Some(path) => path }.filterNot(t.contains), acc :+ h) + getReferences( + t ++ resolvedReferences.values + .collect { case Some(path) => path } + .filterNot(t.contains), + acc :+ h + ) } case _ => acc // Have processed all referenced files, return acc } @@ -1066,7 +1115,7 @@ object DaffodilPlugin extends AutoPlugin { val flattenedFiles = Files.list(flatDir).iterator().asScala.filter(Files.isRegularFile(_)) val zipPath = Paths.get(flattenTarget.value.toString) val zos = new ZipOutputStream(Files.newOutputStream(zipPath)) - flattenedFiles foreach { file => + flattenedFiles.foreach { file => zos.putNextEntry(new ZipEntry(flatDir.relativize(file).toString)) Files.copy(file, zos) zos.closeEntry() @@ -1087,7 +1136,7 @@ object DaffodilPlugin extends AutoPlugin { if ((flattenSchemas / publishArtifact).value) { Map((flattenSchemas / artifact).value -> flattenSchemas.value) } else { - Map.empty[Artifact,File] + 
Map.empty[Artifact, File] } } ) From 608f718d12b64d3243510445498a81eadc49bb12 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Fri, 8 May 2026 14:38:54 -0400 Subject: [PATCH 03/11] Address comments from review Somewhat major rework to get things more SBT friendly and to use existing classpath jars directly instead of extracting them all and sorting through the files. --- VERSION | 2 +- .../org/apache/daffodil/DaffodilPlugin.scala | 265 ++++++------------ 2 files changed, 88 insertions(+), 179 deletions(-) diff --git a/VERSION b/VERSION index 820fb8d..c4954f2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.0-SNAPSHOT +1.8.0-SNAPSHOT diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 1d7f765..50034ce 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -17,13 +17,12 @@ package org.apache.daffodil +import scala.collection.mutable.ArrayBuffer + import java.io.File -import java.net.URI +import java.net.URLClassLoader import java.nio.charset.Charset -import java.nio.file.{ FileSystems, Files, Path, Paths, StandardCopyOption } -import java.util.HashMap -import java.util.zip.{ ZipEntry, ZipOutputStream } -import scala.collection.JavaConverters._ +import java.nio.file.{ Files, Paths } import scala.language.implicitConversions import scala.util.Properties @@ -69,17 +68,17 @@ object DaffodilPlugin extends AutoPlugin { val daffodilTdmlUsesPackageBin = settingKey[Boolean]( "Whether or not TDML files use the saved parsers created by daffodilPackageBin" ) - val flattenTarget = settingKey[File]( + val daffodilFlattenTarget = settingKey[File]( "File to write the flattened schemas package to" ) - val flattenSchemas = taskKey[File]( + val daffodilFlattenSchemas = taskKey[File]( "flatten the directory structure of all schemas and schema dependencies to a single common directory and update 'schemaLocation' paths to match" 
) - val flattenExcludes = settingKey[Seq[Glob]]( - "Globs of paths to exclude from schema flattening" + val daffodilFlattenIncludes = settingKey[FileFilter]( + "File extensions to include when flattening resources. Defaults to *.xsd | *.xsl | *.xslt | *.xml" ) - val flattenIncludes = settingKey[Seq[Glob]]( - "Globs of paths to include for schema flattening, even if the path is listed in flattenExcludes" + val daffodilFlattenExcludes = settingKey[FileFilter]( + "Globs of paths to exclude from schema flattening" ) /** @@ -617,7 +616,7 @@ object DaffodilPlugin extends AutoPlugin { inConfig(Test)(packageDaffodilBinSettings) ++ inConfig(Compile)(flatLayoutSettings("src")) ++ inConfig(Test)(flatLayoutSettings("test")) ++ - inConfig(Compile)(flattenerSettings) + inConfig(Compile)(daffodilFlattenSettings) /** * Define the artifacts, products, and packageDaffodilBin task that creates the saved parsers @@ -859,44 +858,29 @@ object DaffodilPlugin extends AutoPlugin { } } - def flattenerSettings: Seq[Setting[_]] = Seq( - flattenTarget := target.value / s"${name.value}-${version.value}-flat.zip", - /* Paths in flattenExcludes/Includes that are not globbed at the start are + def daffodilFlattenSettings: Seq[Setting[_]] = Seq( + daffodilFlattenTarget := target.value / s"${name.value}-${version.value}-flat.zip", + /* Paths in daffodilFlattenExcludes/Includes that are not globbed at the start are * generally only going to match paths within JAR files on the classpath. * In order to deal with paths on the filesystem we need to glob the start * of the path to account for different directory structure before the * schema project path. 
*/ - flattenExcludes := Seq( - // "**/src/test/resources/**", - // "Log4j*.xsd", - // "xsd/**", // This is coming from XSAT2 - // "IBMdefined/**", - // "org/apache/xml/**", - // "edu/illinois/ncsa/daffodil/**", - "org/apache/daffodil/**" - // "**/*-tests.jar", - // "META-INF/**", - // "com/ibm/icu/**", - // "eclipse-xml-catalog.xml", - // "daffodil-built-in-catalog.xml" - ), - flattenIncludes := Seq( - "org/apache/daffodil/xsd/DFDLGeneralFormat*.dfdl.xsd" - ), + daffodilFlattenIncludes := "*.xsd" | "*.xsl" | "*.xslt" | "*.xml", + daffodilFlattenExcludes := HiddenFileFilter, /** * Whether or not to publish the flattened schemas zip. Defaults to false. * * If projects want to publish flattened schemas then they must explicitly enable it by - * setting 'flattenSchemas / publishArtifact := true'. + * setting 'daffodilFlattenSchemas / publishArtifact := true'. * * If false, flattened schemas will not be created unless you explicitly run the - * flattenSchemas. + * daffodilFlattenSchemas. */ - flattenSchemas / publishArtifact := false, + daffodilFlattenSchemas / publishArtifact := false, - flattenSchemas / artifact := Artifact( + daffodilFlattenSchemas / artifact := Artifact( name.value, "flat", "zip", @@ -904,50 +888,19 @@ object DaffodilPlugin extends AutoPlugin { Vector(), None ), - flattenSchemas := { + daffodilFlattenSchemas := { val logger = streams.value.log + val filter = daffodilFlattenIncludes.value -- daffodilFlattenExcludes.value - val extractDir = Paths.get(target.value.getPath(), "flatExtractDir") - if (Files.exists(extractDir)) - IO.delete(extractDir.toFile) - Files.createDirectory(extractDir) - - val flatDir = Paths.get(target.value.getPath(), "flatDir") - if (Files.exists(flatDir)) - IO.delete(flatDir.toFile) - Files.createDirectory(flatDir) - - val projectXsdFiles = (Compile / resourceDirectories).value - .flatMap { dir => (dir ** "*.xsd").get } - .map(path => Paths.get(path.toString)) - val projectXslFiles = (Compile / resourceDirectories).value - 
.flatMap { dir => (dir ** ("*.xsl" || "*.xslt")).get } - .map(path => Paths.get(path.toString)) - val projectXmlFiles = (Compile / resourceDirectories).value - .flatMap { dir => (dir ** "*.xml").get } - .map(path => Paths.get(path.toString)) - - /* Copy schema files from the current project's src/main/resources - * directory - */ - val filesFromProject = - (projectXsdFiles ++ projectXslFiles ++ projectXmlFiles).filterNot { path => - val matchesExcludes = flattenExcludes.value.exists(glob => glob.matches(path)) - val matchesIncludes = flattenIncludes.value.exists(glob => glob.matches(path)) - matchesExcludes && !matchesIncludes - } + val flatDir = target.value / "flatDir" + if (flatDir.exists()) + IO.delete(flatDir) + IO.createDirectory(flatDir) - val extractedProjectFiles = filesFromProject.map { origPath => - val resourcePath = Paths - .get((Compile / resourceDirectories).value(0).toString) - .relativize(origPath) - .toString - val newPath = Paths.get(extractDir.toString, resourcePath) - Files.createDirectories(newPath.getParent()) - Files.copy(origPath, newPath, StandardCopyOption.REPLACE_EXISTING) - newPath - } + val projectResources = (Compile / resourceDirectories).value + .map { root => root.toURI.toURL -> (root ** filter).get.map(_.toURI.toURL).toList } + .toMap /* Get all dependency jars and resources specific to this project. 
Note * that the "Test" configuration is used for JAR files in order to ensure @@ -960,79 +913,41 @@ object DaffodilPlugin extends AutoPlugin { * only need Compile/resources from this project and Test/dependency jars * from external projects */ - val projectJarFiles = (Test / externalDependencyClasspath).value.files.flatMap { file => - (file ** "*.jar").get - } - - projectJarFiles.reverse.map { jar => - val env = new HashMap[String, String] - val fs = FileSystems.newFileSystem(URI.create(s"jar:file:${jar}"), env) - val rootDirs = fs.getRootDirectories().asScala - rootDirs.foreach { dir => - val xsdFiles = Files.walk(dir).iterator().asScala.filter(_.toString.endsWith(".xsd")) - - // Includes XML files as they may be used for configuration of the XSLT - val xsltFiles = Files - .walk(dir) - .iterator() - .asScala - .toList - .filter(f => - f.toString.endsWith(".xslt") || f.toString.endsWith(".xsl") || f.toString - .endsWith(".xml") - ) - - /* For each XSD file we have, we want to extract it from its original - * jar while maintaining its path from inside the jar, ex: - * com.owlcyberdefense.whatever.jar:com/owlcyberdefense/whatever/xsd/whatever.xsd - * - * extracts to - * - * target/flatExtractDir/com/owlcyberdefense/whatever/xsd/whatever.xsd - * - * We also want to exclude schemas from paths listed in flattenExcludes - * that are not also listed in flattenIncludes - */ - val filesToExtract = (xsdFiles ++ xsltFiles).filterNot { path => - val matchesExcludes = flattenExcludes.value.exists(glob => glob.matches(path)) - val matchesIncludes = flattenIncludes.value.exists(glob => glob.matches(path)) - matchesExcludes && !matchesIncludes - } - - filesToExtract.foreach { origPath => - val newPath = Paths.get(extractDir.toString, origPath.toString) - Files.createDirectories(newPath.getParent()) - Files.copy(origPath, newPath, StandardCopyOption.REPLACE_EXISTING) - } - } - fs.close() - } + val projectURLs = (Compile / resourceDirectories).value.map(_.toURI.toURL) + val 
allClasspathURLs = (Compile / fullClasspath).value.map(_.data.toURI.toURL) + val classLoader = new URLClassLoader((projectURLs ++ allClasspathURLs).toArray, null) val schemaLocationPattern = """schemaLocation=\"([^\"]*)\"""".r val hrefPattern = "href=\"([^\"]*)\"".r val documentPattern = "document[(]'([^']*)'[)]".r val referenceRegexes = List(schemaLocationPattern, hrefPattern, documentPattern) - val extractedFiles = - Files.walk(extractDir).iterator().asScala.filter(Files.isRegularFile(_)) val referencedFiles = { // This annotation simply warns if the compiler cannot enable tail call optimization @scala.annotation.tailrec - def getReferences(parents: Seq[Path], acc: Seq[Path] = List.empty): Seq[Path] = { + def getReferences(root: URL, parents: Seq[URL], acc: ArrayBuffer[URL]): Seq[URL] = { + val rootPath = Paths.get(root.toURI) parents match { + // Already processed this file (h), proceed with rest of files (t) case h :: t if (acc.contains(h)) => getReferences( + root, t, acc - ) // Already processed this file (h), proceed with rest of files (t) - case h :: t => { // Need to process this file (h) - val resourcePath = extractDir.relativize(h) + ) + // Need to process this file (h) + case h :: t => { + val relPath = h.toString match { + case j if (j.startsWith("jar:")) => Paths.get(j.toString.split("!")(1).tail) + case f if (f.startsWith("file:")) => rootPath.relativize(Paths.get(h.toURI)) + } + val bytes = h.openStream().readAllBytes() val newPath = Paths.get( flatDir.toString, - extractDir.relativize(h).toString.replaceAll("/", "__") + relPath.toString.replaceAll("/", "__") ) val bw = Files.newBufferedWriter(newPath) - val fileAsString = new String(Files.readAllBytes(h), Charset.defaultCharset()) + val fileAsString = new String(bytes, Charset.defaultCharset()) val references = { referenceRegexes.flatMap { re => re.findAllIn(fileAsString).matchData.map(_.group(1)) @@ -1044,35 +959,28 @@ object DaffodilPlugin extends AutoPlugin { val resolvedReferences = references 
.map { ref => val origLocation = { - if (ref.contains("urn:")) - ref.split(" ")(1) + if (ref.contains("urn:")) { + val urn = ref.split(" ")(1) + if (urn.startsWith("/")) + urn.tail + else + urn + } else if (ref.startsWith("/")) + ref.tail else ref } - - val resolvedPath: Option[Path] = { - if (Files.exists(Paths.get(extractDir.toString, origLocation))) { - // Original schemaLocation is full path, ex: - // com/whatever/schema.xsd - Some(Paths.get(extractDir.toString, origLocation)) - } else if (Files.exists(Paths.get(h.getParent().toString, origLocation))) { - // Original schemaLocation is a relative path to the current - // schema - Some(Paths.get(h.getParent().toString, origLocation).normalize()) - } else if (origLocation.startsWith("http")) { - None - } else { - if (origLocation.contains("DFDLGeneralFormat")) - throw new MessageOnlyException( - s"Unable to locate file: $origLocation, required by: $h. Consider adding 'daffodil-lib' to this project's list of dependencies" - ) - else - throw new MessageOnlyException( - s"Unable to locate file: $origLocation, required by: $h" - ) - } + val res = { + val r = classLoader.findResource(origLocation) + if (r == null) { + val rrel = List(relPath.getParent, origLocation).mkString("/") + classLoader.findResource(rrel) + } else + r } - ref -> resolvedPath + if (res == null) + logger.warn(s"Unable to resolve reference to $ref from source file $h") + ref -> Option(res) } .toMap .filter(e => e._2.isDefined) @@ -1083,10 +991,12 @@ object DaffodilPlugin extends AutoPlugin { case (input, (ref, resolvedRef)) => { // For each reference replace each instance of it in the // file with the same reference but with "/" changed to "__" - input.replaceAll( - ref, - extractDir.relativize(resolvedRef.get).toString.replaceAll("/", "__") - ) + val rref = resolvedRef.get.toString + val relativized = rref match { + case j if (j.startsWith("jar")) => j.split("!")(1).tail + case f if (f.startsWith("file")) => 
rootPath.relativize(Paths.get(resolvedRef.get.toURI)).toString + } + input.replaceAll(ref, relativized.tail.replaceAll("/", "__")) } } } @@ -1099,42 +1009,41 @@ object DaffodilPlugin extends AutoPlugin { * file. Add this file to the accumulator list */ getReferences( + root, t ++ resolvedReferences.values - .collect { case Some(path) => path } + .collect { case Some(url) => url } .filterNot(t.contains), - acc :+ h + acc += h ) } case _ => acc // Have processed all referenced files, return acc } } - getReferences(extractedProjectFiles) + val acc = new ArrayBuffer[URL]() + projectResources foreach { case (root, files) => + getReferences(root, files, acc) + } + acc } /* Create zip file containing all flattened schemas */ - val flattenedFiles = Files.list(flatDir).iterator().asScala.filter(Files.isRegularFile(_)) - val zipPath = Paths.get(flattenTarget.value.toString) - val zos = new ZipOutputStream(Files.newOutputStream(zipPath)) - flattenedFiles.foreach { file => - zos.putNextEntry(new ZipEntry(flatDir.relativize(file).toString)) - Files.copy(file, zos) - zos.closeEntry() - } - zos.close() - logger.info(s"Generated flattened schema package at ${flattenTarget.value.toString}") - flattenTarget.value + val flattenedFiles = IO.listFiles(flatDir) + val sources = flattenedFiles.map(file => file -> file.getName()) + IO.zip(sources, daffodilFlattenTarget.value, Some(System.currentTimeMillis())) + logger.info(s"Generated flattened schema package at ${daffodilFlattenTarget.value.toString}") + daffodilFlattenTarget.value }, artifacts ++= { - if ((flattenSchemas / publishArtifact).value) { - Seq((flattenSchemas / artifact).value) + if ((daffodilFlattenSchemas / publishArtifact).value) { + Seq((daffodilFlattenSchemas / artifact).value) } else { Seq.empty } }, packagedArtifacts ++= { - if ((flattenSchemas / publishArtifact).value) { - Map((flattenSchemas / artifact).value -> flattenSchemas.value) + if ((daffodilFlattenSchemas / publishArtifact).value) { + 
Map((daffodilFlattenSchemas / artifact).value -> daffodilFlattenSchemas.value) } else { Map.empty[Artifact, File] } From c3bd5b91136bb3e0e1c59511902a0dc655cb9235 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Fri, 8 May 2026 15:00:35 -0400 Subject: [PATCH 04/11] fixup! --- src/main/scala/org/apache/daffodil/DaffodilPlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 50034ce..03c1418 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -996,7 +996,7 @@ object DaffodilPlugin extends AutoPlugin { case j if (j.startsWith("jar")) => j.split("!")(1).tail case f if (f.startsWith("file")) => rootPath.relativize(Paths.get(resolvedRef.get.toURI)).toString } - input.replaceAll(ref, relativized.tail.replaceAll("/", "__")) + input.replaceAll(ref, relativized.replaceAll("/", "__")) } } } From 105780d470ac84b2fac14a6f860380d58379aee1 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Tue, 12 May 2026 16:15:45 -0400 Subject: [PATCH 05/11] Address comments from review No more recursion and a few other fixups --- README.md | 30 +++ .../org/apache/daffodil/DaffodilPlugin.scala | 237 ++++++++---------- 2 files changed, 137 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index 969c65f..69e64c6 100644 --- a/README.md +++ b/README.md @@ -323,6 +323,36 @@ root `src/` directory, and all test source and resource files to be in a root `test/` directory. Source files are those that end with `*.scala` or `*.java`, and resource files are anything else. +### Flatten Schemas + +This plugin has functionality to flatten the directory structure of 1 or more +schema projects, renaming the schema files and upating schemaLocation's as +necessary. 
+ +```bash +sbt daffodilFlattenSchemas +``` + +The renaming works as follows: + +`org/apache/daffodil/xsd/main.dfdl.xsd` + +will be renamed to: + +`org__apache__daffodil__xsd__main.dfdl.xsd` + +Note: Original files are not modified, they are simply copied to the specified +output directory with the new name and updated schemaLocation's. + +Many non-Daffodil XML/XSD programs (such as XML validators) do not resolve +scheamLocations in the same way that Daffodil does and will often stuggle to +find schemas that aren't in the same directory as the root schema. By flattening +the directory structure at a common root (ie the directory containing 'com/' +and/or 'org/' we can rename all of the schemas while avoiding any conflicts in +schema name for generically named schemas, like 'baseFormat.dfdl.xsd'. Having +all of the schemas and schemaLocation's renamed should allow tools with less +robust schemaLocation resolvers to just work. + ### Cross-Building In some cases it is helpful to have a single SBT project that supports the diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 03c1418..58badbc 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -17,10 +17,10 @@ package org.apache.daffodil -import scala.collection.mutable.ArrayBuffer +import scala.util.matching.Regex import java.io.File -import java.net.URLClassLoader +import java.net.{ URI, URLClassLoader } import java.nio.charset.Charset import java.nio.file.{ Files, Paths } import scala.language.implicitConversions @@ -74,13 +74,9 @@ object DaffodilPlugin extends AutoPlugin { val daffodilFlattenSchemas = taskKey[File]( "flatten the directory structure of all schemas and schema dependencies to a single common directory and update 'schemaLocation' paths to match" ) - val daffodilFlattenIncludes = settingKey[FileFilter]( - "File extensions to include when 
flattening resources. Defaults to *.xsd | *.xsl | *.xslt | *.xml" + val daffodilFlattenResourceReferencePatterns = settingKey[Seq[Regex]]( + """Sequence of patterns that will match resource references, ie 'schemaLocation="/org/apache/..."'. Note that the pattern must capture the reference in the first capture group""" ) - val daffodilFlattenExcludes = settingKey[FileFilter]( - "Globs of paths to exclude from schema flattening" - ) - /** * Class to define daffodilPackageBinInfos, auto-imported to simplify sbt configs */ @@ -860,14 +856,6 @@ object DaffodilPlugin extends AutoPlugin { def daffodilFlattenSettings: Seq[Setting[_]] = Seq( daffodilFlattenTarget := target.value / s"${name.value}-${version.value}-flat.zip", - /* Paths in daffodilFlattenExcludes/Includes that are not globbed at the start are - * generally only going to match paths within JAR files on the classpath. - * In order to deal with paths on the filesystem we need to glob the start - * of the path to account for different directory structure before the - * schema project path. - */ - daffodilFlattenIncludes := "*.xsd" | "*.xsl" | "*.xslt" | "*.xml", - daffodilFlattenExcludes := HiddenFileFilter, /** * Whether or not to publish the flattened schemas zip. Defaults to false. @@ -888,10 +876,25 @@ object DaffodilPlugin extends AutoPlugin { Vector(), None ), - daffodilFlattenSchemas := { + + /** + * Only grab the file types that need to be flattened. These files are only + * grabbed from the root project. Any referenced files will be pulled in + * from the classpath. Note that XML files are commonly used in XSLT for + * settings. 
+ */ + daffodilFlattenSchemas / includeFilter := "*.xsd" | "*.xsl" | "*.xslt" | "*.xml", + daffodilFlattenSchemas / excludeFilter := HiddenFileFilter, + + daffodilFlattenResourceReferencePatterns := List( + """schemaLocation=\"([^\"]*)\"""".r, + "href=\"([^\"]*)\"".r, + "document[(]'([^']*)'[)]".r), + + daffodilFlattenSchemas / products := { val logger = streams.value.log - val filter = daffodilFlattenIncludes.value -- daffodilFlattenExcludes.value + val filter = (daffodilFlattenSchemas / includeFilter).value -- (daffodilFlattenSchemas / excludeFilter).value val flatDir = target.value / "flatDir" if (flatDir.exists()) @@ -902,136 +905,110 @@ object DaffodilPlugin extends AutoPlugin { .map { root => root.toURI.toURL -> (root ** filter).get.map(_.toURI.toURL).toList } .toMap - /* Get all dependency jars and resources specific to this project. Note - * that the "Test" configuration is used for JAR files in order to ensure - * we pull in XSD files from daffodil-lib, as in many schema projects the - * daffodil dependencies are only used for testing, not compiling. Also - * note that we want to use Test/externalDependencyClasspath to get - * dependency jars, and not something like Test/fullClasspath or - * Test/dependencyClasspath, since those could trigger expensive resource - * generators or compilation of internal test jars that we don't need--we - * only need Compile/resources from this project and Test/dependency jars - * from external projects + /** + * Create a URLClassLoader object with URLs to all resources used by the + * project. The class loader will be used to resolve references made + * within the flattened files. 
*/ val projectURLs = (Compile / resourceDirectories).value.map(_.toURI.toURL) - val allClasspathURLs = (Compile / fullClasspath).value.map(_.data.toURI.toURL) + val allClasspathURLs = (Test / externalDependencyClasspath).value.map(_.data.toURI.toURL) val classLoader = new URLClassLoader((projectURLs ++ allClasspathURLs).toArray, null) - val schemaLocationPattern = """schemaLocation=\"([^\"]*)\"""".r - val hrefPattern = "href=\"([^\"]*)\"".r - val documentPattern = "document[(]'([^']*)'[)]".r - val referenceRegexes = List(schemaLocationPattern, hrefPattern, documentPattern) - - val referencedFiles = { - // This annotation simply warns if the compiler cannot enable tail call optimization - @scala.annotation.tailrec - def getReferences(root: URL, parents: Seq[URL], acc: ArrayBuffer[URL]): Seq[URL] = { - val rootPath = Paths.get(root.toURI) - parents match { - // Already processed this file (h), proceed with rest of files (t) - case h :: t if (acc.contains(h)) => - getReferences( - root, - t, - acc - ) - // Need to process this file (h) - case h :: t => { - val relPath = h.toString match { - case j if (j.startsWith("jar:")) => Paths.get(j.toString.split("!")(1).tail) - case f if (f.startsWith("file:")) => rootPath.relativize(Paths.get(h.toURI)) - } - val bytes = h.openStream().readAllBytes() - val newPath = Paths.get( - flatDir.toString, - relPath.toString.replaceAll("/", "__") - ) - val bw = Files.newBufferedWriter(newPath) - val fileAsString = new String(bytes, Charset.defaultCharset()) - val references = { - referenceRegexes.flatMap { re => - re.findAllIn(fileAsString).matchData.map(_.group(1)) - } - } + val referenceRegexes = daffodilFlattenResourceReferencePatterns.value + + val processed = scala.collection.mutable.Set[URI]() + projectResources foreach { case (rootURL, urls) => + val unprocessed = scala.collection.mutable.Stack[URI]() + val rootURI = rootURL.toURI + val rootPath = Paths.get(rootURI) + unprocessed.pushAll(urls.map(_.toURI)) + while 
(!unprocessed.isEmpty) { + val contextURI = unprocessed.pop + val bytes = contextURI.toURL.openStream().readAllBytes() + val contextRelPath = contextURI.getScheme match { + case "jar" => Paths.get(contextURI.toString.split("!")(1).tail) + case "file" => Paths.get(rootURI.relativize(contextURI).getPath) + case _ => throw new IllegalArgumentException(s"Unrecognized URI scheme: $contextURI") + } + val contextFlatPath = Paths.get( + flatDir.toString, + contextRelPath.toString.replaceAll("/", "__")) + val bw = Files.newBufferedWriter(contextFlatPath) + val fileAsString = new String(bytes, Charset.defaultCharset()) + + val references = referenceRegexes.flatMap { re => + re.findAllIn(fileAsString).matchData.map(_.group(1)) + } - // Resolve the location of all references on the actual file - // system - val resolvedReferences = references - .map { ref => - val origLocation = { - if (ref.contains("urn:")) { - val urn = ref.split(" ")(1) - if (urn.startsWith("/")) - urn.tail - else - urn - } else if (ref.startsWith("/")) - ref.tail - else - ref - } - val res = { - val r = classLoader.findResource(origLocation) - if (r == null) { - val rrel = List(relPath.getParent, origLocation).mkString("/") - classLoader.findResource(rrel) - } else - r + val resolvedRefs = references.map { ref => + val origLocation = if (ref contains " ") ref.split(" ")(1) else ref + if (origLocation.startsWith("/")) { + // Dealing with an absolute path + val url = Option(classLoader.findResource(origLocation.tail)) + if (url.isDefined) + Some(origLocation -> url.get.toURI) + else { + logger.warn(s"Unable to resolve absolute reference to $ref from source file $contextURI") + None + } + } else { + // Relative path + val relPath = contextRelPath.resolveSibling(origLocation).normalize() + if (Files.exists(rootPath.resolve(relPath))) { + // Found the file on the regular filesystem + Some(origLocation -> rootURI.resolve(relPath.toString)) + } else { + // Check the classpath for the current reference relative to 
the + // current contextURI + val url = Option(classLoader.findResource(relPath.toString)) + if (url.isDefined) + Some(origLocation -> url.get.toURI) + else { + // Maybe this is actually an absolute path not within the same + // context. Check the classpath for the original location + val url2 = Option(classLoader.findResource(origLocation)) + if (url2.isDefined) + Some(origLocation -> url2.get.toURI) + else { + logger.warn(s"Unable to resolve local reference to $ref from source file $contextURI") + None } - if (res == null) - logger.warn(s"Unable to resolve reference to $ref from source file $h") - ref -> Option(res) } - .toMap - .filter(e => e._2.isDefined) - - // For each reference do a search and replace of the entire file - val updatedFileAsString = { - resolvedReferences.foldLeft(fileAsString) { - case (input, (ref, resolvedRef)) => { - // For each reference replace each instance of it in the - // file with the same reference but with "/" changed to "__" - val rref = resolvedRef.get.toString - val relativized = rref match { - case j if (j.startsWith("jar")) => j.split("!")(1).tail - case f if (f.startsWith("file")) => rootPath.relativize(Paths.get(resolvedRef.get.toURI)).toString - } - input.replaceAll(ref, relativized.replaceAll("/", "__")) - } + } + } + }.flatten.toMap + + val updatedFileAsString = { + resolvedRefs.foldLeft(fileAsString) { + case (input, (ref, resolvedRef)) => { + // For each reference replace each instance of it in the + // file with the same reference but with "/" changed to "__" + val relativized = resolvedRef.getScheme match { + case "jar" => resolvedRef.toString.split("!")(1).tail + case _ => rootURI.relativize(resolvedRef).toString } + input.replaceAll(ref, relativized.replaceAll("/", "__")) } - - bw.write(updatedFileAsString) - bw.close() - - /* Have succesfully processed this file (h), call getReferences - * again on the rest of the list (t) + any references from this - * file. 
Add this file to the accumulator list - */ - getReferences( - root, - t ++ resolvedReferences.values - .collect { case Some(url) => url } - .filterNot(t.contains), - acc += h - ) } - case _ => acc // Have processed all referenced files, return acc } + + bw.write(updatedFileAsString) + bw.close() + processed += contextURI + unprocessed.pushAll((resolvedRefs.values.toSet diff processed).filterNot(unprocessed contains _)) } - val acc = new ArrayBuffer[URL]() - projectResources foreach { case (root, files) => - getReferences(root, files, acc) - } - acc } /* Create zip file containing all flattened schemas */ val flattenedFiles = IO.listFiles(flatDir) val sources = flattenedFiles.map(file => file -> file.getName()) - IO.zip(sources, daffodilFlattenTarget.value, Some(System.currentTimeMillis())) + IO.zip(sources, daffodilFlattenTarget.value, Package.defaultTimestamp) logger.info(s"Generated flattened schema package at ${daffodilFlattenTarget.value.toString}") - daffodilFlattenTarget.value + Seq(daffodilFlattenTarget.value) + }, + + daffodilFlattenSchemas := { + (daffodilFlattenSchemas / products).value.head }, artifacts ++= { From f97510da4f26f565f746de8e8d8e7919b3e0a70d Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Sun, 17 May 2026 22:49:24 -0400 Subject: [PATCH 06/11] ~fixup --- .../org/apache/daffodil/DaffodilPlugin.scala | 198 +++++++++++------- 1 file changed, 117 insertions(+), 81 deletions(-) diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 58badbc..274e4ae 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -17,14 +17,13 @@ package org.apache.daffodil -import scala.util.matching.Regex - import java.io.File import java.net.{ URI, URLClassLoader } import java.nio.charset.Charset -import java.nio.file.{ Files, Paths } +import java.nio.file.{ FileSystemNotFoundException, FileSystems, Files, Paths } 
import scala.language.implicitConversions import scala.util.Properties +import scala.util.matching.Regex import sbt.Keys._ import sbt._ @@ -77,6 +76,7 @@ object DaffodilPlugin extends AutoPlugin { val daffodilFlattenResourceReferencePatterns = settingKey[Seq[Regex]]( """Sequence of patterns that will match resource references, ie 'schemaLocation="/org/apache/..."'. Note that the pattern must capture the reference in the first capture group""" ) + /** * Class to define daffodilPackageBinInfos, auto-imported to simplify sbt configs */ @@ -887,123 +887,159 @@ object DaffodilPlugin extends AutoPlugin { daffodilFlattenSchemas / excludeFilter := HiddenFileFilter, daffodilFlattenResourceReferencePatterns := List( - """schemaLocation=\"([^\"]*)\"""".r, - "href=\"([^\"]*)\"".r, - "document[(]'([^']*)'[)]".r), + """(? root.toURI.toURL -> (root ** filter).get.map(_.toURI.toURL).toList } - .toMap + val projectURLs = (Compile / resourceDirectories).value.map { root => + (root ** filter).get.map(_.toURI.toURL).toList + }.flatten /** * Create a URLClassLoader object with URLs to all resources used by the * project. The class loader will be used to resolve references made * within the flattened files. 
*/ - val projectURLs = (Compile / resourceDirectories).value.map(_.toURI.toURL) val allClasspathURLs = (Test / externalDependencyClasspath).value.map(_.data.toURI.toURL) val classLoader = new URLClassLoader((projectURLs ++ allClasspathURLs).toArray, null) val referenceRegexes = daffodilFlattenResourceReferencePatterns.value - - val processed = scala.collection.mutable.Set[URI]() - projectResources foreach { case (rootURL, urls) => - val unprocessed = scala.collection.mutable.Stack[URI]() - val rootURI = rootURL.toURI - val rootPath = Paths.get(rootURI) - unprocessed.pushAll(urls.map(_.toURI)) - while (!unprocessed.isEmpty) { - val contextURI = unprocessed.pop - val bytes = contextURI.toURL.openStream().readAllBytes() - val contextRelPath = contextURI.getScheme match { - case "jar" => Paths.get(contextURI.toString.split("!")(1).tail) - case "file" => Paths.get(rootURI.relativize(contextURI).getPath) - case _ => throw new IllegalArgumentException(s"Unrecognized URI scheme: $contextURI") - } - val contextFlatPath = Paths.get( - flatDir.toString, - contextRelPath.toString.replaceAll("/", "__")) - val bw = Files.newBufferedWriter(contextFlatPath) - val fileAsString = new String(bytes, Charset.defaultCharset()) - - val references = referenceRegexes.flatMap { re => - re.findAllIn(fileAsString).matchData.map(_.group(1)) + val jarRegex = "(.*)!(.*)".r + + val seen = scala.collection.mutable.Set[URI]() + val unprocessed = scala.collection.mutable.Stack[URI]() + unprocessed.pushAll(projectURLs.map(_.toURI)) + seen ++= projectURLs.map(_.toURI) + while (!unprocessed.isEmpty) { + val contextURI = unprocessed.pop + val bytes = contextURI.toURL.openStream().readAllBytes() + val (contextPath, flatPath) = contextURI.getScheme match { + case "jar" => + contextURI.toString match { + case jarRegex(jarPath, path) => { + val fs = try { + FileSystems.getFileSystem(contextURI) + } catch { + case e: FileSystemNotFoundException => + FileSystems.newFileSystem(contextURI, new 
java.util.HashMap[String, Any]()) + } + val cPath = fs.getPath(path) + (cPath, Paths.get(flatDir.toString, cPath.toString.tail.replaceAll("/", "__"))) + } + case _ => + throw new IllegalArgumentException(s"Unable to parse JAR URI: $contextURI") + } + case "file" => { + val path = Paths.get(contextURI) + val root = (Compile / resourceDirectories).value + .find(file => contextURI.toString.contains(file.toString)) + .get + .toURI + ( + path, + Paths.get( + flatDir.toString, + Paths.get(root).relativize(path).toString.replaceAll("/", "__") + ) + ) } + case _ => + throw new IllegalArgumentException(s"Unrecognized URI scheme: $contextURI") + } - val resolvedRefs = references.map { ref => - val origLocation = if (ref contains " ") ref.split(" ")(1) else ref - if (origLocation.startsWith("/")) { - // Dealing with an absolute path - val url = Option(classLoader.findResource(origLocation.tail)) - if (url.isDefined) - Some(origLocation -> url.get.toURI) - else { - logger.warn(s"Unable to resolve absolute reference to $ref from source file $contextURI") - None - } + val bw = Files.newBufferedWriter(flatPath) + val fileAsString = new String(bytes, Charset.defaultCharset()) + + val references = referenceRegexes.flatMap { re => + re.findAllIn(fileAsString).matchData.map(_.group(1)) + } + + val resolvedRefs = references.map { ref => + if (ref.startsWith("/")) { + // Dealing with an absolute path + ref -> Option(classLoader.findResource(ref.tail)) + } else { + // Relative path + val refPath = contextPath.resolveSibling(ref).normalize() + if (Files.exists(refPath)) { + // Referenced path exists in either the same root resource + // diretory or jar file as the context schema + ref -> Some(refPath.toUri.toURL) } else { - // Relative path - val relPath = contextRelPath.resolveSibling(origLocation).normalize() - if (Files.exists(rootPath.resolve(relPath))) { - // Found the file on the regular filesystem - Some(origLocation -> rootURI.resolve(relPath.toString)) - } else { - // Check the 
classpath for the current reference relative to the - // current contextURI - val url = Option(classLoader.findResource(relPath.toString)) - if (url.isDefined) - Some(origLocation -> url.get.toURI) - else { - // Maybe this is actually an absolute path not within the same - // context. Check the classpath for the original location - val url2 = Option(classLoader.findResource(origLocation)) - if (url2.isDefined) - Some(origLocation -> url2.get.toURI) + ref -> None + contextURI.getScheme match { + case "file" => { + // Need to check other resource directories + val resolvedRoot = (Compile / resourceDirectories).value.find(root => + Files.exists(Paths.get(root.toURI.resolve(ref))) + ) + if (resolvedRoot.isDefined) + ref -> Some(resolvedRoot.get.toURI.resolve(ref).toURL) else { - logger.warn(s"Unable to resolve local reference to $ref from source file $contextURI") - None + // Maybe this path is actually absolute, just missing the + // leading '/', check the classpath + ref -> Option(classLoader.findResource(ref)) } } + case _ => ref -> None } } - }.flatten.toMap - - val updatedFileAsString = { - resolvedRefs.foldLeft(fileAsString) { - case (input, (ref, resolvedRef)) => { - // For each reference replace each instance of it in the - // file with the same reference but with "/" changed to "__" - val relativized = resolvedRef.getScheme match { - case "jar" => resolvedRef.toString.split("!")(1).tail - case _ => rootURI.relativize(resolvedRef).toString + } + }.toMap + + // Warn about unresolved references + resolvedRefs.filter(_._2 == None).map { case (ref, _) => + logger.warn(s"Unable to resolve reference to $ref from source file $contextURI") + } + + val fullyResolved = resolvedRefs.filter(_._2 != None) + + val updatedFileAsString = { + fullyResolved.foldLeft(fileAsString) { + case (input, (ref, optResolvedURL)) => { + // For each reference replace each instance of it in the + // file with the same reference but with "/" changed to "__" + val resolvedURI = 
optResolvedURL.get.toURI + val relativized = resolvedURI.getScheme match { + case "jar" => resolvedURI.toString.split("!")(1).tail + case _ => { + val resolvedRoot = (Compile / resourceDirectories).value.find(root => + resolvedURI.toString.contains(root.toString) + ) + resolvedRoot.get.toURI.relativize(resolvedURI).toString } - input.replaceAll(ref, relativized.replaceAll("/", "__")) } + input.replaceAll(ref, relativized.replaceAll("/", "__")) } } - - bw.write(updatedFileAsString) - bw.close() - processed += contextURI - unprocessed.pushAll((resolvedRefs.values.toSet diff processed).filterNot(unprocessed contains _)) } + + bw.write(updatedFileAsString) + bw.close() + seen += contextURI + val unseen = fullyResolved.values.map(_.get.toURI).filterNot(seen) + unprocessed.pushAll(unseen) } /* Create zip file containing all flattened schemas */ val flattenedFiles = IO.listFiles(flatDir) val sources = flattenedFiles.map(file => file -> file.getName()) IO.zip(sources, daffodilFlattenTarget.value, Package.defaultTimestamp) - logger.info(s"Generated flattened schema package at ${daffodilFlattenTarget.value.toString}") + logger.info( + s"Generated flattened schema package at ${daffodilFlattenTarget.value.toString}" + ) Seq(daffodilFlattenTarget.value) }, From 61ed02f24cafc97b257444602287a5d1d51b6335 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Mon, 18 May 2026 08:21:49 -0400 Subject: [PATCH 07/11] !fixup --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 69e64c6..f4751e9 100644 --- a/README.md +++ b/README.md @@ -325,6 +325,15 @@ and resource files are anything else. ### Flatten Schemas +Many non-Daffodil XML/XSD programs (such as XML validators) do not resolve +schemaLocation's in the same way that Daffodil does and will often stuggle to +find schemas that aren't in the same directory as the root schema. 
By flattening +the directory structure at a common root (ie the directory containing 'com/' +and/or 'org/' we can rename all of the schemas while avoiding any conflicts in +schema name for generically named schemas, like 'baseFormat.dfdl.xsd'. Having +all of the schemas and schemaLocation's renamed should allow tools with less +robust schemaLocation resolvers to just work. + This plugin has functionality to flatten the directory structure of 1 or more schema projects, renaming the schema files and upating schemaLocation's as necessary. @@ -344,15 +353,6 @@ will be renamed to: Note: Original files are not modified, they are simply copied to the specified output directory with the new name and updated schemaLocation's. -Many non-Daffodil XML/XSD programs (such as XML validators) do not resolve -scheamLocations in the same way that Daffodil does and will often stuggle to -find schemas that aren't in the same directory as the root schema. By flattening -the directory structure at a common root (ie the directory containing 'com/' -and/or 'org/' we can rename all of the schemas while avoiding any conflicts in -schema name for generically named schemas, like 'baseFormat.dfdl.xsd'. Having -all of the schemas and schemaLocation's renamed should allow tools with less -robust schemaLocation resolvers to just work. 
- ### Cross-Building In some cases it is helpful to have a single SBT project that supports the From 57e7ab73f8aa038de8de258bd6f4c2c37910e862 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Mon, 18 May 2026 08:26:22 -0400 Subject: [PATCH 08/11] !fixup --- src/main/scala/org/apache/daffodil/DaffodilPlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 274e4ae..4f6af11 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -1034,7 +1034,7 @@ object DaffodilPlugin extends AutoPlugin { } /* Create zip file containing all flattened schemas */ - val flattenedFiles = IO.listFiles(flatDir) + val flattenedFiles = IO.listFiles(flatDir).sorted val sources = flattenedFiles.map(file => file -> file.getName()) IO.zip(sources, daffodilFlattenTarget.value, Package.defaultTimestamp) logger.info( From c9c449302c20f350955bda55a0f09c3e1f5dddd7 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Mon, 18 May 2026 17:22:03 -0400 Subject: [PATCH 09/11] !fixup --- VERSION | 2 +- .../org/apache/daffodil/DaffodilPlugin.scala | 99 +++++++++++++------ 2 files changed, 69 insertions(+), 32 deletions(-) diff --git a/VERSION b/VERSION index c4954f2..bd8bf88 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.8.0-SNAPSHOT +1.7.0 diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 4f6af11..0d418f6 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -20,7 +20,7 @@ package org.apache.daffodil import java.io.File import java.net.{ URI, URLClassLoader } import java.nio.charset.Charset -import java.nio.file.{ FileSystemNotFoundException, FileSystems, Files, Paths } +import java.nio.file.{ FileSystem, FileSystems, Files, Paths } 
import scala.language.implicitConversions import scala.util.Properties import scala.util.matching.Regex @@ -916,27 +916,41 @@ object DaffodilPlugin extends AutoPlugin { val classLoader = new URLClassLoader((projectURLs ++ allClasspathURLs).toArray, null) val referenceRegexes = daffodilFlattenResourceReferencePatterns.value - val jarRegex = "(.*)!(.*)".r + val jarRegex = "(.*/(.*).jar)!(.*)".r val seen = scala.collection.mutable.Set[URI]() val unprocessed = scala.collection.mutable.Stack[URI]() + val unresolved = scala.collection.mutable.ArrayBuffer[(URI, String)]() + val jarFileSystems = scala.collection.mutable.Map[String, FileSystem]() unprocessed.pushAll(projectURLs.map(_.toURI)) seen ++= projectURLs.map(_.toURI) while (!unprocessed.isEmpty) { val contextURI = unprocessed.pop val bytes = contextURI.toURL.openStream().readAllBytes() + + // Get a Java Path for the current file (whether it is in a JAR or + // just a regular file) and a Path for where the current file will be + // moved to in the flattened directory structure. Note that using a + // Java Path on a regular file will use the default FileSystem while for + // JARs a FileSystem is created for interacting with the files inside + // the JAR. This allows all Path functions like resolveSibling/exists to + // work with both regular files or files contained inside a JAR. 
val (contextPath, flatPath) = contextURI.getScheme match { case "jar" => contextURI.toString match { - case jarRegex(jarPath, path) => { - val fs = try { - FileSystems.getFileSystem(contextURI) - } catch { - case e: FileSystemNotFoundException => - FileSystems.newFileSystem(contextURI, new java.util.HashMap[String, Any]()) - } + case jarRegex(jarPath, jarName, path) => { + val fs = jarFileSystems.getOrElseUpdate( + Paths.get(jarPath).toString, + FileSystems.newFileSystem(contextURI, new java.util.HashMap[String, Any]()) + ) val cPath = fs.getPath(path) - (cPath, Paths.get(flatDir.toString, cPath.toString.tail.replaceAll("/", "__"))) + ( + cPath, + Paths.get( + flatDir.toString, + s"${jarName}__${cPath.toString.tail.replaceAll("/", "__")}" + ) + ) } case _ => throw new IllegalArgumentException(s"Unable to parse JAR URI: $contextURI") @@ -944,7 +958,7 @@ object DaffodilPlugin extends AutoPlugin { case "file" => { val path = Paths.get(contextURI) val root = (Compile / resourceDirectories).value - .find(file => contextURI.toString.contains(file.toString)) + .find(dir => contextURI.getPath.startsWith(dir.toString)) .get .toURI ( @@ -959,6 +973,10 @@ object DaffodilPlugin extends AutoPlugin { throw new IllegalArgumentException(s"Unrecognized URI scheme: $contextURI") } + assert( + !Files.exists(flatPath), + s"File $flatPath already exists and would be overwritten, aborting!" 
+ ) val bw = Files.newBufferedWriter(flatPath) val fileAsString = new String(bytes, Charset.defaultCharset()) @@ -966,19 +984,18 @@ object DaffodilPlugin extends AutoPlugin { re.findAllIn(fileAsString).matchData.map(_.group(1)) } - val resolvedRefs = references.map { ref => - if (ref.startsWith("/")) { + val resolvedRefs = references.flatMap { ref => + val optResolved = if (ref.startsWith("/")) { // Dealing with an absolute path - ref -> Option(classLoader.findResource(ref.tail)) + Option(classLoader.findResource(ref.tail)) } else { // Relative path val refPath = contextPath.resolveSibling(ref).normalize() if (Files.exists(refPath)) { // Referenced path exists in either the same root resource // diretory or jar file as the context schema - ref -> Some(refPath.toUri.toURL) + Some(refPath.toUri.toURL) } else { - ref -> None contextURI.getScheme match { case "file" => { // Need to check other resource directories @@ -986,34 +1003,44 @@ object DaffodilPlugin extends AutoPlugin { Files.exists(Paths.get(root.toURI.resolve(ref))) ) if (resolvedRoot.isDefined) - ref -> Some(resolvedRoot.get.toURI.resolve(ref).toURL) + Some(resolvedRoot.get.toURI.resolve(ref).toURL) else { // Maybe this path is actually absolute, just missing the // leading '/', check the classpath - ref -> Option(classLoader.findResource(ref)) + Option(classLoader.findResource(ref)) } } - case _ => ref -> None + case "jar" => { + // Maybe this path in the JAR is actually absolute, just + // missing the leading '/', try resolving it relative to the + // root of the JAR + val jarPath = contextPath.getRoot().resolve(ref) + if (Files.exists(jarPath)) + Some(jarPath.toUri.toURL) + else + None + } + case _ => None } } } + if (optResolved.isEmpty) + unresolved.append((contextURI, ref)) + optResolved.map(resolved => ref -> resolved) }.toMap - // Warn about unresolved references - resolvedRefs.filter(_._2 == None).map { case (ref, _) => - logger.warn(s"Unable to resolve reference to $ref from source file 
$contextURI") - } - - val fullyResolved = resolvedRefs.filter(_._2 != None) - val updatedFileAsString = { - fullyResolved.foldLeft(fileAsString) { + resolvedRefs.foldLeft(fileAsString) { case (input, (ref, optResolvedURL)) => { // For each reference replace each instance of it in the // file with the same reference but with "/" changed to "__" - val resolvedURI = optResolvedURL.get.toURI + val resolvedURI = optResolvedURL.toURI val relativized = resolvedURI.getScheme match { - case "jar" => resolvedURI.toString.split("!")(1).tail + case "jar" => + resolvedURI.toString match { + case jarRegex(_, jarName, path) => + s"${jarName}__${path.tail.replaceAll("/", "__")}" + } case _ => { val resolvedRoot = (Compile / resourceDirectories).value.find(root => resolvedURI.toString.contains(root.toString) @@ -1028,11 +1055,21 @@ object DaffodilPlugin extends AutoPlugin { bw.write(updatedFileAsString) bw.close() - seen += contextURI - val unseen = fullyResolved.values.map(_.get.toURI).filterNot(seen) + val unseen = resolvedRefs.values.map(_.toURI).filter(seen.add(_)) unprocessed.pushAll(unseen) } + // Close any open JAR FileSystems + jarFileSystems.values.foreach(_.close()) + + // Error unresolved references + unresolved.foreach { case (context, ref) => + logger.error(s"Unable to resolve reference to $ref from source file $context") + } + + // Error out if we have any unresolved references + assert(unresolved.isEmpty) + /* Create zip file containing all flattened schemas */ val flattenedFiles = IO.listFiles(flatDir).sorted val sources = flattenedFiles.map(file => file -> file.getName()) From 2093bad33f60eb0459547975e5518b750528f370 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Tue, 19 May 2026 10:16:31 -0400 Subject: [PATCH 10/11] !fixup --- .../org/apache/daffodil/DaffodilPlugin.scala | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala 
b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 0d418f6..74028d6 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -918,6 +918,17 @@ object DaffodilPlugin extends AutoPlugin { val referenceRegexes = daffodilFlattenResourceReferencePatterns.value val jarRegex = "(.*/(.*).jar)!(.*)".r + def getRootPath(path: URI) = { + path.getScheme match { + case "file" => { + (Compile / resourceDirectories).value + .find(dir => path.getPath.startsWith(dir.toString)) + .get + .toPath + } + } + } + val seen = scala.collection.mutable.Set[URI]() val unprocessed = scala.collection.mutable.Stack[URI]() val unresolved = scala.collection.mutable.ArrayBuffer[(URI, String)]() @@ -957,15 +968,12 @@ object DaffodilPlugin extends AutoPlugin { } case "file" => { val path = Paths.get(contextURI) - val root = (Compile / resourceDirectories).value - .find(dir => contextURI.getPath.startsWith(dir.toString)) - .get - .toURI + val root = getRootPath(contextURI) ( path, Paths.get( flatDir.toString, - Paths.get(root).relativize(path).toString.replaceAll("/", "__") + root.relativize(path).toString.replaceAll("/", "__") ) ) } @@ -996,32 +1004,21 @@ object DaffodilPlugin extends AutoPlugin { // diretory or jar file as the context schema Some(refPath.toUri.toURL) } else { - contextURI.getScheme match { + val contextRoot = getRootPath(contextURI) + val relPath = contextRoot.relativize(refPath) + val optResolvedRelative = contextURI.getScheme match { case "file" => { // Need to check other resource directories val resolvedRoot = (Compile / resourceDirectories).value.find(root => - Files.exists(Paths.get(root.toURI.resolve(ref))) + Files.exists(root.toPath.resolve(relPath)) ) - if (resolvedRoot.isDefined) - Some(resolvedRoot.get.toURI.resolve(ref).toURL) - else { - // Maybe this path is actually absolute, just missing the - // leading '/', check the classpath - Option(classLoader.findResource(ref)) - } + 
resolvedRoot.map(_.toURI.resolve(ref).toURL) } - case "jar" => { - // Maybe this path in the JAR is actually absolute, just - // missing the leading '/', try resolving it relative to the - // root of the JAR - val jarPath = contextPath.getRoot().resolve(ref) - if (Files.exists(jarPath)) - Some(jarPath.toUri.toURL) - else - None - } - case _ => None + case "jar" => + // Nothing else to check, resolveSibling should have found it in the same jar + None } + optResolvedRelative.orElse(Option(classLoader.findResource(ref))) } } if (optResolved.isEmpty) @@ -1039,11 +1036,11 @@ object DaffodilPlugin extends AutoPlugin { case "jar" => resolvedURI.toString match { case jarRegex(_, jarName, path) => - s"${jarName}__${path.tail.replaceAll("/", "__")}" + s"$jarName/${path.tail}" } case _ => { val resolvedRoot = (Compile / resourceDirectories).value.find(root => - resolvedURI.toString.contains(root.toString) + resolvedURI.getPath.startsWith(root.toString) ) resolvedRoot.get.toURI.relativize(resolvedURI).toString } @@ -1068,7 +1065,8 @@ object DaffodilPlugin extends AutoPlugin { } // Error out if we have any unresolved references - assert(unresolved.isEmpty) + if (!unresolved.isEmpty) + throw new MessageOnlyException("Unable to resolve one or more references while flattening") /* Create zip file containing all flattened schemas */ val flattenedFiles = IO.listFiles(flatDir).sorted From 9cb446a7fba3dc99312f6af575a00dff8e902c24 Mon Sep 17 00:00:00 2001 From: Josh Adams Date: Tue, 19 May 2026 11:08:27 -0400 Subject: [PATCH 11/11] !fixup --- .../org/apache/daffodil/DaffodilPlugin.scala | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 74028d6..64fac86 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -1004,20 +1004,22 @@ object DaffodilPlugin 
extends AutoPlugin { // diretory or jar file as the context schema Some(refPath.toUri.toURL) } else { - val contextRoot = getRootPath(contextURI) - val relPath = contextRoot.relativize(refPath) val optResolvedRelative = contextURI.getScheme match { case "file" => { // Need to check other resource directories + val contextRoot = getRootPath(contextURI) + val relPath = contextRoot.relativize(refPath) val resolvedRoot = (Compile / resourceDirectories).value.find(root => Files.exists(root.toPath.resolve(relPath)) ) - resolvedRoot.map(_.toURI.resolve(ref).toURL) + resolvedRoot.map(_.toPath.resolve(relPath).toUri.toURL) } case "jar" => // Nothing else to check, resolveSibling should have found it in the same jar None } + // The orElse call is to support the deprecated behavior of + // resolving relative paths as if they were absolute optResolvedRelative.orElse(Option(classLoader.findResource(ref))) } } @@ -1039,10 +1041,8 @@ object DaffodilPlugin extends AutoPlugin { s"$jarName/${path.tail}" } case _ => { - val resolvedRoot = (Compile / resourceDirectories).value.find(root => - resolvedURI.getPath.startsWith(root.toString) - ) - resolvedRoot.get.toURI.relativize(resolvedURI).toString + val resolvedRoot = getRootPath(contextURI) + resolvedRoot.relativize(Paths.get(resolvedURI)).toString } } input.replaceAll(ref, relativized.replaceAll("/", "__")) @@ -1066,7 +1066,9 @@ object DaffodilPlugin extends AutoPlugin { // Error out if we have any unresolved references if (!unresolved.isEmpty) - throw new MessageOnlyException("Unable to resolve one or more references while flattening") + throw new MessageOnlyException( + "Unable to resolve one or more references while flattening" + ) /* Create zip file containing all flattened schemas */ val flattenedFiles = IO.listFiles(flatDir).sorted