diff --git a/README.md b/README.md index 969c65f..f4751e9 100644 --- a/README.md +++ b/README.md @@ -323,6 +323,36 @@ root `src/` directory, and all test source and resource files to be in a root `test/` directory. Source files are those that end with `*.scala` or `*.java`, and resource files are anything else. +### Flatten Schemas + +Many non-Daffodil XML/XSD programs (such as XML validators) do not resolve +schemaLocation's in the same way that Daffodil does and will often stuggle to +find schemas that aren't in the same directory as the root schema. By flattening +the directory structure at a common root (ie the directory containing 'com/' +and/or 'org/' we can rename all of the schemas while avoiding any conflicts in +schema name for generically named schemas, like 'baseFormat.dfdl.xsd'. Having +all of the schemas and schemaLocation's renamed should allow tools with less +robust schemaLocation resolvers to just work. + +This plugin has functionality to flatten the directory structure of 1 or more +schema projects, renaming the schema files and upating schemaLocation's as +necessary. + +```bash +sbt daffodilFlattenSchemas +``` + +The renaming works as follows: + +`org/apache/daffodil/xsd/main.dfdl.xsd` + +will be renamed to: + +`org__apache__daffodil__xsd__main.dfdl.xsd` + +Note: Original files are not modified, they are simply copied to the specified +output directory with the new name and updated schemaLocation's. + ### Cross-Building In some cases it is helpful to have a single SBT project that supports the diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala index 4592b40..64fac86 100644 --- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala +++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala @@ -18,8 +18,12 @@ package org.apache.daffodil import java.io.File +import java.net.{ URI, URLClassLoader } +import java.nio.charset.Charset +import java.nio.file.{ FileSystem, FileSystems, Files, Paths } import scala.language.implicitConversions import scala.util.Properties +import scala.util.matching.Regex import sbt.Keys._ import sbt._ @@ -63,6 +67,15 @@ object DaffodilPlugin extends AutoPlugin { val daffodilTdmlUsesPackageBin = settingKey[Boolean]( "Whether or not TDML files use the saved parsers created by daffodilPackageBin" ) + val daffodilFlattenTarget = settingKey[File]( + "File to write the flattened schemas package to" + ) + val daffodilFlattenSchemas = taskKey[File]( + "flatten the directory structure of all schemas and schema dependencies to a single common directory and update 'schemaLocation' paths to match" + ) + val daffodilFlattenResourceReferencePatterns = settingKey[Seq[Regex]]( + """Sequence of patterns that will match resource references, ie 'schemaLocation="/org/apache/..."'. Note that the pattern must capture the reference in the first capture group""" + ) /** * Class to define daffodilPackageBinInfos, auto-imported to simplify sbt configs @@ -598,7 +611,8 @@ object DaffodilPlugin extends AutoPlugin { inConfig(Compile)(packageDaffodilBinSettings) ++ inConfig(Test)(packageDaffodilBinSettings) ++ inConfig(Compile)(flatLayoutSettings("src")) ++ - inConfig(Test)(flatLayoutSettings("test")) + inConfig(Test)(flatLayoutSettings("test")) ++ + inConfig(Compile)(daffodilFlattenSettings) /** * Define the artifacts, products, and packageDaffodilBin task that creates the saved parsers @@ -840,6 +854,252 @@ object DaffodilPlugin extends AutoPlugin { } } + def daffodilFlattenSettings: Seq[Setting[_]] = Seq( + daffodilFlattenTarget := target.value / s"${name.value}-${version.value}-flat.zip", + + /** + * Whether or not to publish the flattened schemas zip. Defaults to false. + * + * If projects want to publish flattened schemas then they must explicitly enable it by + * setting 'daffodilFlattenSchemas / publishArtifact := true'. + * + * If false, flattened schemas will not be created unless you explicitly run the + * daffodilFlattenSchemas. + */ + daffodilFlattenSchemas / publishArtifact := false, + + daffodilFlattenSchemas / artifact := Artifact( + name.value, + "flat", + "zip", + Some("flat"), + Vector(), + None + ), + + /** + * Only grab the file types that need to be flattened. These files are only + * grabbed from the root project. Any referenced files will be pulled in + * from the classpath. Note that XML files are commonly used in XSLT for + * settings. + */ + daffodilFlattenSchemas / includeFilter := "*.xsd" | "*.xsl" | "*.xslt" | "*.xml", + daffodilFlattenSchemas / excludeFilter := HiddenFileFilter, + + daffodilFlattenResourceReferencePatterns := List( + """(? + (root ** filter).get.map(_.toURI.toURL).toList + }.flatten + + /** + * Create a URLClassLoader object with URLs to all resources used by the + * project. The class loader will be used to resolve references made + * within the flattened files. + */ + val allClasspathURLs = (Test / externalDependencyClasspath).value.map(_.data.toURI.toURL) + val classLoader = new URLClassLoader((projectURLs ++ allClasspathURLs).toArray, null) + + val referenceRegexes = daffodilFlattenResourceReferencePatterns.value + val jarRegex = "(.*/(.*).jar)!(.*)".r + + def getRootPath(path: URI) = { + path.getScheme match { + case "file" => { + (Compile / resourceDirectories).value + .find(dir => path.getPath.startsWith(dir.toString)) + .get + .toPath + } + } + } + + val seen = scala.collection.mutable.Set[URI]() + val unprocessed = scala.collection.mutable.Stack[URI]() + val unresolved = scala.collection.mutable.ArrayBuffer[(URI, String)]() + val jarFileSystems = scala.collection.mutable.Map[String, FileSystem]() + unprocessed.pushAll(projectURLs.map(_.toURI)) + seen ++= projectURLs.map(_.toURI) + while (!unprocessed.isEmpty) { + val contextURI = unprocessed.pop + val bytes = contextURI.toURL.openStream().readAllBytes() + + // Get a Java Path for the current file (whether it is in a JAR or + // just a regular file) and a Path for where the current file will be + // moved to in the flattened directory structure. Note that using a + // Java Path on a regular file will use the default FileSystem while for + // JARs a FileSystem is created for interacting with the files inside + // the JAR. This allows all Path functions like resolveSibling/exists to + // work with both regular files or files contained inside a JAR. + val (contextPath, flatPath) = contextURI.getScheme match { + case "jar" => + contextURI.toString match { + case jarRegex(jarPath, jarName, path) => { + val fs = jarFileSystems.getOrElseUpdate( + Paths.get(jarPath).toString, + FileSystems.newFileSystem(contextURI, new java.util.HashMap[String, Any]()) + ) + val cPath = fs.getPath(path) + ( + cPath, + Paths.get( + flatDir.toString, + s"${jarName}__${cPath.toString.tail.replaceAll("/", "__")}" + ) + ) + } + case _ => + throw new IllegalArgumentException(s"Unable to parse JAR URI: $contextURI") + } + case "file" => { + val path = Paths.get(contextURI) + val root = getRootPath(contextURI) + ( + path, + Paths.get( + flatDir.toString, + root.relativize(path).toString.replaceAll("/", "__") + ) + ) + } + case _ => + throw new IllegalArgumentException(s"Unrecognized URI scheme: $contextURI") + } + + assert( + !Files.exists(flatPath), + s"File $flatPath already exists and would be overwritten, aborting!" + ) + val bw = Files.newBufferedWriter(flatPath) + val fileAsString = new String(bytes, Charset.defaultCharset()) + + val references = referenceRegexes.flatMap { re => + re.findAllIn(fileAsString).matchData.map(_.group(1)) + } + + val resolvedRefs = references.flatMap { ref => + val optResolved = if (ref.startsWith("/")) { + // Dealing with an absolute path + Option(classLoader.findResource(ref.tail)) + } else { + // Relative path + val refPath = contextPath.resolveSibling(ref).normalize() + if (Files.exists(refPath)) { + // Referenced path exists in either the same root resource + // diretory or jar file as the context schema + Some(refPath.toUri.toURL) + } else { + val optResolvedRelative = contextURI.getScheme match { + case "file" => { + // Need to check other resource directories + val contextRoot = getRootPath(contextURI) + val relPath = contextRoot.relativize(refPath) + val resolvedRoot = (Compile / resourceDirectories).value.find(root => + Files.exists(root.toPath.resolve(relPath)) + ) + resolvedRoot.map(_.toPath.resolve(relPath).toUri.toURL) + } + case "jar" => + // Nothing else to check, resolveSibling should have found it in the same jar + None + } + // The orElse call is to support the deprecated behavior of + // resolving relative paths asif they were absolute + optResolvedRelative.orElse(Option(classLoader.findResource(ref))) + } + } + if (optResolved.isEmpty) + unresolved.append((contextURI, ref)) + optResolved.map(resolved => ref -> resolved) + }.toMap + + val updatedFileAsString = { + resolvedRefs.foldLeft(fileAsString) { + case (input, (ref, optResolvedURL)) => { + // For each reference replace each instance of it in the + // file with the same reference but with "/" changed to "__" + val resolvedURI = optResolvedURL.toURI + val relativized = resolvedURI.getScheme match { + case "jar" => + resolvedURI.toString match { + case jarRegex(_, jarName, path) => + s"$jarName/${path.tail}" + } + case _ => { + val resolvedRoot = getRootPath(contextURI) + resolvedRoot.relativize(Paths.get(resolvedURI)).toString + } + } + input.replaceAll(ref, relativized.replaceAll("/", "__")) + } + } + } + + bw.write(updatedFileAsString) + bw.close() + val unseen = resolvedRefs.values.map(_.toURI).filter(seen.add(_)) + unprocessed.pushAll(unseen) + } + + // Close any open JAR FileSystems + jarFileSystems.values.foreach(_.close()) + + // Error unresolved references + unresolved.foreach { case (context, ref) => + logger.error(s"Unable to resolve reference to $ref from source file $context") + } + + // Error out if we have any unresolved references + if (!unresolved.isEmpty) + throw new MessageOnlyException( + "Unable to resolve one or more references while flattening" + ) + + /* Create zip file containing all flattened schemas */ + val flattenedFiles = IO.listFiles(flatDir).sorted + val sources = flattenedFiles.map(file => file -> file.getName()) + IO.zip(sources, daffodilFlattenTarget.value, Package.defaultTimestamp) + logger.info( + s"Generated flattened schema package at ${daffodilFlattenTarget.value.toString}" + ) + Seq(daffodilFlattenTarget.value) + }, + + daffodilFlattenSchemas := { + (daffodilFlattenSchemas / products).value.head + }, + + artifacts ++= { + if ((daffodilFlattenSchemas / publishArtifact).value) { + Seq((daffodilFlattenSchemas / artifact).value) + } else { + Seq.empty + } + }, + packagedArtifacts ++= { + if ((daffodilFlattenSchemas / publishArtifact).value) { + Map((daffodilFlattenSchemas / artifact).value -> daffodilFlattenSchemas.value) + } else { + Map.empty[Artifact, File] + } + } + ) + class DaffodilProject(rootProject: Project, crossProjects: Seq[Project]) extends CompositeProject {