Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,36 @@ root `src/` directory, and all test source and resource files to be in a root
`test/` directory. Source files are those that end with `*.scala` or `*.java`,
and resource files are anything else.

### Flatten Schemas
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this contain explanations of the other flatten options for completeness ex daffodilFlattenTarget and the regex one?


Many non-Daffodil XML/XSD programs (such as XML validators) do not resolve
schemaLocation's in the same way that Daffodil does and will often stuggle to
find schemas that aren't in the same directory as the root schema. By flattening
the directory structure at a common root (ie the directory containing 'com/'
and/or 'org/' we can rename all of the schemas while avoiding any conflicts in
schema name for generically named schemas, like 'baseFormat.dfdl.xsd'. Having
all of the schemas and schemaLocation's renamed should allow tools with less
robust schemaLocation resolvers to just work.

This plugin has functionality to flatten the directory structure of 1 or more
schema projects, renaming the schema files and upating schemaLocation's as
necessary.

```bash
sbt daffodilFlattenSchemas
```

The renaming works as follows:

`org/apache/daffodil/xsd/main.dfdl.xsd`

will be renamed to:

`org__apache__daffodil__xsd__main.dfdl.xsd`

Note: Original files are not modified, they are simply copied to the specified
output directory with the new name and updated schemaLocation's.

### Cross-Building

In some cases it is helpful to have a single SBT project that supports the
Expand Down
262 changes: 261 additions & 1 deletion src/main/scala/org/apache/daffodil/DaffodilPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@
package org.apache.daffodil

import java.io.File
import java.net.{ URI, URLClassLoader }
import java.nio.charset.Charset
import java.nio.file.{ FileSystem, FileSystems, Files, Paths }
import scala.language.implicitConversions
import scala.util.Properties
import scala.util.matching.Regex

import sbt.Keys._
import sbt._
Expand Down Expand Up @@ -63,6 +67,15 @@ object DaffodilPlugin extends AutoPlugin {
val daffodilTdmlUsesPackageBin = settingKey[Boolean](
Comment thread
stevedlawrence marked this conversation as resolved.
"Whether or not TDML files use the saved parsers created by daffodilPackageBin"
)
val daffodilFlattenTarget = settingKey[File](
"File to write the flattened schemas package to"
)
val daffodilFlattenSchemas = taskKey[File](
"flatten the directory structure of all schemas and schema dependencies to a single common directory and update 'schemaLocation' paths to match"
)
val daffodilFlattenResourceReferencePatterns = settingKey[Seq[Regex]](
"""Sequence of patterns that will match resource references, ie 'schemaLocation="/org/apache/..."'. Note that the pattern must capture the reference in the first capture group"""
)

/**
* Class to define daffodilPackageBinInfos, auto-imported to simplify sbt configs
Expand Down Expand Up @@ -598,7 +611,8 @@ object DaffodilPlugin extends AutoPlugin {
inConfig(Compile)(packageDaffodilBinSettings) ++
inConfig(Test)(packageDaffodilBinSettings) ++
inConfig(Compile)(flatLayoutSettings("src")) ++
inConfig(Test)(flatLayoutSettings("test"))
inConfig(Test)(flatLayoutSettings("test")) ++
inConfig(Compile)(daffodilFlattenSettings)

/**
* Define the artifacts, products, and packageDaffodilBin task that creates the saved parsers
Expand Down Expand Up @@ -840,6 +854,252 @@ object DaffodilPlugin extends AutoPlugin {
}
}

def daffodilFlattenSettings: Seq[Setting[_]] = Seq(
daffodilFlattenTarget := target.value / s"${name.value}-${version.value}-flat.zip",

/**
* Whether or not to publish the flattened schemas zip. Defaults to false.
*
* If projects want to publish flattened schemas then they must explicitly enable it by
* setting 'daffodilFlattenSchemas / publishArtifact := true'.
*
* If false, flattened schemas will not be created unless you explicitly run the
* daffodilFlattenSchemas.
*/
daffodilFlattenSchemas / publishArtifact := false,

daffodilFlattenSchemas / artifact := Artifact(
name.value,
"flat",
"zip",
Some("flat"),
Vector(),
None
),

/**
* Only grab the file types that need to be flattened. These files are only
* grabbed from the root project. Any referenced files will be pulled in
* from the classpath. Note that XML files are commonly used in XSLT for
* settings.
*/
daffodilFlattenSchemas / includeFilter := "*.xsd" | "*.xsl" | "*.xslt" | "*.xml",
daffodilFlattenSchemas / excludeFilter := HiddenFileFilter,

daffodilFlattenResourceReferencePatterns := List(
"""(?<!xsi:)schemaLocation=\"([^\"]*)\"""".r,
"href=\"([^\"]*)\"".r,
"document[(]'([^']*)'[)]".r
),

daffodilFlattenSchemas / products := {

val logger = streams.value.log
val filter =
(daffodilFlattenSchemas / includeFilter).value -- (daffodilFlattenSchemas / excludeFilter).value

Comment thread
stevedlawrence marked this conversation as resolved.
val flatDir = target.value / "flatDir"
if (flatDir.exists())
IO.delete(flatDir)
IO.createDirectory(flatDir)

val projectURLs = (Compile / resourceDirectories).value.map { root =>
(root ** filter).get.map(_.toURI.toURL).toList
}.flatten

/**
* Create a URLClassLoader object with URLs to all resources used by the
* project. The class loader will be used to resolve references made
* within the flattened files.
*/
val allClasspathURLs = (Test / externalDependencyClasspath).value.map(_.data.toURI.toURL)
val classLoader = new URLClassLoader((projectURLs ++ allClasspathURLs).toArray, null)

val referenceRegexes = daffodilFlattenResourceReferencePatterns.value
val jarRegex = "(.*/(.*).jar)!(.*)".r

def getRootPath(path: URI) = {
path.getScheme match {
case "file" => {
(Compile / resourceDirectories).value
.find(dir => path.getPath.startsWith(dir.toString))
.get
.toPath
}
}
}

val seen = scala.collection.mutable.Set[URI]()
val unprocessed = scala.collection.mutable.Stack[URI]()
val unresolved = scala.collection.mutable.ArrayBuffer[(URI, String)]()
val jarFileSystems = scala.collection.mutable.Map[String, FileSystem]()
unprocessed.pushAll(projectURLs.map(_.toURI))
seen ++= projectURLs.map(_.toURI)
while (!unprocessed.isEmpty) {
val contextURI = unprocessed.pop
val bytes = contextURI.toURL.openStream().readAllBytes()

// Get a Java Path for the current file (whether it is in a JAR or
// just a regular file) and a Path for where the current file will be
// moved to in the flattened directory structure. Note that using a
// Java Path on a regular file will use the default FileSystem while for
// JARs a FileSystem is created for interacting with the files inside
// the JAR. This allows all Path functions like resolveSibling/exists to
// work with both regular files or files contained inside a JAR.
val (contextPath, flatPath) = contextURI.getScheme match {
Comment thread
jadams-tresys marked this conversation as resolved.
case "jar" =>
contextURI.toString match {
case jarRegex(jarPath, jarName, path) => {
val fs = jarFileSystems.getOrElseUpdate(
Paths.get(jarPath).toString,
FileSystems.newFileSystem(contextURI, new java.util.HashMap[String, Any]())
)
val cPath = fs.getPath(path)
(
cPath,
Paths.get(
flatDir.toString,
s"${jarName}__${cPath.toString.tail.replaceAll("/", "__")}"
)
)
}
case _ =>
throw new IllegalArgumentException(s"Unable to parse JAR URI: $contextURI")
}
case "file" => {
val path = Paths.get(contextURI)
val root = getRootPath(contextURI)
(
path,
Paths.get(
flatDir.toString,
root.relativize(path).toString.replaceAll("/", "__")
)
)
}
case _ =>
throw new IllegalArgumentException(s"Unrecognized URI scheme: $contextURI")
}

assert(
!Files.exists(flatPath),
s"File $flatPath already exists and would be overwritten, aborting!"
)
val bw = Files.newBufferedWriter(flatPath)
Comment thread
jadams-tresys marked this conversation as resolved.
val fileAsString = new String(bytes, Charset.defaultCharset())

val references = referenceRegexes.flatMap { re =>
re.findAllIn(fileAsString).matchData.map(_.group(1))
}

val resolvedRefs = references.flatMap { ref =>
val optResolved = if (ref.startsWith("/")) {
// Dealing with an absolute path
Option(classLoader.findResource(ref.tail))
} else {
// Relative path
val refPath = contextPath.resolveSibling(ref).normalize()
if (Files.exists(refPath)) {
// Referenced path exists in either the same root resource
// diretory or jar file as the context schema
Some(refPath.toUri.toURL)
} else {
val optResolvedRelative = contextURI.getScheme match {
case "file" => {
// Need to check other resource directories
val contextRoot = getRootPath(contextURI)
val relPath = contextRoot.relativize(refPath)
val resolvedRoot = (Compile / resourceDirectories).value.find(root =>
Files.exists(root.toPath.resolve(relPath))
)
resolvedRoot.map(_.toPath.resolve(relPath).toUri.toURL)
}
case "jar" =>
// Nothing else to check, resolveSibling should have found it in the same jar
None
}
// The orElse call is to support the deprecated behavior of
// resolving relative paths asif they were absolute
optResolvedRelative.orElse(Option(classLoader.findResource(ref)))
Comment thread
jadams-tresys marked this conversation as resolved.
}
}
if (optResolved.isEmpty)
unresolved.append((contextURI, ref))
optResolved.map(resolved => ref -> resolved)
}.toMap

val updatedFileAsString = {
resolvedRefs.foldLeft(fileAsString) {
case (input, (ref, optResolvedURL)) => {
// For each reference replace each instance of it in the
// file with the same reference but with "/" changed to "__"
val resolvedURI = optResolvedURL.toURI
val relativized = resolvedURI.getScheme match {
case "jar" =>
resolvedURI.toString match {
case jarRegex(_, jarName, path) =>
s"$jarName/${path.tail}"
}
case _ => {
val resolvedRoot = getRootPath(contextURI)
resolvedRoot.relativize(Paths.get(resolvedURI)).toString
}
}
input.replaceAll(ref, relativized.replaceAll("/", "__"))
}
Comment thread
jadams-tresys marked this conversation as resolved.
}
}

bw.write(updatedFileAsString)
bw.close()
val unseen = resolvedRefs.values.map(_.toURI).filter(seen.add(_))
unprocessed.pushAll(unseen)
}

// Close any open JAR FileSystems
jarFileSystems.values.foreach(_.close())

// Error unresolved references
unresolved.foreach { case (context, ref) =>
logger.error(s"Unable to resolve reference to $ref from source file $context")
}

// Error out if we have any unresolved references
if (!unresolved.isEmpty)
throw new MessageOnlyException(
"Unable to resolve one or more references while flattening"
)

/* Create zip file containing all flattened schemas */
val flattenedFiles = IO.listFiles(flatDir).sorted
val sources = flattenedFiles.map(file => file -> file.getName())
IO.zip(sources, daffodilFlattenTarget.value, Package.defaultTimestamp)
logger.info(
s"Generated flattened schema package at ${daffodilFlattenTarget.value.toString}"
)
Seq(daffodilFlattenTarget.value)
},

daffodilFlattenSchemas := {
(daffodilFlattenSchemas / products).value.head
},

artifacts ++= {
if ((daffodilFlattenSchemas / publishArtifact).value) {
Seq((daffodilFlattenSchemas / artifact).value)
} else {
Seq.empty
}
},
packagedArtifacts ++= {
if ((daffodilFlattenSchemas / publishArtifact).value) {
Map((daffodilFlattenSchemas / artifact).value -> daffodilFlattenSchemas.value)
} else {
Map.empty[Artifact, File]
}
}
)

class DaffodilProject(rootProject: Project, crossProjects: Seq[Project])
extends CompositeProject {

Expand Down
Loading