diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..740e822 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,12 @@ +{ + "permissions": { + "allow": [ + "Bash(\"/c/Program Files/IBM/ACE/12.0.12.17/common/java17/bin/jar.exe\" tf \"C:/Users/Bmatt/AppData/Local/Coursier/cache/v1/https/repo1.maven.org/maven2/org/apache/daffodil/daffodil-tdml-lib_3/4.1.0/daffodil-tdml-lib_3-4.1.0.jar\")", + "Bash(grep -v \"$\")", + "Bash(grep \"\\\\.class$\")", + "Bash(\"/c/Program Files/IBM/ACE/12.0.12.17/common/java17/bin/javap.exe\" -classpath \"C:/Users/Bmatt/AppData/Local/Coursier/cache/v1/https/repo1.maven.org/maven2/org/apache/daffodil/daffodil-tdml-lib_3/4.1.0/daffodil-tdml-lib_3-4.1.0.jar\" \"org.apache.daffodil.tdml.processor.AbstractTDMLDFDLProcessorFactory\")", + "Bash(\"/c/Program Files/IBM/ACE/12.0.12.17/common/java17/bin/javap.exe\" -classpath \"C:/Users/Bmatt/AppData/Local/Coursier/cache/v1/https/repo1.maven.org/maven2/org/apache/daffodil/daffodil-tdml-lib_3/4.1.0/daffodil-tdml-lib_3-4.1.0.jar\" \"org.apache.daffodil.tdml.processor.TDMLDFDLProcessor\")", + "WebFetch(domain:matthiasblomme.github.io)" + ] + } +} diff --git a/.gitignore b/.gitignore index 4733d9d..f81b2fa 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ project/errors .\#.* /out/ /bin/ +test/* diff --git a/HOW-TO-TEST.md b/HOW-TO-TEST.md new file mode 100644 index 0000000..c631535 --- /dev/null +++ b/HOW-TO-TEST.md @@ -0,0 +1,159 @@ +# How to Test a DFDL Schema Against IBM DFDL + +This guide covers the quickest path from zero to running IBM DFDL against your +own schema and data file — no TDML knowledge required. 
+ +--- + +## Prerequisites + +- IBM ACE 12 or 13 installed (`setup-ace-jars.ps1` defaults to `C:\Program Files\IBM\ACE\12.0.12.17` or `C:\Program Files\IBM\ACE\13.0.6.0`; use `-AcePath` for any other location) +- SBT installed at `C:\Program Files (x86)\sbt\bin\sbt.bat` +- This project checked out and `lib/` populated (see Step 1) + +--- + +## Step 1 — Populate `lib/` (one-time setup) + +Copy the IBM DFDL jars from your ACE installation into the project: + +```powershell +# ACE 12 (default) +.\setup-ace-jars.ps1 + +# ACE 13 +.\setup-ace-jars.ps1 -AceVersion 13 + +# Custom install path +.\setup-ace-jars.ps1 -AcePath "C:\Program Files\IBM\ACE\12.0.12.17" +``` + +This copies the runtime jars into `lib/` and the IBM sample files into +`src/test/resources/`. You only need to do this once (or when switching ACE versions). + +--- + +## Step 2 — Run the validator + +Point `validate.ps1` at your schema and data file: + +```powershell +.\validate.ps1 ` + -Schema "C:\path\to\MySchema.xsd" ` + -Data "C:\path\to\mydata.txt" +``` + +### What it does + +1. Resolves the ACE Java 17 installation +2. If your schema imports `IBMdefined/RecordSeparatedFieldFormat.xsd` (relative + path), copies that folder next to your schema automatically +3. Auto-detects the root element (looks for `ibmSchExtn:docRoot="true"` or + `ibmDfdlExtn:docRoot="true"` in the schema; falls back to the first top-level + element) +4. Compiles the schema with IBM DFDL +5. Parses the data file +6. Prints either the parsed XML infoset or a clear error message + +### Success output + +``` +Schema : C:\path\to\MySchema.xsd +Data file : C:\path\to\mydata.txt +Root : MyRootElement namespace: (none) + +=== PARSE SUCCESSFUL === +<MyRootElement> + <Field1>value1</Field1> + ... +</MyRootElement> +``` + +### Failure output + +``` +=== PARSE FAILED === +[Parse error] Unexpected data found after end of parse.
+``` + +--- + +## Optional switches + +| Switch | Default | Description | +|--------|---------|-------------| +| `-Root <name>` | auto-detect | Force a specific root element name | +| `-AceVersion 12\|13` | `12` | Select ACE version by number | +| `-AcePath <path>` | — | Full path to ACE install root (overrides `-AceVersion`) | +| `-SbtPath <path>` | `C:\Program Files (x86)\sbt\bin\sbt.bat` | Full path to `sbt.bat` | +| `-Trace` | off | Print IBM DFDL's full service trace to stderr (note: `-Verbose` is reserved by PowerShell) | + +Examples: + +```powershell +# Schema has two doc-root elements — pick one explicitly +.\validate.ps1 -Schema "..." -Data "..." -Root "MyRootElement" + +# ACE 13 +.\validate.ps1 -Schema "..." -Data "..." -AceVersion 13 + +# Non-standard ACE install location +.\validate.ps1 -Schema "..." -Data "..." -AcePath "C:\Program Files\IBM\ACE\12.0.12.17" + +# sbt installed in a non-standard location +.\validate.ps1 -Schema "..." -Data "..." -SbtPath "D:\tools\sbt\bin\sbt.bat" + +# Detailed trace output for diagnosing parse failures +.\validate.ps1 -Schema "..." -Data "..." -Trace +``` + +### Understanding trace output + +With `-Trace`, IBM DFDL's service trace is written to stderr before the +summary. Each line shows the event level, the error code (if any), the byte +offset, and the schema location: + +``` +TraceListener: error: CTDP3041E: Initiator 'ABC' not found at offset '42' for element '.../MyElement[1]'. +TraceListener: info: Offset: 42. Parser was unable to resolve data on the current branch ... +TraceListener: info: Offset: 42. Element 'OptionalChild' is optional or missing. The element will not be included in the infoset. +TraceListener: fatal: CTDP3002E: Unexpected data found at offset '42' after parsing completed. +=== PARSE FAILED === +[Fatal error] CTDP3002E: Unexpected data found at offset '42' after parsing completed.
+``` + +The `error:` lines identify which schema element failed to match and why — +this is the key diagnostic for schema or data mismatches. + +--- + +## Schema requirements + +Your DFDL schema must be compatible with IBM DFDL. Common pitfalls: + +- **No generic `padChar`** — IBM DFDL requires the explicit property + (`dfdl:textStringPadCharacter`, not `dfdl:padChar` on `dfdl:format`). +- **`IBMdefined/` imports** — schemas that import + `IBMdefined/RecordSeparatedFieldFormat.xsd` need that file adjacent to the + schema. `validate.ps1` handles this automatically using the copy in + `src/test/resources/IBMdefined/`. +- **Root element annotation** — mark your root element with + `ibmSchExtn:docRoot="true"` or `ibmDfdlExtn:docRoot="true"` so that + auto-detection works. If the schema has no such annotation, pass `-Root`. + +--- + +## Adding a repeatable TDML test (optional next step) + +Once parsing works, you can capture the expected infoset output and turn it +into a permanent TDML regression test: + +1. Copy the printed infoset XML into a new `<tdml:parserTestCase>` in a `.tdml` + file under `src/test/resources/` +2. Create a matching Scala test class (see `TestSimpleEDI.scala` as a template) +3. Run `sbt test` to verify + +This gives you a permanent cross-test that runs against both IBM DFDL and +Daffodil on every build. See `src/test/resources/SimpleEDITests.tdml` and +`src/test/scala/io/github/openDFDL/TestSimpleEDI.scala` for a complete +working example. diff --git a/README.md b/README.md index 31cad6b..e140196 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ + # IBM DFDL Cross Tester for Daffodil This is a test rig that drives TDML tests against IBM DFDL enabling -cross testing of tests easily against both Daffodl and the IBM DFDL implementation. -It uses the TDML Runner library from Daffodil, and complements that with a IBM-specific +cross testing of tests easily against both Daffodil and the IBM DFDL implementation.
+It uses the TDML Runner library from Daffodil, and complements that with an IBM-specific TDML processor which drives IBM DFDL from Daffodil's TDML dialect test files. The purpose of this tool is to help demonstrate interoperability of IBM DFDL and @@ -12,14 +13,33 @@ portable DFDL schemas. Similar cross testers can be created for other DFDL implementations as well. -Requirements +## Community + +Questions or feedback? Join the discussion on [Discord](https://discord.gg/58JXXk4uDG). + +## Requirements + +* IBM DFDL - included in IBM App Connect Enterprise (ACE). A developer edition is available. +* Daffodil 4.1.0 or newer (resolved automatically by the sbt-daffodil plugin) +* **Java 17** - required by Daffodil 4.1.0. See the Java setup note below. -* IBM DFDL - Note that a developer edition is available. -* Daffodil - Version 2.3.0 or newer +> Earlier versions of Daffodil (3.x and older) used a different internal API. +> See `UPGRADE-NOTES.md` for details on the migration. Note that Daffodil 3.1.0 or newer is required for TDML parser negative test -cases to work properly. Many DFDL schemas do not have such tests and will work -with older revisions of Daffodil back to 2.3.0. +cases to work properly. Many DFDL schemas do not have such tests. + +## Java Setup + +Daffodil 4.1.0 is compiled for Java 17. If your system default Java is older, +pass the `-java-home` flag to every sbt invocation: + +``` +sbt -java-home "C:\Program Files\IBM\ACE\12.0.12.17\common\java17" compile +sbt -java-home "C:\Program Files\IBM\ACE\12.0.12.17\common\java17" test +``` + +Java 17 is bundled with ACE at `<ACE_ROOT>\common\java17`, where `<ACE_ROOT>` is the ACE installation directory. ## Daffodil Setup @@ -34,50 +54,66 @@ Some dependencies are only distributed as part of the IBM DFDL developer edition, which is part of the IBM App Connect Enterprise (ACE) product. > This cross test rig was originally created and tested against IBM ACE-11.0.0.1. +> It has since been updated and verified against ACE 12 (12.0.12.17) and ACE 13 (13.0.6.0).
-In the IBM ACE product you will find a DFDL jar that you can unzip--various -files must be copied out of this jar to enable building and testing the cross -tester. +### Quick setup using the PowerShell script (Windows, ACE 12 or 13) + +A setup script is included that copies all required jars and sample files +automatically. Run it from the project root: + +```powershell +.\setup-ace-jars.ps1 # defaults to ACE 12 default path +.\setup-ace-jars.ps1 -AceVersion 13 # ACE 13 default path +.\setup-ace-jars.ps1 -AcePath "C:\Program Files\IBM\ACE\12.0.12.17" # custom install location +``` + +### Manual setup 1. Copy the IBM DFDL jars into the ``lib`` subdirectory of this project. - - Note that you must also copy the ``samples/dfdlsample_java.jar`` into the - ``lib`` subdirectory, as that also has class files in it that this test rig - uses. + + Also copy ``dfdlsample_java.jar`` from the samples directory into ``lib``, + as it contains class files used by this test rig. + + On ACE 12/13 the jars are pre-extracted — no unzip step is needed: ``` - cp ~/path/to/IBMDFDL/*.jar lib/ - cp ~/path/to/IBMDFDL/samples/*.jar lib/ + cp "<ACE_ROOT>\server\dfdl\lib\*.jar" lib\ + cp "<ACE_ROOT>\server\sample\dfdl\dfdlsample_java.jar" lib\ ``` - The resulting directory looks like this. + + The resulting ``lib`` directory looks like this (ACE 12/13): ``` lib ├── dfdlsample_java.jar - ├── emf.common_2.6.0.jar - ├── emf.ecore_2.6.1.jar - ├── emf.ecore.xmi_2.5.0.jar - ├── gpb.jar ├── ibm-dfdl.jar - ├── icu4j-charsets.jar + ├── ibm-dfdl-eclipse-dependencies.jar ├── icu4j.jar - ├── scd.jar - └── xsd_2.6.0.jar + ├── icu4j-charset.jar + ├── icu4j-localespi.jar + ├── org.eclipse.emf.common-2.30.0.jar + ├── org.eclipse.emf.ecore-2.36.0.jar + ├── org.eclipse.emf.ecore.xmi-2.37.0.jar + ├── org.eclipse.xsd-2.12.0.jar + └── protobuf-java-3.25.5.jar ``` -2. Copy the sample "company" files to ``src/test/resources`` + > **ACE 11 jar names differ.** See `UPGRADE-NOTES.md` for the name mapping + > between ACE 11 and ACE 12/13.
+ +2. Copy the sample "company" files to ``src/test/resources``: ``` - cp ~/path/to/IBMDFDL/company.* src/test/resources + cp "<ACE_ROOT>\server\sample\dfdl\company.*" src\test\resources\ ``` -3. Copy the sample schema file into ``src/test/resources/IBMdefined/`` +3. Copy the sample schema file into ``src/test/resources/IBMdefined/``: ``` - cp ~/path/to/IBMDFDL/IBMdefined/RecordSeparatedFieldFormat.xsd src/test/resources/IBMdefined + cp "<ACE_ROOT>\server\sample\dfdl\IBMdefined\RecordSeparatedFieldFormat.xsd" src\test\resources\IBMdefined\ ``` -The files in step 2 and 3 are examples created by IBM which this test rig will +The files in steps 2 and 3 are examples created by IBM which this test rig will invoke using TDML to show that everything is working properly. After completing those steps, the resulting tree under ``src/test/resources`` @@ -92,27 +128,64 @@ src/test/resources/ ├── company.xsd (copy from IBM DFDL) ├── crossTestRigTestSchema.dfdl.xsd - supplied by this cross test rig, to test the rig itself. ├── crossTestRigTests.tdml - supplied by this cross test rig, to test the rig itself. +├── SimpleEDI.dfdl.xsd - supplied by this cross test rig, example EDI schema. +├── SimpleEDITests.tdml - supplied by this cross test rig, example EDI tests. +├── SimpleEDI-happy.txt - supplied by this cross test rig, valid test data. +├── SimpleEDI-error.txt - supplied by this cross test rig, malformed test data. └── IBMdefined └── RecordSeparatedFieldFormat.xsd (copy from IBM DFDL) ``` +## Ad-hoc Validation (validate.ps1) + +If you just want to parse a data file against a DFDL schema without writing +TDML test files, use the included `validate.ps1` script: + +```powershell +.\validate.ps1 -Schema "path\to\MySchema.xsd" -Data "path\to\mydata.txt" +``` + +The script auto-detects the root element from the schema and prints the parsed +XML infoset on success, or a clear error message on failure. Exit code is 0 +on success and non-zero on failure (usable in CI).
+ +```powershell +# Explicit root element (when schema has multiple doc-root candidates) +.\validate.ps1 -Schema "..." -Data "..." -Root "MyRootElement" + +# Custom ACE install path +.\validate.ps1 -Schema "..." -Data "..." -AcePath "C:\Program Files\IBM\ACE\12.0.12.17" + +# Custom sbt location (if not installed at the default path) +.\validate.ps1 -Schema "..." -Data "..." -SbtPath "D:\tools\sbt\bin\sbt.bat" +``` + +> sbt defaults to `C:\Program Files (x86)\sbt\bin\sbt.bat`. If sbt is installed +> elsewhere, pass `-SbtPath` with the full path to `sbt.bat`. + +> The script automatically copies `IBMdefined/RecordSeparatedFieldFormat.xsd` +> next to your schema if the schema imports it via a relative path. Run +> `setup-ace-jars.ps1` first to populate that folder. + +See [HOW-TO-TEST.md](HOW-TO-TEST.md) for a step-by-step walkthrough. + ## Build & Test -To build and test the cross tester to make sure all dependencies and samples +To build and test the cross tester to make sure all dependencies and sample files have been copied correctly, run the following commands: ``` -sbt compile -sbt test +sbt -java-home "C:\Program Files\IBM\ACE\12.0.12.17\common\java17" compile +sbt -java-home "C:\Program Files\IBM\ACE\12.0.12.17\common\java17" test ``` -This runs the cross testers own self-tests and two versions of the IBM-supplied -example tests. One version is with a TDML file, ``company.tdml``, which refers -to the separate files supplied by IBM. The other is an example of a -self-contained test. It's the same test, just with the file contents collapsed -into the TDML file. This is just a useful thing to know about for creating -small bug-report TDML files, or for creating a TDML file that illustrates a -non-portability issue. +This runs the cross tester's own self-tests, two versions of the IBM-supplied +example tests, and the example SimpleEDI schema tests. 
One version of the +company test is with a TDML file, ``company.tdml``, which refers to the separate +files supplied by IBM. The other is an example of a self-contained test. It's +the same test, just with the file contents collapsed into the TDML file. This is +just a useful thing to know about for creating small bug-report TDML files, or +for creating a TDML file that illustrates a non-portability issue. > See TestIBMDFDLSamples.scala, company.tdml, companySelfContained.tdml. @@ -125,18 +198,18 @@ or those for the DFDLSchemas projects on github, follow these steps: it, with the following command: ``` - sbt publishLocal + sbt -java-home "C:\Program Files\IBM\ACE\12.0.12.17\common\java17" publishLocal ``` -2. Setup the sbt plugin that makes it easy to use this cross test rig, with the +2. Set up the sbt plugin that makes it easy to use this cross test rig, with the following command: ``` mkdir -p ~/.sbt/1.0/plugins/ - cp plugin/ibmDFDLCrossTesterSBTPlugin.scala ~/.sbt/1.0./plugins/ + cp plugin/ibmDFDLCrossTesterSBTPlugin.scala ~/.sbt/1.0/plugins/ ``` -3. Modify ``~/.sbt/1.0./plugins/bmDFDLCrossTesterSBTPlugin.scala`` to include +3. Modify ``~/.sbt/1.0/plugins/ibmDFDLCrossTesterSBTPlugin.scala`` to include the path to the lib directory containing the IBM DFDL jars, for example: ```scala @@ -153,14 +226,14 @@ your file to enable the IBM cross tester: IBMDFDLCrossTesterPlugin.settings ``` -If you use the newer SBT config file style, add the settings like your +If you use the newer SBT config file style, add the settings to your ``project`` like so: ``` .settings(IBMDFDLCrossTesterPlugin.settings) ``` -With the settings added above, he plugin will modify the classpath so that the +With the settings added above, the plugin will modify the classpath so that the IBM DFDL Cross Tester's ibm-tdml-processor will be used instead of the daffodil-tdml-processor. Then when you run ``sbt test`` in your schema project, it will use IBM DFDL to run the tests. 
@@ -170,7 +243,7 @@ Keep in mind the tests must have "ibm" as a member of a TDML testSuite unparserTestCase ``implementations`` attribute. Otherwise the test is skipped for that implementation. Tests intended to be portable should list ``defaultImplementations="ibm daffodil"`` (or on the test case, -``implemenations="ibm daffodil``) so that both implementations will attempt +``implementations="ibm daffodil"``) so that both implementations will attempt to run the test. A test that works only on one of the implementations should leave out the other implementation. diff --git a/UPGRADE-NOTES.md b/UPGRADE-NOTES.md new file mode 100644 index 0000000..dc88737 --- /dev/null +++ b/UPGRADE-NOTES.md @@ -0,0 +1,285 @@ +# Upgrade Notes + +## Overview + +This document covers the changes made to bring the project up to date with +**ACE v12/v13** and **Daffodil 4.1.0**, and to add an initial test schema for +local schema development. + +--- + +## 1. Java Version Requirement + +### Issue +The `sbt-daffodil` plugin resolves Daffodil **4.1.0**, which was compiled with +Java 17 (class file version 61). Running `sbt` under the system default Java 8 +(class file version 52) causes an immediate crash: + +``` +java.lang.UnsupportedClassVersionError: org/apache/daffodil/lib/exceptions/AssertMacros$ +has been compiled by a more recent version of the Java Runtime (class file version 61.0), +this version of the Java Runtime only recognizes class file versions up to 52.0 +``` + +### Fix +Pass `-java-home` to every sbt invocation, pointing at the Java 17 JDK bundled +inside the ACE installation: + +``` +sbt -java-home "C:\Program Files\IBM\ACE\12.0.12.17\common\java17" compile +sbt -java-home "C:\Program Files\IBM\ACE\12.0.12.17\common\java17" test +``` + +For ACE 13, replace the path with: +`C:\Program Files\IBM\ACE\13.0.6.0\common\java17` + +--- + +## 2. 
Daffodil 4.1.0 API Migration (`IBM_DFDL.scala`) + +The source file `src/main/scala/org/apache/daffodil/tdml/processor/IBM_DFDL.scala` +was written against Daffodil 3.x. The following API changes were required. + +### 2.1 Package renames — `lib.api` → `lib.iapi` + +Several types moved from `org.apache.daffodil.lib.api` to +`org.apache.daffodil.lib.iapi`: + +| Old import | New import | +|----|---| +| `lib.api.DaffodilSchemaSource` | `lib.iapi.DaffodilSchemaSource` | +| `lib.api.EmbeddedSchemaSource` | `lib.iapi.EmbeddedSchemaSource` | +| `lib.api.URISchemaSource` | `lib.iapi.URISchemaSource` | +| `lib.api.Diagnostic` | `lib.iapi.Diagnostic` | +| `lib.api.DataLocation` | `api.DataLocation` | +| `lib.api.ValidationMode` | *(removed — see §2.4)* | + +### 2.2 `getDiagnostics` return type — `Seq` → `java.util.List` + +The `TDMLResult` interface now returns `java.util.List` instead +of a Scala `Seq`. Two aliases and a conversion import were added: + +```scala +import java.util.{List => JList} +import scala.jdk.CollectionConverters._ +import org.apache.daffodil.api.{Diagnostic => ApiDiagnostic} +``` + +`getDiagnostics` in both `DiagnosticsMixin` and `IBMTDMLResult` was changed to: + +```scala +def getDiagnostics: JList[ApiDiagnostic] = (diagnostics: Seq[ApiDiagnostic]).asJava +``` + +Internal logic that called `.exists()` on the result was changed to call +`.exists()` on the raw `diagnostics: Seq[IBMTDMLDiagnostic]` directly. + +### 2.3 `getProcessor` signature — removed `useSerializedProcessor` + +The `AbstractTDMLDFDLProcessorFactory.getProcessor` no longer accepts a +`useSerializedProcessor: Boolean` parameter. 
The override signature was updated +accordingly: + +```scala +// Before +override def getProcessor( + schemaSource: DaffodilSchemaSource, + useSerializedProcessor: Boolean, // ← removed + optRootName: Option[String], + optRootNamespace: Option[String], + tunables: Map[String, String]): TDML.CompileResult + +// After +override def getProcessor( + schemaSource: DaffodilSchemaSource, + optRootName: Option[String], + optRootNamespace: Option[String], + tunables: Map[String, String]): Either[JList[ApiDiagnostic], (JList[ApiDiagnostic], TDMLDFDLProcessor)] +``` + +`TDML.CompileResult` no longer exists; the return type is spelled out in full. + +### 2.4 `withValidationMode` → `withValidation(String)` + +The `TDMLDFDLProcessor` interface replaced `withValidationMode(ValidationMode.Type)` +with `withValidation(String)`. The `ValidationMode` enum is gone entirely. + +```scala +// Before +override def withValidationMode(validationMode: ValidationMode.Type): IBMTDMLDFDLProcessor = + copy(shouldValidate = validationMode match { + case ValidationMode.Full => true + case ValidationMode.Limited => true + case ValidationMode.Off => false + }) + +// After +override def withValidation(validationMode: String): IBMTDMLDFDLProcessor = + copy(shouldValidate = validationMode match { + case "full" => true + case "limited" => true + case "off" => false + }) +``` + +### 2.5 `withDebugging` removed + +`TDMLDFDLProcessor.withDebugging(Boolean)` was removed from the interface. The +override was deleted. `withDebugger(Object)` remains and still throws `???`. + +### 2.6 `Diagnostic` constructor — added trailing `Nil` argument + +The `lib.iapi.Diagnostic` abstract class gained a trailing `Seq[Any]` parameter +for format-string arguments. 
The `IBMTDMLDiagnostic` super-constructor call was +updated to add `Nil` and to use positional (not named) arguments, which is +required in Scala 3: + +```scala +// Before +extends Diagnostic(Maybe.Nope, Maybe.Nope, + maybeCause = Maybe(throwable), + maybeFormatString = Maybe(...)) + +// After +extends Diagnostic( + Maybe.Nope, + Maybe.Nope, + Maybe(throwable), + Maybe(...), + Nil) +``` + +### 2.7 `getSomeMessage.get` → `getMessage` + +The `Diagnostic.getSomeMessage` helper method was removed. The standard Java +`getMessage()` method is used instead: + +```scala +// Before +diagnostics.map(_.getSomeMessage.get).mkString("\n") + +// After +diagnostics.map(_.getMessage).mkString("\n") +``` + +### 2.8 `EmbeddedSchemaSource.copy` — all arguments now required + +The `EmbeddedSchemaSource.copy` method no longer accepts named/optional +parameters. All three fields must be passed explicitly: + +```scala +// Before +ess.copy(node = newNode) + +// After +ess.copy(newNode, ess.nameHint, ess.optTmpDir) +``` + +### 2.9 Scala 3 vararg syntax + +Two Scala 2 vararg patterns in `RemoveDafintTransformer` were updated to Scala 3 +syntax: + +```scala +// Before +case Elem(prefix, label, attributes, scope, children @ _ *) => + new Elem(prefix, label, newAttributes, scope, true, children: _*) + +// After +case Elem(prefix, label, attributes, scope, children*) => + new Elem(prefix, label, newAttributes, scope, true, children*) +``` + +--- + +## 3. ACE v12 / v13 Jar Changes + +The project README previously documented only the ACE 11 jar list.
The jars shipped with +ACE 12 and 13 are different: + +| ACE 11 name | ACE 12/13 name | +|---|---| +| `gpb.jar` | `protobuf-java-3.25.5.jar` | +| `scd.jar` | *(gone — functionality folded into ibm-dfdl-eclipse-dependencies.jar)* | +| `icu4j-charsets.jar` | `icu4j-charset.jar` | +| `emf.common_2.6.0.jar` | `org.eclipse.emf.common-2.30.0.jar` | +| `emf.ecore_2.6.1.jar` | `org.eclipse.emf.ecore-2.36.0.jar` | +| `emf.ecore.xmi_2.5.0.jar` | `org.eclipse.emf.ecore.xmi-2.37.0.jar` | +| `xsd_2.6.0.jar` | `org.eclipse.xsd-2.12.0.jar` | +| *(new)* | `ibm-dfdl-eclipse-dependencies.jar` | +| *(new)* | `icu4j-localespi.jar` | + +Both ACE 12 and ACE 13 ship the same 10 runtime jars plus `dfdlsample_java.jar` +from the samples directory (11 jars total in `lib/`). + +--- + +## 4. New Files Added + +### `setup-ace-jars.ps1` + +A PowerShell script that copies the required IBM DFDL jars and sample files from +an ACE installation into the project. Accepts an `-AceVersion` parameter +(`"12"` or `"13"`, defaulting to `"12"`) or an explicit `-AcePath` install root: + +```powershell +.\setup-ace-jars.ps1 # uses ACE 12 +.\setup-ace-jars.ps1 -AceVersion 13 +``` + +What it copies (`<ACE_ROOT>` is the ACE installation directory): +- All `*.jar` from `<ACE_ROOT>\server\dfdl\lib\` → `lib\` +- `dfdlsample_java.jar` from `<ACE_ROOT>\server\sample\dfdl\` → `lib\` +- `company.txt`, `company.xml`, `company.xsd` from `<ACE_ROOT>\server\sample\dfdl\` → `src\test\resources\` +- `RecordSeparatedFieldFormat.xsd` → `src\test\resources\IBMdefined\` + +### `src/test/resources/SimpleEDI.dfdl.xsd` + +A self-contained EDI-inspired DFDL schema for use as a development and testing +template. Models an invoice-like message with `HDR`, `DTL` (0–99 occurrences), +and `TRL` segments, delimited by `+` (field separator) and `'` (segment +terminator). Compatible with both IBM DFDL and Daffodil.
+ +### `src/test/resources/SimpleEDI-happy.txt` + +Valid test input for `SimpleEDI.dfdl.xsd`: +``` +HDR+SENDER01+RECEIVER1+20250301+INVOIC' +DTL+1+WIDGET-A+100+EA' +DTL+2+GADGET-B+50+BX' +TRL+2' +``` + +### `src/test/resources/SimpleEDI-error.txt` + +Intentionally malformed input (TRL missing its `ControlCount` field) used to +verify error detection. + +### `src/test/resources/SimpleEDITests.tdml` + +TDML test suite with three test cases targeting both IBM DFDL and Daffodil +(`defaultImplementations="ibm daffodil"`): + +| Test | What it checks | +|---|---| +| `parseHappy` | Parses the happy-flow file and verifies the infoset | +| `roundTrip` | Two-pass round-trip (parse → unparse → parse) | +| `parseError` | Expects a parse error on the malformed input | + +Note: `parseHappy` uses `roundTrip="none"` because the input file contains CRLF +newlines after each segment terminator that IBM DFDL does not reproduce on +unparse (the schema allows but does not require trailing whitespace). + +### `src/test/scala/io/github/openDFDL/TestSimpleEDI.scala` + +JUnit test runner for the `SimpleEDITests.tdml` suite. + +--- + +## 5. 
Git Remote + +The git remote `origin` was updated to point to the project fork: + +``` +https://github.com/matthiasblomme/ibmDFDLCrossTester.git +``` diff --git a/demo/DemoSchema.dfdl.xsd b/demo/DemoSchema.dfdl.xsd new file mode 100644 index 0000000..f315de9 --- /dev/null +++ b/demo/DemoSchema.dfdl.xsd @@ -0,0 +1,138 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/demo/IBMdefined/.keep b/demo/IBMdefined/.keep new file mode 100644 index 0000000..e69de29 diff --git a/demo/IBMdefined/RecordSeparatedFieldFormat.xsd b/demo/IBMdefined/RecordSeparatedFieldFormat.xsd new file mode 100644 index 0000000..2a4ab3a --- /dev/null +++ b/demo/IBMdefined/RecordSeparatedFieldFormat.xsd @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/demo/sample.txt b/demo/sample.txt new file mode 100644 index 0000000..54fdcd2 --- /dev/null +++ b/demo/sample.txt @@ -0,0 +1 @@ +John,Doe diff --git a/setup-ace-jars.ps1 b/setup-ace-jars.ps1 new file mode 100644 index 0000000..47794f6 --- /dev/null +++ b/setup-ace-jars.ps1 @@ -0,0 +1,117 @@ +<# +.SYNOPSIS + Copies IBM DFDL jars and sample files from an ACE installation into the cross-tester project. + +.PARAMETER AceVersion + The ACE major version to use. Supported values: "12" (default), "13". + Ignored when -AcePath is supplied. + +.PARAMETER AcePath + Full path to the ACE installation root (e.g. "C:\Program Files\IBM\ACE\12.0.12.17"). + When supplied, -AceVersion is ignored and no version label is required. 
+ +.EXAMPLE + .\setup-ace-jars.ps1 + .\setup-ace-jars.ps1 -AceVersion 13 + .\setup-ace-jars.ps1 -AcePath "C:\Program Files\IBM\ACE\12.0.12.17" +#> +param( + [string]$AceVersion = "12", + [string]$AcePath = "" +) + +$scriptDir = $PSScriptRoot + +if ($AcePath -ne "") { + # Custom path supplied — use it directly + $aceRoot = $AcePath + $aceLabel = $AcePath +} else { + # Look up path from version string + $acePaths = @{ + "12" = "C:\Program Files\IBM\ACE\12.0.12.17" + "13" = "C:\Program Files\IBM\ACE\13.0.6.0" + } + + if (-not $acePaths.ContainsKey($AceVersion)) { + Write-Error "Unsupported AceVersion '$AceVersion'. Supported: $($acePaths.Keys -join ', '). Use -AcePath to supply a custom path." + exit 1 + } + + $aceRoot = $acePaths[$AceVersion] + $aceLabel = "ACE $AceVersion" +} + +if (-not (Test-Path $aceRoot)) { + Write-Error "ACE install not found at: $aceRoot" + exit 1 +} + +# Source directories inside the ACE installation +$dfdlLibDir = Join-Path $aceRoot "server\dfdl\lib" +$sampleDfdlDir = Join-Path $aceRoot "server\sample\dfdl" + +# Destination directories inside this project (relative to the script location) +$libDir = Join-Path $scriptDir "lib" +$testResDir = Join-Path $scriptDir "src\test\resources" +$ibmDefinedDir = Join-Path $testResDir "IBMdefined" + +# Ensure destination directories exist +New-Item -ItemType Directory -Force -Path $libDir | Out-Null +New-Item -ItemType Directory -Force -Path $testResDir | Out-Null +New-Item -ItemType Directory -Force -Path $ibmDefinedDir | Out-Null + +# Outcome lists printed in the summary: one line per copied file / per missing source +$copied = @() +$errors = @() + +# Copies $src to $dst if it exists and records the outcome. The lists are addressed +# with the script: modifier because a plain assignment inside a function would +# create a function-local variable instead of updating the script-level list. +function Copy-File($src, $dst) { + if (Test-Path $src) { + Copy-Item -Force $src $dst + $script:copied += " $src -> $dst" + } else { + $script:errors += " NOT FOUND: $src" + } +} + +Write-Host "" +Write-Host "=== IBM DFDL Cross-Tester Setup ($aceLabel) ===" -ForegroundColor Cyan +Write-Host "ACE root : $aceRoot" +Write-Host "" + +# 1. Copy all DFDL runtime jars from server/dfdl/lib/ +Write-Host "Copying DFDL runtime jars from $dfdlLibDir ..."
+Get-ChildItem -Path $dfdlLibDir -Filter "*.jar" -ErrorAction Stop | ForEach-Object { + $dst = Join-Path $libDir $_.Name + Copy-Item -Force $_.FullName $dst + $copied += " $($_.FullName) -> $dst" +} + +# 2. Copy dfdlsample_java.jar from sample directory +Write-Host "Copying dfdlsample_java.jar ..." +Copy-File (Join-Path $sampleDfdlDir "dfdlsample_java.jar") $libDir + +# 3. Copy company sample files +Write-Host "Copying company sample files ..." +foreach ($name in @("company.txt", "company.xml", "company.xsd")) { + Copy-File (Join-Path $sampleDfdlDir $name) $testResDir +} + +# 4. Copy IBMdefined schema +Write-Host "Copying RecordSeparatedFieldFormat.xsd ..." +Copy-File (Join-Path $sampleDfdlDir "IBMdefined\RecordSeparatedFieldFormat.xsd") $ibmDefinedDir + +# Summary +Write-Host "" +if ($copied.Count -gt 0) { + Write-Host "Copied ($($copied.Count) files):" -ForegroundColor Green + $copied | ForEach-Object { Write-Host $_ } +} +if ($errors.Count -gt 0) { + Write-Host "" + Write-Host "Warnings - files not found:" -ForegroundColor Yellow + $errors | ForEach-Object { Write-Host $_ } +} + +Write-Host "" +Write-Host "lib/ contents:" -ForegroundColor Cyan +Get-ChildItem -Path $libDir -Filter "*.jar" | Select-Object -ExpandProperty Name | ForEach-Object { Write-Host " $_" } + +# Every file routed through Copy-File is needed to build or test the rig, so a +# missing source must not be reported as a successful setup (and must fail CI). +if ($errors.Count -gt 0) { + Write-Host "" + Write-Host "Setup incomplete: $($errors.Count) required file(s) not found (listed above)." -ForegroundColor Red + exit 1 +} + +Write-Host "" +Write-Host "Setup complete. Run 'sbt compile' then 'sbt test' to verify."
-ForegroundColor Green diff --git a/src/main/scala/org/apache/daffodil/tdml/processor/IBM_DFDL.scala b/src/main/scala/org/apache/daffodil/tdml/processor/IBM_DFDL.scala index 46cdfc1..7b69b6b 100644 --- a/src/main/scala/org/apache/daffodil/tdml/processor/IBM_DFDL.scala +++ b/src/main/scala/org/apache/daffodil/tdml/processor/IBM_DFDL.scala @@ -19,19 +19,21 @@ package org.apache.daffodil.processor.tdml import java.io.StringReader import java.net.URI +import java.util.{List => JList} +import scala.jdk.CollectionConverters._ import scala.xml.Node import scala.xml.Elem import scala.xml.transform.RuleTransformer import scala.xml.transform.RewriteRule import org.apache.commons.io.input.ReaderInputStream -import org.apache.daffodil.lib.api.DaffodilSchemaSource -import org.apache.daffodil.lib.api.DataLocation -import org.apache.daffodil.lib.api.Diagnostic -import org.apache.daffodil.lib.api.EmbeddedSchemaSource -import org.apache.daffodil.lib.api.URISchemaSource -import org.apache.daffodil.lib.api.ValidationMode +import org.apache.daffodil.api.{Diagnostic => ApiDiagnostic} +import org.apache.daffodil.api.DataLocation +import org.apache.daffodil.lib.iapi.Diagnostic +import org.apache.daffodil.lib.iapi.DaffodilSchemaSource +import org.apache.daffodil.lib.iapi.EmbeddedSchemaSource +import org.apache.daffodil.lib.iapi.URISchemaSource import org.apache.daffodil.lib.exceptions.Assert import org.apache.daffodil.lib.externalvars.Binding import org.apache.daffodil.lib.util.Maybe @@ -83,9 +85,12 @@ final class IDFDLDiagFromThrowable(cause: Throwable) extends IDFDLDiagnostic { } final class IBMTDMLDiagnostic(iddArg: IDFDLDiagnostic, throwable: Throwable, mode: IBMDFDLMode.Type) - extends Diagnostic(Maybe.Nope, Maybe.Nope, - maybeCause = Maybe(throwable), - maybeFormatString = Maybe(if (iddArg ne null) iddArg.getSummary() else null)) { + extends Diagnostic( + Maybe.Nope, + Maybe.Nope, + Maybe(throwable), + Maybe(if (iddArg ne null) iddArg.getSummary() else null), + Nil) { lazy val idd: 
IDFDLDiagnostic = if (iddArg ne null) iddArg @@ -113,12 +118,13 @@ final class IBMTDMLDiagnostic(iddArg: IDFDLDiagnostic, throwable: Throwable, mod case DFDLDiagnosticType.WARNING | DFDLDiagnosticType.RECOVERABLEERROR => false case _ => true } + /** * Define as "Parse", "Unparse", "Schema Definition", "Configuration". * * This is combined with the word "Error" or "Warning" */ - override protected def modeName: String = idd.getType match { + override def modeName: String = idd.getType match { case DFDLDiagnosticType.PROCESSINGERROR => mode.toString() case DFDLDiagnosticType.SCHEMADEFINITIONERROR => "Schema Definition" case DFDLDiagnosticType.RECOVERABLEERROR => mode.toString() @@ -137,9 +143,9 @@ object RemoveDafintTransformer { private lazy val transformer = { val removeDafintRule = new RewriteRule() { override def transform(node: Node) = node match { - case Elem(prefix, label, attributes, scope, children @ _ *) => { + case Elem(prefix, label, attributes, scope, children*) => { val newAttributes = attributes.filter(!_.prefixedKey.startsWith("dafint:")) - new Elem(prefix, label, newAttributes, scope, true, children: _*) + new Elem(prefix, label, newAttributes, scope, true, children*) } case other => other } @@ -184,7 +190,7 @@ final class TDMLDFDLProcessorFactory private ( private def toss(e: Throwable) = { val exc = e System.err.println("DFDL exception creating grammar: " + exc.getMessage) - System.err.println(diagnostics.map(_.getSomeMessage.get).mkString("\n")) + System.err.println(diagnostics.map(_.getMessage).mkString("\n")) throw exc } @@ -192,10 +198,9 @@ final class TDMLDFDLProcessorFactory private ( override def getProcessor( schemaSource: DaffodilSchemaSource, - useSerializedProcessor: Boolean, optRootName: Option[String], optRootNamespace: Option[String], - tunables: Map[String, String]): TDML.CompileResult = { + tunables: Map[String, String]): Either[JList[ApiDiagnostic], (JList[ApiDiagnostic], TDMLDFDLProcessor)] = { val rootNamespace = 
optRootNamespace.getOrElse(null) @@ -207,12 +212,11 @@ final class TDMLDFDLProcessorFactory private ( val schemaUri: URI = schemaSource match { case ess: EmbeddedSchemaSource => { val newNode = RemoveDafintTransformer(ess.node) - ess.copy(node = newNode).uriForLoading + ess.copy(newNode, ess.nameHint, ess.optTmpDir).uriForLoading } case ss => schemaSource.uriForLoading } - Assert.invariant(schemaSource.isInstanceOf[URISchemaSource]) val grammar = try { // @@ -236,7 +240,7 @@ final class TDMLDFDLProcessorFactory private ( toss(e) } } - if (grammar == null || getDiagnostics.exists(_.isError)) { + if (grammar == null || diagnostics.exists(_.isError)) { Left(getDiagnostics) } else { Right((getDiagnostics, new IBMTDMLDFDLProcessor(diagnostics, grammar, bindings, optRootName, rootNamespace))) @@ -252,7 +256,7 @@ sealed trait DiagnosticsMixin { def isError: Boolean = diagnostics.exists { _.isError } - def getDiagnostics: Seq[Diagnostic] = diagnostics + def getDiagnostics: JList[ApiDiagnostic] = (diagnostics: Seq[ApiDiagnostic]).asJava protected var diagnostics: Seq[IBMTDMLDiagnostic] = Seq() @@ -360,10 +364,6 @@ final class IBMTDMLDFDLProcessor private ( shouldValidate = shouldValidate) override def withDebugger(db: Object): IBMTDMLDFDLProcessor = ??? - override def withDebugging(onOff: Boolean): IBMTDMLDFDLProcessor = { - if (onOff) ??? 
- this - } override def withExternalDFDLVariables(externalVarBindings: Seq[Binding]): IBMTDMLDFDLProcessor = copy(bindings = externalVarBindings) @@ -371,11 +371,11 @@ final class IBMTDMLDFDLProcessor private ( override def withTracing(onOff: Boolean): IBMTDMLDFDLProcessor = copy(isTraceMode = onOff) - override def withValidationMode(validationMode: ValidationMode.Type): IBMTDMLDFDLProcessor = + override def withValidation(validationMode: String): IBMTDMLDFDLProcessor = copy(shouldValidate = validationMode match { - case ValidationMode.Full => true - case ValidationMode.Limited => true - case ValidationMode.Off => false + case "full" => true + case "limited" => true + case "off" => false case _ => Assert.usageError("validation mode " + validationMode + " is unsupported.") }) @@ -510,7 +510,7 @@ sealed class IBMTDMLResult(diags: Seq[IBMTDMLDiagnostic]) { def isValidationError: Boolean = diagnostics.exists { _.getType() == DFDLDiagnosticType.VALIDATIONERROR } - def getDiagnostics: Seq[Diagnostic] = diagnostics + def getDiagnostics: JList[ApiDiagnostic] = (diagnostics: Seq[ApiDiagnostic]).asJava def addDiagnostic(diag: Diagnostic): Unit = { diagnostics = diag.asInstanceOf[IBMTDMLDiagnostic] +: diagnostics } diff --git a/src/test/resources/SimpleEDI-error.txt b/src/test/resources/SimpleEDI-error.txt new file mode 100644 index 0000000..b4b88ea --- /dev/null +++ b/src/test/resources/SimpleEDI-error.txt @@ -0,0 +1,3 @@ +HDR+SENDER01+RECEIVER1+20250301+INVOIC' +DTL+1+WIDGET-A+100+EA' +TRL' diff --git a/src/test/resources/SimpleEDI-happy.txt b/src/test/resources/SimpleEDI-happy.txt new file mode 100644 index 0000000..95f1387 --- /dev/null +++ b/src/test/resources/SimpleEDI-happy.txt @@ -0,0 +1,4 @@ +HDR+SENDER01+RECEIVER1+20250301+INVOIC' +DTL+1+WIDGET-A+100+EA' +DTL+2+GADGET-B+50+BX' +TRL+2' diff --git a/src/test/resources/SimpleEDI.dfdl.xsd b/src/test/resources/SimpleEDI.dfdl.xsd new file mode 100644 index 0000000..3d199bf --- /dev/null +++ 
b/src/test/resources/SimpleEDI.dfdl.xsd @@ -0,0 +1,111 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/resources/SimpleEDITests.tdml b/src/test/resources/SimpleEDITests.tdml new file mode 100644 index 0000000..5776918 --- /dev/null +++ b/src/test/resources/SimpleEDITests.tdml @@ -0,0 +1,117 @@ + + + + + + + + + SimpleEDI-happy.txt + + + + + + + SENDER01 + RECEIVER1 + 20250301 + INVOIC + + + 1 + WIDGET-A + 100 + EA + + + 2 + GADGET-B + 50 + BX + + + 2 + + + + + + + + + + + SimpleEDI-happy.txt + + + + + + + SENDER01 + RECEIVER1 + 20250301 + INVOIC + + + 1 + WIDGET-A + 100 + EA + + + 2 + GADGET-B + 50 + BX + + + 2 + + + + + + + + + + + SimpleEDI-error.txt + + + + Parse Error + + + + diff --git a/src/test/scala/io/github/openDFDL/TestSimpleEDI.scala b/src/test/scala/io/github/openDFDL/TestSimpleEDI.scala new file mode 100644 index 0000000..fd5c5cb --- /dev/null +++ b/src/test/scala/io/github/openDFDL/TestSimpleEDI.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package io.github.openDFDL
+
+import org.apache.daffodil.tdml.Runner
+import org.junit.Test
+
+object TestSimpleEDI {
+  lazy val runner = Runner("", "SimpleEDITests.tdml")
+}
+
+class TestSimpleEDI {
+  import TestSimpleEDI._
+
+  @Test def parseHappy(): Unit = { runner.runOneTest("parseHappy") }
+  @Test def roundTrip(): Unit = { runner.runOneTest("roundTrip") }
+  @Test def parseError(): Unit = { runner.runOneTest("parseError") }
+}
diff --git a/src/test/scala/io/github/openDFDL/ValidateFile.scala b/src/test/scala/io/github/openDFDL/ValidateFile.scala
new file mode 100644
index 0000000..090fe83
--- /dev/null
+++ b/src/test/scala/io/github/openDFDL/ValidateFile.scala
@@ -0,0 +1,149 @@
+package io.github.openDFDL
+
+import java.io.{File, FileInputStream}
+
+import scala.xml.XML
+
+import org.xml.sax.{ErrorHandler => SaxErrorHandler, InputSource, SAXParseException}
+import org.junit.Test
+import org.junit.Assert
+
+import com.ibm.dfdl.grammar.DFDLGrammarFactory
+import com.ibm.dfdl.processor.{DFDLProcessorFactory, IDFDLDiagnostic, IDFDLProcessorErrorHandler}
+import com.ibm.dfdl.sample.sax.reader.DFDLReader
+
+import org.apache.daffodil.lib.xml.DFDLCatalogResolver
+
+/**
+ * Ad-hoc IBM DFDL validator driven entirely by system properties.
+ * Run via validate.ps1 or:
+ *   sbt -Dvalidate.schema=<path> -Dvalidate.data=<path> [-Dvalidate.root=<name>] \
+ *       "testOnly io.github.openDFDL.ValidateFile"
+ *
+ * System properties read by validate():
+ *   validate.schema    - required; path to the DFDL schema
+ *   validate.data      - required; path to the data file to parse
+ *   validate.root      - optional; root element name, auto-detected when absent or empty
+ *   validate.namespace - optional; root namespace, only consulted when validate.root is set
+ *   validate.verbose   - optional; "true" attaches a TraceListener to compiler and parser
+ */
+class ValidateFile {
+
+  /** Messages gathered by errorHandler during schema compilation and parsing. */
+  private val errors = scala.collection.mutable.ListBuffer[String]()
+
+  /** Combined IBM DFDL + SAX error handler: errors are collected, warnings go to stderr. */
+  private val errorHandler = new IDFDLProcessorErrorHandler with SaxErrorHandler {
+    override def processingError(d: IDFDLDiagnostic): Unit = errors += s"[Processing error] ${d.getSummary}"
+    override def schemaDefinitionError(d: IDFDLDiagnostic): Unit = errors += s"[Schema error] ${d.getSummary}"
+    override def validationError(d: IDFDLDiagnostic): Unit = errors += s"[Validation error] ${d.getSummary}"
+    override def warning(d: IDFDLDiagnostic): Unit = System.err.println(s"[Warning] ${d.getSummary}")
+    override def error(e: SAXParseException): Unit = errors += s"[Parse error] ${e.getMessage}"
+    override def fatalError(e: SAXParseException): Unit = errors += s"[Fatal error] ${e.getMessage}"
+    override def warning(e: SAXParseException): Unit = System.err.println(s"[Warning] ${e.getMessage}")
+  }
+
+  /**
+   * Reads the schema XSD and returns (rootElementName, targetNamespace).
+   * Prefers elements annotated with ibmSchExtn:docRoot="true" or ibmDfdlExtn:docRoot="true",
+   * falls back to the first top-level xsd:element. The namespace is null when the schema
+   * has no targetNamespace attribute (or an empty one).
+   */
+  private def autoDetectRoot(schemaFile: File): (String, String) = {
+    val schema = XML.loadFile(schemaFile)
+    val targetNs = (schema \ "@targetNamespace").text
+
+    val ibmSchExtn = "http://www.ibm.com/schema/extensions"
+    val ibmDfdlExtn = "http://www.ibm.com/dfdl/extensions"
+
+    val topLevelElems = schema \ "element"
+    val docRootElem = topLevelElems.find { e =>
+      e.attribute(ibmSchExtn, "docRoot").exists(_.text == "true") ||
+      e.attribute(ibmDfdlExtn, "docRoot").exists(_.text == "true")
+    }
+
+    val elem = docRootElem.orElse(topLevelElems.headOption).getOrElse(
+      sys.error("No top-level elements found in schema — use -Dvalidate.root=<name>")
+    )
+    val name = (elem \ "@name").text
+    (name, if (targetNs.isEmpty) null else targetNs)
+  }
+
+  /**
+   * Compiles the schema, parses the data file, and prints the text accumulated in the
+   * StringBuilder handed to the content handler. Fails the test when compilation or
+   * parsing reported any error.
+   */
+  @Test def validate(): Unit = {
+    val schemaPath = sys.props.getOrElse("validate.schema",
+      throw new IllegalArgumentException("Required: -Dvalidate.schema=<path>"))
+    val dataPath = sys.props.getOrElse("validate.data",
+      throw new IllegalArgumentException("Required: -Dvalidate.data=<path>"))
+
+    val schemaFile = new File(schemaPath)
+    val dataFile = new File(dataPath)
+    require(schemaFile.exists(), s"Schema not found: $schemaPath")
+    require(dataFile.exists(), s"Data file not found: $dataPath")
+
+    val rootProp = sys.props.get("validate.root").filter(_.nonEmpty)
+    val (rootName, rootNamespace) = rootProp match {
+      // Treat an empty namespace property like an unset one, mirroring autoDetectRoot,
+      // which also reports "no namespace" as null.
+      case Some(r) => (r, sys.props.get("validate.namespace").filter(_.nonEmpty).orNull)
+      case None => autoDetectRoot(schemaFile)
+    }
+
+    val verbose = sys.props.get("validate.verbose").contains("true")
+
+    println(s"Schema : $schemaPath")
+    println(s"Data file : $dataPath")
+    println(s"Root : $rootName namespace: ${Option(rootNamespace).getOrElse("(none)")}")
+    if (verbose) println("Verbose : on (IBM DFDL service trace enabled)")
+    println("")
+
+    // Optional trace listener — attaches to both grammar factory and parser when verbose=true
+    val tracer = if (verbose) Some(new TraceListener()) else None
+
+    // --- 1. Compile schema ---
+    val grammarFactory = new DFDLGrammarFactory()
+    grammarFactory.setErrorHandler(errorHandler)
+    tracer.foreach(grammarFactory.setServiceTraceListener)
+    val grammar = grammarFactory.buildGrammarFromSchema(schemaFile.toURI, DFDLCatalogResolver.get)
+
+    if (grammar == null || errors.nonEmpty) {
+      System.err.println("=== SCHEMA COMPILATION FAILED ===")
+      errors.foreach(System.err.println)
+      Assert.fail("Schema compilation failed:\n" + errors.mkString("\n"))
+    }
+
+    // --- 2. Parse data file ---
+    val processorFactory = new DFDLProcessorFactory()
+    val parser = processorFactory.createParser
+    parser.setGrammar(grammar)
+    parser.setRootElement(rootName, rootNamespace)
+    tracer.foreach(parser.addServiceTraceListener)
+
+    val sb = new java.lang.StringBuilder
+    val contentHandler = new XMLSAXContentHandler1(sb)
+    val dfdlReader = new DFDLReader2(parser)
+    dfdlReader.setContentHandler(contentHandler)
+    dfdlReader.setErrorHandler(errorHandler)
+    dfdlReader.setFeature(DFDLReader.SAX_FEATURE_NAMESPACES, true)
+
+    // Close the data stream on every exit path, including when parse() throws.
+    val dataStream = new FileInputStream(dataFile)
+    try {
+      dfdlReader.parse(new InputSource(dataStream))
+    } finally {
+      dataStream.close()
+    }
+
+    if (errors.nonEmpty) {
+      System.err.println("=== PARSE FAILED ===")
+      errors.foreach(System.err.println)
+      Assert.fail("Parse failed:\n" + errors.mkString("\n"))
+    }
+
+    println("=== PARSE SUCCESSFUL ===")
+    println(sb.toString())
+  }
+}
diff --git a/validate.ps1 b/validate.ps1
new file mode 100644
index 0000000..6a5d240
--- /dev/null
+++ b/validate.ps1
@@ -0,0 +1,150 @@
+<#
+.SYNOPSIS
+    Validates a data file against an IBM DFDL schema using the ibmDFDLCrossTester rig.
+
+.DESCRIPTION
+    Resolves the ACE install, makes sure IBMdefined/ sits next to the schema, then runs
+    the io.github.openDFDL.ValidateFile test through sbt from the project directory
+    (the folder containing this script). Exits with sbt's exit code.
+
+.PARAMETER Schema
+    Path to the DFDL schema (.xsd). A relative path is resolved against the current directory.
+
+.PARAMETER Data
+    Path to the data file to validate. A relative path is resolved against the current directory.
+
+.PARAMETER Root
+    Root element name. Auto-detected from the schema if omitted
+    (looks for ibmSchExtn:docRoot="true" or ibmDfdlExtn:docRoot="true").
+
+.PARAMETER AceVersion
+    ACE major version: "12" (default) or "13". Ignored when -AcePath is supplied.
+
+.PARAMETER AcePath
+    Full path to the ACE installation root. Overrides -AceVersion.
+
+.PARAMETER Trace
+    Attach IBM DFDL's service trace listener. Prints info/error/fatal trace lines to
+    stderr, including schema path and byte offset for each parse decision. Useful for
+    diagnosing parse failures.
+
+.PARAMETER SbtPath
+    Full path to sbt.bat. Defaults to "C:\Program Files (x86)\sbt\bin\sbt.bat".
+
+.EXAMPLE
+    .\validate.ps1 -Schema "C:\path\to\MySchema.xsd" -Data "C:\path\to\mydata.txt"
+    .\validate.ps1 -Schema "..." -Data "..." -Root "MyRootElement"
+    .\validate.ps1 -Schema "..." -Data "..." -AceVersion 13
+    .\validate.ps1 -Schema "..." -Data "..." -AcePath "C:\Program Files\IBM\ACE\12.0.12.17"
+    .\validate.ps1 -Schema "..." -Data "..." -Trace
+    .\validate.ps1 -Schema "..." -Data "..." -SbtPath "D:\tools\sbt\bin\sbt.bat"
+#>
+param(
+    [Parameter(Mandatory=$true)] [string]$Schema,
+    [Parameter(Mandatory=$true)] [string]$Data,
+    [string]$Root = "",
+    [string]$AceVersion = "12",
+    [string]$AcePath = "",
+    [string]$SbtPath = "C:\Program Files (x86)\sbt\bin\sbt.bat",
+    [switch]$Trace
+)
+
+$scriptDir = $PSScriptRoot
+
+# --- Resolve ACE root ---
+if ($AcePath -ne "") {
+    $aceRoot = $AcePath
+    $aceLabel = $AcePath
+} else {
+    $acePaths = @{
+        "12" = "C:\Program Files\IBM\ACE\12.0.12.17"
+        "13" = "C:\Program Files\IBM\ACE\13.0.6.0"
+    }
+    if (-not $acePaths.ContainsKey($AceVersion)) {
+        Write-Error "Unsupported AceVersion '$AceVersion'. Use -AcePath for custom installs."
+        exit 1
+    }
+    $aceRoot = $acePaths[$AceVersion]
+    $aceLabel = "ACE $AceVersion"
+}
+
+if (-not (Test-Path -LiteralPath $aceRoot)) {
+    Write-Error "ACE install not found at: $aceRoot"
+    exit 1
+}
+
+$java17Home = Join-Path $aceRoot "common\java17"
+
+# --- Validate inputs ---
+# -LiteralPath keeps '[' and ']' in file names from being read as wildcards;
+# -PathType Leaf rejects a directory passed by mistake.
+if (-not (Test-Path -LiteralPath $Schema -PathType Leaf)) {
+    Write-Error "Schema not found: $Schema"
+    exit 1
+}
+if (-not (Test-Path -LiteralPath $Data -PathType Leaf)) {
+    Write-Error "Data file not found: $Data"
+    exit 1
+}
+# Checked up front so nothing is copied when sbt cannot be started anyway.
+if (-not (Test-Path -LiteralPath $SbtPath -PathType Leaf)) {
+    Write-Error "sbt not found at: $SbtPath. Use -SbtPath to specify the correct location."
+    exit 1
+}
+
+# sbt is started from the project directory further down, so pin both inputs to
+# absolute paths while the caller's working directory is still the current one.
+$Schema = (Resolve-Path -LiteralPath $Schema).ProviderPath
+$Data = (Resolve-Path -LiteralPath $Data).ProviderPath
+
+# --- Ensure IBMdefined/ is next to the schema ---
+# Schemas that import IBMdefined/RecordSeparatedFieldFormat.xsd use a relative path,
+# so the IBMdefined/ folder must exist in the same directory as the schema.
+$schemaDir = [System.IO.Path]::GetDirectoryName($Schema)
+$ibmDefinedAtSchema = Join-Path $schemaDir "IBMdefined"
+$ibmDefinedInProject = Join-Path $scriptDir "src\test\resources\IBMdefined"
+
+if (-not (Test-Path -LiteralPath $ibmDefinedAtSchema)) {
+    if (Test-Path -LiteralPath $ibmDefinedInProject) {
+        Write-Host "Copying IBMdefined/ to schema directory ..." -ForegroundColor Cyan
+        Copy-Item -Recurse -Force -LiteralPath $ibmDefinedInProject -Destination $schemaDir
+    } else {
+        Write-Warning "IBMdefined/ not found in project. Run setup-ace-jars.ps1 first if the schema imports RecordSeparatedFieldFormat.xsd."
+    }
+}
+
+# --- Build sbt arguments ---
+$sbtArgs = @(
+    "-java-home", $java17Home,
+    "-Dvalidate.schema=$Schema",
+    "-Dvalidate.data=$Data"
+)
+if ($Root -ne "") {
+    $sbtArgs += "-Dvalidate.root=$Root"
+}
+if ($Trace) {
+    $sbtArgs += "-Dvalidate.verbose=true"
+}
+$sbtArgs += "testOnly io.github.openDFDL.ValidateFile"
+
+# --- Run ---
+Write-Host ""
+$header = "=== IBM DFDL Validator ($aceLabel)"
+if ($Trace) { $header += " [verbose]" }
+$header += " ==="
+Write-Host $header -ForegroundColor Cyan
+Write-Host "Schema : $Schema"
+Write-Host "Data : $Data"
+if ($Root -ne "") { Write-Host "Root : $Root" }
+Write-Host ""
+
+# sbt looks for the build in its working directory, so run it from the project
+# root and put the caller's location back afterwards.
+Push-Location -LiteralPath $scriptDir
+try {
+    & $SbtPath @sbtArgs
+    $sbtExit = $LASTEXITCODE
+} finally {
+    Pop-Location
+}
+exit $sbtExit