Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ jobs:
with:
project: ${{ matrix.pkg.dir }}
- uses: julia-actions/julia-processcoverage@v1
if: matrix.pkg.name == 'Arrow.jl' && matrix.version == '1' && matrix.os == 'macos-latest' && matrix.nthreads == 1
- uses: codecov/codecov-action@v5
if: matrix.pkg.name == 'Arrow.jl' && matrix.version == '1' && matrix.os == 'macos-latest' && matrix.nthreads == 1
with:
files: lcov.info
test_monorepo:
Expand Down Expand Up @@ -168,6 +170,50 @@ jobs:
continue-on-error: false
run: >
julia --color=yes --project=monorepo -e 'using Pkg; Pkg.test("Arrow")'
# Arrow Flight interop job (stable Julia).
# Pipeline: check out the repo, install Python 3.11 with pyarrow/grpcio/grpcio-tools,
# install Julia '1', restore the Julia artifacts cache, build the root project,
# then run test/flight.jl from the test/ project.
# Runs only on ubuntu-latest and is capped at 30 minutes.
flight_interop:
name: Arrow Flight interop - Julia 1 - ubuntu-latest
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v6
# Python interpreter for the interop run; its install location is reused
# further down through env.pythonLocation.
- uses: actions/setup-python@v6
with:
python-version: '3.11'
# Python packages are installed unpinned, so each run picks up the latest releases.
- name: Install Flight Python dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pyarrow grpcio grpcio-tools
- uses: julia-actions/setup-julia@v2
with:
version: '1'
# Cache ~/.julia/artifacts. The key combines the runner OS, a "flight" scope,
# the cache name and a hash of every Project.toml; restore-keys fall back to
# progressively broader prefixes when there is no exact hit.
- uses: actions/cache@v5
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-flight-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-flight-${{ env.cache-name }}-
${{ runner.os }}-flight-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1.6
with:
project: .
# The step body is Julia code (the shell is `julia --project=. {0}`): it develops
# the in-repo ArrowTypes checkout into the root project environment.
- name: Dev local ArrowTypes for Arrow.jl tests
shell: julia --project=. {0}
run: |
using Pkg
Pkg.develop(PackageSpec(path="src/ArrowTypes"))
- name: Run Arrow Flight interop tests
# ARROW_FLIGHT_PYTHON points at the interpreter installed by setup-python
# (presumably read by test/flight.jl to launch the Python side; confirm).
# The command develops the repo root and src/ArrowTypes into the test/ project,
# instantiates it, and includes test/flight.jl directly instead of using Pkg.test.
env:
ARROW_FLIGHT_PYTHON: ${{ env.pythonLocation }}/bin/python
run: >
julia --color=yes --project=test -e 'using Pkg;
Pkg.develop(PackageSpec(path="."));
Pkg.develop(PackageSpec(path="src/ArrowTypes"));
Pkg.instantiate();
using Test, Arrow;
include("test/flight.jl")'
docs:
name: Documentation
runs-on: ubuntu-latest
Expand Down
49 changes: 45 additions & 4 deletions .github/workflows/ci_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ jobs:
JULIA_NUM_THREADS: ${{ matrix.nthreads }}
with:
project: ${{ matrix.pkg.dir }}
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v5
with:
files: lcov.info
test_monorepo:
name: Monorepo dev - Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
Expand Down Expand Up @@ -106,3 +102,48 @@ jobs:
continue-on-error: false
run: >
julia --color=yes --project=monorepo -e 'using Pkg; Pkg.test("Arrow")'
# Arrow Flight interop job (Julia nightly).
# Pipeline: check out the repo, install Python 3.11 with pyarrow/grpcio/grpcio-tools,
# install Julia nightly (x64), restore the Julia artifacts cache, build the root
# project, then run test/flight.jl from the test/ project.
# Runs only on ubuntu-latest and is capped at 30 minutes.
flight_interop:
name: Arrow Flight interop - Julia nightly - ubuntu-latest
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v6
# Python interpreter for the interop run; its install location is reused
# further down through env.pythonLocation.
- uses: actions/setup-python@v6
with:
python-version: '3.11'
# Python packages are installed unpinned, so each run picks up the latest releases.
- name: Install Flight Python dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pyarrow grpcio grpcio-tools
- uses: julia-actions/setup-julia@v2
with:
version: 'nightly'
arch: x64
# Cache ~/.julia/artifacts. The key combines the runner OS, a "flight" scope,
# the cache name and a hash of every Project.toml; restore-keys fall back to
# progressively broader prefixes when there is no exact hit.
- uses: actions/cache@v5
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-flight-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-flight-${{ env.cache-name }}-
${{ runner.os }}-flight-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1.6
with:
project: .
# The step body is Julia code (the shell is `julia --project=. {0}`): it develops
# the in-repo ArrowTypes checkout into the root project environment.
- name: Dev local ArrowTypes for Arrow.jl tests
shell: julia --project=. {0}
run: |
using Pkg
Pkg.develop(PackageSpec(path="src/ArrowTypes"))
- name: Run Arrow Flight interop tests
# ARROW_FLIGHT_PYTHON points at the interpreter installed by setup-python
# (presumably read by test/flight.jl to launch the Python side; confirm).
# The command develops the repo root and src/ArrowTypes into the test/ project,
# instantiates it, and includes test/flight.jl directly instead of using Pkg.test.
env:
ARROW_FLIGHT_PYTHON: ${{ env.pythonLocation }}/bin/python
run: >
julia --color=yes --project=test -e 'using Pkg;
Pkg.develop(PackageSpec(path="."));
Pkg.develop(PackageSpec(path="src/ArrowTypes"));
Pkg.instantiate();
using Test, Arrow;
include("test/flight.jl")'
19 changes: 18 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ version = "2.8.1"

[deps]
ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561"
CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
ConcurrentUtilities = "f0e56b4a-5159-44fe-b623-3e5288b988bb"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429"
gRPCClient = "aaca4a50-36af-4a1d-b878-4c443f2061ad"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
Expand All @@ -37,6 +41,15 @@ TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

# Optional (weak) dependency: gRPCServer is not a hard requirement of Arrow;
# it only matters when it is also present in the user's environment.
[weakdeps]
gRPCServer = "608c6337-0d7d-447f-bb69-0f5674ee3959"

# Package extension ArrowgRPCServerExt is triggered by the gRPCServer weak dependency.
[extensions]
ArrowgRPCServerExt = "gRPCServer"

# Resolve ArrowTypes from the in-repo copy at src/ArrowTypes.
# NOTE(review): [sources] is only honoured by newer Pkg versions (1.11+ per the Pkg
# docs); confirm this matches the package's julia compat bound.
[sources]
ArrowTypes = { path = "src/ArrowTypes" }

[compat]
ArrowTypes = "1.1,2"
BitIntegers = "0.2, 0.3"
Expand All @@ -45,10 +58,14 @@ CodecZstd = "0.7, 0.8"
ConcurrentUtilities = "2"
DataAPI = "1"
EnumX = "1"
JSON3 = "1"
ProtoBuf = "~1.2.1"
gRPCClient = "1"
gRPCServer = "0.1"
PooledArrays = "0.5, 1.0"
SentinelArrays = "1"
StringViews = "1"
Tables = "1.1"
TimeZones = "1"
TranscodingStreams = "0.9.12, 0.10, 0.11"
julia = "1.9"
julia = "1.12"
46 changes: 44 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ The package can be installed by typing in the following in a Julia REPL:
julia> using Pkg; Pkg.add("Arrow")
```

Arrow.jl currently requires Julia `1.12+`.

## Local Development

When developing on Arrow.jl it is recommended that you run the following to ensure that any
Expand All @@ -49,23 +51,63 @@ changes to ArrowTypes.jl are immediately available to Arrow.jl without requiring
julia --project -e 'using Pkg; Pkg.develop(path="src/ArrowTypes")'
```

Current write-path notes:
* `Arrow.tobuffer` includes a direct single-partition fast path for eligible inputs
* `Arrow.tobuffer(Tables.partitioner(...))` also includes a targeted direct multi-record-batch path for single-column top-level strings and single-column non-missing binary/code-units columns
* `Arrow.write(io, Tables.partitioner(...))` now reuses that same targeted direct multi-record-batch path instead of always going through the legacy `Writer` orchestration
* multi-column partitions, dictionary-encoded top-level columns, map-heavy inputs, and missing-binary partitions retain the existing writer path

## Format Support

This implementation supports the 1.0 version of the specification, including support for:
* All primitive data types
* All nested data types
* Dictionary encodings and messages
* Dictionary-encoded `CategoricalArray` interop, including missing-value roundtrips through `Arrow.Table`, `copy`, and `DataFrame(...; copycols=true)`
* Extension types
* Lightweight schema/field metadata overlays via `Arrow.withmetadata(...)` for Tables.jl-compatible sources before serialization
* Base Julia `Enum` logical types via the `JuliaLang.Enum` extension label, with native Julia roundtrips back to the original enum type while `convert=false` and non-Julia consumers still see the primitive storage type
* View-backed Utf8/Binary columns, including recovery from under-reported variadic buffer counts by inferring the required external buffers from valid view elements
* Streaming, file, record batch, and replacement and isdelta dictionary messages

It currently doesn't include support for:
* Tensors or sparse tensors
* Flight RPC
* Tensor or sparse tensor IPC payload semantics; Arrow.jl now recognizes those message headers explicitly and rejects them with precise errors instead of falling through to a generic unsupported-message path
* C data interface
* Writing Run-End Encoded arrays; Arrow.jl now reads REE arrays and exposes them as read-only vectors, but still rejects REE on write paths

Flight RPC status:
* Experimental `Arrow.Flight` support is available in-tree
* Requires Julia `1.12+`
* Includes generated protocol bindings and complete client constructors for the `FlightService` RPC surface
* Keeps the top-level Flight module shell thin, with exports and generated-protocol setup split out of `src/flight/Flight.jl`
* Includes high-level `FlightData <-> Arrow IPC` helpers for `Arrow.Table`, `Arrow.Stream`, and DoPut/DoExchange payload generation. `app_metadata` is supported in three ways: opt-in surfacing via `include_app_metadata=true` on `Arrow.Flight.stream(...)` / `Arrow.Flight.table(...)`; explicit batch-wise emission via `app_metadata=...` on `Arrow.Flight.flightdata(...)`, `Arrow.Flight.putflightdata!(...)`, and source-based `Arrow.Flight.doexchange(...)`; and a reusable `Arrow.Flight.withappmetadata(...)` wrapper that keeps source-level batch metadata attached without manual keyword threading
* Keeps the Flight IPC conversion layer modular under `src/flight/convert/`, with `src/flight/convert.jl` retained as a thin entrypoint
* Includes client helpers for request headers, binary metadata, handshake token reuse, and TLS configuration via `withheaders`, `withtoken`, and `authenticate`
* Keeps the Flight client implementation modular under `src/flight/client/`, with thin entrypoints at `src/flight/client.jl` and `src/flight/client/rpc_methods.jl`
* Includes a transport-agnostic server core (`Service`, `ServerCallContext`, `ServiceDescriptor`, `MethodDescriptor`) for local Flight method dispatch, path lookup, and handler testing
* Keeps the transport-agnostic server core modular under `src/flight/server/`, with `src/flight/server.jl` retained as a thin entrypoint
* Includes an optional `gRPCServer.jl` package extension that maps `Arrow.Flight.Service` into `gRPCServer.ServiceDescriptor` and registers Flight proto types with the external server package when it is present
* Keeps the optional `gRPCServer.jl` bridge modular under `ext/arrowgrpcserverext/`, with `ext/ArrowgRPCServerExt.jl` retained as a thin entrypoint
* Includes optional live interoperability coverage for `Handshake`, authenticated token propagation, `PollFlightInfo`, and TLS via dedicated Python reference servers
* Includes optional live `pyarrow.flight` interoperability coverage for `ListFlights`, `GetFlightInfo`, `GetSchema`, `DoGet`, `DoPut`, `DoExchange`, `ListActions`, and `DoAction`
* Keeps targeted Flight verification modular under `test/flight/`, with `test/flight.jl` retained as a thin entrypoint so that local and CI invocations stay stable. The suites are split by concern: client-constructor/protocol-wrapper checks under `test/flight/client_surface/`, optional `gRPCServer` extension scenarios under `test/flight/grpcserver_extension/`, `pyarrow.flight` interop scenarios under `test/flight/pyarrow_interop/`, and transport-agnostic server-core checks under `test/flight/server_core/`
* Includes `test/flight_grpcserver.jl` as a temporary-environment runner for optional native `gRPCServer` coverage without mutating `test/Project.toml`
* Dedicated CI jobs now exercise the Flight interop suite on stable and nightly Linux; native Julia server transport remains optional/experimental and is not part of the default Flight suite

Third-party data formats:
* CSV, parquet and avro support via the existing [CSV.jl](https://github.com/JuliaData/CSV.jl), [Parquet.jl](https://github.com/JuliaIO/Parquet.jl) and [Avro.jl](https://github.com/JuliaData/Avro.jl) packages
* Other Tables.jl-compatible packages automatically supported ([DataFrames.jl](https://github.com/JuliaData/DataFrames.jl), [JSONTables.jl](https://github.com/JuliaData/JSONTables.jl), [JuliaDB.jl](https://github.com/JuliaData/JuliaDB.jl), [SQLite.jl](https://github.com/JuliaDatabases/SQLite.jl), [MySQL.jl](https://github.com/JuliaDatabases/MySQL.jl), [JDBC.jl](https://github.com/JuliaDatabases/JDBC.jl), [ODBC.jl](https://github.com/JuliaDatabases/ODBC.jl), [XLSX.jl](https://github.com/felipenoris/XLSX.jl), etc.)
* No current Julia packages support ORC

Canonical extension highlights:
* `UUID` now writes the canonical `arrow.uuid` extension name by default while retaining reader compatibility with legacy `JuliaLang.UUID` metadata
* `Arrow.TimestampWithOffset{U}` provides a canonical `arrow.timestamp_with_offset` logical type without conflating offset-only semantics with `ZonedDateTime`
* `Arrow.Bool8` provides an explicit opt-in writer/reader surface for the canonical `arrow.bool8` extension without changing the default packed-bit `Bool` path
* `Arrow.JSONText{String}` provides a text-backed logical type for the canonical `arrow.json` extension without parsing payloads during read or write
* `arrow.opaque` now reads as the underlying storage type without warning, and explicit writer metadata can be generated with `Arrow.opaquemetadata(type_name, vendor_name)`
* `Arrow.variantmetadata()`, `Arrow.fixedshapetensormetadata(...)`, and `Arrow.variableshapetensormetadata(...)` generate canonical metadata strings for advanced canonical extensions
* `arrow.fixed_shape_tensor` and `arrow.variable_shape_tensor` are recognized on read as canonical passthrough extensions over their storage types, and Arrow.jl now validates their canonical metadata plus top-level storage shape before accepting them
* `arrow.parquet.variant` is recognized on read as a canonical passthrough extension over its storage type; Arrow.jl currently validates that its canonical metadata is the required empty string, but does not yet implement deeper variant semantics or an automatic writer surface
* Legacy `JuliaLang.ZonedDateTime-UTC` and `JuliaLang.ZonedDateTime` files remain readable for backward compatibility

See the [full documentation](https://arrow.apache.org/julia/) for details on reading and writing arrow data.
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

Manifest.toml
*/Manifest.toml
dev/release/apache-rat-*.jar
dev/release/filtered_rat.txt
dev/release/rat.xml
Expand Down
Loading