Skip to content

Commit 1b3518d

Browse files
committed
[Java IO] Add ArrowFlight IO connector
Add a new IO connector for Apache Arrow Flight, enabling high-performance data transfer over gRPC using the Arrow columnar format. Includes read (BoundedSource) and write (DoFn with doPut) support with endpoint-level split parallelism and bearer token authentication. Fixes #20116
1 parent 7620a93 commit 1b3518d

8 files changed

Lines changed: 1106 additions & 0 deletions

File tree

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
## I/Os
6666

6767
* Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
68+
* Add ArrowFlight IO (Java) ([#20116](https://github.com/apache/beam/issues/20116)).
6869
* DebeziumIO (Java): added `OffsetRetainer` interface and `FileSystemOffsetRetainer` implementation to persist and restore CDC offsets across pipeline restarts, and exposed `withStartOffset` / `withOffsetRetainer` on `DebeziumIO.Read` and the cross-language `ReadBuilder` ([#28248](https://github.com/apache/beam/issues/28248)).
6970

7071
## New Features / Improvements

buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,8 @@ class BeamModulePlugin implements Plugin<Project> {
925925
arrow_vector : "org.apache.arrow:arrow-vector:$arrow_version",
926926
arrow_memory_core : "org.apache.arrow:arrow-memory-core:$arrow_version",
927927
arrow_memory_netty : "org.apache.arrow:arrow-memory-netty:$arrow_version",
928+
arrow_flight_core : "org.apache.arrow:flight-core:$arrow_version",
929+
arrow_flight_sql : "org.apache.arrow:flight-sql:$arrow_version",
928930
],
929931
groovy: [
930932
groovy_all: "org.codehaus.groovy:groovy-all:2.4.13",
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* License); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an AS IS BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
plugins { id 'org.apache.beam.module' }
20+
applyJavaNature(automaticModuleName: 'org.apache.beam.sdk.io.arrowflight')
21+
22+
description = "Apache Beam :: SDKs :: Java :: IO :: Arrow Flight"
23+
ext.summary = "IO to read and write data using Apache Arrow Flight RPC."
24+
25+
dependencies {
26+
implementation project(path: ":sdks:java:core", configuration: "shadow")
27+
implementation project(path: ":sdks:java:extensions:arrow")
28+
implementation library.java.joda_time
29+
implementation library.java.slf4j_api
30+
implementation library.java.vendored_guava_32_1_2_jre
31+
implementation(library.java.arrow_flight_core)
32+
implementation(library.java.arrow_memory_core)
33+
implementation(library.java.arrow_vector)
34+
testImplementation library.java.hamcrest
35+
testImplementation library.java.junit
36+
testImplementation(library.java.arrow_memory_netty)
37+
testRuntimeOnly library.java.slf4j_simple
38+
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
39+
}

0 commit comments

Comments
 (0)