Chapter 7: Transport Protocols

AstraeaDB exposes three network protocols, each optimized for a different use case. This chapter walks through all three—JSON over TCP, gRPC, and Apache Arrow Flight—so you can choose the right tool for every situation.

Key takeaway You do not need to master all three protocols. JSON-TCP is the easiest way to get started and is perfectly adequate for development and light workloads. Move to gRPC or Arrow Flight when your application demands type safety, streaming, or zero-copy analytics.

7.1 JSON over TCP (Port 7687)

The default protocol is the simplest: newline-delimited JSON messages sent over a raw TCP socket. Every request is a single JSON object terminated by a newline character (\n), and the server responds with a single JSON object (also newline-terminated).

Why start here?

Raw TCP example

You can test the connection without any client library at all. The following command sends a Ping message to the server and prints the response:

# Send a Ping message via netcat.
# printf is used instead of echo: echo's treatment of "\n" differs between
# shells (bash sends the two characters literally, which corrupts the JSON line).
printf '%s\n' '{"type":"Ping"}' | nc localhost 7687

# Expected response:
# {"type":"Pong","version":"0.1.0"}

You can also send a full GQL query this way:

# printf emits the JSON followed by exactly one newline in every shell.
printf '%s\n' '{"type":"GqlQuery","query":"MATCH (n) RETURN n LIMIT 5"}' | nc localhost 7687

Client library connections

Every official client library provides a JSON-TCP client class. Here is how to connect in each language:

from astraeadb.client import JsonClient

# Open a JSON-TCP session against the default port (7687).
client = JsonClient("localhost", port=7687)
client.connect()

# Health check: ping the server and show its reply.
pong = client.ping()
print(pong)  # {'type': 'Pong', 'version': '0.1.0'}

# Execute a GQL query and print each returned row on its own line.
query = "MATCH (n:Person) RETURN n.name LIMIT 10"
for record in client.gql_query(query):
    print(record)
library(astraea)

# Open a JSON-TCP session against the default port (7687).
client <- AstraeaClient$new("localhost", port = 7687)
client$connect()

# Health check: ping the server and show its reply.
pong <- client$ping()
print(pong)

# Execute a GQL query and show whatever it returns.
query <- "MATCH (n:Person) RETURN n.name LIMIT 10"
rows <- client$gql_query(query)
print(rows)
package main

import (
    "fmt"
    "log"
    astraea "github.com/AstraeaDB/AstraeaDB-Official"
)

// main connects to AstraeaDB over JSON-TCP, pings the server, and prints the
// rows returned by a sample GQL query. Any failure aborts the program instead
// of being silently ignored.
func main() {
    // Connect via JSON-TCP (default port 7687)
    client, err := astraea.NewJSONClient("localhost", 7687)
    if err != nil {
        log.Fatalf("connecting to AstraeaDB: %v", err)
    }
    defer client.Close()

    // Ping the server
    pong, err := client.Ping()
    if err != nil {
        log.Fatalf("ping: %v", err)
    }
    fmt.Println(pong)

    // Run a GQL query
    results, err := client.GqlQuery("MATCH (n:Person) RETURN n.name LIMIT 10")
    if err != nil {
        log.Fatalf("running GQL query: %v", err)
    }
    for _, row := range results {
        fmt.Println(row)
    }
}
import com.astraeadb.client.JsonClient;
import java.util.List;
import java.util.Map;

/** Minimal JSON-TCP example: connect, ping the server, and run one GQL query. */
public class TcpExample {
    public static void main(String[] args) {
        // Connect via JSON-TCP (default port 7687)
        JsonClient client = new JsonClient("localhost", 7687);
        client.connect();

        // Ping the server
        String pong = client.ping();
        System.out.println(pong);

        // Run a GQL query; List and Map come from java.util (imported above)
        List<Map<String, Object>> results =
            client.gqlQuery("MATCH (n:Person) RETURN n.name LIMIT 10");
        results.forEach(System.out::println);
    }
}
Best for Getting started, debugging, shell scripts, lightweight workloads, and any environment where you want zero external dependencies.

7.2 gRPC (Port 7688)

For production deployments, AstraeaDB provides a gRPC interface on port 7688. gRPC uses Protocol Buffers (protobuf) as its serialization format, which means every message is strongly typed, compact, and fast to encode/decode.

Why gRPC?

The proto definition (excerpt)

// Excerpt of the AstraeaDB gRPC schema (proto3).
syntax = "proto3";

package astraeadb;

// RPC surface of the AstraeaDB service. Only four RPCs are shown here; the
// remaining ones are elided (see the trailing comment inside the service).
service AstraeaDB {
  rpc Ping       (PingRequest)       returns (PongResponse);
  rpc CreateNode (CreateNodeRequest) returns (CreateNodeResponse);
  rpc GetNode    (GetNodeRequest)    returns (GetNodeResponse);
  rpc GqlQuery   (GqlQueryRequest)   returns (GqlQueryResponse);
  // ... 10 more RPCs
}

// Request message for the CreateNode RPC.
message CreateNodeRequest {
  // Labels to attach to the new node, e.g. "Person".
  repeated string labels     = 1;
  // Node properties as a JSON document carried in a string field,
  // e.g. {"name":"Alice","age":30}.
  string          properties = 2;  // JSON-encoded
  // NOTE(review): presumably the node's vector embedding — confirm against the full schema.
  repeated float  embedding  = 3;
}

Connecting via gRPC

The Go and Java clients have the strongest native gRPC support, since both ecosystems have mature protobuf tooling. Python and R primarily use JSON-TCP, though you can use the generated Python stubs directly if needed.

package main

import (
    "context"
    "fmt"
    "log"

    pb "github.com/AstraeaDB/AstraeaDB-Official/proto"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

// main connects to the AstraeaDB gRPC port, pings the server, and creates one
// node. Every RPC error is checked: on failure the response pointer cannot be
// trusted, so dereferencing it (pong.Version, resp.NodeId) would be unsafe.
func main() {
    // Connect to gRPC port
    conn, err := grpc.NewClient(
        "localhost:7688",
        grpc.WithTransportCredentials(insecure.NewCredentials()),
    )
    if err != nil {
        log.Fatalf("creating gRPC client: %v", err)
    }
    defer conn.Close()

    client := pb.NewAstraeaDBClient(conn)
    ctx := context.Background()

    // Ping
    pong, err := client.Ping(ctx, &pb.PingRequest{})
    if err != nil {
        log.Fatalf("ping: %v", err)
    }
    fmt.Println("Version:", pong.Version)

    // Create a node
    resp, err := client.CreateNode(ctx, &pb.CreateNodeRequest{
        Labels:     []string{"Person"},
        Properties: `{"name":"Alice","age":30}`,
    })
    if err != nil {
        log.Fatalf("creating node: %v", err)
    }
    fmt.Println("Node ID:", resp.NodeId)
}
import com.astraeadb.client.GrpcClient;
import java.util.List;
import java.util.Map;

/** gRPC example: connect, ping, create a node, and run one GQL query. */
public class GrpcExample {
    public static void main(String[] args) {
        // Connect to gRPC port
        GrpcClient client = new GrpcClient("localhost", 7688);
        client.connect();

        // try/finally guarantees the client is closed even if a call throws.
        try {
            // Ping
            String version = client.ping();
            System.out.println("Version: " + version);

            // Create a node
            String nodeId = client.createNode(
                List.of("Person"),
                Map.of("name", "Alice", "age", 30)
            );
            System.out.println("Node ID: " + nodeId);

            // GQL query
            List<Map<String, Object>> results =
                client.gqlQuery("MATCH (n:Person) RETURN n.name");
            results.forEach(System.out::println);
        } finally {
            client.close();
        }
    }
}
# Python: gRPC via generated stubs
# pip install grpcio grpcio-tools
import grpc
from astraeadb.proto import astraeadb_pb2, astraeadb_pb2_grpc

# Use the channel as a context manager so it is closed when the block exits,
# even if one of the calls below raises.
with grpc.insecure_channel("localhost:7688") as channel:
    stub = astraeadb_pb2_grpc.AstraeaDBStub(channel)

    # Ping
    pong = stub.Ping(astraeadb_pb2.PingRequest())
    print("Version:", pong.version)

    # Create a node (properties travel as a JSON-encoded string)
    resp = stub.CreateNode(astraeadb_pb2.CreateNodeRequest(
        labels=["Person"],
        properties='{"name":"Alice","age":30}'
    ))
    print("Node ID:", resp.node_id)
# R has no native gRPC support, so the JSON-TCP client (AstraeaClient) is the
# recommended choice for R workflows.
# When gRPC is required, drive the generated Python stubs through reticulate:

library(reticulate)
grpc     <- import("grpc")
messages <- import("astraeadb.proto.astraeadb_pb2")
services <- import("astraeadb.proto.astraeadb_pb2_grpc")

# Open an insecure channel to the gRPC port and bind the service stub to it.
stub <- services$AstraeaDBStub(grpc$insecure_channel("localhost:7688"))

# Ping the server and print the version field of the reply.
reply <- stub$Ping(messages$PingRequest())
print(reply$version)
Best for Production microservices, polyglot environments, streaming large result sets, and any deployment where you want compile-time type safety and maximum wire efficiency.

7.3 Apache Arrow Flight (Port 7689)

For analytics-heavy workloads, AstraeaDB supports Apache Arrow Flight on port 7689. Arrow Flight transfers query results as columnar RecordBatches—the same in-memory format used by Pandas, Polars, DuckDB, and the broader Arrow ecosystem. This means data flows from the database into your DataFrame without any serialization or deserialization overhead.

Why Arrow Flight?

Two key operations

Operation Description Use case
do_get Execute a GQL query and receive the results as an Arrow table Analytics, dashboards, DataFrame workflows
do_put Upload an Arrow table into AstraeaDB as new nodes/edges Bulk import, ETL pipelines, batch ingestion

Connecting via Arrow Flight

from astraeadb.arrow_client import ArrowClient

# Connect to the Arrow Flight endpoint (AstraeaDB serves Flight on port 7689)
client = ArrowClient("grpc://localhost:7689")
client.connect()

# do_get: execute GQL and receive a Pandas DataFrame
df = client.query_to_dataframe(
    "MATCH (n:Person) RETURN n.name, n.age"
)
print(df)
#       name  age
# 0    Alice   30
# 1      Bob   25
# 2  Charlie   35

# do_get: receive a Polars DataFrame instead
# (polars is only needed for this step, so it is imported here)
import polars as pl
arrow_table = client.query_to_arrow(
    "MATCH (n:Person) RETURN n.name, n.age"
)
polars_df = pl.from_arrow(arrow_table)

# do_put: bulk import a DataFrame as nodes
import pandas as pd
new_people = pd.DataFrame({
    "name": ["Dave", "Eve"],
    "age": [28, 32],
    "_label": ["Person", "Person"]
})
client.put_dataframe(new_people)
library(arrow)
library(astraea)

# Connect to the Arrow Flight endpoint (AstraeaDB serves Flight on port 7689)
client <- ArrowFlightClient$new("grpc://localhost:7689")

# do_get: execute GQL and receive an Arrow Table
tbl <- client$query("MATCH (n:Person) RETURN n.name, n.age")

# Convert to a base R data.frame
df <- as.data.frame(tbl)
print(df)

# do_put: bulk import a data.frame as nodes
# `_label` is not a syntactic R name, so it must be backtick-quoted, and
# check.names = FALSE stops data.frame() from renaming the column to X_label.
new_people <- data.frame(
  name     = c("Dave", "Eve"),
  age      = c(28L, 32L),
  `_label` = c("Person", "Person"),
  check.names = FALSE
)
client$put(new_people)
package main

import (
    "context"
    "fmt"
    "log"

    "github.com/apache/arrow/go/v17/arrow/flight"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

func main() {
    // Connect to Arrow Flight endpoint
    client, err := flight.NewClientWithMiddleware(
        "localhost:7689",
        nil,
        nil,
        grpc.WithTransportCredentials(insecure.NewCredentials()),
    )
    if err != nil {
        log.Fatal(err)
    }
    defer client.Close()

    ctx := context.Background()

    // do_get: execute a GQL query
    desc := &flight.FlightDescriptor{
        Type: flight.DescriptorCMD,
        Cmd:  []byte("MATCH (n:Person) RETURN n.name, n.age"),
    }
    info, _ := client.GetFlightInfo(ctx, desc)

    stream, _ := client.DoGet(ctx, info.Endpoint[0].Ticket)
    for {
        record, err := stream.Recv()
        if err != nil {
            break
        }
        fmt.Println(record)
    }
}
import org.apache.arrow.flight.*;
import org.apache.arrow.memory.RootAllocator;

/** Runs a GQL query over Arrow Flight and prints each received batch as TSV. */
public class ArrowFlightExample {
    // FlightClient.close() and FlightStream.close() throw checked exceptions,
    // so main declares them instead of failing to compile.
    public static void main(String[] args) throws Exception {
        // try-with-resources closes in reverse order (client, then allocator),
        // including when a call inside the block throws.
        try (RootAllocator allocator = new RootAllocator();
             FlightClient client = FlightClient.builder(
                 allocator,
                 Location.forGrpcInsecure("localhost", 7689)
             ).build()) {

            // do_get: execute GQL and receive Arrow RecordBatches.
            // The charset is explicit so the command bytes do not depend on
            // the platform default encoding.
            FlightInfo info = client.getInfo(
                FlightDescriptor.command(
                    "MATCH (n:Person) RETURN n.name, n.age"
                        .getBytes(java.nio.charset.StandardCharsets.UTF_8)
                )
            );

            try (FlightStream stream = client.getStream(
                    info.getEndpoints().get(0).getTicket())) {
                while (stream.next()) {
                    System.out.println(stream.getRoot().contentToTSVString());
                }
            }
        }
    }
}
Best for Analytics pipelines, data science workflows, bulk import/export, and any scenario where you want query results to land directly in a DataFrame without serialization overhead.

7.4 Choosing the Right Protocol

The table below summarizes the trade-offs. In practice, most teams start with JSON-TCP during development and add gRPC or Arrow Flight when specific requirements arise.

Criterion JSON-TCP gRPC Arrow Flight
Default port 7687 7688 7689
Setup complexity Lowest Medium Highest
Dependencies None protobuf pyarrow / arrow
Wire format JSON text Binary (protobuf) Binary (Arrow IPC)
Performance Good Better Best (analytics)
Debugging Easy (human-readable) Hard (binary) Hard (binary)
Type safety None (schemaless JSON) Strong (proto schema) Strong (Arrow schema)
Streaming No Yes Yes
Best for Development, scripts Production, microservices Analytics, DataFrames

The UnifiedClient: automatic protocol selection

If you do not want to think about protocols at all, use the auto-selecting client for your language: UnifiedClient (Python and Java), AstraeaClient with auto = TRUE (R), or NewAutoClient (Go). These wrappers detect which server endpoints are available and automatically select the best protocol:

  1. If the Arrow Flight port is reachable and pyarrow is installed, use Arrow Flight.
  2. Otherwise, if the gRPC port is reachable and stubs are available, use gRPC.
  3. Otherwise, fall back to JSON-TCP.
from astraeadb.client import UnifiedClient

# UnifiedClient chooses the best available protocol on its own.
client = UnifiedClient("localhost")
client.connect()

# Reports "arrow_flight", "grpc", or "json_tcp".
print("Using protocol:", client.protocol)

# The query API is the same whichever protocol was chosen.
query = "MATCH (n) RETURN n LIMIT 5"
results = client.gql_query(query)
library(astraea)

# With auto = TRUE the client chooses the best available protocol on its own.
client <- AstraeaClient$new("localhost", auto = TRUE)
client$connect()

# Show which protocol was selected.
selected <- client$protocol
cat("Using protocol:", selected, "\n")

# The query API is the same whichever protocol was chosen.
query <- "MATCH (n) RETURN n LIMIT 5"
results <- client$gql_query(query)
package main

import (
    "fmt"
    "log"
    astraea "github.com/AstraeaDB/AstraeaDB-Official"
)

// main opens an auto-selecting client, reports which protocol it chose, and
// prints the result of a sample GQL query. A query failure is reported rather
// than ignored.
func main() {
    // Auto-selects the best available protocol
    client, err := astraea.NewAutoClient("localhost")
    if err != nil {
        log.Fatalf("connecting to AstraeaDB: %v", err)
    }
    defer client.Close()

    fmt.Println("Using protocol:", client.Protocol())

    results, err := client.GqlQuery("MATCH (n) RETURN n LIMIT 5")
    if err != nil {
        log.Fatalf("running GQL query: %v", err)
    }
    fmt.Println(results)
}
import com.astraeadb.client.UnifiedClient;

/** Auto-selecting client example: connect, report the protocol, run one GQL query. */
public class UnifiedExample {
    public static void main(String[] args) {
        // Auto-selects the best available protocol
        UnifiedClient client = new UnifiedClient("localhost");
        client.connect();

        // try/finally guarantees the client is closed even if a call throws.
        try {
            System.out.println("Using: " + client.getProtocol());

            var results = client.gqlQuery("MATCH (n) RETURN n LIMIT 5");
            results.forEach(System.out::println);
        } finally {
            client.close();
        }
    }
}
Recommendation Use UnifiedClient / AstraeaClient(auto=TRUE) in application code. This future-proofs your application: if you later install pyarrow or deploy a gRPC-enabled server, your code automatically benefits without any changes.
← Ch 6: Graph Traversals Ch 8: Vector Search →