Chapter 4: Your First Graph
Build a movie database from scratch using AstraeaDB. You will create nodes for movies and people, connect them with edges, query the data with GQL, and learn how to update, delete, and bulk-import records.
4.1 Creating Nodes
Nodes are the fundamental building blocks of a graph. Each node has one or more labels (like Movie or Person) and a set of properties (key-value pairs stored as JSON).
Let us create four nodes: one movie and three people.
from astraeadb import AstraeaClient with AstraeaClient() as client: # Create a Movie node matrix = client.create_node( ["Movie"], {"title": "The Matrix", "year": 1999} ) # Create Person nodes for actors and director keanu = client.create_node( ["Person"], {"name": "Keanu Reeves", "born": 1964} ) carrie = client.create_node( ["Person"], {"name": "Carrie-Anne Moss", "born": 1967} ) lana = client.create_node( ["Person"], {"name": "Lana Wachowski", "born": 1965} ) print(f"Created movie: {matrix}") print(f"Created people: {keanu}, {carrie}, {lana}")
source("r_client.R") client <- AstraeaClient$new("127.0.0.1", 7687) client$connect() # Create a Movie node matrix <- client$create_node( labels = list("Movie"), properties = list(title = "The Matrix", year = 1999) ) # Create Person nodes keanu <- client$create_node( labels = list("Person"), properties = list(name = "Keanu Reeves", born = 1964) ) carrie <- client$create_node( labels = list("Person"), properties = list(name = "Carrie-Anne Moss", born = 1967) ) lana <- client$create_node( labels = list("Person"), properties = list(name = "Lana Wachowski", born = 1965) ) cat("Created movie:", matrix$id, "\n") cat("Created people:", keanu$id, carrie$id, lana$id, "\n") client$close()
package main import ( "context" "fmt" "github.com/AstraeaDB/AstraeaDB-Official" ) func main() { client := astraeadb.NewClient(astraeadb.WithAddress("127.0.0.1", 7687)) ctx := context.Background() client.Connect(ctx) defer client.Close() // Create a Movie node matrix, _ := client.CreateNode(ctx, []string{"Movie"}, map[string]interface{}{ "title": "The Matrix", "year": 1999, }) // Create Person nodes keanu, _ := client.CreateNode(ctx, []string{"Person"}, map[string]interface{}{ "name": "Keanu Reeves", "born": 1964, }) carrie, _ := client.CreateNode(ctx, []string{"Person"}, map[string]interface{}{ "name": "Carrie-Anne Moss", "born": 1967, }) lana, _ := client.CreateNode(ctx, []string{"Person"}, map[string]interface{}{ "name": "Lana Wachowski", "born": 1965, }) fmt.Println("Created movie:", matrix.ID) fmt.Println("Created people:", keanu.ID, carrie.ID, lana.ID) }
import com.astraeadb.unified.UnifiedClient; import java.util.List; import java.util.Map; try (var client = UnifiedClient.builder() .host("127.0.0.1").port(7687).build()) { client.connect(); // Create a Movie node var matrix = client.createNode( List.of("Movie"), Map.of("title", "The Matrix", "year", 1999) ); // Create Person nodes var keanu = client.createNode( List.of("Person"), Map.of("name", "Keanu Reeves", "born", 1964) ); var carrie = client.createNode( List.of("Person"), Map.of("name", "Carrie-Anne Moss", "born", 1967) ); var lana = client.createNode( List.of("Person"), Map.of("name", "Lana Wachowski", "born", 1965) ); System.out.println("Created movie: " + matrix.id()); System.out.println("Created people: " + keanu.id() + ", " + carrie.id() + ", " + lana.id()); }
Each create_node call returns a node object (or ID) that you can use to create edges and retrieve data later. The server assigns a unique identifier to every node automatically.
4.2 Creating Edges
Edges (also called relationships) connect two nodes with a type and optional properties. In our movie database, we use ACTED_IN to connect actors to movies and DIRECTED to connect directors to movies.
# Continuing from the previous example (inside the `with` block) # Connect actors to the movie edge1 = client.create_edge( keanu, matrix, "ACTED_IN", {"role": "Neo"} ) edge2 = client.create_edge( carrie, matrix, "ACTED_IN", {"role": "Trinity"} ) # Connect the director (no extra properties needed) edge3 = client.create_edge(lana, matrix, "DIRECTED") print(f"Created {3} edges")
# Continuing from the previous example # Connect actors to the movie edge1 <- client$create_edge( from_node = keanu$id, to_node = matrix$id, edge_type = "ACTED_IN", properties = list(role = "Neo") ) edge2 <- client$create_edge( from_node = carrie$id, to_node = matrix$id, edge_type = "ACTED_IN", properties = list(role = "Trinity") ) # Connect the director edge3 <- client$create_edge( from_node = lana$id, to_node = matrix$id, edge_type = "DIRECTED" ) cat("Created 3 edges\n")
// Continuing from the previous example // Connect actors to the movie edge1, _ := client.CreateEdge(ctx, keanu.ID, matrix.ID, "ACTED_IN", map[string]interface{}{"role": "Neo"}) edge2, _ := client.CreateEdge(ctx, carrie.ID, matrix.ID, "ACTED_IN", map[string]interface{}{"role": "Trinity"}) // Connect the director edge3, _ := client.CreateEdge(ctx, lana.ID, matrix.ID, "DIRECTED", nil) fmt.Println("Created 3 edges:", edge1.ID, edge2.ID, edge3.ID)
// Continuing from the previous example // Connect actors to the movie var edge1 = client.createEdge( keanu.id(), matrix.id(), "ACTED_IN", Map.of("role", "Neo") ); var edge2 = client.createEdge( carrie.id(), matrix.id(), "ACTED_IN", Map.of("role", "Trinity") ); // Connect the director var edge3 = client.createEdge( lana.id(), matrix.id(), "DIRECTED", Map.of() ); System.out.println("Created 3 edges");
4.3 Retrieving Data
Once you have created nodes and edges, you can retrieve them by their IDs. This is the simplest way to fetch data -- direct lookup by identifier.
# Retrieve a node by ID node = client.get_node(matrix) print(node) # {'id': 'nd-1', 'labels': ['Movie'], # 'properties': {'title': 'The Matrix', 'year': 1999}} # Retrieve an edge by ID edge = client.get_edge(edge1) print(edge) # {'id': 'ed-1', 'source': 'nd-2', 'target': 'nd-1', # 'type': 'ACTED_IN', 'properties': {'role': 'Neo'}} # Get all edges connected to a node edges = client.get_edges(matrix) for e in edges: print(f" {e['type']}: {e['source']} -> {e['target']}")
# Retrieve a node by ID node <- client$get_node(matrix$id) print(node) # $id: "nd-1" # $labels: "Movie" # $properties: list(title = "The Matrix", year = 1999) # Retrieve an edge by ID edge <- client$get_edge(edge1$id) print(edge) # $id: "ed-1" # $source: "nd-2", $target: "nd-1" # $type: "ACTED_IN", $properties: list(role = "Neo") # Get all edges connected to a node edges <- client$get_edges(matrix$id) for (e in edges) { cat(" ", e$type, ":", e$source, "->", e$target, "\n") }
// Retrieve a node by ID node, _ := client.GetNode(ctx, matrix.ID) fmt.Printf("Node: %+v\n", node) // Node: {ID:nd-1 Labels:[Movie] // Properties:map[title:The Matrix year:1999]} // Retrieve an edge by ID edge, _ := client.GetEdge(ctx, edge1.ID) fmt.Printf("Edge: %+v\n", edge) // Edge: {ID:ed-1 Source:nd-2 Target:nd-1 // Type:ACTED_IN Properties:map[role:Neo]} // Get all edges connected to a node edges, _ := client.GetEdges(ctx, matrix.ID) for _, e := range edges { fmt.Printf(" %s: %s -> %s\n", e.Type, e.Source, e.Target) }
// Retrieve a node by ID var node = client.getNode(matrix.id()); System.out.println(node); // Node{id=nd-1, labels=[Movie], // properties={title=The Matrix, year=1999}} // Retrieve an edge by ID var edge = client.getEdge(edge1.id()); System.out.println(edge); // Edge{id=ed-1, source=nd-2, target=nd-1, // type=ACTED_IN, properties={role=Neo}} // Get all edges connected to a node var edges = client.getEdges(matrix.id()); for (var e : edges) { System.out.printf(" %s: %s -> %s%n", e.type(), e.source(), e.target()); }
4.4 Your First Query (GQL)
While direct ID lookups are useful, the real power of a graph database comes from pattern matching with GQL (Graph Query Language). GQL lets you describe the shape of the data you are looking for, and the engine finds all matching subgraphs.
Understanding MATCH Patterns
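A GQL MATCH clause uses an ASCII-art syntax to describe graph patterns. Here is how to read it: nodes are written in parentheses and edges in square brackets, with the dashes and arrowhead showing the edge's direction. In the pattern (p:Person)-[:ACTED_IN]->(m:Movie), (p:Person) matches any node labeled Person and binds it to the variable p, -[:ACTED_IN]-> matches an edge of type ACTED_IN pointing from p to m, and (m:Movie) matches any node labeled Movie and binds it to m. An edge can be given a variable as well, as in [r:ACTED_IN], and a property map such as {title: "The Matrix"} restricts a node to those with matching property values.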
Basic Query: Find All Actors and Their Movies
# Find all actors and the movies they acted in result = client.query( 'MATCH (p:Person)-[:ACTED_IN]->(m:Movie) ' 'RETURN p.name, m.title' ) for row in result["rows"]: print(row) # ['Keanu Reeves', 'The Matrix'] # ['Carrie-Anne Moss', 'The Matrix']
# Find all actors and the movies they acted in result <- client$query( "MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN p.name, m.title" ) for (row in result$rows) { cat(row[[1]], "-", row[[2]], "\n") } # Keanu Reeves - The Matrix # Carrie-Anne Moss - The Matrix
// Find all actors and the movies they acted in result, _ := client.Query(ctx, "MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN p.name, m.title") for _, row := range result.Rows { fmt.Println(row) } // [Keanu Reeves The Matrix] // [Carrie-Anne Moss The Matrix]
// Find all actors and the movies they acted in var result = client.query( "MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN p.name, m.title" ); for (var row : result.rows()) { System.out.println(row); } // [Keanu Reeves, The Matrix] // [Carrie-Anne Moss, The Matrix]
Filtering with WHERE
Use WHERE to add conditions that narrow down your results:
# Find actors born before 1966 result = client.query( 'MATCH (p:Person)-[r:ACTED_IN]->(m:Movie) ' 'WHERE p.born < 1966 ' 'RETURN p.name, p.born, r.role, m.title' ) for row in result["rows"]: print(row) # ['Keanu Reeves', 1964, 'Neo', 'The Matrix']
# Find actors born before 1966 result <- client$query(paste( "MATCH (p:Person)-[r:ACTED_IN]->(m:Movie)", "WHERE p.born < 1966", "RETURN p.name, p.born, r.role, m.title" )) for (row in result$rows) { cat(row[[1]], "born", row[[2]], "as", row[[3]], "in", row[[4]], "\n") } # Keanu Reeves born 1964 as Neo in The Matrix
// Find actors born before 1966 result, _ := client.Query(ctx, "MATCH (p:Person)-[r:ACTED_IN]->(m:Movie) "+ "WHERE p.born < 1966 "+ "RETURN p.name, p.born, r.role, m.title") for _, row := range result.Rows { fmt.Println(row) } // [Keanu Reeves 1964 Neo The Matrix]
// Find actors born before 1966 var result = client.query( "MATCH (p:Person)-[r:ACTED_IN]->(m:Movie) " + "WHERE p.born < 1966 " + "RETURN p.name, p.born, r.role, m.title" ); for (var row : result.rows()) { System.out.println(row); } // [Keanu Reeves, 1964, Neo, The Matrix]
Sorting with ORDER BY
You can sort results just like in SQL:
-- Find all people, sorted by birth year (oldest first) MATCH (p:Person) RETURN p.name, p.born ORDER BY p.born ASC
Result:
| p.name | p.born |
|---|---|
| Keanu Reeves | 1964 |
| Lana Wachowski | 1965 |
| Carrie-Anne Moss | 1967 |
Finding Directors (Traversing a Different Edge Type)
-- Who directed The Matrix? MATCH (d:Person)-[:DIRECTED]->(m:Movie {title: "The Matrix"}) RETURN d.name -- Result: Lana Wachowski
4.5 Updating and Deleting
Graphs are living data structures. You will frequently need to update properties or remove nodes and edges as your data evolves.
Updating Node Properties
# Add a rating to the movie client.update_node(matrix, {"rating": 8.7}) # Verify the update updated = client.get_node(matrix) print(updated["properties"]["rating"]) # 8.7
# Add a rating to the movie client$update_node(matrix$id, list(rating = 8.7)) # Verify the update updated <- client$get_node(matrix$id) cat("Rating:", updated$properties$rating, "\n") # Rating: 8.7
// Add a rating to the movie client.UpdateNode(ctx, matrix.ID, map[string]interface{}{ "rating": 8.7, }) // Verify the update updated, _ := client.GetNode(ctx, matrix.ID) fmt.Println("Rating:", updated.Properties["rating"]) // Rating: 8.7
// Add a rating to the movie client.updateNode(matrix.id(), Map.of("rating", 8.7)); // Verify the update var updated = client.getNode(matrix.id()); System.out.println("Rating: " + updated.properties().get("rating")); // Rating: 8.7
Deleting Edges
# Delete a specific edge client.delete_edge(edge1) print("Edge deleted") # Verify: query should now return only Carrie-Anne Moss result = client.query( 'MATCH (p:Person)-[:ACTED_IN]->(m:Movie) ' 'RETURN p.name' ) print(result["rows"]) # [['Carrie-Anne Moss']]
# Delete a specific edge client$delete_edge(edge1$id) cat("Edge deleted\n") # Verify: query should now return only Carrie-Anne Moss result <- client$query( "MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN p.name" ) print(result$rows) # [['Carrie-Anne Moss']]
// Delete a specific edge client.DeleteEdge(ctx, edge1.ID) fmt.Println("Edge deleted") // Verify result, _ := client.Query(ctx, "MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN p.name") fmt.Println(result.Rows) // [[Carrie-Anne Moss]]
// Delete a specific edge client.deleteEdge(edge1.id()); System.out.println("Edge deleted"); // Verify var result = client.query( "MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN p.name" ); System.out.println(result.rows()); // [[Carrie-Anne Moss]]
Deleting Nodes
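Deleting a node is subject to a referential integrity check: either remove the node's edges first, or use the force option to automatically remove connected edges.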
# Option 1: Delete a disconnected node client.delete_node(keanu) # Option 2: Force-delete a node and all its edges client.delete_node(carrie, force=True) print("Node and all connected edges deleted")
# Option 1: Delete a disconnected node client$delete_node(keanu$id) # Option 2: Force-delete a node and all its edges client$delete_node(carrie$id, force = TRUE) cat("Node and all connected edges deleted\n")
// Option 1: Delete a disconnected node client.DeleteNode(ctx, keanu.ID) // Option 2: Force-delete a node and all its edges client.DeleteNode(ctx, carrie.ID, astraeadb.WithForce(true)) fmt.Println("Node and all connected edges deleted")
// Option 1: Delete a disconnected node client.deleteNode(keanu.id()); // Option 2: Force-delete a node and all its edges client.deleteNode(carrie.id(), true); // force=true System.out.println("Node and all connected edges deleted");
4.6 Bulk Import and Export
When working with larger datasets, creating nodes one at a time is inefficient. AstraeaDB supports bulk operations through both the CLI and the client APIs.
JSON Import Format
AstraeaDB uses a straightforward JSON format for import files. Here is an example file called movies.json:
{
"nodes": [
{
"labels": ["Movie"],
"properties": {"title": "The Matrix", "year": 1999}
},
{
"labels": ["Movie"],
"properties": {"title": "John Wick", "year": 2014}
},
{
"labels": ["Person"],
"properties": {"name": "Keanu Reeves", "born": 1964}
},
{
"labels": ["Person"],
"properties": {"name": "Carrie-Anne Moss", "born": 1967}
},
{
"labels": ["Person"],
"properties": {"name": "Lana Wachowski", "born": 1965}
},
{
"labels": ["Person"],
"properties": {"name": "Chad Stahelski", "born": 1968}
}
],
"edges": [
{
"source_match": {"name": "Keanu Reeves"},
"target_match": {"title": "The Matrix"},
"type": "ACTED_IN",
"properties": {"role": "Neo"}
},
{
"source_match": {"name": "Carrie-Anne Moss"},
"target_match": {"title": "The Matrix"},
"type": "ACTED_IN",
"properties": {"role": "Trinity"}
},
{
"source_match": {"name": "Lana Wachowski"},
"target_match": {"title": "The Matrix"},
"type": "DIRECTED"
},
{
"source_match": {"name": "Keanu Reeves"},
"target_match": {"title": "John Wick"},
"type": "ACTED_IN",
"properties": {"role": "John Wick"}
},
{
"source_match": {"name": "Chad Stahelski"},
"target_match": {"title": "John Wick"},
"type": "DIRECTED"
}
]
}
CLI Import and Export
Use the AstraeaDB CLI to import and export data:
# Import nodes and edges from a JSON file astraea-cli import movies.json # Imported 6 nodes and 5 edges in 12ms # Export the entire graph to a JSON file astraea-cli export output.json # Exported 6 nodes and 5 edges to output.json
Edges in the import file use source_match and target_match to find existing nodes by property values rather than by internal IDs. This makes import files portable -- you do not need to know the internal IDs ahead of time.
Batch API (Programmatic)
For programmatic bulk operations, all client libraries offer batch methods that are significantly faster than individual calls:
from astraeadb import AstraeaClient with AstraeaClient() as client: # Batch-create multiple nodes at once nodes = client.create_nodes([ {"labels": ["Movie"], "properties": {"title": "Speed", "year": 1994}}, {"labels": ["Movie"], "properties": {"title": "Point Break", "year": 1991}}, {"labels": ["Person"], "properties": {"name": "Sandra Bullock", "born": 1964}}, ]) print(f"Created {len(nodes)} nodes in batch") # Batch-create multiple edges at once speed, point_break, sandra = nodes[0], nodes[1], nodes[2] edges = client.create_edges([ {"source": sandra, "target": speed, "type": "ACTED_IN", "properties": {"role": "Annie Porter"}}, {"source": keanu, "target": speed, "type": "ACTED_IN", "properties": {"role": "Jack Traven"}}, {"source": keanu, "target": point_break, "type": "ACTED_IN", "properties": {"role": "Johnny Utah"}}, ]) print(f"Created {len(edges)} edges in batch")
# Batch-create multiple nodes at once nodes <- client$create_nodes(list( list(labels = list("Movie"), properties = list(title = "Speed", year = 1994)), list(labels = list("Movie"), properties = list(title = "Point Break", year = 1991)), list(labels = list("Person"), properties = list(name = "Sandra Bullock", born = 1964)) )) cat("Created", length(nodes), "nodes in batch\n") # Batch-create multiple edges at once edges <- client$create_edges(list( list(source = nodes[[3]]$id, target = nodes[[1]]$id, type = "ACTED_IN", properties = list(role = "Annie Porter")), list(source = keanu$id, target = nodes[[1]]$id, type = "ACTED_IN", properties = list(role = "Jack Traven")), list(source = keanu$id, target = nodes[[2]]$id, type = "ACTED_IN", properties = list(role = "Johnny Utah")) )) cat("Created", length(edges), "edges in batch\n")
// Batch-create multiple nodes at once nodes, _ := client.CreateNodes(ctx, []astraeadb.NodeInput{ {Labels: []string{"Movie"}, Properties: map[string]interface{}{ "title": "Speed", "year": 1994, }}, {Labels: []string{"Movie"}, Properties: map[string]interface{}{ "title": "Point Break", "year": 1991, }}, {Labels: []string{"Person"}, Properties: map[string]interface{}{ "name": "Sandra Bullock", "born": 1964, }}, }) fmt.Printf("Created %d nodes in batch\n", len(nodes)) // Batch-create multiple edges at once edges, _ := client.CreateEdges(ctx, []astraeadb.EdgeInput{ {Source: nodes[2].ID, Target: nodes[0].ID, Type: "ACTED_IN", Properties: map[string]interface{}{"role": "Annie Porter"}}, {Source: keanu.ID, Target: nodes[0].ID, Type: "ACTED_IN", Properties: map[string]interface{}{"role": "Jack Traven"}}, {Source: keanu.ID, Target: nodes[1].ID, Type: "ACTED_IN", Properties: map[string]interface{}{"role": "Johnny Utah"}}, }) fmt.Printf("Created %d edges in batch\n", len(edges))
// Batch-create multiple nodes at once var nodes = client.createNodes(List.of( new NodeInput(List.of("Movie"), Map.of("title", "Speed", "year", 1994)), new NodeInput(List.of("Movie"), Map.of("title", "Point Break", "year", 1991)), new NodeInput(List.of("Person"), Map.of("name", "Sandra Bullock", "born", 1964)) )); System.out.println("Created " + nodes.size() + " nodes in batch"); // Batch-create multiple edges at once var speed = nodes.get(0); var pointBreak = nodes.get(1); var sandra = nodes.get(2); var edges = client.createEdges(List.of( new EdgeInput(sandra.id(), speed.id(), "ACTED_IN", Map.of("role", "Annie Porter")), new EdgeInput(keanu.id(), speed.id(), "ACTED_IN", Map.of("role", "Jack Traven")), new EdgeInput(keanu.id(), pointBreak.id(), "ACTED_IN", Map.of("role", "Johnny Utah")) )); System.out.println("Created " + edges.size() + " edges in batch");
Summary
In this chapter you learned the fundamental CRUD operations for working with AstraeaDB:
- Create nodes with labels and properties, and edges with types and properties
- Read data by ID lookup or by GQL pattern matching with MATCH
- Update node and edge properties
- Delete edges and nodes (with referential integrity checks)
- Bulk import/export via the CLI or batch API methods
In the next chapter, we will explore schema design and data modeling -- how to structure your graph for real-world applications.