From ac28b4d40444f2711efc931378a91546fe525ec7 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 25 Mar 2026 15:01:21 -0700 Subject: [PATCH 1/2] store, chain/ethereum: Add header-only ChainStore query methods Add `ancestor_block_ptr` and `block_parent_ptr` to the `ChainStore` trait. These methods read only header columns (hash, number, parent_hash) and skip fetching/deserializing the `data` JSONB column, which is significant for callers that never use the block body. `ancestor_block_ptr` reuses the existing recursive CTE but omits the second SQL query that fetches `data`. `block_parent_ptr` queries only the parent_hash and number columns for a given block hash. Update the two callers that were discarding block data anyway: - graphman `chain info` now uses `ancestor_block_ptr` - Ethereum `parent_ptr` (Firehose path) now uses `block_parent_ptr` This is preparatory work for the block cache revamp where the data column becomes nullable for old blocks. --- chain/ethereum/src/chain.rs | 8 +- graph/src/blockchain/mock.rs | 14 ++ graph/src/components/store/traits.rs | 17 +++ node/src/manager/commands/chain.rs | 9 +- store/postgres/src/chain_store.rs | 188 +++++++++++++++++++++++++++ 5 files changed, 227 insertions(+), 9 deletions(-) diff --git a/chain/ethereum/src/chain.rs b/chain/ethereum/src/chain.rs index 14119ddc8fa..96cdd57a639 100644 --- a/chain/ethereum/src/chain.rs +++ b/chain/ethereum/src/chain.rs @@ -1148,11 +1148,9 @@ impl TriggersAdapterTrait for TriggersAdapter { let block = match self.chain_client.as_ref() { ChainClient::Firehose(endpoints) => { let chain_store = self.chain_store.cheap_clone(); - // First try to get the block from the store (typed cache) - if let Ok(blocks) = chain_store.blocks(vec![block.hash.clone()]).await { - if let Some(cached_block) = blocks.into_iter().next() { - return Ok(cached_block.light_block().parent_ptr()); - } + // First try to get the parent pointer from the store header columns + if let Ok(Some(parent)) = chain_store.block_parent_ptr(&block.hash).await { + return Ok(Some(parent)); } // If not in store, fetch from Firehose diff --git a/graph/src/blockchain/mock.rs b/graph/src/blockchain/mock.rs index 5e05e63ee5c..d5d26447c64 100644 --- a/graph/src/blockchain/mock.rs +++ b/graph/src/blockchain/mock.rs @@ -530,6 +530,12 @@ impl ChainStore for MockChainStore { async fn blocks(self: Arc, _hashes: Vec) -> Result, Error> { unimplemented!() } + async fn block_parent_ptr( + self: Arc, + _hash: &BlockHash, + ) -> Result, Error> { + unimplemented!() + } async fn ancestor_block( self: Arc, _block_ptr: BlockPtr, @@ -538,6 +544,14 @@ impl ChainStore for MockChainStore { ) -> Result, Error> { unimplemented!() } + async fn ancestor_block_ptr( + self: Arc, + _block_ptr: BlockPtr, + _offset: BlockNumber, + _root: Option, + ) -> Result, Error> { + unimplemented!() + } async fn cleanup_cached_blocks( &self, _ancestor_count: BlockNumber, diff --git a/graph/src/components/store/traits.rs b/graph/src/components/store/traits.rs index ffa42d5f6aa..755267f4324 100644 --- a/graph/src/components/store/traits.rs +++ b/graph/src/components/store/traits.rs @@ -560,6 +560,13 @@ pub trait ChainStore: ChainHeadStore { /// Returns the blocks present in the store as typed cached blocks. async fn blocks(self: Arc, hashes: Vec) -> Result, Error>; + /// Return the parent block pointer for the block with the given hash. + /// Only reads header columns (hash, number, parent_hash), not the data + /// column. More efficient than `blocks` when only the parent pointer is + /// needed. + async fn block_parent_ptr(self: Arc, hash: &BlockHash) + -> Result, Error>; + /// Returns the blocks present in the store for the given block numbers. async fn block_ptrs_by_numbers( self: Arc, @@ -587,6 +594,16 @@ pub trait ChainStore: ChainHeadStore { root: Option, ) -> Result, Error>; + /// Like `ancestor_block` but returns only the block pointer, not the + /// block data. More efficient when callers only need to identify which + /// block is the ancestor without reading the full block body. + async fn ancestor_block_ptr( + self: Arc, + block_ptr: BlockPtr, + offset: BlockNumber, + root: Option, + ) -> Result, Error>; + /// Remove old blocks from the cache we maintain in the database and /// return a pair containing the number of the oldest block retained /// and the number of blocks deleted. diff --git a/node/src/manager/commands/chain.rs b/node/src/manager/commands/chain.rs index f0dfd61b23d..8afff467ffc 100644 --- a/node/src/manager/commands/chain.rs +++ b/node/src/manager/commands/chain.rs @@ -134,10 +134,11 @@ pub async fn info( let head_block = chain_store.cheap_clone().chain_head_ptr().await?; let ancestor = match &head_block { None => None, - Some(head_block) => chain_store - .ancestor_block(head_block.clone(), offset, None) - .await? - .map(|x| x.1), + Some(head_block) => { + chain_store + .ancestor_block_ptr(head_block.clone(), offset, None) + .await? + } }; row("name", chain.name); diff --git a/store/postgres/src/chain_store.rs b/store/postgres/src/chain_store.rs index 5bd15371eb0..b5b777665f5 100644 --- a/store/postgres/src/chain_store.rs +++ b/store/postgres/src/chain_store.rs @@ -745,6 +745,52 @@ mod data { .collect()) } + /// Return the parent block pointer for the block with the given hash. + /// Only reads header columns, not the data column. + pub(super) async fn block_parent_ptr( + &self, + conn: &mut AsyncPgConnection, + chain: &str, + hash: &BlockHash, + ) -> Result, Error> { + let result = match self { + Storage::Shared => { + use public::ethereum_blocks as b; + + b::table + .select((b::parent_hash, b::number)) + .filter(b::network_name.eq(chain)) + .filter(b::hash.eq(format!("{:x}", hash))) + .first::<(Option, i64)>(conn) + .await + .optional()? + .and_then(|(parent_hash, number)| { + parent_hash.map(|ph| { + Ok::<_, Error>(BlockPtr::new( + ph.parse()?, + i32::try_from(number).unwrap() - 1, + )) + }) + }) + .transpose()? + } + Storage::Private(Schema { blocks, .. }) => blocks + .table() + .select((blocks.parent_hash(), blocks.number())) + .filter(blocks.hash().eq(hash.as_slice())) + .first::<(Vec, i64)>(conn) + .await + .optional()? + .map(|(parent_hash, number)| { + BlockPtr::new( + BlockHash::from(parent_hash), + i32::try_from(number).unwrap() - 1, + ) + }), + }; + Ok(result) + } + pub(super) async fn block_hashes_by_block_number( &self, conn: &mut AsyncPgConnection, @@ -1222,6 +1268,89 @@ mod data { Ok(data_and_ptr) } + /// Like `ancestor_block` but returns only the `BlockPtr` without + /// fetching the `data` column. + pub(super) async fn ancestor_block_ptr( + &self, + conn: &mut AsyncPgConnection, + block_ptr: BlockPtr, + offset: BlockNumber, + root: Option, + ) -> Result, Error> { + let short_circuit_predicate = match root { + Some(_) => "and b.parent_hash <> $3", + None => "", + }; + + match self { + Storage::Shared => { + let query = + self.ancestor_block_query(short_circuit_predicate, "ethereum_blocks"); + + #[derive(QueryableByName)] + struct BlockHashAndNumber { + #[diesel(sql_type = Text)] + hash: String, + #[diesel(sql_type = BigInt)] + number: i64, + } + + let block = match root { + Some(root) => sql_query(query) + .bind::(block_ptr.hash_hex()) + .bind::(offset as i64) + .bind::(root.hash_hex()) + .get_result::(conn), + None => sql_query(query) + .bind::(block_ptr.hash_hex()) + .bind::(offset as i64) + .get_result::(conn), + } + .await + .optional()?; + + match block { + None => Ok(None), + Some(block) => Ok(Some(BlockPtr::new( + BlockHash::from_str(&block.hash)?, + i32::try_from(block.number).unwrap(), + ))), + } + } + Storage::Private(Schema { blocks, .. }) => { + let query = + self.ancestor_block_query(short_circuit_predicate, blocks.qname.as_str()); + + #[derive(QueryableByName)] + struct BlockHashAndNumber { + #[diesel(sql_type = Bytea)] + hash: Vec, + #[diesel(sql_type = BigInt)] + number: i64, + } + + let block = match &root { + Some(root) => sql_query(query) + .bind::(block_ptr.hash_slice()) + .bind::(offset as i64) + .bind::(root.as_slice()) + .get_result::(conn), + None => sql_query(query) + .bind::(block_ptr.hash_slice()) + .bind::(offset as i64) + .get_result::(conn), + } + .await + .optional()?; + + match block { + None => Ok(None), + Some(block) => Ok(Some(BlockPtr::from((block.hash, block.number)))), + } + } + } + } + pub(super) async fn delete_blocks_before( &self, conn: &mut AsyncPgConnection, @@ -2925,6 +3054,47 @@ impl ChainStoreTrait for ChainStore { Ok(result) } + async fn ancestor_block_ptr( + self: Arc, + block_ptr: BlockPtr, + offset: BlockNumber, + root: Option, + ) -> Result, Error> { + ensure!( + block_ptr.number >= offset, + "block offset {} for block `{}` points to before genesis block", + offset, + block_ptr.hash_hex() + ); + + // Check the in-memory cache first. + if let Some((ptr, _)) = self.recent_blocks_cache.get_ancestor(&block_ptr, offset) { + return Ok(Some(ptr)); + } + + // Cache miss, query the database (CTE only, no data fetch). + let mut conn = self.pool.get_permitted().await?; + self.storage + .ancestor_block_ptr(&mut conn, block_ptr, offset, root) + .await + } + + async fn block_parent_ptr( + self: Arc, + hash: &BlockHash, + ) -> Result, Error> { + // Check the in-memory cache first. + if let Some(parent_ptr) = self.recent_blocks_cache.get_parent_ptr_by_hash(hash) { + return Ok(Some(parent_ptr)); + } + + // Cache miss, query the database. + let mut conn = self.pool.get_permitted().await?; + self.storage + .block_parent_ptr(&mut conn, &self.chain, hash) + .await + } + async fn cleanup_cached_blocks( &self, ancestor_count: BlockNumber, @@ -3121,6 +3291,13 @@ mod recent_blocks_cache { .and_then(|block| block.data.as_ref().map(|data| (&block.ptr, data))) } + fn get_parent_ptr_by_hash(&self, hash: &BlockHash) -> Option { + self.blocks + .values() + .find(|block| &block.ptr.hash == hash) + .map(|block| BlockPtr::new(block.parent_hash.clone(), block.ptr.number - 1)) + } + fn get_block_by_number(&self, number: BlockNumber) -> Option<&CacheBlock> { self.blocks.get(&number) } @@ -3237,6 +3414,17 @@ mod recent_blocks_cache { block_opt } + pub fn get_parent_ptr_by_hash(&self, hash: &BlockHash) -> Option { + let inner = self.inner.read(); + let result = inner.get_parent_ptr_by_hash(hash); + if result.is_some() { + inner.metrics.record_cache_hit(&inner.network); + } else { + inner.metrics.record_cache_miss(&inner.network); + } + result + } + pub fn get_blocks_by_hash(&self, hashes: &[BlockHash]) -> Vec<(BlockPtr, CachedBlock)> { let inner = self.inner.read(); let blocks: Vec<_> = hashes From 9bf36c700b2fa2515217c7f5798a77acbf0f1172 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 26 Mar 2026 12:02:24 -0700 Subject: [PATCH 2/2] store: Fix block_parent_ptr assuming sequential block numbers Look up the parent block's actual number instead of computing `current_number - 1`, which is incorrect for blockchains with gaps in block numbers (e.g., NEAR). --- store/postgres/src/chain_store.rs | 81 ++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/store/postgres/src/chain_store.rs b/store/postgres/src/chain_store.rs index b5b777665f5..fc0e15f6b04 100644 --- a/store/postgres/src/chain_store.rs +++ b/store/postgres/src/chain_store.rs @@ -129,7 +129,10 @@ mod data { use diesel::dsl::sql; use diesel::insert_into; use diesel::sql_types::{Array, Binary, Bool, Nullable, Text}; - use diesel::{delete, sql_query, ExpressionMethods, JoinOnDsl, OptionalExtension, QueryDsl}; + use diesel::{ + delete, sql_query, BoolExpressionMethods, ExpressionMethods, JoinOnDsl, + NullableExpressionMethods, OptionalExtension, QueryDsl, + }; use diesel::{ deserialize::FromSql, pg::Pg, @@ -757,36 +760,57 @@ mod data { Storage::Shared => { use public::ethereum_blocks as b; - b::table - .select((b::parent_hash, b::number)) - .filter(b::network_name.eq(chain)) - .filter(b::hash.eq(format!("{:x}", hash))) - .first::<(Option, i64)>(conn) + let (child, parent) = diesel::alias!( + public::ethereum_blocks as child, + public::ethereum_blocks as parent + ); + + child + .inner_join( + parent.on(child + .field(b::parent_hash) + .assume_not_null() + .eq(parent.field(b::hash)) + .and(parent.field(b::network_name).eq(chain))), + ) + .select((parent.field(b::hash), parent.field(b::number))) + .filter(child.field(b::hash).eq(format!("{:x}", hash))) + .filter(child.field(b::network_name).eq(chain)) + .first::<(String, i64)>(conn) .await .optional()? - .and_then(|(parent_hash, number)| { - parent_hash.map(|ph| { - Ok::<_, Error>(BlockPtr::new( - ph.parse()?, - i32::try_from(number).unwrap() - 1, - )) - }) + .map(|(h, n)| { + Ok::<_, Error>(BlockPtr::new(h.parse()?, i32::try_from(n).unwrap())) }) .transpose()? } - Storage::Private(Schema { blocks, .. }) => blocks - .table() - .select((blocks.parent_hash(), blocks.number())) - .filter(blocks.hash().eq(hash.as_slice())) - .first::<(Vec, i64)>(conn) - .await - .optional()? - .map(|(parent_hash, number)| { - BlockPtr::new( - BlockHash::from(parent_hash), - i32::try_from(number).unwrap() - 1, - ) - }), + Storage::Private(Schema { blocks, .. }) => { + // We can't use diesel::alias! here because the table is + // dynamic, so we write the SQL query manually + + let query = format!( + "SELECT parent.hash, parent.number \ + FROM {qname} child, {qname} parent \ + WHERE child.hash = $1 \ + AND child.parent_hash = parent.hash", + qname = blocks.qname + ); + + #[derive(QueryableByName)] + struct BlockHashAndNumber { + #[diesel(sql_type = Bytea)] + hash: Vec, + #[diesel(sql_type = BigInt)] + number: i64, + } + + sql_query(query) + .bind::(hash.as_slice()) + .get_result::(conn) + .await + .optional()? + .map(|block| BlockPtr::from((block.hash, block.number))) + } }; Ok(result) } @@ -3292,10 +3316,11 @@ mod recent_blocks_cache { } fn get_parent_ptr_by_hash(&self, hash: &BlockHash) -> Option { + let block = self.blocks.values().find(|b| &b.ptr.hash == hash)?; self.blocks .values() - .find(|block| &block.ptr.hash == hash) - .map(|block| BlockPtr::new(block.parent_hash.clone(), block.ptr.number - 1)) + .find(|b| b.ptr.hash == block.parent_hash) + .map(|parent| parent.ptr.clone()) } fn get_block_by_number(&self, number: BlockNumber) -> Option<&CacheBlock> {