Skip to content

Commit 8722f9e

Browse files
authored
fingerprint_raw and scan_raw (#2)
1 parent aa735eb commit 8722f9e

8 files changed

Lines changed: 218 additions & 1 deletion

File tree

build.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
6969
// Blocklist raw deparse functions that use types from bindings_raw
7070
.blocklist_function("pg_query_deparse_raw")
7171
.blocklist_function("pg_query_deparse_raw_opts")
72+
// Blocklist raw fingerprint function that uses types from bindings_raw
73+
.blocklist_function("pg_query_fingerprint_raw")
7274
.generate()
7375
.map_err(|_| "Unable to generate bindings")?
7476
.write_to_file(out_dir.join("bindings.rs"))?;
@@ -433,6 +435,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
433435
.allowlist_function("pg_query_list_make1")
434436
.allowlist_function("pg_query_list_append")
435437
.allowlist_function("pg_query_deparse_nodes")
438+
// Raw scan functions (bypass protobuf)
439+
.allowlist_type("PgQueryRawScanToken")
440+
.allowlist_type("PgQueryRawScanResult")
441+
.allowlist_function("pg_query_scan_raw")
442+
.allowlist_function("pg_query_free_raw_scan_result")
443+
// Raw fingerprint (works with raw parse result)
444+
.allowlist_type("PgQueryFingerprintResult")
445+
.allowlist_function("pg_query_fingerprint_raw")
446+
.allowlist_function("pg_query_free_fingerprint_result")
436447
.generate()
437448
.map_err(|_| "Unable to generate raw bindings")?
438449
.write_to_file(out_dir.join("bindings_raw.rs"))?;

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ mod parse_result;
5050
pub mod protobuf;
5151
mod query;
5252
mod raw_deparse;
53+
mod raw_fingerprint;
5354
mod raw_parse;
55+
mod raw_scan;
5456
mod summary;
5557
mod summary_result;
5658
mod truncate;
@@ -62,7 +64,9 @@ pub use node_ref::*;
6264
pub use parse_result::*;
6365
pub use query::*;
6466
pub use raw_deparse::deparse_raw;
67+
pub use raw_fingerprint::fingerprint_raw;
6568
pub use raw_parse::parse_raw;
69+
pub use raw_scan::scan_raw;
6670
pub use summary::*;
6771
pub use summary_result::*;
6872
pub use truncate::*;

src/node_enum.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ impl NodeEnum {
1919
})
2020
}
2121

22+
pub fn deparse_raw(&self) -> Result<String> {
23+
crate::deparse_raw(&protobuf::ParseResult {
24+
version: crate::bindings::PG_VERSION_NUM as i32,
25+
stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(Node { node: Some(self.clone()) })), stmt_location: 0, stmt_len: 0 }],
26+
})
27+
}
28+
2229
pub fn nodes(&self) -> Vec<(NodeRef<'_>, i32, Context, bool)> {
2330
let mut iter = vec![(self.to_ref(), 0, Context::None, false)];
2431
let mut nodes = Vec::new();

src/node_mut.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,13 @@ impl NodeMut {
280280
})
281281
}
282282

283+
pub fn deparse_raw(&self) -> Result<String> {
284+
crate::deparse_raw(&protobuf::ParseResult {
285+
version: crate::bindings::PG_VERSION_NUM as i32,
286+
stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(Node { node: Some(self.to_enum()?) })), stmt_location: 0, stmt_len: 0 }],
287+
})
288+
}
289+
283290
pub fn to_enum(&self) -> Result<NodeEnum> {
284291
unsafe {
285292
let err = Error::InvalidPointer;

src/node_structs.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ impl Node {
77
stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(self.clone())), stmt_location: 0, stmt_len: 0 }],
88
})
99
}
10+
11+
pub fn deparse_raw(&self) -> Result<String> {
12+
crate::deparse_raw(&protobuf::ParseResult {
13+
version: crate::bindings::PG_VERSION_NUM as i32,
14+
stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(self.clone())), stmt_location: 0, stmt_len: 0 }],
15+
})
16+
}
1017
}
1118

1219
impl protobuf::Alias {

src/raw_fingerprint.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
//! Direct fingerprinting that bypasses protobuf serialization/deserialization.
2+
//!
3+
//! This module provides a faster alternative to the standard fingerprint function by
4+
//! parsing directly into PostgreSQL's internal structures and fingerprinting them
5+
//! without going through protobuf serialization.
6+
7+
use crate::bindings_raw;
8+
use crate::query::Fingerprint;
9+
use crate::{Error, Result};
10+
use std::ffi::{CStr, CString};
11+
12+
/// Fingerprints a SQL statement without going through protobuf serialization.
13+
///
14+
/// This function is faster than `fingerprint` because it skips the protobuf encode/decode step.
15+
/// The SQL is parsed directly into PostgreSQL's internal structures and fingerprinted there.
16+
///
17+
/// # Example
18+
///
19+
/// ```rust
20+
/// let result = pg_query::fingerprint_raw("SELECT * FROM contacts WHERE name='Paul'").unwrap();
21+
/// assert_eq!(result.hex, "0e2581a461ece536");
22+
/// ```
23+
pub fn fingerprint_raw(statement: &str) -> Result<Fingerprint> {
24+
let input = CString::new(statement)?;
25+
26+
// Parse the SQL into raw C structures
27+
let parse_result = unsafe { bindings_raw::pg_query_parse_raw(input.as_ptr()) };
28+
29+
// Fingerprint the raw parse tree
30+
let fingerprint_result = unsafe { bindings_raw::pg_query_fingerprint_raw(parse_result) };
31+
32+
// Free the parse result (the fingerprint result has its own copies of any needed data)
33+
unsafe { bindings_raw::pg_query_free_raw_parse_result(parse_result) };
34+
35+
// Convert the fingerprint result to Rust types
36+
let result = if !fingerprint_result.error.is_null() {
37+
let message = unsafe { CStr::from_ptr((*fingerprint_result.error).message) }.to_string_lossy().to_string();
38+
Err(Error::Parse(message))
39+
} else {
40+
let hex = unsafe { CStr::from_ptr(fingerprint_result.fingerprint_str) };
41+
Ok(Fingerprint { value: fingerprint_result.fingerprint, hex: hex.to_string_lossy().to_string() })
42+
};
43+
44+
unsafe { bindings_raw::pg_query_free_fingerprint_result(fingerprint_result) };
45+
result
46+
}
47+
48+
#[cfg(test)]
mod tests {
    use super::*;

    /// A valid statement yields a non-empty hex fingerprint of 16 characters.
    #[test]
    fn test_fingerprint_raw_basic() {
        let fp = fingerprint_raw("SELECT * FROM users").unwrap();
        assert!(!fp.hex.is_empty());
        assert_eq!(fp.hex.len(), 16);
    }

    /// The raw path and the standard `fingerprint` path agree on value and hex.
    #[test]
    fn test_fingerprint_raw_matches_fingerprint() {
        let sql = "SELECT * FROM contacts WHERE name='Paul'";
        let via_raw = fingerprint_raw(sql).unwrap();
        let via_std = crate::fingerprint(sql).unwrap();

        assert_eq!(via_raw.value, via_std.value);
        assert_eq!(via_raw.hex, via_std.hex);
    }

    /// Statements that differ only in a literal value share one fingerprint.
    #[test]
    fn test_fingerprint_raw_normalizes_values() {
        let with_one = fingerprint_raw("SELECT * FROM users WHERE id = 1").unwrap();
        let with_999 = fingerprint_raw("SELECT * FROM users WHERE id = 999").unwrap();

        assert_eq!(with_one.value, with_999.value);
        assert_eq!(with_one.hex, with_999.hex);
    }

    /// Unparseable input is reported as an error rather than a fingerprint.
    #[test]
    fn test_fingerprint_raw_error() {
        let outcome = fingerprint_raw("NOT VALID SQL @#$");
        assert!(outcome.is_err());
    }
}

src/raw_scan.rs

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//! Direct scanning that bypasses protobuf serialization/deserialization.
2+
//!
3+
//! This module provides a faster alternative to the protobuf-based scanning by
4+
//! directly reading the scanner's token output and converting it to Rust protobuf types.
5+
6+
use crate::bindings;
7+
use crate::bindings_raw;
8+
use crate::protobuf;
9+
use crate::{Error, Result};
10+
use std::ffi::{CStr, CString};
11+
12+
/// Scans a SQL statement directly into protobuf types without going through protobuf serialization.
13+
///
14+
/// This function is faster than `scan` because it skips the protobuf encode/decode step.
15+
/// The tokens are read directly from the C scanner output.
16+
///
17+
/// # Example
18+
///
19+
/// ```rust
20+
/// let result = pg_query::scan_raw("SELECT * FROM users").unwrap();
21+
/// assert!(!result.tokens.is_empty());
22+
/// ```
23+
pub fn scan_raw(sql: &str) -> Result<protobuf::ScanResult> {
24+
let input = CString::new(sql)?;
25+
let result = unsafe { bindings_raw::pg_query_scan_raw(input.as_ptr()) };
26+
27+
let scan_result = if !result.error.is_null() {
28+
let message = unsafe { CStr::from_ptr((*result.error).message) }.to_string_lossy().to_string();
29+
Err(Error::Scan(message))
30+
} else {
31+
// Convert the C tokens to protobuf types
32+
let tokens = unsafe { convert_tokens(result.tokens, result.n_tokens) };
33+
Ok(protobuf::ScanResult { version: bindings::PG_VERSION_NUM as i32, tokens })
34+
};
35+
36+
unsafe { bindings_raw::pg_query_free_raw_scan_result(result) };
37+
scan_result
38+
}
39+
40+
/// Converts C scan tokens to protobuf ScanToken vector.
41+
unsafe fn convert_tokens(tokens: *mut bindings_raw::PgQueryRawScanToken, n_tokens: usize) -> Vec<protobuf::ScanToken> {
42+
if tokens.is_null() || n_tokens == 0 {
43+
return Vec::new();
44+
}
45+
46+
let mut result = Vec::with_capacity(n_tokens);
47+
48+
for i in 0..n_tokens {
49+
let token = &*tokens.add(i);
50+
result.push(protobuf::ScanToken { start: token.start, end: token.end, token: token.token, keyword_kind: token.keyword_kind });
51+
}
52+
53+
result
54+
}
55+
56+
#[cfg(test)]
mod tests {
    use super::*;

    /// The first token of a simple SELECT spans bytes 0..6 (the `SELECT` keyword).
    #[test]
    fn test_scan_raw_basic() {
        let scanned = scan_raw("SELECT * FROM users").unwrap();
        assert!(!scanned.tokens.is_empty());

        let first = &scanned.tokens[0];
        assert_eq!(first.start, 0);
        assert_eq!(first.end, 6);
    }

    /// The raw path and the protobuf-based `scan` path produce the same tokens.
    #[test]
    fn test_scan_raw_matches_scan() {
        let sql = "SELECT id, name FROM users WHERE active = true";
        let via_raw = scan_raw(sql).unwrap();
        let via_prost = crate::scan(sql).unwrap();

        assert_eq!(via_raw.version, via_prost.version);
        assert_eq!(via_raw.tokens.len(), via_prost.tokens.len());

        for (raw, prost) in via_raw.tokens.iter().zip(via_prost.tokens.iter()) {
            assert_eq!(raw.start, prost.start);
            assert_eq!(raw.end, prost.end);
            assert_eq!(raw.token, prost.token);
            assert_eq!(raw.keyword_kind, prost.keyword_kind);
        }
    }

    /// Empty input scans successfully and yields no tokens.
    #[test]
    fn test_scan_raw_empty() {
        let scanned = scan_raw("").unwrap();
        assert!(scanned.tokens.is_empty());
    }

    /// Quoted identifiers, comments and keyword-like names still produce tokens.
    #[test]
    fn test_scan_raw_complex() {
        let sql = r#"SELECT "column" AS left /* comment */ FROM between"#;
        let scanned = scan_raw(sql).unwrap();
        assert!(!scanned.tokens.is_empty());
    }
}

0 commit comments

Comments
 (0)