diff --git a/shared/tree-sitter-extractor/src/generator/mod.rs b/shared/tree-sitter-extractor/src/generator/mod.rs index 8167dc2574e0..da13322fe60c 100644 --- a/shared/tree-sitter-extractor/src/generator/mod.rs +++ b/shared/tree-sitter-extractor/src/generator/mod.rs @@ -305,7 +305,18 @@ fn convert_nodes( // type. let members: Set<&str> = n_members .iter() - .map(|n| nodes.get(n).unwrap().dbscheme_name.as_str()) + .map(|n| { + nodes + .get(n) + .unwrap_or_else(|| { + panic!( + "union type '{}' references unknown member node type {:?}", + node.dbscheme_name, n + ) + }) + .dbscheme_name + .as_str() + }) .collect(); entries.push(dbscheme::Entry::Union(dbscheme::Union { name: &node.dbscheme_name, diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 608c332d47e8..eb3b161b295a 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -411,7 +411,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result Result::into) .collect(); }); - field_args.push(quote! { (#field_str, #temp) }); + // An empty splice means the field is absent — skip it + // entirely rather than emitting an empty named field. + field_args.push(quote! { + if !#temp.is_empty() { __fields.push((#field_str, #temp)); } + }); continue; } } @@ -445,7 +449,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result Result)> = Vec::new(); + #(#field_args)* + #ctx.node(#kind_str, __fields) } }) } @@ -475,6 +481,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result( &mut self, - mut f: impl FnMut(Id) -> Result, + mut f: impl FnMut(Id) -> Result, E>, ) -> Result<(), E> { for ids in self.captures.values_mut() { - for id in ids { - *id = f(*id)?; + let mut new_ids = Vec::with_capacity(ids.len()); + for &id in ids.iter() { + new_ids.extend(f(id)?); } + *ids = new_ids; } Ok(()) } diff --git a/shared/yeast/src/dump.rs b/shared/yeast/src/dump.rs index 07ee134e058c..d046c192053d 100644 --- a/shared/yeast/src/dump.rs +++ b/shared/yeast/src/dump.rs @@ -273,6 +273,16 @@ fn dump_node( } } + // Check for required fields that are absent + if let Some((schema, _, _)) = type_check { + for (field_id, field_name) in schema.required_fields_for_kind(node.kind_name()) { + if !node.fields.contains_key(&field_id) { + let name = field_name.unwrap_or("child"); + writeln!(out, "{prefix} <-- ERROR: missing required field '{name}'").unwrap(); + } + } + } + // Unnamed children — skip unnamed tokens (keywords, punctuation) if let Some(children) = node.fields.get(&CHILD_FIELD) { let child_type_check = type_check.map(|(schema, _, _)| { diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 8826c3d6c1e3..0717d27e4b8b 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -563,6 +563,15 @@ impl Node { NodeContent::DynamicString(s) => Some(s.to_string()), } } + + /// Read the child ids stored under a given field, or an empty slice if + /// no such field is present on this node. + pub fn field_children(&self, field_id: FieldId) -> &[Id] { + self.fields + .get(&field_id) + .map(|v| v.as_slice()) + .unwrap_or(&[]) + } } /// The contents of a node is either a range in the original source file, @@ -836,17 +845,9 @@ fn apply_one_shot_rules_inner( // pattern root): re-analyzing it would match the same rule // again indefinitely. if captured_id == id { - return Ok(captured_id); - } - let result = - apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)?; - if result.len() != 1 { - return Err(format!( - "OneShot: recursion on captured node produced {} results, expected exactly 1", - result.len() - )); + return Ok(vec![captured_id]); } - Ok(result[0]) + apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1) })?; return Ok(rule.run_transform(ast, captures, id, fresh)); } diff --git a/shared/yeast/src/node_types_yaml.rs b/shared/yeast/src/node_types_yaml.rs index eb191076be48..797f14cba720 100644 --- a/shared/yeast/src/node_types_yaml.rs +++ b/shared/yeast/src/node_types_yaml.rs @@ -314,6 +314,14 @@ fn apply_yaml_to_schema( node_types.sort_by(|a, b| a.kind.cmp(&b.kind).then(a.named.cmp(&b.named))); node_types.dedup_by(|a, b| a.kind == b.kind && a.named == b.named); schema.set_field_types(parent_kind, field_id, node_types); + schema.set_field_cardinality( + parent_kind, + field_id, + crate::schema::FieldCardinality { + multiple: spec.multiple, + required: spec.required, + }, + ); } } } diff --git a/shared/yeast/src/query.rs b/shared/yeast/src/query.rs index 01e5e22ad730..bcf0f7facab1 100644 --- a/shared/yeast/src/query.rs +++ b/shared/yeast/src/query.rs @@ -178,11 +178,15 @@ impl QueryListElem { let Some(child) = remaining_children.next() else { return Ok(false); }; - if skip_unnamed { - let node = ast.get_node(child).unwrap(); - if !node.is_named() { - continue; - } + let node = ast.get_node(child).unwrap(); + // Skip tree-sitter `extras` (e.g. comments) during + // positional matching: they are conceptually invisible + // between siblings, mirroring tree-sitter query semantics. + if node.is_extra() { + continue; + } + if skip_unnamed && !node.is_named() { + continue; } let snapshot = matches.clone(); if sub_query.do_match(ast, child, matches)? { diff --git a/shared/yeast/src/schema.rs b/shared/yeast/src/schema.rs index c832a57b23ad..bbd425f15a2c 100644 --- a/shared/yeast/src/schema.rs +++ b/shared/yeast/src/schema.rs @@ -8,6 +8,15 @@ pub struct NodeType { pub named: bool, } +/// Multiplicity/optionality of a field declaration. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct FieldCardinality { + /// Whether the field may hold more than one child. + pub multiple: bool, + /// Whether at least one child must be present. + pub required: bool, +} + /// A schema defining node kinds and field names for the output AST. /// Built from a node-types.yml file, independent of any tree-sitter grammar. /// @@ -32,6 +41,7 @@ pub struct Schema { kind_names: BTreeMap, next_kind_id: KindId, field_types: BTreeMap<(String, FieldId), Vec>, + field_cardinalities: BTreeMap<(String, FieldId), FieldCardinality>, supertypes: BTreeMap>, } @@ -52,6 +62,7 @@ impl Schema { kind_names: BTreeMap::new(), next_kind_id: 1, // 0 is reserved field_types: BTreeMap::new(), + field_cardinalities: BTreeMap::new(), supertypes: BTreeMap::new(), } } @@ -196,6 +207,42 @@ impl Schema { .get(&(parent_kind.to_string(), field_id)) } + pub fn set_field_cardinality( + &mut self, + parent_kind: &str, + field_id: FieldId, + cardinality: FieldCardinality, + ) { + self.field_cardinalities + .insert((parent_kind.to_string(), field_id), cardinality); + } + + /// Returns the declared cardinality for a field, if known. + pub fn field_cardinality( + &self, + parent_kind: &str, + field_id: FieldId, + ) -> Option { + self.field_cardinalities + .get(&(parent_kind.to_string(), field_id)) + .copied() + } + + /// Returns an iterator over all `(field_id, field_name)` pairs that are + /// declared as required (`required: true`) for the given `parent_kind`. + pub fn required_fields_for_kind<'a>( + &'a self, + parent_kind: &'a str, + ) -> impl Iterator)> + 'a { + self.field_cardinalities + .iter() + .filter(move |((kind, _), card)| kind == parent_kind && card.required) + .map(move |((_, field_id), _)| { + let name = self.field_name_for_id(*field_id); + (*field_id, name) + }) + } + pub fn set_supertype_members(&mut self, supertype: &str, node_types: Vec) { self.supertypes.insert(supertype.to_string(), node_types); } diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 7f4db1541836..7645f3776f87 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -274,6 +274,44 @@ fn test_query_no_match() { assert!(!matched); } +#[test] +fn test_query_skips_extras_in_positional_match() { + // Regression test: positional wildcards `(_)` must not bind to + // tree-sitter `extras` (e.g. comments) during forward-scan; extras + // are conceptually invisible between siblings, matching tree-sitter + // query semantics. Without this, a later rule that translates a + // captured comment to nothing (a common idiom, e.g. + // `(comment) => ()` in Swift) leaves the capture's match-list empty + // and causes the transform to fail with "Variable X has 0 matches". + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run("[1, # comment\n2]").unwrap(); + + // Navigate to the `array` node: program -> array. + let mut cursor = AstCursor::new(&ast); + cursor.goto_first_child(); + let array_id = cursor.node_id(); + assert_eq!(ast.get_node(array_id).unwrap().kind(), "array"); + + // Two positional wildcards should bind to the two integers, skipping + // the comment that sits between them. + let query = yeast::query!((array (_) @a (_) @b)); + let mut captures = yeast::captures::Captures::new(); + let matched = query.do_match(&ast, array_id, &mut captures).unwrap(); + assert!(matched); + assert_eq!( + ast.get_node(captures.get_var("a").unwrap()) + .unwrap() + .kind(), + "integer" + ); + assert_eq!( + ast.get_node(captures.get_var("b").unwrap()) + .unwrap() + .kind(), + "integer" + ); +} + #[test] fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); diff --git a/unified/extractor/tests/corpus_tests.rs b/unified/extractor/tests/corpus_tests.rs index ac93dd586ec1..0f1057a8e5b9 100644 --- a/unified/extractor/tests/corpus_tests.rs +++ b/unified/extractor/tests/corpus_tests.rs @@ -26,6 +26,13 @@ fn is_header_rule(line: &str) -> bool { trimmed.len() >= 3 && trimmed.chars().all(|c| c == '=') } +fn is_next_case_header(lines: &[&str], i: usize) -> bool { + is_header_rule(lines[i]) + && i + 2 < lines.len() + && !lines[i + 1].trim().is_empty() + && is_header_rule(lines[i + 2]) +} + fn parse_corpus(content: &str) -> Vec { let lines: Vec<&str> = content.lines().collect(); let mut i = 0; @@ -58,48 +65,51 @@ fn parse_corpus(content: &str) -> Vec { let input_start = i; while i < lines.len() && lines[i].trim() != "---" { + if is_next_case_header(&lines, i) { + break; + } i += 1; } - assert!(i < lines.len(), "Missing --- separator for case {name}"); let input = lines[input_start..i].join("\n").trim_end().to_string(); - i += 1; + let raw; + let expected; + if i >= lines.len() || lines[i].trim() != "---" { + // No `---` separator before next case (or EOF). Treat the + // remaining sections as empty. + raw = String::new(); + expected = String::new(); + } else { + i += 1; - // Raw tree-sitter parse section. New-format files have a second - // `---` separator between the raw tree and the mapped AST. Legacy - // files (with only one separator) have no raw section — in that - // case `raw` stays empty and update mode will populate it. - let raw_start = i; - let mut next_sep = i; - while next_sep < lines.len() && lines[next_sep].trim() != "---" { - if is_header_rule(lines[next_sep]) - && next_sep + 2 < lines.len() - && !lines[next_sep + 1].trim().is_empty() - && is_header_rule(lines[next_sep + 2]) - { - break; + // Raw tree-sitter parse section. New-format files have a second + // `---` separator between the raw tree and the mapped AST. Legacy + // files (with only one separator) have no raw section — in that + // case `raw` stays empty and update mode will populate it. + let raw_start = i; + let mut next_sep = i; + while next_sep < lines.len() && lines[next_sep].trim() != "---" { + if is_next_case_header(&lines, next_sep) { + break; + } + next_sep += 1; } - next_sep += 1; - } - let raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" { - let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string(); - i = next_sep + 1; - raw_text - } else { - String::new() - }; + raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" { + let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string(); + i = next_sep + 1; + raw_text + } else { + String::new() + }; - let expected_start = i; - while i < lines.len() { - if is_header_rule(lines[i]) - && i + 2 < lines.len() - && !lines[i + 1].trim().is_empty() - && is_header_rule(lines[i + 2]) - { - break; + let expected_start = i; + while i < lines.len() { + if is_next_case_header(&lines, i) { + break; + } + i += 1; } - i += 1; + expected = lines[expected_start..i].join("\n").trim().to_string(); } - let expected = lines[expected_start..i].join("\n").trim().to_string(); cases.push(CorpusCase { name,