Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion shared/tree-sitter-extractor/src/generator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,18 @@ fn convert_nodes(
// type.
let members: Set<&str> = n_members
.iter()
.map(|n| nodes.get(n).unwrap().dbscheme_name.as_str())
.map(|n| {
nodes
.get(n)
.unwrap_or_else(|| {
panic!(
"union type '{}' references unknown member node type {:?}",
node.dbscheme_name, n
)
})
.dbscheme_name
.as_str()
})
.collect();
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: &node.dbscheme_name,
Expand Down
19 changes: 15 additions & 4 deletions shared/yeast-macros/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Named fields — compute each value into a temp, then reference it
while peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
let field_str = field_name.to_string();
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
expect_punct(tokens, ':', "expected `:` after field name")?;
let temp = Ident::new(
&format!("__field_{field_str}_{field_counter}"),
Expand All @@ -437,15 +437,19 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
.map(::std::convert::Into::<usize>::into)
.collect();
});
field_args.push(quote! { (#field_str, #temp) });
// An empty splice means the field is absent — skip it
// entirely rather than emitting an empty named field.
field_args.push(quote! {
if !#temp.is_empty() { __fields.push((#field_str, #temp)); }
});
continue;
}
}
}

let value = parse_direct_node(tokens, ctx)?;
stmts.push(quote! { let #temp: usize = #value; });
field_args.push(quote! { (#field_str, vec![#temp]) });
field_args.push(quote! { __fields.push((#field_str, vec![#temp])); });
}

// After all named fields, no other tokens are allowed.
Expand All @@ -461,7 +465,9 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
Ok(quote! {
{
#(#stmts)*
#ctx.node(#kind_str, vec![#(#field_args),*])
let mut __fields: Vec<(&str, Vec<usize>)> = Vec::new();
#(#field_args)*
#ctx.node(#kind_str, __fields)
}
})
}
Expand All @@ -475,6 +481,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
let group = expect_group(tokens, Delimiter::Parenthesis)?;
let mut inner = group.stream().into_iter().peekable();

// Empty `()` represents an empty sequence — emit nothing.
if inner.peek().is_none() {
continue;
}

// Regular node
let node = parse_direct_node_inner(&mut inner, ctx)?;
items.push(quote! { __nodes.push(#node); });
Expand Down
14 changes: 9 additions & 5 deletions shared/yeast/src/captures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,20 @@ impl Captures {
}

/// Apply a fallible function to every captured id (across all keys),
/// replacing each id with the result. Stops and returns the error on
/// the first failure.
/// replacing each id with the results. A function returning an empty
/// vector removes the capture; returning multiple ids splices them
/// into the capture's value list (suitable for `*`/`+` captures).
/// Stops and returns the error on the first failure.
pub fn try_map_all_captures<E>(
&mut self,
mut f: impl FnMut(Id) -> Result<Id, E>,
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
) -> Result<(), E> {
for ids in self.captures.values_mut() {
for id in ids {
*id = f(*id)?;
let mut new_ids = Vec::with_capacity(ids.len());
for &id in ids.iter() {
new_ids.extend(f(id)?);
}
*ids = new_ids;
}
Ok(())
}
Expand Down
10 changes: 10 additions & 0 deletions shared/yeast/src/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,16 @@ fn dump_node(
}
}

// Check for required fields that are absent
if let Some((schema, _, _)) = type_check {
for (field_id, field_name) in schema.required_fields_for_kind(node.kind_name()) {
if !node.fields.contains_key(&field_id) {
let name = field_name.unwrap_or("child");
writeln!(out, "{prefix} <-- ERROR: missing required field '{name}'").unwrap();
}
}
}

// Unnamed children — skip unnamed tokens (keywords, punctuation)
if let Some(children) = node.fields.get(&CHILD_FIELD) {
let child_type_check = type_check.map(|(schema, _, _)| {
Expand Down
21 changes: 11 additions & 10 deletions shared/yeast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,15 @@ impl Node {
NodeContent::DynamicString(s) => Some(s.to_string()),
}
}

/// Returns the ids of the children attached to this node under `field_id`.
///
/// A node with no entry for `field_id` yields an empty slice, so callers can
/// iterate the result directly without first checking for presence.
pub fn field_children(&self, field_id: FieldId) -> &[Id] {
    match self.fields.get(&field_id) {
        Some(children) => children.as_slice(),
        None => &[],
    }
}
}

/// The contents of a node is either a range in the original source file,
Expand Down Expand Up @@ -836,17 +845,9 @@ fn apply_one_shot_rules_inner(
// pattern root): re-analyzing it would match the same rule
// again indefinitely.
if captured_id == id {
return Ok(captured_id);
}
let result =
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)?;
if result.len() != 1 {
return Err(format!(
"OneShot: recursion on captured node produced {} results, expected exactly 1",
result.len()
));
return Ok(vec![captured_id]);
}
Ok(result[0])
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
})?;
return Ok(rule.run_transform(ast, captures, id, fresh));
}
Expand Down
8 changes: 8 additions & 0 deletions shared/yeast/src/node_types_yaml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,14 @@ fn apply_yaml_to_schema(
node_types.sort_by(|a, b| a.kind.cmp(&b.kind).then(a.named.cmp(&b.named)));
node_types.dedup_by(|a, b| a.kind == b.kind && a.named == b.named);
schema.set_field_types(parent_kind, field_id, node_types);
schema.set_field_cardinality(
parent_kind,
field_id,
crate::schema::FieldCardinality {
multiple: spec.multiple,
required: spec.required,
},
);
}
}
}
Expand Down
14 changes: 9 additions & 5 deletions shared/yeast/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,15 @@ impl QueryListElem {
let Some(child) = remaining_children.next() else {
return Ok(false);
};
if skip_unnamed {
let node = ast.get_node(child).unwrap();
if !node.is_named() {
continue;
}
let node = ast.get_node(child).unwrap();
// Skip tree-sitter `extras` (e.g. comments) during
// positional matching: they are conceptually invisible
// between siblings, mirroring tree-sitter query semantics.
if node.is_extra() {
continue;
}
if skip_unnamed && !node.is_named() {
continue;
}
let snapshot = matches.clone();
if sub_query.do_match(ast, child, matches)? {
Expand Down
47 changes: 47 additions & 0 deletions shared/yeast/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ pub struct NodeType {
pub named: bool,
}

/// Multiplicity/optionality of a field declaration.
///
/// `Schema` keeps one of these per `(parent kind, field id)` pair. The value is
/// a plain pair of flags and is `Copy`, so it is handed out by value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct FieldCardinality {
/// Whether the field may hold more than one child.
pub multiple: bool,
/// Whether at least one child must be present.
///
/// Fields with this flag set are the ones reported by
/// `Schema::required_fields_for_kind`.
pub required: bool,
}

/// A schema defining node kinds and field names for the output AST.
/// Built from a node-types.yml file, independent of any tree-sitter grammar.
///
Expand All @@ -32,6 +41,7 @@ pub struct Schema {
kind_names: BTreeMap<KindId, &'static str>,
next_kind_id: KindId,
field_types: BTreeMap<(String, FieldId), Vec<NodeType>>,
field_cardinalities: BTreeMap<(String, FieldId), FieldCardinality>,
supertypes: BTreeMap<String, Vec<NodeType>>,
}

Expand All @@ -52,6 +62,7 @@ impl Schema {
kind_names: BTreeMap::new(),
next_kind_id: 1, // 0 is reserved
field_types: BTreeMap::new(),
field_cardinalities: BTreeMap::new(),
supertypes: BTreeMap::new(),
}
}
Expand Down Expand Up @@ -196,6 +207,42 @@ impl Schema {
.get(&(parent_kind.to_string(), field_id))
}

/// Records the cardinality declared for `field_id` on nodes of kind
/// `parent_kind`.
///
/// The entry is keyed by `(parent_kind, field_id)`; a later call with the
/// same key overwrites the earlier value.
pub fn set_field_cardinality(
    &mut self,
    parent_kind: &str,
    field_id: FieldId,
    cardinality: FieldCardinality,
) {
    let key = (String::from(parent_kind), field_id);
    self.field_cardinalities.insert(key, cardinality);
}

/// Looks up the cardinality recorded for `field_id` on `parent_kind`.
///
/// Returns `None` when nothing has been recorded for that
/// `(parent_kind, field_id)` pair.
pub fn field_cardinality(
    &self,
    parent_kind: &str,
    field_id: FieldId,
) -> Option<FieldCardinality> {
    let key = (String::from(parent_kind), field_id);
    self.field_cardinalities.get(&key).copied()
}

/// Lazily yields every `(field_id, field_name)` pair whose recorded
/// cardinality has `required == true` for the given `parent_kind`.
///
/// The name component is whatever `field_name_for_id` returns for the id,
/// so it is `None` when that lookup has no name for the field.
pub fn required_fields_for_kind<'a>(
    &'a self,
    parent_kind: &'a str,
) -> impl Iterator<Item = (FieldId, Option<&'static str>)> + 'a {
    self.field_cardinalities
        .iter()
        .filter_map(move |((kind, field_id), cardinality)| {
            if kind == parent_kind && cardinality.required {
                Some((*field_id, self.field_name_for_id(*field_id)))
            } else {
                None
            }
        })
}

/// Registers `node_types` as the members of `supertype`.
///
/// Entries are keyed by the supertype's name; calling this again with the
/// same name replaces the previously stored member list.
pub fn set_supertype_members(&mut self, supertype: &str, node_types: Vec<NodeType>) {
self.supertypes.insert(supertype.to_string(), node_types);
}
Expand Down
38 changes: 38 additions & 0 deletions shared/yeast/tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,44 @@ fn test_query_no_match() {
assert!(!matched);
}

#[test]
fn test_query_skips_extras_in_positional_match() {
    // Regression guard: while scanning forward, a positional wildcard `(_)`
    // must never capture a tree-sitter `extra` such as a comment. Extras are
    // treated as invisible between siblings, as in tree-sitter queries.
    // If a comment were captured, a later rule that rewrites it to nothing
    // (a common idiom, e.g. `(comment) => ()` in Swift) would empty the
    // capture's match-list and the transform would fail with
    // "Variable X has 0 matches".
    let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let ast = runner.run("[1, # comment\n2]").unwrap();

    // The array literal is the first child of the program root.
    let mut cursor = AstCursor::new(&ast);
    cursor.goto_first_child();
    let array_id = cursor.node_id();
    assert_eq!(ast.get_node(array_id).unwrap().kind(), "array");

    // With the comment skipped, the two wildcards line up with `1` and `2`.
    let query = yeast::query!((array (_) @a (_) @b));
    let mut captures = yeast::captures::Captures::new();
    assert!(query.do_match(&ast, array_id, &mut captures).unwrap());

    let first = captures.get_var("a").unwrap();
    assert_eq!(ast.get_node(first).unwrap().kind(), "integer");

    let second = captures.get_var("b").unwrap();
    assert_eq!(ast.get_node(second).unwrap().kind(), "integer");
}

#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
Expand Down
Loading
Loading