From c9fcdf3e8037f70cd46a7466266074b4b557c459 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Tue, 24 Mar 2026 18:47:52 +0000 Subject: [PATCH] JS: Add regression test for YAML extraction SnakeYAML 2.3 has [a bug](https://bitbucket.org/snakeyaml/snakeyaml/issues/1098) where it crashes with an `IndexOutOfBoundsException` when a Unicode surrogate pair (e.g. an emoji) straddles the 1024 character internal buffer boundary. This happens because the high surrogate can end up as the last character in the data window, and the reader tries to read the low surrogate past the end of the buffer. This caused languages that extract YAML, most notably JavaScript and Actions, to fail when the codebase contained a YAML file with an emoji at an unlucky position in the file. --- .../yaml/input/emoji_buffer_boundary.yml | 2 ++ .../trap/emoji_buffer_boundary.yml.trap | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 javascript/extractor/tests/yaml/input/emoji_buffer_boundary.yml create mode 100644 javascript/extractor/tests/yaml/output/trap/emoji_buffer_boundary.yml.trap diff --git a/javascript/extractor/tests/yaml/input/emoji_buffer_boundary.yml b/javascript/extractor/tests/yaml/input/emoji_buffer_boundary.yml new file mode 100644 index 000000000000..a254c885d383 --- /dev/null +++ b/javascript/extractor/tests/yaml/input/emoji_buffer_boundary.yml @@ -0,0 +1,2 @@ +# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +key: 🚀 diff --git a/javascript/extractor/tests/yaml/output/trap/emoji_buffer_boundary.yml.trap b/javascript/extractor/tests/yaml/output/trap/emoji_buffer_boundary.yml.trap new file mode 100644 index 000000000000..936088d8c091 --- /dev/null +++ b/javascript/extractor/tests/yaml/output/trap/emoji_buffer_boundary.yml.trap @@ -0,0 +1,27 @@ +#10000=@"/emoji_buffer_boundary.yml;sourcefile" +files(#10000,"/emoji_buffer_boundary.yml") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=* +#20001=* +yaml_scalars(#20001,0,"key") +yaml(#20001,0,#20000,1,"tag:yaml.org,2002:str","key") +#20002=@"loc,{#10000},2,1,2,3" +locations_default(#20002,#10000,2,1,2,3) +yaml_locations(#20001,#20002) +#20003=* +yaml_scalars(#20003,0,"🚀") +yaml(#20003,0,#20000,-1,"tag:yaml.org,2002:str","\u1f680\ude80") +#20004=@"loc,{#10000},2,6,2,6" +locations_default(#20004,#10000,2,6,2,6) +yaml_locations(#20003,#20004) +yaml(#20000,1,#10000,0,"tag:yaml.org,2002:map","key: \u1f680\ude80") +#20005=@"loc,{#10000},2,1,2,8" +locations_default(#20005,#10000,2,1,2,8) +yaml_locations(#20000,#20005) +numlines(#10000,2,0,0) +filetype(#10000,"yaml")