diff --git a/src/Query/ADQL_parser.hxx b/src/Query/ADQL_parser.hxx index ddfa99d..5b1f98d 100644 --- a/src/Query/ADQL_parser.hxx +++ b/src/Query/ADQL_parser.hxx @@ -97,12 +97,11 @@ struct ADQL_parser : boost::spirit::qi::grammar position_function; - boost::spirit::qi::rule - user_defined_function; + whitelisted_function; boost::spirit::qi::rule - value_expression, user_defined_function_param, pattern; + value_expression, whitelisted_function_param, pattern; boost::spirit::qi::rule table_reference; - boost::spirit::qi::rule - table_valued_function; - boost::spirit::qi::rule(), boost::spirit::ascii::space_type> diff --git a/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx b/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx index 8aba013..0bd4120 100644 --- a/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx +++ b/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx @@ -9,10 +9,6 @@ void ADQL_parser::init_factor() { using boost::spirit::qi::digit; using boost::spirit::qi::double_; using boost::spirit::qi::hold; - using boost::spirit::qi::labels::_1; - using boost::spirit::qi::labels::_2; - using boost::spirit::qi::labels::_3; - using boost::spirit::qi::labels::_val; using boost::spirit::qi::lexeme; using boost::spirit::qi::lit; using boost::spirit::qi::lower; @@ -20,6 +16,10 @@ void ADQL_parser::init_factor() { using boost::spirit::qi::omit; using boost::spirit::qi::print; using boost::spirit::qi::ulong_long; + using boost::spirit::qi::labels::_1; + using boost::spirit::qi::labels::_2; + using boost::spirit::qi::labels::_3; + using boost::spirit::qi::labels::_val; namespace ascii = boost::spirit::ascii; set_function_type %= ascii::no_case[ascii::string("AVG")] | @@ -62,8 +62,8 @@ void ADQL_parser::init_factor() { ascii::no_case["ELSE"] >> &no_skip[boost::spirit::qi::space] >> result; else_clause.name("else_clause"); - /// boost::spirit gets wonky if I try to use the '>' operator for - /// simple_whens + // boost::spirit gets wonky if I try to use the '>' operator for + // simple_whens simple_case %= value_expression >> simple_whens >> -else_clause > ascii::no_case["END"]; simple_case.name("simple_case"); @@ -124,33 +124,116 @@ void ADQL_parser::init_factor() { *array_index; value_expression_primary.name("value_expression_primary"); - /// Custom array_expression so that SQL 99 array literals can pass - /// through + // Custom array_expression so that SQL 99 array literals can pass + // through array_constructor %= ascii::no_case["ARRAY"] >> '[' >> (value_expression % ',') > ']'; array_constructor.name("array_constructor"); - /// We do not have a rule for default_function_prefix since, being - /// optional, it does not change whether something parses. - - /// Add a bunch of functions that are normally reserved words, but - /// also really useful string functions (at least in Postgres) - user_defined_function_name %= regular_identifier | - ascii::no_case[ascii::string("RIGHT")] | - ascii::no_case[ascii::string("LEFT")] | - ascii::no_case[ascii::string("UPPER")] | - ascii::no_case[ascii::string("LOWER")] | - ascii::no_case[ascii::string("DISTINCT")] | - ascii::no_case[ascii::string("TRIM")]; - user_defined_function_name.name("user_defined_function_name"); - - user_defined_function_param %= value_expression; - user_defined_function_param.name("user_defined_function_param"); - - user_defined_function %= hold[user_defined_function_name >> '('] >> - -(user_defined_function_param % ',') >> ')'; - - user_defined_function.name("user_defined_function"); + // Reverse-sorted within groups as in init_reserved_word() to + // prevent early matches. Could be reverse-sorted in one big group + // if necessary. + + // Note: Despite their potential for misuse in blind injection + // attacks, the functions SUBSTR, SUBSTRING, and INSTR are + // whitelisted because they have legitimate uses in TAP + // queries. The functions ASCII() and CHR(), though, are omitted + // from the whitelist as of 22May26. + + whitelisted_function_name %= + // IRSA UDFs + ascii::no_case[ascii::string("STRIP_URL_PREFIX")] | + ascii::no_case[ascii::string("SIA2_CLOUD_ACCESS_COLUMN")] | + ascii::no_case[ascii::string("SIA1_CLOUD_ACCESS_COLUMN")] | + ascii::no_case[ascii::string("RA_TO_SEXAGESIMAL")] | + ascii::no_case[ascii::string("PT_TO_REGION")] | + ascii::no_case[ascii::string("POLY_TO_REGION")] | + ascii::no_case[ascii::string("POLY_TO_RA")] | + ascii::no_case[ascii::string("POLY_TO_DEC")] | + ascii::no_case[ascii::string("GET_MOCS")] | + ascii::no_case[ascii::string("GET_CONTENTTYPE_SORT_SURROGATE")] | + ascii::no_case[ascii::string("EXTRACT_URL_BASENAME")] | + ascii::no_case[ascii::string("DEC_TO_SEXAGESIMAL")] | + + // PostgreSQL/SQL functions + ascii::no_case[ascii::string("TYPEOF")] | + ascii::no_case[ascii::string("TO_TIMESTAMP")] | + ascii::no_case[ascii::string("TO_NUMBER")] | + ascii::no_case[ascii::string("TO_DATE")] | + ascii::no_case[ascii::string("TO_CHAR")] | + ascii::no_case[ascii::string("TIMEZONE")] | + ascii::no_case[ascii::string("SYSDATE")] | + ascii::no_case[ascii::string("SUBSTRING")] | + ascii::no_case[ascii::string("SUBSTR")] | + ascii::no_case[ascii::string("STRPOS")] | + ascii::no_case[ascii::string("STRING_AGG")] | + ascii::no_case[ascii::string("STDDEV")] | + ascii::no_case[ascii::string("SPLIT_PART")] | + ascii::no_case[ascii::string("REPLACE")] | + ascii::no_case[ascii::string("RANDOM")] | + ascii::no_case[ascii::string("NOW")] | + ascii::no_case[ascii::string("MEDIAN")] | + ascii::no_case[ascii::string("MD5")] | ascii::no_case[ascii::string("LN")] | + ascii::no_case[ascii::string("LENGTH")] | + ascii::no_case[ascii::string("LEAST")] | + ascii::no_case[ascii::string("JSONB_EXTRACT_PATH_TEXT")] | + ascii::no_case[ascii::string("JSON_EXTRACT_PATH_TEXT")] | + ascii::no_case[ascii::string("INSTR")] | + ascii::no_case[ascii::string("GREATEST")] | + ascii::no_case[ascii::string("GETDATE")] | + ascii::no_case[ascii::string("FORMAT")] | + ascii::no_case[ascii::string("FLOOR")] | + ascii::no_case[ascii::string("CONCAT")] | + ascii::no_case[ascii::string("CLOCK_TIMESTAMP")] | + ascii::no_case[ascii::string("CHAR_LENGTH")] | + ascii::no_case[ascii::string("CEILING")] | + ascii::no_case[ascii::string("CEIL")] | + + // PostGIS type constructors + ascii::no_case[ascii::string("GEOMETRY")] | + ascii::no_case[ascii::string("GEOGRAPHY")] | + + // ADQL reserved words that are also valid function names + ascii::no_case[ascii::string("SQRT")] | + ascii::no_case[ascii::string("ROUND")] | + ascii::no_case[ascii::string("POWER")] | + ascii::no_case[ascii::string("MOD")] | + ascii::no_case[ascii::string("LOG10")] | + ascii::no_case[ascii::string("LOG")] | + ascii::no_case[ascii::string("EXP")] | + ascii::no_case[ascii::string("COORD2")] | + ascii::no_case[ascii::string("COORD1")] | + ascii::no_case[ascii::string("ABS")] | + + // SQL reserved words that are also valid function names + ascii::no_case[ascii::string("UPPER")] | + ascii::no_case[ascii::string("TRIM")] | + ascii::no_case[ascii::string("SUM")] | + ascii::no_case[ascii::string("ROW_NUMBER")] | + ascii::no_case[ascii::string("RIGHT")] | + ascii::no_case[ascii::string("LOWER")] | + ascii::no_case[ascii::string("LEFT")] | + ascii::no_case[ascii::string("DISTINCT")] | + ascii::no_case[ascii::string("CAST")] | + + // ST_ prefix — PostGIS functions + (ascii::no_case[ascii::string("ST_")] >> all_identifiers) | + + // ivo_ prefix — IVOA functions + (ascii::no_case[ascii::string("ivo_")] >> all_identifiers) | + + // q3c_ prefix — Q3C spatial indexing functions + (ascii::no_case[ascii::string("q3c_")] >> all_identifiers); + + whitelisted_function_name.name("whitelisted_function_name"); + + whitelisted_function_param %= value_expression; + whitelisted_function_param.name("whitelisted_function_param"); + + whitelisted_function %= hold[whitelisted_function_name >> '('] >> + -(whitelisted_function_param % ',') >> ')'; + + whitelisted_function.name("whitelisted_function"); sql_no_arg_function %= ascii::no_case[ascii::string("CURRENT_TIMESTAMP")]; @@ -182,7 +265,8 @@ void ADQL_parser::init_factor() { // numeric_geometry_function numeric_value_function %= trig_function | math_function | cast_function | position_function | non_predicate_geometry_function | - user_defined_function | sql_no_arg_function; + whitelisted_function | sql_no_arg_function; + numeric_value_function.name("numeric_value_function"); // Flipped the order here, because a value_expression can match a // function name. @@ -216,9 +300,9 @@ void ADQL_parser::init_factor() { BOOST_SPIRIT_DEBUG_NODE(null_cast); BOOST_SPIRIT_DEBUG_NODE(value_expression_primary); BOOST_SPIRIT_DEBUG_NODE(array_constructor); - BOOST_SPIRIT_DEBUG_NODE(user_defined_function_name); - BOOST_SPIRIT_DEBUG_NODE(user_defined_function_param); - BOOST_SPIRIT_DEBUG_NODE(user_defined_function); + BOOST_SPIRIT_DEBUG_NODE(whitelisted_function_name); + BOOST_SPIRIT_DEBUG_NODE(whitelisted_function_param); + BOOST_SPIRIT_DEBUG_NODE(whitelisted_function); BOOST_SPIRIT_DEBUG_NODE(cast_function); BOOST_SPIRIT_DEBUG_NODE(position_function); BOOST_SPIRIT_DEBUG_NODE(numeric_value_function); diff --git a/src/Query/ADQL_parser/ADQL_parser/init_predicate.cxx b/src/Query/ADQL_parser/ADQL_parser/init_predicate.cxx index 2c2951c..4dad567 100644 --- a/src/Query/ADQL_parser/ADQL_parser/init_predicate.cxx +++ b/src/Query/ADQL_parser/ADQL_parser/init_predicate.cxx @@ -9,10 +9,6 @@ void ADQL_parser::init_predicate() { using boost::spirit::qi::digit; using boost::spirit::qi::double_; using boost::spirit::qi::hold; - using boost::spirit::qi::labels::_1; - using boost::spirit::qi::labels::_2; - using boost::spirit::qi::labels::_3; - using boost::spirit::qi::labels::_val; using boost::spirit::qi::lexeme; using boost::spirit::qi::lit; using boost::spirit::qi::lower; @@ -20,24 +16,17 @@ void ADQL_parser::init_predicate() { using boost::spirit::qi::omit; using boost::spirit::qi::print; using boost::spirit::qi::ulong_long; + using boost::spirit::qi::labels::_1; + using boost::spirit::qi::labels::_2; + using boost::spirit::qi::labels::_3; + using boost::spirit::qi::labels::_val; namespace ascii = boost::spirit::ascii; derived_correlation %= subquery >> correlation_specification; derived_correlation.name("derived correlation"); - table_valued_function_name %= possibly_qualified_identifier; - table_valued_function_name.name("table_valued_function_name"); - - table_valued_function_param %= possibly_qualified_identifier; - table_valued_function_param.name("table_valued_function_param"); - - table_valued_function %= hold[lexeme[ascii::no_case["table("] >> - table_valued_function_name >> "('"]] >> - -(table_valued_function_param % "','") >> "')" >> ')'; - table_valued_function.name("table_valued_function"); + table_reference %= joined_table | table_correlation | derived_correlation; - table_reference %= joined_table | table_correlation | derived_correlation | - table_valued_function; table_reference.name("table reference"); from_clause %= lexeme[ascii::no_case["FROM"] > &boost::spirit::qi::space] > @@ -88,7 +77,6 @@ void ADQL_parser::init_predicate() { null_predicate | like_predicate | exists_predicate; #ifdef DEBUG_PRED - BOOST_SPIRIT_DEBUG_NODE(table_valued_function); BOOST_SPIRIT_DEBUG_NODE(derived_correlation); BOOST_SPIRIT_DEBUG_NODE(table_reference); BOOST_SPIRIT_DEBUG_NODE(from_clause); diff --git a/src/Query/ADQL_parser/ADQL_parser/init_query.cxx b/src/Query/ADQL_parser/ADQL_parser/init_query.cxx index cce430c..7e8c4cc 100644 --- a/src/Query/ADQL_parser/ADQL_parser/init_query.cxx +++ b/src/Query/ADQL_parser/ADQL_parser/init_query.cxx @@ -56,7 +56,7 @@ void ADQL_parser::init_query() { search_condition; having.name("having"); - sort_key %= case_expression | user_defined_function | column_reference | + sort_key %= case_expression | whitelisted_function | column_reference | unsigned_integer; ordering_specification %= ascii::no_case[ascii::string("ASC")] | diff --git a/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx b/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx index 8207a6b..534bf34 100644 --- a/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx +++ b/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx @@ -9,10 +9,6 @@ void ADQL_parser::init_reserved_words() { using boost::spirit::qi::digit; using boost::spirit::qi::double_; using boost::spirit::qi::hold; - using boost::spirit::qi::labels::_1; - using boost::spirit::qi::labels::_2; - using boost::spirit::qi::labels::_3; - using boost::spirit::qi::labels::_val; using boost::spirit::qi::lexeme; using boost::spirit::qi::lit; using boost::spirit::qi::lower; @@ -20,9 +16,13 @@ void ADQL_parser::init_reserved_words() { using boost::spirit::qi::omit; using boost::spirit::qi::print; using boost::spirit::qi::ulong_long; + using boost::spirit::qi::labels::_1; + using boost::spirit::qi::labels::_2; + using boost::spirit::qi::labels::_3; + using boost::spirit::qi::labels::_val; namespace ascii = boost::spirit::ascii; - /// Reverse sort to avoid early matches. + // Reverse sort to avoid early matches. ADQL_reserved_word %= ascii::no_case["TRUNCATE"] | ascii::no_case["TOP"] | ascii::no_case["TAP_UPLOAD"] | ascii::no_case["TAN"] | @@ -41,7 +41,7 @@ void ADQL_parser::init_reserved_words() { ascii::no_case["ATAN2"] | ascii::no_case["ATAN"] | ascii::no_case["ASIN"] | ascii::no_case["AREA"] | ascii::no_case["ACOS"] | ascii::no_case["ABS"]; - /// Split up SQL_reserved_word to help memory usage and compile times. + // Split up SQL_reserved_word to help memory usage and compile times. SQL_reserved_word_00 %= ascii::no_case["ZONE"] | ascii::no_case["YEAR"] | ascii::no_case["WRITE"] | ascii::no_case["WORK"] | ascii::no_case["WITH"] | ascii::no_case["WHERE"] | diff --git a/src/Query/Query_Preprocessor/Top_Level_Components.hxx b/src/Query/Query_Preprocessor/Top_Level_Components.hxx index a425aa5..9ef9345 100644 --- a/src/Query/Query_Preprocessor/Top_Level_Components.hxx +++ b/src/Query/Query_Preprocessor/Top_Level_Components.hxx @@ -4,8 +4,7 @@ #include #include -// A general ADQL query can be split into 3 parts as follows: - +// A general ADQL query supported by LibADQL can be split into 3 parts as follows: // (1) An optional WITH clause @@ -15,10 +14,8 @@ // (3) Optional HAVING, GROUP BY, and/or ORDER BY clauses. - // For example: - // "WITH tempTable (avgDist) AS (SELECT avg(dist) FROM distTable) " // "SELECT table_name, dist FROM distTable, tempTable WHERE distTable.dist > " @@ -29,8 +26,6 @@ // " ORDER BY dist " - - // In what follows, "SFW" represents a SELECT...FROM...[WHERE...] string. // The components of the Top_Level_Components class correspond to diff --git a/src/Query/Query_Preprocessor/Top_Level_Parser.hxx b/src/Query/Query_Preprocessor/Top_Level_Parser.hxx index 250830c..79882aa 100644 --- a/src/Query/Query_Preprocessor/Top_Level_Parser.hxx +++ b/src/Query/Query_Preprocessor/Top_Level_Parser.hxx @@ -11,10 +11,16 @@ #include "Top_Level_Components.hxx" -// This parser parses a general ADQL query into the 3 components -// described in Top_Level_Components.hxx, thus making the query's +// This parser parses ADQL queries into the 3 components +// described in Top_Level_Components.hxx, thus making the queries' // select_from_where strings accessible to the Query_Preprocessor. +// Note: Queries using WITHIN GROUP (...) syntax (e.g. LISTAGG, +// PERCENTILE_CONT) will fail because Top_Level_Parser stops at the +// bare GROUP keyword expecting a trailing GROUP BY clause. Supporting +// WITHIN GROUP would require changes to how trailing clauses are +// detected. + namespace ADQL { struct Top_Level_Parser diff --git a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx index d985062..c479746 100644 --- a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx +++ b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx @@ -1,20 +1,20 @@ #pragma once +#include + #include "../../Non_Predicate_Geometry_Function.hxx" -#include "../../User_Defined_Function_Wrap.hxx" +#include "../../Whitelisted_Function_Wrap.hxx" #include "Numeric_Value_Function/Cast_Function.hxx" #include "Numeric_Value_Function/Math_Function.hxx" #include "Numeric_Value_Function/Position_Function.hxx" #include "Numeric_Value_Function/Trig_Function.hxx" -#include - namespace ADQL { class Numeric_Value_Function { public: typedef boost::variant + Whitelisted_Function_Wrap, std::string> Variant; Variant variant; bool empty() const; diff --git a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx index 1e66ccd..15d1149 100644 --- a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx +++ b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx @@ -1,5 +1,5 @@ #include "../../../../empty_variant.hxx" -#include "../../../User_Defined_Function.hxx" +#include "../../../Whitelisted_Function.hxx" #include "../Numeric_Value_Function.hxx" bool ADQL::Numeric_Value_Function::empty() const { return empty_variant(variant); } diff --git a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx index d530df7..8977afa 100644 --- a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx +++ b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx @@ -1,4 +1,4 @@ -#include "../../../User_Defined_Function.hxx" +#include "../../../Whitelisted_Function.hxx" #include "../Numeric_Value_Function.hxx" namespace ADQL { diff --git a/src/Query/Query_Specification/Order_By/Sort_Key.hxx b/src/Query/Query_Specification/Order_By/Sort_Key.hxx index 5ee5700..a858eca 100644 --- a/src/Query/Query_Specification/Order_By/Sort_Key.hxx +++ b/src/Query/Query_Specification/Order_By/Sort_Key.hxx @@ -1,9 +1,11 @@ #pragma once #include "../Column_Reference.hxx" -#include "../User_Defined_Function.hxx" #include "../Value_Expression_Primary/Case_Expression.hxx" +#include "../Whitelisted_Function.hxx" namespace ADQL { - typedef boost::variant Sort_Key; +typedef boost::variant + Sort_Key; } diff --git a/src/Query/Query_Specification/Select.hxx b/src/Query/Query_Specification/Select.hxx index 2047620..9c98702 100644 --- a/src/Query/Query_Specification/Select.hxx +++ b/src/Query/Query_Specification/Select.hxx @@ -2,9 +2,8 @@ #include "As.hxx" #include "Non_As.hxx" -#include "Table_Valued_Function.hxx" -#include "User_Defined_Function.hxx" #include "Value_Expression_Primary.hxx" +#include "Whitelisted_Function.hxx" namespace ADQL { class Select { diff --git a/src/Query/Query_Specification/Select_From_Where.hxx b/src/Query/Query_Specification/Select_From_Where.hxx index 8a277a0..82ebc4b 100644 --- a/src/Query/Query_Specification/Select_From_Where.hxx +++ b/src/Query/Query_Specification/Select_From_Where.hxx @@ -3,7 +3,6 @@ #include "Joined_Table.hxx" #include "Select.hxx" #include "Table_Reference.hxx" -#include "Table_Valued_Function.hxx" #include "Where.hxx" namespace ADQL { diff --git a/src/Query/Query_Specification/Table_Reference.hxx b/src/Query/Query_Specification/Table_Reference.hxx index cafbd0e..81ed8ce 100644 --- a/src/Query/Query_Specification/Table_Reference.hxx +++ b/src/Query/Query_Specification/Table_Reference.hxx @@ -8,13 +8,11 @@ #include "Derived_Correlation.hxx" #include "Joined_Table_Wrap.hxx" #include "Table_Reference/Correlation_Join.hxx" -#include "Table_Valued_Function.hxx" namespace ADQL { class Table_Reference { public: - typedef boost::variant + typedef boost::variant Variant; Variant variant; bool empty() const; diff --git a/src/Query/Query_Specification/Table_Valued_Function.hxx b/src/Query/Query_Specification/Table_Valued_Function.hxx deleted file mode 100644 index c9962c2..0000000 --- a/src/Query/Query_Specification/Table_Valued_Function.hxx +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include -#include - -#include -#include - -namespace ADQL { -class Table_Valued_Function { -public: - std::string function_name; - std::vector args; - bool empty() const { return function_name.empty(); } -}; - -inline std::ostream &operator<<( - std::ostream &os, const ADQL::Table_Valued_Function &table_valued_function) { - os << "table(" << table_valued_function.function_name << '('; - for (auto a = table_valued_function.args.begin(); - a != table_valued_function.args.end();) { - os << '\''<< *a << '\''; - ++a; - if (a != table_valued_function.args.end()) { - os << ','; - } - } - os << "))"; - return os; -} -} // namespace ADQL - -BOOST_FUSION_ADAPT_STRUCT(ADQL::Table_Valued_Function, - (std::string, function_name)(std::vector, - args)) diff --git a/src/Query/Query_Specification/User_Defined_Function_Wrap.hxx b/src/Query/Query_Specification/User_Defined_Function_Wrap.hxx deleted file mode 100644 index 9117ad0..0000000 --- a/src/Query/Query_Specification/User_Defined_Function_Wrap.hxx +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include - -#include - -namespace ADQL { -class User_Defined_Function; -typedef boost::recursive_wrapper User_Defined_Function_Wrap; -std::ostream &operator<<(std::ostream &os, const User_Defined_Function_Wrap &s); -} // namespace ADQL diff --git a/src/Query/Query_Specification/User_Defined_Function_Wrap/ostream.cxx b/src/Query/Query_Specification/User_Defined_Function_Wrap/ostream.cxx deleted file mode 100644 index e27a9ca..0000000 --- a/src/Query/Query_Specification/User_Defined_Function_Wrap/ostream.cxx +++ /dev/null @@ -1,9 +0,0 @@ -#include "../User_Defined_Function.hxx" -#include "../User_Defined_Function_Wrap.hxx" - -namespace ADQL { -std::ostream &operator<<(std::ostream &os, - const ADQL::User_Defined_Function_Wrap &wrap) { - return os << wrap.get(); -} -} // namespace ADQL diff --git a/src/Query/Query_Specification/User_Defined_Function.hxx b/src/Query/Query_Specification/Whitelisted_Function.hxx similarity index 62% rename from src/Query/Query_Specification/User_Defined_Function.hxx rename to src/Query/Query_Specification/Whitelisted_Function.hxx index f770182..ae47e3e 100644 --- a/src/Query/Query_Specification/User_Defined_Function.hxx +++ b/src/Query/Query_Specification/Whitelisted_Function.hxx @@ -1,15 +1,15 @@ #pragma once -#include "Value_Expression.hxx" +#include +#include #include #include -#include -#include +#include "Value_Expression.hxx" namespace ADQL { -class User_Defined_Function { +class Whitelisted_Function { public: std::string function; std::vector args; @@ -17,13 +17,13 @@ public: }; inline std::ostream &operator<<( - std::ostream &os, const ADQL::User_Defined_Function &user_defined_function) { - os << user_defined_function.function << '('; - for (auto a = user_defined_function.args.begin(); - a != user_defined_function.args.end();) { + std::ostream &os, const ADQL::Whitelisted_Function &whitelisted_function) { + os << whitelisted_function.function << '('; + for (auto a = whitelisted_function.args.begin(); + a != whitelisted_function.args.end();) { os << *a; ++a; - if (a != user_defined_function.args.end()) { + if (a != whitelisted_function.args.end()) { os << ','; } } @@ -32,6 +32,6 @@ inline std::ostream &operator<<( } } // namespace ADQL -BOOST_FUSION_ADAPT_STRUCT(ADQL::User_Defined_Function, +BOOST_FUSION_ADAPT_STRUCT(ADQL::Whitelisted_Function, (std::string, function)(std::vector, args)) diff --git a/src/Query/Query_Specification/Whitelisted_Function_Wrap.hxx b/src/Query/Query_Specification/Whitelisted_Function_Wrap.hxx new file mode 100644 index 0000000..5f9d786 --- /dev/null +++ b/src/Query/Query_Specification/Whitelisted_Function_Wrap.hxx @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + +namespace ADQL { +class Whitelisted_Function; +typedef boost::recursive_wrapper Whitelisted_Function_Wrap; +std::ostream &operator<<(std::ostream &os, const Whitelisted_Function_Wrap &s); +} // namespace ADQL diff --git a/src/Query/Query_Specification/Whitelisted_Function_Wrap/ostream.cxx b/src/Query/Query_Specification/Whitelisted_Function_Wrap/ostream.cxx new file mode 100644 index 0000000..7616fec --- /dev/null +++ b/src/Query/Query_Specification/Whitelisted_Function_Wrap/ostream.cxx @@ -0,0 +1,9 @@ +#include "../Whitelisted_Function.hxx" +#include "../Whitelisted_Function_Wrap.hxx" + +namespace ADQL { +std::ostream &operator<<(std::ostream &os, + const ADQL::Whitelisted_Function_Wrap &wrap) { + return os << wrap.get(); +} +} // namespace ADQL diff --git a/test/parse_adql.cxx b/test/parse_adql.cxx index 3e9aecf..fc492ee 100644 --- a/test/parse_adql.cxx +++ b/test/parse_adql.cxx @@ -119,10 +119,6 @@ int main(int argc, char *argv[]) { "radians(ra),sqrt(ra) FROM my_table1", "SELECT mod(ra, dec),power( ra,dec ),pi(),rand(ra),rand()," "round(ra,10),round(ra),truncate(ra),truncate(ra,10) FROM my_table1", - "SELECT modern() FROM my_table1", - "SELECT my_modern_function(ra,dec) FROM my_table1", - "SELECT my_modern_function(ra,dec), modern() FROM my_table1", - "SELECT my_modern_function(ra,dec) || modern() FROM my_table1", "select 'a b c','a','a ''bv' from b", "select \"a b\",\"a \"\" b\" from b", "select 'a' 'b' from b", @@ -142,15 +138,12 @@ int main(int argc, char *argv[]) { "select single from a", "select a,b from a group by a", "select a,b from a group by a having x>2", - "select f(a, b, c), max(d), max(e) from t group by f(a,b,c)", "SELECT * FROM my_table1 order by x", "SELECT * FROM my_table1 order by x asc", "SELECT * FROM my_table1 order by x desc", "select b from a where x<1 order by a", "SELECT * FROM my_table1 where x like y", "SELECT * FROM my_table1 where x like y || z", - "SELECT * FROM my_table1 where x not like my_sin(x)", - "SELECT * FROM my_table1 where x like my_sin(x) || x", "SELECT * FROM my_table1 where exists (select a from b)", "SELECT * FROM my_table WHERE (mjd>=55550.0 and mjd<=65650.5)", "SELECT my_table1.*,'table' from my_table1", @@ -335,7 +328,6 @@ int main(int argc, char *argv[]) { " AND caom.observation.instrument = 'Spex Spectrograph')" " GROUP BY" " name, DATE_TIME_OF_OBS, PROGRAM_ID, DY, Instrument_Setup, Group_ID", - "SELECT bar FROM foo where (ST(ST(ST(ST(ST(ST(ST(ST(ST())))))))))", "SELECT TAP_UPLOAD.pos.cntr as in_row_id, TAP_UPLOAD.pos.ra as in_ra, " "TAP_UPLOAD.pos.dec as in_dec, " "wise.wise_allwise_p3am_cdd.* " @@ -497,23 +489,6 @@ int main(int argc, char *argv[]) { "WITH alpha_subset AS (SELECT * FROM alpha_source WHERE mod(id,10) = 0) " "SELECT ra, dec FROM alpha_subset WHERE ra > 10 and ra < 20", - // support for table() function - "WITH temp (collection, multi_type) AS (SELECT collection,mytype " - "FROM table(tap_ancillary.DCE_DATATYPE('irsa_directory'))) " - "SELECT DISTINCT projectshort AS facility_name,description," - "irsa_directory.collection AS obs_collection FROM irsa_directory,temp " - "WHERE irsa_directory.collection=temp.collection", - - "SELECT DISTINCT projectshort AS " - "facility_name,description,irsa_directory.collection AS obs_collection," - "instrument AS instrument_name,coverage,band,info_url,temp.multi_type AS " - "dataproduct_type " - "FROM irsa_directory, (SELECT collection,mytype as multi_type " - "FROM table(tap_ancillary.DCE_DATATYPE('irsa_directory'))) as temp " - "WHERE semantics like '%primary%' AND " - "irsa_directory.collection=temp.collection " - "ORDER BY facility_name,irsa_directory.collection,instrument_name", - // IRSA-5856: column values as Shape arguments "Select * From mytable JOIN dbtable WHERE " @@ -766,8 +741,8 @@ int main(int argc, char *argv[]) { // IRSA-7432 POSITION(string IN column_value) "SELECT CASE WHEN POSITION('s3.amazonaws.com' IN a.uri) > 0 THEN a.uri " - "ELSE 'https://bacchus1.ipac.caltech.edu/' || regexp_replace(a.uri, " - "'https.*edu/', '') END as access_url FROM caom.artifact a", + "ELSE 'https://bacchus1.ipac.caltech.edu/' || strip_url_prefix(a.uri, " + "'https.*edu/') END as access_url FROM caom.artifact a", "SELECT POSITION('s3.amazonaws.com' IN a.uri) AS pos FROM caom.artifact a", @@ -821,6 +796,21 @@ int main(int argc, char *argv[]) { " tempTable.avgDist / 10 " " ORDER BY dist ", + // IRSA-7735: add whitelisted functions; reject functions not whitelisted + "select sum(x,y) from my_table", + "select abs(x) from my_table", + "select round(x) from my_table", + "SELECT DISTINCT projectshort AS " + "facility_name,description,collection_label,d.collection AS " + "obs_collection,'https://irsawebdev1.ipac.caltech.edu:9801' || " + "strip_url_prefix(info_url, 'https.*edu/') AS info_url,instrument AS " + "instrument_name,coverage,band,'https://irsawebdev1.ipac.caltech.edu:9801/" + "IRSA_Directory/searchpage?collection='||d.collection AS " + "access_url,'application/x-votable+xml' AS access_format,v.mytype AS " + "dataproduct_type FROM irsa_directory d JOIN " + "tap_ancillary.irsa_directory_datatypes v ON d.collection=v.collection " + "WHERE d.semantics like '%primary%' ORDER BY " + "LOWER(facility_name),d.collection,instrument", #endif // RUN_ALL }; @@ -846,10 +836,8 @@ int main(int argc, char *argv[]) { "And x<1 And x>2 Or y < 3 Or y >5" "SELECT *,ra FROM my_table1", "SELECT my_tablel1.* as ra_dec FROM my_table1", - "select sum(a,b) from a", "select sum from a", "select sum( from a", - "select abs(a,b) from a", "select sin from a", "select sin() from a", "select sin( from a", @@ -859,7 +847,6 @@ int main(int argc, char *argv[]) { "select atan2(a,) from a", "select atan2(a,b from a", "select round from a", - "select round() from a", "select round(a,) from a", "select round(a,b from a", "select round(a,10 from a", @@ -922,6 +909,34 @@ int main(int argc, char *argv[]) { "SELECT * from spherex.obscore WHERE " "INTERSECTS(CIRCLE('ICRS',162.12766666, -38.924749999, 0.002777777), " "s_region)=1", + + // IRSA-7735: reject non-whitelisted functions + "SELECT modern() FROM my_table1", + "SELECT my_modern_function(ra,dec) FROM my_table1", + "SELECT my_modern_function(ra,dec), modern() FROM my_table1", + "SELECT my_modern_function(ra,dec) || modern() FROM my_table1", + "select f(a, b, c), max(d), max(e) from t group by f(a,b,c)", + "SELECT * FROM my_table1 where x not like my_sin(x)", + "SELECT * FROM my_table1 where x like my_sin(x) || x", + "SELECT bar FROM foo where (ST(ST(ST(ST(ST(ST(ST(ST(ST())))))))))", + "SELECT * FROM my_table1 where sys_context('USERENV','DB_NAME')='wise1'", + + // IRSA-7735: retire support for table() function + "WITH temp (collection, multi_type) AS (SELECT collection,mytype " + "FROM table(tap_ancillary.DCE_DATATYPE('irsa_directory'))) " + "SELECT DISTINCT projectshort AS facility_name,description," + "irsa_directory.collection AS obs_collection FROM irsa_directory,temp " + "WHERE irsa_directory.collection=temp.collection", + + "SELECT DISTINCT projectshort AS " + "facility_name,description,irsa_directory.collection AS obs_collection," + "instrument AS instrument_name,coverage,band,info_url,temp.multi_type AS " + "dataproduct_type " + "FROM irsa_directory, (SELECT collection,mytype as multi_type " + "FROM table(tap_ancillary.DCE_DATATYPE('irsa_directory'))) as temp " + "WHERE semantics like '%primary%' AND " + "irsa_directory.collection=temp.collection " + "ORDER BY facility_name,irsa_directory.collection,instrument_name", }; int result(0); diff --git a/wscript b/wscript index 715831b..669f347 100644 --- a/wscript +++ b/wscript @@ -113,7 +113,7 @@ def build(ctx): 'src/Query/Query_Specification/Value_Expression_Primary/Case_Expression/Case_Abbreviation/Nullif/empty.cxx', 'src/Query/Query_Specification/Value_Expression_Primary/Case_Expression/Case_Abbreviation/Nullif/ostream.cxx', 'src/Query/Query_Specification/Value_Expression_Primary/Case_Expression/Case_Abbreviation/Coalesce/ostream.cxx', - 'src/Query/Query_Specification/User_Defined_Function_Wrap/ostream.cxx', + 'src/Query/Query_Specification/Whitelisted_Function_Wrap/ostream.cxx', 'src/Query/Query_Specification/Value_Expression/empty.cxx', 'src/Query/Query_Specification/Value_Expression/ostream.cxx', 'src/Query/Query_Specification/Value_Expression_Wrap/ostream.cxx',