c++ - Spirit-Qi: ¿Cómo puedo escribir un analizador no terminal?

parsing boost (1)

Su ejemplo

No veo cómo su muestra requiere nada de esto. Solo reordena tus ramas, luego date cuenta de que la rama corta es solo un caso especial del caso calificado con n = 1: Live On Coliru ¹ (o usando la versión X3 si lo prefieres).

El caso genérico

Ahora, habiendo mencionado x3, ¡tiene la capacidad de hacer tu vida mucho más fácil!

Esto es lo que creo que querías, en el caso general:

namespace parser { template <typename... Parsers> struct longest_parser : x3::parser_base { longest_parser(Parsers... sub) : _alternatives {sub...} { } template <typename It, typename Ctx, typename Other, typename Attr> bool parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr) const { auto const saved = f; //// To exclude pre-skip from length comparisons, do pre-skip here: // x3::skip_over(f, l, ctx); auto seq = std::make_index_sequence<sizeof...(Parsers)>(); auto best = select_best(f, l, ctx, seq); //std::cout << "Longest match at index #" << best << "/n"; bool ok = dispatch(f, l, ctx, other, attr, best, seq); if (!ok) f = saved; return ok; } private: template <typename It, typename Ctx, typename P> size_t length_of(It f, It l, Ctx ctx, P const& p) const { boost::iterator_range<It> matched; return x3::raw[p].parse(f, l, ctx, x3::unused, matched)? boost::size(matched) : 0; } template <typename It, typename Ctx, size_t... I> size_t select_best(It f, It l, Ctx& ctx, std::index_sequence<I...>) const { std::array<size_t, sizeof...(I)> lengths { length_of(f, l, ctx, std::get<I>(_alternatives))... }; return std::distance(lengths.begin(), std::max_element(lengths.begin(), lengths.end())); } template <typename It, typename Ctx, typename Other, typename Attr, size_t... I> bool dispatch(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx, std::index_sequence<I...>) const { //return (real_parse<I>(f, l, ctx, other, attr, targetIdx) || ...); std::array<bool, sizeof...(I)> b = { real_parse<I>(f, l, ctx, other, attr, targetIdx)... }; return std::accumulate(b.begin(), b.end(), false, std::logical_or<bool>()); } template <size_t Idx, typename It, typename Ctx, typename Other, typename Attr> bool real_parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx) const { if (targetIdx != Idx) return false; return std::get<Idx>(_alternatives).parse(f, l, ctx, other, attr); } std::tuple<Parsers...> _alternatives; }; template <typename... Ps> longest_parser<Ps...> longest(Ps... p) { return {x3::as_parser(p)...}; } }

Tenga en cuenta la expresión de plegado que podría usar en el dispatch si su compilador lo admite (Coliru lo hace, edítelo para verlo )

Tenga en cuenta también la sutil elección con respecto a saltable (probablemente espacios en blanco); Si no es significativo para las comparaciones de longitud, descomente el salto previo.

Demo en vivo

Live On Coliru

#include <boost/spirit/home/x3.hpp> #include <type_traits> #include <iostream> #include <numeric> namespace x3 = boost::spirit::x3; namespace std { template <typename T> // just for easy debug printing; hack static std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) { for (auto& el : v) std::cout << ''['' << el << '']''; return os; } } using string_vec = std::vector<std::string>; using result_type = boost::variant<std::string, double, string_vec>; template <typename Parser> void parse(const std::string message, const std::string &input, const std::string &rule, const Parser &parser) { auto iter = input.begin(), end = input.end(); std::cout << "-------------------------/n"; std::cout << message << "/n"; std::cout << "Rule: " << rule << "/n"; std::cout << "Parsing: ''" << input << "''/n"; result_type parsed_result; bool result = phrase_parse(iter, end, parser, x3::space, parsed_result); if (result) { std::cout << "Parsed " << parsed_result << "/n"; } else { std::cout << "Parser failed/n"; } if (iter != end) std::cout << "EOI not reached. Unparsed: ''" << std::string(iter, end) << "''/n"; } namespace parser { template <typename... Parsers> struct longest_parser : x3::parser_base { longest_parser(Parsers... sub) : _alternatives {sub...} { } template <typename It, typename Ctx, typename Other, typename Attr> bool parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr) const { auto const saved = f; //// To exclude pre-skip from length comparisons, do pre-skip here: // x3::skip_over(f, l, ctx); auto seq = std::make_index_sequence<sizeof...(Parsers)>(); auto best = select_best(f, l, ctx, seq); //std::cout << "Longest match at index #" << best << "/n"; bool ok = dispatch(f, l, ctx, other, attr, best, seq); if (!ok) f = saved; return ok; } private: template <typename It, typename Ctx, typename P> size_t length_of(It f, It l, Ctx ctx, P const& p) const { boost::iterator_range<It> matched; return x3::raw[p].parse(f, l, ctx, x3::unused, matched)? boost::size(matched) : 0; } template <typename It, typename Ctx, size_t... I> size_t select_best(It f, It l, Ctx& ctx, std::index_sequence<I...>) const { std::array<size_t, sizeof...(I)> lengths { length_of(f, l, ctx, std::get<I>(_alternatives))... }; return std::distance(lengths.begin(), std::max_element(lengths.begin(), lengths.end())); } template <typename It, typename Ctx, typename Other, typename Attr, size_t... I> bool dispatch(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx, std::index_sequence<I...>) const { //return (real_parse<I>(f, l, ctx, other, attr, targetIdx) || ...); std::array<bool, sizeof...(I)> b = { real_parse<I>(f, l, ctx, other, attr, targetIdx)... }; return std::accumulate(b.begin(), b.end(), false, std::logical_or<bool>()); } template <size_t Idx, typename It, typename Ctx, typename Other, typename Attr> bool real_parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx) const { if (targetIdx != Idx) return false; return std::get<Idx>(_alternatives).parse(f, l, ctx, other, attr); } std::tuple<Parsers...> _alternatives; }; template <typename... Ps> longest_parser<Ps...> longest(Ps... p) { return {x3::as_parser(p)...}; } } int main() { auto id = x3::rule<void, std::string> {} = x3::lexeme [ x3::char_("a-zA-Z_") >> *x3::char_("a-zA-Z0-9_") ]; auto qualified = x3::rule<void, string_vec> {} = id % "::"; #define TEST_CASE(label, input, rule) parse(label, input, #rule, rule) TEST_CASE("unqualified" , "willy" , parser::longest(id, x3::int_, x3::double_)); TEST_CASE("unqualified with whitespace", " willy /t" , parser::longest(id, x3::int_, x3::double_)); TEST_CASE("integral or number" , "123.78::anton::lutz" , parser::longest(id, x3::int_, x3::double_)); TEST_CASE("qualified" , "willy anton::lutz" , parser::longest(id, x3::int_, x3::double_)); TEST_CASE("qualified with whitespace" , "willy /tanton::lutz" , parser::longest(id, x3::int_, x3::double_)); TEST_CASE("unqualified" , "willy" , parser::longest(id, x3::int_, x3::double_, qualified)); TEST_CASE("unqualified with whitespace", " willy /t" , parser::longest(id, x3::int_, x3::double_, qualified)); TEST_CASE("integral or number" , "123.78::anton::lutz" , parser::longest(id, x3::int_, x3::double_, qualified)); TEST_CASE("qualified" , "willy::anton::lutz" , parser::longest(id, x3::int_, x3::double_, qualified)); TEST_CASE("qualified with whitespace" , "willy ::/tanton::lutz", parser::longest(id, x3::int_, x3::double_, qualified)); TEST_CASE("unqualified" , "willy" , parser::longest(x3::int_, x3::double_, qualified)); TEST_CASE("unqualified with whitespace", " willy /t" , parser::longest(x3::int_, x3::double_, qualified)); TEST_CASE("integral or number" , "123.78::anton::lutz" , parser::longest(x3::int_, x3::double_, qualified)); TEST_CASE("qualified" , "willy::anton::lutz" , parser::longest(x3::int_, x3::double_, qualified)); TEST_CASE("qualified with whitespace" , "willy ::/tanton::lutz", parser::longest(x3::int_, x3::double_, qualified)); }

Huellas dactilares

------------------------- unqualified Rule: parser::longest(id, x3::int_, x3::double_) Parsing: ''willy'' Parsed willy ------------------------- unqualified with whitespace Rule: parser::longest(id, x3::int_, x3::double_) Parsing: '' willy '' Parsed willy ------------------------- integral or number Rule: parser::longest(id, x3::int_, x3::double_) Parsing: ''123.78::anton::lutz'' Parsed 123.78 EOI not reached. Unparsed: ''::anton::lutz'' ------------------------- qualified Rule: parser::longest(id, x3::int_, x3::double_) Parsing: ''willy anton::lutz'' Parsed willy EOI not reached. Unparsed: ''anton::lutz'' ------------------------- qualified with whitespace Rule: parser::longest(id, x3::int_, x3::double_) Parsing: ''willy anton::lutz'' Parsed willy EOI not reached. Unparsed: ''anton::lutz'' ------------------------- unqualified Rule: parser::longest(id, x3::int_, x3::double_, qualified) Parsing: ''willy'' Parsed willy ------------------------- unqualified with whitespace Rule: parser::longest(id, x3::int_, x3::double_, qualified) Parsing: '' willy '' Parsed willy ------------------------- integral or number Rule: parser::longest(id, x3::int_, x3::double_, qualified) Parsing: ''123.78::anton::lutz'' Parsed 123.78 EOI not reached. Unparsed: ''::anton::lutz'' ------------------------- qualified Rule: parser::longest(id, x3::int_, x3::double_, qualified) Parsing: ''willy::anton::lutz'' Parsed [willy][anton][lutz] ------------------------- qualified with whitespace Rule: parser::longest(id, x3::int_, x3::double_, qualified) Parsing: ''willy :: anton::lutz'' Parsed [willy][anton][lutz] ------------------------- unqualified Rule: parser::longest(x3::int_, x3::double_, qualified) Parsing: ''willy'' Parsed [willy] ------------------------- unqualified with whitespace Rule: parser::longest(x3::int_, x3::double_, qualified) Parsing: '' willy '' Parsed [willy] ------------------------- integral or number Rule: parser::longest(x3::int_, x3::double_, qualified) Parsing: ''123.78::anton::lutz'' Parsed 123.78 EOI not reached. Unparsed: ''::anton::lutz'' ------------------------- qualified Rule: parser::longest(x3::int_, x3::double_, qualified) Parsing: ''willy::anton::lutz'' Parsed [willy][anton][lutz] ------------------------- qualified with whitespace Rule: parser::longest(x3::int_, x3::double_, qualified) Parsing: ''willy :: anton::lutz'' Parsed [willy][anton][lutz]

Tenga en cuenta los diferentes resultados dependiendo de las expresiones del analizador en las alternativas.

Quiero escribir un analizador (como una extensión qi) que se puede usar a través de my_parser(p1, p2, ...) donde p1, p2, ... son expresiones del analizador qi.

En realidad, quiero implementar un analizador best_match que funcione como alternativa qi, pero no selecciona la primera regla de coincidencia sino la regla que ''explica'' la mayor parte de la entrada.

Dadas dos reglas simple_id = +(qi::alpha) y complex_id = simple_id >> *(qi::string("::") > simple_id) seleccionaría complex_id en la entrada willy::anton . Y no produciría atributos intermedios al hacerlo. Estos beneficios se pagan con tiempo de ejecución porque se requiere un análisis anticipado.

En mi opinión, hay varios casos de uso para una construcción de analizador de este tipo. benchmark(p1, ...) como un analizador de soporte para la optimización, solo un ejemplo. Proporcionaré eso, una vez que sepa cómo hacerlo.

Este analizador sería un no terminal. Lo intenté (duro), pero no puedo encontrar respuestas a mi problema dentro del qi. Supongo que los mecanismos de C ++ están tan estrechamente integrados con c qi que no hay un punto de entrada comprensible, al menos para mí.

Es bastante fácil implementar lo que quiero presentar a un operador. Adjunto la solución actual a continuación. Se compila como se esperaba pero está incompleto.

Un operador qi obtiene una fusión :: vector / secuencia (lo que sea) preparada y actúa sobre ella. Parece que no hay ofertas de la biblioteca para resolver mi problema. Incluso make_nary_composite ya espera que los make_nary_composite en Elements .

Lo intenté mucho, todo falló, así que no quiero aburrirte con eso.

Una solución alternativa que puedo imaginar podría ser proporcionar un operador con estado, que convertiría p1, ... en una expresión legal qi. Luego implemente unary_parser best_match o directiva para procesar esa expresión. El , obtendría dos modos: obtener la longitud de entrada que consume el analizador actual (exitoso) y realmente analizar el seleccionado de la fase 1. El contenedor que ejecuta ese analizador de coma primero en el modo 1 y luego en el modo 2. Puede ser feo , pero podría funcionar.

La implementación del operador p1 |= p2 |= ... es la más costosa con respecto al tiempo de ejecución para n> 2. Me encantaría solucionarlo.

La sobrecarga mínima (pero sin embargo razonable) tendría best_match(p1, ...) . ¿Es eso posible?

Como no tengo mucha experiencia con boost :: fusion, también agregué algunas preguntas al código (cómo hacer con respecto a la fusión).

Se siente mal, presionar algo que en realidad es un analizador no terminal nulo en el esquema de analizador unario. Pero con mi pobre comprensión, parece ser la única forma de hacerlo. Agradecería cualquier ayuda para sacarme de esto.

Mi entorno: Boost 1.61, MSVC 2015 Upd 2, target win32console.

PROGRESO:

Creo que lo estoy obteniendo lenta pero seguramente. Estaba muy feliz de ver un error_invalid_expression . La compilación falló debido a la siguiente expresión de protocolo:

boost::proto::exprns_::expr< boost::proto::tagns_::tag::function,boost::proto::argsns_::list5< const boost::proto::exprns_::expr< boost::proto::tagns_::tag::terminal,boost::proto::argsns_::term< mxc::qitoo::tag::best_match > ,0> & ,const boost::spirit::qi::rule< iterator_type,std::string (void), boost::spirit::standard::space_type, boost::spirit::unused_type,boost::spirit::unused_type> & ,const boost::spirit::qi::rule< iterator_type,std::string (void), boost::spirit::standard::space_type, boost::spirit::unused_type, boost::spirit::unused_type> & , const boost::spirit::qi::rule< iterator_type,std::string (void), boost::spirit::standard::space_type, boost::spirit::unused_type, boost::spirit::unused_type> & ,const boost::spirit::qi::rule< iterator_type,std::string (void), boost::spirit::standard::space_type, boost::spirit::unused_type, boost::spirit::unused_type> & > ,5>

En realidad, esta es la expresión que describe mi uso de estilo de función de mi analizador best_match. Mi regla de prueba se ve así

qitoo::best_match(id, qualified_id, id, qualified_id)

donde id e qualified_id son las reglas mencionadas anteriormente. Todo lo que quiero. El error se produce porque esta expresión no tiene un parse miembros. Bueno. Profundizando más, encontré que el metacompilador de qi admite analizadores unarios, binarios y narios (lo que significa variados). Pero no admite la sintaxis de estilo de función. Qi parece utilizar unary, binary y nary solo para operadores. Y aunque el tipo nary es compatible, es más o menos obsoleto, porque los operadores qi son binarios max.

EDICIÓN DE PROGRESO FINAL

#include <boost/spirit/home/qi.hpp> namespace boost { namespace spirit { /////////////////////////////////////////////////////////////////////////// // Enablers /////////////////////////////////////////////////////////////////////////// template <> struct use_operator<qi::domain, proto::tag::bitwise_or_assign> // enables |= : mpl::true_ {}; template <> struct flatten_tree<qi::domain, proto::tag::bitwise_or_assign> // flattens |= : mpl::true_ {}; } } namespace mxc { namespace qitoo { namespace spirit = boost::spirit; namespace qi = spirit::qi; namespace fusion = boost::fusion; template <typename Elements> struct best_match_parser : qi::nary_parser<mxc::qitoo::best_match_parser<Elements>> { // This one does a lookahead parse of each qi expression to find out which rule matches // most of the input template <typename Iterator, typename Context, typename Skipper> struct best_function { best_function(Iterator& first, Iterator const& last, Context& context, Skipper const& skipper, int& best, int& idx, int& size) : first(first), last(last), context(context), skipper(skipper), best(best), idx(idx), size(size) {}; template <typename Component> void operator()(Component& component) const { Iterator f = first; if (component.parse(f, last, context, skipper, qi::unused)) { // please have a look on how I transport best, idx and size // into the parser context // // I guess, this is a nasty hack and could be done better // Any advice would be highliy appreciated int l = f - first; if (l > size) { size = l; best = idx++; } idx++; } } private: int& best; int& idx; int& size; Iterator& first; Iterator const& last; Context& context; Skipper const& skipper; }; template <typename Context, typename Iterator> struct attribute { // Put all the element attributes in a tuple typedef typename spirit::traits::build_attribute_sequence < Elements, Context, spirit::traits::alternative_attribute_transform , Iterator, qi::domain >::type all_attributes; // Ok, now make a variant over the attribute sequence. Note that // build_variant makes sure that 1) all attributes in the variant // are unique 2) puts the unused attribute, if there is any, to // the front and 3) collapses single element variants, variant<T> // to T. typedef typename spirit::traits::build_variant<all_attributes>::type type; }; best_match_parser(Elements const& elements_) : elements(elements_), size(0), idx(0), best(-1) {} template <typename Iterator, typename Context, typename Skipper, typename Attribute> bool parse(Iterator& first, Iterator const& last , Context& context, Skipper const& skipper , Attribute& attr_) const { best_function<Iterator, Context, Skipper> f(first, last, context, skipper, best, idx, size); // find out which parser matches most of the input fusion::for_each(elements, f); // best >= 0 if at least one parser was successful if (best >= 0) { // now that I have the best parser identified, how do I access it? // I understand that the next line can not work, but I''m looking for something like that // --> auto v = fusion::at<boost::mpl::int_<best>>(elements); }; return false; } template <typename Context> qi::info what(Context& context) const { qi::info result("best_match"); fusion::for_each(elements, spirit::detail::what_function<Context>(result, context)); return result; } Elements elements; mutable int best; mutable int idx; mutable int size; }; } } namespace boost { namespace spirit { namespace qi { /////////////////////////////////////////////////////////////////////////// // Parser generators: make_xxx function (objects) /////////////////////////////////////////////////////////////////////////// template <typename Elements, typename Modifiers> struct make_composite<proto::tag::bitwise_or_assign, Elements, Modifiers> : make_nary_composite < Elements, mxc::qitoo::best_match_parser > {}; } namespace traits { /////////////////////////////////////////////////////////////////////////// template <typename Elements> struct has_semantic_action<mxc::qitoo::best_match_parser<Elements> > : nary_has_semantic_action<Elements> {}; /////////////////////////////////////////////////////////////////////////// template <typename Elements, typename Attribute, typename Context , typename Iterator> struct handles_container<mxc::qitoo::best_match_parser<Elements>, Attribute, Context , Iterator> : nary_handles_container<Elements, Attribute, Context, Iterator> {}; } } } namespace qi = boost::spirit::qi; namespace qitoo = mxc::qitoo; using iterator_type = std::string::const_iterator; using result_type = std::string; template<typename Parser> void parse(const std::string message, const std::string& input, const std::string& rule, const Parser& parser) { iterator_type iter = input.begin(), end = input.end(); std::vector<result_type> parsed_result; std::cout << "-------------------------/n"; std::cout << message << "/n"; std::cout << "Rule: " << rule << std::endl; std::cout << "Parsing: /"" << input << "/"/n"; bool result = qi::phrase_parse(iter, end, parser, qi::space, parsed_result); if (result) { std::cout << "Parser succeeded./n"; std::cout << "Parsed " << parsed_result.size() << " elements:"; for (const auto& str : parsed_result) std::cout << "[" << str << "]"; std::cout << std::endl; } else { std::cout << "Parser failed" << std::endl; } if (iter == end) { std::cout << "EOI reached." << std::endl; } else { std::cout << "EOI not reached. Unparsed: /"" << std::string(iter, end) << "/"" << std::endl; } std::cout << "-------------------------/n"; } int main() { namespace qi = boost::spirit::qi; qi::rule < iterator_type, std::string(), qi::space_type> id = (qi::alpha | qi::char_(''_'')) >> *(qi::alnum | qi::char_(''_'')); qi::rule < iterator_type, std::string(), qi::space_type> qualified_id = id >> *(qi::string("::") > id); namespace qitoo = mxc::qitoo; namespace qi = boost::spirit::qi; parse("best match operator, select second rule" , "willy::anton::lutz" , "id |= qualified_id" , id |= qualified_id); }