/* Asterisk INI to Property List File Conversion Utility -- Version 1.10 * * parser.c * aini2plist * * Recursive descent parser for generic Asterisk INI configuration files * * Author: Benjamin Kowarsch * * (C) 2006 Sunrise Telephone Systems Ltd. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * In countries and territories where the above no-warranty disclaimer is * not permissible by applicable law, the following terms apply: * * NO PERMISSION TO USE THE SOFTWARE IS GRANTED AND THE SOFTWARE MUST NOT BE * USED AT ALL IN SUCH COUNTRIES AND TERRITORIES WHERE THE ABOVE NO-WARRANTY * DISCLAIMER IS NOT PERMISSIBLE AND INVALIDATED BY APPLICABLE LAW. HOWEVER, * THE COPYRIGHT HOLDERS HEREBY WAIVE THEIR RIGHT TO PURSUE OFFENDERS AS LONG * AS THEY OTHERWISE ABIDE BY THE TERMS OF THE LICENSE AS APPLICABLE FOR USE * OF THE SOFTWARE IN COUNTRIES AND TERRITORIES WHERE THE ABOVE NO-WARRANTY * DISCLAIMER IS PERMITTED BY APPLICABLE LAW. THIS WAIVER DOES NOT CONSTITUTE * A LICENSE TO USE THE SOFTWARE IN COUNTRIES AND TERRITORIES WHERE THE ABOVE * NO-WARRANTY DISCLAIMER IS NOT PERMISSIBLE AND INVALIDATED BY APPLICABLE * LAW. ANY LIABILITY OF ANY KIND IS CATEGORICALLY RULED OUT AT ALL TIMES. */ // This module implements a single pass recursive descent parser for the // LL(1) grammar below which describes Asterisk INI configuration files: // // Production rules // // (1) config-file = // { config-unit } end-of-file-mark // // (2) config-unit = // section | comment-line | empty-line // // (3) section = // section-header section-body // // (4) section-header = // start-of-section-header identifier end-of-section-header // [ comment ] end-of-line-mark // // (5) section-body = // { key-value-assignment | comment-line | empty-line } // // (6) key-value-assignment = // identifier assign-operator value-list // [ comment ] end-of-line-mark // // (7) value-list = // value { value-separator value } // // (8) value = // composite-plain-value | composite-quoted-value // // (9) composite-plain-value = // plain-value [ argument-list ] // // (10) composite-quoted-value = // quoted-value [ bracketed-value ] // // (11) comment-line = // comment end-of-line-mark // // (12) empty-line = // end-of-line-mark #include #include #include #include #include "globaldefs.h" #include "scanner.h" #define Symbol ScannerToken #define Position ScannerPosition #include "intermediate.h" #include "plistgen.h" #include "parser.h" // --------------------------------------------------------------------------- // Global parser defaults // --------------------------------------------------------------------------- static CARDINAL default_parser_verbosity_level = 0; static CARDINAL default_parser_comment_exclusion_level = 0; static CARDINAL error_count_of_last_parser_invocation = 0; // --------------------------------------------------------------------------- // Parser state type definition // --------------------------------------------------------------------------- typedef /* Parser */ struct { char *infile; // name of source file char *outfile; // name of target file CARDINAL verbosity; // verbosity level CARDINAL comment_exclusion; // comment exclusion level CARDINAL current_section; // hash of current section CARDINAL last_key_fetched; // hash of last fetched key bool last_assignment_was_list; // last assignment type CARDINAL error_count; // parser error count } Parser; // -------------------------------------------------------------------------- // Parser state variable // -------------------------------------------------------------------------- static Parser parser = { NULL, NULL, 0, 0, 0, 0, false, 0 }; // ========================================================================== // P R I V A T E F U N C T I O N S // ========================================================================== // -------------------------------------------------------------------------- // private function: reset_parser_state() // -------------------------------------------------------------------------- // static void reset_parser_state() { parser.verbosity = default_parser_verbosity_level; parser.comment_exclusion = default_parser_comment_exclusion_level; parser.current_section = 0; parser.last_key_fetched = 0; parser.last_assignment_was_list = false; parser.error_count = 0; } // end reset_parser_state // -------------------------------------------------------------------------- // E R R O R H A N D L I N G & R E P O R T I N G // -------------------------------------------------------------------------- // -------------------------------------------------------------------------- // private function: skip_symbols_until_resync_point_reached(sym, resync) // -------------------------------------------------------------------------- // static Symbol skip_symbols_until_resync_point_reached(Symbol current_sym, Symbol resync_point) { Symbol sym = current_sym; if (parser.verbosity > 0) { fprintf(stderr, "all symbols until next '%s' will be ignored.\n", token_name(resync_point)); } // end if while ((sym != resync_point) && (sym != EOF_MARK)) { if (parser.verbosity = 3) { fprintf(stderr, "ignoring symbol '%s'\n", token_name(sym)); } // end if sym = getsym(); } // end while return sym; } // end skip_symbols_until_resync_point_reached // -------------------------------------------------------------------------- // private function: report_illegal_syntax_and_skip_to(sym, resync_point) // -------------------------------------------------------------------------- // static Symbol report_illegal_syntax_and_skip_to(Symbol current_sym, Symbol resync_point) { Symbol sym; Position pos = getpos(); fprintf(stderr, "\nPARSER ERROR: Unexpected '%s' found\n in file: %s, line:%i, row:%i\n", token_name(current_sym), get_filename(), pos.line, pos.row); sym = skip_symbols_until_resync_point_reached(current_sym, resync_point); parser.error_count++; if (parser.verbosity = 2) { fprintf(stderr, "parse error count now %i\n", parser.error_count); } // end if return sym; } // end report_illegal_syntax_and_skip_to // -------------------------------------------------------------------------- // private function: report_illegal_duplicate_section_and_skip_to(sym, resync) // -------------------------------------------------------------------------- // static Symbol report_illegal_duplicate_section_and_skip_to(Symbol current_sym, Symbol resync_point) { Symbol sym; Position pos = getpos(); fprintf(stderr, "\nPARSER ERROR: Duplicate declaration of section '%s' found\n in file: %s, line:%i, row:%i\n", getlex(), get_filename(), pos.line, pos.row); sym = skip_symbols_until_resync_point_reached(current_sym, resync_point); parser.error_count++; if (parser.verbosity = 2) { fprintf(stderr, "parse error count now %i\n", parser.error_count); } // end if return sym; } // end report_illegal_duplicate_section_and_skip_to static void report_information_with_filename_and_position(const char *text) { Position pos = getpos(); fprintf(stderr, "%s in file: %s, line:%i, row:%i\n", text, get_filename(), pos.line, pos.row); return; } // end if // -------------------------------------------------------------------------- // P R O D U C T I O N R U L E I M P L E M E N T A T I O N S // -------------------------------------------------------------------------- // -------------------------------------------------------------------------- // private function: parse_config_file() // -------------------------------------------------------------------------- // // This function implements production rule # 1 ... // // (1) config-file = // { config-unit } end-of-file-mark // static Symbol parse_config_unit(const Symbol current_sym); /* FORWARD */ static Symbol parse_config_file(const Symbol current_sym) { Symbol sym; sym = current_sym; while (sym != EOF_MARK) { sym = parse_config_unit(sym); if (sym == EOF_MARK) { // we're done } // end if } // end while return sym; } // end parse_config_file // -------------------------------------------------------------------------- // private function: parse_config_unit() // -------------------------------------------------------------------------- // // This function implements production rule # 2 ... // // (2) config-unit = // section | comment-line | empty-line // static Symbol parse_section(const Symbol current_sym); /* FORWARD */ static Symbol parse_comment_line(const Symbol current_sym); /* FORWARD */ static Symbol parse_empty_line(const Symbol current_sym); /* FORWARD */ static Symbol parse_config_unit(const Symbol current_sym) { Symbol sym; sym = current_sym; if (sym == START_OF_SECTION_HEADER) { // found section sym = parse_section(sym); } else if (sym == COMMENT) { // found comment-line sym = parse_comment_line(sym); } else if (sym == EOL_MARK) { // found empty-line sym = parse_empty_line(sym); } else { // illegal syntax sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if return sym; } // end parse_config_unit // -------------------------------------------------------------------------- // private function: parse_section() // -------------------------------------------------------------------------- // // This function implements production rule # 3 ... // // (3) section = // section-header section-body // static Symbol parse_section_header(const Symbol current_sym); /* FORWARD */ static Symbol parse_section_body(const Symbol current_sym); /* FORWARD */ static Symbol parse_section(const Symbol current_sym) { Symbol sym; sym = current_sym; if (sym == START_OF_SECTION_HEADER) { sym = parse_section_header(sym); if (sym = IDENTIFIER) { sym = parse_section_body(sym); } else { sym = report_illegal_syntax_and_skip_to(sym, START_OF_SECTION_HEADER); } // end if } else { sym = report_illegal_syntax_and_skip_to(sym, START_OF_SECTION_HEADER); } // end if return sym; } // end parse_section // -------------------------------------------------------------------------- // private function: parse_section_header() // -------------------------------------------------------------------------- // // This function implements production rule # 4 ... // // (4) section-header = // start-of-section-header identifier end-of-section-header // [ comment ] end-of-line-mark // static Symbol parse_section_header(const Symbol current_sym) { Symbol sym; CARDINAL hash; sym = current_sym; if (sym == START_OF_SECTION_HEADER) { sym = getsym(); /* if (parser.current_section != 0) { // emit previous section emit_section(parser.current_section); } // end if */ if (sym == IDENTIFIER) { // section identifier hash = gethash(); // insert into lookup table if first occurence if (section_is_present(hash) == false) { new_section_with_name(hash, getlex()); parser.current_section = hash; } else /* section already defined */ { if (is_unique_section(hash) == false) { // legal duplicate // treat as continuation parser.current_section = hash; } else { // illegal duplicate // report error and skip section parser.current_section = 0; sym = report_illegal_duplicate_section_and_skip_to(sym, START_OF_SECTION_HEADER); return sym; } // end if } // end if // assign any pending comments to this section // unless level set to ignore all comment lines if (parser.comment_exclusion < 3) { assign_pending_comments_to_section(hash); } else { if (parser.verbosity == 3) { report_information_with_filename_and_position("excluding preceeding comment lines"); } // end if remove_all_pending_comments(); } // end if // get next symbol sym = getsym(); if (sym == END_OF_HEADER) { sym = getsym(); if (sym == COMMENT) { // found trailing comment /// TO DO: store trailing comment for section name sym = getsym(); } // end if if (sym == EOL_MARK) { // found end-of-line marker sym = getsym(); } else { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if } else { sym = report_illegal_syntax_and_skip_to(sym, START_OF_SECTION_HEADER); } // end if } else { sym = report_illegal_syntax_and_skip_to(sym, START_OF_SECTION_HEADER); } // end if } else { sym = report_illegal_syntax_and_skip_to(sym, START_OF_SECTION_HEADER); } // end if return sym; } // end parse_section_header // -------------------------------------------------------------------------- // private function: parse_section_body() // -------------------------------------------------------------------------- // // This function implements production rule # 5... // // (5) section-body = // { key-value-assignment | comment-line | empty-line } // static Symbol parse_key_value_assignment(const Symbol current_sym); /* FORWARD */ static Symbol parse_empty_line(const Symbol current_sym); /* FORWARD */ static Symbol parse_section_body(const Symbol current_sym) { Symbol sym; sym = current_sym; while ((sym == IDENTIFIER) || (sym == COMMENT) || (sym == EOL_MARK)) { if (sym == IDENTIFIER) { // found key identifier sym = parse_key_value_assignment(sym); } else if (sym == COMMENT) { // found comment-line sym = parse_comment_line(sym); } else if (sym == EOL_MARK) { // found empty-line // token consumed, fetch next sym = parse_empty_line(sym); } else { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if } // end while return sym; } // end parse_section_body // -------------------------------------------------------------------------- // private function: parse_key_value_assignment() // -------------------------------------------------------------------------- // // This function implements production rule # 6 ... // // (6) key-value-assignment = // identifier assign-operator value-list // [ comment ] end-of-line-mark // static Symbol parse_value_list(const Symbol current_sym); /* FORWARD */ static Symbol parse_key_value_assignment(const Symbol current_sym) { Symbol sym; CARDINAL hash; const char *ident = NULL; sym = current_sym; if (sym == IDENTIFIER) { // key identifier // look up this key hash = gethash(); // get key's lexeme ident = getlex(); // insert into lookup table if first occurence if (key_is_present(parser.current_section, hash) == false) { new_key_with_name(parser.current_section, hash, ident); } // end if // remember this key parser.last_key_fetched = hash; // assign any pending comments to this key // unless level set to ignore comment lines in sections if (parser.comment_exclusion < 1) { assign_pending_comments_to_key(parser.current_section, hash); } else { if (parser.verbosity == 3) { report_information_with_filename_and_position("excluding preceeding comment lines"); } // end if remove_all_pending_comments(); } // end if sym = getsym(); if (sym == ASSIGN_OPERATOR) { sym = getsym(); sym = parse_value_list(sym); if (sym == COMMENT) { // trailing comment // for now we only deal with trailing comments in assignments with a single value if ((parser.last_assignment_was_list == false) && (parser.comment_exclusion < 2)) { // store it as inline comment of last value set_comment_of_last_value_for_key(parser.current_section, hash, getlex()); } else if (parser.verbosity == 3) { report_information_with_filename_and_position("excluding inline comment"); } // end if sym = getsym(); } // end if /* // assign this key-value assignment to the current section assign_key_to_section(parser.current_section, hash); */ if (sym == EOL_MARK) { sym = getsym(); } else sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if } else { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if return sym; } // end parse_key_value_assignment // -------------------------------------------------------------------------- // private function: parse_value_list() // -------------------------------------------------------------------------- // // This function implements production rule # 7 ... // // (7) value-list = // value { value-separator value } // static Symbol parse_value(const Symbol current_sym); /* FORWARD */ static Symbol parse_value_list(const Symbol current_sym) { Symbol sym; sym = current_sym; if ((sym == PLAIN_VALUE) || (sym == QUOTED_VALUE)) { sym = parse_value(sym); if (sym == VALUE_SEPARATOR) { while (sym == VALUE_SEPARATOR) { sym = getsym(); sym = parse_value(sym); } // end while parser.last_assignment_was_list = true; } else { parser.last_assignment_was_list = false; } // end if } else /* not a value */ { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if return sym; } // end parse_value_list // -------------------------------------------------------------------------- // private function: parse_value() // -------------------------------------------------------------------------- // // This function implements production rule # 8 ... // // (8) value = // composite-plain-value | composite-quoted-value // static Symbol parse_composite_plain_value(const Symbol current_sym); /* FORWARD */ static Symbol parse_composite_quoted_value(const Symbol current_sym); /* FORWARD */ static Symbol parse_value(const Symbol current_sym) { Symbol sym; sym = current_sym; if (sym == PLAIN_VALUE) { sym = parse_composite_plain_value(sym); } else if (sym == QUOTED_VALUE) { sym = parse_composite_quoted_value(sym); } else { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if return sym; } // end parse_value // -------------------------------------------------------------------------- // private function: parse_composite_plain_value() // -------------------------------------------------------------------------- // // This function implements production rule # 9 ... // // (9) composite-plain-value = // plain-value [ argument-list ] static Symbol parse_composite_plain_value(const Symbol current_sym) { Symbol sym; char *str = NULL; sym = current_sym; if (sym == PLAIN_VALUE) { str = alloca(strlen(getlex()) + max_len_of(ARGUMENT_LIST)); str[0] = CSTRING_TERMINATOR; strncat(str, getlex(), strlen(getlex())); sym = getsym(); if (sym == ARGUMENT_LIST) { strncat(str, getlex(), strlen(getlex())); sym = getsym(); } // end if assign_value_to_key(parser.current_section, parser.last_key_fetched, str); } else { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if return sym; } // end parse_composite_plain_value // -------------------------------------------------------------------------- // private function: parse_composite_quoted_value() // -------------------------------------------------------------------------- // // This function implements production rule # 10 ... // // (10) composite-quoted-value = // quoted-value [ bracketed-value ] static Symbol parse_composite_quoted_value(const Symbol current_sym) { Symbol sym; char *str = NULL; sym = current_sym; if (sym == QUOTED_VALUE) { str = alloca(strlen(getlex()) + max_len_of(BRACKETED_VALUE) + 2); str[0] = '\"'; str[1] = 0; strncat(str, getlex(), strlen(getlex())); strncat(str, "\"", 2); sym = getsym(); if (sym == BRACKETED_VALUE) { strncat(str, getlex(), strlen(getlex())); sym = getsym(); } // end if assign_value_to_key(parser.current_section, parser.last_key_fetched, str); } else { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if return sym; } // end parse_composite_quoted_value // -------------------------------------------------------------------------- // private function: parse_comment_line() // -------------------------------------------------------------------------- // // This function implements production rule # 11 ... // // (11) comment-line = // comment end-of-line-mark static Symbol parse_comment_line(const Symbol current_sym) { Symbol sym; // process comments unless level is set to ignore all comments if (parser.comment_exclusion < 3) { new_unassigned_comment(getlex()); } // end if sym = getsym(); if ((sym == EOL_MARK) || (sym == EOF_MARK)) { sym = getsym(); } else { sym = report_illegal_syntax_and_skip_to(sym, EOL_MARK); } // end if return sym; } // end parse_comment_line // -------------------------------------------------------------------------- // private function: parse_empty_line() // -------------------------------------------------------------------------- // // This function implements production rule # 12 ... // // (12) empty-line = // end-of-line-mark static Symbol parse_empty_line(const Symbol current_sym) { Symbol sym; sym = getsym(); return sym; } // end parse_empty_line // ========================================================================== // P U B L I C F U N C T I O N S // ========================================================================== // -------------------------------------------------------------------------- // function set_global_parser_defaults(verbosity, comment_exclusion) // -------------------------------------------------------------------------- // // verbosity levels: // // level 0 (default) // least verbose // // level 3 // most verbose // // comment exclusion levels: // // level 0 (default) // all comments are processed // // level 1 // comment lines inside sections are ignored // inline comments inside sections are processed // comment lines outside of sections are processed // // level 2 // comment lines inside sections are ignored // inline comments inside sections are ignored // comment lines outside of sections are processed // // level 3 // all comments are ignored void set_global_parser_defaults(CARDINAL verbosity, CARDINAL comment_exclusion) { // set the defaults if ((verbosity >= 0) && (verbosity <= MAX_VERBOSITY)) { default_parser_verbosity_level = verbosity; } // end if if ((comment_exclusion >= 0) && (comment_exclusion <= MAX_COMMENT_EXCLUSION)) { default_parser_comment_exclusion_level = comment_exclusion; } // end if } // end set_global_parser_defaults // -------------------------------------------------------------------------- // function parse_file(pathname) // -------------------------------------------------------------------------- // int parse_file(const char *infile) { Symbol sym; CARDINAL status = 1; // initialise parser state reset_parser_state(); // initialise the scanner init_scanner(infile); // get the first symbol sym = getsym(); // parse and build IR tree parse_config_file(sym); if (parser.error_count == 1) { fprintf(stderr, "\n1 error.\n", parser.error_count); status = 0; } else if (parser.error_count > 1) { fprintf(stderr, "\n%i errors.\n", parser.error_count); status = 0; } // end if // reset scanner reset_scanner(); // save error count error_count_of_last_parser_invocation = parser.error_count; // reset parser state reset_parser_state(); return status; } // parse_file // --------------------------------------------------------------------------- // function: get_error_count() // --------------------------------------------------------------------------- // // Returns the error count of the pervious parser invocation. CARDINAL get_error_count() { return error_count_of_last_parser_invocation; } // end get_error_count // END OF FILE