/* Asterisk INI to Property List File Conversion Utility -- Version 1.10 * * scanner.h * aini2plist * * Lexical analyser for Asterisk INI configuration files. * * Author: Benjamin Kowarsch * * (C) 2005, 2006 Sunrise Telephone Systems Ltd. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * In countries and territories where the above no-warranty disclaimer is * not permissible by applicable law, the following terms apply: * * NO PERMISSION TO USE THE SOFTWARE IS GRANTED AND THE SOFTWARE MUST NOT BE * USED AT ALL IN SUCH COUNTRIES AND TERRITORIES WHERE THE ABOVE NO-WARRANTY * DISCLAIMER IS NOT PERMISSIBLE AND INVALIDATED BY APPLICABLE LAW. HOWEVER, * THE COPYRIGHT HOLDERS HEREBY WAIVE THEIR RIGHT TO PURSUE OFFENDERS AS LONG * AS THEY OTHERWISE ABIDE BY THE TERMS OF THE LICENSE AS APPLICABLE FOR USE * OF THE SOFTWARE IN COUNTRIES AND TERRITORIES WHERE THE ABOVE NO-WARRANTY * DISCLAIMER IS PERMITTED BY APPLICABLE LAW. 
THIS WAIVER DOES NOT CONSTITUTE
 * A LICENSE TO USE THE SOFTWARE IN COUNTRIES AND TERRITORIES WHERE THE ABOVE
 * NO-WARRANTY DISCLAIMER IS NOT PERMISSIBLE AND INVALIDATED BY APPLICABLE
 * LAW. ANY LIABILITY OF ANY KIND IS CATEGORICALLY RULED OUT AT ALL TIMES.
 */

// NOTE(review): the header name of the following #include was missing from
// the source text. <stdbool.h> is assumed because the declarations in this
// file use bool -- confirm against the original file.
#include <stdbool.h>
#import "globaldefs.h"

// ---------------------------------------------------------------------------
// Symbol tokens
// ---------------------------------------------------------------------------
//
// The scanner recognises the following symbols for which it returns tokens
//
// Comments:
//    start with a semicolon ";" and end with an end-of-line or end-of-file
//    marker. The maximum length of a comment is 256 characters. Any
//    excess characters will be ignored until the end of the comment is
//    reached. Comments may contain any characters other than control
//    characters (ASCII 0-31 and ASCII 127). End-of-line and end-of-file
//    control characters will terminate a comment, other control characters
//    will be ignored. The token identifier for comments is COMMENT.
//
// Section header start marker:
//    the opening bracket "[" if it occurs as the first symbol on a line.
//    The token identifier for section header start markers is
//    START_OF_SECTION_HEADER.
//
// Virtual file header start marker: (used by FreeSwitch.org)
//    the opening bracket immediately followed by the plus sign "[+" if it
//    occurs as the first symbol on a line. The token identifier for virtual
//    file header start markers is START_OF_VFILE_HEADER.
//
// Section and virtual file header end marker:
//    the closing bracket "]" if it occurs on the left hand side of an
//    assignment. The token identifier for header end markers is
//    END_OF_HEADER.
//
// Identifiers:
//    start with any lowercase letter, uppercase letter or underscore and
//    end with the first character that is not allowed in an identifier.
//    Identifiers are only recognised as such if they occur on the left
//    hand side of an assignment.
//    The maximum length of an identifier is
//    32 characters. Any excess characters will be ignored until either a
//    character not allowed in identifiers or an end-of-line or
//    end-of-file marker is reached. An identifier may contain lowercase
//    letters a-z, uppercase letters A-Z, digits 0-9, the underscore "_",
//    the hyphen "-" and the dot "." but it must begin with a letter or
//    an underscore. The token identifier for identifiers is IDENTIFIER.
//
// Assign operators:
//    either an equal sign "=" or an equal sign immediately followed by
//    a closing angular bracket "=>". The token identifier for assign
//    operators is ASSIGN_OPERATOR.
//
// Plain values:
//    start with any character other than the quotation mark '"', the opening
//    parenthesis "(", the opening angular bracket "<", the backquote "`",
//    the semicolon ";" and end with the first value separator or an
//    end-of-line or end-of-file marker. The maximum length for a plain value
//    is 256 characters. Any excess characters will be ignored until either a
//    comment delimiter or an end-of-line or end-of-file marker is reached.
//    Characters allowed within plain values are any 7-bit ASCII characters
//    which are not control characters (ASCII 0-31 and ASCII 127), not value
//    separators (see Value separators) and not comment delimiters (see
//    Comments). The token identifier for plain values is PLAIN_VALUE.
//
// Quoted values:
//    start with a quotation mark '"' and end either with the first quotation
//    mark that is not escaped by a preceding backslash "\"" or with an
//    end-of-line or an end-of-file marker. The quotation mark delimiters are
//    not part of the quoted value, they will be removed by the scanner. The
//    maximum capacity for a quoted value is 1024 octets. Any excess
//    characters will be ignored until either a quotation mark not escaped by
//    a preceding backslash or an end-of-line or end-of-file marker is
//    reached.
//    Characters allowed within quoted values are any UTF-8
//    characters other than the quotation mark which must be escaped by a
//    preceding backslash "\"". The token identifier for quoted values is
//    QUOTED_VALUE.
//
//    NB: In order to accommodate other encodings, an encoding prefix using
//    the backquote "`" followed by an encoding scheme identifier could be
//    introduced in the future, for example `UTF16"foo bar baz". The token
//    identifier ENCODING_PREFIX has been defined and reserved for future
//    support of different character encodings.
//
// Bracketed values:
//    start with an opening angular bracket "<" and end with the first
//    closing angular bracket ">" or with an end-of-line or end-of-file
//    marker. The maximum length of a bracketed value is 24 characters.
//    Any excess characters are ignored until either a closing angular
//    bracket or an end-of-line or end-of-file marker is reached.
//    Characters allowed within bracketed values are digits 0-9, opening
//    parenthesis "(", closing parenthesis ")", hyphen "-", a leading "+"
//    and non-leading non-trailing whitespace. The token identifier for
//    bracketed values is BRACKETED_VALUE.
//
// Value separators:
//    either a comma "," or a vertical bar "|". The token identifier for
//    value separators is VALUE_SEPARATOR.
//
// Argument lists:
//    start with an opening parenthesis "(" and end with a matching closing
//    parenthesis ")" or with an end-of-line or end-of-file marker.
//    Parentheses may be nested within argument lists. The maximum length
//    of an argument list is 256 characters. Any excess characters are
//    ignored until either all opening parentheses are matched with
//    closing parentheses or an end-of-line or end-of-file marker is
//    reached. Characters allowed within argument lists are any 7-bit ASCII
//    characters which are not control characters (ASCII 0-31 and ASCII 127).
//    The token identifier for argument lists is ARGUMENT_LIST.
// // End-of-line marker: // either an ASCII LF control character or an ASCII CR control character // immediately followed by an ASCII LF control character. The token // identifier for end-of-line markers is EOL_MARK. // // End-of-file marker: // the underlying file system has signalled that the end of the source // file has been reached at the previous attempt to read from the file. // The token identifier for the end-of-file marker is EOF_MARK. // typedef /* ScanerToken */ enum { NO_TOKEN = 0, COMMENT = 1, START_OF_SECTION_HEADER = 2, START_OF_VFILE_HEADER = 3, END_OF_HEADER = 4, IDENTIFIER = 5, ASSIGN_OPERATOR = 6, PLAIN_VALUE = 7, ENCODING_PREFIX = 8, /* RESERVED FOR FUTURE USE */ QUOTED_VALUE = 9, BRACKETED_VALUE = 10, VALUE_SEPARATOR = 11, ARGUMENT_LIST = 12, EOL_MARK = 13, EOF_MARK = 14 } ScannerToken; // -------------------------------------------------------------------------- // Status codes // -------------------------------------------------------------------------- // typedef /* ScannerStatus */ enum { SCANNER_STATUS_UNDEFINED = -1, SCANNER_STATUS_SUCCESS = 1, SCANNER_STATUS_ALREADY_INITIALIZED = 2, SCANNER_STATUS_NOT_INITIALIZED = 3, SCANNER_STATUS_FILE_NOT_FOUND = 4, SCANNER_STATUS_FILE_ACCESS_DENIED = 5, SCANNER_STATUS_OPEN_FILE_LIMIT_REACHED = 6, SCANNER_STATUS_PATH_NAME_TOO_LONG = 7, SCANNER_STATUS_OUT_OF_MEMORY = 8, SCANNER_STATUS_LOOP_IN_PATHNAME = 9, SCANNER_STATUS_ERROR_OPENING_FILE = 10, SCANNER_STATUS_ILLEGAL_CHARACTER = 11 } ScannerStatus; // --------------------------------------------------------------------------- // Position type definition // --------------------------------------------------------------------------- typedef /* ScannerPosition */ struct { // row number in the source file CARDINAL row; // line number in the source file CARDINAL line; } ScannerPosition; // --------------------------------------------------------------------------- // function: init_scanner(infile) // 
--------------------------------------------------------------------------- // // Initialises the scanner and opens the sourcefile for reading. ScannerStatus init_scanner(const char *infile); // --------------------------------------------------------------------------- // function: scanner_initialized() // --------------------------------------------------------------------------- // // Returns the intialisation status of the scanner, true or false. bool scanner_initialized(); // --------------------------------------------------------------------------- // function: getsym() // --------------------------------------------------------------------------- // // Reads the current symbol from the sourcefile and returns its token. ScannerToken getsym(); // --------------------------------------------------------------------------- // function: lookahead_sym() // --------------------------------------------------------------------------- // // Looks ahead one symbol in the sourcefile and returns its token. Subsequent // calls to this function will return the same symbol. Subsequent calls to // getpos() and getlex() will return the respective values for the current // symbol and not for the lookahead symbol. ScannerToken lookahead_sym(); // --------------------------------------------------------------------------- // function: getpos() // --------------------------------------------------------------------------- // // Returns the most previously read symbol's position in the sourcefile. ScannerPosition getpos(); // --------------------------------------------------------------------------- // function: get_filename() // --------------------------------------------------------------------------- // // Returns the name of the most previously read symbol's sourcefile. 
const char *get_filename(); // --------------------------------------------------------------------------- // function: gethash() // --------------------------------------------------------------------------- // // Returns the hash value of the most previously read symbol if the symbol is // an identifier, otherwise zero. // CARDINAL gethash(); // --------------------------------------------------------------------------- // function: getlex(*str) // --------------------------------------------------------------------------- // // Passes the most previously read symbol's lexical representation (lexeme) // in str. const char *getlex(); // --------------------------------------------------------------------------- // function: getstat() // --------------------------------------------------------------------------- // // Returns the status of the most recent operation. ScannerStatus getstat(); // --------------------------------------------------------------------------- // function: illegal_characters_skipped() // --------------------------------------------------------------------------- // // Returns true if illegal characters were skipped while last symbol was read, // otherwise false. bool illegal_characters_skipped(); // --------------------------------------------------------------------------- // function: excess_characters_ignored() // --------------------------------------------------------------------------- // // Returns true if excess characters were ignored while last symbol was read, // otherwise false. bool excess_characters_ignored(); // --------------------------------------------------------------------------- // function: auto_terminated() // --------------------------------------------------------------------------- // // Returns true if a closing delimiter was missing and the symbol was auto- // terminated by an end-of-line or end-of-file marker while the last symbol // was read, otherwise false. 
bool auto_terminated(); // --------------------------------------------------------------------------- // function: max_len_of(sym) // --------------------------------------------------------------------------- // // Returns the maximum allowable length for symbol sym. CARDINAL max_len_of(const ScannerToken sym); // --------------------------------------------------------------------------- // function: size_of_lexbuf() // --------------------------------------------------------------------------- // // Returns the size of the lexeme buffer. CARDINAL size_of_lexbuf(); // --------------------------------------------------------------------------- // function: token_name(sym) // --------------------------------------------------------------------------- // // Returns a human readable name for token sym. const char *token_name(const ScannerToken sym); // --------------------------------------------------------------------------- // function: reset_scanner() // --------------------------------------------------------------------------- // // Resets the scanner to pre-initialisation status and closes the sourcefile. ScannerStatus reset_scanner(); // END OF FILE