From 15bf28d258f17b5203ef367345247311a20a2c4b Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 28 Oct 2023 11:59:35 +0200 Subject: [PATCH] Code-cleanup - Removed file heads - Turned tabs into spaces - Updated pproto generation - Removed buildxml.c and XML target --- Makefile.gnu | 3 +- README.md | 5 +- src/build.c | 2975 +++++++++++++++++++++++------------------------ src/buildxml.c | 1182 ------------------- src/debug.c | 527 +++++---- src/error.c | 382 +++--- src/first.c | 164 ++- src/integrity.c | 973 ++++++++-------- src/lalr.c | 1373 +++++++++++----------- src/lex.c | 436 ++++--- src/list.c | 260 ++--- src/main.c | 702 ++++++----- src/mem.c | 806 +++++++------ src/parse.min | 981 ++++++++-------- src/parse.par | 1737 ++++++++++++++------------- src/proto.h | 35 +- src/rewrite.c | 1091 +++++++++-------- src/string.c | 46 +- src/unicc.h | 588 +++++----- src/utils.c | 332 +++--- src/virtual.c | 236 ++-- src/xml.c | 2255 ++++++++++++++++++----------------- src/xml.h | 126 +- 23 files changed, 7899 insertions(+), 9316 deletions(-) delete mode 100644 src/buildxml.c diff --git a/Makefile.gnu b/Makefile.gnu index 60a3cb7..db209b8 100644 --- a/Makefile.gnu +++ b/Makefile.gnu @@ -18,7 +18,6 @@ SOURCES = \ src/lex.c \ src/list.c \ src/build.c \ - src/buildxml.c \ src/main.c \ src/xml.c @@ -35,7 +34,7 @@ clean: boot_clean -rm unicc src/proto.h: boot_clean - lib/pproto *.c | awk "/int _parse/ { next } { print }" >$@ + lib/pproto src/*.c | awk "/int _parse/ { next } { print }" >$@ make_install: cp Makefile.gnu Makefile diff --git a/README.md b/README.md index 4f8c8c6..6978616 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
UniCC Logo -

LALR(1) Parser Generator

+

Universal LALR(1) Parser Generator

Badge displaying the test status @@ -8,7 +8,8 @@ Badge displaying the license
- The universal LALR(1) parser generator with built-in scanner generator, creating parsers in different target programming languages. + The universal LALR(1) parser generator with built-in scanner generator,
+ creating parsers in different target programming languages.
## About diff --git a/src/build.c b/src/build.c index 35cd432..8e472dc 100644 --- a/src/build.c +++ b/src/build.c @@ -1,15 +1,6 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: build.c -Author: Jan Max Meyer -Usage: The dynamic program module generator of the UniCC parser generator, - to construct a parser program module in a specific programming language - using a template. ------------------------------------------------------------------------------ */ +/* The dynamic program module generator of the UniCC parser generator, to +construct a parser program module in a specific programming language using +a template. */ #include "unicc.h" @@ -34,27 +25,27 @@ Returns a char*, which is the final (escaped) string. */ char* escape_for_target( GENERATOR* g, char* str, BOOLEAN clear ) { - int i; - char* ret; - char* tmp; + int i; + char* ret; + char* tmp; - if( !( ret = pstrdup( str ) ) ) - ret = pstrdup( "" ); + if( !( ret = pstrdup( str ) ) ) + ret = pstrdup( "" ); - if( clear ) - str = pfree( str ); + if( clear ) + str = pfree( str ); - for( i = 0; i < g->sequences_count; i++ ) - { - if( !( tmp = pstrrender( ret, g->for_sequences[ i ], - g->do_sequences[ i ], FALSE, (char*)NULL ) ) ) - OUTOFMEM; + for( i = 0; i < g->sequences_count; i++ ) + { + if( !( tmp = pstrrender( ret, g->for_sequences[ i ], + g->do_sequences[ i ], FALSE, (char*)NULL ) ) ) + OUTOFMEM; - pfree( ret ); - ret = tmp; - } + pfree( ret ); + ret = tmp; + } - return ret; + return ret; } /** Constructs target language code for production reduction code blocks. @@ -69,444 +60,444 @@ individually coded one. Returns a char*-pointer to the generated code - must be freed by the caller. 
*/ char* build_action( PARSER* parser, GENERATOR* g, PROD* p, - char* base, BOOLEAN def_code ) + char* base, BOOLEAN def_code ) { - char* last = base; - char* start; - char* end; - int off; - unsigned int match; - char* ret = (char*)NULL; - char* chk; - char* tmp; - char* att; - plistel* e; - plist* rhs = p->rhs; - BOOLEAN on_error = FALSE; - SYMBOL* sym; - int i; - char rx [ ONE_LINE + 1 ]; - char* rx_postfix[] = { - "'([^']|\\')*'", - "\"([^\"]|\\\")*\"", - "[A-Za-z_][A-Za-z0-9_]*", - "[0-9]+", - "@", - "!" SYMBOL_VAR ":[A-Za-z_][A-Za-z0-9_]*" - }; - - PROC( "build_action" ); - PARMS( "parser", "%p", parser ); - PARMS( "g", "%p", g ); - PARMS( "p", "%p", p ); - PARMS( "base", "%s", base ); - PARMS( "def_code", "%s", BOOLEAN_STR( def_code ) ); - - /* Once generate a lexer */ - if( !action_lex ) - { - /* Prepare regular expression engine */ - action_lex = plex_create( 0 ); - - for( i = 0; i < 6; i++ ) - { - if( pstrlen( g->prefix ) + pstrlen( rx_postfix[ i ] ) - >= sizeof( rx ) ) - { - MSG( "Buffer too small!" ); - RETURN( (char*)NULL ); - } - - sprintf( rx, "%s%s", g->prefix, rx_postfix[ i ] ); - - VARS( "i", "%d", i ); - VARS( "rx", "%s", rx ); - - /* Watch out: First two regex match same id! */ - if( !plex_define( action_lex, rx, !i ? 
1 : i, 0 ) ) - { - MSG( "Something went wrong with the action lexer definition" ); - RETURN( (char*)NULL ); - } - } - } - - VARS( "p->sem_rhs counts", "%d", plist_count( p->sem_rhs ) ); - - /* Ok, perform replacement operations */ - if( plist_count( p->sem_rhs ) && !def_code ) - { - MSG( "Replacing semantic right-hand side" ); - rhs = p->sem_rhs; - } - - MSG( "Iterating trough matches" ); - - while( ( start = plex_next( action_lex, last, &match, &end ) ) - && !on_error ) - { - off = 0; - tmp = (char*)NULL; - - if( last < start ) - { - if( !( ret = pstrncatstr( ret, last, start - last ) ) ) - OUTOFMEM; - - VARS( "ret", "%s", ret ); - } - - last = end; - - VARS( "match", "%d", match ); - switch( match ) - { - case 1: - start += 1; - end -= 1; - - case 2: - MSG( "Identifier" ); - for( e = plist_first( rhs ), off = 1; e; - e = plist_next( e ), off++ ) - { - chk = plist_key( e ); - VARS( "chk", "%s", chk ? chk : "(NULL)" ); - - if( chk && !strncmp( chk, start + pstrlen( g->prefix ), - end - start - pstrlen( g->prefix ) ) - && pstrlen( chk ) == - end - start - pstrlen( g->prefix ) ) - { - break; - } - } - - if( !e ) - { - print_error( parser, ERR_UNDEFINED_SYMREF, - ERRSTYLE_WARNING, end - start, start ); - off = 0; - tmp = pstrdup( start ); - } - - VARS( "off", "%d", off ); - break; - - case 3: - MSG( "Offset" ); - off = atoi( start + pstrlen( g->prefix ) ); - break; - - case 4: - MSG( "Left-hand side" ); - if( p->lhs->vtype && list_count( parser->vtypes ) > 1 ) - ret = pstrcatstr( ret, - pstrrender( g->action_lhs_union, - GEN_WILD_PREFIX "attribute", - pstrrender( g->vstack_union_att, - GEN_WILD_PREFIX "value-type-id", - int_to_str( p->lhs->vtype->id ), - TRUE, - (char*)NULL ), TRUE, - (char*)NULL ), TRUE ); - else - ret = pstrcatstr( ret, g->action_lhs_single, FALSE ); - - VARS( "ret", "%s", ret ); - break; - - case 5: - MSG( "Assign left-hand side symbol" ); - off = 0; - - if( !( tmp = pasprintf( "%.*s", - end - start - ( pstrlen( SYMBOL_VAR ) - + 2 + pstrlen( 
g->prefix ) ), - start + ( pstrlen( SYMBOL_VAR ) - + 2 + pstrlen( g->prefix ) ) - ) ) ) - { - OUTOFMEM; - RETURN( (char*)NULL ); - } - - VARS( "tmp", "%s", tmp ); - - /* Go through all possible left-hand side symbols */ - plist_for( p->all_lhs, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( !strcmp( sym->name, tmp ) ) - { - MSG( "Found a matching symbol!" ); - - pfree( tmp ); - tmp = (char*)NULL; - - ret = pstrcatstr( ret, - pstrrender( g->action_set_lhs, - GEN_WILD_PREFIX "sym", - int_to_str( sym->id ), TRUE, - (char*)NULL ), TRUE ); - break; - } - } - - if( !e ) - { - MSG( "No match found..." ); - - print_error( parser, ERR_UNDEFINED_LHS, - ERRSTYLE_WARNING, tmp ); - pfree( tmp ); - - if( !( tmp = pstrdup( start ) ) ) - { - OUTOFMEM; - RETURN( (char*)NULL ); - } - } - - break; - - default: - MSG( "Uncaught regular expression match!" ); - break; - } - - VARS( "off", "%d", off ); - if( off > 0 ) - { - MSG( "Handling offset" ); - sym = (SYMBOL*)plist_access( plist_get( rhs, off - 1 ) ); - - if( sym && !( sym->keyword ) ) - { - if( list_count( parser->vtypes ) > 1 ) - { - if( sym->vtype ) - { - att = pstrrender( g->vstack_union_att, - GEN_WILD_PREFIX "value-type-id", - int_to_str( sym->vtype->id ), TRUE, - (char*)NULL ); - } - else - { - print_error( parser, ERR_NO_VALUE_TYPE, ERRSTYLE_FATAL, - sym->name, p->id, end - start + 1, start ); - - att = (char*)NULL; - on_error = TRUE; - } - - tmp = pstrrender( g->action_union, - GEN_WILD_PREFIX "offset", - int_to_str( plist_count( rhs ) - off ), TRUE, - GEN_WILD_PREFIX "attribute", att, TRUE, - (char*)NULL ); - } - else - tmp = pstrrender( g->action_single, - GEN_WILD_PREFIX "offset", - int_to_str( plist_count( rhs ) - off ), TRUE, - (char*)NULL ); - } - else - { - if( !def_code ) - { - print_error( parser, ERR_NO_VALUE_TYPE, ERRSTYLE_FATAL, - find_base_symbol( sym )->name, - p->id, end - start + 1, start ); - } - - on_error = TRUE; - } - } - - if( tmp ) - ret = pstrcatstr( ret, tmp, TRUE ); - } - - if( last && *last 
) - ret = pstrcatstr( ret, last, FALSE ); - - VARS( "ret", "%s", ret ); - VARS( "on_error", "%s", BOOLEAN_STR( on_error ) ); - - if( on_error && ret ) - { - MSG( "Okay, on error, everything will be deleted!" ); - pfree( ret ); - ret = (char*)NULL; - } - - RETURN( ret ); + char* last = base; + char* start; + char* end; + int off; + unsigned int match; + char* ret = (char*)NULL; + char* chk; + char* tmp; + char* att; + plistel* e; + plist* rhs = p->rhs; + BOOLEAN on_error = FALSE; + SYMBOL* sym; + int i; + char rx [ ONE_LINE + 1 ]; + char* rx_postfix[] = { + "'([^']|\\')*'", + "\"([^\"]|\\\")*\"", + "[A-Za-z_][A-Za-z0-9_]*", + "[0-9]+", + "@", + "!" SYMBOL_VAR ":[A-Za-z_][A-Za-z0-9_]*" + }; + + PROC( "build_action" ); + PARMS( "parser", "%p", parser ); + PARMS( "g", "%p", g ); + PARMS( "p", "%p", p ); + PARMS( "base", "%s", base ); + PARMS( "def_code", "%s", BOOLEAN_STR( def_code ) ); + + /* Once generate a lexer */ + if( !action_lex ) + { + /* Prepare regular expression engine */ + action_lex = plex_create( 0 ); + + for( i = 0; i < 6; i++ ) + { + if( pstrlen( g->prefix ) + pstrlen( rx_postfix[ i ] ) + >= sizeof( rx ) ) + { + MSG( "Buffer too small!" ); + RETURN( (char*)NULL ); + } + + sprintf( rx, "%s%s", g->prefix, rx_postfix[ i ] ); + + VARS( "i", "%d", i ); + VARS( "rx", "%s", rx ); + + /* Watch out: First two regex match same id! */ + if( !plex_define( action_lex, rx, !i ? 
1 : i, 0 ) ) + { + MSG( "Something went wrong with the action lexer definition" ); + RETURN( (char*)NULL ); + } + } + } + + VARS( "p->sem_rhs counts", "%d", plist_count( p->sem_rhs ) ); + + /* Ok, perform replacement operations */ + if( plist_count( p->sem_rhs ) && !def_code ) + { + MSG( "Replacing semantic right-hand side" ); + rhs = p->sem_rhs; + } + + MSG( "Iterating trough matches" ); + + while( ( start = plex_next( action_lex, last, &match, &end ) ) + && !on_error ) + { + off = 0; + tmp = (char*)NULL; + + if( last < start ) + { + if( !( ret = pstrncatstr( ret, last, start - last ) ) ) + OUTOFMEM; + + VARS( "ret", "%s", ret ); + } + + last = end; + + VARS( "match", "%d", match ); + switch( match ) + { + case 1: + start += 1; + end -= 1; + + case 2: + MSG( "Identifier" ); + for( e = plist_first( rhs ), off = 1; e; + e = plist_next( e ), off++ ) + { + chk = plist_key( e ); + VARS( "chk", "%s", chk ? chk : "(NULL)" ); + + if( chk && !strncmp( chk, start + pstrlen( g->prefix ), + end - start - pstrlen( g->prefix ) ) + && pstrlen( chk ) == + end - start - pstrlen( g->prefix ) ) + { + break; + } + } + + if( !e ) + { + print_error( parser, ERR_UNDEFINED_SYMREF, + ERRSTYLE_WARNING, end - start, start ); + off = 0; + tmp = pstrdup( start ); + } + + VARS( "off", "%d", off ); + break; + + case 3: + MSG( "Offset" ); + off = atoi( start + pstrlen( g->prefix ) ); + break; + + case 4: + MSG( "Left-hand side" ); + if( p->lhs->vtype && list_count( parser->vtypes ) > 1 ) + ret = pstrcatstr( ret, + pstrrender( g->action_lhs_union, + GEN_WILD_PREFIX "attribute", + pstrrender( g->vstack_union_att, + GEN_WILD_PREFIX "value-type-id", + int_to_str( p->lhs->vtype->id ), + TRUE, + (char*)NULL ), TRUE, + (char*)NULL ), TRUE ); + else + ret = pstrcatstr( ret, g->action_lhs_single, FALSE ); + + VARS( "ret", "%s", ret ); + break; + + case 5: + MSG( "Assign left-hand side symbol" ); + off = 0; + + if( !( tmp = pasprintf( "%.*s", + end - start - ( pstrlen( SYMBOL_VAR ) + + 2 + pstrlen( 
g->prefix ) ), + start + ( pstrlen( SYMBOL_VAR ) + + 2 + pstrlen( g->prefix ) ) + ) ) ) + { + OUTOFMEM; + RETURN( (char*)NULL ); + } + + VARS( "tmp", "%s", tmp ); + + /* Go through all possible left-hand side symbols */ + plist_for( p->all_lhs, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( !strcmp( sym->name, tmp ) ) + { + MSG( "Found a matching symbol!" ); + + pfree( tmp ); + tmp = (char*)NULL; + + ret = pstrcatstr( ret, + pstrrender( g->action_set_lhs, + GEN_WILD_PREFIX "sym", + int_to_str( sym->id ), TRUE, + (char*)NULL ), TRUE ); + break; + } + } + + if( !e ) + { + MSG( "No match found..." ); + + print_error( parser, ERR_UNDEFINED_LHS, + ERRSTYLE_WARNING, tmp ); + pfree( tmp ); + + if( !( tmp = pstrdup( start ) ) ) + { + OUTOFMEM; + RETURN( (char*)NULL ); + } + } + + break; + + default: + MSG( "Uncaught regular expression match!" ); + break; + } + + VARS( "off", "%d", off ); + if( off > 0 ) + { + MSG( "Handling offset" ); + sym = (SYMBOL*)plist_access( plist_get( rhs, off - 1 ) ); + + if( sym && !( sym->keyword ) ) + { + if( list_count( parser->vtypes ) > 1 ) + { + if( sym->vtype ) + { + att = pstrrender( g->vstack_union_att, + GEN_WILD_PREFIX "value-type-id", + int_to_str( sym->vtype->id ), TRUE, + (char*)NULL ); + } + else + { + print_error( parser, ERR_NO_VALUE_TYPE, ERRSTYLE_FATAL, + sym->name, p->id, end - start + 1, start ); + + att = (char*)NULL; + on_error = TRUE; + } + + tmp = pstrrender( g->action_union, + GEN_WILD_PREFIX "offset", + int_to_str( plist_count( rhs ) - off ), TRUE, + GEN_WILD_PREFIX "attribute", att, TRUE, + (char*)NULL ); + } + else + tmp = pstrrender( g->action_single, + GEN_WILD_PREFIX "offset", + int_to_str( plist_count( rhs ) - off ), TRUE, + (char*)NULL ); + } + else + { + if( !def_code ) + { + print_error( parser, ERR_NO_VALUE_TYPE, ERRSTYLE_FATAL, + find_base_symbol( sym )->name, + p->id, end - start + 1, start ); + } + + on_error = TRUE; + } + } + + if( tmp ) + ret = pstrcatstr( ret, tmp, TRUE ); + } + + if( last && *last 
) + ret = pstrcatstr( ret, last, FALSE ); + + VARS( "ret", "%s", ret ); + VARS( "on_error", "%s", BOOLEAN_STR( on_error ) ); + + if( on_error && ret ) + { + MSG( "Okay, on error, everything will be deleted!" ); + pfree( ret ); + ret = (char*)NULL; + } + + RETURN( ret ); } /** Construct the scanner action code from templates. */ char* build_scan_action( PARSER* parser, GENERATOR* g, SYMBOL* s, char* base ) { - char* last = base; - char* start; - char* end; - unsigned int match; - char* ret = (char*)NULL; - char* tmp; - plistel* e; - SYMBOL* sym; - - int i; - char rx [ ONE_LINE + 1 ]; - char* rx_postfix[] = { - ">", - "<", - "@", - "!" SYMBOL_VAR ":[A-Za-z_][A-Za-z0-9_]*" - }; - - PROC( "build_scan_action" ); - PARMS( "parser", "%p", parser ); - PARMS( "g", "%p", g ); - PARMS( "s", "%p", s ); - PARMS( "base", "%s", base ); - - - /* Once generate a lexer */ - if( !scan_lex ) - { - /* Prepare regular expression engine */ - scan_lex = plex_create( 0 ); - - for( i = 0; i < 4; i++ ) - { - if( pstrlen( g->prefix ) + pstrlen( rx_postfix[ i ] ) - >= sizeof( rx ) ) - { - MSG( "Buffer too small!" 
); - - plex_free( scan_lex ); - RETURN( (char*)NULL ); /* Not the best way to handle this */ - } - - sprintf( rx, "%s%s", g->prefix, rx_postfix[ i ] ); - - VARS( "i", "%d", i ); - VARS( "rx", "%s", rx ); - - if( !plex_define( scan_lex, rx, i + 1, 0 ) ) - { - plex_free( scan_lex ); - MSG( "Something went wrong with the action lexer definition" ); - RETURN( (char*)NULL ); - } - } - } - - MSG( "Iterating trough matches" ); - - while( ( start = plex_next( scan_lex, last, &match, &end ) ) ) - { - if( last < start ) - { - if( !( ret = pstrncatstr( - ret, last, start - last ) ) ) - OUTOFMEM; - - VARS( "ret", "%s", ret ); - } - - last = end; - - VARS( "match", "%d", match ); - switch( match ) - { - case 1: - MSG( "@>" ); - ret = pstrcatstr( ret, - g->scan_action_begin_offset, FALSE ); - break; - - case 2: - MSG( "@<" ); - ret = pstrcatstr( ret, - g->scan_action_end_offset, FALSE ); - - break; - - case 3: - MSG( "@@" ); - if( s->vtype && list_count( parser->vtypes ) > 1 ) - ret = pstrcatstr( ret, - pstrrender( g->scan_action_ret_union, - GEN_WILD_PREFIX "attribute", - pstrrender( g->vstack_union_att, - GEN_WILD_PREFIX "value-type-id", - int_to_str( s->vtype->id ), TRUE, - (char*)NULL ), TRUE, - (char*)NULL ), TRUE ); - else - ret = pstrcatstr( ret, - g->scan_action_ret_single, FALSE ); - break; - - case 4: - MSG( "Set terminal symbol" ); - - if( !( tmp = pasprintf( "%.*s", - end - start - ( pstrlen( SYMBOL_VAR ) - + 2 + pstrlen( g->prefix ) ), - start + ( pstrlen( SYMBOL_VAR ) - + 2 + pstrlen( g->prefix ) ) - ) ) ) - OUTOFMEM; - - VARS( "tmp", "%s", tmp ); - - /* Go through all possible terminal symbols */ - plist_for( s->all_sym, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( !strcmp( sym->name, tmp ) ) - { - MSG( "Found a matching symbol!" ); - ret = pstrcatstr( ret, - pstrrender( g->scan_action_set_symbol, - GEN_WILD_PREFIX "sym", - int_to_str( sym->id ), TRUE, - (char*)NULL ), TRUE ); - break; - } - } - - if( !e ) - { - MSG( "No match found..." 
); - - print_error( parser, ERR_UNDEFINED_TERMINAL, - ERRSTYLE_WARNING, tmp ); - } - - pfree( tmp ); - break; - - default: - MSG( "Uncaught regular expression match!" ); - break; - } - - VARS( "ret", "%s", ret ); - } - - if( last && *last ) - ret = pstrcatstr( ret, last, FALSE ); - - VARS( "ret", "%s", ret ); - RETURN( ret ); + char* last = base; + char* start; + char* end; + unsigned int match; + char* ret = (char*)NULL; + char* tmp; + plistel* e; + SYMBOL* sym; + + int i; + char rx [ ONE_LINE + 1 ]; + char* rx_postfix[] = { + ">", + "<", + "@", + "!" SYMBOL_VAR ":[A-Za-z_][A-Za-z0-9_]*" + }; + + PROC( "build_scan_action" ); + PARMS( "parser", "%p", parser ); + PARMS( "g", "%p", g ); + PARMS( "s", "%p", s ); + PARMS( "base", "%s", base ); + + + /* Once generate a lexer */ + if( !scan_lex ) + { + /* Prepare regular expression engine */ + scan_lex = plex_create( 0 ); + + for( i = 0; i < 4; i++ ) + { + if( pstrlen( g->prefix ) + pstrlen( rx_postfix[ i ] ) + >= sizeof( rx ) ) + { + MSG( "Buffer too small!" 
); + + plex_free( scan_lex ); + RETURN( (char*)NULL ); /* Not the best way to handle this */ + } + + sprintf( rx, "%s%s", g->prefix, rx_postfix[ i ] ); + + VARS( "i", "%d", i ); + VARS( "rx", "%s", rx ); + + if( !plex_define( scan_lex, rx, i + 1, 0 ) ) + { + plex_free( scan_lex ); + MSG( "Something went wrong with the action lexer definition" ); + RETURN( (char*)NULL ); + } + } + } + + MSG( "Iterating trough matches" ); + + while( ( start = plex_next( scan_lex, last, &match, &end ) ) ) + { + if( last < start ) + { + if( !( ret = pstrncatstr( + ret, last, start - last ) ) ) + OUTOFMEM; + + VARS( "ret", "%s", ret ); + } + + last = end; + + VARS( "match", "%d", match ); + switch( match ) + { + case 1: + MSG( "@>" ); + ret = pstrcatstr( ret, + g->scan_action_begin_offset, FALSE ); + break; + + case 2: + MSG( "@<" ); + ret = pstrcatstr( ret, + g->scan_action_end_offset, FALSE ); + + break; + + case 3: + MSG( "@@" ); + if( s->vtype && list_count( parser->vtypes ) > 1 ) + ret = pstrcatstr( ret, + pstrrender( g->scan_action_ret_union, + GEN_WILD_PREFIX "attribute", + pstrrender( g->vstack_union_att, + GEN_WILD_PREFIX "value-type-id", + int_to_str( s->vtype->id ), TRUE, + (char*)NULL ), TRUE, + (char*)NULL ), TRUE ); + else + ret = pstrcatstr( ret, + g->scan_action_ret_single, FALSE ); + break; + + case 4: + MSG( "Set terminal symbol" ); + + if( !( tmp = pasprintf( "%.*s", + end - start - ( pstrlen( SYMBOL_VAR ) + + 2 + pstrlen( g->prefix ) ), + start + ( pstrlen( SYMBOL_VAR ) + + 2 + pstrlen( g->prefix ) ) + ) ) ) + OUTOFMEM; + + VARS( "tmp", "%s", tmp ); + + /* Go through all possible terminal symbols */ + plist_for( s->all_sym, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( !strcmp( sym->name, tmp ) ) + { + MSG( "Found a matching symbol!" ); + ret = pstrcatstr( ret, + pstrrender( g->scan_action_set_symbol, + GEN_WILD_PREFIX "sym", + int_to_str( sym->id ), TRUE, + (char*)NULL ), TRUE ); + break; + } + } + + if( !e ) + { + MSG( "No match found..." 
); + + print_error( parser, ERR_UNDEFINED_TERMINAL, + ERRSTYLE_WARNING, tmp ); + } + + pfree( tmp ); + break; + + default: + MSG( "Uncaught regular expression match!" ); + break; + } + + VARS( "ret", "%s", ret ); + } + + if( last && *last ) + ret = pstrcatstr( ret, last, FALSE ); + + VARS( "ret", "%s", ret ); + RETURN( ret ); } /** Converts a production into a dynamic string. @@ -517,51 +508,51 @@ Returns a generated string. */ char* mkproduction_str( PROD* p ) { - char* ret; - char wtf [ 512 ]; /* yes, it stands for 'what the f...' */ - plistel* e; - SYMBOL* sym; - - sprintf( wtf, "%.*s : ", (int)sizeof( wtf ) - 5, p->lhs->name ); - ret = pstrdup( wtf ); - - plist_for( p->rhs, e ) - { - sym = (SYMBOL*)plist_access( e ); - - switch( sym->type ) - { - case SYM_CCL_TERMINAL: - sprintf( wtf, "\'%.*s\'", (int)sizeof( wtf ) - 4, - sym->name ); - break; - - case SYM_REGEX_TERMINAL: - if( sym->keyword ) - sprintf( wtf, "\"%.*s\"", (int)sizeof( wtf ) - 4, - sym->name ); - else - sprintf( wtf, "@%.*s", (int) sizeof( wtf ) - 3, - sym->name ); - break; - - case SYM_SYSTEM_TERMINAL: - sprintf( wtf, "~%s", sym->name ); - break; - - default: - sprintf( wtf, "%.*s", (int)sizeof( wtf ) - 2, - sym->name ); - break; - } - - if( plist_next( e ) ) - strcat( wtf, " " ); - - ret = pstrcatstr( ret, wtf, FALSE ); - } - - return ret; + char* ret; + char wtf [ 512 ]; /* yes, it stands for 'what the f...' 
*/ + plistel* e; + SYMBOL* sym; + + sprintf( wtf, "%.*s : ", (int)sizeof( wtf ) - 5, p->lhs->name ); + ret = pstrdup( wtf ); + + plist_for( p->rhs, e ) + { + sym = (SYMBOL*)plist_access( e ); + + switch( sym->type ) + { + case SYM_CCL_TERMINAL: + sprintf( wtf, "\'%.*s\'", (int)sizeof( wtf ) - 4, + sym->name ); + break; + + case SYM_REGEX_TERMINAL: + if( sym->keyword ) + sprintf( wtf, "\"%.*s\"", (int)sizeof( wtf ) - 4, + sym->name ); + else + sprintf( wtf, "@%.*s", (int) sizeof( wtf ) - 3, + sym->name ); + break; + + case SYM_SYSTEM_TERMINAL: + sprintf( wtf, "~%s", sym->name ); + break; + + default: + sprintf( wtf, "%.*s", (int)sizeof( wtf ) - 2, + sym->name ); + break; + } + + if( plist_next( e ) ) + strcat( wtf, " " ); + + ret = pstrcatstr( ret, wtf, FALSE ); + } + + return ret; } /** Loads a XML-defined code generator into an adequate GENERATOR structure. @@ -576,175 +567,175 @@ Returns TRUE on success, FALSE on error. */ BOOLEAN load_generator( PARSER* parser, GENERATOR* g, char* genfile ) { - char* name; - char* version; - char* lname; - XML_T tmp; - char* att_for; - char* att_do; - int i; + char* name; + char* version; + char* lname; + XML_T tmp; + char* att_for; + char* att_do; + int i; #define GET_XML_DEF( source, target, tagname ) \ - if( xml_child( (source), (tagname) ) ) \ - (target) = (char*)( xml_txt( xml_child( (source), (tagname) ) ) ); \ - else \ - print_error( parser, ERR_TAG_NOT_FOUND, ERRSTYLE_WARNING, \ - (tagname), genfile ); + if( xml_child( (source), (tagname) ) ) \ + (target) = (char*)( xml_txt( xml_child( (source), (tagname) ) ) ); \ + else \ + print_error( parser, ERR_TAG_NOT_FOUND, ERRSTYLE_WARNING, \ + (tagname), genfile ); #define GET_XML_TAB_1D( target, tagname ) \ - if( ( tmp = xml_child( g->xml, (tagname) ) ) ) \ - { \ - GET_XML_DEF( tmp, (target).col, "col" ) \ - GET_XML_DEF( tmp, (target).col_sep, "col_sep" ) \ - } \ - else \ - print_error( parser, ERR_TAG_NOT_FOUND, ERRSTYLE_WARNING, \ - (tagname), genfile ); + if( ( tmp = 
xml_child( g->xml, (tagname) ) ) ) \ + { \ + GET_XML_DEF( tmp, (target).col, "col" ) \ + GET_XML_DEF( tmp, (target).col_sep, "col_sep" ) \ + } \ + else \ + print_error( parser, ERR_TAG_NOT_FOUND, ERRSTYLE_WARNING, \ + (tagname), genfile ); #define GET_XML_TAB_2D( target, tagname ) \ - if( ( tmp = xml_child( g->xml, (tagname) ) ) ) \ - { \ - GET_XML_DEF( tmp, (target).row_start, "row_start" ) \ - GET_XML_DEF( tmp, (target).row_end, "row_end" ) \ - GET_XML_DEF( tmp, (target).row_sep, "row_sep" ) \ - GET_XML_DEF( tmp, (target).col, "col" ) \ - GET_XML_DEF( tmp, (target).col_sep, "col_sep" ) \ - } \ - else \ - print_error( parser, ERR_TAG_NOT_FOUND, ERRSTYLE_WARNING, \ - (tagname), genfile ); - - if( !( g->xml = xml_parse_file( genfile ) ) ) - { - print_error( parser, ERR_NO_GENERATOR_FILE, ERRSTYLE_FATAL, genfile ); - return FALSE; - } - - if( *xml_error( g->xml ) ) - { - print_error( parser, ERR_XML_ERROR, ERRSTYLE_FATAL, - genfile, xml_error( g->xml ) ); - return FALSE; - } - - if( ! *( g->truedef = (char*)xml_txt( xml_child( g->xml, "true" ) ) ) ) - g->truedef = "1"; - - if( ! *( g->falsedef = (char*)xml_txt( xml_child( g->xml, "false" ) ) ) ) - g->falsedef = "0"; - - GET_XML_DEF( g->xml, g->vstack_def_type, "vstack_def_type" ); - GET_XML_DEF( g->xml, g->vstack_term_type, "vstack_term_type" ); - - if( ! 
*( g->prefix = (char*)xml_txt( - xml_child( g->xml, "action_prefix" ) ) ) ) - g->prefix = "@"; - - GET_XML_DEF( g->xml, g->action_start, "action_start" ); - GET_XML_DEF( g->xml, g->action_end, "action_end" ); - GET_XML_DEF( g->xml, g->action_single, "action_single" ); - GET_XML_DEF( g->xml, g->action_union, "action_union" ); - GET_XML_DEF( g->xml, g->action_lhs_single, "action_lhs_single" ); - GET_XML_DEF( g->xml, g->action_lhs_union, "action_lhs_union" ); - GET_XML_DEF( g->xml, g->action_set_lhs, "action_set_lhs" ); - - GET_XML_DEF( g->xml, g->scan_action_start, "scan_action_start" ); - GET_XML_DEF( g->xml, g->scan_action_end, "scan_action_end" ); - GET_XML_DEF( g->xml, g->scan_action_begin_offset, - "scan_action_begin_offset" ); - GET_XML_DEF( g->xml, g->scan_action_end_offset, "scan_action_end_offset" ); - GET_XML_DEF( g->xml, g->scan_action_ret_single, "scan_action_ret_single" ); - GET_XML_DEF( g->xml, g->scan_action_ret_union, "scan_action_ret_union" ); - GET_XML_DEF( g->xml, g->scan_action_set_symbol, "scan_action_set_symbol" ); - - GET_XML_DEF( g->xml, g->vstack_single, "vstack_single" ); - GET_XML_DEF( g->xml, g->vstack_union_start, "vstack_union_start" ); - GET_XML_DEF( g->xml, g->vstack_union_end, "vstack_union_end" ); - GET_XML_DEF( g->xml, g->vstack_union_def, "vstack_union_def" ); - GET_XML_DEF( g->xml, g->vstack_union_att, "vstack_union_att" ); - - GET_XML_TAB_1D( g->defprod, "defprod" ) - - GET_XML_TAB_1D( g->dfa_select, "dfa_select" ) - GET_XML_TAB_1D( g->dfa_char, "dfa_char" ) - GET_XML_TAB_1D( g->dfa_trans, "dfa_trans" ) - - GET_XML_TAB_2D( g->acttab, "acttab" ) - GET_XML_TAB_2D( g->gotab, "gotab" ) - GET_XML_TAB_2D( g->dfa_idx, "dfa_idx" ) - GET_XML_TAB_2D( g->dfa_accept, "dfa_accept" ) - - GET_XML_TAB_1D( g->symbols, "symbols" ) - GET_XML_TAB_1D( g->productions, "productions" ) - - GET_XML_DEF( g->xml, g->code_localization, "code_localization" ); - - /* Escape sequence definitions */ - for( tmp = xml_child( g->xml, "escape-sequence" ); tmp; - 
tmp = xml_next( tmp ) ) - { - att_for = (char*)xml_attr( tmp, "for" ); - att_do = (char*)xml_attr( tmp, "do" ); - - if( att_for && att_do ) - { - for( i = 0; i < g->sequences_count; i++ ) - { - if( !strcmp( g->for_sequences[ i ], att_for ) ) - { - print_error( parser, ERR_DUPLICATE_ESCAPE_SEQ, - ERRSTYLE_WARNING, att_for, genfile ); - break; - } - } - - if( i < g->sequences_count ) - continue; - - g->for_sequences = (char**)prealloc( (char**)g->for_sequences, - ( g->sequences_count + 1 ) * sizeof( char* ) ); - g->do_sequences = (char**)prealloc( (char**)g->do_sequences, - ( g->sequences_count + 1 ) * sizeof( char* ) ); - - if( !( g->for_sequences && g->do_sequences ) ) - OUTOFMEM; - - g->for_sequences[ g->sequences_count ] = (char*)( att_for ); - g->do_sequences[ g->sequences_count ] = (char*)( att_do ); - - if( !( g->for_sequences[ g->sequences_count ] - && g->do_sequences[ g->sequences_count ] ) ) - OUTOFMEM; - - g->sequences_count++; - } - else - { - if( !att_for ) - print_error( parser, ERR_XML_INCOMPLETE, ERRSTYLE_FATAL, - genfile, xml_name( tmp ), "for" ); - if( !att_do ) - print_error( parser, ERR_XML_INCOMPLETE, ERRSTYLE_FATAL, - genfile, xml_name( tmp ), "do" ); - } - } - - /* Output some more information */ - if( parser->verbose ) - { - name = xml_attr( g->xml, "name" ); - version = xml_attr( g->xml, "version" ); - if( !( lname = xml_attr( g->xml, "long-name" ) ) ) - lname = name; - - if( lname && *lname && version && *version ) - { - fprintf( status, "[%s, v%s]...", lname, version ); - fflush( status ); - } - } - - return TRUE; + if( ( tmp = xml_child( g->xml, (tagname) ) ) ) \ + { \ + GET_XML_DEF( tmp, (target).row_start, "row_start" ) \ + GET_XML_DEF( tmp, (target).row_end, "row_end" ) \ + GET_XML_DEF( tmp, (target).row_sep, "row_sep" ) \ + GET_XML_DEF( tmp, (target).col, "col" ) \ + GET_XML_DEF( tmp, (target).col_sep, "col_sep" ) \ + } \ + else \ + print_error( parser, ERR_TAG_NOT_FOUND, ERRSTYLE_WARNING, \ + (tagname), genfile ); + + if( !( g->xml = 
xml_parse_file( genfile ) ) ) + { + print_error( parser, ERR_NO_GENERATOR_FILE, ERRSTYLE_FATAL, genfile ); + return FALSE; + } + + if( *xml_error( g->xml ) ) + { + print_error( parser, ERR_XML_ERROR, ERRSTYLE_FATAL, + genfile, xml_error( g->xml ) ); + return FALSE; + } + + if( ! *( g->truedef = (char*)xml_txt( xml_child( g->xml, "true" ) ) ) ) + g->truedef = "1"; + + if( ! *( g->falsedef = (char*)xml_txt( xml_child( g->xml, "false" ) ) ) ) + g->falsedef = "0"; + + GET_XML_DEF( g->xml, g->vstack_def_type, "vstack_def_type" ); + GET_XML_DEF( g->xml, g->vstack_term_type, "vstack_term_type" ); + + if( ! *( g->prefix = (char*)xml_txt( + xml_child( g->xml, "action_prefix" ) ) ) ) + g->prefix = "@"; + + GET_XML_DEF( g->xml, g->action_start, "action_start" ); + GET_XML_DEF( g->xml, g->action_end, "action_end" ); + GET_XML_DEF( g->xml, g->action_single, "action_single" ); + GET_XML_DEF( g->xml, g->action_union, "action_union" ); + GET_XML_DEF( g->xml, g->action_lhs_single, "action_lhs_single" ); + GET_XML_DEF( g->xml, g->action_lhs_union, "action_lhs_union" ); + GET_XML_DEF( g->xml, g->action_set_lhs, "action_set_lhs" ); + + GET_XML_DEF( g->xml, g->scan_action_start, "scan_action_start" ); + GET_XML_DEF( g->xml, g->scan_action_end, "scan_action_end" ); + GET_XML_DEF( g->xml, g->scan_action_begin_offset, + "scan_action_begin_offset" ); + GET_XML_DEF( g->xml, g->scan_action_end_offset, "scan_action_end_offset" ); + GET_XML_DEF( g->xml, g->scan_action_ret_single, "scan_action_ret_single" ); + GET_XML_DEF( g->xml, g->scan_action_ret_union, "scan_action_ret_union" ); + GET_XML_DEF( g->xml, g->scan_action_set_symbol, "scan_action_set_symbol" ); + + GET_XML_DEF( g->xml, g->vstack_single, "vstack_single" ); + GET_XML_DEF( g->xml, g->vstack_union_start, "vstack_union_start" ); + GET_XML_DEF( g->xml, g->vstack_union_end, "vstack_union_end" ); + GET_XML_DEF( g->xml, g->vstack_union_def, "vstack_union_def" ); + GET_XML_DEF( g->xml, g->vstack_union_att, "vstack_union_att" ); + + 
GET_XML_TAB_1D( g->defprod, "defprod" ) + + GET_XML_TAB_1D( g->dfa_select, "dfa_select" ) + GET_XML_TAB_1D( g->dfa_char, "dfa_char" ) + GET_XML_TAB_1D( g->dfa_trans, "dfa_trans" ) + + GET_XML_TAB_2D( g->acttab, "acttab" ) + GET_XML_TAB_2D( g->gotab, "gotab" ) + GET_XML_TAB_2D( g->dfa_idx, "dfa_idx" ) + GET_XML_TAB_2D( g->dfa_accept, "dfa_accept" ) + + GET_XML_TAB_1D( g->symbols, "symbols" ) + GET_XML_TAB_1D( g->productions, "productions" ) + + GET_XML_DEF( g->xml, g->code_localization, "code_localization" ); + + /* Escape sequence definitions */ + for( tmp = xml_child( g->xml, "escape-sequence" ); tmp; + tmp = xml_next( tmp ) ) + { + att_for = (char*)xml_attr( tmp, "for" ); + att_do = (char*)xml_attr( tmp, "do" ); + + if( att_for && att_do ) + { + for( i = 0; i < g->sequences_count; i++ ) + { + if( !strcmp( g->for_sequences[ i ], att_for ) ) + { + print_error( parser, ERR_DUPLICATE_ESCAPE_SEQ, + ERRSTYLE_WARNING, att_for, genfile ); + break; + } + } + + if( i < g->sequences_count ) + continue; + + g->for_sequences = (char**)prealloc( (char**)g->for_sequences, + ( g->sequences_count + 1 ) * sizeof( char* ) ); + g->do_sequences = (char**)prealloc( (char**)g->do_sequences, + ( g->sequences_count + 1 ) * sizeof( char* ) ); + + if( !( g->for_sequences && g->do_sequences ) ) + OUTOFMEM; + + g->for_sequences[ g->sequences_count ] = (char*)( att_for ); + g->do_sequences[ g->sequences_count ] = (char*)( att_do ); + + if( !( g->for_sequences[ g->sequences_count ] + && g->do_sequences[ g->sequences_count ] ) ) + OUTOFMEM; + + g->sequences_count++; + } + else + { + if( !att_for ) + print_error( parser, ERR_XML_INCOMPLETE, ERRSTYLE_FATAL, + genfile, xml_name( tmp ), "for" ); + if( !att_do ) + print_error( parser, ERR_XML_INCOMPLETE, ERRSTYLE_FATAL, + genfile, xml_name( tmp ), "do" ); + } + } + + /* Output some more information */ + if( parser->verbose ) + { + name = xml_attr( g->xml, "name" ); + version = xml_attr( g->xml, "version" ); + if( !( lname = xml_attr( g->xml, 
"long-name" ) ) ) + lname = name; + + if( lname && *lname && version && *version ) + { + fprintf( status, "[%s, v%s]...", lname, version ); + fflush( status ); + } + } + + return TRUE; } @@ -757,833 +748,833 @@ generator file). */ void build_code( PARSER* parser ) { - GENERATOR generator; - GENERATOR* gen = (GENERATOR*)NULL; - XML_T file; - FILE* stream; - - char* basename; - char tlt_file [ BUFSIZ + 1 ]; - char* tlt_path; - char* option; - char* complete = (char*)NULL; - char* all = (char*)NULL; - char* action_table = (char*)NULL; - char* action_table_row = (char*)NULL; - char* goto_table = (char*)NULL; - char* goto_table_row = (char*)NULL; - char* def_prod = (char*)NULL; - char* char_map = (char*)NULL; - char* char_map_sym = (char*)NULL; - char* symbols = (char*)NULL; - char* productions = (char*)NULL; - char* dfa_select = (char*)NULL; - char* dfa_idx = (char*)NULL; - char* dfa_idx_row = (char*)NULL; - char* dfa_char = (char*)NULL; - char* dfa_trans = (char*)NULL; - char* dfa_accept = (char*)NULL; - char* dfa_accept_row = (char*)NULL; - char* type_def = (char*)NULL; - char* actions = (char*)NULL; - char* scan_actions = (char*)NULL; - char* top_value = (char*)NULL; - char* goal_value = (char*)NULL; - char* act = (char*)NULL; - char* filename = (char*)NULL; - - int max_action = 0; - int max_goto = 0; - int max_dfa_idx = 0; - int max_dfa_accept = 0; - int max_symbol_name = 0; - int column; - int charmap_count = 0; - int row; - pregex_dfa* dfa; - pregex_dfa_st* dfa_st; - pregex_dfa_tr* dfa_ent; - SYMBOL* sym; - STATE* st; - TABCOL* col; - PROD* p; - PROD* goalprod; - VTYPE* vt; - wchar_t beg; - wchar_t end; - int i; - BOOLEAN is_default_code; - plistel* e; - plistel* f; - LIST* l; - LIST* m; - - PROC( "build_code" ); - PARMS( "parser", "%p", parser ); - - gen = &generator; - memset( gen, 0, sizeof( GENERATOR ) ); - - sprintf( tlt_file, "%s%s", parser->p_template, UNICC_TLT_EXTENSION ); - pstrlwr( tlt_file ); - VARS( "tlt_file", "%s", tlt_file ); - - if( !( tlt_path = 
pwhich( tlt_file, "targets" ) ) - && !( tlt_path = pwhich( tlt_file, getenv( "UNICC_TPLDIR" ) ) ) + GENERATOR generator; + GENERATOR* gen = (GENERATOR*)NULL; + XML_T file; + FILE* stream; + + char* basename; + char tlt_file [ BUFSIZ + 1 ]; + char* tlt_path; + char* option; + char* complete = (char*)NULL; + char* all = (char*)NULL; + char* action_table = (char*)NULL; + char* action_table_row = (char*)NULL; + char* goto_table = (char*)NULL; + char* goto_table_row = (char*)NULL; + char* def_prod = (char*)NULL; + char* char_map = (char*)NULL; + char* char_map_sym = (char*)NULL; + char* symbols = (char*)NULL; + char* productions = (char*)NULL; + char* dfa_select = (char*)NULL; + char* dfa_idx = (char*)NULL; + char* dfa_idx_row = (char*)NULL; + char* dfa_char = (char*)NULL; + char* dfa_trans = (char*)NULL; + char* dfa_accept = (char*)NULL; + char* dfa_accept_row = (char*)NULL; + char* type_def = (char*)NULL; + char* actions = (char*)NULL; + char* scan_actions = (char*)NULL; + char* top_value = (char*)NULL; + char* goal_value = (char*)NULL; + char* act = (char*)NULL; + char* filename = (char*)NULL; + + int max_action = 0; + int max_goto = 0; + int max_dfa_idx = 0; + int max_dfa_accept = 0; + int max_symbol_name = 0; + int column; + int charmap_count = 0; + int row; + pregex_dfa* dfa; + pregex_dfa_st* dfa_st; + pregex_dfa_tr* dfa_ent; + SYMBOL* sym; + STATE* st; + TABCOL* col; + PROD* p; + PROD* goalprod; + VTYPE* vt; + wchar_t beg; + wchar_t end; + int i; + BOOLEAN is_default_code; + plistel* e; + plistel* f; + LIST* l; + LIST* m; + + PROC( "build_code" ); + PARMS( "parser", "%p", parser ); + + gen = &generator; + memset( gen, 0, sizeof( GENERATOR ) ); + + sprintf( tlt_file, "%s%s", parser->p_template, UNICC_TLT_EXTENSION ); + pstrlwr( tlt_file ); + VARS( "tlt_file", "%s", tlt_file ); + + if( !( tlt_path = pwhich( tlt_file, "targets" ) ) + && !( tlt_path = pwhich( tlt_file, getenv( "UNICC_TPLDIR" ) ) ) #ifndef _WIN32 - && !( tlt_path = pwhich( tlt_file, + && !( tlt_path = 
pwhich( tlt_file, #ifdef TLTDIR - TLTDIR + TLTDIR #else - "/usr/share/unicc/targets" + "/usr/share/unicc/targets" #endif - ) ) + ) ) #endif - ) - { - tlt_path = tlt_file; - } - - VARS( "tlt_path", "%s", tlt_path ); - - MSG( "Loading generator" ); - if( !load_generator( parser, gen, tlt_path ) ) - VOIDRET; - - /* Now that we have the generator, do some code generation-related - integrity preparatories on the grammar */ - - MSG( "Performing code generation-related integrity preparatories" ); - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( !( sym->vtype ) ) - sym->vtype = parser->p_def_type; - - if( sym->type == SYM_NON_TERMINAL && !( sym->vtype ) && - ( gen->vstack_def_type && *( gen->vstack_def_type ) ) ) - sym->vtype = create_vtype( parser, gen->vstack_def_type ); - else if( IS_TERMINAL( sym ) /* && !( sym->keyword ) */ - && !( sym->vtype ) && ( gen->vstack_term_type && - *( gen->vstack_term_type ) ) ) - sym->vtype = create_vtype( parser, gen->vstack_term_type ); - } - - /* Create piece of code for the value at the top of the value stack - (e.g. used to store the next terminal character onto the value stack) */ - if( list_count( parser->vtypes ) <= 1 ) - top_value = pstrrender( gen->action_single, - GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, - (char*)NULL ); - else if( ( vt = find_vtype( parser, gen->vstack_term_type ) ) ) - top_value = pstrrender( gen->action_union, - GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, - GEN_WILD_PREFIX "attribute", - pstrrender( gen->vstack_union_att, - GEN_WILD_PREFIX "value-type-id", - int_to_str( vt->id ), TRUE, - (char*)NULL ), TRUE, - (char*)NULL ); - else - /* ERROR */ - ; - - /* Create piece of code for the value that is associated with the - * goal symbol, to e.g. 
return it from the parser function */ - if( list_count( parser->vtypes ) <= 1 ) - goal_value = pstrrender( gen->action_single, - GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, - (char*)NULL ); - else - goal_value = pstrrender( gen->action_union, - GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, - GEN_WILD_PREFIX "attribute", - pstrrender( gen->vstack_union_att, - GEN_WILD_PREFIX "value-type-id", - int_to_str( parser->goal->vtype->id ), TRUE, - (char*)NULL ), TRUE, - (char*)NULL ); - - /* Build action, goto and dfa_select tables */ - MSG( "Action, Goto and DFA selection table" ); - - i = 0; - parray_for( parser->states, st ) - { - /* Action table */ - action_table_row = pstrrender( gen->acttab.row_start, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( list_count( st->actions ) ), TRUE, - GEN_WILD_PREFIX "state-number", - int_to_str( st->state_id ), TRUE, - (char*)NULL ); - - if( max_action < list_count( st->actions ) ) - max_action = list_count( st->actions ); - - for( m = st->actions, column = 0; m; m = m->next, column++ ) - { - col = (TABCOL*)(m->pptr); - - action_table_row = pstrcatstr( action_table_row, - pstrrender( gen->acttab.col, - GEN_WILD_PREFIX "symbol", - int_to_str( col->symbol->id ), TRUE, - GEN_WILD_PREFIX "action", int_to_str( col->action ), TRUE, - GEN_WILD_PREFIX "index", int_to_str( col->index ), TRUE, - GEN_WILD_PREFIX "column", int_to_str( column ), TRUE, - (char*)NULL ), TRUE ); - - if( m->next ) - action_table_row = pstrcatstr( action_table_row, - gen->acttab.col_sep, FALSE ); - } - - action_table_row = pstrcatstr( action_table_row, - pstrrender( gen->acttab.row_end, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( list_count( st->actions ) ), TRUE, - GEN_WILD_PREFIX "state-number", - int_to_str( st->state_id ), TRUE, - (char*)NULL ), TRUE ); - - if( parray_next( parser->states, st ) ) - action_table_row = pstrcatstr( action_table_row, - gen->acttab.row_sep, FALSE ); - - action_table = pstrcatstr( action_table, action_table_row, 
TRUE ); - - /* Goto table */ - goto_table_row = pstrrender( gen->gotab.row_start, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( list_count( st->gotos ) ), TRUE, - GEN_WILD_PREFIX "state-number", - int_to_str( st->state_id ), TRUE, - (char*)NULL ); - - if( max_goto < list_count( st->gotos ) ) - max_goto = list_count( st->gotos ); - - for( m = st->gotos, column = 0; m; m = m->next, column++ ) - { - col = (TABCOL*)(m->pptr); - - goto_table_row = pstrcatstr( goto_table_row, - pstrrender( gen->gotab.col, - GEN_WILD_PREFIX "symbol", - int_to_str( col->symbol->id ), TRUE, - GEN_WILD_PREFIX "action", - int_to_str( col->action ), TRUE, - GEN_WILD_PREFIX "index", - int_to_str( col->index ), TRUE, - GEN_WILD_PREFIX "column", - int_to_str( column ), TRUE, - (char*)NULL ), TRUE ); - - if( m->next ) - goto_table_row = pstrcatstr( goto_table_row, - gen->gotab.col_sep, FALSE ); - } - - goto_table_row = pstrcatstr( goto_table_row, - pstrrender( gen->gotab.row_end, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( list_count( st->actions ) ), TRUE, - GEN_WILD_PREFIX "state-number", - int_to_str( st->state_id ), TRUE, - (char*)NULL ), TRUE ); - - if( parray_next( parser->states, st ) ) - goto_table_row = pstrcatstr( goto_table_row, - gen->gotab.row_sep, FALSE ); - - goto_table = pstrcatstr( goto_table, goto_table_row, TRUE ); - - /* Only in scannerless mode */ - if( parser->p_mode == MODE_SCANNERLESS ) - { - /* dfa machine selection */ - dfa_select = pstrcatstr( dfa_select, - pstrrender( gen->dfa_select.col, - GEN_WILD_PREFIX "machine", - int_to_str( list_find( parser->dfas, st->dfa ) ), TRUE, - (char*)NULL ), TRUE ); - - if( parray_next( parser->states, st ) ) - dfa_select = pstrcatstr( dfa_select, - gen->dfa_select.col_sep, FALSE ); - } - - /* Default production table */ - def_prod = pstrcatstr( def_prod, - pstrrender( gen->defprod.col, - GEN_WILD_PREFIX "state-number", - int_to_str( st->state_id ), TRUE, - GEN_WILD_PREFIX "production-number", - int_to_str( - ( ( 
st->def_prod ) ? st->def_prod->id : -1 ) ), - TRUE, (char*)NULL ), TRUE ); - - if( parray_next( parser->states, st ) ) - def_prod = pstrcatstr( def_prod, gen->defprod.col_sep, FALSE ); - - i++; - } - - /* Lexical recognition machine table composition */ - MSG( "Lexical recognition machine" ); - for( l = parser->dfas, row = 0, column = 0; l; l = list_next( l ), row++ ) - { - dfa = (pregex_dfa*)list_access( l ); - - /* Row start */ - dfa_idx_row = pstrrender( gen->dfa_idx.row_start, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( plist_count( dfa->states ) ), TRUE, - GEN_WILD_PREFIX "row", - int_to_str( row ), TRUE, - (char*)NULL ); - - dfa_accept_row = pstrrender( gen->dfa_accept.row_start, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( plist_count( dfa->states ) ), TRUE, - GEN_WILD_PREFIX "row", - int_to_str( row ), TRUE, - (char*)NULL ); - - if( max_dfa_idx < plist_count( dfa->states ) ) - max_dfa_accept = max_dfa_idx = plist_count( dfa->states ); - - /* Building row entries */ - plist_for( dfa->states, e ) - { - dfa_st = (pregex_dfa_st*)plist_access( e ); - VARS( "dfa_st", "%p", dfa_st ); - - if( dfa_char && dfa_trans ) - { - dfa_char = pstrcatstr( dfa_char, - gen->dfa_char.col_sep, FALSE ); - dfa_trans = pstrcatstr( dfa_trans, - gen->dfa_trans.col_sep, FALSE ); - } - - dfa_idx_row = pstrcatstr( dfa_idx_row, - pstrrender( gen->dfa_idx.col, - GEN_WILD_PREFIX "index", - int_to_str( column ), TRUE, - (char*)NULL ), TRUE ); - - dfa_accept_row = pstrcatstr( dfa_accept_row, - pstrrender( gen->dfa_accept.col, - GEN_WILD_PREFIX "accept", - int_to_str( dfa_st->accept ), TRUE, - (char*)NULL ), TRUE ); - - /* Iterate trough all transitions */ - MSG( "Iterating to transitions of DFA" ); - plist_for( dfa_st->trans, f ) - { - dfa_ent = (pregex_dfa_tr*)plist_access( f ); - - for( i = 0; pccl_get( &beg, &end, dfa_ent->ccl, i ); i++ ) - { - dfa_char = pstrcatstr( dfa_char, - pstrrender( gen->dfa_char.col, - GEN_WILD_PREFIX "from", - int_to_str( beg ), TRUE, - 
GEN_WILD_PREFIX "to", - int_to_str( end ), TRUE, - GEN_WILD_PREFIX "goto", - int_to_str( dfa_st->accept ), TRUE, - (char*)NULL ), TRUE ); - - dfa_trans = pstrcatstr( dfa_trans, - pstrrender( gen->dfa_trans.col, - GEN_WILD_PREFIX "goto", - int_to_str( dfa_ent->go_to ), TRUE, - (char*)NULL ), TRUE ); - - - dfa_char = pstrcatstr( dfa_char, - gen->dfa_char.col_sep, FALSE ); - dfa_trans = pstrcatstr( dfa_trans, - gen->dfa_trans.col_sep, FALSE ); - - column++; - } - } - - /* DFA transition end marker */ - dfa_char = pstrcatstr( dfa_char, - pstrrender( gen->dfa_char.col, - GEN_WILD_PREFIX "from", - int_to_str( -1 ), TRUE, - GEN_WILD_PREFIX "to", - int_to_str( -1 ), TRUE, - (char*)NULL ), TRUE ); - - /* DFA transition */ - dfa_trans = pstrcatstr( dfa_trans, - pstrrender( gen->dfa_trans.col, - GEN_WILD_PREFIX "goto", - int_to_str( -1 ), - TRUE, (char*)NULL ), TRUE ); - - column++; - - if( plist_next( e ) ) - { - dfa_idx_row = pstrcatstr( dfa_idx_row, - gen->dfa_idx.col_sep, FALSE ); - dfa_accept_row = pstrcatstr( dfa_accept_row, - gen->dfa_accept.col_sep, FALSE ); - } - } - - /* Row end */ - dfa_idx_row = pstrcatstr( dfa_idx_row, - pstrrender( gen->dfa_idx.row_end, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( plist_count( dfa->states ) ), TRUE, - GEN_WILD_PREFIX "row", - int_to_str( row ), TRUE, - (char*)NULL ), TRUE ); - - dfa_accept_row = pstrcatstr( dfa_accept_row, - pstrrender( gen->dfa_accept.row_end, - GEN_WILD_PREFIX "number-of-columns", - int_to_str( plist_count( dfa->states ) ), TRUE, - GEN_WILD_PREFIX "row", int_to_str( row ), TRUE, - (char*)NULL ), TRUE ); - - if( list_next( l ) ) - { - dfa_idx_row = pstrcatstr( dfa_idx_row, - gen->dfa_idx.row_sep, FALSE ); - dfa_accept_row = pstrcatstr( dfa_accept_row, - gen->dfa_accept.row_sep, FALSE ); - } - - dfa_idx = pstrcatstr( dfa_idx, dfa_idx_row, TRUE ); - dfa_accept = pstrcatstr( dfa_accept, dfa_accept_row, TRUE ); - } - - MSG( "Construct symbol information table" ); - - /* Whitespace identification table and 
symbol-information-table */ - plist_for( parser->symbols, e ) /* Okidoki, now do the generation */ - { - sym = (SYMBOL*)plist_access( e ); - - symbols = pstrcatstr( symbols, pstrrender( gen->symbols.col, - GEN_WILD_PREFIX "symbol-name", - escape_for_target( gen, sym->name, FALSE ), TRUE, - GEN_WILD_PREFIX "emit", - escape_for_target( gen, sym->emit, FALSE ), TRUE, - GEN_WILD_PREFIX "symbol", - int_to_str( sym->id ), TRUE, - GEN_WILD_PREFIX "type", - int_to_str( sym->type ), TRUE, - GEN_WILD_PREFIX "datatype", - int_to_str( sym->vtype ? sym->vtype->id : 0 ), TRUE, - GEN_WILD_PREFIX "terminal", - sym->type > 0 ? gen->truedef : gen->falsedef, FALSE, - GEN_WILD_PREFIX "lexem", - sym->lexem ? gen->truedef : gen->falsedef, FALSE, - GEN_WILD_PREFIX "whitespace", - sym->whitespace ? gen->truedef : gen->falsedef, FALSE, - GEN_WILD_PREFIX "greedy", - sym->greedy ? gen->truedef : gen->falsedef, FALSE, - - (char*)NULL ), TRUE ); - - if( max_symbol_name < (int)strlen( sym->name ) ) - max_symbol_name = (int)strlen( sym->name ); - - if( plist_next( e ) ) - { - symbols = pstrcatstr( symbols, - gen->symbols.col_sep, FALSE ); - } - } - - /* Type definition union */ - if( list_count( parser->vtypes ) == 1 ) - { - vt = (VTYPE*)( parser->vtypes->pptr ); - type_def = pstrrender( gen->vstack_single, - GEN_WILD_PREFIX "value-type", vt->real_def, FALSE, - (char*)NULL ); - } - else - { - type_def = pstrrender( gen->vstack_union_start, - GEN_WILD_PREFIX "number-of-value-types", - int_to_str( list_count( parser->vtypes ) ), - TRUE, (char*)NULL ); - - for( l = parser->vtypes; l; l = l->next ) - { - vt = (VTYPE*)(l->pptr); - - type_def = pstrcatstr( type_def, - pstrrender( gen->vstack_union_def, - GEN_WILD_PREFIX "value-type", vt->real_def, FALSE, - GEN_WILD_PREFIX "attribute", - pstrrender( gen->vstack_union_att, - GEN_WILD_PREFIX "value-type-id", - int_to_str( vt->id ), TRUE, - (char*)NULL ), TRUE, - GEN_WILD_PREFIX "value-type-id", - int_to_str( vt->id ), TRUE, - (char*)NULL ), TRUE ); - } - 
- type_def = pstrcatstr( type_def, - pstrrender( gen->vstack_union_end, - GEN_WILD_PREFIX "number-of-value-types", - int_to_str( list_count( parser->vtypes ) ), - TRUE, (char*)NULL ), TRUE ); - } - - /* Reduction action code and production definition table */ - row = 0; - - plist_for( parser->productions, e ) - { - p = (PROD*)plist_access( e ); - - /* Select the semantic code to be processed! */ - act = (char*)NULL; - - is_default_code = FALSE; - - if( p->code ) - act = p->code; - else if( plist_count( p->rhs ) == 0 ) - { - act = parser->p_def_action_e; - is_default_code = TRUE; - } - else - { - act = parser->p_def_action; - is_default_code = TRUE; - } - - if( is_default_code && - ( p->lhs->whitespace || - ( parser->error && plist_get_by_ptr( p->rhs, - parser->error ) ) ) ) - { - act = (char*)NULL; - } - - if( act && *act ) - { - /* Generate action start */ - actions = pstrcatstr( actions, pstrrender( gen->action_start, - GEN_WILD_PREFIX "production-number", int_to_str( p->id ), TRUE, - (char*)NULL ), TRUE ); - - /* Generate code localization */ - if( gen->code_localization && p->code_at > 0 ) - { - actions = pstrcatstr( actions, - pstrrender( gen->code_localization, - GEN_WILD_PREFIX "line", - int_to_str( p->code_at ), TRUE, - (char*)NULL ), - TRUE ); - } - - /* Generate the action code */ - act = build_action( parser, gen, p, act, is_default_code ); - actions = pstrcatstr( actions, act, TRUE ); - - /* Generate the action end */ - actions = pstrcatstr( actions, pstrrender( gen->action_end, - GEN_WILD_PREFIX "production-number", int_to_str( p->id ), TRUE, - (char*)NULL ), TRUE ); - } - - /* Generate production information table */ - productions = pstrcatstr( productions, pstrrender( - gen->productions.col, - - GEN_WILD_PREFIX "production-number", - int_to_str( p->id ), TRUE, - GEN_WILD_PREFIX "production", - escape_for_target( gen, mkproduction_str( p ), TRUE ), - TRUE, - GEN_WILD_PREFIX "emit", - escape_for_target( gen, p->emit, TRUE ), TRUE, - GEN_WILD_PREFIX 
"length", - int_to_str( plist_count( p->rhs ) ), TRUE, - GEN_WILD_PREFIX "lhs", - int_to_str( p->lhs->id ), TRUE, - - (char*)NULL ), TRUE ); - - if( plist_next( e ) ) - productions = pstrcatstr( productions, - gen->productions.col_sep, FALSE ); - - row++; - } - - /* Scanner action code */ - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - if( sym->keyword ) - continue; - - /* Select the semantic code to be processed! */ - if( ( act = sym->code ) ) - { - /* Code localization features */ - if( gen->code_localization && sym->code_at > 0 ) - { - scan_actions = pstrcatstr( scan_actions, - pstrrender( gen->code_localization, - GEN_WILD_PREFIX "line", - int_to_str( sym->code_at ), TRUE, - (char*)NULL ), TRUE ); - } - - scan_actions = pstrcatstr( scan_actions, - pstrrender( gen->scan_action_start, - GEN_WILD_PREFIX "symbol-number", - int_to_str( sym->id ), TRUE, - (char*)NULL ), TRUE ); - - act = build_scan_action( parser, gen, sym, act ); - scan_actions = pstrcatstr( scan_actions, act, TRUE ); - - scan_actions = pstrcatstr( scan_actions, - pstrrender( gen->scan_action_end, - GEN_WILD_PREFIX "symbol-number", - int_to_str( sym->id ), TRUE, - (char*)NULL ), TRUE ); - } - } - - /* Get the goal production */ - goalprod = (PROD*)plist_access( plist_first( parser->goal->productions ) ); - - /* Generate basename - parser->p_basename may contain directory path */ - basename = pstrdup( pbasename( parser->p_basename ) ); - - /* Construct the output files */ - for( file = xml_child( gen->xml, "file" ); - file; file = xml_next( file ) ) - { - /* Make filename */ - if( !parser->to_stdout ) - { - if( ( filename = (char*)xml_attr( file, "filename" ) ) ) - filename = pstrrender( filename, - /* - Here we have to submit the original basename - to construct the final output filename using the - full output path. 
- */ - GEN_WILD_PREFIX "basename", parser->p_basename, FALSE, - GEN_WILD_PREFIX "Cbasename", c_identifier( - parser->p_basename, FALSE ), TRUE, - GEN_WILD_PREFIX "CBASENAME", c_identifier( - parser->p_basename, TRUE ), TRUE, - GEN_WILD_PREFIX "prefix", parser->p_prefix, FALSE, - (char*)NULL ); - } - - /* Assembling all together - Warning, this is - ONE single function call! */ - - all = pstrrender( xml_txt( file ), - - /* Lengths of names and Prologue/Epilogue codes */ - GEN_WILD_PREFIX "prologue" LEN_EXT, - long_to_str( (long)pstrlen( parser->p_header ) ), TRUE, - GEN_WILD_PREFIX "epilogue" LEN_EXT, - long_to_str( (long)pstrlen( parser->p_footer ) ), TRUE, - GEN_WILD_PREFIX "pcb" LEN_EXT, - long_to_str( (long)pstrlen( parser->p_pcb ) ), TRUE, - - /* Names and Prologue/Epilogue codes */ - GEN_WILD_PREFIX "prologue", parser->p_header, FALSE, - GEN_WILD_PREFIX "epilogue", parser->p_footer, FALSE, - GEN_WILD_PREFIX "pcb", parser->p_pcb, FALSE, - - /* Limits and sizes, parse tables */ - GEN_WILD_PREFIX "number-of-symbols", - int_to_str( plist_count( parser->symbols ) ), TRUE, - GEN_WILD_PREFIX "number-of-states", - int_to_str( parray_count( parser->states ) ), TRUE, - GEN_WILD_PREFIX "number-of-productions", - int_to_str( plist_count( parser->productions ) ), TRUE, - GEN_WILD_PREFIX "number-of-dfa-machines", - int_to_str( list_count( parser->dfas ) ), TRUE, - GEN_WILD_PREFIX "deepest-action-row", - int_to_str( max_action ), TRUE, - GEN_WILD_PREFIX "deepest-goto-row", - int_to_str( max_goto ), TRUE, - GEN_WILD_PREFIX "deepest-dfa-index-row", - int_to_str( max_dfa_idx ), TRUE, - GEN_WILD_PREFIX "deepest-dfa-accept-row", - int_to_str( max_dfa_accept ), TRUE, - GEN_WILD_PREFIX "size-of-dfa-characters", - int_to_str( column ), TRUE, - GEN_WILD_PREFIX "number-of-character-map", - int_to_str( charmap_count ), TRUE, - GEN_WILD_PREFIX "action-table", action_table, FALSE, - GEN_WILD_PREFIX "goto-table", goto_table, FALSE, - GEN_WILD_PREFIX "default-productions", def_prod, FALSE, 
- GEN_WILD_PREFIX "character-map-symbols", char_map_sym, FALSE, - GEN_WILD_PREFIX "character-map", char_map, FALSE, - GEN_WILD_PREFIX "character-universe", - int_to_str( parser->p_universe ), TRUE, - GEN_WILD_PREFIX "symbols", symbols, FALSE, - GEN_WILD_PREFIX "productions", productions, FALSE, - GEN_WILD_PREFIX "max-symbol-name-length", - int_to_str( max_symbol_name ), TRUE, - GEN_WILD_PREFIX "dfa-select", dfa_select, FALSE, - GEN_WILD_PREFIX "dfa-index", dfa_idx, FALSE, - GEN_WILD_PREFIX "dfa-char", dfa_char, FALSE, - GEN_WILD_PREFIX "dfa-trans", dfa_trans, FALSE, - GEN_WILD_PREFIX "dfa-accept", dfa_accept, FALSE, - GEN_WILD_PREFIX "value-type-definition", type_def, FALSE, - GEN_WILD_PREFIX "actions", actions, FALSE, - GEN_WILD_PREFIX "scan_actions", scan_actions, FALSE, - GEN_WILD_PREFIX "top-value", top_value, FALSE, - GEN_WILD_PREFIX "goal-value", goal_value, FALSE, - GEN_WILD_PREFIX "goal-type", parser->goal->vtype ? - parser->goal->vtype->real_def : "", - FALSE, - GEN_WILD_PREFIX "mode", int_to_str( parser->p_mode ), TRUE, - GEN_WILD_PREFIX "error", - ( parser->error ? int_to_str( parser->error->id ) : - int_to_str( -1 ) ), TRUE, - GEN_WILD_PREFIX "eof", - ( parser->end_of_input ? 
- int_to_str( parser->end_of_input->id ) : - int_to_str( -1 ) ), TRUE, - GEN_WILD_PREFIX "goal-production", - int_to_str( goalprod->id ), TRUE, - GEN_WILD_PREFIX "goal", - int_to_str( parser->goal->id ), TRUE, - - (char*)NULL - ); - - /* Replace all top-level options */ - plist_for( parser->options, e ) - { - if( !( option = pasprintf( "%s%s", - GEN_WILD_PREFIX, plist_key( e ) ) ) ) - OUTOFMEM; - - if( !( complete = pstrrender( all, - option, (char*)plist_access( e ), FALSE, - (char*)NULL ) ) ) - OUTOFMEM; - - pfree( all ); - pfree( option ); - all = complete; - } - - /* Perform line number updating on this file */ - /* - if( gen->code_localization ) - build_code_localizations( &all, gen ); - */ - - /* Now replace all prefixes */ - complete = pstrrender( all, - GEN_WILD_PREFIX "prefix", - parser->p_prefix, FALSE, - GEN_WILD_PREFIX "basename", - basename, FALSE, - GEN_WILD_PREFIX "Cbasename", - c_identifier( basename, FALSE ), TRUE, - GEN_WILD_PREFIX "CBASENAME", - c_identifier( basename, TRUE ), TRUE, - GEN_WILD_PREFIX "filename" LEN_EXT, - long_to_str( - (long)pstrlen( parser->filename ) ), TRUE, - GEN_WILD_PREFIX "filename", parser->filename, FALSE, - - (char*)NULL ); - - pfree( all ); - - /* Open output file */ - if( filename ) - { - if( !( stream = fopen( filename, "wt" ) ) ) - { - print_error( parser, ERR_OPEN_OUTPUT_FILE, - ERRSTYLE_FATAL, filename ); - - pfree( filename ); - filename = (char*)NULL; - } - } - - if( !filename ) - { - stream = stdout; - - if( parser->files_count > 0 ) - fprintf( stdout, "%c", EOF ); - } - - parser->files_count++; - fprintf( stream, "%s", complete ); - pfree( complete ); - - if( filename ) - { - fclose( stream ); - pfree( filename ); - } - } - - MSG( "Freeing used memory" ); - - pfree( basename ); - - /* Freeing generated content */ - pfree( action_table ); - pfree( goto_table ); - pfree( def_prod ); - pfree( char_map ); - pfree( char_map_sym ); - pfree( symbols ); - pfree( productions ); - pfree( dfa_select ); - pfree( dfa_idx 
); - pfree( dfa_char ); - pfree( dfa_trans ); - pfree( dfa_accept ); - pfree( type_def ); - pfree( actions ); - pfree( scan_actions ); - pfree( top_value ); - pfree( goal_value ); - - /* Freeing the generator's structure */ - pfree( gen->for_sequences ); - pfree( gen->do_sequences ); - xml_free( gen->xml ); - - /* Free local lexers */ - plex_free( action_lex ); - plex_free( scan_lex ); - - VOIDRET; + ) + { + tlt_path = tlt_file; + } + + VARS( "tlt_path", "%s", tlt_path ); + + MSG( "Loading generator" ); + if( !load_generator( parser, gen, tlt_path ) ) + VOIDRET; + + /* Now that we have the generator, do some code generation-related + integrity preparatories on the grammar */ + + MSG( "Performing code generation-related integrity preparatories" ); + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( !( sym->vtype ) ) + sym->vtype = parser->p_def_type; + + if( sym->type == SYM_NON_TERMINAL && !( sym->vtype ) && + ( gen->vstack_def_type && *( gen->vstack_def_type ) ) ) + sym->vtype = create_vtype( parser, gen->vstack_def_type ); + else if( IS_TERMINAL( sym ) /* && !( sym->keyword ) */ + && !( sym->vtype ) && ( gen->vstack_term_type && + *( gen->vstack_term_type ) ) ) + sym->vtype = create_vtype( parser, gen->vstack_term_type ); + } + + /* Create piece of code for the value at the top of the value stack + (e.g. 
used to store the next terminal character onto the value stack) */ + if( list_count( parser->vtypes ) <= 1 ) + top_value = pstrrender( gen->action_single, + GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, + (char*)NULL ); + else if( ( vt = find_vtype( parser, gen->vstack_term_type ) ) ) + top_value = pstrrender( gen->action_union, + GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, + GEN_WILD_PREFIX "attribute", + pstrrender( gen->vstack_union_att, + GEN_WILD_PREFIX "value-type-id", + int_to_str( vt->id ), TRUE, + (char*)NULL ), TRUE, + (char*)NULL ); + else + /* ERROR */ + ; + + /* Create piece of code for the value that is associated with the + * goal symbol, to e.g. return it from the parser function */ + if( list_count( parser->vtypes ) <= 1 ) + goal_value = pstrrender( gen->action_single, + GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, + (char*)NULL ); + else + goal_value = pstrrender( gen->action_union, + GEN_WILD_PREFIX "offset", int_to_str( 0 ), TRUE, + GEN_WILD_PREFIX "attribute", + pstrrender( gen->vstack_union_att, + GEN_WILD_PREFIX "value-type-id", + int_to_str( parser->goal->vtype->id ), TRUE, + (char*)NULL ), TRUE, + (char*)NULL ); + + /* Build action, goto and dfa_select tables */ + MSG( "Action, Goto and DFA selection table" ); + + i = 0; + parray_for( parser->states, st ) + { + /* Action table */ + action_table_row = pstrrender( gen->acttab.row_start, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( list_count( st->actions ) ), TRUE, + GEN_WILD_PREFIX "state-number", + int_to_str( st->state_id ), TRUE, + (char*)NULL ); + + if( max_action < list_count( st->actions ) ) + max_action = list_count( st->actions ); + + for( m = st->actions, column = 0; m; m = m->next, column++ ) + { + col = (TABCOL*)(m->pptr); + + action_table_row = pstrcatstr( action_table_row, + pstrrender( gen->acttab.col, + GEN_WILD_PREFIX "symbol", + int_to_str( col->symbol->id ), TRUE, + GEN_WILD_PREFIX "action", int_to_str( col->action ), TRUE, + GEN_WILD_PREFIX "index", 
int_to_str( col->index ), TRUE, + GEN_WILD_PREFIX "column", int_to_str( column ), TRUE, + (char*)NULL ), TRUE ); + + if( m->next ) + action_table_row = pstrcatstr( action_table_row, + gen->acttab.col_sep, FALSE ); + } + + action_table_row = pstrcatstr( action_table_row, + pstrrender( gen->acttab.row_end, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( list_count( st->actions ) ), TRUE, + GEN_WILD_PREFIX "state-number", + int_to_str( st->state_id ), TRUE, + (char*)NULL ), TRUE ); + + if( parray_next( parser->states, st ) ) + action_table_row = pstrcatstr( action_table_row, + gen->acttab.row_sep, FALSE ); + + action_table = pstrcatstr( action_table, action_table_row, TRUE ); + + /* Goto table */ + goto_table_row = pstrrender( gen->gotab.row_start, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( list_count( st->gotos ) ), TRUE, + GEN_WILD_PREFIX "state-number", + int_to_str( st->state_id ), TRUE, + (char*)NULL ); + + if( max_goto < list_count( st->gotos ) ) + max_goto = list_count( st->gotos ); + + for( m = st->gotos, column = 0; m; m = m->next, column++ ) + { + col = (TABCOL*)(m->pptr); + + goto_table_row = pstrcatstr( goto_table_row, + pstrrender( gen->gotab.col, + GEN_WILD_PREFIX "symbol", + int_to_str( col->symbol->id ), TRUE, + GEN_WILD_PREFIX "action", + int_to_str( col->action ), TRUE, + GEN_WILD_PREFIX "index", + int_to_str( col->index ), TRUE, + GEN_WILD_PREFIX "column", + int_to_str( column ), TRUE, + (char*)NULL ), TRUE ); + + if( m->next ) + goto_table_row = pstrcatstr( goto_table_row, + gen->gotab.col_sep, FALSE ); + } + + goto_table_row = pstrcatstr( goto_table_row, + pstrrender( gen->gotab.row_end, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( list_count( st->actions ) ), TRUE, + GEN_WILD_PREFIX "state-number", + int_to_str( st->state_id ), TRUE, + (char*)NULL ), TRUE ); + + if( parray_next( parser->states, st ) ) + goto_table_row = pstrcatstr( goto_table_row, + gen->gotab.row_sep, FALSE ); + + goto_table = pstrcatstr( goto_table, 
goto_table_row, TRUE ); + + /* Only in scannerless mode */ + if( parser->p_mode == MODE_SCANNERLESS ) + { + /* dfa machine selection */ + dfa_select = pstrcatstr( dfa_select, + pstrrender( gen->dfa_select.col, + GEN_WILD_PREFIX "machine", + int_to_str( list_find( parser->dfas, st->dfa ) ), TRUE, + (char*)NULL ), TRUE ); + + if( parray_next( parser->states, st ) ) + dfa_select = pstrcatstr( dfa_select, + gen->dfa_select.col_sep, FALSE ); + } + + /* Default production table */ + def_prod = pstrcatstr( def_prod, + pstrrender( gen->defprod.col, + GEN_WILD_PREFIX "state-number", + int_to_str( st->state_id ), TRUE, + GEN_WILD_PREFIX "production-number", + int_to_str( + ( ( st->def_prod ) ? st->def_prod->id : -1 ) ), + TRUE, (char*)NULL ), TRUE ); + + if( parray_next( parser->states, st ) ) + def_prod = pstrcatstr( def_prod, gen->defprod.col_sep, FALSE ); + + i++; + } + + /* Lexical recognition machine table composition */ + MSG( "Lexical recognition machine" ); + for( l = parser->dfas, row = 0, column = 0; l; l = list_next( l ), row++ ) + { + dfa = (pregex_dfa*)list_access( l ); + + /* Row start */ + dfa_idx_row = pstrrender( gen->dfa_idx.row_start, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( plist_count( dfa->states ) ), TRUE, + GEN_WILD_PREFIX "row", + int_to_str( row ), TRUE, + (char*)NULL ); + + dfa_accept_row = pstrrender( gen->dfa_accept.row_start, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( plist_count( dfa->states ) ), TRUE, + GEN_WILD_PREFIX "row", + int_to_str( row ), TRUE, + (char*)NULL ); + + if( max_dfa_idx < plist_count( dfa->states ) ) + max_dfa_accept = max_dfa_idx = plist_count( dfa->states ); + + /* Building row entries */ + plist_for( dfa->states, e ) + { + dfa_st = (pregex_dfa_st*)plist_access( e ); + VARS( "dfa_st", "%p", dfa_st ); + + if( dfa_char && dfa_trans ) + { + dfa_char = pstrcatstr( dfa_char, + gen->dfa_char.col_sep, FALSE ); + dfa_trans = pstrcatstr( dfa_trans, + gen->dfa_trans.col_sep, FALSE ); + } + + dfa_idx_row = 
pstrcatstr( dfa_idx_row, + pstrrender( gen->dfa_idx.col, + GEN_WILD_PREFIX "index", + int_to_str( column ), TRUE, + (char*)NULL ), TRUE ); + + dfa_accept_row = pstrcatstr( dfa_accept_row, + pstrrender( gen->dfa_accept.col, + GEN_WILD_PREFIX "accept", + int_to_str( dfa_st->accept ), TRUE, + (char*)NULL ), TRUE ); + + /* Iterate trough all transitions */ + MSG( "Iterating to transitions of DFA" ); + plist_for( dfa_st->trans, f ) + { + dfa_ent = (pregex_dfa_tr*)plist_access( f ); + + for( i = 0; pccl_get( &beg, &end, dfa_ent->ccl, i ); i++ ) + { + dfa_char = pstrcatstr( dfa_char, + pstrrender( gen->dfa_char.col, + GEN_WILD_PREFIX "from", + int_to_str( beg ), TRUE, + GEN_WILD_PREFIX "to", + int_to_str( end ), TRUE, + GEN_WILD_PREFIX "goto", + int_to_str( dfa_st->accept ), TRUE, + (char*)NULL ), TRUE ); + + dfa_trans = pstrcatstr( dfa_trans, + pstrrender( gen->dfa_trans.col, + GEN_WILD_PREFIX "goto", + int_to_str( dfa_ent->go_to ), TRUE, + (char*)NULL ), TRUE ); + + + dfa_char = pstrcatstr( dfa_char, + gen->dfa_char.col_sep, FALSE ); + dfa_trans = pstrcatstr( dfa_trans, + gen->dfa_trans.col_sep, FALSE ); + + column++; + } + } + + /* DFA transition end marker */ + dfa_char = pstrcatstr( dfa_char, + pstrrender( gen->dfa_char.col, + GEN_WILD_PREFIX "from", + int_to_str( -1 ), TRUE, + GEN_WILD_PREFIX "to", + int_to_str( -1 ), TRUE, + (char*)NULL ), TRUE ); + + /* DFA transition */ + dfa_trans = pstrcatstr( dfa_trans, + pstrrender( gen->dfa_trans.col, + GEN_WILD_PREFIX "goto", + int_to_str( -1 ), + TRUE, (char*)NULL ), TRUE ); + + column++; + + if( plist_next( e ) ) + { + dfa_idx_row = pstrcatstr( dfa_idx_row, + gen->dfa_idx.col_sep, FALSE ); + dfa_accept_row = pstrcatstr( dfa_accept_row, + gen->dfa_accept.col_sep, FALSE ); + } + } + + /* Row end */ + dfa_idx_row = pstrcatstr( dfa_idx_row, + pstrrender( gen->dfa_idx.row_end, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( plist_count( dfa->states ) ), TRUE, + GEN_WILD_PREFIX "row", + int_to_str( row ), TRUE, + 
(char*)NULL ), TRUE ); + + dfa_accept_row = pstrcatstr( dfa_accept_row, + pstrrender( gen->dfa_accept.row_end, + GEN_WILD_PREFIX "number-of-columns", + int_to_str( plist_count( dfa->states ) ), TRUE, + GEN_WILD_PREFIX "row", int_to_str( row ), TRUE, + (char*)NULL ), TRUE ); + + if( list_next( l ) ) + { + dfa_idx_row = pstrcatstr( dfa_idx_row, + gen->dfa_idx.row_sep, FALSE ); + dfa_accept_row = pstrcatstr( dfa_accept_row, + gen->dfa_accept.row_sep, FALSE ); + } + + dfa_idx = pstrcatstr( dfa_idx, dfa_idx_row, TRUE ); + dfa_accept = pstrcatstr( dfa_accept, dfa_accept_row, TRUE ); + } + + MSG( "Construct symbol information table" ); + + /* Whitespace identification table and symbol-information-table */ + plist_for( parser->symbols, e ) /* Okidoki, now do the generation */ + { + sym = (SYMBOL*)plist_access( e ); + + symbols = pstrcatstr( symbols, pstrrender( gen->symbols.col, + GEN_WILD_PREFIX "symbol-name", + escape_for_target( gen, sym->name, FALSE ), TRUE, + GEN_WILD_PREFIX "emit", + escape_for_target( gen, sym->emit, FALSE ), TRUE, + GEN_WILD_PREFIX "symbol", + int_to_str( sym->id ), TRUE, + GEN_WILD_PREFIX "type", + int_to_str( sym->type ), TRUE, + GEN_WILD_PREFIX "datatype", + int_to_str( sym->vtype ? sym->vtype->id : 0 ), TRUE, + GEN_WILD_PREFIX "terminal", + sym->type > 0 ? gen->truedef : gen->falsedef, FALSE, + GEN_WILD_PREFIX "lexem", + sym->lexem ? gen->truedef : gen->falsedef, FALSE, + GEN_WILD_PREFIX "whitespace", + sym->whitespace ? gen->truedef : gen->falsedef, FALSE, + GEN_WILD_PREFIX "greedy", + sym->greedy ? 
gen->truedef : gen->falsedef, FALSE, + + (char*)NULL ), TRUE ); + + if( max_symbol_name < (int)strlen( sym->name ) ) + max_symbol_name = (int)strlen( sym->name ); + + if( plist_next( e ) ) + { + symbols = pstrcatstr( symbols, + gen->symbols.col_sep, FALSE ); + } + } + + /* Type definition union */ + if( list_count( parser->vtypes ) == 1 ) + { + vt = (VTYPE*)( parser->vtypes->pptr ); + type_def = pstrrender( gen->vstack_single, + GEN_WILD_PREFIX "value-type", vt->real_def, FALSE, + (char*)NULL ); + } + else + { + type_def = pstrrender( gen->vstack_union_start, + GEN_WILD_PREFIX "number-of-value-types", + int_to_str( list_count( parser->vtypes ) ), + TRUE, (char*)NULL ); + + for( l = parser->vtypes; l; l = l->next ) + { + vt = (VTYPE*)(l->pptr); + + type_def = pstrcatstr( type_def, + pstrrender( gen->vstack_union_def, + GEN_WILD_PREFIX "value-type", vt->real_def, FALSE, + GEN_WILD_PREFIX "attribute", + pstrrender( gen->vstack_union_att, + GEN_WILD_PREFIX "value-type-id", + int_to_str( vt->id ), TRUE, + (char*)NULL ), TRUE, + GEN_WILD_PREFIX "value-type-id", + int_to_str( vt->id ), TRUE, + (char*)NULL ), TRUE ); + } + + type_def = pstrcatstr( type_def, + pstrrender( gen->vstack_union_end, + GEN_WILD_PREFIX "number-of-value-types", + int_to_str( list_count( parser->vtypes ) ), + TRUE, (char*)NULL ), TRUE ); + } + + /* Reduction action code and production definition table */ + row = 0; + + plist_for( parser->productions, e ) + { + p = (PROD*)plist_access( e ); + + /* Select the semantic code to be processed! 
*/ + act = (char*)NULL; + + is_default_code = FALSE; + + if( p->code ) + act = p->code; + else if( plist_count( p->rhs ) == 0 ) + { + act = parser->p_def_action_e; + is_default_code = TRUE; + } + else + { + act = parser->p_def_action; + is_default_code = TRUE; + } + + if( is_default_code && + ( p->lhs->whitespace || + ( parser->error && plist_get_by_ptr( p->rhs, + parser->error ) ) ) ) + { + act = (char*)NULL; + } + + if( act && *act ) + { + /* Generate action start */ + actions = pstrcatstr( actions, pstrrender( gen->action_start, + GEN_WILD_PREFIX "production-number", int_to_str( p->id ), TRUE, + (char*)NULL ), TRUE ); + + /* Generate code localization */ + if( gen->code_localization && p->code_at > 0 ) + { + actions = pstrcatstr( actions, + pstrrender( gen->code_localization, + GEN_WILD_PREFIX "line", + int_to_str( p->code_at ), TRUE, + (char*)NULL ), + TRUE ); + } + + /* Generate the action code */ + act = build_action( parser, gen, p, act, is_default_code ); + actions = pstrcatstr( actions, act, TRUE ); + + /* Generate the action end */ + actions = pstrcatstr( actions, pstrrender( gen->action_end, + GEN_WILD_PREFIX "production-number", int_to_str( p->id ), TRUE, + (char*)NULL ), TRUE ); + } + + /* Generate production information table */ + productions = pstrcatstr( productions, pstrrender( + gen->productions.col, + + GEN_WILD_PREFIX "production-number", + int_to_str( p->id ), TRUE, + GEN_WILD_PREFIX "production", + escape_for_target( gen, mkproduction_str( p ), TRUE ), + TRUE, + GEN_WILD_PREFIX "emit", + escape_for_target( gen, p->emit, TRUE ), TRUE, + GEN_WILD_PREFIX "length", + int_to_str( plist_count( p->rhs ) ), TRUE, + GEN_WILD_PREFIX "lhs", + int_to_str( p->lhs->id ), TRUE, + + (char*)NULL ), TRUE ); + + if( plist_next( e ) ) + productions = pstrcatstr( productions, + gen->productions.col_sep, FALSE ); + + row++; + } + + /* Scanner action code */ + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + if( sym->keyword ) + continue; + 
+ /* Select the semantic code to be processed! */ + if( ( act = sym->code ) ) + { + /* Code localization features */ + if( gen->code_localization && sym->code_at > 0 ) + { + scan_actions = pstrcatstr( scan_actions, + pstrrender( gen->code_localization, + GEN_WILD_PREFIX "line", + int_to_str( sym->code_at ), TRUE, + (char*)NULL ), TRUE ); + } + + scan_actions = pstrcatstr( scan_actions, + pstrrender( gen->scan_action_start, + GEN_WILD_PREFIX "symbol-number", + int_to_str( sym->id ), TRUE, + (char*)NULL ), TRUE ); + + act = build_scan_action( parser, gen, sym, act ); + scan_actions = pstrcatstr( scan_actions, act, TRUE ); + + scan_actions = pstrcatstr( scan_actions, + pstrrender( gen->scan_action_end, + GEN_WILD_PREFIX "symbol-number", + int_to_str( sym->id ), TRUE, + (char*)NULL ), TRUE ); + } + } + + /* Get the goal production */ + goalprod = (PROD*)plist_access( plist_first( parser->goal->productions ) ); + + /* Generate basename - parser->p_basename may contain directory path */ + basename = pstrdup( pbasename( parser->p_basename ) ); + + /* Construct the output files */ + for( file = xml_child( gen->xml, "file" ); + file; file = xml_next( file ) ) + { + /* Make filename */ + if( !parser->to_stdout ) + { + if( ( filename = (char*)xml_attr( file, "filename" ) ) ) + filename = pstrrender( filename, + /* + Here we have to submit the original basename + to construct the final output filename using the + full output path. + */ + GEN_WILD_PREFIX "basename", parser->p_basename, FALSE, + GEN_WILD_PREFIX "Cbasename", c_identifier( + parser->p_basename, FALSE ), TRUE, + GEN_WILD_PREFIX "CBASENAME", c_identifier( + parser->p_basename, TRUE ), TRUE, + GEN_WILD_PREFIX "prefix", parser->p_prefix, FALSE, + (char*)NULL ); + } + + /* Assembling all together - Warning, this is + ONE single function call! 
*/ + + all = pstrrender( xml_txt( file ), + + /* Lengths of names and Prologue/Epilogue codes */ + GEN_WILD_PREFIX "prologue" LEN_EXT, + long_to_str( (long)pstrlen( parser->p_header ) ), TRUE, + GEN_WILD_PREFIX "epilogue" LEN_EXT, + long_to_str( (long)pstrlen( parser->p_footer ) ), TRUE, + GEN_WILD_PREFIX "pcb" LEN_EXT, + long_to_str( (long)pstrlen( parser->p_pcb ) ), TRUE, + + /* Names and Prologue/Epilogue codes */ + GEN_WILD_PREFIX "prologue", parser->p_header, FALSE, + GEN_WILD_PREFIX "epilogue", parser->p_footer, FALSE, + GEN_WILD_PREFIX "pcb", parser->p_pcb, FALSE, + + /* Limits and sizes, parse tables */ + GEN_WILD_PREFIX "number-of-symbols", + int_to_str( plist_count( parser->symbols ) ), TRUE, + GEN_WILD_PREFIX "number-of-states", + int_to_str( parray_count( parser->states ) ), TRUE, + GEN_WILD_PREFIX "number-of-productions", + int_to_str( plist_count( parser->productions ) ), TRUE, + GEN_WILD_PREFIX "number-of-dfa-machines", + int_to_str( list_count( parser->dfas ) ), TRUE, + GEN_WILD_PREFIX "deepest-action-row", + int_to_str( max_action ), TRUE, + GEN_WILD_PREFIX "deepest-goto-row", + int_to_str( max_goto ), TRUE, + GEN_WILD_PREFIX "deepest-dfa-index-row", + int_to_str( max_dfa_idx ), TRUE, + GEN_WILD_PREFIX "deepest-dfa-accept-row", + int_to_str( max_dfa_accept ), TRUE, + GEN_WILD_PREFIX "size-of-dfa-characters", + int_to_str( column ), TRUE, + GEN_WILD_PREFIX "number-of-character-map", + int_to_str( charmap_count ), TRUE, + GEN_WILD_PREFIX "action-table", action_table, FALSE, + GEN_WILD_PREFIX "goto-table", goto_table, FALSE, + GEN_WILD_PREFIX "default-productions", def_prod, FALSE, + GEN_WILD_PREFIX "character-map-symbols", char_map_sym, FALSE, + GEN_WILD_PREFIX "character-map", char_map, FALSE, + GEN_WILD_PREFIX "character-universe", + int_to_str( parser->p_universe ), TRUE, + GEN_WILD_PREFIX "symbols", symbols, FALSE, + GEN_WILD_PREFIX "productions", productions, FALSE, + GEN_WILD_PREFIX "max-symbol-name-length", + int_to_str( max_symbol_name ), 
TRUE, + GEN_WILD_PREFIX "dfa-select", dfa_select, FALSE, + GEN_WILD_PREFIX "dfa-index", dfa_idx, FALSE, + GEN_WILD_PREFIX "dfa-char", dfa_char, FALSE, + GEN_WILD_PREFIX "dfa-trans", dfa_trans, FALSE, + GEN_WILD_PREFIX "dfa-accept", dfa_accept, FALSE, + GEN_WILD_PREFIX "value-type-definition", type_def, FALSE, + GEN_WILD_PREFIX "actions", actions, FALSE, + GEN_WILD_PREFIX "scan_actions", scan_actions, FALSE, + GEN_WILD_PREFIX "top-value", top_value, FALSE, + GEN_WILD_PREFIX "goal-value", goal_value, FALSE, + GEN_WILD_PREFIX "goal-type", parser->goal->vtype ? + parser->goal->vtype->real_def : "", + FALSE, + GEN_WILD_PREFIX "mode", int_to_str( parser->p_mode ), TRUE, + GEN_WILD_PREFIX "error", + ( parser->error ? int_to_str( parser->error->id ) : + int_to_str( -1 ) ), TRUE, + GEN_WILD_PREFIX "eof", + ( parser->end_of_input ? + int_to_str( parser->end_of_input->id ) : + int_to_str( -1 ) ), TRUE, + GEN_WILD_PREFIX "goal-production", + int_to_str( goalprod->id ), TRUE, + GEN_WILD_PREFIX "goal", + int_to_str( parser->goal->id ), TRUE, + + (char*)NULL + ); + + /* Replace all top-level options */ + plist_for( parser->options, e ) + { + if( !( option = pasprintf( "%s%s", + GEN_WILD_PREFIX, plist_key( e ) ) ) ) + OUTOFMEM; + + if( !( complete = pstrrender( all, + option, (char*)plist_access( e ), FALSE, + (char*)NULL ) ) ) + OUTOFMEM; + + pfree( all ); + pfree( option ); + all = complete; + } + + /* Perform line number updating on this file */ + /* + if( gen->code_localization ) + build_code_localizations( &all, gen ); + */ + + /* Now replace all prefixes */ + complete = pstrrender( all, + GEN_WILD_PREFIX "prefix", + parser->p_prefix, FALSE, + GEN_WILD_PREFIX "basename", + basename, FALSE, + GEN_WILD_PREFIX "Cbasename", + c_identifier( basename, FALSE ), TRUE, + GEN_WILD_PREFIX "CBASENAME", + c_identifier( basename, TRUE ), TRUE, + GEN_WILD_PREFIX "filename" LEN_EXT, + long_to_str( + (long)pstrlen( parser->filename ) ), TRUE, + GEN_WILD_PREFIX "filename", parser->filename, 
FALSE, + + (char*)NULL ); + + pfree( all ); + + /* Open output file */ + if( filename ) + { + if( !( stream = fopen( filename, "wt" ) ) ) + { + print_error( parser, ERR_OPEN_OUTPUT_FILE, + ERRSTYLE_FATAL, filename ); + + pfree( filename ); + filename = (char*)NULL; + } + } + + if( !filename ) + { + stream = stdout; + + if( parser->files_count > 0 ) + fprintf( stdout, "%c", EOF ); + } + + parser->files_count++; + fprintf( stream, "%s", complete ); + pfree( complete ); + + if( filename ) + { + fclose( stream ); + pfree( filename ); + } + } + + MSG( "Freeing used memory" ); + + pfree( basename ); + + /* Freeing generated content */ + pfree( action_table ); + pfree( goto_table ); + pfree( def_prod ); + pfree( char_map ); + pfree( char_map_sym ); + pfree( symbols ); + pfree( productions ); + pfree( dfa_select ); + pfree( dfa_idx ); + pfree( dfa_char ); + pfree( dfa_trans ); + pfree( dfa_accept ); + pfree( type_def ); + pfree( actions ); + pfree( scan_actions ); + pfree( top_value ); + pfree( goal_value ); + + /* Freeing the generator's structure */ + pfree( gen->for_sequences ); + pfree( gen->do_sequences ); + xml_free( gen->xml ); + + /* Free local lexers */ + plex_free( action_lex ); + plex_free( scan_lex ); + + VOIDRET; } diff --git a/src/buildxml.c b/src/buildxml.c deleted file mode 100644 index 5e9839e..0000000 --- a/src/buildxml.c +++ /dev/null @@ -1,1182 +0,0 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: buildxml.c -Author: Jan Max Meyer -Usage: The XML-based Parser Definition code-generator. 
------------------------------------------------------------------------------ */ - -#include "unicc.h" - -#define XML_YES "yes" -#define XML_NO "no" - -#define SYMBOL_VAR "symbol" - -extern char* pmod[]; - -/** Dumps a character-class definition into an XML-structure. - -//parent_xml// is the Parent element where the character-class block will be -attached to. //ccl// is the character-class to be dumped. */ -static void build_xml_ccl( XML_T parent_xml, pccl* ccl ) -{ - int i; - wchar_t beg; - wchar_t end; - XML_T ccl_xml; - XML_T range_xml; - - PROC( "build_xml_ccl" ); - PARMS( "parent_xml", "%p", parent_xml ); - PARMS( "ccl", "%p", ccl ); - - if( !( ccl_xml = xml_add_child( parent_xml, "character-class", 0 ) ) ) - OUTOFMEM; - - xml_set_int_attr( ccl_xml, "count", pccl_count( ccl ) ); - - for( i = 0; pccl_get( &beg, &end, ccl, i ); i++ ) - { - if( !( range_xml = xml_add_child( ccl_xml, "range", 0 ) ) ) - OUTOFMEM; - - xml_set_int_attr( range_xml, "from", beg ); - xml_set_int_attr( range_xml, "to", end ); - } - - VOIDRET; -} - -/** Utility function that generates a raw-code-tag from a string. - -//code_xml// is the parent element where the raw-code block will be attached to. -//code// is the content of the raw code block. - -Returns a XML_T raw-code tag. -*/ -static XML_T build_xml_raw_code( XML_T code_xml, char* code ) -{ - XML_T raw_code; - - if( !code ) - OUTOFMEM; - - if( !( raw_code = xml_add_child( code_xml, "raw", 0 ) ) ) - OUTOFMEM; - - xml_set_txt_f( raw_code, code ); - - return raw_code; -} - -/** Constructs a XML-structure for a production's semantic action code, by -splitting the original program code into XML-blocks, which can later be easier -translated into the particular parser program. - -//code_xml// is the code-XML-node where elements will be attached to. -//parser// is the parser information structure. -//p// is the production. -//base// is the code-base template for the reduction action. 
-//def_code// defines if the base-pointer is a default-code block or an -individually coded one. - -Returns TRUE on success. -*/ -static BOOLEAN build_xml_action( XML_T code_xml, PARSER* parser, PROD* p, - char* base, BOOLEAN def_code ) -{ - plex* lex; - int off; - char* last = base; - char* start; - char* end; - unsigned int match; - char* chk; - char* tmp; - plistel* e; - plist* rhs = p->rhs; - BOOLEAN on_error = FALSE; - SYMBOL* sym; - char* raw; - XML_T code; - - /* - 12.07.2010 Jan Max Meyer - - Print warning if code symbol references to undefined symbol on the - semantic rhs! - */ - - PROC( "build_xml_action" ); - PARMS( "code_xml", "%p", code_xml ); - PARMS( "parser", "%p", parser ); - PARMS( "p", "%p", p ); - PARMS( "base", "%s", base ); - PARMS( "def_code", "%s", BOOLEAN_STR( def_code ) ); - - /* Prepare regular expression engine */ - lex = plex_create( 0 ); - - if( !( plex_define( lex, "@'([^']|\\')*'", 1, 0 ) - && plex_define( lex, "@\"([^\"]|\\\")*\"", 1, 0 ) - && plex_define( lex, "@[A-Za-z_][A-Za-z0-9_]*", 2, 0 ) - && plex_define( lex, "@[0-9]+", 3, 0 ) - && plex_define( lex, "@@", 4, 0 ) - /* - * Hmm ... this way looks "cooler" for future versions, maybe - * this would be a nice extension: @: - */ - && plex_define( lex, "@!" 
SYMBOL_VAR ":[A-Za-z_][A-Za-z0-9_]*", - 5, 0 ) - ) ) - { - plex_free( lex ); - RETURN( FALSE ); - } - - VARS( "p->sem_rhs counts", "%d", plist_count( p->sem_rhs ) ); - /* Ok, perform replacement operations */ - if( plist_count( p->sem_rhs ) ) - { - MSG( "Replacing semantic right-hand side" ); - rhs = p->sem_rhs; - } - - MSG( "Iterating trough result array" ); - while( ( start = plex_next( lex, last, &match, &end ) ) && !on_error ) - { - off = 0; - tmp = (char*)NULL; - - /* Copy raw part of code into its own tag */ - if( last < start ) - { - if( !( raw = pstrncatstr( - (char*)NULL, last, start - last ) ) ) - OUTOFMEM; - - build_xml_raw_code( code_xml, raw ); - - VARS( "end", "%s", end ); - } - - last = end; - - VARS( "match", "%d", match ); - switch( match ) - { - case 1: - start++; - end--; - - case 2: - MSG( "Identifier" ); - off = 1; - plist_for( rhs, e ) - { - chk = plist_key( e ); - VARS( "chk", "%s", chk ? chk : "(NULL)" ); - - /* - printf( "check >%s< with >%.*s<\n", - chk, end - start - 1, start + 1 ); - */ - if( chk && !strncmp( chk, start + 1, - end - start - 1 ) - && pstrlen( chk ) == end - start - 1 ) - { - break; - } - - off++; - } - - if( !e ) - { - print_error( parser, ERR_UNDEFINED_SYMREF, ERRSTYLE_WARNING, - end - start, start ); - off = 0; - - tmp = pstrdup( start ); - } - - VARS( "off", "%d", off ); - break; - - case 3: - MSG( "Offset" ); - off = atoi( start + 1 ); - break; - - case 4: - MSG( "Left-hand side" ); - - if( !( code = xml_add_child( code_xml, "variable", 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_attr( code, "target", "left-hand-side" ) ) ) - OUTOFMEM; - - if( p->lhs->vtype ) - { - if( !( xml_set_attr( code, "value-type", - p->lhs->vtype->real_def ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( code, "value-type-id", - p->lhs->vtype->id ) ) ) - OUTOFMEM; - } - - break; - - case 5: - MSG( "Assign left-hand side symbol" ); - - if( !( tmp = pasprintf( "%.*s", - end - start - ( pstrlen( SYMBOL_VAR ) + 3 ), - start + ( pstrlen( SYMBOL_VAR ) + 3 
) - ) ) ) - { - OUTOFMEM; - RETURN( FALSE ); - } - - VARS( "tmp", "%s", tmp ); - - /* Go through all possible left-hand side symbols */ - plist_for( p->all_lhs, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( !strcmp( sym->name, tmp ) ) - { - MSG( "Found a matching symbol!" ); - - pfree( tmp ); - tmp = (char*)NULL; - - if( !( code = xml_add_child( code_xml, - "command", 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_attr( code, - "action", "set-symbol" ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( code, "symbol", sym->id ) ) ) - OUTOFMEM; - - break; - } - } - - if( !e ) - { - MSG( "No match found..." ); - - print_error( parser, ERR_UNDEFINED_LHS, - ERRSTYLE_WARNING, tmp ); - pfree( tmp ); - - if( !( tmp = pstrdup( start ) ) ) - { - OUTOFMEM; - RETURN( FALSE ); - } - } - - break; - - default: - MSG( "Uncaught regular expression match!" ); - break; - } - - VARS( "off", "%d", off ); - if( off > 0 ) - { - MSG( "Handing offset" ); - sym = (SYMBOL*)plist_access( plist_get( rhs, off - 1 ) ); - - if( sym && !( sym->keyword ) ) - { - if( !( code = xml_add_child( code_xml, "variable", 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_attr( code, "target", "right-hand-side" ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( code, "offset", - plist_count( rhs ) - off ) ) ) - OUTOFMEM; - - if( sym->vtype ) - { - if( !( xml_set_attr( code, "value-type", - sym->vtype->real_def ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( code, "value-type-id", - sym->vtype->id ) ) ) - OUTOFMEM; - } - } - else - { - if( !def_code ) - { - print_error( parser, ERR_NO_VALUE_TYPE, ERRSTYLE_FATAL, - find_base_symbol( sym )->name, - p->id, end - start + 1, start ); - } - - on_error = TRUE; - } - } - - if( tmp ) - build_xml_raw_code( code_xml, tmp ); - } - - if( last && *last ) - build_xml_raw_code( code_xml, pstrdup( last ) ); - - plex_free( lex ); - - RETURN( !on_error ); -} - -/** Builds the scanner actions */ -static BOOLEAN build_xml_scan_action( - XML_T code_xml, PARSER* parser, SYMBOL* s, char* base ) -{ - plex* 
lex; - char* last = base; - char* start; - char* end; - unsigned int match; - char* raw; - XML_T code; - char* tmp; - SYMBOL* sym; - plistel* e; - - PROC( "build_scan_action" ); - PARMS( "code_xml", "%p", code_xml ); - PARMS( "parser", "%p", parser ); - PARMS( "s", "%p", s ); - PARMS( "base", "%s", base ); - - /* Prepare regular expression engine */ - lex = plex_create( PREGEX_COMP_NOANCHORS ); - - if( !( plex_define( lex, "@>", 1, 0 ) - && plex_define( lex, "@<", 2, 0 ) - && plex_define( lex, "@@", 3, 0 ) - && plex_define( lex, "@!" SYMBOL_VAR ":[A-Za-z_][A-Za-z0-9_]*", 4, 0 ) - ) ) - { - plex_free( lex ); - RETURN( FALSE ); - } - - while( ( start = plex_next( lex, last, &match, &end ) ) ) - { - if( last < start ) - { - if( !( raw = pstrncatstr( - (char*)NULL, last, start - last ) ) ) - OUTOFMEM; - - build_xml_raw_code( code_xml, raw ); - - VARS( "raw", "%s", raw ); - } - - last = end; - - VARS( "match", "%d", match ); - switch( match ) - { - case 1: - MSG( "@>" ); - - if( !( code = xml_add_child( code_xml, - "begin-of-match", 0 ) ) ) - OUTOFMEM; - - break; - - case 2: - MSG( "@<" ); - - if( !( code = xml_add_child( code_xml, - "end-of-match", 0 ) ) ) - OUTOFMEM; - - break; - - case 3: - MSG( "@@" ); - - if( !( code = xml_add_child( code_xml, - "return-value", 0 ) ) ) - OUTOFMEM; - - if( s->vtype && list_count( parser->vtypes ) > 1 ) - { - if( !( xml_set_attr( code, "value-type", - s->vtype->real_def ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( code, "value-type-id", - s->vtype->id ) ) ) - OUTOFMEM; - } - break; - - case 4: - MSG( "Set terminal symbol" ); - - if( !( tmp = pasprintf( "%.*s", - end - start - ( pstrlen( SYMBOL_VAR ) + 3 ), - start + ( pstrlen( SYMBOL_VAR ) + 3 ) - ) ) ) - OUTOFMEM; - - VARS( "tmp", "%s", tmp ); - - /* Go through all possible terminal symbols */ - plist_for( s->all_sym, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( !strcmp( sym->name, tmp ) ) - { - MSG( "Found a matching symbol!" 
); - - if( !( code = xml_add_child( code_xml, - "command", 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_attr( code, - "action", "set-symbol" ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( code, "symbol", sym->id ) ) ) - OUTOFMEM; - break; - } - } - - if( !e ) - { - MSG( "No match found..." ); - - print_error( parser, ERR_UNDEFINED_TERMINAL, - ERRSTYLE_WARNING, tmp ); - } - - pfree( tmp ); - break; - - default: - MSG( "Uncaught regular expression match!" ); - break; - } - } - - if( last && *last ) - build_xml_raw_code( code_xml, pstrdup( last ) ); - - plex_free( lex ); - - RETURN( TRUE ); -} - -/** Builds the DFAs XML structure. */ -static void build_xml_dfa( XML_T parent, pregex_dfa* dfa ) -{ - pregex_dfa_st* st; - pregex_dfa_tr* tr; - int i; - plistel* e; - plistel* f; - XML_T state; - XML_T trans; - - PROC( "build_xml_dfa" ); - - for( e = plist_first( dfa->states ), i = 0; e; e = plist_next( e ), i++ ) - { - st = (pregex_dfa_st*)plist_access( e ); - - if( !( state = xml_add_child( parent, "state", 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( state, "id", i ) ) ) - OUTOFMEM; - - if( st->accept && - !( xml_set_int_attr( state, "accept", st->accept ) ) ) - OUTOFMEM; - - if( st->def_trans ) - if( !( xml_set_int_attr( state, "default-transition", - st->def_trans->go_to ) ) ) - OUTOFMEM; - - plist_for( st->trans, f ) - { - tr = (pregex_dfa_tr*)plist_access( f ); - - if( !( trans = xml_add_child( state, "transition", 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( trans, "goto", tr->go_to ) ) ) - OUTOFMEM; - - build_xml_ccl( trans, tr->ccl ); - } - } - - VOIDRET; -} - -static void print_xml_options( plist* opts, XML_T append_to ) -{ - plistel* e; - OPT* opt; - XML_T option; - - /* Set parser options */ - plist_for( opts, e ) - { - opt = (OPT*)plist_access( e ); - - if( !( option = xml_add_child( append_to, "option", 0 ) ) ) - OUTOFMEM; - - if( !xml_set_attr( option, "name", opt->opt ) ) - OUTOFMEM; - - if( !xml_set_int_attr( option, "line", opt->line ) ) - OUTOFMEM; - - 
if( !( xml_set_txt( option, opt->def ) ) ) - OUTOFMEM; - } -} - - -static void print_xml_symbols( PARSER* parser, XML_T par ) -{ - plistel* e; - SYMBOL* sym; - char* tmp; - pregex_nfa* tmp_nfa; - pregex_dfa* tmp_dfa; - - XML_T sym_tab; - XML_T symbol; - XML_T code; - XML_T lex; - XML_T regex; - - PROC( "print_xml_symbols" ); - MSG( "Printing symbol table" ); - - if( !( sym_tab = xml_add_child( par, "symbols", 0 ) ) ) - OUTOFMEM; - - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( !( symbol = xml_add_child( sym_tab, "symbol", 0 ) ) ) - OUTOFMEM; - - xml_set_attr_d( symbol, "type", - IS_TERMINAL( sym ) ? "terminal" : "non-terminal" ); - xml_set_int_attr( symbol, "id", sym->id ); - xml_set_attr( symbol, "name", sym->name ); - - if( IS_TERMINAL( sym ) ) - { - switch( sym->type ) - { - case SYM_CCL_TERMINAL: - tmp = "character-class"; - build_xml_ccl( symbol, sym->ccl ); - break; - case SYM_REGEX_TERMINAL: - if( sym->keyword ) - tmp = "string"; - else - { - tmp = "regular-expression"; - - if( !( regex = xml_add_child( - symbol, "regex", 0 ) ) ) - OUTOFMEM; - - xml_set_txt_d( regex, pregex_ptn_to_regex( sym->ptn ) ); - - /* - Convert regular pattern into DFA state machine - */ - tmp_nfa = pregex_nfa_create(); - tmp_dfa = pregex_dfa_create(); - - sym->ptn->accept = sym->id; - - pregex_ptn_to_nfa( tmp_nfa, sym->ptn ); - - pregex_dfa_from_nfa( tmp_dfa, tmp_nfa ); - pregex_dfa_minimize( tmp_dfa ); - - if( !( lex = xml_add_child( symbol, "dfa", 0 ) ) ) - OUTOFMEM; - - build_xml_dfa( lex, tmp_dfa ); - tmp_nfa = pregex_nfa_free( tmp_nfa ); - tmp_dfa = pregex_dfa_free( tmp_dfa ); - } - break; - case SYM_SYSTEM_TERMINAL: - tmp = "system"; - break; - - default: - MISSINGCASE; - tmp = "!!!UNDEFINED!!!"; - break; - } - - xml_set_attr_d( symbol, "terminal-type", tmp ); - - if( sym->whitespace ) - xml_set_attr( symbol, "is-whitespace", XML_YES ); - - /* Code (in case of regex terminals */ - if( sym->code && *( sym->code ) ) - { - if( !( code = 
xml_add_child( symbol, "code", 0 ) ) ) - OUTOFMEM; - - if( sym->code_at > 0 ) - xml_set_int_attr( code, "defined-at", sym->code_at ); - - build_xml_scan_action( code, parser, sym, sym->code ); - } - } - else - { - /* Goal symbol TODO */ - if( sym->goal ) - xml_set_attr( symbol, "is-goal", XML_YES ); - - /* Derived-from */ - if( sym->generated && sym->derived_from ) - xml_set_int_attr( symbol, "derived-from", - sym->derived_from->id ); - } - - /* Symbol value type */ - if( sym->vtype ) - { - if( !( xml_set_attr( symbol, "value-type", - sym->vtype->real_def ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( symbol, "value-type-id", - sym->vtype->id ) ) ) - OUTOFMEM; - } - - if( sym->line > 0 ) - xml_set_int_attr( symbol, "defined-at", sym->line ); - - /* Set symbol options */ - print_xml_options( sym->options, symbol ); - } - - VOIDRET; -} - -static void print_xml_productions( PARSER* parser, XML_T par ) -{ - plistel* e; - plistel* f; - SYMBOL* sym; - PROD* p; - BOOLEAN is_default_code; - char* act; - int i; - int j; - - XML_T prod_tab; - XML_T prod; - XML_T lhs; - XML_T rhs; - XML_T code; - - PROC( "print_xml_productions" ); - MSG( "Printing production table" ); - - if( !( prod_tab = xml_add_child( par, "productions", 0 ) ) ) - OUTOFMEM; - - plist_for( parser->productions, e ) - { - p = (PROD*)plist_access( e ); - - if( !( prod = xml_add_child( prod_tab, "production", 0 ) ) ) - OUTOFMEM; - - /* Production id */ - xml_set_int_attr( prod, "id", p->id ); - xml_set_int_attr( prod, "length", plist_count( p->rhs ) ); - if( p->line > 0 ) - xml_set_int_attr( prod, "defined-at", p->line ); - - /* Print all left-hand sides */ - plist_for( p->all_lhs, f ) - { - sym = (SYMBOL*)plist_access( f ); - - if( !( lhs = xml_add_child( prod, "left-hand-side", 0 ) ) ) - OUTOFMEM; - - xml_set_int_attr( lhs, "symbol-id", sym->id ); - xml_set_int_attr( lhs, "offset", i ); - } - - /* Print parts of semantic right-hand side */ - for( f = plist_first( p->sem_rhs ), i = 0; - f && plist_count( 
p->sem_rhs ) - i > plist_count( p->rhs ); - f = plist_next( f ), i++ ) - { - sym = (SYMBOL*)plist_access( f ); - - if( !( rhs = xml_add_child( prod, - "semantic-right-hand-side", 0 ) ) ) - OUTOFMEM; - - xml_set_int_attr( rhs, "symbol-id", sym->id ); - xml_set_int_attr( rhs, "offset", i ); - - if( plist_key( f ) ) - xml_set_attr( rhs, "named", plist_key( f ) ); - } - - /* Print right-hand side */ - for( f = plist_first( p->rhs ), j = 0; - f; f = plist_next( f ), i++, j++ ) - { - sym = (SYMBOL*)plist_access( f ); - - if( !( rhs = xml_add_child( prod, "right-hand-side", 0 ) ) ) - OUTOFMEM; - - xml_set_int_attr( rhs, "symbol-id", sym->id ); - xml_set_int_attr( rhs, "offset", i ); - - if( plist_key( f ) ) - xml_set_attr( rhs, "named", plist_key( f ) ); - } - - /* Set productions options */ - print_xml_options( p->options, prod ); - - /* Code */ - is_default_code = FALSE; - - /* Production has attached semantic action */ - if( p->code ) - act = p->code; - /* Non-empty production */ - else if( plist_count( p->rhs ) == 0 ) - { - act = parser->p_def_action_e; - is_default_code = TRUE; - } - /* Empty production */ - else - { - act = parser->p_def_action; - is_default_code = TRUE; - } - - /* - Unset action code if default code was chosen, left-hand side - is whitespace and error token is part of whitespaces ? 
- */ - if( is_default_code && - ( p->lhs->whitespace || - ( parser->error && plist_get_by_ptr( p->rhs, parser->error ) ) ) - ) - { - act = (char*)NULL; - } - - if( act && *act ) - { - if( !( code = xml_add_child( prod, "code", 0 ) ) ) - OUTOFMEM; - - if( p->code_at > 0 ) - xml_set_int_attr( code, "defined-at", p->code_at ); - - build_xml_action( code, parser, p, act, is_default_code ); - } - } - - VOIDRET; -} - -static void print_xml_states( PARSER* parser, XML_T par ) -{ - LIST* m; - - STATE* st; - TABCOL* col; - char* transtype; - int i; - int st_lex; - - XML_T state_tab; - XML_T state; - XML_T go_to; - XML_T action; - - PROC( "print_xml_states" ); - - MSG( "State table" ); - - if( !( state_tab = xml_add_child( par, "states", 0 ) ) ) - OUTOFMEM; - - i = 0; - parray_for( parser->states, st ) - { - /* Add state entity */ - if( !( state = xml_add_child( state_tab, "state", 0 ) ) ) - OUTOFMEM; - - /* Set some state-specific options */ - xml_set_int_attr( state, "id", st->state_id ); - - /* Default Production */ - if( st->def_prod ) - xml_set_int_attr( state, "default-production", - st->def_prod->id ); - - /* Matching Lexer */ - if( ( st_lex = list_find( parser->dfas, st->dfa ) ) >= 0 ) - xml_set_int_attr( state, "lexer", st_lex ); - - /* Derived from state CHECK! */ - if( st->derived_from ) - xml_set_int_attr( state, "derived-from-state", - st->derived_from ); - - /* Action table */ - for( m = st->actions; m; m = list_next( m ) ) - { - /* Get table column pointer */ - col = (TABCOL*)list_access( m ); - - /* Shift, reduce, shift&reduce or even error? 
*/ - switch( col->action ) - { - case ERROR: /* Error */ - transtype = "error"; - break; - case REDUCE: /* Reduce */ - transtype = "reduce"; - break; - case SHIFT: /* Shift */ - transtype = "shift"; - break; - case SHIFT_REDUCE: /* Shift&Reduce */ - transtype = "shift-reduce"; - break; - - default: - MISSINGCASE; - break; - } - - if( !( action = xml_add_child( state, transtype, 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( action, "symbol-id", - col->symbol->id ) ) ) - OUTOFMEM; - - /* CHECK! */ - if( col->action != ERROR ) - { - if( col->action & REDUCE ) - xml_set_int_attr( action, "by-production", col->index ); - else - xml_set_int_attr( action, "to-state", col->index ); - } - } - - /* Goto table */ - for( m = st->gotos; m; m = list_next( m ) ) - { - /* Get table column pointer */ - col = (TABCOL*)list_access( m ); - - if( !( go_to = xml_add_child( state, "goto", 0 ) ) ) - OUTOFMEM; - - if( !( xml_set_int_attr( go_to, "symbol-id", - col->symbol->id ) ) ) - OUTOFMEM; - - /* Only print goto on reduce; Else, its value is not relevant */ - if( ( col->action & REDUCE ) ) - { - if( !( xml_set_int_attr( go_to, - "by-production", col->index ) ) ) - OUTOFMEM; - } - else - { - if( !( xml_set_int_attr( go_to, - "to-state", col->index ) ) ) - OUTOFMEM; - } - } - - i++; - } - - VOIDRET; -} - -static void print_xml_lexers( PARSER* parser, XML_T par ) -{ - LIST* l; - int i; - pregex_dfa* dfa; - - XML_T lex_tab; - XML_T lex; - - PROC( "print_xml_lexers" ); - - if( !( lex_tab = xml_add_child( par, "lexers", 0 ) ) ) - OUTOFMEM; - - for( l = parser->dfas, i = 0; l; l = list_next( l ), i++ ) - { - dfa = (pregex_dfa*)list_access( l ); - - if( !( lex = xml_add_child( lex_tab, "lexer", 0 ) ) ) - OUTOFMEM; - - if( list_count( parser->dfas ) > 1 && - !( xml_set_int_attr( lex, "id", i ) ) ) - OUTOFMEM; - - build_xml_dfa( lex, dfa ); - } - - VOIDRET; -} - -static void print_xml_vtypes( PARSER* parser, XML_T par ) -{ - LIST* l; - VTYPE* vt; - - XML_T vartype_tab; - XML_T vartype; - - 
PROC( "print_xml_vtypes" ); - - if( !( vartype_tab = xml_add_child( par, "value-types", 0 ) ) ) - OUTOFMEM; - - for( l = parser->vtypes; l; l = list_next( l ) ) - { - vt = (VTYPE*)list_access( l ); - - if( !( vartype = xml_add_child( vartype_tab, "value-type", 0 ) ) ) - OUTOFMEM; - - if( !xml_set_int_attr( vartype, "id", vt->id ) ) - OUTOFMEM; - - if( !xml_set_attr( vartype, "c_name", vt->int_name ) ) - OUTOFMEM; - - if( !xml_set_txt( vartype, vt->real_def ) ) - OUTOFMEM; - } - - VOIDRET; -} - -/** Serves a universal XML-code generator. - -//parser// is the parser information structure. - -If //finished// is the TRUE: Parser generation has finished successful. -FALSE: Parser generation failed because of parse errors. */ -void build_xml( PARSER* parser, BOOLEAN finished ) -{ - XML_T par; - XML_T code; - - FILE* out = stdout; - char* outname = (char*)NULL; - - char* xmlstr; - - PROC( "build_xml" ); - PARMS( "parser", "%p", parser ); - PARMS( "finished", "%s", BOOLEAN_STR( finished ) ); - - /* Create root node */ - if( !( par = xml_new( "parser" ) ) ) - OUTOFMEM; - - /* UniCC version */ - xml_set_attr( par, "unicc-version", print_version( TRUE ) ); - - /* Parser model */ - xml_set_attr( par, "mode", pmod[ parser->p_mode ] ); - - /* Set general parser attributes */ - if( parser->p_prefix && - !( xml_set_attr( par, "prefix", parser->p_prefix ) ) ) - OUTOFMEM; - if( parser->filename && - !( xml_set_attr( par, "source", parser->filename ) ) ) - OUTOFMEM; - if( parser->p_basename && - !( xml_set_attr( par, "basename", parser->p_basename ) ) ) - OUTOFMEM; - if( parser->p_template && - !( xml_set_attr( par, "target-language", parser->p_template ) ) ) - OUTOFMEM; - - if( !xml_set_int_attr( par, "char-min", PCCL_MIN ) ) - OUTOFMEM; - if( !xml_set_int_attr( par, "char-max", parser->p_universe - 1 ) ) - OUTOFMEM; - - /* Print parser's options */ - print_xml_options( parser->options, par ); - - VARS( "finished", "%s", BOOLEAN_STR( finished ) ); - if( finished ) - { - /* Build 
table of symbols --------------------------------------- */ - print_xml_symbols( parser, par ); - - /* Build table of productions ----------------------------------- */ - print_xml_productions( parser, par ); - - /* Build state table -------------------------------------------- */ - print_xml_states( parser, par ); - - /* Build keyword/regular expression matching lexers ------------- */ - print_xml_lexers( parser, par ); - - print_xml_vtypes( parser, par ); - } - - /* Put prologue code */ - if( !( code = xml_add_child( par, "prologue", 0 ) ) ) - OUTOFMEM; - - if( parser->p_header ) - xml_set_txt( code, parser->p_header ); - - /* Put epilogue code */ - if( !( code = xml_add_child( par, "epilogue", 0 ) ) ) - OUTOFMEM; - - if( parser->p_footer ) - xml_set_txt( code, parser->p_footer ); - - /* Put parser control block code */ - if( !( code = xml_add_child( par, "pcb", 0 ) ) ) - OUTOFMEM; - - if( parser->p_pcb ) - xml_set_txt( code, parser->p_pcb ); - - /* Put entire parser source into XML output */ - if( !( code = xml_add_child( par, "source", 0 ) ) ) - OUTOFMEM; - - xml_set_txt( code, parser->source ); - - /* Write error messages */ - if( parser->err_xml ) - xml_move( xml_child( parser->err_xml, "messages" ), par, 0 ); - - /* Write to output file */ - if( !parser->to_stdout ) - { - if( ( outname = pasprintf( "%s%s", - parser->p_basename, UNICC_XML_EXTENSION ) ) ) - { - if( !( out = fopen( outname, "wb" ) ) ) - { - print_error( parser, ERR_OPEN_OUTPUT_FILE, - ERRSTYLE_WARNING, outname ); - - out = stdout; - } - } - } - - if( out == stdout ) - { - if( parser->files_count > 0 ) - fprintf( stdout, "%c", EOF ); - } - - if( ( xmlstr = xml_toxml( par ) ) ) - { - fprintf( out, "\n" ); - fprintf( out, "\n", -#ifndef _WIN32 -#ifdef TLTDIR - TLTDIR "/" -#endif -#endif - "unicc.dtd" - ); - - parser->files_count++; - fprintf( out, "%s", xmlstr ); - } - else - OUTOFMEM; - - pfree( xmlstr ); - xml_free( par ); - - if( out != stdout ) - fclose( out ); - - if( outname ) - pfree( 
outname ); - - VOIDRET; -} diff --git a/src/debug.c b/src/debug.c index 04e5e2d..961dd7e 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: debug.c -Author: Jan Max Meyer -Usage: Debug and trace functionalities ------------------------------------------------------------------------------ */ +/* Debug and trace functionalities */ #include "unicc.h" @@ -21,19 +12,19 @@ extern BOOLEAN first_progress; */ void print_symbol( FILE* stream, SYMBOL* sym ) { - if( !stream ) - stream = stderr; - - if( sym->type == SYM_CCL_TERMINAL ) - fprintf( stream, "'%s'", pccl_to_str( sym->ccl, TRUE ) ); - else if( sym->type == SYM_REGEX_TERMINAL && sym->keyword ) - fprintf( stream, "\"%s\"", sym->name ); - else if( sym->type == SYM_REGEX_TERMINAL && !( sym->keyword ) ) - fprintf( stream, "@%s", sym->name ); - else if( sym->type == SYM_SYSTEM_TERMINAL ) - fprintf( stream, "%s", sym->name ); - else - fprintf( stream, "%s", sym->name ); + if( !stream ) + stream = stderr; + + if( sym->type == SYM_CCL_TERMINAL ) + fprintf( stream, "'%s'", pccl_to_str( sym->ccl, TRUE ) ); + else if( sym->type == SYM_REGEX_TERMINAL && sym->keyword ) + fprintf( stream, "\"%s\"", sym->name ); + else if( sym->type == SYM_REGEX_TERMINAL && !( sym->keyword ) ) + fprintf( stream, "@%s", sym->name ); + else if( sym->type == SYM_SYSTEM_TERMINAL ) + fprintf( stream, "%s", sym->name ); + else + fprintf( stream, "%s", sym->name ); } /** Dumps the analyzed grammar and all its symbols to a desired file or stream @@ -45,70 +36,70 @@ in an ASCII-based view. 
*/ void dump_grammar( FILE* stream, PARSER* parser ) { - plistel* e; - plistel* f; - - SYMBOL* s = (SYMBOL*)NULL; - SYMBOL* sym = (SYMBOL*)NULL; - PROD* p = (PROD*)NULL; - - if( !stream ) - stream = stderr; - - if( first_progress ) - fprintf( stream, "\n" ); - - fprintf( stream, "\nGRAMMAR\n\n" ); - - plist_for( parser->symbols, e ) - { - s = (SYMBOL*)plist_access( e ); - - if( s->type == SYM_NON_TERMINAL ) - { - fprintf( stream, " " ); - print_symbol( stream, s ); - fprintf( stream, " " ); - - /* Printing the FIRST-set */ - fprintf( stream, "[ " ); - - plist_for( s->first, f ) - { - sym = (SYMBOL*)plist_access( f ); - - print_symbol( stream, sym ); - fprintf( stream, " " ); - } - - if( s->type == SYM_NON_TERMINAL ) - { - fprintf( stream, "] lexem:%d prec:%d assoc:%c v:%s\n", s->lexem, - s->prec, ( ( s->assoc == ASSOC_LEFT ) ? 'L' : - ( ( s->assoc == ASSOC_RIGHT ) ? 'R' : 'N' ) ), - ( s->vtype ) ? s->vtype->int_name : "(null)" ); - - /* Printing the productions */ - plist_for( s->productions, f ) - { - p = (PROD*)plist_access( f ); - - fprintf( stream, " (%d) -> ", p->id ); - dump_production( stream, p, FALSE, FALSE ); - } - } - else - { - fprintf( stream, "] prec:%d assoc:%c\n", - s->prec, ( ( s->assoc == ASSOC_LEFT ) ? 'L' : - ( ( s->assoc == ASSOC_RIGHT ) ? 
'R' : 'N' ) ) ); - } - - fprintf( stream, "\n" ); - } - } - - first_progress = FALSE; + plistel* e; + plistel* f; + + SYMBOL* s = (SYMBOL*)NULL; + SYMBOL* sym = (SYMBOL*)NULL; + PROD* p = (PROD*)NULL; + + if( !stream ) + stream = stderr; + + if( first_progress ) + fprintf( stream, "\n" ); + + fprintf( stream, "\nGRAMMAR\n\n" ); + + plist_for( parser->symbols, e ) + { + s = (SYMBOL*)plist_access( e ); + + if( s->type == SYM_NON_TERMINAL ) + { + fprintf( stream, " " ); + print_symbol( stream, s ); + fprintf( stream, " " ); + + /* Printing the FIRST-set */ + fprintf( stream, "[ " ); + + plist_for( s->first, f ) + { + sym = (SYMBOL*)plist_access( f ); + + print_symbol( stream, sym ); + fprintf( stream, " " ); + } + + if( s->type == SYM_NON_TERMINAL ) + { + fprintf( stream, "] lexem:%d prec:%d assoc:%c v:%s\n", s->lexem, + s->prec, ( ( s->assoc == ASSOC_LEFT ) ? 'L' : + ( ( s->assoc == ASSOC_RIGHT ) ? 'R' : 'N' ) ), + ( s->vtype ) ? s->vtype->int_name : "(null)" ); + + /* Printing the productions */ + plist_for( s->productions, f ) + { + p = (PROD*)plist_access( f ); + + fprintf( stream, " (%d) -> ", p->id ); + dump_production( stream, p, FALSE, FALSE ); + } + } + else + { + fprintf( stream, "] prec:%d assoc:%c\n", + s->prec, ( ( s->assoc == ASSOC_LEFT ) ? 'L' : + ( ( s->assoc == ASSOC_RIGHT ) ? 'R' : 'N' ) ) ); + } + + fprintf( stream, "\n" ); + } + } + + first_progress = FALSE; } /** Dumps the analyzed grammar symbols. @@ -119,73 +110,73 @@ output is written to stderr. */ void dump_symbols( FILE* stream, PARSER* parser ) { - plistel* e; - SYMBOL* s = (SYMBOL*)NULL; + plistel* e; + SYMBOL* s = (SYMBOL*)NULL; - if( !stream ) - stream = stderr; + if( !stream ) + stream = stderr; - if( first_progress ) - fprintf( stream, "\n" ); + if( first_progress ) + fprintf( stream, "\n" ); - fprintf( stream, "\n%s%sSYMBOLS\n\n", - ( parser->p_basename ? parser->p_basename : "" ), - ( parser->p_basename ? ": " : "" ) ); + fprintf( stream, "\n%s%sSYMBOLS\n\n", + ( parser->p_basename ? 
parser->p_basename : "" ), + ( parser->p_basename ? ": " : "" ) ); - plist_for( parser->symbols, e ) - { - s = (SYMBOL*)plist_access( e ); + plist_for( parser->symbols, e ) + { + s = (SYMBOL*)plist_access( e ); - fprintf( stream, " " ); - fprintf( stream, "%c%d: ", - ( IS_TERMINAL( s ) ? 'T' : 'N' ), s->id ); + fprintf( stream, " " ); + fprintf( stream, "%c%d: ", + ( IS_TERMINAL( s ) ? 'T' : 'N' ), s->id ); - print_symbol( stream, s ); + print_symbol( stream, s ); - fprintf( stream, " [" ); + fprintf( stream, " [" ); - switch( s->type ) - { - case SYM_NON_TERMINAL: - fprintf( stream, "non-terminal" ); - break; + switch( s->type ) + { + case SYM_NON_TERMINAL: + fprintf( stream, "non-terminal" ); + break; - case SYM_CCL_TERMINAL: - fprintf( stream, "terminal: character class" ); - break; + case SYM_CCL_TERMINAL: + fprintf( stream, "terminal: character class" ); + break; - case SYM_REGEX_TERMINAL: - if( s->keyword ) - fprintf( stream, "terminal: string" ); - else - fprintf( stream, "terminal: regular expression" ); + case SYM_REGEX_TERMINAL: + if( s->keyword ) + fprintf( stream, "terminal: string" ); + else + fprintf( stream, "terminal: regular expression" ); - if( s->greedy ) - fprintf( stream, "(greedy)" ); - else - fprintf( stream, "(non-greedy)" ); - break; + if( s->greedy ) + fprintf( stream, "(greedy)" ); + else + fprintf( stream, "(non-greedy)" ); + break; - case SYM_SYSTEM_TERMINAL: - fprintf( stream, "system terminal" ); - break; + case SYM_SYSTEM_TERMINAL: + fprintf( stream, "system terminal" ); + break; - default: - fprintf( stream, "undefined" ); - break; - } + default: + fprintf( stream, "undefined" ); + break; + } - fprintf( stream, "]" ); + fprintf( stream, "]" ); - if( s->vtype ) - fprintf( stream, " <%s>", s->vtype->int_name ); + if( s->vtype ) + fprintf( stream, " <%s>", s->vtype->int_name ); - fprintf( stream, "\n" ); - } + fprintf( stream, "\n" ); + } - fprintf( stream, "\n" ); + fprintf( stream, "\n" ); - first_progress = FALSE; + first_progress = 
FALSE; } /** Dumps an item set. @@ -197,65 +188,65 @@ output is written to stderr. */ void dump_item_set( FILE* stream, char* title, LIST* list ) { - ITEM* it = (ITEM*)NULL; - LIST* i = (LIST*)NULL; - plistel* e; - SYMBOL* sym = (SYMBOL*)NULL; - int cnt = 0; + ITEM* it = (ITEM*)NULL; + LIST* i = (LIST*)NULL; + plistel* e; + SYMBOL* sym = (SYMBOL*)NULL; + int cnt = 0; - if( !stream ) - stream = stderr; + if( !stream ) + stream = stderr; - if( list != (LIST*)NULL ) - { - /* if( first_progress ) - fprintf( stream, "\n\n" ); */ + if( list != (LIST*)NULL ) + { + /* if( first_progress ) + fprintf( stream, "\n\n" ); */ - if( title ) - fprintf( stream, "\n%s\n", title ); + if( title ) + fprintf( stream, "\n%s\n", title ); - for( i = list; i; i = i->next ) - { - it = i->pptr; + for( i = list; i; i = i->next ) + { + it = i->pptr; - fprintf( stream, " (%d) %s -> ", - it->prod->id, it->prod->lhs->name ); + fprintf( stream, " (%d) %s -> ", + it->prod->id, it->prod->lhs->name ); - cnt = 0; + cnt = 0; - plist_for( it->prod->rhs, e ) - { - sym = (SYMBOL*)plist_access( e ); + plist_for( it->prod->rhs, e ) + { + sym = (SYMBOL*)plist_access( e ); - if( cnt == it->dot_offset ) - fprintf( stream, "." ); + if( cnt == it->dot_offset ) + fprintf( stream, "." ); - print_symbol( stream, sym ); - fprintf( stream, " " ); + print_symbol( stream, sym ); + fprintf( stream, " " ); - cnt++; - } + cnt++; + } - if( cnt == it->dot_offset ) - { - fprintf( stream, "." ); - fprintf( stream, " { " ); - plist_for( &it->lookahead, e ) - { - sym = (SYMBOL*)plist_access( e ); + if( cnt == it->dot_offset ) + { + fprintf( stream, "." 
); + fprintf( stream, " { " ); + plist_for( &it->lookahead, e ) + { + sym = (SYMBOL*)plist_access( e ); - print_symbol( stream, sym ); - fprintf( stream, " " ); - } - fprintf( stream, "}" ); - } + print_symbol( stream, sym ); + fprintf( stream, " " ); + } + fprintf( stream, "}" ); + } - fprintf( stream, "\n" ); - } - } + fprintf( stream, "\n" ); + } + } - first_progress = FALSE; + first_progress = FALSE; } /** Dumps the LALR(1) states. @@ -266,26 +257,26 @@ output is written to stdout. */ void dump_lalr_states( FILE* stream, PARSER* parser ) { - STATE* st; - - if( !stream ) - stream = stderr; - - if( first_progress ) - fprintf( stream, "\n\n" ); - first_progress = FALSE; - - fprintf( stream, "\n%s%sLALR(1) STATES\n\n", - ( parser->p_basename ? parser->p_basename : "" ), - ( parser->p_basename ? ": " : "" ) ); - - parray_for( parser->states, st ) - { - fprintf( stream, " State %d:\n", st->state_id ); - dump_item_set( stream, "Kernel:", st->kernel ); - dump_item_set( stream, "Epsilon:", st->epsilon ); - fprintf( stream, "\n" ); - } + STATE* st; + + if( !stream ) + stream = stderr; + + if( first_progress ) + fprintf( stream, "\n\n" ); + first_progress = FALSE; + + fprintf( stream, "\n%s%sLALR(1) STATES\n\n", + ( parser->p_basename ? parser->p_basename : "" ), + ( parser->p_basename ? ": " : "" ) ); + + parray_for( parser->states, st ) + { + fprintf( stream, " State %d:\n", st->state_id ); + dump_item_set( stream, "Kernel:", st->kernel ); + dump_item_set( stream, "Epsilon:", st->epsilon ); + fprintf( stream, "\n" ); + } } /** Dumps all productions. @@ -296,26 +287,26 @@ void dump_lalr_states( FILE* stream, PARSER* parser ) */ void dump_productions( FILE* stream, PARSER* parser ) { - PROD* p; - plistel* e; - - if( !stream ) - stream = stderr; - - if( first_progress ) - fprintf( stream, "\n" ); - fprintf( stream, "%s%sPRODUCTIONS\n\n", - ( parser->p_basename ? parser->p_basename : "" ), - ( parser->p_basename ? 
": " : "" ) ); - - plist_for( parser->productions, e ) - { - p = (PROD*)plist_access( e ); - dump_production( stream, p, TRUE, TRUE ); - fprintf( stream, "\n" ); - } - - first_progress = FALSE; + PROD* p; + plistel* e; + + if( !stream ) + stream = stderr; + + if( first_progress ) + fprintf( stream, "\n" ); + fprintf( stream, "%s%sPRODUCTIONS\n\n", + ( parser->p_basename ? parser->p_basename : "" ), + ( parser->p_basename ? ": " : "" ) ); + + plist_for( parser->productions, e ) + { + p = (PROD*)plist_access( e ); + dump_production( stream, p, TRUE, TRUE ); + fprintf( stream, "\n" ); + } + + first_progress = FALSE; } /** Dumps one production. @@ -327,53 +318,53 @@ void dump_productions( FILE* stream, PARSER* parser ) //semantics// is the If TRUE, production-related semantics are even printed. */ void dump_production( FILE* stream, PROD* p, - BOOLEAN with_lhs, BOOLEAN semantics ) + BOOLEAN with_lhs, BOOLEAN semantics ) { - plist* l = p->rhs; - plistel* e; - BOOLEAN embedded = FALSE; - SYMBOL* sym; - - if( !stream ) - stream = stderr; - - if( with_lhs ) - { - fprintf( stream, " (%d) ", p->id ); - - plist_for( p->all_lhs, e ) - { - sym = (SYMBOL*)plist_access( e ); - fprintf( stream, "%s ", sym->name ); - } - - fprintf( stream, "-> " ); - } - - if( semantics && plist_count( p->sem_rhs ) ) - { - l = p->sem_rhs; - embedded = TRUE; - } - - plist_for( l, e ) - { - if( embedded && plist_count( l ) > plist_count( p->rhs ) ) - fprintf( stream, "<<" ); - else - { - sym = (SYMBOL*)plist_access( e ); - print_symbol( stream, sym ); - } - - if( semantics && plist_key( e ) ) - fprintf( stream, ":%s", plist_key( e ) ); - - fprintf( stream, " " ); - } - - if( p->code && semantics ) - fprintf( stream, " [*%s*]", p->code ); - - fprintf( stream, "\n" ); + plist* l = p->rhs; + plistel* e; + BOOLEAN embedded = FALSE; + SYMBOL* sym; + + if( !stream ) + stream = stderr; + + if( with_lhs ) + { + fprintf( stream, " (%d) ", p->id ); + + plist_for( p->all_lhs, e ) + { + sym = 
(SYMBOL*)plist_access( e ); + fprintf( stream, "%s ", sym->name ); + } + + fprintf( stream, "-> " ); + } + + if( semantics && plist_count( p->sem_rhs ) ) + { + l = p->sem_rhs; + embedded = TRUE; + } + + plist_for( l, e ) + { + if( embedded && plist_count( l ) > plist_count( p->rhs ) ) + fprintf( stream, "<<" ); + else + { + sym = (SYMBOL*)plist_access( e ); + print_symbol( stream, sym ); + } + + if( semantics && plist_key( e ) ) + fprintf( stream, ":%s", plist_key( e ) ); + + fprintf( stream, " " ); + } + + if( p->code && semantics ) + fprintf( stream, " [*%s*]", p->code ); + + fprintf( stream, "\n" ); } diff --git a/src/error.c b/src/error.c index 9a63c17..8d71885 100644 --- a/src/error.c +++ b/src/error.c @@ -1,58 +1,49 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: error.c -Author: Jan Max Meyer -Usage: Error message handling ------------------------------------------------------------------------------ */ +/* Error message handling */ #include "unicc.h" char* error_txt[128] = { - "Memory allocation error (out of memory?) 
in %s, line %d", - "Parameter required behind command-line option \'%s\'", - "Unknown command-line option \'%s\'", - "Parse error: Invalid input \'%s\'", - "Parse error: Found \'%s\', but expecting %s", - "Multiple goal symbols defined; \'%s\' already defined as goal symbol", - "Invalid right-hand side definition for goal symbol", - "No goal symbol defined", - "Multiple definition for terminal \'%s\'", - "Unknown parser configuration directive \'%s\'", - "Call to whitespace token \'%s\' not allowed", - "Non-terminal \'%s\' is used but never defined", - "Terminal-symbol \'%s\' is used but never defined", - "Non-terminal \'%s\' is defined but never used", - "Terminal-symbol \'%s\' is defined but never used", - "Reduce-reduce conflict on lookahead: ", - "Shift-reduce conflict on lookahead: ", - "Terminal anomaly at shift on \'%s\' and reduce on \'%s\'", - "Unimplemented template \'%s\' for code generator", - "Undefined value type for \'%s\' in reduction code of rule %d, \'%.*s\'", - "Unable to open output file \'%s\'", - "Unable to open input file \'%s\'", - "Can't find generator definition file \'%s\'", - "Can't find tag <%s> in %s", - "XML parse errors %s:\n\t%s", - "In %s: tag <%s> is incomplete, and requires for %s-attribute", - "Duplicate escape sequence definition for \'%s\' in %s", - "Circular definition", - "Empty recursion", - "Useless production, no terminals in expansion", - /* semantic warnings and errors */ - "Use of effectless directive \'%s\' in scanner-mode ignored", - "Nonterminal whitespace \'%s\' is not allowed in scanner-mode", - "Invalid value for character universe", - "Character-class overlap in scanner mode with \'%s\'", - "Action references to undefined right-hand side symbol '%.*s'", - "Left-hand side '%s' not known", - "Terminal '%s' not known", - "Ignoring semantic code: `#!language´ must explicitly be specified.", - "Multiple use of directive '#%s' ignored; It has already been defined." + "Memory allocation error (out of memory?) 
in %s, line %d", + "Parameter required behind command-line option \'%s\'", + "Unknown command-line option \'%s\'", + "Parse error: Invalid input \'%s\'", + "Parse error: Found \'%s\', but expecting %s", + "Multiple goal symbols defined; \'%s\' already defined as goal symbol", + "Invalid right-hand side definition for goal symbol", + "No goal symbol defined", + "Multiple definition for terminal \'%s\'", + "Unknown parser configuration directive \'%s\'", + "Call to whitespace token \'%s\' not allowed", + "Non-terminal \'%s\' is used but never defined", + "Terminal-symbol \'%s\' is used but never defined", + "Non-terminal \'%s\' is defined but never used", + "Terminal-symbol \'%s\' is defined but never used", + "Reduce-reduce conflict on lookahead: ", + "Shift-reduce conflict on lookahead: ", + "Terminal anomaly at shift on \'%s\' and reduce on \'%s\'", + "Unimplemented template \'%s\' for code generator", + "Undefined value type for \'%s\' in reduction code of rule %d, \'%.*s\'", + "Unable to open output file \'%s\'", + "Unable to open input file \'%s\'", + "Can't find generator definition file \'%s\'", + "Can't find tag <%s> in %s", + "XML parse errors %s:\n\t%s", + "In %s: tag <%s> is incomplete, and requires for %s-attribute", + "Duplicate escape sequence definition for \'%s\' in %s", + "Circular definition", + "Empty recursion", + "Useless production, no terminals in expansion", + /* semantic warnings and errors */ + "Use of effectless directive \'%s\' in scanner-mode ignored", + "Nonterminal whitespace \'%s\' is not allowed in scanner-mode", + "Invalid value for character universe", + "Character-class overlap in scanner mode with \'%s\'", + "Action references to undefined right-hand side symbol '%.*s'", + "Left-hand side '%s' not known", + "Terminal '%s' not known", + "Ignoring semantic code: `#!language´ must explicitly be specified.", + "Multiple use of directive '#%s' ignored; It has already been defined." 
}; int error_count = 0; @@ -72,151 +63,150 @@ message style. */ void print_error( PARSER* parser, ERRORCODE err_id, int err_style, ... ) { - va_list params; - char* filename = (char*)NULL; - int line = 0; - STATE* state = (STATE*)NULL; - PROD* p = (PROD*)NULL; - SYMBOL* s = (SYMBOL*)NULL; - XML_T messages; - XML_T errmsg = (XML_T)NULL; - - BOOLEAN do_print = TRUE; - - char* tmp; - - va_start( params, err_style ); - - if( err_style & ERRSTYLE_WARNING && no_warnings ) - do_print = MAKE_BOOLEAN( err_style & ERRSTYLE_IMPORTANT ); - - if( parser->gen_xml ) - { - /* - This XML library is a cramp... - it's not possible to move a root-tag, - so it's required to create a dummy-root - that is never used. - */ - if( !parser->err_xml ) - { - parser->err_xml = xml_new( "dummy" ); - messages = xml_add_child( parser->err_xml, "messages", 0 ); - } - else - messages = xml_child( parser->err_xml, "messages" ); - - if( err_style & ERRSTYLE_FATAL ) - errmsg = xml_add_child( messages, "error", 0 ); - else - errmsg = xml_add_child( messages, "warning", 0 ); - - xml_set_int_attr( errmsg, "errorcode", err_id ); - } - - if( err_style & ERRSTYLE_FILEINFO ) - { - filename = va_arg( params, char* ); - line = va_arg( params, int ); - - if( errmsg ) - { - xml_set_attr_d( errmsg, "filename", filename ); - xml_set_int_attr( errmsg, "line", line ); - } - } - - if( err_style & ERRSTYLE_STATEINFO ) - { - state = va_arg( params, STATE* ); - - if( errmsg ) - xml_set_int_attr( errmsg, "state", state->state_id ); - } - else if( err_style & ERRSTYLE_PRODUCTION ) - { - p = va_arg( params, PROD* ); - - if( errmsg ) - xml_set_int_attr( errmsg, "production", p->id ); - } - - /* NO ELSE IF!! 
*/ - if( err_style & ERRSTYLE_SYMBOL ) - { - s = va_arg( params, SYMBOL* ); - - if( errmsg ) - xml_set_int_attr( errmsg, "symbol", s->id ); - } - - if( do_print ) - { - if( first_progress ) - fprintf( stderr, "\n" ); - } - - if( err_style & ERRSTYLE_FATAL ) - { - fprintf( stderr, "%s: error: ", progname ); - error_count++; - } - else if( err_style & ERRSTYLE_WARNING ) - { - if( do_print ) - fprintf( stderr, "%s: warning: ", progname ); - - warning_count++; - } - - if( do_print ) - { - if( err_style & ERRSTYLE_FILEINFO ) - fprintf( stderr, "%s(%d):\n ", filename, line ); - else if( err_style & ERRSTYLE_STATEINFO ) - fprintf( stderr, "state %d: ", state->state_id ); - } - - if( do_print ) - { - vfprintf( stderr, error_txt[ err_id ], params ); - - if( err_style & ERRSTYLE_SYMBOL ) - print_symbol( stderr, s ); - - fprintf( stderr, "\n" ); - - if( err_style & ERRSTYLE_STATEINFO ) - { - dump_item_set( stderr, (char*)NULL, state->kernel ); - dump_item_set( stderr, (char*)NULL, state->epsilon ); - } - else if( err_style & ERRSTYLE_PRODUCTION ) - { - fprintf( stderr, " " ); - dump_production( stderr, p, TRUE, FALSE ); - } - } - - /* Halt on memory error! */ - if( err_id == ERR_MEMORY_ERROR ) - { - va_end( params ); - exit( 1 ); - } - - if( errmsg ) - { - /* Unfortunatelly, this must be done this ugly way... */ - pvasprintf( &tmp, error_txt[ err_id ], params ); - xml_set_txt_d( errmsg, tmp ); - pfree( tmp ); - } - - va_end( params ); - - if( do_print ) - first_progress = FALSE; + va_list params; + char* filename = (char*)NULL; + int line = 0; + STATE* state = (STATE*)NULL; + PROD* p = (PROD*)NULL; + SYMBOL* s = (SYMBOL*)NULL; + XML_T messages; + XML_T errmsg = (XML_T)NULL; + + BOOLEAN do_print = TRUE; + + char* tmp; + + va_start( params, err_style ); + + if( err_style & ERRSTYLE_WARNING && no_warnings ) + do_print = MAKE_BOOLEAN( err_style & ERRSTYLE_IMPORTANT ); + + if( parser->gen_xml ) + { + /* + This XML library is a cramp... 
+ it's not possible to move a root-tag, + so it's required to create a dummy-root + that is never used. + */ + if( !parser->err_xml ) + { + parser->err_xml = xml_new( "dummy" ); + messages = xml_add_child( parser->err_xml, "messages", 0 ); + } + else + messages = xml_child( parser->err_xml, "messages" ); + + if( err_style & ERRSTYLE_FATAL ) + errmsg = xml_add_child( messages, "error", 0 ); + else + errmsg = xml_add_child( messages, "warning", 0 ); + + xml_set_int_attr( errmsg, "errorcode", err_id ); + } + + if( err_style & ERRSTYLE_FILEINFO ) + { + filename = va_arg( params, char* ); + line = va_arg( params, int ); + + if( errmsg ) + { + xml_set_attr_d( errmsg, "filename", filename ); + xml_set_int_attr( errmsg, "line", line ); + } + } + + if( err_style & ERRSTYLE_STATEINFO ) + { + state = va_arg( params, STATE* ); + + if( errmsg ) + xml_set_int_attr( errmsg, "state", state->state_id ); + } + else if( err_style & ERRSTYLE_PRODUCTION ) + { + p = va_arg( params, PROD* ); + + if( errmsg ) + xml_set_int_attr( errmsg, "production", p->id ); + } + + /* NO ELSE IF!! 
*/ + if( err_style & ERRSTYLE_SYMBOL ) + { + s = va_arg( params, SYMBOL* ); + + if( errmsg ) + xml_set_int_attr( errmsg, "symbol", s->id ); + } + + if( do_print ) + { + if( first_progress ) + fprintf( stderr, "\n" ); + } + + if( err_style & ERRSTYLE_FATAL ) + { + fprintf( stderr, "%s: error: ", progname ); + error_count++; + } + else if( err_style & ERRSTYLE_WARNING ) + { + if( do_print ) + fprintf( stderr, "%s: warning: ", progname ); + + warning_count++; + } + + if( do_print ) + { + if( err_style & ERRSTYLE_FILEINFO ) + fprintf( stderr, "%s(%d):\n ", filename, line ); + else if( err_style & ERRSTYLE_STATEINFO ) + fprintf( stderr, "state %d: ", state->state_id ); + } + + if( do_print ) + { + vfprintf( stderr, error_txt[ err_id ], params ); + + if( err_style & ERRSTYLE_SYMBOL ) + print_symbol( stderr, s ); + + fprintf( stderr, "\n" ); + + if( err_style & ERRSTYLE_STATEINFO ) + { + dump_item_set( stderr, (char*)NULL, state->kernel ); + dump_item_set( stderr, (char*)NULL, state->epsilon ); + } + else if( err_style & ERRSTYLE_PRODUCTION ) + { + fprintf( stderr, " " ); + dump_production( stderr, p, TRUE, FALSE ); + } + } + + /* Halt on memory error! */ + if( err_id == ERR_MEMORY_ERROR ) + { + va_end( params ); + exit( 1 ); + } + + if( errmsg ) + { + /* Unfortunatelly, this must be done this ugly way... */ + pvasprintf( &tmp, error_txt[ err_id ], params ); + xml_set_txt_d( errmsg, tmp ); + pfree( tmp ); + } + + va_end( params ); + + if( do_print ) + first_progress = FALSE; } - diff --git a/src/first.c b/src/first.c index 43de2eb..1b9c7f5 100644 --- a/src/first.c +++ b/src/first.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. 
- -File: first.c -Author: Jan Max Meyer -Usage: First set computation ------------------------------------------------------------------------------ */ +/* The First set computation */ #include "unicc.h" @@ -19,71 +10,57 @@ for. */ void compute_first( PARSER* parser ) { - plistel* e; - plistel* f; - plistel* g; - PROD* p = (PROD*)NULL; - SYMBOL* sym = (SYMBOL*)NULL; - SYMBOL* s = (SYMBOL*)NULL; - int nullable = FALSE; - int cnt = 0; - int prev_cnt; - - /* - 23.08.2008 Jan Max Meyer - - Uhh bad bug resolved here, in grammars like - - x$ -> y ; - y -> y 'a' | ; - - The first set of y and x was always empty! - Very, very bad...now it's resolved, due - uncommenting "if( !k )" in one line below... - Prototype generator "min_lalr1" did it the - correct way - */ - do - { - prev_cnt = cnt; - cnt = 0; - - plist_for( parser->symbols, e ) - { - s = (SYMBOL*)plist_access( e ); - - if( s->type == SYM_NON_TERMINAL ) - { - plist_for( s->productions, f ) - { - nullable = FALSE; - - p = (PROD*)plist_access( f ); - - if( plist_count( p->rhs ) > 0 ) - { - plist_for( p->rhs, g ) - { - sym = (SYMBOL*)plist_access( g ); - - plist_union( s->first, sym->first ); - nullable = sym->nullable; - - if( !nullable ) - break; - } - } - else - nullable = TRUE; - - s->nullable |= nullable; - } - } - - cnt += plist_count( s->first ); - } - } - while( prev_cnt != cnt ); + plistel* e; + plistel* f; + plistel* g; + PROD* p = (PROD*)NULL; + SYMBOL* sym = (SYMBOL*)NULL; + SYMBOL* s = (SYMBOL*)NULL; + int nullable = FALSE; + int cnt = 0; + int prev_cnt; + + do + { + prev_cnt = cnt; + cnt = 0; + + plist_for( parser->symbols, e ) + { + s = (SYMBOL*)plist_access( e ); + + if( s->type == SYM_NON_TERMINAL ) + { + plist_for( s->productions, f ) + { + nullable = FALSE; + + p = (PROD*)plist_access( f ); + + if( plist_count( p->rhs ) > 0 ) + { + plist_for( p->rhs, g ) + { + sym = (SYMBOL*)plist_access( g ); + + plist_union( s->first, sym->first ); + nullable = sym->nullable; + + if( !nullable ) + break; + } + } + 
else + nullable = TRUE; + + s->nullable |= nullable; + } + } + + cnt += plist_count( s->first ); + } + } + while( prev_cnt != cnt ); } @@ -98,28 +75,27 @@ Returns TRUE if the whole right-hand side is possibly nullable, FALSE else. */ int seek_rhs_first( plist* first, plistel* rhs ) { - SYMBOL* sym = (SYMBOL*)NULL; + SYMBOL* sym = (SYMBOL*)NULL; - for( ; rhs; rhs = plist_next( rhs ) ) - { - sym = (SYMBOL*)plist_access( rhs ); + for( ; rhs; rhs = plist_next( rhs ) ) + { + sym = (SYMBOL*)plist_access( rhs ); - if( IS_TERMINAL( sym ) ) - { - if( !plist_get_by_ptr( first, sym ) ) - plist_push( first, sym ); + if( IS_TERMINAL( sym ) ) + { + if( !plist_get_by_ptr( first, sym ) ) + plist_push( first, sym ); - break; - } - else - { - plist_union( first, sym->first ); + break; + } + else + { + plist_union( first, sym->first ); - if( !( sym->nullable ) ) - break; - } - } + if( !( sym->nullable ) ) + break; + } + } - return sym->nullable; + return sym->nullable; } - diff --git a/src/integrity.c b/src/integrity.c index 4265734..0c997b9 100644 --- a/src/integrity.c +++ b/src/integrity.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: integrity.c -Author: Jan Max Meyer -Usage: Grammar integrity checking functions ------------------------------------------------------------------------------ */ +/* Grammar integrity checking functions */ #include "unicc.h" @@ -18,34 +9,34 @@ Usage: Grammar integrity checking functions Returns a TRUE if undefined or unused symbols are found, else FALSE. 
*/ BOOLEAN find_undef_or_unused( PARSER* parser ) { - plistel* e; - SYMBOL* sym; - BOOLEAN ret = FALSE; - - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( sym->generated == FALSE && sym->defined == FALSE ) - { - print_error( parser, (sym->type == SYM_NON_TERMINAL ) ? - ERR_UNDEFINED_NONTERM : ERR_UNDEFINED_TERM, - ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, - parser->filename, sym->line, sym->name ); - - ret = TRUE; - } - - if( sym->generated == FALSE && sym->used == FALSE ) - { - print_error( parser, (sym->type == SYM_NON_TERMINAL ) ? - ERR_UNUSED_NONTERM : ERR_UNUSED_TERM, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - parser->filename, sym->line, sym->name ); - } - } - - return ret; + plistel* e; + SYMBOL* sym; + BOOLEAN ret = FALSE; + + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( sym->generated == FALSE && sym->defined == FALSE ) + { + print_error( parser, (sym->type == SYM_NON_TERMINAL ) ? + ERR_UNDEFINED_NONTERM : ERR_UNDEFINED_TERM, + ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, + parser->filename, sym->line, sym->name ); + + ret = TRUE; + } + + if( sym->generated == FALSE && sym->used == FALSE ) + { + print_error( parser, (sym->type == SYM_NON_TERMINAL ) ? + ERR_UNUSED_NONTERM : ERR_UNUSED_TERM, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + parser->filename, sym->line, sym->name ); + } + } + + return ret; } /** This function is required to test if a given character class will be @@ -63,48 +54,48 @@ consumed by a NFA machine state. Returns the result of the transition on the given character class, 0 if there is no transition. 
*/ static int nfa_transition_on_ccl( pregex_nfa* nfa, plist* res, - unsigned int* accept, pccl* check_with ) + unsigned int* accept, pccl* check_with ) { - int i; - wchar_t beg; - wchar_t end; - wchar_t ch; - plist* tr; - plist* ret_res; - plistel* e; - - if( !plist_count( res ) ) - plist_push( res, plist_access( plist_first( nfa->states ) ) ); - - pregex_nfa_epsilon_closure( nfa, res, accept, (int*)NULL ); - ret_res = plist_create( 0, PLIST_MOD_PTR ); - - for( i = 0; pccl_get( &beg, &end, check_with, i ); i++ ) - { - /* - This may be the source for large run time latency. - The way chosen by pregex/dfa.c should be used if necessary - somewhere in future. - */ - for( ch = beg; ch <= end; ch++ ) - { - tr = plist_dup( res ); - - if( pregex_nfa_move( nfa, tr, ch, ch ) > 0 ) - plist_union( ret_res, tr ); - - tr = plist_free( tr ); - } - } - - plist_erase( res ); - - plist_for( ret_res, e ) - plist_push( res, plist_access( e ) ); - - plist_free( ret_res ); - - return plist_count( res ); + int i; + wchar_t beg; + wchar_t end; + wchar_t ch; + plist* tr; + plist* ret_res; + plistel* e; + + if( !plist_count( res ) ) + plist_push( res, plist_access( plist_first( nfa->states ) ) ); + + pregex_nfa_epsilon_closure( nfa, res, accept, (int*)NULL ); + ret_res = plist_create( 0, PLIST_MOD_PTR ); + + for( i = 0; pccl_get( &beg, &end, check_with, i ); i++ ) + { + /* + This may be the source for large run time latency. + The way chosen by pregex/dfa.c should be used if necessary + somewhere in future. + */ + for( ch = beg; ch <= end; ch++ ) + { + tr = plist_dup( res ); + + if( pregex_nfa_move( nfa, tr, ch, ch ) > 0 ) + plist_union( ret_res, tr ); + + tr = plist_free( tr ); + } + } + + plist_erase( res ); + + plist_for( ret_res, e ) + plist_push( res, plist_access( e ) ); + + plist_free( ret_res ); + + return plist_count( res ); } @@ -125,142 +116,142 @@ Returns a TRUE if the parse succeeded (when str was completely absorbed), FALSE else. 
*/ static BOOLEAN check_nfa_matches_parser( - PARSER* parser, pregex_nfa* nfa, plist* start_res, int start ) + PARSER* parser, pregex_nfa* nfa, plist* start_res, int start ) { - int stack[ 1024 ]; - int act; - unsigned int accept; - int idx; - int tos = 0; - PROD* rprod; - STATE* st; - TABCOL* col; - plist* res; - plistel* e; - BOOLEAN ret = TRUE; - LIST* l; - - /* - 06.03.2008 Jan Max Meyer - Changes on the algorithm because of new SHIFT_REDUCE-transitions. - They are either reduces. - - 30.01.2011 Jan Max Meyer - Renamed to check_nfa_matches_parser(), the function now tries to parse - along an NFA state machine which must not have its origin in a keyword - terminal. - - 16.01.2014 Jan Max Meyer - Fixed sources to run with libphorward v0.18 and newer. - - TODO: - This part of UniCC is programmed very rudely, and should be changed - somewhere in the future. But for now, it does its job. - - Nelson: "Haaahaaaa" ... never! I think... ;) - */ - if( ( st = (STATE*)parray_get( parser->states, start ) ) ) - stack[ tos++ ] = st->derived_from; - else - stack[ tos++ ] = start; - - stack[ tos ] = start; - - res = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); - - do - { - act = 0; - idx = 0; - st = (STATE*)parray_get( parser->states, stack[ tos ] ); - - for( l = st->actions; l; l = l->next ) - { - col = (TABCOL*)l->pptr; - if( col->symbol->type == SYM_CCL_TERMINAL ) - { - plist_erase( res ); - plist_for( start_res, e ) - plist_push( res, plist_access( e ) ); - - - if( nfa_transition_on_ccl( nfa, res, - &accept, col->symbol->ccl ) > 0 ) - { - act = col->action; - idx = col->index; - } - - if( act || accept ) - break; - } - } - /* - fprintf( stderr, "state = %d, act = %d idx = %d accept = %d res = %p\n", - st->state_id, act, idx, accept, res ); - */ - - plist_erase( start_res ); - plist_for( res, e ) - plist_push( start_res, plist_access( e ) ); - - if( accept ) - break; - - /* Error */ - if( act == ERROR ) - { - ret = FALSE; - break; - } - - /* Shift */ - if( act & 
SHIFT ) - stack[ ++tos ] = idx; - - /* Reduce */ - while( act & REDUCE ) - { - rprod = (PROD*)plist_access( - plist_get( parser->productions, idx ) ); - /* - fprintf( stderr, "tos = %d, reducing production %d, %d\n", - tos, idx, list_count( rprod->rhs ) ); - dump_production( stderr, rprod, FALSE, FALSE ); - */ - - tos -= plist_count( rprod->rhs ); - - /* - fprintf( stderr, "tos %d\n", tos ); - */ - tos++; - - st = (STATE*)parray_get( parser->states, stack[ tos - 1 ] ); - - for( l = st->gotos; l; l = l->next ) - { - col = (TABCOL*)l->pptr; - - if( col->symbol == rprod->lhs ) - { - act = col->action; - stack[ tos ] = idx = col->index; - break; - } - } - - if( stack[ tos ] == -1 ) - { - ret = FALSE; - break; - } - } - } - while( ret && !accept ); - - plist_free( res ); - return ret; + int stack[ 1024 ]; + int act; + unsigned int accept; + int idx; + int tos = 0; + PROD* rprod; + STATE* st; + TABCOL* col; + plist* res; + plistel* e; + BOOLEAN ret = TRUE; + LIST* l; + + /* + 06.03.2008 Jan Max Meyer + Changes on the algorithm because of new SHIFT_REDUCE-transitions. + They are either reduces. + + 30.01.2011 Jan Max Meyer + Renamed to check_nfa_matches_parser(), the function now tries to parse + along an NFA state machine which must not have its origin in a keyword + terminal. + + 16.01.2014 Jan Max Meyer + Fixed sources to run with libphorward v0.18 and newer. + + TODO: + This part of UniCC is programmed very rudely, and should be changed + somewhere in the future. But for now, it does its job. + + Nelson: "Haaahaaaa" ... never! I think... 
;) + */ + if( ( st = (STATE*)parray_get( parser->states, start ) ) ) + stack[ tos++ ] = st->derived_from; + else + stack[ tos++ ] = start; + + stack[ tos ] = start; + + res = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); + + do + { + act = 0; + idx = 0; + st = (STATE*)parray_get( parser->states, stack[ tos ] ); + + for( l = st->actions; l; l = l->next ) + { + col = (TABCOL*)l->pptr; + if( col->symbol->type == SYM_CCL_TERMINAL ) + { + plist_erase( res ); + plist_for( start_res, e ) + plist_push( res, plist_access( e ) ); + + + if( nfa_transition_on_ccl( nfa, res, + &accept, col->symbol->ccl ) > 0 ) + { + act = col->action; + idx = col->index; + } + + if( act || accept ) + break; + } + } + /* + fprintf( stderr, "state = %d, act = %d idx = %d accept = %d res = %p\n", + st->state_id, act, idx, accept, res ); + */ + + plist_erase( start_res ); + plist_for( res, e ) + plist_push( start_res, plist_access( e ) ); + + if( accept ) + break; + + /* Error */ + if( act == ERROR ) + { + ret = FALSE; + break; + } + + /* Shift */ + if( act & SHIFT ) + stack[ ++tos ] = idx; + + /* Reduce */ + while( act & REDUCE ) + { + rprod = (PROD*)plist_access( + plist_get( parser->productions, idx ) ); + /* + fprintf( stderr, "tos = %d, reducing production %d, %d\n", + tos, idx, list_count( rprod->rhs ) ); + dump_production( stderr, rprod, FALSE, FALSE ); + */ + + tos -= plist_count( rprod->rhs ); + + /* + fprintf( stderr, "tos %d\n", tos ); + */ + tos++; + + st = (STATE*)parray_get( parser->states, stack[ tos - 1 ] ); + + for( l = st->gotos; l; l = l->next ) + { + col = (TABCOL*)l->pptr; + + if( col->symbol == rprod->lhs ) + { + act = col->action; + stack[ tos ] = idx = col->index; + break; + } + } + + if( stack[ tos ] == -1 ) + { + ret = FALSE; + break; + } + } + } + while( ret && !accept ); + + plist_free( res ); + return ret; } @@ -275,10 +266,10 @@ reduced for, and the parse fails. 
A demonstation grammar for a keyword anomaly is the following: - start$ -> a; - a -> b "PRINT"; - b -> c | '[' b ']'; - c -> 'A-Z' | c 'A-Z'; + start$ -> a; + a -> b "PRINT"; + b -> c | '[' b ']'; + c -> 'A-Z' | c 'A-Z'; At the input "[HALLOPRINT]", which is valid, the parser will fail after successfully parsing "[HALLO", expecting a "]". @@ -288,210 +279,210 @@ successfully parsing "[HALLO", expecting a "]". Returns TRUE if regex anomalies where found, FALSE otherwise. */ BOOLEAN check_regex_anomalies( PARSER* parser ) { - STATE* st; - LIST* m; - LIST* n; - plistel* e; - plistel* f; - PROD* p; - SYMBOL* lhs; - SYMBOL* sym; - TABCOL* col; - TABCOL* ccol; - int cnt; - BOOLEAN found; - - plist* res; - pregex_nfa* nfa; - unsigned int accept; - - /* - 06.03.2008 Jan Max Meyer - Changes on the algorithm because of new SHIFT_REDUCE-transitions. - They are either reduces. - - 12.06.2010 Jan Max Meyer - Changed to new regular expression library functions, which can handle - entire sets of characters from 0x0 - 0xFFFF. - - 31.01.2011 Jan Max Meyer - Renamed the function to check_regex_anomalies() because not only keywords - are tested now, also entire regular expressions. - - 29.11.2011 Jan Max Meyer - Changed to new regular expression handling with the pregex_ptn-structure. - */ - - res = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); - - /* - For every keyword, try to find a character class beginning with the - same character as the keyword. Then, try to recognize the keyword - beginning from the state that forms the keyword, by running the - parser on its existing tables. - */ - parray_for( parser->states, st ) - { - /* First of all, count all possible reduces in the current state. */ - for( m = st->actions, cnt = 0; m; m = m->next ) - { - col = (TABCOL*)m->pptr; - if( col->action & REDUCE ) - cnt++; - } - - for( m = st->actions; m; m = m->next ) - { - col = (TABCOL*)m->pptr; - - /* Regular expression to be reduced? 
*/ - if( col->symbol->type == SYM_REGEX_TERMINAL - && col->action & REDUCE ) - { - /* - Table columns not derived from the kernel set - of the state are ignored - */ - if( col->derived_from && list_find( st->epsilon, - col->derived_from ) >= 0 ) - continue; - - /* - Generate NFA from pattern - */ - nfa = pregex_nfa_create(); - - col->symbol->ptn->accept = col->symbol->id + 1; - - pregex_ptn_to_nfa( nfa, col->symbol->ptn ); - - /* - check_nfa_matches_parser() can either be called here; - But to be sure, we have a try if there are shifts on - the same character, and then we try to parse the using - the existing parse tables. This will even be more - faster, I think. - */ - for( n = st->actions; n; n = n->next ) - { - ccol = (TABCOL*)n->pptr; - - /* Character class to be shifted? */ - if( ccol->symbol->type == SYM_CCL_TERMINAL - && ccol->action & SHIFT ) - { - plist_erase( res ); - - /* - fprintf( stderr, "col = >%s< ccol = >%s<\n", - col->symbol->name, - pccl_to_str( ccol->symbol->ccl, TRUE ) ); - */ - - /* - If this is a match with the grammar and the keyword, - a keyword anomaly exists between the shift by a - character and the reduce by a keyword which can be - derived from the build-up of the characters of the - keyword. This is not the problem if there is only - one reduce, but if there are more, output a warning! - */ - if( nfa_transition_on_ccl( - nfa, res, &accept, - ccol->symbol->ccl ) ) - { - /* - printf( "state %d\n", st->state_id ); - dump_item_set( stderr, (char*)NULL, st->kernel ); - dump_item_set( stderr, (char*)NULL, st->epsilon ); - getchar(); - */ - - if( check_nfa_matches_parser( parser, nfa, res, - st->state_id ) && cnt > 1 ) - { - /* - At this point, we have a candidate for a - regex anomaly.. now we check out all - positions where the left-hand side of the - reduced production appears in... - if there is no keyword to shift in the - FIRST-sets of following symbols, report - this anomaly! 
- */ - p = (PROD*)plist_access( - plist_get( parser->productions, - col->index ) ); - lhs = p->lhs; - - /* Go trough all productions */ - found = FALSE; - - plist_for( parser->productions, e ) - { - p = (PROD*)plist_access( e ); - - plist_for( p->rhs, f ) - { - sym = (SYMBOL*)plist_access( f ); - - if( sym == lhs && plist_next( f ) ) - { - do - { - f = plist_next( f ); - if( !f ) - break; - - sym = (SYMBOL*) - plist_access( f ); - /* - fprintf( stderr, "sym = " ); - print_symbol( stderr, sym ); - fprintf( stderr, " %d %d\n", - list_find( sym->first, - col->symbol ), - sym->nullable ); - */ - if( !plist_get_by_ptr( - sym->first, - col->symbol ) - && !sym->nullable ) - { - print_error( parser, - ERR_KEYWORD_ANOMALY, - ERRSTYLE_WARNING | - ERRSTYLE_STATEINFO, - st, ccol->symbol->name, - col->symbol->name ); - - found = TRUE; - break; - } - } - while( sym && sym->nullable ); - } - - if( found ) - break; - } - - if( found ) - break; - } - } - } - } - } - - nfa = pregex_nfa_free( nfa ); - } - } - } /* This is stupid... */ - - plist_free( res ); - - return FALSE; + STATE* st; + LIST* m; + LIST* n; + plistel* e; + plistel* f; + PROD* p; + SYMBOL* lhs; + SYMBOL* sym; + TABCOL* col; + TABCOL* ccol; + int cnt; + BOOLEAN found; + + plist* res; + pregex_nfa* nfa; + unsigned int accept; + + /* + 06.03.2008 Jan Max Meyer + Changes on the algorithm because of new SHIFT_REDUCE-transitions. + They are either reduces. + + 12.06.2010 Jan Max Meyer + Changed to new regular expression library functions, which can handle + entire sets of characters from 0x0 - 0xFFFF. + + 31.01.2011 Jan Max Meyer + Renamed the function to check_regex_anomalies() because not only keywords + are tested now, also entire regular expressions. + + 29.11.2011 Jan Max Meyer + Changed to new regular expression handling with the pregex_ptn-structure. 
+ */ + + res = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); + + /* + For every keyword, try to find a character class beginning with the + same character as the keyword. Then, try to recognize the keyword + beginning from the state that forms the keyword, by running the + parser on its existing tables. + */ + parray_for( parser->states, st ) + { + /* First of all, count all possible reduces in the current state. */ + for( m = st->actions, cnt = 0; m; m = m->next ) + { + col = (TABCOL*)m->pptr; + if( col->action & REDUCE ) + cnt++; + } + + for( m = st->actions; m; m = m->next ) + { + col = (TABCOL*)m->pptr; + + /* Regular expression to be reduced? */ + if( col->symbol->type == SYM_REGEX_TERMINAL + && col->action & REDUCE ) + { + /* + Table columns not derived from the kernel set + of the state are ignored + */ + if( col->derived_from && list_find( st->epsilon, + col->derived_from ) >= 0 ) + continue; + + /* + Generate NFA from pattern + */ + nfa = pregex_nfa_create(); + + col->symbol->ptn->accept = col->symbol->id + 1; + + pregex_ptn_to_nfa( nfa, col->symbol->ptn ); + + /* + check_nfa_matches_parser() can either be called here; + But to be sure, we have a try if there are shifts on + the same character, and then we try to parse the using + the existing parse tables. This will even be more + faster, I think. + */ + for( n = st->actions; n; n = n->next ) + { + ccol = (TABCOL*)n->pptr; + + /* Character class to be shifted? */ + if( ccol->symbol->type == SYM_CCL_TERMINAL + && ccol->action & SHIFT ) + { + plist_erase( res ); + + /* + fprintf( stderr, "col = >%s< ccol = >%s<\n", + col->symbol->name, + pccl_to_str( ccol->symbol->ccl, TRUE ) ); + */ + + /* + If this is a match with the grammar and the keyword, + a keyword anomaly exists between the shift by a + character and the reduce by a keyword which can be + derived from the build-up of the characters of the + keyword. 
This is not the problem if there is only + one reduce, but if there are more, output a warning! + */ + if( nfa_transition_on_ccl( + nfa, res, &accept, + ccol->symbol->ccl ) ) + { + /* + printf( "state %d\n", st->state_id ); + dump_item_set( stderr, (char*)NULL, st->kernel ); + dump_item_set( stderr, (char*)NULL, st->epsilon ); + getchar(); + */ + + if( check_nfa_matches_parser( parser, nfa, res, + st->state_id ) && cnt > 1 ) + { + /* + At this point, we have a candidate for a + regex anomaly.. now we check out all + positions where the left-hand side of the + reduced production appears in... + if there is no keyword to shift in the + FIRST-sets of following symbols, report + this anomaly! + */ + p = (PROD*)plist_access( + plist_get( parser->productions, + col->index ) ); + lhs = p->lhs; + + /* Go trough all productions */ + found = FALSE; + + plist_for( parser->productions, e ) + { + p = (PROD*)plist_access( e ); + + plist_for( p->rhs, f ) + { + sym = (SYMBOL*)plist_access( f ); + + if( sym == lhs && plist_next( f ) ) + { + do + { + f = plist_next( f ); + if( !f ) + break; + + sym = (SYMBOL*) + plist_access( f ); + /* + fprintf( stderr, "sym = " ); + print_symbol( stderr, sym ); + fprintf( stderr, " %d %d\n", + list_find( sym->first, + col->symbol ), + sym->nullable ); + */ + if( !plist_get_by_ptr( + sym->first, + col->symbol ) + && !sym->nullable ) + { + print_error( parser, + ERR_KEYWORD_ANOMALY, + ERRSTYLE_WARNING | + ERRSTYLE_STATEINFO, + st, ccol->symbol->name, + col->symbol->name ); + + found = TRUE; + break; + } + } + while( sym && sym->nullable ); + } + + if( found ) + break; + } + + if( found ) + break; + } + } + } + } + } + + nfa = pregex_nfa_free( nfa ); + } + } + } /* This is stupid... */ + + plist_free( res ); + + return FALSE; } @@ -505,73 +496,73 @@ FALSE if all is fine :D. 
*/ BOOLEAN check_stupid_productions( PARSER* parser ) { - plistel* e; - plistel* f; - PROD* p; - SYMBOL* sym; - BOOLEAN stupid = FALSE; - BOOLEAN possible = FALSE; - plist* first_check = (plist*)NULL; - - plist_for( parser->productions, e ) - { - p = (PROD*)plist_access( e ); - - if( plist_count( p->rhs ) == 1 - && plist_access( plist_first( p->rhs ) ) == p->lhs ) - { - print_error( parser, ERR_CIRCULAR_DEFINITION, - ERRSTYLE_WARNING | ERRSTYLE_PRODUCTION | ERRSTYLE_FILEINFO, - parser->filename, p->line, p ); - stupid = TRUE; - } - else if( p->lhs->nullable ) - { - possible = FALSE; - plist_for( p->rhs, f ) - { - sym = (SYMBOL*)plist_access( f ); - - if( !( sym->nullable ) ) - { - possible = FALSE; - break; - } - else if( sym == p->lhs ) - possible = TRUE; - } - - if( possible ) - { - print_error( parser, ERR_EMPTY_RECURSION, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO | ERRSTYLE_PRODUCTION, - parser->filename, p->line, p ); - stupid = TRUE; - } - } - - /* Get all FIRST-sets of the right-hand side; If there are none, - this can't be possible */ - if( plist_count( p->rhs ) > 0 ) - { - if( !first_check ) - first_check = plist_create( 0, - PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); - else - plist_erase( first_check ); - - seek_rhs_first( first_check, plist_first( p->rhs ) ); - - if( plist_count( first_check ) == 0 ) - { - print_error( parser, ERR_USELESS_RULE, - ERRSTYLE_WARNING | ERRSTYLE_PRODUCTION | ERRSTYLE_FILEINFO, - parser->filename, p->line, p ); - } - } - } - - plist_free( first_check ); - - return stupid; + plistel* e; + plistel* f; + PROD* p; + SYMBOL* sym; + BOOLEAN stupid = FALSE; + BOOLEAN possible = FALSE; + plist* first_check = (plist*)NULL; + + plist_for( parser->productions, e ) + { + p = (PROD*)plist_access( e ); + + if( plist_count( p->rhs ) == 1 + && plist_access( plist_first( p->rhs ) ) == p->lhs ) + { + print_error( parser, ERR_CIRCULAR_DEFINITION, + ERRSTYLE_WARNING | ERRSTYLE_PRODUCTION | ERRSTYLE_FILEINFO, + parser->filename, p->line, p ); + stupid = 
TRUE; + } + else if( p->lhs->nullable ) + { + possible = FALSE; + plist_for( p->rhs, f ) + { + sym = (SYMBOL*)plist_access( f ); + + if( !( sym->nullable ) ) + { + possible = FALSE; + break; + } + else if( sym == p->lhs ) + possible = TRUE; + } + + if( possible ) + { + print_error( parser, ERR_EMPTY_RECURSION, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO | ERRSTYLE_PRODUCTION, + parser->filename, p->line, p ); + stupid = TRUE; + } + } + + /* Get all FIRST-sets of the right-hand side; If there are none, + this can't be possible */ + if( plist_count( p->rhs ) > 0 ) + { + if( !first_check ) + first_check = plist_create( 0, + PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); + else + plist_erase( first_check ); + + seek_rhs_first( first_check, plist_first( p->rhs ) ); + + if( plist_count( first_check ) == 0 ) + { + print_error( parser, ERR_USELESS_RULE, + ERRSTYLE_WARNING | ERRSTYLE_PRODUCTION | ERRSTYLE_FILEINFO, + parser->filename, p->line, p ); + } + } + } + + plist_free( first_check ); + + return stupid; } diff --git a/src/lalr.c b/src/lalr.c index 0a67e22..273dc0d 100644 --- a/src/lalr.c +++ b/src/lalr.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: lalr.c -Author: Jan Max Meyer -Usage: Performs the LALR(1) parse table construction algorithm ------------------------------------------------------------------------------ */ +/* LR(1) / LALR(1) parse table construction */ #include "unicc.h" @@ -21,39 +12,39 @@ Usage: Performs the LALR(1) parse table construction algorithm Returns a int TRUE if the item sets are equal, else FALSE. 
*/ static int test_same_kernel( LIST* kernel1, LIST* kernel2 ) { - int ret = FALSE; - LIST* checklist = (LIST*)NULL; - LIST* i = (LIST*)NULL; - LIST* j = (LIST*)NULL; - ITEM* item1 = (ITEM*)NULL; - ITEM* item2 = (ITEM*)NULL; - - if( list_count( kernel1 ) == list_count( kernel2 ) ) - { - for( i = kernel1; i; i = i->next ) - { - item1 = i->pptr; - - for( j = kernel2; j; j = j->next ) - { - item2 = j->pptr; - - if( item1->prod == item2->prod - && item1->dot_offset == item2->dot_offset - && item1->next_symbol == item2->next_symbol ) - { - checklist = list_push( checklist, j->pptr ); - } - } - } - - if( list_count( kernel1 ) == list_count( checklist ) ) - ret = TRUE; - - list_free( checklist ); - } - - return ret; + int ret = FALSE; + LIST* checklist = (LIST*)NULL; + LIST* i = (LIST*)NULL; + LIST* j = (LIST*)NULL; + ITEM* item1 = (ITEM*)NULL; + ITEM* item2 = (ITEM*)NULL; + + if( list_count( kernel1 ) == list_count( kernel2 ) ) + { + for( i = kernel1; i; i = i->next ) + { + item1 = i->pptr; + + for( j = kernel2; j; j = j->next ) + { + item2 = j->pptr; + + if( item1->prod == item2->prod + && item1->dot_offset == item2->dot_offset + && item1->next_symbol == item2->next_symbol ) + { + checklist = list_push( checklist, j->pptr ); + } + } + } + + if( list_count( kernel1 ) == list_count( checklist ) ) + ret = TRUE; + + list_free( checklist ); + } + + return ret; } /** This is the key function which performs the major closure from one kernel @@ -66,114 +57,114 @@ be enhanced. 
*/ static void close_item( plist* productions, ITEM* it, LIST** closure_set ) { - LIST* l = (LIST*)NULL; - plistel* e; - plistel* f; - ITEM* cit = (ITEM*)NULL; - PROD* prod = (PROD*)NULL; - plist* first = (plist*)NULL; - - /* Only perform closure if the symbol right to the dot - of the current kernel item is a non-terminal */ - if( it->next_symbol ) - { - if( it->next_symbol->type == SYM_NON_TERMINAL ) - { - /* Find all right-hand sides of this non-terminal */ - plist_for( productions, f ) - { - prod = (PROD*)plist_access( f ); - - if( prod->lhs == it->next_symbol ) - { - /* Check if there is not already such an item - that uses this production! */ - for( l = *closure_set; l; l = l->next ) - { - cit = l->pptr; - if( cit->prod == prod ) - break; - } - - /* Add new item! */ - if( !l ) - { + LIST* l = (LIST*)NULL; + plistel* e; + plistel* f; + ITEM* cit = (ITEM*)NULL; + PROD* prod = (PROD*)NULL; + plist* first = (plist*)NULL; + + /* Only perform closure if the symbol right to the dot + of the current kernel item is a non-terminal */ + if( it->next_symbol ) + { + if( it->next_symbol->type == SYM_NON_TERMINAL ) + { + /* Find all right-hand sides of this non-terminal */ + plist_for( productions, f ) + { + prod = (PROD*)plist_access( f ); + + if( prod->lhs == it->next_symbol ) + { + /* Check if there is not already such an item + that uses this production! */ + for( l = *closure_set; l; l = l->next ) + { + cit = l->pptr; + if( cit->prod == prod ) + break; + } + + /* Add new item! 
*/ + if( !l ) + { #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n===> Closure: Creating new " - "item\n"); - dump_item_set( (FILE*)NULL, "Partial closure:", - *closure_set ); + fprintf( stderr, "\n===> Closure: Creating new " + "item\n"); + dump_item_set( (FILE*)NULL, "Partial closure:", + *closure_set ); #endif - cit = create_item( prod ); + cit = create_item( prod ); - *closure_set = list_push( *closure_set, cit ); - } + *closure_set = list_push( *closure_set, cit ); + } #if ON_ALGORITHM_DEBUG - else - { - fprintf( stderr, "\n===> Closure: Using existing " - "item\n"); - dump_item_set( (FILE*)NULL, "Partial closure:", - *closure_set ); - } + else + { + fprintf( stderr, "\n===> Closure: Using existing " + "item\n"); + dump_item_set( (FILE*)NULL, "Partial closure:", + *closure_set ); + } #endif #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n===> Closure: cit->prod %d " - "it->prod %d\n", - cit->prod->id, it->prod->id ); + fprintf( stderr, "\n===> Closure: cit->prod %d " + "it->prod %d\n", + cit->prod->id, it->prod->id ); #endif - /* --- Passing the lookaheads ... --- */ + /* --- Passing the lookaheads ... --- */ - /* If this is the last symbol... */ - if( plist_count( it->prod->rhs ) > 0 ) - { + /* If this is the last symbol... 
*/ + if( plist_count( it->prod->rhs ) > 0 ) + { #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n===> Closure: dot %d, " - "rhs %d len = %d \n", - it->dot_offset, it->prod->id, - plist_count( it->prod->rhs ) ); + fprintf( stderr, "\n===> Closure: dot %d, " + "rhs %d len = %d \n", + it->dot_offset, it->prod->id, + plist_count( it->prod->rhs ) ); #endif - if( !( e = plist_get( it->prod->rhs, - it->dot_offset + 1 ) ) ) - { - plist_union( &cit->lookahead, &it->lookahead ); + if( !( e = plist_get( it->prod->rhs, + it->dot_offset + 1 ) ) ) + { + plist_union( &cit->lookahead, &it->lookahead ); #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n===> Closure: Dot at the end, " - "taking lookahead \n"); - dump_item_set( (FILE*)NULL, "Partial closure:", - *closure_set ); + fprintf( stderr, "\n===> Closure: Dot at the end, " + "taking lookahead \n"); + dump_item_set( (FILE*)NULL, "Partial closure:", + *closure_set ); #endif - } - else - { - if( !first ) - first = plist_create( 0, - PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); - else - plist_erase( first ); + } + else + { + if( !first ) + first = plist_create( 0, + PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); + else + plist_erase( first ); - if( seek_rhs_first( first, e ) ) - plist_union( first, &it->lookahead ); + if( seek_rhs_first( first, e ) ) + plist_union( first, &it->lookahead ); - plist_union( &cit->lookahead, first ); + plist_union( &cit->lookahead, first ); #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n===> Closure: " - "Calculated lookahead\n"); - dump_item_set( (FILE*)NULL, "Partial closure:", - *closure_set ); + fprintf( stderr, "\n===> Closure: " + "Calculated lookahead\n"); + dump_item_set( (FILE*)NULL, "Partial closure:", + *closure_set ); #endif - } - } - } - } - } - } - - plist_free( first ); + } + } + } + } + } + } + + plist_free( first ); } /** Drops and frees a list of items. @@ -184,19 +175,19 @@ Returns LIST*(NULL) always. 
*/ static LIST* drop_item_list( LIST* list ) { - LIST* l; - ITEM* it; + LIST* l; + ITEM* it; - for( l = list; l; l = list_next( l ) ) - { - it = (ITEM*)list_access( l ); + for( l = list; l; l = list_next( l ) ) + { + it = (ITEM*)list_access( l ); - free_item( it ); - } + free_item( it ); + } - list_free( list ); + list_free( list ); - return (LIST*)NULL; + return (LIST*)NULL; } /** Performs an LR(1) closure and merges the lookahead-symbols of items with the @@ -207,392 +198,392 @@ closure. //st// is the state to be closed. */ static void lalr1_closure( PARSER* parser, int state_id ) { - STATE* st; - LIST* closure_start; - LIST* closure_set = (LIST*)NULL; - LIST* i = (LIST*)NULL; - LIST* j = (LIST*)NULL; - LIST* k = (LIST*)NULL; - ITEM* it = (ITEM*)NULL; - ITEM* cit = (ITEM*)NULL; - SYMBOL* sym_before_move = (SYMBOL*)NULL; - STATE* nstate = (STATE*)NULL; - - LIST* part_symbols = (LIST*)NULL; - LIST* partitions = (LIST*)NULL; - - int prev_cnt = 0; - int cnt = 0; - - if( !( st = parray_get( parser->states, state_id ) ) ) - { - WRONGPARAM; - return; - } - - closure_start = st->kernel; - - /* - 03.03.2008 Jan Max Meyer - Added new SHIFT_REDUCE-transition to build lesser states - (up to 30% lesser states!) - */ + STATE* st; + LIST* closure_start; + LIST* closure_set = (LIST*)NULL; + LIST* i = (LIST*)NULL; + LIST* j = (LIST*)NULL; + LIST* k = (LIST*)NULL; + ITEM* it = (ITEM*)NULL; + ITEM* cit = (ITEM*)NULL; + SYMBOL* sym_before_move = (SYMBOL*)NULL; + STATE* nstate = (STATE*)NULL; + + LIST* part_symbols = (LIST*)NULL; + LIST* partitions = (LIST*)NULL; + + int prev_cnt = 0; + int cnt = 0; + + if( !( st = parray_get( parser->states, state_id ) ) ) + { + WRONGPARAM; + return; + } + + closure_start = st->kernel; + + /* + 03.03.2008 Jan Max Meyer + Added new SHIFT_REDUCE-transition to build lesser states + (up to 30% lesser states!) 
+ */ #if ON_ALGORITHM_DEBUG - fprintf( stderr, "================\n"); - fprintf( stderr, "=== State % 2d ===\n", st->state_id ); - fprintf( stderr, "================\n"); - dump_item_set( (FILE*)NULL, "Kernel:", st->kernel ); - dump_item_set( (FILE*)NULL, "Epsilon:", st->epsilon ); + fprintf( stderr, "================\n"); + fprintf( stderr, "=== State % 2d ===\n", st->state_id ); + fprintf( stderr, "================\n"); + dump_item_set( (FILE*)NULL, "Kernel:", st->kernel ); + dump_item_set( (FILE*)NULL, "Epsilon:", st->epsilon ); #endif - /* - Performing the closure: - - First, closure is done on the kernel item set, - all following closures are done on the closure-set - resulting from the kernel closure. - - The closure is finished when no more items are added - to closure_set. - */ - do - { - prev_cnt = cnt; - cnt = 0; - - /* Iterating trough the kernel items */ - for( i = closure_start; i; i = i->next ) - { - it = i->pptr; - close_item( parser->productions, it, &closure_set ); - } - - closure_start = closure_set; - cnt = list_count( closure_set ); - - /* fprintf( stderr, "prev_cnt = %d, cnt = %d\n", prev_cnt, cnt ); */ - } - while( prev_cnt != cnt ); - - /*dump_item_set( (FILE*)NULL, "Closure:", closure_set );*/ - - /* - Adding all kernel items with outgoing transitions - to the closure item set now! These are all items - where next_symbol is not (SYMBOL*)NULL... - */ - for( i = st->kernel; i; i = i->next ) - { - it = i->pptr; - - if( it->next_symbol != (SYMBOL*)NULL ) - { - /* - The items must be really copied: - The complete memory must be mirrored and re-allocated to - create a single, independend item! - */ - cit = create_item( it->prod ); - - cit->prod = it->prod; - cit->dot_offset = it->dot_offset; - cit->next_symbol = it->next_symbol; - - plist_concat( &cit->lookahead, &it->lookahead ); - - closure_set = list_push( closure_set, cit ); - } - } - - /* - Moving all epsilon items (items with an epsilon production!) - to the epsilon item set of this state! 
- */ - for( i = closure_set; i; ) - { - it = i->pptr; - if( !plist_count( it->prod->rhs ) ) - { - /* For all items with the same epsilon transitions, - merge the lookaheads! */ - for( j = st->epsilon; j; j = j->next ) - { - cit = j->pptr; - if( cit->prod == it->prod ) - break; - } - - if( !j ) - { - st->epsilon = list_push( st->epsilon, it ); - } - else - { - plist_union( &cit->lookahead, &it->lookahead ); - free_item( it ); - } - - i = i->next; - closure_set = list_remove( closure_set, it ); - } - else - { - i = i->next; - } - } + /* + Performing the closure: + + First, closure is done on the kernel item set, + all following closures are done on the closure-set + resulting from the kernel closure. + + The closure is finished when no more items are added + to closure_set. + */ + do + { + prev_cnt = cnt; + cnt = 0; + + /* Iterating trough the kernel items */ + for( i = closure_start; i; i = i->next ) + { + it = i->pptr; + close_item( parser->productions, it, &closure_set ); + } + + closure_start = closure_set; + cnt = list_count( closure_set ); + + /* fprintf( stderr, "prev_cnt = %d, cnt = %d\n", prev_cnt, cnt ); */ + } + while( prev_cnt != cnt ); + + /*dump_item_set( (FILE*)NULL, "Closure:", closure_set );*/ + + /* + Adding all kernel items with outgoing transitions + to the closure item set now! These are all items + where next_symbol is not (SYMBOL*)NULL... + */ + for( i = st->kernel; i; i = i->next ) + { + it = i->pptr; + + if( it->next_symbol != (SYMBOL*)NULL ) + { + /* + The items must be really copied: + The complete memory must be mirrored and re-allocated to + create a single, independend item! + */ + cit = create_item( it->prod ); + + cit->prod = it->prod; + cit->dot_offset = it->dot_offset; + cit->next_symbol = it->next_symbol; + + plist_concat( &cit->lookahead, &it->lookahead ); + + closure_set = list_push( closure_set, cit ); + } + } + + /* + Moving all epsilon items (items with an epsilon production!) + to the epsilon item set of this state! 
+ */ + for( i = closure_set; i; ) + { + it = i->pptr; + if( !plist_count( it->prod->rhs ) ) + { + /* For all items with the same epsilon transitions, + merge the lookaheads! */ + for( j = st->epsilon; j; j = j->next ) + { + cit = j->pptr; + if( cit->prod == it->prod ) + break; + } + + if( !j ) + { + st->epsilon = list_push( st->epsilon, it ); + } + else + { + plist_union( &cit->lookahead, &it->lookahead ); + free_item( it ); + } + + i = i->next; + closure_set = list_remove( closure_set, it ); + } + else + { + i = i->next; + } + } #if 0 - fprintf( stderr, "\n--- State %d ---\n", st->state_id ); - dump_item_set( (FILE*)NULL, "Kernel:", st->kernel ); - dump_item_set( (FILE*)NULL, "Closure:", closure_set ); - dump_item_set( (FILE*)NULL, "Epsilon:", st->epsilon ); + fprintf( stderr, "\n--- State %d ---\n", st->state_id ); + dump_item_set( (FILE*)NULL, "Kernel:", st->kernel ); + dump_item_set( (FILE*)NULL, "Closure:", closure_set ); + dump_item_set( (FILE*)NULL, "Epsilon:", st->epsilon ); #endif - /* - Sorting the closure set by the symbols next to the dot. - */ - do - { - /* cnt will act as an "I had done something"-flag in this case! */ - cnt = 0; - for( i = closure_set; i; i = i->next ) - { - it = i->pptr; - if( i->next ) - cit = i->next->pptr; - else - cit = (ITEM*)NULL; - - if( it && cit ) - { - if( it->next_symbol && cit->next_symbol ) - { - if( it->next_symbol > cit->next_symbol - || ( it->next_symbol == cit->next_symbol - && it->prod->id > cit->prod->id ) ) - { - cnt = 1; - i->pptr = cit; - i->next->pptr = it; - } - } - } - } - } - while( cnt > 0 ); + /* + Sorting the closure set by the symbols next to the dot. + */ + do + { + /* cnt will act as an "I had done something"-flag in this case! 
*/ + cnt = 0; + for( i = closure_set; i; i = i->next ) + { + it = i->pptr; + if( i->next ) + cit = i->next->pptr; + else + cit = (ITEM*)NULL; + + if( it && cit ) + { + if( it->next_symbol && cit->next_symbol ) + { + if( it->next_symbol > cit->next_symbol + || ( it->next_symbol == cit->next_symbol + && it->prod->id > cit->prod->id ) ) + { + cnt = 1; + i->pptr = cit; + i->next->pptr = it; + } + } + } + } + } + while( cnt > 0 ); #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n--- State %d ---\n", st->state_id ); - dump_item_set( (FILE*)NULL, "Kernel:", st->kernel ); - dump_item_set( (FILE*)NULL, "Closure:", closure_set ); - dump_item_set( (FILE*)NULL, "Epsilon:", st->epsilon ); + fprintf( stderr, "\n--- State %d ---\n", st->state_id ); + dump_item_set( (FILE*)NULL, "Kernel:", st->kernel ); + dump_item_set( (FILE*)NULL, "Closure:", closure_set ); + dump_item_set( (FILE*)NULL, "Epsilon:", st->epsilon ); #endif - /* - Partitioning all items with the same symbol right to the dot - (all items that share the symbol where next_symbol points to...) - */ - for( i = closure_set; i; i = i->next ) - { - it = i->pptr; - - if( it->next_symbol != (SYMBOL*)NULL ) - { - if( ( cnt = list_find( part_symbols, it->next_symbol ) ) == -1 ) - { - part_symbols = list_push( part_symbols, it->next_symbol ); - partitions = list_push( partitions, - list_push( (LIST*)NULL, it ) ); - } - else - { - for( j = partitions; j && cnt > 0; j = j->next, cnt-- ) - ; - j->pptr = list_push( (LIST*)(j->pptr), it ); - } - } - } - - - /* - Creating new states from the partitions - */ - for( i = partitions; i; i = i->next ) - { - sym_before_move = (SYMBOL*)NULL; - - /* Move the dot in this partition one to the right! 
*/ - for( j = i->pptr; j; j = j->next ) - { - it = j->pptr; - - /* Remember the symbol to the right of the dot - before the dot is moved...*/ - if( sym_before_move == (SYMBOL*)NULL ) - sym_before_move = it->next_symbol; - - if( it->dot_offset < plist_count( it->prod->rhs ) ) - { - it->dot_offset++; - it->next_symbol = (SYMBOL*)plist_access( - plist_get( it->prod->rhs, - it->dot_offset ) ); - } - } - - /* - Jan Max Meyer, 03.03.2008 - SHIFT_REDUCE-feature added, as in min_lalr1 - - Jan Max Meyer, 28.05.2008 - Improved, not only for terminals, even for nonterminals :) - - Watch for partitions that are - - x -> y .z - - where x is nonterminal, y is a possible sequence of terminals and/or - nonterminals or even epsilon, and z is a terminal or nonterminal. - */ - if( parser->optimize_states - /* && ( IS_TERMINAL( sym_before_move ) & SYM_TERMINAL ) */ - && list_count( (LIST*)(i->pptr) ) == 1 - && it->next_symbol == (SYMBOL*)NULL ) - { + /* + Partitioning all items with the same symbol right to the dot + (all items that share the symbol where next_symbol points to...) + */ + for( i = closure_set; i; i = i->next ) + { + it = i->pptr; + + if( it->next_symbol != (SYMBOL*)NULL ) + { + if( ( cnt = list_find( part_symbols, it->next_symbol ) ) == -1 ) + { + part_symbols = list_push( part_symbols, it->next_symbol ); + partitions = list_push( partitions, + list_push( (LIST*)NULL, it ) ); + } + else + { + for( j = partitions; j && cnt > 0; j = j->next, cnt-- ) + ; + j->pptr = list_push( (LIST*)(j->pptr), it ); + } + } + } + + + /* + Creating new states from the partitions + */ + for( i = partitions; i; i = i->next ) + { + sym_before_move = (SYMBOL*)NULL; + + /* Move the dot in this partition one to the right! 
*/ + for( j = i->pptr; j; j = j->next ) + { + it = j->pptr; + + /* Remember the symbol to the right of the dot + before the dot is moved...*/ + if( sym_before_move == (SYMBOL*)NULL ) + sym_before_move = it->next_symbol; + + if( it->dot_offset < plist_count( it->prod->rhs ) ) + { + it->dot_offset++; + it->next_symbol = (SYMBOL*)plist_access( + plist_get( it->prod->rhs, + it->dot_offset ) ); + } + } + + /* + Jan Max Meyer, 03.03.2008 + SHIFT_REDUCE-feature added, as in min_lalr1 + + Jan Max Meyer, 28.05.2008 + Improved, not only for terminals, even for nonterminals :) + + Watch for partitions that are + + x -> y .z + + where x is nonterminal, y is a possible sequence of terminals and/or + nonterminals or even epsilon, and z is a terminal or nonterminal. + */ + if( parser->optimize_states + /* && ( IS_TERMINAL( sym_before_move ) & SYM_TERMINAL ) */ + && list_count( (LIST*)(i->pptr) ) == 1 + && it->next_symbol == (SYMBOL*)NULL ) + { #if 0 - fprintf( stderr, "\nAdding SHIFT_REDUCE entry\n", st->state_id ); - dump_item_set( (FILE*)NULL, "Partition:", (LIST*)(i->pptr) ); + fprintf( stderr, "\nAdding SHIFT_REDUCE entry\n", st->state_id ); + dump_item_set( (FILE*)NULL, "Partition:", (LIST*)(i->pptr) ); #endif - /* - Add a shift-reduce entry - */ - if( !( st->closed ) ) - { - if( IS_TERMINAL( sym_before_move ) ) - { - st->actions = list_push( st->actions, create_tabcol( - sym_before_move, SHIFT_REDUCE, - it->prod->id, (ITEM*)NULL ) ); - } - else - { - st->gotos = list_push( st->gotos, create_tabcol( - sym_before_move, SHIFT_REDUCE, - it->prod->id, (ITEM*)NULL ) ); - } - } - - drop_item_list( (LIST*)( i->pptr ) ); - } - else - { - /* - Proceed normally - */ - parray_for( parser->states, nstate ) - if( test_same_kernel( nstate->kernel, i->pptr ) ) - break; - - if( !nstate ) - { - nstate = create_state( parser ); - nstate->kernel = i->pptr; - nstate->derived_from = state_id; - - /* Re-get current state due possibly heap re-allocation */ - st = (STATE*)parray_get( 
parser->states, state_id ); + /* + Add a shift-reduce entry + */ + if( !( st->closed ) ) + { + if( IS_TERMINAL( sym_before_move ) ) + { + st->actions = list_push( st->actions, create_tabcol( + sym_before_move, SHIFT_REDUCE, + it->prod->id, (ITEM*)NULL ) ); + } + else + { + st->gotos = list_push( st->gotos, create_tabcol( + sym_before_move, SHIFT_REDUCE, + it->prod->id, (ITEM*)NULL ) ); + } + } + + drop_item_list( (LIST*)( i->pptr ) ); + } + else + { + /* + Proceed normally + */ + parray_for( parser->states, nstate ) + if( test_same_kernel( nstate->kernel, i->pptr ) ) + break; + + if( !nstate ) + { + nstate = create_state( parser ); + nstate->kernel = i->pptr; + nstate->derived_from = state_id; + + /* Re-get current state due possibly heap re-allocation */ + st = (STATE*)parray_get( parser->states, state_id ); #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n===> Creating new State %d...\n", - nstate->state_id ); - dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); + fprintf( stderr, "\n===> Creating new State %d...\n", + nstate->state_id ); + dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); #endif - } - else - { + } + else + { #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n===> Updating existing State %d...\n", - nstate->state_id ); - dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); - fprintf( stderr, "\n...from partition set...\n" ); - dump_item_set( (FILE*)NULL, "Partition:", i->pptr ); + fprintf( stderr, "\n===> Updating existing State %d...\n", + nstate->state_id ); + dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); + fprintf( stderr, "\n...from partition set...\n" ); + dump_item_set( (FILE*)NULL, "Partition:", i->pptr ); #endif - /* Merging the lookaheads */ - cnt = 0; - prev_cnt = 0; + /* Merging the lookaheads */ + cnt = 0; + prev_cnt = 0; #if 0 - if( nstate->state_id == 96 && st->state_id == 260 ) - { - fprintf( stderr, "\n--- NEW/UPDATE STATE %d from STATE %d ---\n", - nstate->state_id, st->state_id ); - dump_item_set( (FILE*)NULL, 
"Partition:", i->pptr ); - dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); - getchar(); - } + if( nstate->state_id == 96 && st->state_id == 260 ) + { + fprintf( stderr, "\n--- NEW/UPDATE STATE %d from STATE %d ---\n", + nstate->state_id, st->state_id ); + dump_item_set( (FILE*)NULL, "Partition:", i->pptr ); + dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); + getchar(); + } #endif - for( j = nstate->kernel, k = i->pptr; j; - j = j->next, k = k->next ) - { - it = j->pptr; - prev_cnt += plist_count( &it->lookahead ); + for( j = nstate->kernel, k = i->pptr; j; + j = j->next, k = k->next ) + { + it = j->pptr; + prev_cnt += plist_count( &it->lookahead ); - plist_union( &it->lookahead, &((ITEM*)(k->pptr))->lookahead ); + plist_union( &it->lookahead, &((ITEM*)(k->pptr))->lookahead ); - cnt += plist_count( &it->lookahead ); + cnt += plist_count( &it->lookahead ); - free_item( k->pptr ); - } + free_item( k->pptr ); + } - /* Had new lookaheads been added? */ - if( cnt > prev_cnt ) - nstate->done = FALSE; + /* Had new lookaheads been added? 
*/ + if( cnt > prev_cnt ) + nstate->done = FALSE; #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n...it's now...\n" ); - dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); + fprintf( stderr, "\n...it's now...\n" ); + dump_item_set( (FILE*)NULL, "Kernel:", nstate->kernel ); #endif - /* drop_item_list( (LIST*)( i->pptr ) ); */ - list_free( (LIST*)( i->pptr ) ); - } - - /* Performing some table creation */ - if( !( st->closed ) ) - { - if( IS_TERMINAL( sym_before_move ) ) - { - st->actions = list_push( st->actions, create_tabcol( - sym_before_move, SHIFT, - nstate->state_id, (ITEM*)NULL ) ); - } - else - { - st->gotos = list_push( st->gotos, create_tabcol( - sym_before_move, SHIFT, - nstate->state_id, (ITEM*)NULL ) ); - } - } - } - } - - st->closed = 1; - - list_free( closure_set ); - list_free( part_symbols ); - list_free( partitions ); + /* drop_item_list( (LIST*)( i->pptr ) ); */ + list_free( (LIST*)( i->pptr ) ); + } + + /* Performing some table creation */ + if( !( st->closed ) ) + { + if( IS_TERMINAL( sym_before_move ) ) + { + st->actions = list_push( st->actions, create_tabcol( + sym_before_move, SHIFT, + nstate->state_id, (ITEM*)NULL ) ); + } + else + { + st->gotos = list_push( st->gotos, create_tabcol( + sym_before_move, SHIFT, + nstate->state_id, (ITEM*)NULL ) ); + } + } + } + } + + st->closed = 1; + + list_free( closure_set ); + list_free( part_symbols ); + list_free( partitions ); #if ON_ALGORITHM_DEBUG - fprintf( stderr, "\n\n" ); + fprintf( stderr, "\n\n" ); #endif - /* - if( debug ) - printf( "\n" ); - */ + /* + if( debug ) + printf( "\n" ); + */ } @@ -607,105 +598,105 @@ created for. //it// is the item where the reduce-entries should be created for. */ static void reduce_item( PARSER* parser, STATE* st, ITEM* it ) { - plistel* e; - SYMBOL* sym = (SYMBOL*)NULL; - TABCOL* act = (TABCOL*)NULL; - int resolved; - - /* - 02.03.2011 Jan Max Meyer - In case of non-associative symbols, write a special error action into the - parse table. 
The parser template also had to be changed for this. Due the - default-production feature, the old way on removing the table entry did not - work anymore. - */ - - if( it->next_symbol == (SYMBOL*)NULL ) - { - plist_for( &it->lookahead, e ) - { - sym = (SYMBOL*)plist_access( e ); - - /* - Check out if there is already an action! - */ - if( ( act = find_tabcol( st->actions, sym ) ) == (TABCOL*)NULL ) - { - st->actions = list_push( st->actions, - create_tabcol( sym, REDUCE, it->prod->id, it ) ); - } - else - { - if( act->action == REDUCE ) - { - if( ( ( !( parser->all_warnings ) && - !( it->prod->lhs->whitespace ) - && !( it->prod->lhs->generated ) ) - || parser->all_warnings ) ) - { - print_error( parser, ERR_REDUCE_REDUCE, - ERRSTYLE_WARNING | ERRSTYLE_STATEINFO - | ERRSTYLE_SYMBOL, st, sym ); - - if( act->index > it->prod->id ) - { - act->index = it->prod->id; - act->symbol = sym; - act->derived_from = it; - } - } - - if( sym->assoc == ASSOC_NOASSOC ) - { - st->actions = list_remove( st->actions, - (void*)act ); - free_tabcol( act ); - } - } - else if( act->action & SHIFT ) - { - /* - * Supress some warnings: - * Always shift on "lexem separation" or "fixate" - */ - if( ( parser->p_lexem_sep && it->prod->lhs->lexem ) - || it->prod->lhs->fixated ) - continue; - - if( ( resolved = ( it->prod->prec && sym->prec ) ) ) - { - if( sym->prec < it->prod->prec || - ( sym->prec == it->prod->prec - && sym->assoc == ASSOC_LEFT ) ) - { - act->action = REDUCE; - act->index = it->prod->id; - act->symbol = sym; - act->derived_from = it; - } - else if( sym->prec == it->prod->prec - && sym->assoc == ASSOC_NOASSOC ) - { - /* 02.03.2011 JMM: Let parser run into an error! 
*/ - act->action = ERROR; - act->index = 0; - } - } - - if( !resolved && ( ( !( parser->all_warnings ) - && !( it->prod->lhs->whitespace ) - && !( it->prod->lhs->generated ) ) - || parser->all_warnings ) ) - { - print_error( parser, ERR_SHIFT_REDUCE, - ERRSTYLE_WARNING | - ERRSTYLE_STATEINFO | ERRSTYLE_SYMBOL, - st, sym ); - } - } - } - } - } + plistel* e; + SYMBOL* sym = (SYMBOL*)NULL; + TABCOL* act = (TABCOL*)NULL; + int resolved; + + /* + 02.03.2011 Jan Max Meyer + In case of non-associative symbols, write a special error action into the + parse table. The parser template also had to be changed for this. Due the + default-production feature, the old way on removing the table entry did not + work anymore. + */ + + if( it->next_symbol == (SYMBOL*)NULL ) + { + plist_for( &it->lookahead, e ) + { + sym = (SYMBOL*)plist_access( e ); + + /* + Check out if there is already an action! + */ + if( ( act = find_tabcol( st->actions, sym ) ) == (TABCOL*)NULL ) + { + st->actions = list_push( st->actions, + create_tabcol( sym, REDUCE, it->prod->id, it ) ); + } + else + { + if( act->action == REDUCE ) + { + if( ( ( !( parser->all_warnings ) && + !( it->prod->lhs->whitespace ) + && !( it->prod->lhs->generated ) ) + || parser->all_warnings ) ) + { + print_error( parser, ERR_REDUCE_REDUCE, + ERRSTYLE_WARNING | ERRSTYLE_STATEINFO + | ERRSTYLE_SYMBOL, st, sym ); + + if( act->index > it->prod->id ) + { + act->index = it->prod->id; + act->symbol = sym; + act->derived_from = it; + } + } + + if( sym->assoc == ASSOC_NOASSOC ) + { + st->actions = list_remove( st->actions, + (void*)act ); + free_tabcol( act ); + } + } + else if( act->action & SHIFT ) + { + /* + * Supress some warnings: + * Always shift on "lexem separation" or "fixate" + */ + if( ( parser->p_lexem_sep && it->prod->lhs->lexem ) + || it->prod->lhs->fixated ) + continue; + + if( ( resolved = ( it->prod->prec && sym->prec ) ) ) + { + if( sym->prec < it->prod->prec || + ( sym->prec == it->prod->prec + && sym->assoc == ASSOC_LEFT 
) ) + { + act->action = REDUCE; + act->index = it->prod->id; + act->symbol = sym; + act->derived_from = it; + } + else if( sym->prec == it->prod->prec + && sym->assoc == ASSOC_NOASSOC ) + { + /* 02.03.2011 JMM: Let parser run into an error! */ + act->action = ERROR; + act->index = 0; + } + } + + if( !resolved && ( ( !( parser->all_warnings ) + && !( it->prod->lhs->whitespace ) + && !( it->prod->lhs->generated ) ) + || parser->all_warnings ) ) + { + print_error( parser, ERR_SHIFT_REDUCE, + ERRSTYLE_WARNING | + ERRSTYLE_STATEINFO | ERRSTYLE_SYMBOL, + st, sym ); + } + } + } + } + } } @@ -717,14 +708,14 @@ creation. This must be called as the last step on computing the parse tables. created for. */ static void perform_reductions( PARSER* parser, STATE* st ) { - LIST* i; + LIST* i; - /* First, perform the reductions */ - for( i = st->kernel; i; i = i->next ) - reduce_item( parser, st, i->pptr ); + /* First, perform the reductions */ + for( i = st->kernel; i; i = i->next ) + reduce_item( parser, st, i->pptr ); - for( i = st->epsilon; i; i = i->next ) - reduce_item( parser, st, i->pptr ); + for( i = st->epsilon; i; i = i->next ) + reduce_item( parser, st, i->pptr ); } /** This is the entry function for generating the LALR(1) parse tables for a @@ -733,44 +724,44 @@ parsed grammar definition. //parser// is the pointer to the parser information structure. 
*/ void generate_tables( PARSER* parser ) { - STATE* st = (STATE*)NULL; - ITEM* it = (ITEM*)NULL; - - if( !( parser->symbols || parser->productions ) ) - return; - - if( !( parser->goal ) ) - { - print_error( parser, ERR_NO_GOAL_SYMBOL, ERRSTYLE_FATAL ); - return; - } - - st = create_state( parser ); - it = create_item( (PROD*)plist_access( plist_first( - parser->goal->productions ) ) ); - st->kernel = list_push( st->kernel, it ); - - /* The goal item's lookahead is the end_of_input symbol */ - plist_push( &it->lookahead, parser->end_of_input ); - - /* Perform closure algorithm until no more undone states are found */ - do - { - parray_for( parser->states, st ) - { - if( !st->done ) - { - st->done = TRUE; - lalr1_closure( parser, st->state_id ); - break; - } - } - } - while( st ); - - /* Perform parse table generation. */ - parray_for( parser->states, st ) - perform_reductions( parser, st ); + STATE* st = (STATE*)NULL; + ITEM* it = (ITEM*)NULL; + + if( !( parser->symbols || parser->productions ) ) + return; + + if( !( parser->goal ) ) + { + print_error( parser, ERR_NO_GOAL_SYMBOL, ERRSTYLE_FATAL ); + return; + } + + st = create_state( parser ); + it = create_item( (PROD*)plist_access( plist_first( + parser->goal->productions ) ) ); + st->kernel = list_push( st->kernel, it ); + + /* The goal item's lookahead is the end_of_input symbol */ + plist_push( &it->lookahead, parser->end_of_input ); + + /* Perform closure algorithm until no more undone states are found */ + do + { + parray_for( parser->states, st ) + { + if( !st->done ) + { + st->done = TRUE; + lalr1_closure( parser, st->state_id ); + break; + } + } + } + while( st ); + + /* Perform parse table generation. */ + parray_for( parser->states, st ) + perform_reductions( parser, st ); } /** Performs a default production detection. This must be done immediatelly @@ -780,62 +771,62 @@ lexical analysis generation. //parser// is the pointer to the parser information structure. 
*/ void detect_default_productions( PARSER* parser ) { - STATE* st; - PROD* cur; - TABCOL* act; - - plistel* e; - LIST* a_list; - LIST* n_list; - - int max; - int count; - - parray_for( parser->states, st ) - { - max = 0; - - /* Find the most common reduction and use this as default - (quick and dirty...) */ - plist_for( parser->productions, e ) - { - cur = (PROD*)plist_access( e ); - - for( a_list = st->actions, count = 0; a_list; - a_list = list_next( a_list ) ) - { - act = (TABCOL*)list_access( a_list ); - - if( act->action == REDUCE && act->index == cur->id ) - count++; - } - - if( count > max ) - { - max = count; - st->def_prod = cur; - } - } - - /* Remove all entries that already match the default production */ - if( st->def_prod ) - { - n_list = (LIST*)NULL; - - for( a_list = st->actions; a_list; - a_list = list_next( a_list ) ) - { - act = (TABCOL*)list_access( a_list ); - - if( act->action == REDUCE && - act->index == st->def_prod->id ) - free_tabcol( act ); - else - n_list = list_push( n_list, act ); - } - - list_free( st->actions ); - st->actions = n_list; - } - } + STATE* st; + PROD* cur; + TABCOL* act; + + plistel* e; + LIST* a_list; + LIST* n_list; + + int max; + int count; + + parray_for( parser->states, st ) + { + max = 0; + + /* Find the most common reduction and use this as default + (quick and dirty...) 
*/ + plist_for( parser->productions, e ) + { + cur = (PROD*)plist_access( e ); + + for( a_list = st->actions, count = 0; a_list; + a_list = list_next( a_list ) ) + { + act = (TABCOL*)list_access( a_list ); + + if( act->action == REDUCE && act->index == cur->id ) + count++; + } + + if( count > max ) + { + max = count; + st->def_prod = cur; + } + } + + /* Remove all entries that already match the default production */ + if( st->def_prod ) + { + n_list = (LIST*)NULL; + + for( a_list = st->actions; a_list; + a_list = list_next( a_list ) ) + { + act = (TABCOL*)list_access( a_list ); + + if( act->action == REDUCE && + act->index == st->def_prod->id ) + free_tabcol( act ); + else + n_list = list_push( n_list, act ); + } + + list_free( st->actions ); + st->actions = n_list; + } + } } diff --git a/src/lex.c b/src/lex.c index b7736e0..d5d74ee 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,14 +1,5 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: lex.c -Author: Jan Max Meyer -Usage: Turns regular expression definitions into deterministic state - machines, by using the libphorward regular expression tools. ------------------------------------------------------------------------------ */ +/* Turns regular expression definitions into deterministic state machines, +by using the libphorward regular expression tools. */ #include "unicc.h" @@ -18,74 +9,74 @@ re-uses state machines matching the same pool of terminals. //parser// is the pointer to parser information structure. 
*/ void merge_symbols_to_dfa( PARSER* parser ) { - pregex_nfa* nfa; - pregex_dfa* dfa; - pregex_dfa* ex_dfa; - LIST* m; - STATE* s; - TABCOL* col; - - PROC( "merge_symbols_to_dfa" ); - PARMS( "parser", "%p", parser ); - - parray_for( parser->states, s ) - { - VARS( "s->state_id", "%d", s->state_id ); - nfa = pregex_nfa_create(); - - /* Construct NFAs from symbols */ - LISTFOR( s->actions, m ) - { - col = (TABCOL*)list_access( m ); - nfa_from_symbol( parser, nfa, col->symbol ); - } - - /* Construct DFA, if NFA has been constructed */ - VARS( "plist_count( nfa->states )", "%d", plist_count( nfa->states ) ); - if( plist_count( nfa->states ) ) - { - dfa = pregex_dfa_create(); - - MSG( "Constructing DFA from NFA" ); - if( !pregex_dfa_from_nfa( dfa, nfa ) ) - OUTOFMEM; - - VARS( "plist_count( dfa->states )", "%d", - plist_count( dfa->states ) ); - - MSG( "Freeing NFA" ); - nfa = pregex_nfa_free( nfa ); - - MSG( "Minimizing DFA" ); - if( !pregex_dfa_minimize( dfa ) ) - OUTOFMEM; - - VARS( "plist_count( dfa->states )", "%d", - plist_count( dfa->states ) ); - - if( ( ex_dfa = find_equal_dfa( parser, dfa ) ) ) - { - MSG( "An equal DFA exists; Freeing temporary one!" ); - dfa = pregex_dfa_free( dfa ); - } - else - { - MSG( "This DFA does not exist in pool yet - integrating!" 
); - ex_dfa = dfa; - - if( !( parser->dfas = list_push( - parser->dfas, (void*)ex_dfa ) ) ) - OUTOFMEM; - } - - VARS( "ex_dfa", "%p", ex_dfa ); - s->dfa = ex_dfa; - } - else - pregex_nfa_free( nfa ); - } - - VOIDRET; + pregex_nfa* nfa; + pregex_dfa* dfa; + pregex_dfa* ex_dfa; + LIST* m; + STATE* s; + TABCOL* col; + + PROC( "merge_symbols_to_dfa" ); + PARMS( "parser", "%p", parser ); + + parray_for( parser->states, s ) + { + VARS( "s->state_id", "%d", s->state_id ); + nfa = pregex_nfa_create(); + + /* Construct NFAs from symbols */ + LISTFOR( s->actions, m ) + { + col = (TABCOL*)list_access( m ); + nfa_from_symbol( parser, nfa, col->symbol ); + } + + /* Construct DFA, if NFA has been constructed */ + VARS( "plist_count( nfa->states )", "%d", plist_count( nfa->states ) ); + if( plist_count( nfa->states ) ) + { + dfa = pregex_dfa_create(); + + MSG( "Constructing DFA from NFA" ); + if( !pregex_dfa_from_nfa( dfa, nfa ) ) + OUTOFMEM; + + VARS( "plist_count( dfa->states )", "%d", + plist_count( dfa->states ) ); + + MSG( "Freeing NFA" ); + nfa = pregex_nfa_free( nfa ); + + MSG( "Minimizing DFA" ); + if( !pregex_dfa_minimize( dfa ) ) + OUTOFMEM; + + VARS( "plist_count( dfa->states )", "%d", + plist_count( dfa->states ) ); + + if( ( ex_dfa = find_equal_dfa( parser, dfa ) ) ) + { + MSG( "An equal DFA exists; Freeing temporary one!" ); + dfa = pregex_dfa_free( dfa ); + } + else + { + MSG( "This DFA does not exist in pool yet - integrating!" ); + ex_dfa = dfa; + + if( !( parser->dfas = list_push( + parser->dfas, (void*)ex_dfa ) ) ) + OUTOFMEM; + } + + VARS( "ex_dfa", "%p", ex_dfa ); + s->dfa = ex_dfa; + } + else + pregex_nfa_free( nfa ); + } + + VOIDRET; } /** Constructs a single DFA for a general token lexer. @@ -93,52 +84,52 @@ void merge_symbols_to_dfa( PARSER* parser ) //parser// is the pointer to parser information structure. 
*/ void construct_single_lexer( PARSER* parser ) { - pregex_nfa* nfa; - pregex_dfa* dfa; - plistel* e; - SYMBOL* s; + pregex_nfa* nfa; + pregex_dfa* dfa; + plistel* e; + SYMBOL* s; - PROC( "construct_single_lexer" ); - PARMS( "parser", "%p", parser ); + PROC( "construct_single_lexer" ); + PARMS( "parser", "%p", parser ); - MSG( "Constructing NFA" ); - nfa = pregex_nfa_create(); - dfa = pregex_dfa_create(); + MSG( "Constructing NFA" ); + nfa = pregex_nfa_create(); + dfa = pregex_dfa_create(); - plist_for( parser->symbols, e ) - { - s = (SYMBOL*)plist_access( e ); - VARS( "s->id", "%d", s->id ); + plist_for( parser->symbols, e ) + { + s = (SYMBOL*)plist_access( e ); + VARS( "s->id", "%d", s->id ); - nfa_from_symbol( parser, nfa, s ); - } + nfa_from_symbol( parser, nfa, s ); + } - /* Construct DFA, if NFA has been constructed */ - VARS( "plist_count( nfa->states )", "%d", plist_count( nfa->states ) ); - if( plist_count( nfa->states ) ) - { - MSG( "Constructing DFA from NFA" ); - if( !pregex_dfa_from_nfa( dfa, nfa ) ) - OUTOFMEM; + /* Construct DFA, if NFA has been constructed */ + VARS( "plist_count( nfa->states )", "%d", plist_count( nfa->states ) ); + if( plist_count( nfa->states ) ) + { + MSG( "Constructing DFA from NFA" ); + if( !pregex_dfa_from_nfa( dfa, nfa ) ) + OUTOFMEM; - VARS( "plist_count( dfa->states )", "%d", - plist_count( dfa->states ) ); + VARS( "plist_count( dfa->states )", "%d", + plist_count( dfa->states ) ); - MSG( "Freeing NFA" ); - nfa = pregex_nfa_free( nfa ); + MSG( "Freeing NFA" ); + nfa = pregex_nfa_free( nfa ); - MSG( "Minimizing DFA" ); - if( !pregex_dfa_minimize( dfa ) ) - OUTOFMEM; + MSG( "Minimizing DFA" ); + if( !pregex_dfa_minimize( dfa ) ) + OUTOFMEM; - VARS( "plist_count( dfa->states )", "%d", - plist_count( dfa->states ) ); + VARS( "plist_count( dfa->states )", "%d", + plist_count( dfa->states ) ); - if( !( parser->dfas = list_push( parser->dfas, (void*)dfa ) ) ) - OUTOFMEM; - } + if( !( parser->dfas = list_push( parser->dfas, 
(void*)dfa ) ) ) + OUTOFMEM; + } - VOIDRET; + VOIDRET; } /** Walks trough the DFA machines of the current parser definition and tests if @@ -153,92 +144,92 @@ Returns the pointer to a matching DFA, else (pregex_dfa*)NULL. */ pregex_dfa* find_equal_dfa( PARSER* parser, pregex_dfa* ndfa ) { - LIST* l; - plistel* e; - plistel* f; - plistel* g; - plistel* h; - - pregex_dfa* tdfa; - pregex_dfa_st* dfa_st [2]; - pregex_dfa_tr* dfa_ent [2]; - BOOLEAN match; - - /* - 19.11.2009 Jan Max Meyer - Revision of entire function, to work with structures of the new - regex-library. - - 16.01.2014 Jan Max Meyer - Fixed sources to run with libphorward v0.18 (current development version). - */ - - PROC( "find_equal_dfa" ); - PARMS( "parser", "%p", parser ); - PARMS( "ndfa", "%p", ndfa ); - - LISTFOR( parser->dfas, l ) - { - tdfa = (pregex_dfa*)list_access( l ); - - VARS( "plist_count( tdfa->states )", "%d", - plist_count( tdfa->states ) ); - VARS( "plist_count( ndfa->states )", "%d", - plist_count( ndfa->states ) ); - - if( plist_count( tdfa->states ) != plist_count( ndfa->states ) ) - { - MSG( "Number of states does already not match - test next" ); - continue; - } - - for( e = plist_first( tdfa->states ), - f = plist_first( ndfa->states ); - e && f; e = plist_next( e ), f = plist_next( f ) ) - { - match = TRUE; - - dfa_st[0] = (pregex_dfa_st*)plist_access( e ); - dfa_st[1] = (pregex_dfa_st*)plist_access( f ); - - if( !( dfa_st[0]->accept == dfa_st[1]->accept - && plist_count( dfa_st[0]->trans ) - == plist_count( dfa_st[1]->trans ) ) ) - { - MSG( "Number of transitions or accepting ID does not match" ); - match = FALSE; - break; - } - - for( g = plist_first( dfa_st[0]->trans ), - h = plist_first( dfa_st[1]->trans ); g && h; - g = plist_next( g ), h = plist_next( h ) ) - { - dfa_ent[0] = (pregex_dfa_tr*)plist_access( g ); - dfa_ent[1] = (pregex_dfa_tr*)plist_access( h ); - - if( !( pccl_compare( dfa_ent[0]->ccl, dfa_ent[1]->ccl ) - == 0 - && dfa_ent[0]->go_to == dfa_ent[1]->go_to ) ) 
- { - MSG( "Deep scan of transitions not equal" ); - match = FALSE; - break; - } - } - } - - VARS( "match", "%d", match ); - if( match ) - { - MSG( "DFA matches!" ); - VARS( "tdfa", "%p", tdfa ); - RETURN( tdfa ); - } - } - - MSG( "No DFA matches!" ); - RETURN( (pregex_dfa*)NULL ); + LIST* l; + plistel* e; + plistel* f; + plistel* g; + plistel* h; + + pregex_dfa* tdfa; + pregex_dfa_st* dfa_st [2]; + pregex_dfa_tr* dfa_ent [2]; + BOOLEAN match; + + /* + 19.11.2009 Jan Max Meyer + Revision of entire function, to work with structures of the new + regex-library. + + 16.01.2014 Jan Max Meyer + Fixed sources to run with libphorward v0.18 (current development version). + */ + + PROC( "find_equal_dfa" ); + PARMS( "parser", "%p", parser ); + PARMS( "ndfa", "%p", ndfa ); + + LISTFOR( parser->dfas, l ) + { + tdfa = (pregex_dfa*)list_access( l ); + + VARS( "plist_count( tdfa->states )", "%d", + plist_count( tdfa->states ) ); + VARS( "plist_count( ndfa->states )", "%d", + plist_count( ndfa->states ) ); + + if( plist_count( tdfa->states ) != plist_count( ndfa->states ) ) + { + MSG( "Number of states does already not match - test next" ); + continue; + } + + for( e = plist_first( tdfa->states ), + f = plist_first( ndfa->states ); + e && f; e = plist_next( e ), f = plist_next( f ) ) + { + match = TRUE; + + dfa_st[0] = (pregex_dfa_st*)plist_access( e ); + dfa_st[1] = (pregex_dfa_st*)plist_access( f ); + + if( !( dfa_st[0]->accept == dfa_st[1]->accept + && plist_count( dfa_st[0]->trans ) + == plist_count( dfa_st[1]->trans ) ) ) + { + MSG( "Number of transitions or accepting ID does not match" ); + match = FALSE; + break; + } + + for( g = plist_first( dfa_st[0]->trans ), + h = plist_first( dfa_st[1]->trans ); g && h; + g = plist_next( g ), h = plist_next( h ) ) + { + dfa_ent[0] = (pregex_dfa_tr*)plist_access( g ); + dfa_ent[1] = (pregex_dfa_tr*)plist_access( h ); + + if( !( pccl_compare( dfa_ent[0]->ccl, dfa_ent[1]->ccl ) + == 0 + && dfa_ent[0]->go_to == dfa_ent[1]->go_to ) ) + { + 
MSG( "Deep scan of transitions not equal" ); + match = FALSE; + break; + } + } + } + + VARS( "match", "%d", match ); + if( match ) + { + MSG( "DFA matches!" ); + VARS( "tdfa", "%p", tdfa ); + RETURN( tdfa ); + } + } + + MSG( "No DFA matches!" ); + RETURN( (pregex_dfa*)NULL ); } /** Converts a symbols regular expression pattern defininition into a @@ -250,27 +241,26 @@ NFA state machine. */ void nfa_from_symbol( PARSER* parser, pregex_nfa* nfa, SYMBOL* sym ) { - PROC( "nfa_from_symbol" ); - PARMS( "parser", "%p", parser ); - PARMS( "nfa", "%p", nfa ); - PARMS( "sym", "%p", sym ); - - /* - if( !( sym->type == SYM_REGEX_TERMINAL ) ) - { - MSG( "Symbol is not a regular expression" ); - VOIDRET; - } - - TODO: Maybe later, check terminal types here according to config - */ - - if( sym->ptn ) - { - sym->ptn->accept = sym->id + 1; - pregex_ptn_to_nfa( nfa, sym->ptn ); - } - - VOIDRET; + PROC( "nfa_from_symbol" ); + PARMS( "parser", "%p", parser ); + PARMS( "nfa", "%p", nfa ); + PARMS( "sym", "%p", sym ); + + /* + if( !( sym->type == SYM_REGEX_TERMINAL ) ) + { + MSG( "Symbol is not a regular expression" ); + VOIDRET; + } + + TODO: Maybe later, check terminal types here according to config + */ + + if( sym->ptn ) + { + sym->ptn->accept = sym->id + 1; + pregex_ptn_to_nfa( nfa, sym->ptn ); + } + + VOIDRET; } - diff --git a/src/list.c b/src/list.c index bbf3e8c..ff39561 100644 --- a/src/list.c +++ b/src/list.c @@ -1,23 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: list.c -Author: Jan Max Meyer -Usage: Management functions for simple linked-lists. 
------------------------------------------------------------------------------ */ - -/* - March 24, 2014: - - The linked list structure LIST (llist) came from libphorward, but - had been replaced there by the much more powerful plist objects. - - UniCC is the only program that still makes use of these older lists, - and a refactoring to use plist is too expensive right now. -*/ +/* Legacy, simple linked-lists. */ #include "unicc.h" @@ -34,27 +15,27 @@ Returns a pointer to the first item of the linked list of elements. */ LIST* list_push( LIST* list, void* ptr ) { - LIST* elem; - LIST* item; - - if( ( elem = (LIST*)pmalloc( sizeof( LIST ) ) ) ) - { - elem->pptr = ptr; - elem->next = (LIST*)NULL; - - if( !list ) - list = elem; - else - { - item = list; - while( item->next ) - item = item->next; - - item->next = elem; - } - } - - return list; + LIST* elem; + LIST* item; + + if( ( elem = (LIST*)pmalloc( sizeof( LIST ) ) ) ) + { + elem->pptr = ptr; + elem->next = (LIST*)NULL; + + if( !list ) + list = elem; + else + { + item = list; + while( item->next ) + item = item->next; + + item->next = elem; + } + } + + return list; } /** Pops the last element off a linked-list of pointers. @@ -70,40 +51,40 @@ If the last element is popped, (LIST*)NULL is returned. 
*/ LIST* list_pop( LIST* list, void** ptr ) { - LIST* item; - LIST* prev = (LIST*)NULL; + LIST* item; + LIST* prev = (LIST*)NULL; - if( !list ) - { - if( ptr ) - *ptr = (void*)NULL; + if( !list ) + { + if( ptr ) + *ptr = (void*)NULL; - return (LIST*)NULL; - } - else - { - item = list; - while( item->next ) - { - prev = item; - item = item->next; - } + return (LIST*)NULL; + } + else + { + item = list; + while( item->next ) + { + prev = item; + item = item->next; + } - if( prev ) - prev->next = (LIST*)NULL; + if( prev ) + prev->next = (LIST*)NULL; - if( ptr ) - *ptr = item->pptr; + if( ptr ) + *ptr = item->pptr; - if( item == list ) - list = (LIST*)NULL; + if( item == list ) + list = (LIST*)NULL; - pfree( item ); + pfree( item ); - item = (LIST*)NULL; - } + item = (LIST*)NULL; + } - return list; + return list; } /** Removes an item from a linked list. Instead as list_pop(), list_remove() can @@ -119,29 +100,29 @@ item was removed. */ LIST* list_remove( LIST* list, void* ptr ) { - LIST* item; - LIST* prev = (LIST*)NULL; + LIST* item; + LIST* prev = (LIST*)NULL; - if( !ptr ) - return list; + if( !ptr ) + return list; - for( item = list; item; item = item->next ) - { - if( item->pptr == ptr ) - { - if( !prev ) - list = item->next; - else - prev->next = item->next; + for( item = list; item; item = item->next ) + { + if( item->pptr == ptr ) + { + if( !prev ) + list = item->next; + else + prev->next = item->next; - pfree( item ); - break; - } + pfree( item ); + break; + } - prev = item; - } + prev = item; + } - return list; + return list; } /** Frees a linked list. @@ -152,19 +133,19 @@ Returns always (LIST*)NULL. 
*/ LIST* list_free( LIST* list ) { - LIST* next = (LIST*)NULL; - LIST* item; + LIST* next = (LIST*)NULL; + LIST* item; - item = list; - while( item ) - { - next = item->next; - pfree( item ); + item = list; + while( item ) + { + next = item->next; + pfree( item ); - item = next; - } + item = next; + } - return (LIST*)NULL; + return (LIST*)NULL; } /** Duplicates a list in a 1:1 copy. @@ -175,13 +156,13 @@ Returns a pointer to the copy if //src//. */ LIST* list_dup( LIST* src ) { - LIST* item; - LIST* tar = (LIST*)NULL; + LIST* item; + LIST* tar = (LIST*)NULL; - for( item = src; item; item = item->next ) - tar = list_push( tar, item->pptr ); + for( item = src; item; item = item->next ) + tar = list_push( tar, item->pptr ); - return tar; + return tar; } /** Counts the elements in a list. @@ -192,12 +173,12 @@ Returns the number of items contained by the list. */ int list_count( LIST* list ) { - int count = 0; + int count = 0; - for( ; list; list = list->next ) - count++; + for( ; list; list = list->next ) + count++; - return count; + return count; } /** Searches for a pointer in a linked list. @@ -210,21 +191,21 @@ from the lists begin, 0 is the first element. */ int list_find( LIST* list, void* ptr ) { - LIST* item; - int cnt = 0; + LIST* item; + int cnt = 0; - if( !ptr ) - return -1; + if( !ptr ) + return -1; - for( item = list; item; item = item->next ) - { - if( item->pptr == ptr ) - return cnt; + for( item = list; item; item = item->next ) + { + if( item->pptr == ptr ) + return cnt; - cnt++; - } + cnt++; + } - return -1; + return -1; } /** Returns the pointer of the desired offset from the linked list. @@ -238,20 +219,20 @@ not in the list (if //cnt// goes over the end of the list). 
*/ void* list_getptr( LIST* list, int cnt ) { - LIST* item; + LIST* item; - if( cnt < 0 ) - return (void*)NULL; + if( cnt < 0 ) + return (void*)NULL; - for( item = list; item; item = item->next ) - { - if( cnt == 0 ) - return item->pptr; + for( item = list; item; item = item->next ) + { + if( cnt == 0 ) + return item->pptr; - cnt--; - } + cnt--; + } - return (void*)NULL; + return (void*)NULL; } /** Unions two list to a huger new one. @@ -264,19 +245,18 @@ Returns the extended list //first//, which is the union of //first// and */ LIST* list_union( LIST* first, LIST* second ) { - LIST* ret; - LIST* current; - - if( first != (LIST*)NULL ) - { - ret = first; - for( current = second; current; current = current->next ) - if( list_find( ret, current->pptr ) == -1 ) - ret = list_push( ret, current->pptr ); - } - else - ret = list_dup( second ); - - return ret; + LIST* ret; + LIST* current; + + if( first != (LIST*)NULL ) + { + ret = first; + for( current = second; current; current = current->next ) + if( list_find( ret, current->pptr ) == -1 ) + ret = list_push( ret, current->pptr ); + } + else + ret = list_dup( second ); + + return ret; } - diff --git a/src/main.c b/src/main.c index 6cfd756..b4bb36e 100644 --- a/src/main.c +++ b/src/main.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2021 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. 
- -File: main.c -Author: Jan Max Meyer -Usage: UniCC program entry / main function ------------------------------------------------------------------------------ */ +/* UniCC program entry / main function */ #include "unicc.h" @@ -21,19 +12,19 @@ extern char* progname; char* pmod[] = { - "scannerless", - "using scanner" + "scannerless", + "using scanner" }; /* Verbose Macros (Main only) */ #define PROGRESS( txt ) if( parser->verbose ) \ - { \ - fprintf( status, "%s...", (txt) ); \ - fflush( status ); \ - first_progress = TRUE; \ - } \ - else \ - first_progress = FALSE; + { \ + fprintf( status, "%s...", (txt) ); \ + fflush( status ); \ + first_progress = TRUE; \ + } \ + else \ + first_progress = FALSE; #define DONE() print_status( parser, "Done\n", (char*)NULL ); #define FAIL() print_status( parser, "Failed\n", (char*)NULL ); #define SUCCESS() print_status( parser, "Succeeded\n", (char*)NULL ); @@ -48,15 +39,15 @@ char* pmod[] = */ static void print_status( PARSER* parser, char* status_msg, char* reason ) { - if( !parser->verbose ) - return; + if( !parser->verbose ) + return; - if( first_progress ) - fprintf( status, status_msg, reason ); - else - fprintf( status, "\n" ); + if( first_progress ) + fprintf( status, status_msg, reason ); + else + fprintf( status, "\n" ); - first_progress = FALSE; + first_progress = FALSE; } /** Generates and returns the UniCC version number string. @@ -65,16 +56,16 @@ static void print_status( PARSER* parser, char* status_msg, char* reason ) information. 
*/ char* print_version( BOOLEAN long_version ) { - static char version [ ONE_LINE + 1 ]; + static char version [ ONE_LINE + 1 ]; - if( long_version ) - sprintf( version, "%d.%d.%d%s", - UNICC_VER_MAJOR, UNICC_VER_MINOR, UNICC_VER_PATCH, - UNICC_VER_EXTSTR ); - else - sprintf( version, "%d.%d", UNICC_VER_MAJOR, UNICC_VER_MINOR ); + if( long_version ) + sprintf( version, "%d.%d.%d%s", + UNICC_VER_MAJOR, UNICC_VER_MINOR, UNICC_VER_PATCH, + UNICC_VER_EXTSTR ); + else + sprintf( version, "%d.%d", UNICC_VER_MAJOR, UNICC_VER_MINOR ); - return version; + return version; } /** Prints a program copyright info message to a desired file or stream. @@ -83,16 +74,16 @@ char* print_version( BOOLEAN long_version ) If this is (FILE*)NULL, stdout will be used. */ void print_copyright( FILE* stream ) { - if( !stream ) - stream = stdout; + if( !stream ) + stream = stdout; - fprintf( stream, "UniCC %s\n", print_version( TRUE ) ); + fprintf( stream, "UniCC %s\n", print_version( TRUE ) ); fprintf( stream, "The Universal LALR(1) parser generator.\n\n" ); - fprintf( stream, "Copyright (C) 2006-2021 by " - "Phorward Software Technologies, Jan Max Meyer\n" ); - fprintf( stream, "All rights reserved. " - "See LICENSE for more information.\n" ); + fprintf( stream, "Copyright (C) 2006-2023 by " + "Phorward Software Technologies, Jan Max Meyer\n" ); + fprintf( stream, "All rights reserved. " + "See LICENSE for more information.\n" ); } @@ -102,33 +93,30 @@ void print_copyright( FILE* stream ) (FILE*)NULL, stdout will be used. //progname// is the name of the executable. */ void print_usage( FILE* stream, char* progname ) { - if( !stream ) - stream = stdout; - - fprintf( stream, "Usage: %s [OPTION]... 
FILE\n\n" - " -a --all Print all warnings\n" - " -b/-o --basename NAME Use basename NAME for output files\n" - " -G --grammar Dump final (rewritten) grammar\n" - " -h --help Print this help and exit\n" - " -l --language TARGET Specify target language (default: %s)\n" - " -n --no-opt Disables state optimization\n" - " (this will cause more states)\n" - " -P --productions Dump final productions\n" - " -s --stats Print statistics message\n" - " -S --states Dump LALR(1) states\n" - " -t --stdout Print output to stdout instead of files\n" - " -T --symbols Dump symbols\n" - " -v --verbose Print progress messages\n" - " -V --version Print version and copyright and exit\n" - " -w --warnings Print warnings\n" - " -x --xml Build parser description file additionally\n" - " -X --XML Build parser description file only without\n" - " generating a program-module\n" - "\n" - "Errors and warnings are printed to stderr, " - "everything else to stdout.\n" - - "", progname, UNICC_DEFAULT_TARGET ); + if( !stream ) + stream = stdout; + + fprintf( stream, "Usage: %s [OPTION]... FILE\n\n" + " -a --all Print all warnings\n" + " -b/-o --basename NAME Use basename NAME for output files\n" + " -G --grammar Dump final (rewritten) grammar\n" + " -h --help Print this help and exit\n" + " -l --language TARGET Specify target language (default: %s)\n" + " -n --no-opt Disables state optimization\n" + " (this will cause more states)\n" + " -P --productions Dump final productions\n" + " -s --stats Print statistics message\n" + " -S --states Dump LALR(1) states\n" + " -t --stdout Print output to stdout instead of files\n" + " -T --symbols Dump symbols\n" + " -v --verbose Print progress messages\n" + " -V --version Print version and copyright and exit\n" + " -w --warnings Print warnings\n" + "\n" + "Errors and warnings are printed to stderr, " + "everything else to stdout.\n" + + "", progname, UNICC_DEFAULT_TARGET ); } /** Analyzes the command line parameters passed to the parser generator. 
@@ -141,92 +129,85 @@ void print_usage( FILE* stream, char* progname ) Returns a TRUE, if command-line parameters are correct, FALSE otherwise. */ BOOLEAN get_command_line( int argc, char** argv, char** filename, - char** output, PARSER* parser ) + char** output, PARSER* parser ) { - int i; - int rc; - int next; - char opt [ ONE_LINE + 1 ]; - char* param; - - progname = *argv; - - for( i = 0; - ( rc = pgetopt( opt, ¶m, &next, argc, argv, - "ab:Ghl:no:PsStTvVwxX", - "all grammar help language: no-opt output: basename: " - "productions stats states stdout symbols verbose " - "version warnings xml XML", i ) ) == 0; i++ ) - { - if( !strcmp( opt, "output" ) || !strcmp( opt, "o" ) - || !strcmp( opt, "basename" ) || !strcmp( opt, "b" ) ) - { - if( !param ) - print_error( parser, ERR_CMD_LINE, ERRSTYLE_FATAL, opt ); - else - *output = param; - } - else if( !strcmp( opt, "language" ) || !strcmp( opt, "l" ) ) - { - if( !param ) - print_error( parser, ERR_CMD_LINE, ERRSTYLE_FATAL, opt ); - else - parser->target = param; - } - else if( !strcmp( opt, "verbose" ) || !strcmp( opt, "v" ) ) - { - parser->verbose = TRUE; - parser->stats = TRUE; - } - else if( !strcmp( opt, "stats" ) || !strcmp( opt, "s" ) ) - parser->stats = TRUE; - else if( !strcmp( opt, "warnings" ) || !strcmp( opt, "w" ) ) - no_warnings = FALSE; - else if( !strcmp( opt, "grammar" ) || !strcmp( opt, "G" ) ) - parser->show_grammar = TRUE; - else if( !strcmp( opt, "states" ) || !strcmp( opt, "S" ) ) - parser->show_states = TRUE; - else if( !strcmp( opt, "symbols" ) || !strcmp( opt, "T" ) ) - parser->show_symbols = TRUE; - else if( !strcmp( opt, "stdout" ) || !strcmp( opt, "t" ) ) - { - parser->to_stdout = TRUE; - status = stderr; - } - else if( !strcmp( opt, "productions" ) || !strcmp( opt, "P" ) ) - parser->show_productions = TRUE; - else if( !strcmp( opt, "no-opt" ) || !strcmp( opt, "n" ) ) - parser->optimize_states = FALSE; - else if( !strcmp( opt, "all" ) || !strcmp( opt, "a" ) ) - { - parser->all_warnings = 
TRUE; - no_warnings = FALSE; - } - else if( !strcmp( opt, "version" ) || !strcmp( opt, "V" ) ) - { - print_copyright( stdout ); - exit( EXIT_SUCCESS ); - } - else if( !strcmp( opt, "help" ) || !strcmp( opt, "h" ) ) - { - print_usage( stdout, *argv ); - exit( EXIT_SUCCESS ); - } - else if( !strcmp( opt, "xml" ) || !strcmp( opt, "x" ) ) - parser->gen_xml = TRUE; - else if( !strcmp( opt, "XML" ) || !strcmp( opt, "X" ) ) - { - parser->gen_xml = TRUE; - parser->gen_prog = FALSE; - } - } - - if( rc == 1 ) - *filename = param; - else if( rc < 0 && param ) - print_error( parser, ERR_CMD_OPT, ERRSTYLE_FATAL, param ); - - return ( *filename ? TRUE : FALSE ); + int i; + int rc; + int next; + char opt [ ONE_LINE + 1 ]; + char* param; + + progname = *argv; + + for( i = 0; + ( rc = pgetopt( opt, ¶m, &next, argc, argv, + "ab:Ghl:no:PsStTvVw", + "all grammar help language: no-opt output: basename: " + "productions stats states stdout symbols verbose " + "version warnings", i ) ) == 0; i++ ) + { + if( !strcmp( opt, "output" ) || !strcmp( opt, "o" ) + || !strcmp( opt, "basename" ) || !strcmp( opt, "b" ) ) + { + if( !param ) + print_error( parser, ERR_CMD_LINE, ERRSTYLE_FATAL, opt ); + else + *output = param; + } + else if( !strcmp( opt, "language" ) || !strcmp( opt, "l" ) ) + { + if( !param ) + print_error( parser, ERR_CMD_LINE, ERRSTYLE_FATAL, opt ); + else + parser->target = param; + } + else if( !strcmp( opt, "verbose" ) || !strcmp( opt, "v" ) ) + { + parser->verbose = TRUE; + parser->stats = TRUE; + } + else if( !strcmp( opt, "stats" ) || !strcmp( opt, "s" ) ) + parser->stats = TRUE; + else if( !strcmp( opt, "warnings" ) || !strcmp( opt, "w" ) ) + no_warnings = FALSE; + else if( !strcmp( opt, "grammar" ) || !strcmp( opt, "G" ) ) + parser->show_grammar = TRUE; + else if( !strcmp( opt, "states" ) || !strcmp( opt, "S" ) ) + parser->show_states = TRUE; + else if( !strcmp( opt, "symbols" ) || !strcmp( opt, "T" ) ) + parser->show_symbols = TRUE; + else if( !strcmp( opt, "stdout" ) || 
!strcmp( opt, "t" ) ) + { + parser->to_stdout = TRUE; + status = stderr; + } + else if( !strcmp( opt, "productions" ) || !strcmp( opt, "P" ) ) + parser->show_productions = TRUE; + else if( !strcmp( opt, "no-opt" ) || !strcmp( opt, "n" ) ) + parser->optimize_states = FALSE; + else if( !strcmp( opt, "all" ) || !strcmp( opt, "a" ) ) + { + parser->all_warnings = TRUE; + no_warnings = FALSE; + } + else if( !strcmp( opt, "version" ) || !strcmp( opt, "V" ) ) + { + print_copyright( stdout ); + exit( EXIT_SUCCESS ); + } + else if( !strcmp( opt, "help" ) || !strcmp( opt, "h" ) ) + { + print_usage( stdout, *argv ); + exit( EXIT_SUCCESS ); + } + } + + if( rc == 1 ) + *filename = param; + else if( rc < 0 && param ) + print_error( parser, ERR_CMD_OPT, ERRSTYLE_FATAL, param ); + + return ( *filename ? TRUE : FALSE ); } /** Global program entry. @@ -238,222 +219,211 @@ Returns the number of errors count, 0 = all right :D */ int main( int argc, char** argv ) { - char* filename = (char*)NULL; - char* base_name = (char*)NULL; - char* mbase_name = (char*)NULL; - PARSER* parser; - BOOLEAN recursions = FALSE; + char* filename = (char*)NULL; + char* base_name = (char*)NULL; + char* mbase_name = (char*)NULL; + PARSER* parser; + BOOLEAN recursions = FALSE; - status = stdout; - parser = create_parser(); + status = stdout; + parser = create_parser(); #ifdef UNICC_BOOTSTRAP - /* On bootstrap build, print a warning message */ - printf( "*** WARNING: YOU'RE RUNNING A BOOTSTRAP BUILD OF UNICC!\n" ); - printf( "*** Some features may not work as you would expect them.\n\n" ); + /* On bootstrap build, print a warning message */ + printf( "*** WARNING: YOU'RE RUNNING A BOOTSTRAP BUILD OF UNICC!\n" ); + printf( "*** Some features may not work as you would expect them.\n\n" ); #endif - if( get_command_line( argc, argv, &filename, &base_name, parser ) ) - { - if( !pfiletostr( &parser->source, ( parser->filename = filename ) ) ) - { - print_error( parser, ERR_OPEN_INPUT_FILE, - ERRSTYLE_FATAL, filename 
); - free_parser( parser ); - - return error_count; - } - - /* Basename */ - if( !base_name ) - { - parser->p_basename = mbase_name = pstrdup( pbasename( filename ) ); - if( ( base_name = strrchr( parser->p_basename, '.' ) ) ) - *base_name = '\0'; - } - else - parser->p_basename = base_name; - - if( parser->verbose ) - fprintf( status, "UniCC v%s\n", print_version( FALSE ) ); - - PROGRESS( "Parsing grammar" ) - - /* Parse grammar structure */ - if( parse_grammar( parser, parser->filename, parser->source ) == 0 ) - { - DONE() - - if( parser->verbose ) - fprintf( status, "Parser construction mode: %s\n", - pmod[ parser->p_mode ] ); - - - PROGRESS( "Goal symbol detection" ) - if( parser->goal ) - { - SUCCESS() - - /* Single goal revision, if necessary */ - PROGRESS( "Setting up single goal symbol" ) - setup_single_goal( parser ); - DONE() - - /* Rewrite the grammar, if required */ - PROGRESS( "Rewriting grammar" ) - if( parser->p_mode == MODE_SCANNERLESS ) - rewrite_grammar( parser ); - - unique_charsets( parser ); - symbol_orders( parser ); - charsets_to_ptn( parser ); - - if( parser->p_mode == MODE_SCANNERLESS ) - inherit_fixiations( parser ); - DONE() - - /* Precedence fixup */ - PROGRESS( "Fixing precedences" ) - fix_precedences( parser ); - DONE() - - /* FIRST-set computation */ - PROGRESS( "Computing FIRST-sets" ) - compute_first( parser ); - DONE() - - if( parser->show_grammar ) - dump_grammar( status, parser ); - - if( parser->show_symbols ) - dump_symbols( status, parser ); - - if( parser->show_productions ) - dump_productions( status, parser ); - - /* Stupid production recognition */ - PROGRESS( "Validating rule integrity" ) - - if( !find_undef_or_unused( parser ) ) - { - if( check_stupid_productions( parser ) ) - recursions = TRUE; - - DONE() - - /* Parse table generator */ - PROGRESS( "Building parse tables" ) - generate_tables( parser ); - - if( parser->show_states ) - dump_lalr_states( status, parser ); - - DONE() - - /* Terminal anomaly detection */ - 
PROGRESS( "Terminal anomaly detection" ) - if( parser->p_mode == MODE_SCANNERLESS ) - { - if( recursions ) - { - SKIPPED( "Recursions detected" ); - } - else if( parser->p_reserve_regex ) - { - SKIPPED( "Tokens are reserved!" ); - } - else - { - check_regex_anomalies( parser ); - DONE() - } - } - else - { - SKIPPED( "Not required" ); - } - - /* Lexical analyzer generator */ - PROGRESS( "Constructing lexical analyzer" ) - - if( parser->p_mode == MODE_SCANNERLESS ) - merge_symbols_to_dfa( parser ); - else if( parser->p_mode == MODE_SCANNER ) - construct_single_lexer( parser ); - - DONE() - - /* Default production detection */ - PROGRESS( "Detecting default rules" ) - detect_default_productions( parser ); - DONE() - - /* Code generator */ - if( !( parser->p_template ) ) - parser->p_template = parser->target; - - if( parser->gen_prog ) - { - if( parser->verbose ) - fprintf( status, "Code generation target: %s%s\n", - parser->p_template, - ( parser->p_template == parser->target - && strcmp( parser->target, - UNICC_DEFAULT_TARGET ) == 0 ? - " (default)" : "" ) ); - - PROGRESS( "Invoking code generator" ) - build_code( parser ); - DONE() - } - - if( parser->gen_xml ) - { - PROGRESS( "Generating parser description file" ) - build_xml( parser, TRUE ); - DONE() - } - } - else - { - FAIL() - } - } - else - { - FAIL() - print_error( parser, ERR_NO_GOAL_SYMBOL, ERRSTYLE_FATAL ); - } - - if( parser->stats ) - fprintf( status, "%s%s produced %ld states " - "(%d error%s, %d warning%s), %d file%s\n", - ( parser->verbose ? "\n" : "" ), - filename, parray_count( parser->states ), - error_count, ( error_count == 1 ) ? "" : "s", - warning_count, ( warning_count == 1 ) ? "" : "s", - parser->files_count, - ( parser->files_count == 1 ) ? 
"" : "s" ); - } - else - { - FAIL() - error_count++; - } - - if( error_count && parser->gen_xml ) - build_xml( parser, FALSE ); - - free_parser( parser ); - } - else if( !error_count ) - { - print_usage( status, *argv ); - error_count++; - } - - pfree( mbase_name ); - - return error_count; + if( get_command_line( argc, argv, &filename, &base_name, parser ) ) + { + if( !pfiletostr( &parser->source, ( parser->filename = filename ) ) ) + { + print_error( parser, ERR_OPEN_INPUT_FILE, + ERRSTYLE_FATAL, filename ); + free_parser( parser ); + + return error_count; + } + + /* Basename */ + if( !base_name ) + { + parser->p_basename = mbase_name = pstrdup( pbasename( filename ) ); + if( ( base_name = strrchr( parser->p_basename, '.' ) ) ) + *base_name = '\0'; + } + else + parser->p_basename = base_name; + + if( parser->verbose ) + fprintf( status, "UniCC v%s\n", print_version( FALSE ) ); + + PROGRESS( "Parsing grammar" ) + + /* Parse grammar structure */ + if( parse_grammar( parser, parser->filename, parser->source ) == 0 ) + { + DONE() + + if( parser->verbose ) + fprintf( status, "Parser construction mode: %s\n", + pmod[ parser->p_mode ] ); + + + PROGRESS( "Goal symbol detection" ) + if( parser->goal ) + { + SUCCESS() + + /* Single goal revision, if necessary */ + PROGRESS( "Setting up single goal symbol" ) + setup_single_goal( parser ); + DONE() + + /* Rewrite the grammar, if required */ + PROGRESS( "Rewriting grammar" ) + if( parser->p_mode == MODE_SCANNERLESS ) + rewrite_grammar( parser ); + + unique_charsets( parser ); + symbol_orders( parser ); + charsets_to_ptn( parser ); + + if( parser->p_mode == MODE_SCANNERLESS ) + inherit_fixiations( parser ); + DONE() + + /* Precedence fixup */ + PROGRESS( "Fixing precedences" ) + fix_precedences( parser ); + DONE() + + /* FIRST-set computation */ + PROGRESS( "Computing FIRST-sets" ) + compute_first( parser ); + DONE() + + if( parser->show_grammar ) + dump_grammar( status, parser ); + + if( parser->show_symbols ) + dump_symbols( 
status, parser ); + + if( parser->show_productions ) + dump_productions( status, parser ); + + /* Stupid production recognition */ + PROGRESS( "Validating rule integrity" ) + + if( !find_undef_or_unused( parser ) ) + { + if( check_stupid_productions( parser ) ) + recursions = TRUE; + + DONE() + + /* Parse table generator */ + PROGRESS( "Building parse tables" ) + generate_tables( parser ); + + if( parser->show_states ) + dump_lalr_states( status, parser ); + + DONE() + + /* Terminal anomaly detection */ + PROGRESS( "Terminal anomaly detection" ) + if( parser->p_mode == MODE_SCANNERLESS ) + { + if( recursions ) + { + SKIPPED( "Recursions detected" ); + } + else if( parser->p_reserve_regex ) + { + SKIPPED( "Tokens are reserved!" ); + } + else + { + check_regex_anomalies( parser ); + DONE() + } + } + else + { + SKIPPED( "Not required" ); + } + + /* Lexical analyzer generator */ + PROGRESS( "Constructing lexical analyzer" ) + + if( parser->p_mode == MODE_SCANNERLESS ) + merge_symbols_to_dfa( parser ); + else if( parser->p_mode == MODE_SCANNER ) + construct_single_lexer( parser ); + + DONE() + + /* Default production detection */ + PROGRESS( "Detecting default rules" ) + detect_default_productions( parser ); + DONE() + + /* Code generator */ + if( !( parser->p_template ) ) + parser->p_template = parser->target; + + if( parser->gen_prog ) + { + if( parser->verbose ) + fprintf( status, "Code generation target: %s%s\n", + parser->p_template, + ( parser->p_template == parser->target + && strcmp( parser->target, + UNICC_DEFAULT_TARGET ) == 0 ? + " (default)" : "" ) ); + + PROGRESS( "Invoking code generator" ) + build_code( parser ); + DONE() + } + } + else + { + FAIL() + } + } + else + { + FAIL() + print_error( parser, ERR_NO_GOAL_SYMBOL, ERRSTYLE_FATAL ); + } + + if( parser->stats ) + fprintf( status, "%s%s produced %ld states " + "(%d error%s, %d warning%s), %d file%s\n", + ( parser->verbose ? 
"\n" : "" ), + filename, parray_count( parser->states ), + error_count, ( error_count == 1 ) ? "" : "s", + warning_count, ( warning_count == 1 ) ? "" : "s", + parser->files_count, + ( parser->files_count == 1 ) ? "" : "s" ); + } + else + { + FAIL() + error_count++; + } + + free_parser( parser ); + } + else if( !error_count ) + { + print_usage( status, *argv ); + error_count++; + } + + pfree( mbase_name ); + + return error_count; } - diff --git a/src/mem.c b/src/mem.c index e0f1a6e..20ebb2b 100644 --- a/src/mem.c +++ b/src/mem.c @@ -1,14 +1,5 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: mem.c -Author: Jan Max Meyer -Usage: Memory maintenance and management functions for several datatypes used - in UniCC for symbol and state management. ------------------------------------------------------------------------------ */ +/* Memory maintenance and management functions for several datatypes used in +UniCC for symbol and state management. */ #include "unicc.h" @@ -26,140 +17,140 @@ pointer to the ccl, else an identifying name. Returns a SYMBOL*-pointer to the SYMBOL structure representing the symbol. */ SYMBOL* get_symbol( PARSER* p, void* dfn, int type, BOOLEAN create ) { - char* keyname; - char keych; - char* name = (char*)dfn; - SYMBOL* sym = (SYMBOL*)NULL; - plistel* e; - - /* - 26.03.2008 Jan Max Meyer - Distinguish between the different symbol types even in the hash-table, - parameters added for error symbol. - - 11.11.2009 Jan Max Meyer - Changed name-parameter to dfn, to allow direct charclass-assignments to - get_symbol() instead of a name that defines the charclass. - Also, added trace macros. 
- */ - - PROC( "get_symbol" ); - PARMS( "p", "%p", p ); - PARMS( "dfn", "%p", dfn ); - PARMS( "type", "%d", type ); - PARMS( "create", "%d", create ); - - /* In case of a ccl-terminal, generate the name from the ccl */ - if( type == SYM_CCL_TERMINAL ) - { - MSG( "SYM_CCL_TERMINAL detected - converting character class" ); - name = pccl_to_str( (pccl*)dfn, TRUE ); - - VARS( "name", "%s", name ); - } - - /* - To distinguish between the different types, - a special identification character is prefixed to - the hashtable-keys - */ - switch( type ) - { - case SYM_CCL_TERMINAL: - MSG( "type is SYM_CCL_TERMINAL" ); - keych = '#'; - break; - - case SYM_REGEX_TERMINAL: - MSG( "type is SYM_REGEX_TERMINAL" ); - keych = '@'; - break; - - case SYM_NON_TERMINAL: - MSG( "type is SYM_NON_TERMINAL" ); - keych = '!'; - break; - - case SYM_SYSTEM_TERMINAL: - MSG( "type is a system terminal" ); - keych = '~'; - break; - - default: - MSG( "type is something else?" ); - RETURN( (SYMBOL*)NULL ); - } - - if( !( keyname = (char*)pmalloc( - ( pstrlen( name ) + 1 + 1 ) - * sizeof( char ) ) ) ) - { - OUTOFMEM; - return (SYMBOL*)NULL; - } - - sprintf( keyname, "%c%s", keych, name ); - VARS( "keyname", "%s", keyname ); - - if( !( e = plist_get_by_key( p->symbols, keyname ) ) && create ) - { - /* Create symbol */ - if( !( sym = (SYMBOL*)plist_access( plist_insert( p->symbols, - (plistel*)NULL, keyname, - (void*)NULL ) ) ) ) - RETURN( sym ); - - /* Set up attributes */ - sym->id = plist_count( p->symbols ) - 1; - sym->type = type; - sym->line = -1; - sym->nullable = FALSE; - sym->greedy = TRUE; - - sym->first = plist_create( 0, PLIST_MOD_PTR ); - - /* Terminal symbols have always theirself in the FIRST-set... 
*/ - if( IS_TERMINAL( sym ) ) - { - plist_push( sym->first, sym ); - sym->all_sym = plist_create( 0, PLIST_MOD_PTR ); - } - else - sym->productions = plist_create( 0, PLIST_MOD_PTR ); - - /* Initialize options table */ - sym->options = plist_create( sizeof( OPT ), PLIST_MOD_EXTKEYS ); - - /* Identifying name */ - if( type == SYM_CCL_TERMINAL ) - sym->ccl = (pccl*)dfn; - - sym->keyname = keyname; - keyname = (char*)NULL; - - if( !( sym->name = pstrdup( name ) ) ) - { - OUTOFMEM; - RETURN( (SYMBOL*)NULL ); - } - - /* System terminals are linked to the parser object */ - if( type == SYM_SYSTEM_TERMINAL ) - { - sym->used = TRUE; - sym->defined = TRUE; - - if( strcmp( sym->name, P_ERROR_RESYNC ) == 0 ) - p->error = sym; - else if( strcmp( sym->name, P_END_OF_FILE ) == 0 ) - p->end_of_input = sym; - } - } - else if( e ) - sym = (SYMBOL*)plist_access( e ); - - pfree( keyname ); - RETURN( sym ); + char* keyname; + char keych; + char* name = (char*)dfn; + SYMBOL* sym = (SYMBOL*)NULL; + plistel* e; + + /* + 26.03.2008 Jan Max Meyer + Distinguish between the different symbol types even in the hash-table, + parameters added for error symbol. + + 11.11.2009 Jan Max Meyer + Changed name-parameter to dfn, to allow direct charclass-assignments to + get_symbol() instead of a name that defines the charclass. + Also, added trace macros. 
+ */ + + PROC( "get_symbol" ); + PARMS( "p", "%p", p ); + PARMS( "dfn", "%p", dfn ); + PARMS( "type", "%d", type ); + PARMS( "create", "%d", create ); + + /* In case of a ccl-terminal, generate the name from the ccl */ + if( type == SYM_CCL_TERMINAL ) + { + MSG( "SYM_CCL_TERMINAL detected - converting character class" ); + name = pccl_to_str( (pccl*)dfn, TRUE ); + + VARS( "name", "%s", name ); + } + + /* + To distinguish between the different types, + a special identification character is prefixed to + the hashtable-keys + */ + switch( type ) + { + case SYM_CCL_TERMINAL: + MSG( "type is SYM_CCL_TERMINAL" ); + keych = '#'; + break; + + case SYM_REGEX_TERMINAL: + MSG( "type is SYM_REGEX_TERMINAL" ); + keych = '@'; + break; + + case SYM_NON_TERMINAL: + MSG( "type is SYM_NON_TERMINAL" ); + keych = '!'; + break; + + case SYM_SYSTEM_TERMINAL: + MSG( "type is a system terminal" ); + keych = '~'; + break; + + default: + MSG( "type is something else?" ); + RETURN( (SYMBOL*)NULL ); + } + + if( !( keyname = (char*)pmalloc( + ( pstrlen( name ) + 1 + 1 ) + * sizeof( char ) ) ) ) + { + OUTOFMEM; + return (SYMBOL*)NULL; + } + + sprintf( keyname, "%c%s", keych, name ); + VARS( "keyname", "%s", keyname ); + + if( !( e = plist_get_by_key( p->symbols, keyname ) ) && create ) + { + /* Create symbol */ + if( !( sym = (SYMBOL*)plist_access( plist_insert( p->symbols, + (plistel*)NULL, keyname, + (void*)NULL ) ) ) ) + RETURN( sym ); + + /* Set up attributes */ + sym->id = plist_count( p->symbols ) - 1; + sym->type = type; + sym->line = -1; + sym->nullable = FALSE; + sym->greedy = TRUE; + + sym->first = plist_create( 0, PLIST_MOD_PTR ); + + /* Terminal symbols have always theirself in the FIRST-set... 
*/ + if( IS_TERMINAL( sym ) ) + { + plist_push( sym->first, sym ); + sym->all_sym = plist_create( 0, PLIST_MOD_PTR ); + } + else + sym->productions = plist_create( 0, PLIST_MOD_PTR ); + + /* Initialize options table */ + sym->options = plist_create( sizeof( OPT ), PLIST_MOD_EXTKEYS ); + + /* Identifying name */ + if( type == SYM_CCL_TERMINAL ) + sym->ccl = (pccl*)dfn; + + sym->keyname = keyname; + keyname = (char*)NULL; + + if( !( sym->name = pstrdup( name ) ) ) + { + OUTOFMEM; + RETURN( (SYMBOL*)NULL ); + } + + /* System terminals are linked to the parser object */ + if( type == SYM_SYSTEM_TERMINAL ) + { + sym->used = TRUE; + sym->defined = TRUE; + + if( strcmp( sym->name, P_ERROR_RESYNC ) == 0 ) + p->error = sym; + else if( strcmp( sym->name, P_END_OF_FILE ) == 0 ) + p->end_of_input = sym; + } + } + else if( e ) + sym = (SYMBOL*)plist_access( e ); + + pfree( keyname ); + RETURN( sym ); } /** Frees a symbol structure and all its members. @@ -167,20 +158,20 @@ SYMBOL* get_symbol( PARSER* p, void* dfn, int type, BOOLEAN create ) //sym// is the symbol to be freed. */ void free_symbol( SYMBOL* sym ) { - pfree( sym->code ); - pfree( sym->name ); - pfree( sym->emit ); + pfree( sym->code ); + pfree( sym->name ); + pfree( sym->emit ); - if( sym->ptn ) - pregex_ptn_free( sym->ptn ); - else - sym->ccl = pccl_free( sym->ccl ); + if( sym->ptn ) + pregex_ptn_free( sym->ptn ); + else + sym->ccl = pccl_free( sym->ccl ); - plist_free( sym->first ); - plist_free( sym->productions ); - plist_free( sym->all_sym ); + plist_free( sym->first ); + plist_free( sym->productions ); + plist_free( sym->all_sym ); - sym->options = free_opts( sym->options ); + sym->options = free_opts( sym->options ); } /** Creates a production for an according left-hand side and inserts it into the @@ -194,31 +185,31 @@ Returns a PROD*-pointer to the production, or (PROD*)NULL in error case. 
*/ PROD* create_production( PARSER* p, SYMBOL* lhs ) { - PROD* prod = (PROD*)NULL; + PROD* prod = (PROD*)NULL; - if( !p ) - return (PROD*)NULL; + if( !p ) + return (PROD*)NULL; - prod = (PROD*)plist_malloc( p->productions ); - prod->id = plist_count( p->productions ) - 1; + prod = (PROD*)plist_malloc( p->productions ); + prod->id = plist_count( p->productions ) - 1; - prod->rhs = plist_create( 0, PLIST_MOD_PTR ); - prod->sem_rhs = plist_create( 0, PLIST_MOD_PTR ); - prod->all_lhs = plist_create( 0, PLIST_MOD_PTR ); + prod->rhs = plist_create( 0, PLIST_MOD_PTR ); + prod->sem_rhs = plist_create( 0, PLIST_MOD_PTR ); + prod->all_lhs = plist_create( 0, PLIST_MOD_PTR ); - /* Initialize options table */ - prod->options = plist_create( sizeof( OPT ), PLIST_MOD_EXTKEYS ); + /* Initialize options table */ + prod->options = plist_create( sizeof( OPT ), PLIST_MOD_EXTKEYS ); - /* Set up production attributes */ - if( lhs ) - { - prod->lhs = lhs; + /* Set up production attributes */ + if( lhs ) + { + prod->lhs = lhs; - plist_push( prod->all_lhs, lhs ); - plist_push( lhs->productions, prod ); - } + plist_push( prod->all_lhs, lhs ); + plist_push( lhs->productions, prod ); + } - return prod; + return prod; } /** Appends a symbol and a possible, semantic identifier to the right-hand side @@ -231,32 +222,32 @@ side, can be (char*)NULL. */ void append_to_production( PROD* p, SYMBOL* sym, char* name ) { - SYMBOL* deriv = sym; - - if( !( p && sym ) ) - { - WRONGPARAM; - return; - } - - /* If no name is given, then use the symbol's name, if it's a - nonterminal or a regex-terminal. */ - if( !name ) - { - /* - 11.07.2010 Jan Max Meyer - Use name of derived symbol in case of virtual productions. 
- */ - while( deriv->derived_from ) - deriv = deriv->derived_from; - - if( !( deriv->generated ) && - ( deriv->type == SYM_NON_TERMINAL - || deriv->type == SYM_REGEX_TERMINAL ) ) - name = deriv->name; - } - - plist_insert( p->rhs, (plistel*)NULL, name, sym ); + SYMBOL* deriv = sym; + + if( !( p && sym ) ) + { + WRONGPARAM; + return; + } + + /* If no name is given, then use the symbol's name, if it's a + nonterminal or a regex-terminal. */ + if( !name ) + { + /* + 11.07.2010 Jan Max Meyer + Use name of derived symbol in case of virtual productions. + */ + while( deriv->derived_from ) + deriv = deriv->derived_from; + + if( !( deriv->generated ) && + ( deriv->type == SYM_NON_TERMINAL + || deriv->type == SYM_REGEX_TERMINAL ) ) + name = deriv->name; + } + + plist_insert( p->rhs, (plistel*)NULL, name, sym ); } /** Frees a production structure and all its members. @@ -264,14 +255,14 @@ void append_to_production( PROD* p, SYMBOL* sym, char* name ) //prod// is the production to be freed. */ void free_production( PROD* prod ) { - plist_free( prod->rhs ); - plist_free( prod->sem_rhs ); - plist_free( prod->all_lhs ); + plist_free( prod->rhs ); + plist_free( prod->sem_rhs ); + plist_free( prod->all_lhs ); - pfree( prod->code ); - pfree( prod->emit ); + pfree( prod->code ); + pfree( prod->emit ); - prod->options = free_opts( prod->options ); + prod->options = free_opts( prod->options ); } @@ -286,15 +277,15 @@ Returns an ITEM*-pointer to the newly created item, (ITEM*)NULL in error case. */ ITEM* create_item( PROD* p ) { - ITEM* i = (ITEM*)NULL; + ITEM* i = (ITEM*)NULL; - i = (ITEM*)pmalloc( sizeof( ITEM ) ); - i->prod = p; - i->next_symbol = (SYMBOL*)plist_access( plist_first( p->rhs ) ); + i = (ITEM*)pmalloc( sizeof( ITEM ) ); + i->prod = p; + i->next_symbol = (SYMBOL*)plist_access( plist_first( p->rhs ) ); - plist_init( &i->lookahead, 0, PLIST_MOD_PTR ); + plist_init( &i->lookahead, 0, PLIST_MOD_PTR ); - return i; + return i; } /** Frees an item structure and all its members. 
@@ -302,8 +293,8 @@ ITEM* create_item( PROD* p ) //it// is the pointer to item structure to be freed. */ void free_item( ITEM* it ) { - plist_erase( &it->lookahead ); - pfree( it ); + plist_erase( &it->lookahead ); + pfree( it ); } /** Creates a new state. @@ -314,20 +305,20 @@ Returns a STATE* Pointer to the newly created state, (STATE*)NULL in error case. */ STATE* create_state( PARSER* p ) { - STATE* st; + STATE* st; - if( !p ) - { - WRONGPARAM; - return (STATE*)NULL; - } + if( !p ) + { + WRONGPARAM; + return (STATE*)NULL; + } - st = (STATE*)parray_malloc( p->states ); + st = (STATE*)parray_malloc( p->states ); - /* Set state unique key */ - st->state_id = parray_count( p->states ) - 1; + /* Set state unique key */ + st->state_id = parray_count( p->states ) - 1; - return st; + return st; } /** Frees a state structure and all its members. @@ -335,24 +326,24 @@ STATE* create_state( PARSER* p ) //st// is the Pointer to state structure to be freed. */ void free_state( STATE* st ) { - LIST* li; + LIST* li; - for( li = st->kernel; li; li = li->next ) - free_item( li->pptr ); + for( li = st->kernel; li; li = li->next ) + free_item( li->pptr ); - for( li = st->epsilon; li; li = li->next ) - free_item( li->pptr ); + for( li = st->epsilon; li; li = li->next ) + free_item( li->pptr ); - for( li = st->actions; li; li = li->next ) - free_tabcol( li->pptr ); + for( li = st->actions; li; li = li->next ) + free_tabcol( li->pptr ); - for( li = st->gotos; li; li = li->next ) - free_tabcol( li->pptr ); + for( li = st->gotos; li; li = li->next ) + free_tabcol( li->pptr ); - list_free( st->kernel ); - list_free( st->epsilon ); - list_free( st->actions ); - list_free( st->gotos ); + list_free( st->kernel ); + list_free( st->epsilon ); + list_free( st->actions ); + list_free( st->gotos ); } /** Creates a table column to be added to a state's goto-table or action-table @@ -370,22 +361,22 @@ Returns a TABCOL* Pointer to the new action item. On error, (TABCOL*)NULL is returned. 
*/ TABCOL* create_tabcol( SYMBOL* sym, short action, int idx, ITEM* item ) { - TABCOL* act = (TABCOL*)NULL; - - act = (TABCOL*)pmalloc( sizeof( TABCOL ) ); - if( act ) - { - memset( act, 0, sizeof( TABCOL ) ); - - act->symbol = sym; - act->action = action; - act->index = idx; - act->derived_from = item; - } - else - OUTOFMEM; - - return act; + TABCOL* act = (TABCOL*)NULL; + + act = (TABCOL*)pmalloc( sizeof( TABCOL ) ); + if( act ) + { + memset( act, 0, sizeof( TABCOL ) ); + + act->symbol = sym; + act->action = action; + act->index = idx; + act->derived_from = item; + } + else + OUTOFMEM; + + return act; } /** Frees an TABCOL-structure and all its members. @@ -394,7 +385,7 @@ TABCOL* create_tabcol( SYMBOL* sym, short action, int idx, ITEM* item ) */ void free_tabcol( TABCOL* act ) { - pfree( act ); + pfree( act ); } /** Tries to find the entry for a specified symbol within a state's action- or @@ -409,16 +400,16 @@ searching on the row, (TABCOL*)NULL is returned. */ TABCOL* find_tabcol( LIST* row, SYMBOL* sym ) { - TABCOL* act = (TABCOL*)NULL; + TABCOL* act = (TABCOL*)NULL; - for( ; row; row = row->next ) - { - act = row->pptr; - if( act->symbol == sym ) - return act; - } + for( ; row; row = row->next ) + { + act = row->pptr; + if( act->symbol == sym ) + return act; + } - return (TABCOL*)NULL; + return (TABCOL*)NULL; } /** Creates an option data structure and optionally inserts it into a @@ -432,25 +423,25 @@ Returns a OPT* Pointer to the newly created OPT-structure, (OPT*)NULL in error case. 
*/ OPT* create_opt( plist* options, char* opt, char* def ) { - OPT option; - plistel* e; + OPT option; + plistel* e; - memset( &option, 0, sizeof( OPT ) ); + memset( &option, 0, sizeof( OPT ) ); - option.opt = pstrdup( opt ); - option.def = pstrdup( def ); + option.opt = pstrdup( opt ); + option.def = pstrdup( def ); - if( !( e = plist_insert( options, - (plistel*)NULL, option.opt, (void*)&option ) ) ) - { - pfree( option.opt ); - pfree( option.def ); + if( !( e = plist_insert( options, + (plistel*)NULL, option.opt, (void*)&option ) ) ) + { + pfree( option.opt ); + pfree( option.def ); - OUTOFMEM; - return (OPT*)NULL; - } + OUTOFMEM; + return (OPT*)NULL; + } - return (OPT*)plist_access( e ); + return (OPT*)plist_access( e ); } /** Frees the entire options list and its options. @@ -460,44 +451,44 @@ OPT* create_opt( plist* options, char* opt, char* def ) Always returns (plist*)NULL */ plist* free_opts( plist* options ) { - OPT opt; + OPT opt; - if( !options ) - return (plist*)NULL; + if( !options ) + return (plist*)NULL; - while( plist_pop( options, (void*)&opt ) ) - { - pfree( opt.opt ); - pfree( opt.def ); - } + while( plist_pop( options, (void*)&opt ) ) + { + pfree( opt.opt ); + pfree( opt.def ); + } - return plist_free( options ); + return plist_free( options ); } static int sort_symbols( plist* lst, plistel* el, plistel* er ) { - int ret = 0; - SYMBOL* l = (SYMBOL*)plist_access( el ); - SYMBOL* r = (SYMBOL*)plist_access( er ); - - if( l->type < r->type ) - return 1; - else if( l->type > r->type ) - return -1; - - if( l->type == SYM_REGEX_TERMINAL && r->type == SYM_REGEX_TERMINAL ) - { - if( l->keyword < r->keyword ) - ret = 1; - else if( l->keyword > r->keyword ) - ret = -1; - } - - if( l->id < r->id ) - ret = 1; - - return ret; + int ret = 0; + SYMBOL* l = (SYMBOL*)plist_access( el ); + SYMBOL* r = (SYMBOL*)plist_access( er ); + + if( l->type < r->type ) + return 1; + else if( l->type > r->type ) + return -1; + + if( l->type == SYM_REGEX_TERMINAL && r->type == 
SYM_REGEX_TERMINAL ) + { + if( l->keyword < r->keyword ) + ret = 1; + else if( l->keyword > r->keyword ) + ret = -1; + } + + if( l->id < r->id ) + ret = 1; + + return ret; } /** Allocates and initializes a new parser information structure. @@ -507,40 +498,40 @@ Returns a PARSER* Pointer to the newly created PARSER-structure, */ PARSER* create_parser( void ) { - PARSER* pptr = (PARSER*)NULL; + PARSER* pptr = (PARSER*)NULL; - if( !( pptr = pmalloc( sizeof( PARSER ) ) ) ) - { - OUTOFMEM; - return (PARSER*)NULL; - } + if( !( pptr = pmalloc( sizeof( PARSER ) ) ) ) + { + OUTOFMEM; + return (PARSER*)NULL; + } - memset( pptr, 0, sizeof( PARSER ) ); + memset( pptr, 0, sizeof( PARSER ) ); - /* Initialize any dynamic lists and arrays */ - pptr->symbols = plist_create( sizeof( SYMBOL ), - PLIST_MOD_EXTKEYS | PLIST_MOD_UNIQUE - | PLIST_MOD_AUTOSORT ); - plist_set_sortfn( pptr->symbols, sort_symbols ); + /* Initialize any dynamic lists and arrays */ + pptr->symbols = plist_create( sizeof( SYMBOL ), + PLIST_MOD_EXTKEYS | PLIST_MOD_UNIQUE + | PLIST_MOD_AUTOSORT ); + plist_set_sortfn( pptr->symbols, sort_symbols ); - pptr->productions = plist_create( sizeof( PROD ), PLIST_MOD_NONE ); + pptr->productions = plist_create( sizeof( PROD ), PLIST_MOD_NONE ); - pptr->states = parray_create( sizeof( STATE ), 32 ); + pptr->states = parray_create( sizeof( STATE ), 32 ); - /* Setup defaults */ - pptr->p_mode = MODE_SCANNERLESS; - pptr->p_universe = PCCL_MAX; - pptr->optimize_states = TRUE; - pptr->gen_prog = TRUE; - pptr->target = UNICC_DEFAULT_TARGET; + /* Setup defaults */ + pptr->p_mode = MODE_SCANNERLESS; + pptr->p_universe = PCCL_MAX; + pptr->optimize_states = TRUE; + pptr->gen_prog = TRUE; + pptr->target = UNICC_DEFAULT_TARGET; - /* Initialize options table */ - pptr->options = plist_create( sizeof( OPT ), PLIST_MOD_EXTKEYS ); + /* Initialize options table */ + pptr->options = plist_create( sizeof( OPT ), PLIST_MOD_EXTKEYS ); - /* End of Input symbol must exist in every parser! 
*/ - get_symbol( pptr, P_END_OF_FILE, SYM_SYSTEM_TERMINAL, TRUE ); + /* End of Input symbol must exist in every parser! */ + get_symbol( pptr, P_END_OF_FILE, SYM_SYSTEM_TERMINAL, TRUE ); - return pptr; + return pptr; } /** Frees a parser structure and all its members. @@ -548,50 +539,50 @@ PARSER* create_parser( void ) //parser// is the Parser structure to be freed. */ void free_parser( PARSER* parser ) { - LIST* it = (LIST*)NULL; - STATE* st; - pregex_dfa* dfa; + LIST* it = (LIST*)NULL; + STATE* st; + pregex_dfa* dfa; - for( it = parser->vtypes; it; it = it->next ) - free_vtype( it->pptr ); + for( it = parser->vtypes; it; it = it->next ) + free_vtype( it->pptr ); - for( it = parser->dfas; it; it = it->next ) - { - dfa = (pregex_dfa*)list_access( it ); - pregex_dfa_free( dfa ); - } + for( it = parser->dfas; it; it = it->next ) + { + dfa = (pregex_dfa*)list_access( it ); + pregex_dfa_free( dfa ); + } - plist_iter_access( parser->symbols, (plistfn)free_symbol ); - plist_free( parser->symbols ); + plist_iter_access( parser->symbols, (plistfn)free_symbol ); + plist_free( parser->symbols ); - plist_free( parser->productions ); + plist_free( parser->productions ); - parray_for( parser->states, st ) - free_state( st ); + parray_for( parser->states, st ) + free_state( st ); - parray_free( parser->states ); + parray_free( parser->states ); - list_free( parser->vtypes ); - list_free( parser->dfas ); + list_free( parser->vtypes ); + list_free( parser->dfas ); - if( parser->p_template != parser->target ) - pfree( parser->p_template ); + if( parser->p_template != parser->target ) + pfree( parser->p_template ); - pfree( parser->p_prefix ); - pfree( parser->p_header ); - pfree( parser->p_footer ); - pfree( parser->p_pcb ); + pfree( parser->p_prefix ); + pfree( parser->p_header ); + pfree( parser->p_footer ); + pfree( parser->p_pcb ); - pfree( parser->p_def_action ); - pfree( parser->p_def_action_e ); + pfree( parser->p_def_action ); + pfree( parser->p_def_action_e ); - pfree( 
parser->source ); + pfree( parser->source ); - parser->options = free_opts( parser->options ); + parser->options = free_opts( parser->options ); - xml_free( parser->err_xml ); + xml_free( parser->err_xml ); - pfree( parser ); + pfree( parser ); } /** Seaches for a value stack type. @@ -604,28 +595,28 @@ a matching type has been found. */ VTYPE* find_vtype( PARSER* p, char* name ) { - VTYPE* vt; - LIST* l; - char* test_name; - - test_name = pstrdup( name ); - if( !test_name ) - OUTOFMEM; - - str_no_whitespace( test_name ); - - for( l = p->vtypes; l; l = l->next ) - { - vt = (VTYPE*)(l->pptr); - if( !strcmp( vt->int_name, test_name ) ) - { - pfree( test_name ); - return vt; - } - } - - pfree( test_name ); - return (VTYPE*)NULL; + VTYPE* vt; + LIST* l; + char* test_name; + + test_name = pstrdup( name ); + if( !test_name ) + OUTOFMEM; + + str_no_whitespace( test_name ); + + for( l = p->vtypes; l; l = l->next ) + { + vt = (VTYPE*)(l->pptr); + if( !strcmp( vt->int_name, test_name ) ) + { + pfree( test_name ); + return vt; + } + } + + pfree( test_name ); + return (VTYPE*)NULL; } /** Creates a new value stack type, or returns an existing one, if it does @@ -637,35 +628,35 @@ already exists. Returns a VTYPE*-pointer to the VTYPE structure representing the value type. 
*/ VTYPE* create_vtype( PARSER* p, char* name ) { - VTYPE* vt; + VTYPE* vt; - if( !( vt = find_vtype( p, name ) ) ) - { - vt = (VTYPE*)pmalloc( sizeof( VTYPE ) ); - if( !vt ) - OUTOFMEM; + if( !( vt = find_vtype( p, name ) ) ) + { + vt = (VTYPE*)pmalloc( sizeof( VTYPE ) ); + if( !vt ) + OUTOFMEM; - vt->id = list_count( p->vtypes ); + vt->id = list_count( p->vtypes ); - vt->int_name = pstrdup( name ); - if( !( vt->int_name ) ) - OUTOFMEM; + vt->int_name = pstrdup( name ); + if( !( vt->int_name ) ) + OUTOFMEM; - str_no_whitespace( vt->int_name ); + str_no_whitespace( vt->int_name ); - vt->real_def = pstrdup( name ); - if( !( vt->real_def ) ) - OUTOFMEM; + vt->real_def = pstrdup( name ); + if( !( vt->real_def ) ) + OUTOFMEM; - /* - printf( "Adding vtype >%s< >%s< and >%s<\n", - name, vt->int_name, vt->real_def ); - */ + /* + printf( "Adding vtype >%s< >%s< and >%s<\n", + name, vt->int_name, vt->real_def ); + */ - p->vtypes = list_push( p->vtypes, (void*)vt ); - } + p->vtypes = list_push( p->vtypes, (void*)vt ); + } - return vt; + return vt; } /** Frees a VTYPE-structure. @@ -673,8 +664,7 @@ VTYPE* create_vtype( PARSER* p, char* name ) //vt// is the VTYPE-Structure to be deleted. */ void free_vtype( VTYPE* vt ) { - pfree( vt->int_name ); - pfree( vt->real_def ); - pfree( vt ); + pfree( vt->int_name ); + pfree( vt->real_def ); + pfree( vt ); } - diff --git a/src/parse.min b/src/parse.min index ceabf3f..7e14bce 100644 --- a/src/parse.min +++ b/src/parse.min @@ -1,19 +1,10 @@ { -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: parse.min -Author: Jan Max Meyer -Usage: Grammar definition parser - FOR BOOTSTRAPPING ONLY!! 
------------------------------------------------------------------------------ */ +/* Grammar definition parser - FOR BOOTSTRAPPING ONLY!! */ /* - * IMPORTANT NOTIFICATION ON THIS PARSER - * ------------------------------------- - * This parser is only used for bootstrapping UniCC, and is not be tended any- + * IMPORTANT NOTIFICATION ON THIS FILE + * ----------------------------------- + * This file is only used for bootstrapping UniCC, and is not be tended any- * more with new features. Instead, implement new language-oriented features * into the final UniCC parser, parse.par. This grammar uses the min_lalr1 * experimental and testing purpose parser generator to build a first running @@ -22,7 +13,6 @@ Usage: Grammar definition parser - FOR BOOTSTRAPPING ONLY!! * So please note: Only bugfixing and/or revision is allowed here! */ - /* * Includes */ @@ -35,21 +25,21 @@ Usage: Grammar definition parser - FOR BOOTSTRAPPING ONLY!! union _stack_val { - char* str; - char ch; - int num; - void* ptr; - BOOLEAN negative; - - /* - struct - { - pregex_nfa_st* start; - pregex_nfa_st* end; - } nfa; - */ - - pregex_ptn* ptn; + char* str; + char ch; + int num; + void* ptr; + BOOLEAN negative; + + /* + struct + { + pregex_nfa_st* start; + pregex_nfa_st* end; + } nfa; + */ + + pregex_ptn* ptn; }; #define PDEBUG 0 @@ -82,46 +72,46 @@ char* strbuf = (char*)NULL; /* Append character to current string */ static void strbuf_append( char ch ) { - int len; - len = strbuf ? (int)strlen( strbuf ) : 0; - - if( !strbuf ) - strbuf = (char*)pmalloc( ( MALLOC_STEP + 2 ) * sizeof( char ) ); - else if( len % MALLOC_STEP == 0 ) - strbuf = (char*)prealloc( (char*)strbuf, ( len + MALLOC_STEP + 2 ) - * sizeof( char ) ); - - strbuf[len] = ch; - strbuf[len+1] = '\0'; - strbuf[len+2] = '\0'; + int len; + len = strbuf ? 
(int)strlen( strbuf ) : 0; + + if( !strbuf ) + strbuf = (char*)pmalloc( ( MALLOC_STEP + 2 ) * sizeof( char ) ); + else if( len % MALLOC_STEP == 0 ) + strbuf = (char*)prealloc( (char*)strbuf, ( len + MALLOC_STEP + 2 ) + * sizeof( char ) ); + + strbuf[len] = ch; + strbuf[len+1] = '\0'; + strbuf[len+2] = '\0'; } /* Create a new string */ static void reset_strbuf( void ) { - if( strbuf ) - { - *strbuf = '\0'; - *(strbuf+1) = '\0'; - } + if( strbuf ) + { + *strbuf = '\0'; + *(strbuf+1) = '\0'; + } } /* Set precedence and associativiy */ static void set_assoc_and_prec( LIST* symbols, int assoc ) { - LIST* l; - SYMBOL* sym; - - for( l = symbols; l; l = l->next ) - { - sym = l->pptr; - sym->prec = prec_cnt; - sym->assoc = assoc; - } - - list_free( symbols ); - prec_cnt++; + LIST* l; + SYMBOL* sym; + + for( l = symbols; l; l = l->next ) + { + sym = l->pptr; + sym->prec = prec_cnt; + sym->assoc = assoc; + } + + list_free( symbols ); + prec_cnt++; } /* @@ -133,587 +123,586 @@ static void set_assoc_and_prec( LIST* symbols, int assoc ) $ sym modifier code ccl_string kw type; grammar_spec: fixed_directives segments - ; + ; segments: segments segment - | - ; + | + ; fixed_directives: fixed_directives "#!" fixed_directive ';' - | - ; + | + ; fixed_directive: "mode" mode_type - | "language" string { if( parser->p_template ) - pfree( parser->p_template ); - parser->p_template = pstrdup( strbuf ); - } - | "language" identifier { if( parser->p_template ) - pfree( parser->p_template ); - parser->p_template = pstrdup( strbuf ); - } - ; + | "language" string { if( parser->p_template ) + pfree( parser->p_template ); + parser->p_template = pstrdup( strbuf ); + } + | "language" identifier { if( parser->p_template ) + pfree( parser->p_template ); + parser->p_template = pstrdup( strbuf ); + } + ; mode_type: "context-sensitive" { parser->p_mode = MODE_SCANNERLESS;} /* deprecate! */ - | "context-insensitive" { parser->p_mode = MODE_SCANNER; } /* deprecate! 
*/ - | "sensitive" { parser->p_mode = MODE_SCANNERLESS; }/* deprecate! */ - | "insensitive" { parser->p_mode = MODE_SCANNER; } /* deprecate! */ - | "scannerless" { parser->p_mode = MODE_SCANNERLESS; } - | "scanner" { parser->p_mode = MODE_SCANNER; } - ; + | "context-insensitive" { parser->p_mode = MODE_SCANNER; } /* deprecate! */ + | "sensitive" { parser->p_mode = MODE_SCANNERLESS; }/* deprecate! */ + | "insensitive" { parser->p_mode = MODE_SCANNER; } /* deprecate! */ + | "scannerless" { parser->p_mode = MODE_SCANNERLESS; } + | "scanner" { parser->p_mode = MODE_SCANNER; } + ; segment: code - | definition ';' - | unfixed_directive ';'; + | definition ';' + | unfixed_directive ';'; unfixed_directive: '#' directive_parms ; directive_parms: "whitespaces" symbol_list { LIST* l; - SYMBOL* s; - - for( l = #2.ptr; l; l = l->next ) - { - s = l->pptr; - - /* Do not allow nonterminal definitions here - in conext-free mode */ - if( s->type == SYM_NON_TERMINAL && - parser->p_mode != - MODE_SCANNERLESS ) - { - print_error( parser, ERR_NONTERM_WS_NOT_ALLOWED, - ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, - parser->filename, line, - s->name ); - } - else - s->whitespace = TRUE; - } - - list_free( #2.ptr ); - } - | "lexeme separation" boolean { /* Ouput a warning, if this definition - is effectless */ - if( parser->p_mode != MODE_SCANNERLESS ) - { - print_error( parser, ERR_NO_EFFECT_IN_MODE, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - parser->filename, line, - "lexem separation" ); - } - else - parser->p_lexem_sep = !(#2.negative); } - | "lexeme" symbol_list { LIST* l; - SYMBOL* s; - - - /* Ouput a warning, if this definition - is effectless */ - if( parser->p_mode != MODE_SCANNERLESS ) - { - print_error( parser, ERR_NO_EFFECT_IN_MODE, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - parser->filename, line, "lexeme" ); - } - else - { - for( l = #2.ptr; l; l = l->next ) - { - s = l->pptr; - s->lexem = TRUE; - } - } - - list_free( #2.ptr ); - - } - | "fixate" symbol_list { LIST* l; - SYMBOL* s; - - 
/* Ouput a warning, if this definition - is effectless */ - if( parser->p_mode != MODE_SCANNERLESS ) - { - print_error( parser, ERR_NO_EFFECT_IN_MODE, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - parser->filename, line, "fixate" ); - } - else - { - for( l = #2.ptr; l; l = l->next ) - { - s = l->pptr; - s->fixated = TRUE; - } - } - - list_free( #2.ptr ); - - } - | "left" symbol_list { set_assoc_and_prec( #2.ptr, ASSOC_LEFT ); } - | "right" symbol_list { set_assoc_and_prec( #2.ptr, ASSOC_RIGHT ); } - | "nonassoc" symbol_list { set_assoc_and_prec( #2.ptr, ASSOC_NOASSOC ); } - | "prefix" string { if( !( parser->p_prefix ) ) - parser->p_prefix = pstrdup( strbuf ); } - | "default action" code { if( !( parser->p_def_action ) ) - parser->p_def_action = pstrdup( strbuf ); - } - | "default epsilon action" code { if( !( parser->p_def_action_e ) ) - parser->p_def_action_e = pstrdup( strbuf ); - } - | "case insensitive strings" boolean { parser->p_cis_strings = !(#2.negative); } - | "prologue" code { parser->p_header = pstrcatstr( - parser->p_header, strbuf, FALSE ); } - | "epilogue" code { parser->p_footer = pstrcatstr( - parser->p_footer, strbuf, FALSE ); } - | "pcb" code { parser->p_pcb = pstrcatstr( - parser->p_pcb, strbuf, FALSE ); } - ; + SYMBOL* s; + + for( l = #2.ptr; l; l = l->next ) + { + s = l->pptr; + + /* Do not allow nonterminal definitions here + in conext-free mode */ + if( s->type == SYM_NON_TERMINAL && + parser->p_mode != + MODE_SCANNERLESS ) + { + print_error( parser, ERR_NONTERM_WS_NOT_ALLOWED, + ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, + parser->filename, line, + s->name ); + } + else + s->whitespace = TRUE; + } + + list_free( #2.ptr ); + } + | "lexeme separation" boolean { /* Ouput a warning, if this definition + is effectless */ + if( parser->p_mode != MODE_SCANNERLESS ) + { + print_error( parser, ERR_NO_EFFECT_IN_MODE, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + parser->filename, line, + "lexem separation" ); + } + else + parser->p_lexem_sep = !(#2.negative); } + 
| "lexeme" symbol_list { LIST* l; + SYMBOL* s; + + + /* Ouput a warning, if this definition + is effectless */ + if( parser->p_mode != MODE_SCANNERLESS ) + { + print_error( parser, ERR_NO_EFFECT_IN_MODE, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + parser->filename, line, "lexeme" ); + } + else + { + for( l = #2.ptr; l; l = l->next ) + { + s = l->pptr; + s->lexem = TRUE; + } + } + + list_free( #2.ptr ); + + } + | "fixate" symbol_list { LIST* l; + SYMBOL* s; + + /* Ouput a warning, if this definition + is effectless */ + if( parser->p_mode != MODE_SCANNERLESS ) + { + print_error( parser, ERR_NO_EFFECT_IN_MODE, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + parser->filename, line, "fixate" ); + } + else + { + for( l = #2.ptr; l; l = l->next ) + { + s = l->pptr; + s->fixated = TRUE; + } + } + + list_free( #2.ptr ); + + } + | "left" symbol_list { set_assoc_and_prec( #2.ptr, ASSOC_LEFT ); } + | "right" symbol_list { set_assoc_and_prec( #2.ptr, ASSOC_RIGHT ); } + | "nonassoc" symbol_list { set_assoc_and_prec( #2.ptr, ASSOC_NOASSOC ); } + | "prefix" string { if( !( parser->p_prefix ) ) + parser->p_prefix = pstrdup( strbuf ); } + | "default action" code { if( !( parser->p_def_action ) ) + parser->p_def_action = pstrdup( strbuf ); + } + | "default epsilon action" code { if( !( parser->p_def_action_e ) ) + parser->p_def_action_e = pstrdup( strbuf ); + } + | "case insensitive strings" boolean { parser->p_cis_strings = !(#2.negative); } + | "prologue" code { parser->p_header = pstrcatstr( + parser->p_header, strbuf, FALSE ); } + | "epilogue" code { parser->p_footer = pstrcatstr( + parser->p_footer, strbuf, FALSE ); } + | "pcb" code { parser->p_pcb = pstrcatstr( + parser->p_pcb, strbuf, FALSE ); } + ; boolean: "on" { ##.negative = FALSE; } - | "off" { ##.negative = TRUE; } - | { ##.negative = FALSE; } - ; + | "off" { ##.negative = TRUE; } + | { ##.negative = FALSE; } + ; symbol_list: symbol_list sym { ##.ptr = list_push( #1.ptr, #2.ptr ); } - | sym { ##.ptr = list_push( (LIST*)NULL, 
#1.ptr ); } - ; + | sym { ##.ptr = list_push( (LIST*)NULL, #1.ptr ); } + ; definition: lhs goal_mark type ":" productions { - LIST* l; - PROD* p; - - if( #2.ch == '$' ) - { - if( parser->goal ) - { - print_error( parser, ERR_MULTIPLE_GOAL_DEF, - ERRSTYLE_FATAL, - parser->goal->name ); - } - else - { - parser->goal = #1.ptr; - parser->goal->used = TRUE; - } - } - - /* Link productions with this left-hand side */ - for( l = #5.ptr; l; l = l->next ) - { - p = l->pptr; - - plist_push( ((SYMBOL*)(#1.ptr))->productions, p ); - p->lhs = #1.ptr; - } - - /* Value type */ - if( #3.ptr ) - { - VTYPE* vt; - - vt = create_vtype( parser, - (char*)(#3.ptr) ); - ((SYMBOL*)(#1.ptr))->vtype = vt; - - pfree( #3.ptr ); - } - } - | '@' identifier_copy type regex code_opt { SYMBOL* s; - s = get_symbol( parser, - #2.str, SYM_REGEX_TERMINAL, TRUE ); - - pfree( #2.str ); - - s->greedy = greedy; - s->ptn = #4.ptn; - greedy = TRUE; - - /* Attach code */ - if( strbuf && *strbuf ) - { - s->code = strbuf; - strbuf = (char*)NULL; - } - - /* Value type */ - if( #3.ptr ) - { - VTYPE* vt; - - vt = create_vtype( parser, - (char*)(#3.ptr) ); - s->vtype = vt; - pfree( #3.ptr ); - } - - /* re_dbg_print_nfa( s->nfa_def, - parser->p_universe ); */ - } - ; + LIST* l; + PROD* p; + + if( #2.ch == '$' ) + { + if( parser->goal ) + { + print_error( parser, ERR_MULTIPLE_GOAL_DEF, + ERRSTYLE_FATAL, + parser->goal->name ); + } + else + { + parser->goal = #1.ptr; + parser->goal->used = TRUE; + } + } + + /* Link productions with this left-hand side */ + for( l = #5.ptr; l; l = l->next ) + { + p = l->pptr; + + plist_push( ((SYMBOL*)(#1.ptr))->productions, p ); + p->lhs = #1.ptr; + } + + /* Value type */ + if( #3.ptr ) + { + VTYPE* vt; + + vt = create_vtype( parser, + (char*)(#3.ptr) ); + ((SYMBOL*)(#1.ptr))->vtype = vt; + + pfree( #3.ptr ); + } + } + | '@' identifier_copy type regex code_opt { SYMBOL* s; + s = get_symbol( parser, + #2.str, SYM_REGEX_TERMINAL, TRUE ); + + pfree( #2.str ); + + s->greedy = greedy; + 
s->ptn = #4.ptn; + greedy = TRUE; + + /* Attach code */ + if( strbuf && *strbuf ) + { + s->code = strbuf; + strbuf = (char*)NULL; + } + + /* Value type */ + if( #3.ptr ) + { + VTYPE* vt; + + vt = create_vtype( parser, + (char*)(#3.ptr) ); + s->vtype = vt; + pfree( #3.ptr ); + } + + /* re_dbg_print_nfa( s->nfa_def, + parser->p_universe ); */ + } + ; lhs: identifier { SYMBOL* s; - s = get_symbol( parser, - strbuf, SYM_NON_TERMINAL, TRUE ); - s->defined = TRUE; + s = get_symbol( parser, + strbuf, SYM_NON_TERMINAL, TRUE ); + s->defined = TRUE; - ##.ptr = s; - } - ; + ##.ptr = s; + } + ; goal_mark: '$' { ##.ch = '$'; } - | { ##.ch = '\0'; } - ; + | { ##.ch = '\0'; } + ; productions: productions '|' rhs code_opt { if( *strbuf != '\0' ) - ((PROD*)(#3.ptr))->code = pstrdup( strbuf ); + ((PROD*)(#3.ptr))->code = pstrdup( strbuf ); - ##.ptr = list_push( #1.ptr, #3.ptr ); - } - | rhs code_opt { if( *strbuf != '\0' ) - ((PROD*)(#1.ptr))->code = pstrdup( strbuf ); + ##.ptr = list_push( #1.ptr, #3.ptr ); + } + | rhs code_opt { if( *strbuf != '\0' ) + ((PROD*)(#1.ptr))->code = pstrdup( strbuf ); - ##.ptr = list_push( (LIST*)NULL, #1.ptr ); - } - ; + ##.ptr = list_push( (LIST*)NULL, #1.ptr ); + } + ; rhs: rhs symbol access_name { append_to_production( #1.ptr, #2.ptr, - ( *strbuf == '\0' ? (char*)NULL : - pstrdup( strbuf ) ) ); - ##.ptr = #1.ptr; - } - | { ##.ptr = create_production( parser, - (SYMBOL*)NULL ); } - ; + ( *strbuf == '\0' ? 
(char*)NULL : + pstrdup( strbuf ) ) ); + ##.ptr = #1.ptr; + } + | { ##.ptr = create_production( parser, + (SYMBOL*)NULL ); } + ; symbol: sym modifier { - switch( #2.ch ) - { - case '*': - ##.ptr = kleene_closure( parser, - #1.ptr ); - break; - case '+': - ##.ptr = positive_closure( parser, - #1.ptr ); - break; - case '?': - ##.ptr = optional_closure( parser, - #1.ptr ); - break; - default: - ##.ptr = #1.ptr; - break; - } - } - | "&error" { SYMBOL* sym; - sym = get_symbol( parser, - P_ERROR_RESYNC, SYM_SYSTEM_TERMINAL, - TRUE ); - ##.ptr = sym; - } - - | "&eof" { SYMBOL* sym; - sym = get_symbol( parser, - P_END_OF_FILE, SYM_SYSTEM_TERMINAL, - TRUE ); - ##.ptr = sym; - } - ; + switch( #2.ch ) + { + case '*': + ##.ptr = kleene_closure( parser, + #1.ptr ); + break; + case '+': + ##.ptr = positive_closure( parser, + #1.ptr ); + break; + case '?': + ##.ptr = optional_closure( parser, + #1.ptr ); + break; + default: + ##.ptr = #1.ptr; + break; + } + } + | "&error" { SYMBOL* sym; + sym = get_symbol( parser, + P_ERROR_RESYNC, SYM_SYSTEM_TERMINAL, + TRUE ); + ##.ptr = sym; + } + + | "&eof" { SYMBOL* sym; + sym = get_symbol( parser, + P_END_OF_FILE, SYM_SYSTEM_TERMINAL, + TRUE ); + ##.ptr = sym; + } + ; sym: terminal { ##.ptr = #1.ptr; } - | identifier { SYMBOL* s; - s = get_symbol( parser, - strbuf, SYM_NON_TERMINAL, TRUE ); - s->used = TRUE; - ##.ptr = s; - } - ; + | identifier { SYMBOL* s; + s = get_symbol( parser, + strbuf, SYM_NON_TERMINAL, TRUE ); + s->used = TRUE; + ##.ptr = s; + } + ; terminal: ccl { SYMBOL* s; - pccl* ccl; + pccl* ccl; - ccl = pccl_create( -1, -1, strbuf ); + ccl = pccl_create( -1, -1, strbuf ); - if( #1.negative ) - pccl_negate( ccl ); + if( #1.negative ) + pccl_negate( ccl ); - s = get_symbol( parser, - (void*)ccl, - SYM_CCL_TERMINAL, TRUE ); + s = get_symbol( parser, + (void*)ccl, + SYM_CCL_TERMINAL, TRUE ); - s->used = TRUE; - s->defined = TRUE; - ##.ptr = s; - } - | kw { SYMBOL* s; + s->used = TRUE; + s->defined = TRUE; + ##.ptr = s; + } + | kw 
{ SYMBOL* s; - s = get_symbol( parser, - strbuf, SYM_REGEX_TERMINAL, TRUE ); + s = get_symbol( parser, + strbuf, SYM_REGEX_TERMINAL, TRUE ); - s->used = TRUE; - s->defined = TRUE; - s->keyword = TRUE; + s->used = TRUE; + s->defined = TRUE; + s->keyword = TRUE; - s->ptn = pregex_ptn_create_string( strbuf, - parser->p_cis_strings ? - PREGEX_COMP_INSENSITIVE : - 0 ); + s->ptn = pregex_ptn_create_string( strbuf, + parser->p_cis_strings ? + PREGEX_COMP_INSENSITIVE : + 0 ); - ##.ptr = s; - } - | '@' identifier { SYMBOL* s; - s = get_symbol( parser, - strbuf, SYM_REGEX_TERMINAL, FALSE ); + ##.ptr = s; + } + | '@' identifier { SYMBOL* s; + s = get_symbol( parser, + strbuf, SYM_REGEX_TERMINAL, FALSE ); - s->used = TRUE; - s->defined = TRUE; + s->used = TRUE; + s->defined = TRUE; - ##.ptr = s; - } - /* - | '~' { ##.ptr = get_symbol( parser, - "", SYM_ERROR, TRUE ); - } - */ - ; + ##.ptr = s; + } + /* + | '~' { ##.ptr = get_symbol( parser, + "", SYM_ERROR, TRUE ); + } + */ + ; modifier: '*' { ##.ch = '*'; } - | '+' { ##.ch = '+'; } - | '?' { ##.ch = '?'; } - | { ##.ch = '\0'; } - ; + | '+' { ##.ch = '+'; } + | '?' 
{ ##.ch = '?'; } + | { ##.ch = '\0'; } + ; access_name: ':' identifier - | ':' string - | { reset_strbuf(); } - ; + | ':' string + | { reset_strbuf(); } + ; /* Regular Expression parser and NFA generator */ regex: re_alt - ; + ; re_alt: re_alt '|' re_expr - { - ##.ptn = pregex_ptn_create_alt( - #1.ptn, #3.ptn, (pregex_ptn*)NULL ); - } + { + ##.ptn = pregex_ptn_create_alt( + #1.ptn, #3.ptn, (pregex_ptn*)NULL ); + } - | re_expr - ; + | re_expr + ; re_expr: re_expr re_modifier - { - ##.ptn = pregex_ptn_create_seq( - #1.ptn, #2.ptn, (pregex_ptn*)NULL ); - } + { + ##.ptn = pregex_ptn_create_seq( + #1.ptn, #2.ptn, (pregex_ptn*)NULL ); + } - | re_modifier - ; + | re_modifier + ; re_modifier: re_factor '*' - { - ##.ptn = pregex_ptn_create_kle( #1.ptn ); - } + { + ##.ptn = pregex_ptn_create_kle( #1.ptn ); + } - | re_factor '+' + | re_factor '+' - { - ##.ptn = pregex_ptn_create_pos( #1.ptn ); - } + { + ##.ptn = pregex_ptn_create_pos( #1.ptn ); + } - | re_factor '?' + | re_factor '?' - { - ##.ptn = pregex_ptn_create_opt( #1.ptn ); - } + { + ##.ptn = pregex_ptn_create_opt( #1.ptn ); + } - | re_factor - ; + | re_factor + ; re_factor: ccl - { - pccl* ccl; + { + pccl* ccl; - ccl = pccl_create( -1, -1, strbuf ); - if( #1.negative ) - pccl_negate( ccl ); + ccl = pccl_create( -1, -1, strbuf ); + if( #1.negative ) + pccl_negate( ccl ); - ##.ptn = pregex_ptn_create_char( ccl ); - } + ##.ptn = pregex_ptn_create_char( ccl ); + } - | kw + | kw - { - ##.ptn = pregex_ptn_create_string( strbuf, 0 ); - } + { + ##.ptn = pregex_ptn_create_string( strbuf, 0 ); + } - | '.' + | '.' 
- { - pccl* ccl; - greedy = FALSE; + { + pccl* ccl; + greedy = FALSE; - ccl = pccl_create( -1, -1, (char*)NULL ); + ccl = pccl_create( -1, -1, (char*)NULL ); - pccl_addrange( ccl, PCCL_MIN, PCCL_MAX ); + pccl_addrange( ccl, PCCL_MIN, PCCL_MAX ); - ##.ptn = pregex_ptn_create_char( ccl ); - } + ##.ptn = pregex_ptn_create_char( ccl ); + } - | '(' regex ')' + | '(' regex ')' - { - ##.ptn = pregex_ptn_create_sub( #2.ptn ); - } - ; + { + ##.ptn = pregex_ptn_create_sub( #2.ptn ); + } + ; /* General parsing objects */ string: ccl_string | kw; ccl: ccl_string { ##.negative = FALSE; } - | '!' ccl_string { ##.negative = TRUE; } - ; + | '!' ccl_string { ##.negative = TRUE; } + ; ccl_string: '\'' ccl_str '\'' - ; + ; ccl_str: ccl_str ccl_char - | { reset_strbuf(); } - ; + | { reset_strbuf(); } + ; ccl_char: '^\'\\' { strbuf_append( (char)#1.ch ); } - | '\\' '^' { strbuf_append( (char)'\\' ); - strbuf_append( (char)#2.ch ); } - ; + | '\\' '^' { strbuf_append( (char)'\\' ); + strbuf_append( (char)#2.ch ); } + ; kw: '\"' kw_str '\"'; kw_str: kw_str '^\"' { strbuf_append( (char)#2.ch ); } - | { reset_strbuf(); } - ; + | { reset_strbuf(); } + ; type: '<' type_str '>' { ##.ptr = pstrdup( strbuf ); } - | { ##.ptr = (void*)NULL; } - ; + | { ##.ptr = (void*)NULL; } + ; type_str: type_str '^>' { strbuf_append( (char)#2.ch ); } - | { reset_strbuf(); } - ; + | { reset_strbuf(); } + ; /* escape_sequence: - 'a' { ##.ch = '\a'; } - | 'b' { ##.ch = '\b'; } - | 'n' { ##.ch = '\n'; } - | 'f' { ##.ch = '\f'; } - | 'r' { ##.ch = '\r'; } - | 't' { ##.ch = '\t'; } - | 'v' { ##.ch = '\v'; } - | '\\' { ##.ch = '\\'; } - | '?' { ##.ch = '\?'; } - | '\'' { ##.ch = '\''; } - | '\"' { ##.ch = '\"'; } - | '0' { ##.ch = '\0'; } - ; + 'a' { ##.ch = '\a'; } + | 'b' { ##.ch = '\b'; } + | 'n' { ##.ch = '\n'; } + | 'f' { ##.ch = '\f'; } + | 'r' { ##.ch = '\r'; } + | 't' { ##.ch = '\t'; } + | 'v' { ##.ch = '\v'; } + | '\\' { ##.ch = '\\'; } + | '?' 
{ ##.ch = '\?'; } + | '\'' { ##.ch = '\''; } + | '\"' { ##.ch = '\"'; } + | '0' { ##.ch = '\0'; } + ; */ identifier_copy: identifier { ##.str = strbuf; - strbuf = (char*)NULL; - } - ; + strbuf = (char*)NULL; + } + ; identifier: identifier_start identifier_follow - ; + ; identifier_start: 'A-Za-z_' { reset_strbuf(); - strbuf_append( #1.ch ); } - ; + strbuf_append( #1.ch ); } + ; identifier_follow: identifier_follow 'A-Za-z0-9_' { strbuf_append( #2.ch ); } - | - ; + | + ; integer: integer '0-9' { strbuf_append( #2.ch ); } - | '0-9' { reset_strbuf(); - strbuf_append( #1.ch ); - } - ; + | '0-9' { reset_strbuf(); + strbuf_append( #1.ch ); + } + ; code_opt: code - | { reset_strbuf(); } - ; + | { reset_strbuf(); } + ; code: "[*" inner_code "*]" { - if( !parser->p_template ) - { - print_error( parser, - ERR_NO_TARGET_TPL_SUPPLY, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - parser->filename, line ); - reset_strbuf(); - } - } - ; + if( !parser->p_template ) + { + print_error( parser, + ERR_NO_TARGET_TPL_SUPPLY, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + parser->filename, line ); + reset_strbuf(); + } + } + ; inner_code: inner_code '^' { strbuf_append( #2.ch ); } - | { reset_strbuf(); } - ; + | { reset_strbuf(); } + ; whitespace: ' ' - | '\t' - | "/*" comment "*/" - | "//" scomment '\n' - | '\r' - | '\n' { line++; } - ; + | '\t' + | "/*" comment "*/" + | "//" scomment '\n' + | '\r' + | '\n' { line++; } + ; comment: comment '^' - | - ; + | + ; scomment: scomment '^\n' - | - ; + | + ; { int parse_grammar( PARSER* p, char* filename, char* src ) { - char* s, - * sp; - int err = -1; + char* s, + * sp; + int err = -1; - line = 1; - auto_regex = 0; - prec_cnt = 1; - directive_level = 0; + line = 1; + auto_regex = 0; + prec_cnt = 1; + directive_level = 0; - if( p && src ) - { - err = 0; + if( p && src ) + { + err = 0; - parser = p; - strbuf_append( '\0' ); + parser = p; + strbuf_append( '\0' ); - s = sp = src; + s = sp = src; - #if PDEBUG - printf( "<<< %s >>>\n", s ); - #endif - err = 
parse( s ); + #if PDEBUG + printf( "<<< %s >>>\n", s ); + #endif + err = parse( s ); - pfree( strbuf ); - } + pfree( strbuf ); + } - return err + error_count; + return err + error_count; } } - diff --git a/src/parse.par b/src/parse.par index 5980e9f..87673aa 100644 --- a/src/parse.par +++ b/src/parse.par @@ -1,13 +1,4 @@ -/* -PARSER---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2020 by Phorward Software Technologies, Jan Max Meyer -http://unicc.phorward-software.com ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: parse.par -Author: Jan Max Meyer -Usage: Grammar definition parser ------------------------------------------------------------------------------ */ +/* Grammar definition parser */ #!mode scannerless ; #!language C; @@ -42,9 +33,9 @@ Usage: Grammar definition parser #default epsilon action [* @@ = 0; *]; #pcb [* - pboolean main; - char* filename; - char* src; + pboolean main; + char* filename; + char* src; *]; @@ -65,8 +56,8 @@ Usage: Grammar definition parser struct @@prefix_rhs_item { - SYMBOL* sym; - char* ident; + SYMBOL* sym; + char* ident; }; #ifndef MALLOC_STEP @@ -92,70 +83,70 @@ char* regex; /* Append character to current string */ static void strbuf_append( char ch ) { - int len; + int len; - len = pstrlen( strbuf ); + len = pstrlen( strbuf ); - if( !strbuf ) - strbuf = (char*)pmalloc( ( MALLOC_STEP + 2 ) * sizeof( char ) ); - else if( len % MALLOC_STEP == 0 ) - strbuf = (char*)prealloc( (char*)strbuf, ( len + MALLOC_STEP + 2 ) - * sizeof( char ) ); + if( !strbuf ) + strbuf = (char*)pmalloc( ( MALLOC_STEP + 2 ) * sizeof( char ) ); + else if( len % MALLOC_STEP == 0 ) + strbuf = (char*)prealloc( (char*)strbuf, ( len + MALLOC_STEP + 2 ) + * sizeof( char ) ); - strbuf[len] = ch; - strbuf[len+1] = '\0'; - strbuf[len+2] = '\0'; + strbuf[len] = ch; + strbuf[len+1] = '\0'; + strbuf[len+2] = '\0'; } /* Create a new string */ static 
void reset_strbuf( void ) { - if( strbuf ) - { - *strbuf = '\0'; - *(strbuf+1) = '\0'; - } + if( strbuf ) + { + *strbuf = '\0'; + *(strbuf+1) = '\0'; + } } /* Set precedence and associativiy */ static void set_assoc_and_prec( LIST* symbols, int assoc ) { - LIST* l; - SYMBOL* sym; - - for( l = symbols; l; l = l->next ) - { - sym = l->pptr; - sym->prec = prec_cnt; - sym->assoc = assoc; - } - - list_free( symbols ); - prec_cnt++; + LIST* l; + SYMBOL* sym; + + for( l = symbols; l; l = l->next ) + { + sym = l->pptr; + sym->prec = prec_cnt; + sym->assoc = assoc; + } + + list_free( symbols ); + prec_cnt++; } static void make_semantic_rhs( PROD* p ) { - if( current_prod && current_prod->rhs ) - plist_concat( p->sem_rhs, current_prod->rhs ); + if( current_prod && current_prod->rhs ) + plist_concat( p->sem_rhs, current_prod->rhs ); - plist_concat( p->sem_rhs, p->rhs ); + plist_concat( p->sem_rhs, p->rhs ); } static OPT* set_option( plist* opts, char* key, char* val, int line ) { - OPT* opt; + OPT* opt; - if( !( opt = create_opt( opts, key, (char*)NULL ) ) ) - OUTOFMEM; + if( !( opt = create_opt( opts, key, (char*)NULL ) ) ) + OUTOFMEM; - opt->def = val; - opt->line = line; + opt->def = val; + opt->line = line; - return opt; + return opt; } static void parse_error(); @@ -168,922 +159,922 @@ static void parse_error(); */ grammar_spec$ : fixed_directive_def* segment* - ; + ; fixed_directive_def : "#!" fixed_directive ';' - ; + ; fixed_directive : "mode" mode_type - | "language" string_or_ident - [* - if( !( parser->p_template ) ) - parser->p_template = pstrdup( strbuf ); - else if( !pcb->main ) - print_error( parser, ERR_DIRECTIVE_ALREADY_USED, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, - "!language" ); - *] - - //TODO: Must be reworked! 
+ | "language" string_or_ident + [* + if( !( parser->p_template ) ) + parser->p_template = pstrdup( strbuf ); + else if( !pcb->main ) + print_error( parser, ERR_DIRECTIVE_ALREADY_USED, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, + "!language" ); + *] + + //TODO: Must be reworked! /* - | "character universe" integer + | "character universe" integer - [* int universe = atoi( strbuf ); + [* int universe = atoi( strbuf ); - if( universe > 0 ) - parser->p_universe = universe; - else - print_error( parser, ERR_INVALID_CHAR_UNIVERSE, - ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, ); - *] + if( universe > 0 ) + parser->p_universe = universe; + else + print_error( parser, ERR_INVALID_CHAR_UNIVERSE, + ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, ); + *] */ - ; + ; mode_type : "context-sensitive" //deprecate! - [* parser->p_mode = MODE_SCANNERLESS; *] - | "sensitive" - [* parser->p_mode = MODE_SCANNERLESS; *] //deprecate! - | "scannerless" - [* parser->p_mode = MODE_SCANNERLESS; *] - | "context-insensitive" - [* parser->p_mode = MODE_SCANNER; *] //deprecate! - | "insensitive" - [* parser->p_mode = MODE_SCANNER; *] //deprecate! - | "scanner" - [* parser->p_mode = MODE_SCANNER; *] - ; + [* parser->p_mode = MODE_SCANNERLESS; *] + | "sensitive" + [* parser->p_mode = MODE_SCANNERLESS; *] //deprecate! + | "scannerless" + [* parser->p_mode = MODE_SCANNERLESS; *] + | "context-insensitive" + [* parser->p_mode = MODE_SCANNER; *] //deprecate! + | "insensitive" + [* parser->p_mode = MODE_SCANNER; *] //deprecate! 
+ | "scanner" + [* parser->p_mode = MODE_SCANNER; *] + ; segment : code - | definition ';' - | unfixed_directive ';' - ; + | definition ';' + | unfixed_directive ';' + ; unfixed_directive : '#' directive_parms - /* - | &error - [* - printf( "Incomplete directive\n" ); - *] - */ - ; + /* + | &error + [* + printf( "Incomplete directive\n" ); + *] + */ + ; @USERDEF !'\0"\'/ \t\n\r!;'+ - [* - @@ = @>; - *]; + [* + @@ = @>; + *]; userparam : code - | string - | boolean - [* - if( !@boolean ) - strbuf_append( '1' ); - else - strbuf_append( '0' ); - *] - | [* reset_strbuf(); *] - ; + | string + | boolean + [* + if( !@boolean ) + strbuf_append( '1' ); + else + strbuf_append( '0' ); + *] + | [* reset_strbuf(); *] + ; directive_parms : "whitespaces" symbol_list - [* LIST* l; - SYMBOL* s; - - for( l = @2; l; l = l->next ) - { - s = l->pptr; - - /* Do not allow nonterminal definitions here - in conext-free model */ - if( s->type == SYM_NON_TERMINAL && - parser->p_mode != - MODE_SCANNERLESS ) - { - print_error( parser, ERR_NONTERM_WS_NOT_ALLOWED, - ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, - s->name ); - } - else - s->whitespace = TRUE; - } - - list_free( @2 ); - *] - - | "lexeme" symbol_list - [* LIST* l; - SYMBOL* s; - - - /* Ouput a warning, if this definition - is effectless */ - if( parser->p_mode != MODE_SCANNERLESS ) - { - print_error( parser, ERR_NO_EFFECT_IN_MODE, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, "lexeme" ); - } - else - { - for( l = @2; l; l = l->next ) - { - s = l->pptr; - s->lexem = TRUE; - } - } - - list_free( @2 ); - *] - - | "fixate" symbol_list - [* LIST* l; - SYMBOL* s; - - /* Ouput a warning, if this definition - is effectless */ - if( parser->p_mode != MODE_SCANNERLESS ) - { - print_error( parser, ERR_NO_EFFECT_IN_MODE, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, "fixate" ); - } - else - { - for( l = @2; l; l = l->next ) - { - s = l->pptr; - s->fixated = TRUE; - } - } - - 
list_free( @2 ); - - *] - - | "left" symbol_list - [* set_assoc_and_prec( @2, ASSOC_LEFT ); *] - - - | "right" symbol_list - [* set_assoc_and_prec( @2, ASSOC_RIGHT ); *] - - - | "nonassoc" symbol_list - [* set_assoc_and_prec( @2, ASSOC_NOASSOC ); *] - - | "prefix" string - [* parser->p_prefix = pstrcatstr( - parser->p_prefix, strbuf, FALSE ); *] - - | "default action" code_opt - [* if( !( parser->p_def_action ) ) - parser->p_def_action = pstrdup( strbuf ); - else - print_error( parser, ERR_DIRECTIVE_ALREADY_USED, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, - "default epsilon action" ); - *] - - | "default epsilon action" code_opt - [* if( !( parser->p_def_action_e ) ) - parser->p_def_action_e = pstrdup( strbuf ); - else - print_error( parser, ERR_DIRECTIVE_ALREADY_USED, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, - "default epsilon action" ); - *] - - | "default value type" type - [* if( !( parser->p_def_type ) ) - parser->p_def_type = create_vtype( - parser, (char*)@type ); - else - print_error( parser, ERR_DIRECTIVE_ALREADY_USED, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, - "default value type" ); - - pfree( @type ); - *] - - | "lexeme separation" boolean_opt - [* /* Ouput a warning, if this definition - is effectless */ - if( parser->p_mode != MODE_SCANNERLESS ) - { - print_error( parser, ERR_NO_EFFECT_IN_MODE, - ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, - "lexem separation" ); - } - else - parser->p_lexem_sep = !@2; - *] - - | "case insensitive strings" boolean_opt - [* parser->p_cis_strings = !@2; *] - - | "reserve terminals" boolean_opt - [* parser->p_reserve_regex = !@2; *] - - | "prologue" code - [* parser->p_header = pstrcatstr( - parser->p_header, strbuf, FALSE ); *] - - | "epilogue" code - [* parser->p_footer = pstrcatstr( - parser->p_footer, strbuf, FALSE ); *] - | "pcb" code - [* parser->p_pcb = pstrcatstr( - parser->p_pcb, strbuf, FALSE ); *] - - | 
"extends" string - - [* - char* src; - char* filename = strbuf; - - strbuf = NULL; - - if( !pfiletostr( &src, filename ) ) - { - print_error( parser, ERR_OPEN_INPUT_FILE, - ERRSTYLE_FATAL, filename ); - } - else - { - parse_grammar( parser, filename, src ); - strbuf = NULL; - pfree( src ); - } - - pfree( filename ); - *] - - | "option"? line_number @USERDEF:key userparam - [* - set_option( parser->options, - @key, strbuf, @line_number ); - - strbuf = (char*)NULL; - *] - - ; + [* LIST* l; + SYMBOL* s; + + for( l = @2; l; l = l->next ) + { + s = l->pptr; + + /* Do not allow nonterminal definitions here + in conext-free model */ + if( s->type == SYM_NON_TERMINAL && + parser->p_mode != + MODE_SCANNERLESS ) + { + print_error( parser, ERR_NONTERM_WS_NOT_ALLOWED, + ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, + s->name ); + } + else + s->whitespace = TRUE; + } + + list_free( @2 ); + *] + + | "lexeme" symbol_list + [* LIST* l; + SYMBOL* s; + + + /* Ouput a warning, if this definition + is effectless */ + if( parser->p_mode != MODE_SCANNERLESS ) + { + print_error( parser, ERR_NO_EFFECT_IN_MODE, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, "lexeme" ); + } + else + { + for( l = @2; l; l = l->next ) + { + s = l->pptr; + s->lexem = TRUE; + } + } + + list_free( @2 ); + *] + + | "fixate" symbol_list + [* LIST* l; + SYMBOL* s; + + /* Ouput a warning, if this definition + is effectless */ + if( parser->p_mode != MODE_SCANNERLESS ) + { + print_error( parser, ERR_NO_EFFECT_IN_MODE, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, "fixate" ); + } + else + { + for( l = @2; l; l = l->next ) + { + s = l->pptr; + s->fixated = TRUE; + } + } + + list_free( @2 ); + + *] + + | "left" symbol_list + [* set_assoc_and_prec( @2, ASSOC_LEFT ); *] + + + | "right" symbol_list + [* set_assoc_and_prec( @2, ASSOC_RIGHT ); *] + + + | "nonassoc" symbol_list + [* set_assoc_and_prec( @2, ASSOC_NOASSOC ); *] + + | "prefix" string + [* 
parser->p_prefix = pstrcatstr( + parser->p_prefix, strbuf, FALSE ); *] + + | "default action" code_opt + [* if( !( parser->p_def_action ) ) + parser->p_def_action = pstrdup( strbuf ); + else + print_error( parser, ERR_DIRECTIVE_ALREADY_USED, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, + "default action" ); + *] + + | "default epsilon action" code_opt + [* if( !( parser->p_def_action_e ) ) + parser->p_def_action_e = pstrdup( strbuf ); + else + print_error( parser, ERR_DIRECTIVE_ALREADY_USED, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, + "default epsilon action" ); + *] + + | "default value type" type + [* if( !( parser->p_def_type ) ) + parser->p_def_type = create_vtype( + parser, (char*)@type ); + else + print_error( parser, ERR_DIRECTIVE_ALREADY_USED, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, + "default value type" ); + + pfree( @type ); + *] + + | "lexeme separation" boolean_opt + [* /* Ouput a warning, if this definition + is effectless */ + if( parser->p_mode != MODE_SCANNERLESS ) + { + print_error( parser, ERR_NO_EFFECT_IN_MODE, + ERRSTYLE_WARNING | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, + "lexem separation" ); + } + else + parser->p_lexem_sep = !@2; + *] + + | "case insensitive strings" boolean_opt + [* parser->p_cis_strings = !@2; *] + + | "reserve terminals" boolean_opt + [* parser->p_reserve_regex = !@2; *] + + | "prologue" code + [* parser->p_header = pstrcatstr( + parser->p_header, strbuf, FALSE ); *] + + | "epilogue" code + [* parser->p_footer = pstrcatstr( + parser->p_footer, strbuf, FALSE ); *] + | "pcb" code + [* parser->p_pcb = pstrcatstr( + parser->p_pcb, strbuf, FALSE ); *] + + | "extends" string + + [* + char* src; + char* filename = strbuf; + + strbuf = NULL; + + if( !pfiletostr( &src, filename ) ) + { + print_error( parser, ERR_OPEN_INPUT_FILE, + ERRSTYLE_FATAL, filename ); + } + else + { + parse_grammar( parser, filename, src ); + strbuf = NULL; + pfree( 
src ); + } + + pfree( filename ); + *] + + | "option"? line_number @USERDEF:key userparam + [* + set_option( parser->options, + @key, strbuf, @line_number ); + + strbuf = (char*)NULL; + *] + + ; boolean_opt: boolean - | [* @@ = FALSE; *] - ; + | [* @@ = FALSE; *] + ; boolean : "on" [* @@ = FALSE; *] - | "off" [* @@ = TRUE; *] - ; + | "off" [* @@ = TRUE; *] + ; symbol_list : symbol_list sym - [* @@ = list_push( @1, (void*)@2 ); *] + [* @@ = list_push( @1, (void*)@2 ); *] - | sym - [* @@ = list_push( (LIST*)NULL, (void*)@1 ); *] - ; + | sym + [* @@ = list_push( (LIST*)NULL, (void*)@1 ); *] + ; lhs : identifier - [* @@ = get_symbol( parser, - strbuf, SYM_NON_TERMINAL, TRUE ); - @@->defined = TRUE; - @@->line = pcb->line; - *] - ; + [* @@ = get_symbol( parser, + strbuf, SYM_NON_TERMINAL, TRUE ); + @@->defined = TRUE; + @@->line = pcb->line; + *] + ; alt_lhs_list : alt_lhs_list lhs - [* @@ = list_push( @1, (void*)@2 ); *] + [* @@ = list_push( @1, (void*)@2 ); *] - | lhs + | lhs - [* @@ = list_push( (LIST*)NULL, (void*)@1 ); *] - ; + [* @@ = list_push( (LIST*)NULL, (void*)@1 ); *] + ; alt_regex_sym: alt_regex_sym regex_sym - [* @@ = list_push( @alt_regex_sym, - (void*)@regex_sym ); - @regex_sym->derived_from = - (SYMBOL*)list_access( @alt_regex_sym ); - *] + [* @@ = list_push( @alt_regex_sym, + (void*)@regex_sym ); + @regex_sym->derived_from = + (SYMBOL*)list_access( @alt_regex_sym ); + *] - | regex_sym + | regex_sym - [* @@ = list_push( (LIST*)NULL, - (void*)@regex_sym ); + [* @@ = list_push( (LIST*)NULL, + (void*)@regex_sym ); - current_sym = @regex_sym; - *] - ; + current_sym = @regex_sym; + *] + ; regex_sym : identifier - [* @@ = get_symbol( parser, - strbuf, SYM_REGEX_TERMINAL, TRUE ); + [* @@ = get_symbol( parser, + strbuf, SYM_REGEX_TERMINAL, TRUE ); - if( @@->defined ) - { - print_error( parser, ERR_DOUBLE_TERMINAL_DEF, - ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, - pcb->filename, pcb->line, - @@->name ); - } + if( @@->defined ) + { + print_error( parser, 
ERR_DOUBLE_TERMINAL_DEF, + ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, + pcb->filename, pcb->line, + @@->name ); + } - @@->defined = TRUE; - @@->line = pcb->line; - *] - ; + @@->defined = TRUE; + @@->line = pcb->line; + *] + ; defines : "->" [* @@ = FALSE; *] - | ':' [* @@ = FALSE; *] - | "=>" [* @@ = TRUE; *] // Defines ast node - | ":=" [* @@ = TRUE; *] // Defines ast node - ; + | ':' [* @@ = FALSE; *] + | "=>" [* @@ = TRUE; *] // Defines ast node + | ":=" [* @@ = TRUE; *] // Defines ast node + ; definition : lhs:primary - goal_mark:is_goal - alt_lhs_list?:all type - defines productions - - [* - LIST* l; - LIST* m; - PROD* p; - SYMBOL* lhs; - VTYPE* vt = (VTYPE*)NULL; - - @all = list_push( @all, (void*)@primary ); - - /* Value type */ - if( @type ) - { - vt = create_vtype( parser, - (char*)@type ); - - pfree( @type ); - } - - /* Connect left-hand sides */ - for( l = @all; l; l = list_next( l ) ) - { - lhs = (SYMBOL*)list_access( l ); - - LISTFOR( @productions, m ) - plist_push( lhs->productions, - list_access( m ) ); - - lhs->vtype = vt; - } - - /* Link productions with this left-hand sides */ - for( l = @productions; l; l = l->next ) - { - LIST* m; - - p = (PROD*)list_access( l ); - - p->lhs = @primary; - - LISTFOR( @all, m ) - plist_push( p->all_lhs, list_access( m ) ); - - /* AST */ - if( @defines && !p->emit ) - p->emit = pstrdup( @primary->name ); - } - - list_free( @productions ); - list_free( @all ); - - /* Goal primary defined? 
*/ - if( @is_goal && pcb->main ) - { - if( parser->goal ) - { - print_error( parser, ERR_MULTIPLE_GOAL_DEF, - ERRSTYLE_FATAL, - parser->goal->name ); - } - else - { - parser->goal = @primary; - parser->goal->used = TRUE; - } - } - *] - - | '@' alt_regex_sym type regex code_opt ast_node - sym_option* - - [* - SYMBOL* primary; - SYMBOL* s; - LIST* l; - VTYPE* vt; - - primary = (SYMBOL*)list_access( @alt_regex_sym ); - - primary->ptn = @4; - primary->greedy = greedy; - primary->emit = @ast_node; - - greedy = TRUE; - - /* Attach code */ - if( @code_opt ) - { - /* Copy code for every symbol */ - for( l = @alt_regex_sym; l; - l = list_next( l ) ) - { - s = (SYMBOL*)list_access( l ); - - /* - Last symbol gets strbuf-pointer, - all other assignments need to be - duplicated. - */ - if( list_next( l ) ) - s->code = pstrdup( strbuf ); - else - s->code = strbuf; - - primary->code_at = last_code_begin; - } - - strbuf = (char*)NULL; - } - - /* Value type */ - if( @type ) - { - vt = create_vtype( parser, @type ); - - for( l = @alt_regex_sym; l; - l = list_next( l ) ) - { - s = (SYMBOL*)list_access( l ); - s->vtype = vt; - } - - pfree( @type ); - } - - /* re_dbg_print_nfa( s->nfa_def, - parser->p_universe ); */ - - /* Assign the list of all possible symbols to - the primary symbol */ - LISTFOR( @alt_regex_sym, l ) - plist_push( primary->all_sym, - list_access( l ) ); - - current_sym = (SYMBOL*)NULL; - *] - ; + goal_mark:is_goal + alt_lhs_list?:all type + defines productions + + [* + LIST* l; + LIST* m; + PROD* p; + SYMBOL* lhs; + VTYPE* vt = (VTYPE*)NULL; + + @all = list_push( @all, (void*)@primary ); + + /* Value type */ + if( @type ) + { + vt = create_vtype( parser, + (char*)@type ); + + pfree( @type ); + } + + /* Connect left-hand sides */ + for( l = @all; l; l = list_next( l ) ) + { + lhs = (SYMBOL*)list_access( l ); + + LISTFOR( @productions, m ) + plist_push( lhs->productions, + list_access( m ) ); + + lhs->vtype = vt; + } + + /* Link productions with this left-hand sides */ 
+ for( l = @productions; l; l = l->next ) + { + LIST* m; + + p = (PROD*)list_access( l ); + + p->lhs = @primary; + + LISTFOR( @all, m ) + plist_push( p->all_lhs, list_access( m ) ); + + /* AST */ + if( @defines && !p->emit ) + p->emit = pstrdup( @primary->name ); + } + + list_free( @productions ); + list_free( @all ); + + /* Goal primary defined? */ + if( @is_goal && pcb->main ) + { + if( parser->goal ) + { + print_error( parser, ERR_MULTIPLE_GOAL_DEF, + ERRSTYLE_FATAL, + parser->goal->name ); + } + else + { + parser->goal = @primary; + parser->goal->used = TRUE; + } + } + *] + + | '@' alt_regex_sym type regex code_opt ast_node + sym_option* + + [* + SYMBOL* primary; + SYMBOL* s; + LIST* l; + VTYPE* vt; + + primary = (SYMBOL*)list_access( @alt_regex_sym ); + + primary->ptn = @4; + primary->greedy = greedy; + primary->emit = @ast_node; + + greedy = TRUE; + + /* Attach code */ + if( @code_opt ) + { + /* Copy code for every symbol */ + for( l = @alt_regex_sym; l; + l = list_next( l ) ) + { + s = (SYMBOL*)list_access( l ); + + /* + Last symbol gets strbuf-pointer, + all other assignments need to be + duplicated. + */ + if( list_next( l ) ) + s->code = pstrdup( strbuf ); + else + s->code = strbuf; + + primary->code_at = last_code_begin; + } + + strbuf = (char*)NULL; + } + + /* Value type */ + if( @type ) + { + vt = create_vtype( parser, @type ); + + for( l = @alt_regex_sym; l; + l = list_next( l ) ) + { + s = (SYMBOL*)list_access( l ); + s->vtype = vt; + } + + pfree( @type ); + } + + /* re_dbg_print_nfa( s->nfa_def, + parser->p_universe ); */ + + /* Assign the list of all possible symbols to + the primary symbol */ + LISTFOR( @alt_regex_sym, l ) + plist_push( primary->all_sym, + list_access( l ) ); + + current_sym = (SYMBOL*)NULL; + *] + ; sym_option : '#' "greedy" - [* - greedy = TRUE; - *] + [* + greedy = TRUE; + *] - | '#' "non-greedy" + | '#' "non-greedy" - [* - greedy = FALSE; - *] + [* + greedy = FALSE; + *] - | - '#' "option"? 
line_number @USERDEF:key userparam + | + '#' "option"? line_number @USERDEF:key userparam - [* - set_option( current_sym->options, @key, - strbuf, @line_number ); + [* + set_option( current_sym->options, @key, + strbuf, @line_number ); - strbuf = (char*)NULL; - *] - ; + strbuf = (char*)NULL; + *] + ; goal_mark : '$' - [* @@ = TRUE; *] + [* @@ = TRUE; *] - | [* @@ = FALSE; *] - ; + | [* @@ = FALSE; *] + ; productions : productions '|' production - [* @@ = list_push( @1, @3 ); *] + [* @@ = list_push( @1, @3 ); *] - | production - [* @@ = list_push( (LIST*)NULL, @1 ); *] - ; + | production + [* @@ = list_push( (LIST*)NULL, @1 ); *] + ; ast_node : '=' identifier [* @@ = pstrdup( strbuf ); *] - | '=' string [* @@ = pstrdup( strbuf ); *] - | [* @@ = (char*)NULL; *] - ; + | '=' string [* @@ = pstrdup( strbuf ); *] + | [* @@ = (char*)NULL; *] + ; production : line_number rhs_opt:rhs code_opt_dup:act - ast_node - prod_directives* - [* - @rhs->line = @line_number; + ast_node + prod_directives* + [* + @rhs->line = @line_number; - if( @act ) - { - @rhs->code = @act; - @rhs->code_at = last_code_begin; - } + if( @act ) + { + @rhs->code = @act; + @rhs->code_at = last_code_begin; + } - @rhs->emit = @ast_node; + @rhs->emit = @ast_node; - @@ = @rhs; + @@ = @rhs; - current_prod = (PROD*)NULL; - *] - ; + current_prod = (PROD*)NULL; + *] + ; rhs_opt : rhs - | [* @@ = current_prod = create_production( parser, - (SYMBOL*)NULL ); - *] - ; + | [* @@ = current_prod = create_production( parser, + (SYMBOL*)NULL ); + *] + ; prod_directives: '#' "precedence" terminal - [* current_prod->prec = @terminal->prec; *] + [* current_prod->prec = @terminal->prec; *] - ; + ; rhs : rhs symbol access_name - [* append_to_production( @1, @2, - ( *strbuf == '\0' ? (char*)NULL : - pstrdup( strbuf ) ) ); - @@ = @1; - *] + [* append_to_production( @1, @2, + ( *strbuf == '\0' ? 
(char*)NULL : + pstrdup( strbuf ) ) ); + @@ = @1; + *] - | symbol access_name + | symbol access_name - [* @@ = current_prod = - create_production( parser, - (SYMBOL*)NULL ); + [* @@ = current_prod = + create_production( parser, + (SYMBOL*)NULL ); - append_to_production( @@, @1, - ( *strbuf == '\0' ? (char*)NULL : - pstrdup( strbuf ) ) ); - *] - ; + append_to_production( @@, @1, + ( *strbuf == '\0' ? (char*)NULL : + pstrdup( strbuf ) ) ); + *] + ; symbol : sym modifier - [* - switch( @2 ) - { - case '*': - @@ = kleene_closure( parser, @1 ); - break; - case '+': - @@ = positive_closure( parser, @1 ); - break; - case '?': - @@ = optional_closure( parser, @1 ); - break; - default: - @@ = @1; - break; - } - *] - - | "&error" - - [* @@ = get_symbol( parser, - P_ERROR_RESYNC, SYM_SYSTEM_TERMINAL, - TRUE ); - *] - - | "&eof" - - [* @@ = get_symbol( parser, - P_END_OF_FILE, SYM_SYSTEM_TERMINAL, - TRUE ); - *] - ; + [* + switch( @2 ) + { + case '*': + @@ = kleene_closure( parser, @1 ); + break; + case '+': + @@ = positive_closure( parser, @1 ); + break; + case '?': + @@ = optional_closure( parser, @1 ); + break; + default: + @@ = @1; + break; + } + *] + + | "&error" + + [* @@ = get_symbol( parser, + P_ERROR_RESYNC, SYM_SYSTEM_TERMINAL, + TRUE ); + *] + + | "&eof" + + [* @@ = get_symbol( parser, + P_END_OF_FILE, SYM_SYSTEM_TERMINAL, + TRUE ); + *] + ; sym : terminal - | identifier + | identifier - [* @@ = get_symbol( parser, - strbuf, SYM_NON_TERMINAL, TRUE ); - @@->used = TRUE; + [* @@ = get_symbol( parser, + strbuf, SYM_NON_TERMINAL, TRUE ); + @@->used = TRUE; - if( @@->line < 0 ) - @@->line = pcb->line; - *] + if( @@->line < 0 ) + @@->line = pcb->line; + *] - //Embedded productions - | type '(' stack_cur_prod productions ')' + //Embedded productions + | type '(' stack_cur_prod productions ')' - [* - char temp_nonterm[ ONE_LINE + 1 ]; - PROD* prod; - LIST* l; + [* + char temp_nonterm[ ONE_LINE + 1 ]; + PROD* prod; + LIST* l; - sprintf( temp_nonterm, P_EMBEDDED, - 
embedded_count++ ); + sprintf( temp_nonterm, P_EMBEDDED, + embedded_count++ ); - @@ = get_symbol( parser, - temp_nonterm, SYM_NON_TERMINAL, - TRUE ); - @@->defined = TRUE; - @@->used = TRUE; - @@->generated = TRUE; + @@ = get_symbol( parser, + temp_nonterm, SYM_NON_TERMINAL, + TRUE ); + @@->defined = TRUE; + @@->used = TRUE; + @@->generated = TRUE; - /* Value type */ - if( @type ) - { - VTYPE* vt; + /* Value type */ + if( @type ) + { + VTYPE* vt; - vt = create_vtype( parser, - (char*)@type ); - @@->vtype = vt; + vt = create_vtype( parser, + (char*)@type ); + @@->vtype = vt; - pfree( @type ); - } + pfree( @type ); + } - /* Associate the productions */ - current_prod = @stack_cur_prod; + /* Associate the productions */ + current_prod = @stack_cur_prod; - /* Link productions with this left-hand side */ - for( l = @productions; l; l = list_next( l ) ) - { - prod = (PROD*)list_access( l ); + /* Link productions with this left-hand side */ + for( l = @productions; l; l = list_next( l ) ) + { + prod = (PROD*)list_access( l ); - prod->lhs = @@; - plist_push( @@->productions, prod ); - plist_push( prod->all_lhs, @@ ); - make_semantic_rhs( prod ); - } - *] - ; + prod->lhs = @@; + plist_push( @@->productions, prod ); + plist_push( prod->all_lhs, @@ ); + make_semantic_rhs( prod ); + } + *] + ; stack_cur_prod - : [* @@ = current_prod; - current_prod = (PROD*)NULL; - *] - ; + : [* @@ = current_prod; + current_prod = (PROD*)NULL; + *] + ; terminal : ccl - [* pccl* ccl; + [* pccl* ccl; - ccl = pccl_create( -1, -1, strbuf ); - if( @1 ) - pccl_negate( ccl ); + ccl = pccl_create( -1, -1, strbuf ); + if( @1 ) + pccl_negate( ccl ); - @@ = get_symbol( parser, (void*)ccl, - SYM_CCL_TERMINAL, TRUE ); + @@ = get_symbol( parser, (void*)ccl, + SYM_CCL_TERMINAL, TRUE ); - @@->defined = TRUE; - @@->used = TRUE; + @@->defined = TRUE; + @@->used = TRUE; - if( @@->line < 0 ) - @@->line = pcb->line; - *] + if( @@->line < 0 ) + @@->line = pcb->line; + *] - | kw + | kw - [* @@ = get_symbol( parser, 
- strbuf, SYM_REGEX_TERMINAL, TRUE ); + [* @@ = get_symbol( parser, + strbuf, SYM_REGEX_TERMINAL, TRUE ); - @@->used = TRUE; - @@->defined = TRUE; - @@->keyword = TRUE; - @@->emit = @kw ? pstrdup( strbuf ) : NULL; + @@->used = TRUE; + @@->defined = TRUE; + @@->keyword = TRUE; + @@->emit = @kw ? pstrdup( strbuf ) : NULL; - @@->ptn = pregex_ptn_create_string( strbuf, - parser->p_cis_strings ? - PREGEX_COMP_INSENSITIVE : 0 ); + @@->ptn = pregex_ptn_create_string( strbuf, + parser->p_cis_strings ? + PREGEX_COMP_INSENSITIVE : 0 ); - if( @@->line < 0 ) - @@->line = pcb->line; - *] + if( @@->line < 0 ) + @@->line = pcb->line; + *] - | '@' identifier + | '@' identifier - [* @@ = get_symbol( parser, - strbuf, SYM_REGEX_TERMINAL, TRUE ); + [* @@ = get_symbol( parser, + strbuf, SYM_REGEX_TERMINAL, TRUE ); - /* @@->defined = TRUE; - DO NOT SET DEFINED! */ - @@->used = TRUE; + /* @@->defined = TRUE; + DO NOT SET DEFINED! */ + @@->used = TRUE; - if( @@->line < 0 ) - @@->line = pcb->line; - *] - ; + if( @@->line < 0 ) + @@->line = pcb->line; + *] + ; modifier : '*' - [* @@ = (int)'*'; *] + [* @@ = (int)'*'; *] - | '+' - [* @@ = (int)'+'; *] + | '+' + [* @@ = (int)'+'; *] - | '?' - [* @@ = (int)'?'; *] + | '?' 
+ [* @@ = (int)'?'; *] - | - [* @@ = 0; *] + | + [* @@ = 0; *] - ; + ; access_name : ':' identifier - | ':' string_single - | [* reset_strbuf(); *] - ; + | ':' string_single + | [* reset_strbuf(); *] + ; /* Regular Expression parser and NFA generator */ regex : re_alt - ; + ; re_alt : re_alt '|' re_expr - [* - @@ = pregex_ptn_create_alt( - @1, @3, (pregex_ptn*)NULL ); - *] + [* + @@ = pregex_ptn_create_alt( + @1, @3, (pregex_ptn*)NULL ); + *] - | re_expr + | re_expr - ; + ; re_expr: re_expr re_modifier - [* - @@ = pregex_ptn_create_seq( - @1, @2, (pregex_ptn*)NULL ); - *] + [* + @@ = pregex_ptn_create_seq( + @1, @2, (pregex_ptn*)NULL ); + *] - | re_modifier + | re_modifier - ; + ; re_modifier - : re_factor '*' + : re_factor '*' - [* - @@ = pregex_ptn_create_kle( @1 ); - *] + [* + @@ = pregex_ptn_create_kle( @1 ); + *] - | re_factor '+' + | re_factor '+' - [* - @@ = pregex_ptn_create_pos( @1 ); - *] + [* + @@ = pregex_ptn_create_pos( @1 ); + *] - | re_factor '?' + | re_factor '?' - [* - @@ = pregex_ptn_create_opt( @1 ); - *] + [* + @@ = pregex_ptn_create_opt( @1 ); + *] - | re_factor - ; + | re_factor + ; re_factor - : ccl + : ccl - [* - pccl* ccl; + [* + pccl* ccl; - ccl = pccl_create( -1, -1, strbuf ); - if( @1 ) - pccl_negate( ccl ); + ccl = pccl_create( -1, -1, strbuf ); + if( @1 ) + pccl_negate( ccl ); - @@ = pregex_ptn_create_char( ccl ); - *] + @@ = pregex_ptn_create_char( ccl ); + *] - | kw + | kw - [* - @@ = pregex_ptn_create_string( strbuf, 0 ); - *] + [* + @@ = pregex_ptn_create_string( strbuf, 0 ); + *] - | '.' + | '.' 
- [* - pccl* ccl; - greedy = FALSE; + [* + pccl* ccl; + greedy = FALSE; - ccl = pccl_create( -1, -1, (char*)NULL ); + ccl = pccl_create( -1, -1, (char*)NULL ); - pccl_addrange( ccl, PCCL_MIN, PCCL_MAX ); + pccl_addrange( ccl, PCCL_MIN, PCCL_MAX ); - @@ = pregex_ptn_create_char( ccl ); - *] + @@ = pregex_ptn_create_char( ccl ); + *] - | '(' regex ')' + | '(' regex ')' - [* - @@ = pregex_ptn_create_sub( @2 ); - *] - ; + [* + @@ = pregex_ptn_create_sub( @2 ); + *] + ; /* General parsing objects */ string : string_single+ - ; + ; string_single : ccl_string | kw - ; + ; ccl : ccl_string - [* @@ = FALSE; *] + [* @@ = FALSE; *] - | '!' ccl_string + | '!' ccl_string - [* @@ = TRUE; *] - ; + [* @@ = TRUE; *] + ; /* ------------------------------------- TODO: Must be re-designed... --- */ ccl_string : '\'' ccl_str '\''; ccl_str : ccl_str ccl_char - | - [* reset_strbuf(); *] - ; + | + [* reset_strbuf(); *] + ; ccl_char : !'\\\'' - [* strbuf_append( @1 ); *] + [* strbuf_append( @1 ); *] - | '\\' !'\0' - [* strbuf_append( (char)'\\' ); - strbuf_append( @2 ); *] - ; + | '\\' !'\0' + [* strbuf_append( (char)'\\' ); + strbuf_append( @2 ); *] + ; kw : '\"' '\"' kw_str '\"' '\"' [* @@ = TRUE *] - | '\"' kw_str '\"' [* @@ = FALSE *] - ; + | '\"' kw_str '\"' [* @@ = FALSE *] + ; kw_str : kw_str kw_char - [* strbuf_append( @2 ); *] - | [* reset_strbuf(); *] - ; + [* strbuf_append( @2 ); *] + | [* reset_strbuf(); *] + ; kw_char : !'\\"' - [* strbuf_append( @1 ); *] + [* strbuf_append( @1 ); *] - | '\\' !'\0' - [* strbuf_append( (char)'\\' ); - strbuf_append( @2 ); - *] - ; + | '\\' !'\0' + [* strbuf_append( (char)'\\' ); + strbuf_append( @2 ); + *] + ; type : '<' type_str '>' - [* @@ = pstrdup( strbuf ); *] - | [* @@ = (char*)NULL; *] - ; + [* @@ = pstrdup( strbuf ); *] + | [* @@ = (char*)NULL; *] + ; type_str : type_str !'>' - [* strbuf_append( @2 ); *] - | [* reset_strbuf(); *] - ; + [* strbuf_append( @2 ); *] + | [* reset_strbuf(); *] + ; /* identifier_cpy - : identifier - [* - @@ = 
pstrdup( strbuf ); - reset_strbuf(); - *] - ; + : identifier + [* + @@ = pstrdup( strbuf ); + reset_strbuf(); + *] + ; */ identifier : identifier_start identifier_follow - ; + ; identifier_start - : 'A-Za-z_' - [* reset_strbuf(); - strbuf_append( @1 ); - *] - ; + : 'A-Za-z_' + [* reset_strbuf(); + strbuf_append( @1 ); + *] + ; identifier_follow - : identifier_follow 'A-Za-z0-9_' - [* strbuf_append( @2 ); *] - | - ; + : identifier_follow 'A-Za-z0-9_' + [* strbuf_append( @2 ); *] + | + ; string_or_ident : string @@ -1093,85 +1084,85 @@ string_or_ident : string /* ------------------------------------- TODO: ...until here --- */ integer : integer '0-9' - [* strbuf_append( @2 ); *] + [* strbuf_append( @2 ); *] - | '0-9' - [* reset_strbuf(); - strbuf_append( @1 ); - *] - ; + | '0-9' + [* reset_strbuf(); + strbuf_append( @1 ); + *] + ; code_opt_dup - : code_opt + : code_opt - [* @@ = pstrdup( @code_opt ); - reset_strbuf(); - *] - ; + [* @@ = pstrdup( @code_opt ); + reset_strbuf(); + *] + ; code_opt : code - [* @@ = strbuf; *] - | [* reset_strbuf(); - @@ = (char*)NULL; *] - ; + [* @@ = strbuf; *] + | [* reset_strbuf(); + @@ = (char*)NULL; *] + ; code : code_begin inner_code_opt "*]" - [* - if( !parser->p_template ) - { - print_error( parser, - ERR_NO_TARGET_TPL_SUPPLY, - ERRSTYLE_WARNING | ERRSTYLE_IMPORTANT - | ERRSTYLE_FILEINFO, - pcb->filename, last_code_begin ); - - reset_strbuf(); - } - *] - ; + [* + if( !parser->p_template ) + { + print_error( parser, + ERR_NO_TARGET_TPL_SUPPLY, + ERRSTYLE_WARNING | ERRSTYLE_IMPORTANT + | ERRSTYLE_FILEINFO, + pcb->filename, last_code_begin ); + + reset_strbuf(); + } + *] + ; code_begin : "[*" - [* last_code_begin = pcb->line; *] - ; + [* last_code_begin = pcb->line; *] + ; inner_code_opt : inner_code - | [* reset_strbuf(); *] - ; + | [* reset_strbuf(); *] + ; inner_code : inner_code anychar - [* strbuf_append( @2 ); *] + [* strbuf_append( @2 ); *] - | anychar - [* reset_strbuf(); - strbuf_append( @1 ); - *] - ; + | anychar + [* 
reset_strbuf(); + strbuf_append( @1 ); + *] + ; whitespace : ' ' - | '\t' - | "/*" comment? "*/" - | "//" scomment? '\n' - | '\r' - | '\n' - ; + | '\t' + | "/*" comment? "*/" + | "//" scomment? '\n' + | '\r' + | '\n' + ; comment : comment anychar - | anychar - ; + | anychar + ; anychar : !'\0' - [* - @@ = @1; - *] - ; + [* + @@ = @1; + *] + ; scomment : scomment !'\n' - | !'\n' - ; + | !'\n' + ; line_number : [* @@ = pcb->line; *] - ; + ; #epilogue @@ -1179,72 +1170,72 @@ line_number : [* @@ = pcb->line; *] static void parse_error( @@prefix_pcb* pcb ) { - int i; - char* expect = NULL; - char* lookahead; - char buf [ 255 + 1 ]; - - for( i = 1; i < @@prefix_act[ pcb->tos->state ][0] * 3; i += 3 ) - { - lookahead = @@prefix_symbols[ _act[ pcb->tos->state ][i] ].name; - - if( strlen( lookahead ) < sizeof( buf ) - 4 + 1 ) - { - if( i + 3 >= @@prefix_act[ pcb->tos->state ][0] * 3 ) - { - if( expect ) - sprintf( buf, " or %s", lookahead ); - else - strcpy( buf, lookahead ); - } - else - sprintf( buf, "%s%s", expect ? ", " : "", lookahead ); - - expect = pstrcatstr( expect, buf, FALSE ); - } - else - { - if( expect ) - expect = pstrcatstr( expect, ", (too long token)", FALSE ); - else - expect = pstrcatstr( expect, "(too long token)", FALSE ); - } - } - - printf( "state %d\n", pcb->tos->state ); - - print_error( parser, expect ? ERR_PARSE_ERROR_EXPECT : ERR_PARSE_ERROR, - ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, - parser->filename, pcb->line, - pcb->sym == -1 ? - (char*)pcb->buf : @@prefix_symbols[ pcb->sym ].name, - expect ? 
expect : "(unknown)" ); - pfree( expect ); + int i; + char* expect = NULL; + char* lookahead; + char buf [ 255 + 1 ]; + + for( i = 1; i < @@prefix_act[ pcb->tos->state ][0] * 3; i += 3 ) + { + lookahead = @@prefix_symbols[ _act[ pcb->tos->state ][i] ].name; + + if( strlen( lookahead ) < sizeof( buf ) - 4 + 1 ) + { + if( i + 3 >= @@prefix_act[ pcb->tos->state ][0] * 3 ) + { + if( expect ) + sprintf( buf, " or %s", lookahead ); + else + strcpy( buf, lookahead ); + } + else + sprintf( buf, "%s%s", expect ? ", " : "", lookahead ); + + expect = pstrcatstr( expect, buf, FALSE ); + } + else + { + if( expect ) + expect = pstrcatstr( expect, ", (too long token)", FALSE ); + else + expect = pstrcatstr( expect, "(too long token)", FALSE ); + } + } + + printf( "state %d\n", pcb->tos->state ); + + print_error( parser, expect ? ERR_PARSE_ERROR_EXPECT : ERR_PARSE_ERROR, + ERRSTYLE_FATAL | ERRSTYLE_FILEINFO, + parser->filename, pcb->line, + pcb->sym == -1 ? + (char*)pcb->buf : @@prefix_symbols[ pcb->sym ].name, + expect ? 
expect : "(unknown)" ); + pfree( expect ); } int parse_grammar( PARSER* p, char* filename, char* src ) { - @@prefix_pcb pcb; + @@prefix_pcb pcb; - prec_cnt = 1; - embedded_count = 0; + prec_cnt = 1; + embedded_count = 0; - memset( &pcb, 0, sizeof( pcb ) ); - pcb.src = src; - pcb.filename = filename; - pcb.main = MAKE_BOOLEAN( filename == p->filename ); + memset( &pcb, 0, sizeof( pcb ) ); + pcb.src = src; + pcb.filename = filename; + pcb.main = MAKE_BOOLEAN( filename == p->filename ); - if( p && src ) - { - parser = p; - strbuf_append( '\0' ); + if( p && src ) + { + parser = p; + strbuf_append( '\0' ); - @@prefix_parse( &pcb ); + @@prefix_parse( &pcb ); - pfree( strbuf ); - } + pfree( strbuf ); + } - return pcb.error_count + error_count; + return pcb.error_count + error_count; } *]; diff --git a/src/proto.h b/src/proto.h index 58d2b7d..75cef00 100644 --- a/src/proto.h +++ b/src/proto.h @@ -1,4 +1,4 @@ -/* build.c */ +/* src/build.c */ char* escape_for_target( GENERATOR* g, char* str, BOOLEAN clear ); char* build_action( PARSER* parser, GENERATOR* g, PROD* p, char* base, BOOLEAN def_code ); char* build_scan_action( PARSER* parser, GENERATOR* g, SYMBOL* s, char* base ); @@ -6,10 +6,7 @@ char* mkproduction_str( PROD* p ); BOOLEAN load_generator( PARSER* parser, GENERATOR* g, char* genfile ); void build_code( PARSER* parser ); -/* buildxml.c */ -void build_xml( PARSER* parser, BOOLEAN finished ); - -/* debug.c */ +/* src/debug.c */ void print_symbol( FILE* stream, SYMBOL* sym ); void dump_grammar( FILE* stream, PARSER* parser ); void dump_symbols( FILE* stream, PARSER* parser ); @@ -18,29 +15,29 @@ void dump_lalr_states( FILE* stream, PARSER* parser ); void dump_productions( FILE* stream, PARSER* parser ); void dump_production( FILE* stream, PROD* p, BOOLEAN with_lhs, BOOLEAN semantics ); -/* error.c */ +/* src/error.c */ void print_error( PARSER* parser, ERRORCODE err_id, int err_style, ... 
); -/* first.c */ +/* src/first.c */ void compute_first( PARSER* parser ); int seek_rhs_first( plist* first, plistel* rhs ); -/* integrity.c */ +/* src/integrity.c */ BOOLEAN find_undef_or_unused( PARSER* parser ); BOOLEAN check_regex_anomalies( PARSER* parser ); BOOLEAN check_stupid_productions( PARSER* parser ); -/* lalr.c */ +/* src/lalr.c */ void generate_tables( PARSER* parser ); void detect_default_productions( PARSER* parser ); -/* lex.c */ +/* src/lex.c */ void merge_symbols_to_dfa( PARSER* parser ); void construct_single_lexer( PARSER* parser ); pregex_dfa* find_equal_dfa( PARSER* parser, pregex_dfa* ndfa ); void nfa_from_symbol( PARSER* parser, pregex_nfa* nfa, SYMBOL* sym ); -/* list.c */ +/* src/list.c */ LIST* list_push( LIST* list, void* ptr ); LIST* list_pop( LIST* list, void** ptr ); LIST* list_remove( LIST* list, void* ptr ); @@ -51,13 +48,13 @@ int list_find( LIST* list, void* ptr ); void* list_getptr( LIST* list, int cnt ); LIST* list_union( LIST* first, LIST* second ); -/* main.c */ +/* src/main.c */ char* print_version( BOOLEAN long_version ); void print_copyright( FILE* stream ); void print_usage( FILE* stream, char* progname ); BOOLEAN get_command_line( int argc, char** argv, char** filename, char** output, PARSER* parser ); -/* mem.c */ +/* src/mem.c */ SYMBOL* get_symbol( PARSER* p, void* dfn, int type, BOOLEAN create ); void free_symbol( SYMBOL* sym ); PROD* create_production( PARSER* p, SYMBOL* lhs ); @@ -78,10 +75,10 @@ VTYPE* find_vtype( PARSER* p, char* name ); VTYPE* create_vtype( PARSER* p, char* name ); void free_vtype( VTYPE* vt ); -/* parse.c */ +/* src/parse.c */ int parse_grammar( PARSER* p, char* filename, char* src ); -/* rewrite.c */ +/* src/rewrite.c */ void rewrite_grammar( PARSER* parser ); void unique_charsets( PARSER* parser ); void fix_precedences( PARSER* parser ); @@ -91,23 +88,23 @@ void setup_single_goal( PARSER* parser ); void charsets_to_ptn( PARSER* parser ); void symbol_orders( PARSER* parser ); -/* string.c */ 
+/* src/string.c */ char* int_to_str( int val ); char* long_to_str( long val ); char* str_no_whitespace( char* str ); -/* utils.c */ +/* src/utils.c */ char* derive_name( char* name, char append_char ); int unescape_char( char* str, char** strfix ); SYMBOL* find_base_symbol( SYMBOL* sym ); char* c_identifier( char* str, BOOLEAN to_upper ); -/* virtual.c */ +/* src/virtual.c */ SYMBOL* positive_closure( PARSER* parser, SYMBOL* base ); SYMBOL* kleene_closure( PARSER* parser, SYMBOL* base ); SYMBOL* optional_closure( PARSER* parser, SYMBOL* base ); -/* xml.c */ +/* src/xml.c */ XML_T xml_child( XML_T xml, char* name ); XML_T xml_idx( XML_T xml, int idx ); char* xml_attr( XML_T xml, char* attr ); diff --git a/src/rewrite.c b/src/rewrite.c index 81b2f70..b244a2a 100644 --- a/src/rewrite.c +++ b/src/rewrite.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: rewrite.c -Author: Jan Max Meyer -Usage: Grammar revision functions ------------------------------------------------------------------------------ */ +/* Grammar revision functions */ #include "unicc.h" @@ -18,224 +9,224 @@ The revision is done to simulate tokens which are separated by whitespaces. //parser// is the pointer to parser information structure. 
*/ void rewrite_grammar( PARSER* parser ) { - plistel * e, - * f; - plist * stack, - * done, - * rewritten; - SYMBOL * ws_all = (SYMBOL*)NULL, - * ws_list, - * ws_optlist, - * sym, - * nsym; - PROD * p; - char * deriv; - - /* - 26.03.2008 Jan Max Meyer - Use keyname with special type prefix for hash table access - - 20.08.2011 Jan Max Meyer - Mark productions a rewritten if they are already done, to avoid problems - with multiple left-hand sides (they caused a problem that rewritten - productions would be rewritten multiple times) - */ - - /* Create productions for all whitespaces */ - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( sym->whitespace ) - { - if( !ws_all ) - { - ws_all = get_symbol( parser, P_WHITESPACE, - SYM_NON_TERMINAL, TRUE ); - ws_all->lexem = TRUE; - ws_all->generated = TRUE; - } - - p = create_production( parser, ws_all ); - append_to_production( p, sym, (char*)NULL ); - } - } - - if( !ws_all ) - return; - - ws_all->whitespace = TRUE; - - ws_list = positive_closure( parser, ws_all ); - ws_list->lexem = TRUE; - ws_list->whitespace = TRUE; - - ws_optlist = optional_closure( parser, ws_list ); - ws_optlist->lexem = TRUE; - ws_optlist->whitespace = TRUE; - - /* - Find out all lexeme non-terminals and those - which belong to them. 
- */ - done = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); - stack = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); - rewritten = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); - - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( sym->lexem && sym->type == SYM_NON_TERMINAL ) - { - plist_push( done, sym ); - plist_push( stack, sym ); - } - } - - while( plist_pop( stack, &sym ) ) - { - plist_for( sym->productions, e ) - { - p = (PROD*)plist_access( e ); - - plist_for( p->rhs, f ) - { - sym = (SYMBOL*)plist_access( f ); - - if( sym->type == SYM_NON_TERMINAL ) - { - if( !plist_get_by_ptr( done, sym ) ) - { - sym->lexem = TRUE; - - plist_push( done, sym ); - plist_push( stack, sym ); - } - } - } - } - } - - /* - Find all non-terminals from goal; If there is a call to a - lexem non-terminal or a terminal, rewrite their rules and - replace them. - */ - if( !( parser->goal->lexem ) ) - { - plist_erase( done ); - plist_erase( stack ); - - plist_push( done, parser->goal ); - plist_push( stack, parser->goal ); - - while( plist_pop( stack, &sym ) ) - { - plist_for( sym->productions, e ) - { - p = (PROD*)plist_access( e ); - - /* Don't rewrite a production twice! */ - if( plist_get_by_ptr( rewritten, p ) ) - continue; - - plist_for( p->rhs, f ) - { - sym = (SYMBOL*)plist_access( f ); - - if( sym->type == SYM_NON_TERMINAL && !( sym->lexem ) ) - { - if( !plist_get_by_ptr( done, sym ) ) - { - plist_push( done, sym ); - plist_push( stack, sym ); - } - } - else if( ( sym->type == SYM_NON_TERMINAL && sym->lexem ) - || IS_TERMINAL( sym ) ) - { - /* Do not rewrite system terminals! 
*/ - if( sym->type == SYM_SYSTEM_TERMINAL ) - continue; - - /* Construct derivative symbol name */ - deriv = pstrdup( sym->name ); - - /* Create unique symbol name */ - do - { - deriv = pstrcatstr( deriv, - P_REWRITTEN_TOKEN, FALSE ); - nsym = get_symbol( parser, deriv, - SYM_NON_TERMINAL, FALSE ); - } - while( nsym && nsym->derived_from != sym ); - - /* If you already found a symbol, don't do anything! */ - if( !nsym ) - { - nsym = get_symbol( parser, deriv, - SYM_NON_TERMINAL, TRUE ); - - p = create_production( parser, nsym ); - append_to_production( p, sym, (char*)NULL ); - append_to_production( p, ws_optlist, (char*)NULL ); - - /* dump_production( stdout, p, TRUE, FALSE ); */ - - nsym->prec = sym->prec; - nsym->assoc = sym->assoc; - nsym->nullable = sym->nullable; - nsym->keyword = sym->keyword; - nsym->vtype = sym->vtype; - - nsym->generated = TRUE; - nsym->derived_from = sym; - } - - /* Replace the rewritten symbol with the - production's symbol! */ - memcpy( f + 1, &nsym, sizeof( SYMBOL* ) ); - - pfree( deriv ); - } - } - - /* Mark this production as already rewritten! 
*/ - plist_push( rewritten, p ); - } - } - } - - plist_free( done ); - plist_free( rewritten ); - plist_free( stack ); - - /* Build a new goal symbol */ - deriv = pstrdup( parser->goal->name ); - - do - { - deriv = pstrcatstr( deriv, P_REWRITTEN_TOKEN, FALSE ); - sym = get_symbol( parser, deriv, SYM_NON_TERMINAL, FALSE ); - } - while( sym && sym->derived_from != parser->goal ); - - sym = get_symbol( parser, deriv, SYM_NON_TERMINAL, TRUE ); - sym->generated = TRUE; - pfree( deriv ); - - p = create_production( parser, sym ); - if( !p ) - { - OUTOFMEM; - return; - } - - append_to_production( p, ws_optlist, (char*)NULL ); - append_to_production( p, parser->goal, (char*)NULL ); - parser->goal = sym; + plistel * e, + * f; + plist * stack, + * done, + * rewritten; + SYMBOL * ws_all = (SYMBOL*)NULL, + * ws_list, + * ws_optlist, + * sym, + * nsym; + PROD * p; + char * deriv; + + /* + 26.03.2008 Jan Max Meyer + Use keyname with special type prefix for hash table access + + 20.08.2011 Jan Max Meyer + Mark productions a rewritten if they are already done, to avoid problems + with multiple left-hand sides (they caused a problem that rewritten + productions would be rewritten multiple times) + */ + + /* Create productions for all whitespaces */ + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( sym->whitespace ) + { + if( !ws_all ) + { + ws_all = get_symbol( parser, P_WHITESPACE, + SYM_NON_TERMINAL, TRUE ); + ws_all->lexem = TRUE; + ws_all->generated = TRUE; + } + + p = create_production( parser, ws_all ); + append_to_production( p, sym, (char*)NULL ); + } + } + + if( !ws_all ) + return; + + ws_all->whitespace = TRUE; + + ws_list = positive_closure( parser, ws_all ); + ws_list->lexem = TRUE; + ws_list->whitespace = TRUE; + + ws_optlist = optional_closure( parser, ws_list ); + ws_optlist->lexem = TRUE; + ws_optlist->whitespace = TRUE; + + /* + Find out all lexeme non-terminals and those + which belong to them. 
+ */ + done = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); + stack = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); + rewritten = plist_create( 0, PLIST_MOD_PTR | PLIST_MOD_RECYCLE ); + + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( sym->lexem && sym->type == SYM_NON_TERMINAL ) + { + plist_push( done, sym ); + plist_push( stack, sym ); + } + } + + while( plist_pop( stack, &sym ) ) + { + plist_for( sym->productions, e ) + { + p = (PROD*)plist_access( e ); + + plist_for( p->rhs, f ) + { + sym = (SYMBOL*)plist_access( f ); + + if( sym->type == SYM_NON_TERMINAL ) + { + if( !plist_get_by_ptr( done, sym ) ) + { + sym->lexem = TRUE; + + plist_push( done, sym ); + plist_push( stack, sym ); + } + } + } + } + } + + /* + Find all non-terminals from goal; If there is a call to a + lexem non-terminal or a terminal, rewrite their rules and + replace them. + */ + if( !( parser->goal->lexem ) ) + { + plist_erase( done ); + plist_erase( stack ); + + plist_push( done, parser->goal ); + plist_push( stack, parser->goal ); + + while( plist_pop( stack, &sym ) ) + { + plist_for( sym->productions, e ) + { + p = (PROD*)plist_access( e ); + + /* Don't rewrite a production twice! */ + if( plist_get_by_ptr( rewritten, p ) ) + continue; + + plist_for( p->rhs, f ) + { + sym = (SYMBOL*)plist_access( f ); + + if( sym->type == SYM_NON_TERMINAL && !( sym->lexem ) ) + { + if( !plist_get_by_ptr( done, sym ) ) + { + plist_push( done, sym ); + plist_push( stack, sym ); + } + } + else if( ( sym->type == SYM_NON_TERMINAL && sym->lexem ) + || IS_TERMINAL( sym ) ) + { + /* Do not rewrite system terminals! 
*/ + if( sym->type == SYM_SYSTEM_TERMINAL ) + continue; + + /* Construct derivative symbol name */ + deriv = pstrdup( sym->name ); + + /* Create unique symbol name */ + do + { + deriv = pstrcatstr( deriv, + P_REWRITTEN_TOKEN, FALSE ); + nsym = get_symbol( parser, deriv, + SYM_NON_TERMINAL, FALSE ); + } + while( nsym && nsym->derived_from != sym ); + + /* If you already found a symbol, don't do anything! */ + if( !nsym ) + { + nsym = get_symbol( parser, deriv, + SYM_NON_TERMINAL, TRUE ); + + p = create_production( parser, nsym ); + append_to_production( p, sym, (char*)NULL ); + append_to_production( p, ws_optlist, (char*)NULL ); + + /* dump_production( stdout, p, TRUE, FALSE ); */ + + nsym->prec = sym->prec; + nsym->assoc = sym->assoc; + nsym->nullable = sym->nullable; + nsym->keyword = sym->keyword; + nsym->vtype = sym->vtype; + + nsym->generated = TRUE; + nsym->derived_from = sym; + } + + /* Replace the rewritten symbol with the + production's symbol! */ + memcpy( f + 1, &nsym, sizeof( SYMBOL* ) ); + + pfree( deriv ); + } + } + + /* Mark this production as already rewritten! */ + plist_push( rewritten, p ); + } + } + } + + plist_free( done ); + plist_free( rewritten ); + plist_free( stack ); + + /* Build a new goal symbol */ + deriv = pstrdup( parser->goal->name ); + + do + { + deriv = pstrcatstr( deriv, P_REWRITTEN_TOKEN, FALSE ); + sym = get_symbol( parser, deriv, SYM_NON_TERMINAL, FALSE ); + } + while( sym && sym->derived_from != parser->goal ); + + sym = get_symbol( parser, deriv, SYM_NON_TERMINAL, TRUE ); + sym->generated = TRUE; + pfree( deriv ); + + p = create_production( parser, sym ); + if( !p ) + { + OUTOFMEM; + return; + } + + append_to_production( p, ws_optlist, (char*)NULL ); + append_to_production( p, parser->goal, (char*)NULL ); + parser->goal = sym; } /** Rewrites the grammar to work with uniquely identifyable character sets @@ -245,137 +236,137 @@ instead of overlapping ones. This function was completely rewritten in Nov 2009. 
*/ void unique_charsets( PARSER* parser ) { - plistel* e; - plistel* f; - SYMBOL* sym; - SYMBOL* tsym; - SYMBOL* nsym; - SYMBOL* rsym; - PROD* p; - pccl* inter; - pccl* diff; - int old_prod_cnt; - - /* - 11.11.2009 Jan Max Meyer - Entire redesign of function, to work with full - Unicode-range character classes. - */ - - PROC( "unique_charsets" ); - - do - { - old_prod_cnt = plist_count( parser->productions ); - - plist_for( parser->symbols, e ) - { - /* Get symbol pointer */ - sym = (SYMBOL*)plist_access( e ); - if( sym->type != SYM_CCL_TERMINAL ) - continue; - - MSG( "NEXT SYMBOL FOR REVISION" ); - VARS( "sym->name", "%s", sym->name ); - - /* - fprintf( stderr, "sym->ccl: %d\n", pccl_size( sym->ccl ) ); - pccl_print( stderr, sym->ccl, 1 ); - */ - - /* Find overlapping character classes */ - MSG( "Searching for overlapping character classes" ); - plist_for( parser->symbols, f ) - { - tsym = (SYMBOL*)plist_access( f ); - - if( tsym->type != SYM_CCL_TERMINAL ) - continue; - - VARS( "tsym->name", "%s", tsym->name ); - - inter = pccl_intersect( sym->ccl, tsym->ccl ); - - /* - fprintf( stdout, "inter = >%s< sym = >%s< tsym = >%s<\n", - pccl_to_str( inter, TRUE ), - pccl_to_str( sym->ccl, TRUE ), - pccl_to_str( tsym->ccl, TRUE ) ); - */ - - VARS( "inter", "%p", inter ); - if( inter ) - { - MSG( "Intersections found with tsym" ); - - /* Create charclass-symbol for remaining symbols */ - diff = pccl_diff( tsym->ccl, inter ); - if( !pccl_size( diff ) ) - { - diff = pccl_free( diff ); - inter = pccl_free( inter ); - continue; - } - - /* Disallow intersections in scanner-mode */ - if( parser->p_mode == MODE_SCANNER ) - { - print_error( parser, ERR_CHARCLASS_OVERLAP, - ERRSTYLE_FATAL, - pccl_to_str( inter, TRUE )); - - inter = pccl_free( inter ); - continue; - } - - /* Create charclass-symbol for intersecting symbols */ - if( !( nsym = get_symbol( parser, (void*)inter, - SYM_CCL_TERMINAL, FALSE ) ) ) - { - nsym = get_symbol( parser, (void*)inter, - SYM_CCL_TERMINAL, TRUE ); - 
nsym->used = TRUE; - nsym->defined = TRUE; - } - else - inter = pccl_free( inter ); - - rsym = get_symbol( parser, (void*)diff, - SYM_CCL_TERMINAL, TRUE ); - rsym->used = TRUE; - rsym->defined = TRUE; - - /* Re-configure symbol */ - tsym->ccl = pccl_free( tsym->ccl ); - tsym->name = pstrcatstr( tsym->name, - P_REWRITTEN_CCL, FALSE ); - tsym->type = SYM_NON_TERMINAL; - plist_erase( tsym->first ); - tsym->productions = plist_create( 0, PLIST_MOD_PTR ); - - /* Create & append productions */ - p = create_production( parser, tsym ); - append_to_production( p, nsym, (char*)NULL ); - - p = create_production( parser, tsym ); - append_to_production( p, rsym, (char*)NULL ); - } - else - { - MSG( "Has no intersections, next" ); - } - } - } - - /* - fprintf( stderr, "-----\nCURRENT GRAMMAR:\n" ); - dump_grammar( stderr, parser ); - getchar(); - */ - } - while( old_prod_cnt != plist_count( parser->productions ) ); - - VOIDRET; + plistel* e; + plistel* f; + SYMBOL* sym; + SYMBOL* tsym; + SYMBOL* nsym; + SYMBOL* rsym; + PROD* p; + pccl* inter; + pccl* diff; + int old_prod_cnt; + + /* + 11.11.2009 Jan Max Meyer + Entire redesign of function, to work with full + Unicode-range character classes. 
+ */ + + PROC( "unique_charsets" ); + + do + { + old_prod_cnt = plist_count( parser->productions ); + + plist_for( parser->symbols, e ) + { + /* Get symbol pointer */ + sym = (SYMBOL*)plist_access( e ); + if( sym->type != SYM_CCL_TERMINAL ) + continue; + + MSG( "NEXT SYMBOL FOR REVISION" ); + VARS( "sym->name", "%s", sym->name ); + + /* + fprintf( stderr, "sym->ccl: %d\n", pccl_size( sym->ccl ) ); + pccl_print( stderr, sym->ccl, 1 ); + */ + + /* Find overlapping character classes */ + MSG( "Searching for overlapping character classes" ); + plist_for( parser->symbols, f ) + { + tsym = (SYMBOL*)plist_access( f ); + + if( tsym->type != SYM_CCL_TERMINAL ) + continue; + + VARS( "tsym->name", "%s", tsym->name ); + + inter = pccl_intersect( sym->ccl, tsym->ccl ); + + /* + fprintf( stdout, "inter = >%s< sym = >%s< tsym = >%s<\n", + pccl_to_str( inter, TRUE ), + pccl_to_str( sym->ccl, TRUE ), + pccl_to_str( tsym->ccl, TRUE ) ); + */ + + VARS( "inter", "%p", inter ); + if( inter ) + { + MSG( "Intersections found with tsym" ); + + /* Create charclass-symbol for remaining symbols */ + diff = pccl_diff( tsym->ccl, inter ); + if( !pccl_size( diff ) ) + { + diff = pccl_free( diff ); + inter = pccl_free( inter ); + continue; + } + + /* Disallow intersections in scanner-mode */ + if( parser->p_mode == MODE_SCANNER ) + { + print_error( parser, ERR_CHARCLASS_OVERLAP, + ERRSTYLE_FATAL, + pccl_to_str( inter, TRUE )); + + inter = pccl_free( inter ); + continue; + } + + /* Create charclass-symbol for intersecting symbols */ + if( !( nsym = get_symbol( parser, (void*)inter, + SYM_CCL_TERMINAL, FALSE ) ) ) + { + nsym = get_symbol( parser, (void*)inter, + SYM_CCL_TERMINAL, TRUE ); + nsym->used = TRUE; + nsym->defined = TRUE; + } + else + inter = pccl_free( inter ); + + rsym = get_symbol( parser, (void*)diff, + SYM_CCL_TERMINAL, TRUE ); + rsym->used = TRUE; + rsym->defined = TRUE; + + /* Re-configure symbol */ + tsym->ccl = pccl_free( tsym->ccl ); + tsym->name = pstrcatstr( tsym->name, + 
P_REWRITTEN_CCL, FALSE ); + tsym->type = SYM_NON_TERMINAL; + plist_erase( tsym->first ); + tsym->productions = plist_create( 0, PLIST_MOD_PTR ); + + /* Create & append productions */ + p = create_production( parser, tsym ); + append_to_production( p, nsym, (char*)NULL ); + + p = create_production( parser, tsym ); + append_to_production( p, rsym, (char*)NULL ); + } + else + { + MSG( "Has no intersections, next" ); + } + } + } + + /* + fprintf( stderr, "-----\nCURRENT GRAMMAR:\n" ); + dump_grammar( stderr, parser ); + getchar(); + */ + } + while( old_prod_cnt != plist_count( parser->productions ) ); + + VOIDRET; } /** Fixes the precedence and associativity information of the current grammar @@ -384,74 +375,74 @@ to be prepared for LALR(1) table generation. //parser// is the pointer to parser information structure. */ void fix_precedences( PARSER* parser ) { - PROD* p; - plistel* e; - plistel* f; - BOOLEAN found; - SYMBOL* sym; - - /* - * If nonterminal symbol has a precedence, - * attach it to all its productions! - */ - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( sym->type == SYM_NON_TERMINAL - && sym->prec > 0 && !( sym->generated ) ) - { - plist_for( sym->productions, f ) - { - p = (PROD*)plist_access( f ); - - if( p->prec <= sym->prec ) - p->prec = sym->prec; - } - } - } - - /* - Set production's precedence level to the - one of the rightmost terminal! 
- */ - plist_for( parser->productions, e ) - { - p = (PROD*)plist_access( e ); - - if( p->prec > 0 ) - continue; - - /* First try to find rightmost terminal */ - found = FALSE; - for( f = plist_last( p->rhs ); f; f = plist_prev( f ) ) - { - sym = (SYMBOL*)plist_access( f ); - - if( sym->lexem ) - { - p->prec = sym->prec; - found = TRUE; - break; - } - } - - /* - If there is no terminal, use rightmost - non-terminal with a precedence - */ - if( !found ) - for( f = plist_last( p->rhs ); f; f = plist_prev( f ) ) - { - sym = (SYMBOL*)plist_access( f ); - - if( sym->prec > p->prec ) - { - p->prec = sym->prec; - break; - } - } - } + PROD* p; + plistel* e; + plistel* f; + BOOLEAN found; + SYMBOL* sym; + + /* + * If nonterminal symbol has a precedence, + * attach it to all its productions! + */ + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( sym->type == SYM_NON_TERMINAL + && sym->prec > 0 && !( sym->generated ) ) + { + plist_for( sym->productions, f ) + { + p = (PROD*)plist_access( f ); + + if( p->prec <= sym->prec ) + p->prec = sym->prec; + } + } + } + + /* + Set production's precedence level to the + one of the rightmost terminal! + */ + plist_for( parser->productions, e ) + { + p = (PROD*)plist_access( e ); + + if( p->prec > 0 ) + continue; + + /* First try to find rightmost terminal */ + found = FALSE; + for( f = plist_last( p->rhs ); f; f = plist_prev( f ) ) + { + sym = (SYMBOL*)plist_access( f ); + + if( sym->lexem ) + { + p->prec = sym->prec; + found = TRUE; + break; + } + } + + /* + If there is no terminal, use rightmost + non-terminal with a precedence + */ + if( !found ) + for( f = plist_last( p->rhs ); f; f = plist_prev( f ) ) + { + sym = (SYMBOL*)plist_access( f ); + + if( sym->prec > p->prec ) + { + p->prec = sym->prec; + break; + } + } + } } /** Inherits the fixiation definitions once done with "fixate" parser directive. 
@@ -460,54 +451,54 @@ void fix_precedences( PARSER* parser ) data, holding everything :) */ void inherit_fixiations( PARSER* parser ) { - plistel* e; - plistel* f; - SYMBOL* sym; - PROD* p; - - plist* done; - plist* stack; - - done = plist_create( 0, PLIST_MOD_PTR ); - stack = plist_create( 0, PLIST_MOD_PTR ); - - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - - if( sym->fixated && sym->type == SYM_NON_TERMINAL ) - { - plist_push( done, sym ); - plist_push( stack, sym ); - } - } - - while( plist_pop( stack, &sym ) ) - { - plist_for( sym->productions, e ) - { - p = (PROD*)plist_access( e ); - - plist_for( p->rhs, f ) - { - sym = (SYMBOL*)plist_access( f ); - - if( sym->type == SYM_NON_TERMINAL ) - { - if( !plist_get_by_ptr( done, sym ) ) - { - sym->fixated = TRUE; - - plist_push( done, sym ); - plist_push( stack, sym ); - } - } - } - } - } - - plist_free( stack ); - plist_free( done ); + plistel* e; + plistel* f; + SYMBOL* sym; + PROD* p; + + plist* done; + plist* stack; + + done = plist_create( 0, PLIST_MOD_PTR ); + stack = plist_create( 0, PLIST_MOD_PTR ); + + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + + if( sym->fixated && sym->type == SYM_NON_TERMINAL ) + { + plist_push( done, sym ); + plist_push( stack, sym ); + } + } + + while( plist_pop( stack, &sym ) ) + { + plist_for( sym->productions, e ) + { + p = (PROD*)plist_access( e ); + + plist_for( p->rhs, f ) + { + sym = (SYMBOL*)plist_access( f ); + + if( sym->type == SYM_NON_TERMINAL ) + { + if( !plist_get_by_ptr( done, sym ) ) + { + sym->fixated = TRUE; + + plist_push( done, sym ); + plist_push( stack, sym ); + } + } + } + } + } + + plist_free( stack ); + plist_free( done ); } /** Inherits value types of rewritten symbols from their base. @@ -516,66 +507,66 @@ This is required for symbols that where generated before their definition in the code - where a possible value type is still unknown. 
*/ void inherit_vtypes( PARSER* parser ) { - SYMBOL* sym; - plistel* e; + SYMBOL* sym; + plistel* e; - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); - if( !sym->vtype && sym->derived_from ) - sym->vtype = sym->derived_from->vtype; - } + if( !sym->vtype && sym->derived_from ) + sym->vtype = sym->derived_from->vtype; + } } /** Sets up a single goal symbol, if necessary. */ void setup_single_goal( PARSER* parser ) { - SYMBOL* sym; - PROD* p; - char* deriv; - - if( plist_count( parser->goal->productions ) == 1 ) - { - p = (PROD*)plist_access( plist_first( parser->goal->productions ) ); - - if( plist_count( p->rhs ) == 1 ) - { - sym = (SYMBOL*)plist_access( plist_last( p->rhs ) ); - - if( sym->type == SYM_NON_TERMINAL ) - { - plist_push( p->rhs, parser->end_of_input ); - return; /* Nothing to do anymore! */ - } - } - } - - /* Setup a new goal symbol */ - deriv = pstrcatstr( pstrdup( parser->goal->name ), - P_REWRITTEN_TOKEN, FALSE ); - - if( !( sym = get_symbol( parser, deriv, SYM_NON_TERMINAL, TRUE ) ) ) - { - OUTOFMEM; - return; - } - - sym->generated = TRUE; - sym->vtype = parser->goal->vtype; - - pfree( deriv ); - - if( !( p = create_production( parser, sym ) ) ) - { - OUTOFMEM; - return; - } - - append_to_production( p, parser->goal, (char*)NULL ); - append_to_production( p, parser->end_of_input, (char*)NULL ); - - parser->goal = sym; + SYMBOL* sym; + PROD* p; + char* deriv; + + if( plist_count( parser->goal->productions ) == 1 ) + { + p = (PROD*)plist_access( plist_first( parser->goal->productions ) ); + + if( plist_count( p->rhs ) == 1 ) + { + sym = (SYMBOL*)plist_access( plist_last( p->rhs ) ); + + if( sym->type == SYM_NON_TERMINAL ) + { + plist_push( p->rhs, parser->end_of_input ); + return; /* Nothing to do anymore! 
*/ + } + } + } + + /* Setup a new goal symbol */ + deriv = pstrcatstr( pstrdup( parser->goal->name ), + P_REWRITTEN_TOKEN, FALSE ); + + if( !( sym = get_symbol( parser, deriv, SYM_NON_TERMINAL, TRUE ) ) ) + { + OUTOFMEM; + return; + } + + sym->generated = TRUE; + sym->vtype = parser->goal->vtype; + + pfree( deriv ); + + if( !( p = create_production( parser, sym ) ) ) + { + OUTOFMEM; + return; + } + + append_to_production( p, parser->goal, (char*)NULL ); + append_to_production( p, parser->end_of_input, (char*)NULL ); + + parser->goal = sym; } /** Turns character-classes into patterns, to be later integrated into lexical @@ -587,39 +578,39 @@ finished, and no more character classes are added. //parser// is the Pointer to the parser information structure. */ void charsets_to_ptn( PARSER* parser ) { - SYMBOL* sym; - plistel* e; + SYMBOL* sym; + plistel* e; - PROC( "charsets_to_ptn" ); - PARMS( "parser", "%p", parser ); + PROC( "charsets_to_ptn" ); + PARMS( "parser", "%p", parser ); - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); - if( sym->type == SYM_CCL_TERMINAL ) - sym->ptn = pregex_ptn_create_char( sym->ccl ); - } + if( sym->type == SYM_CCL_TERMINAL ) + sym->ptn = pregex_ptn_create_char( sym->ccl ); + } - VOIDRET; + VOIDRET; } /** Re-arrange symbol orders. 
*/ void symbol_orders( PARSER* parser ) { - plistel* e; - SYMBOL* sym; + plistel* e; + SYMBOL* sym; - PROC( "symbol_orders" ); - PARMS( "parser", "%p", parser ); + PROC( "symbol_orders" ); + PARMS( "parser", "%p", parser ); - plist_sort( parser->symbols ); - plist_for( parser->symbols, e ) - { - sym = (SYMBOL*)plist_access( e ); - sym->id = plist_offset( e ); - } + plist_sort( parser->symbols ); + plist_for( parser->symbols, e ) + { + sym = (SYMBOL*)plist_access( e ); + sym->id = plist_offset( e ); + } - VOIDRET; + VOIDRET; } diff --git a/src/string.c b/src/string.c index 4749036..b617c06 100644 --- a/src/string.c +++ b/src/string.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: string.c -Author: Jan Max Meyer -Usage: String-related management functions ------------------------------------------------------------------------------ */ +/* String-related management functions */ #include "unicc.h" @@ -20,12 +11,12 @@ Returns a char*-pointer to allocated string. This must bee freed later on. */ char* int_to_str( int val ) { - char* ret; + char* ret; - ret = (char*)pmalloc( 64 * sizeof( char ) ); - sprintf( ret, "%d", val ); + ret = (char*)pmalloc( 64 * sizeof( char ) ); + sprintf( ret, "%d", val ); - return ret; + return ret; } /** Returns an allocated string which contains the string-representation of a @@ -37,12 +28,12 @@ Returns a char*-pointer to allocated string. This must bee freed later on. 
*/ char* long_to_str( long val ) { - char* ret; + char* ret; - ret = (char*)pmalloc( 128 * sizeof( char ) ); - sprintf( ret, "%ld", val ); + ret = (char*)pmalloc( 128 * sizeof( char ) ); + sprintf( ret, "%ld", val ); - return ret; + return ret; } /** Removes all whitespaces from a string (including inline ones!) and returns @@ -54,17 +45,16 @@ Returns a pointer to the input string. */ char* str_no_whitespace( char* str ) { - char* ptr = str; - char* start = str; + char* ptr = str; + char* start = str; - while( *str != '\0' ) - if( *str == ' ' || *str == '\t' ) - str++; - else - *ptr++ = *str++; + while( *str != '\0' ) + if( *str == ' ' || *str == '\t' ) + str++; + else + *ptr++ = *str++; - *ptr = '\0'; + *ptr = '\0'; - return start; + return start; } - diff --git a/src/unicc.h b/src/unicc.h index cdc4ab5..f68b1ca 100644 --- a/src/unicc.h +++ b/src/unicc.h @@ -1,17 +1,4 @@ -/* -HEADER---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. 
- -File: unicc.h -Author: Jan Max Meyer -Usage: Global declarations, structures and includes ------------------------------------------------------------------------------ */ - -#ifdef _WIN32 -#pragma warning( disable: 4996 ) -#endif +/* UniCC global declarations, structures and includes */ /* * Includes @@ -77,7 +64,7 @@ Usage: Global declarations, structures and includes /* UniCC version number */ #define UNICC_VER_MAJOR 1 -#define UNICC_VER_MINOR 8 +#define UNICC_VER_MINOR 9 #define UNICC_VER_PATCH 0 #define UNICC_VER_EXTSTR "" @@ -86,7 +73,6 @@ Usage: Global declarations, structures and includes /* File extensions */ #define UNICC_TLT_EXTENSION ".tlt" -#define UNICC_XML_EXTENSION ".xml" /* * Macros @@ -96,12 +82,12 @@ Usage: Global declarations, structures and includes #endif #define OUTOFMEM fprintf( stderr, \ - "%s, %d: Memory allocation failure; " \ - "UniCC possibly ran out of memory!\n", \ - __FILE__, __LINE__ ), \ - print_error( (PARSER*)NULL, ERR_MEMORY_ERROR,\ - ERRSTYLE_FATAL, __FILE__, __LINE__ ), \ - exit( EXIT_FAILURE ) + "%s, %d: Memory allocation failure; " \ + "UniCC possibly ran out of memory!\n", \ + __FILE__, __LINE__ ), \ + print_error( (PARSER*)NULL, ERR_MEMORY_ERROR,\ + ERRSTYLE_FATAL, __FILE__, __LINE__ ), \ + exit( EXIT_FAILURE ) /* * Type definitions @@ -126,8 +112,8 @@ typedef struct _generator_2d_tab _2D_TABLE; /* Simple linked list */ struct _list { - void* pptr; - LIST* next; + void* pptr; + LIST* next; }; #define list_access( ll ) ( (ll) ? 
(ll)->pptr : (void*)NULL ) @@ -138,307 +124,307 @@ struct _list /* Symbol structure */ struct _symbol { - int id; /* Symbol ID */ - int type; /* Symbol type */ - - char* keyname; /* Key name */ - char* name; /* Symbol name */ - char* emit; - - pccl* ccl; /* Character-class definition */ - - plist* productions; /* List of productions attached to a - non-terminal symbol */ - plist* first; /* The symbol's first set */ - - plist* all_sym; /* List of all possible terminal - definitions, for multiple-terminals. - This list will only be set in the - primary symbol. - */ - - pregex_ptn* ptn; /* Regular expression pattern */ - - BOOLEAN fixated; /* Flags, if fixated symbols - (do always shift!) */ - BOOLEAN goal; /* Flags, if goal non-terminal */ - BOOLEAN nullable; /* Flags, if nullable */ - BOOLEAN defined; /* Flags, if defined */ - BOOLEAN used; /* Flags, if used */ - BOOLEAN lexem; /* Flags, if distinguished as lexem */ - BOOLEAN keyword; /* Flags, if it is a keyword - (or if it has been derived from a - keyword) */ - BOOLEAN whitespace; /* Flags, if it is a whitespace */ - BOOLEAN generated; /* Flags, if automatically generated - symbol */ - BOOLEAN greedy; /* Flags if this is a greedy or nongreedy - nonterminal */ - - plist* options; /* Options hash table */ - - SYMBOL* derived_from; /* Pointer to symbol from which the - current has been derived */ - - VTYPE* vtype; /* Pointer to value type the symbol - is associated with */ - - int prec; /* Level of precedence */ - int assoc; /* Associativity */ - - int line; /* Line of definition */ - - char* code; /* Code for regex terminals */ - int code_at; /* Beginning line of code-segment - in source file */ + int id; /* Symbol ID */ + int type; /* Symbol type */ + + char* keyname; /* Key name */ + char* name; /* Symbol name */ + char* emit; + + pccl* ccl; /* Character-class definition */ + + plist* productions; /* List of productions attached to a + non-terminal symbol */ + plist* first; /* The symbol's first set */ + + plist* 
all_sym; /* List of all possible terminal + definitions, for multiple-terminals. + This list will only be set in the + primary symbol. + */ + + pregex_ptn* ptn; /* Regular expression pattern */ + + BOOLEAN fixated; /* Flags, if fixated symbols + (do always shift!) */ + BOOLEAN goal; /* Flags, if goal non-terminal */ + BOOLEAN nullable; /* Flags, if nullable */ + BOOLEAN defined; /* Flags, if defined */ + BOOLEAN used; /* Flags, if used */ + BOOLEAN lexem; /* Flags, if distinguished as lexem */ + BOOLEAN keyword; /* Flags, if it is a keyword + (or if it has been derived from a + keyword) */ + BOOLEAN whitespace; /* Flags, if it is a whitespace */ + BOOLEAN generated; /* Flags, if automatically generated + symbol */ + BOOLEAN greedy; /* Flags if this is a greedy or nongreedy + nonterminal */ + + plist* options; /* Options hash table */ + + SYMBOL* derived_from; /* Pointer to symbol from which the + current has been derived */ + + VTYPE* vtype; /* Pointer to value type the symbol + is associated with */ + + int prec; /* Level of precedence */ + int assoc; /* Associativity */ + + int line; /* Line of definition */ + + char* code; /* Code for regex terminals */ + int code_at; /* Beginning line of code-segment + in source file */ }; /* Production structure */ struct _prod { - int id; /* Production ID */ + int id; /* Production ID */ - SYMBOL* lhs; /* Primary left-hand side symbol */ - plist* all_lhs; /* List of all possible left-hand sides */ + SYMBOL* lhs; /* Primary left-hand side symbol */ + plist* all_lhs; /* List of all possible left-hand sides */ - char* emit; /* AST node generation */ + char* emit; /* AST node generation */ - plist* rhs; /* Right-hand side symbols */ - plist* sem_rhs; /* Semantic right-hand side; This - may differ from the right hand side - in case of embedded productions, - to allow a mixture of the - outer- and inner-production */ + plist* rhs; /* Right-hand side symbols */ + plist* sem_rhs; /* Semantic right-hand side; This + may differ from 
the right hand side + in case of embedded productions, + to allow a mixture of the + outer- and inner-production */ - int prec; /* Precedence level */ - int assoc; /* Associativity flag */ + int prec; /* Precedence level */ + int assoc; /* Associativity flag */ - plist* options; /* Options hash table */ + plist* options; /* Options hash table */ - int line; /* Line of definition */ + int line; /* Line of definition */ - char* code; /* Semantic reduction action template */ - int code_at; /* Beginning line of code-segment - in source file */ + char* code; /* Semantic reduction action template */ + int code_at; /* Beginning line of code-segment + in source file */ }; /* Closure item */ struct _item { - PROD* prod; /* The associated production - to this item */ - int dot_offset; /* The dot's offset from the left - of the right hand side */ - SYMBOL* next_symbol; /* Symbol following the dot */ - plist lookahead; /* Set of lookahead-symbols */ + PROD* prod; /* The associated production + to this item */ + int dot_offset; /* The dot's offset from the left + of the right hand side */ + SYMBOL* next_symbol; /* Symbol following the dot */ + plist lookahead; /* Set of lookahead-symbols */ }; /* LALR(1) State */ struct _state { - int state_id; /* State ID */ - LIST* kernel; /* Kernel item set */ - LIST* epsilon; /* Epsilon item set */ + int state_id; /* State ID */ + LIST* kernel; /* Kernel item set */ + LIST* epsilon; /* Epsilon item set */ - LIST* actions; /* Action table entries */ - LIST* gotos; /* Goto table entries */ + LIST* actions; /* Action table entries */ + LIST* gotos; /* Goto table entries */ - PROD* def_prod; /* Default production */ + PROD* def_prod; /* Default production */ - BOOLEAN done; /* Done flag */ - BOOLEAN closed; /* Closed flag */ + BOOLEAN done; /* Done flag */ + BOOLEAN closed; /* Closed flag */ - pregex_dfa* dfa; /* DFA machine for regex recognition - in this state */ + pregex_dfa* dfa; /* DFA machine for regex recognition + in this state */ - int 
derived_from; /* Previous state */ + int derived_from; /* Previous state */ }; /* Action/Goto table column */ struct _tabcol { - SYMBOL* symbol; /* Symbol */ - short action; /* Action on this symbol */ - int index; /* Action-index on this symbol */ + SYMBOL* symbol; /* Symbol */ + short action; /* Action on this symbol */ + int index; /* Action-index on this symbol */ - ITEM* derived_from; /* List of items that caused the - derivation of this column */ + ITEM* derived_from; /* List of items that caused the + derivation of this column */ }; /* Value stack type */ struct _vtype { - int id; /* Value type ID */ - char* int_name; /* Internal name for verification */ - char* real_def; /* Definition by user */ + int id; /* Value type ID */ + char* int_name; /* Internal name for verification */ + char* real_def; /* Definition by user */ }; /* Parser option */ struct _option { - int line; /* Line of option definition */ - char* opt; /* Option name */ - char* def; /* Option content */ + int line; /* Line of option definition */ + char* opt; /* Option name */ + char* def; /* Option content */ }; /* Parser information structure */ struct _parser { - plist* symbols; /* Symbol table */ - plist* productions; /* Productions */ - parray* states; /* LALR(1) states */ - LIST* dfa; /* List containing the DFA for - regex terminal recognition */ - - SYMBOL* goal; /* Pointer to the goal non-terminal */ - SYMBOL* end_of_input; /* End of input symbol */ - SYMBOL* error; /* Error token */ - - LIST* dfas; /* Lexers */ - LIST* vtypes; /* Value stack types */ - - short p_mode; /* Parser model */ - char* p_template; /* Parser target template */ - char* p_prefix; /* Parser symbol prefix */ - char* p_basename; /* Parser file basename */ - char* p_def_action; /* Default reduce action */ - char* p_def_action_e; /* Default reduce action for - epsilon-productions */ - - BOOLEAN p_lexem_sep; /* Flag, if lexem separation is switched - ON or not */ - BOOLEAN p_cis_strings; /* Flag, if case-insensitive 
strings */ - BOOLEAN p_extern_tokens;/* Flag if parser uses external tokens */ - BOOLEAN p_reserve_regex;/* Flag, if regex'es are reserved */ - int p_universe; /* Maximum of the character universe */ - - char* p_header; /* Header/Prologue program code of the parser */ - char* p_footer; /* Footer/Epilogue embedded program code - of the parser */ - char* p_pcb; /* Parser control block: Individual - code segment */ - - VTYPE* p_def_type; /* Default value type */ - - plist* options; /* Options parameter hash table */ - - char* source; /* Parser definition source */ - - /* Context-free model relevant */ - LIST* lexer; - - /* Parser runtime switches */ - BOOLEAN stats; - BOOLEAN verbose; - BOOLEAN show_states; - BOOLEAN show_grammar; - BOOLEAN show_productions; - BOOLEAN show_symbols; - BOOLEAN optimize_states; - BOOLEAN all_warnings; - BOOLEAN gen_prog; - BOOLEAN gen_xml; - BOOLEAN to_stdout; - char* target; /* Target language by command-line */ - int files_count; - - /* Debug and maintainance */ - char* filename; - int debug_level; - - /* XML-root node for XML-encoded error messages */ - XML_T err_xml; + plist* symbols; /* Symbol table */ + plist* productions; /* Productions */ + parray* states; /* LALR(1) states */ + LIST* dfa; /* List containing the DFA for + regex terminal recognition */ + + SYMBOL* goal; /* Pointer to the goal non-terminal */ + SYMBOL* end_of_input; /* End of input symbol */ + SYMBOL* error; /* Error token */ + + LIST* dfas; /* Lexers */ + LIST* vtypes; /* Value stack types */ + + short p_mode; /* Parser model */ + char* p_template; /* Parser target template */ + char* p_prefix; /* Parser symbol prefix */ + char* p_basename; /* Parser file basename */ + char* p_def_action; /* Default reduce action */ + char* p_def_action_e; /* Default reduce action for + epsilon-productions */ + + BOOLEAN p_lexem_sep; /* Flag, if lexem separation is switched + ON or not */ + BOOLEAN p_cis_strings; /* Flag, if case-insensitive strings */ + BOOLEAN p_extern_tokens;/* 
Flag if parser uses external tokens */ + BOOLEAN p_reserve_regex;/* Flag, if regex'es are reserved */ + int p_universe; /* Maximum of the character universe */ + + char* p_header; /* Header/Prologue program code of the parser */ + char* p_footer; /* Footer/Epilogue embedded program code + of the parser */ + char* p_pcb; /* Parser control block: Individual + code segment */ + + VTYPE* p_def_type; /* Default value type */ + + plist* options; /* Options parameter hash table */ + + char* source; /* Parser definition source */ + + /* Context-free model relevant */ + LIST* lexer; + + /* Parser runtime switches */ + BOOLEAN stats; + BOOLEAN verbose; + BOOLEAN show_states; + BOOLEAN show_grammar; + BOOLEAN show_productions; + BOOLEAN show_symbols; + BOOLEAN optimize_states; + BOOLEAN all_warnings; + BOOLEAN gen_prog; + BOOLEAN gen_xml; + BOOLEAN to_stdout; + char* target; /* Target language by command-line */ + int files_count; + + /* Debug and maintainance */ + char* filename; + int debug_level; + + /* XML-root node for XML-encoded error messages */ + XML_T err_xml; }; /* Generator 2D table structure */ struct _generator_2d_tab { - char* row_start; - char* row_end; - char* col; - char* col_sep; - char* row_sep; + char* row_start; + char* row_end; + char* col; + char* col_sep; + char* row_sep; }; /* Generator 1D table structur */ struct _generator_1d_tab { - char* col; - char* col_sep; + char* col; + char* col_sep; }; /* Generator template structure */ struct _generator { - char* name; /* Target language name */ - char* prefix; /* Replacement variable prefix */ - char* driver; /* Driver source code */ - char* vstack_def_type; /* Default-type for nonterminals - if no type is specified */ - char* vstack_term_type; /* Type for terminals - (characters) to be pushed on - the value stack */ - _2D_TABLE acttab; /* Action table */ - _2D_TABLE gotab; /* Goto table */ - _1D_TABLE defprod; /* Default production for - each state */ - _1D_TABLE symbols; /* Symbol information table */ - 
_1D_TABLE productions; /* Production information table */ - _1D_TABLE dfa_select; /* DFA machine selection */ - _2D_TABLE dfa_idx; /* DFA state index */ - _1D_TABLE dfa_char; /* DFA transition characters */ - _1D_TABLE dfa_trans; /* DFA transitions */ - _2D_TABLE dfa_accept; /* DFA accepting states */ - - char* action_start; /* Action code start */ - char* action_end; /* Action code end */ - char* action_single; /* Action vstack access */ - char* action_union; /* Action union access */ - char* action_lhs_single; /* Action left-hand side - single access */ - char* action_lhs_union; /* Action left-hand side - union access */ - char* action_set_lhs; /* Set a left-hand - side within semantic - action code */ - char* vstack_single; /* Single value stack type - definition */ - char* vstack_union_start; /* Begin of value stack - union definition */ - char* vstack_union_end; /* End of value stack - union definition */ - char* vstack_union_def; /* Union inline type - definition */ - char* vstack_union_att; /* Union attribute name */ - char* scan_action_start; /* Scanner action code start */ - char* scan_action_end; /* Scanner action code end */ - char* scan_action_begin_offset; /* Begin-offset of scanned token - in source */ - char* scan_action_end_offset; /* End-offset of scanned token - in source */ - char* scan_action_ret_single; /* Semantic value, - single access */ - char* scan_action_ret_union; /* Semantic value, - union access */ - char* scan_action_set_symbol; /* Set regex symbol depending - on action code decision */ - - char* code_localization; /* Code localization template */ - - char** for_sequences; /* Dynamic array of string - sequences to be replaced/es- - caped in output strings */ - char** do_sequences; /* Dynamic array of escape- - sequences for the particular - string sequence in the above - array */ - int sequences_count; /* Number of elements in the - above array */ - - char* truedef; /* Value for true */ - char* falsedef; /* Value for false */ - - 
XML_T xml; /* XML root node */ + char* name; /* Target language name */ + char* prefix; /* Replacement variable prefix */ + char* driver; /* Driver source code */ + char* vstack_def_type; /* Default-type for nonterminals + if no type is specified */ + char* vstack_term_type; /* Type for terminals + (characters) to be pushed on + the value stack */ + _2D_TABLE acttab; /* Action table */ + _2D_TABLE gotab; /* Goto table */ + _1D_TABLE defprod; /* Default production for + each state */ + _1D_TABLE symbols; /* Symbol information table */ + _1D_TABLE productions; /* Production information table */ + _1D_TABLE dfa_select; /* DFA machine selection */ + _2D_TABLE dfa_idx; /* DFA state index */ + _1D_TABLE dfa_char; /* DFA transition characters */ + _1D_TABLE dfa_trans; /* DFA transitions */ + _2D_TABLE dfa_accept; /* DFA accepting states */ + + char* action_start; /* Action code start */ + char* action_end; /* Action code end */ + char* action_single; /* Action vstack access */ + char* action_union; /* Action union access */ + char* action_lhs_single; /* Action left-hand side + single access */ + char* action_lhs_union; /* Action left-hand side + union access */ + char* action_set_lhs; /* Set a left-hand + side within semantic + action code */ + char* vstack_single; /* Single value stack type + definition */ + char* vstack_union_start; /* Begin of value stack + union definition */ + char* vstack_union_end; /* End of value stack + union definition */ + char* vstack_union_def; /* Union inline type + definition */ + char* vstack_union_att; /* Union attribute name */ + char* scan_action_start; /* Scanner action code start */ + char* scan_action_end; /* Scanner action code end */ + char* scan_action_begin_offset; /* Begin-offset of scanned token + in source */ + char* scan_action_end_offset; /* End-offset of scanned token + in source */ + char* scan_action_ret_single; /* Semantic value, + single access */ + char* scan_action_ret_union; /* Semantic value, + union access */ + 
char* scan_action_set_symbol; /* Set regex symbol depending + on action code decision */ + + char* code_localization; /* Code localization template */ + + char** for_sequences; /* Dynamic array of string + sequences to be replaced/es- + caped in output strings */ + char** do_sequences; /* Dynamic array of escape- + sequences for the particular + string sequence in the above + array */ + int sequences_count; /* Number of elements in the + above array */ + + char* truedef; /* Value for true */ + char* falsedef; /* Value for false */ + + XML_T xml; /* XML root node */ }; /* Error styles */ @@ -457,45 +443,45 @@ struct _generator typedef enum { - ERR_MEMORY_ERROR, - ERR_CMD_LINE, - ERR_CMD_OPT, - ERR_PARSE_ERROR, - ERR_PARSE_ERROR_EXPECT, - ERR_MULTIPLE_GOAL_DEF, - ERR_GOAL_ONE_RHS, - ERR_NO_GOAL_SYMBOL, - ERR_DOUBLE_TERMINAL_DEF, - ERR_UNKNOWN_DIRECTIVE, - ERR_WHITESPACE_TOKEN, - ERR_UNDEFINED_NONTERM, - ERR_UNDEFINED_TERM, - ERR_UNUSED_NONTERM, - ERR_UNUSED_TERM, - ERR_REDUCE_REDUCE, - ERR_SHIFT_REDUCE, - ERR_KEYWORD_ANOMALY, - ERR_UNKNOWN_TARGET_LANG, - ERR_NO_VALUE_TYPE, - ERR_OPEN_OUTPUT_FILE, - ERR_OPEN_INPUT_FILE, - ERR_NO_GENERATOR_FILE, - ERR_TAG_NOT_FOUND, - ERR_XML_ERROR, - ERR_XML_INCOMPLETE, - ERR_DUPLICATE_ESCAPE_SEQ, - ERR_CIRCULAR_DEFINITION, - ERR_EMPTY_RECURSION, - ERR_USELESS_RULE, - ERR_NO_EFFECT_IN_MODE, - ERR_NONTERM_WS_NOT_ALLOWED, - ERR_INVALID_CHAR_UNIVERSE, - ERR_CHARCLASS_OVERLAP, - ERR_UNDEFINED_SYMREF, - ERR_UNDEFINED_LHS, - ERR_UNDEFINED_TERMINAL, - ERR_NO_TARGET_TPL_SUPPLY, - ERR_DIRECTIVE_ALREADY_USED + ERR_MEMORY_ERROR, + ERR_CMD_LINE, + ERR_CMD_OPT, + ERR_PARSE_ERROR, + ERR_PARSE_ERROR_EXPECT, + ERR_MULTIPLE_GOAL_DEF, + ERR_GOAL_ONE_RHS, + ERR_NO_GOAL_SYMBOL, + ERR_DOUBLE_TERMINAL_DEF, + ERR_UNKNOWN_DIRECTIVE, + ERR_WHITESPACE_TOKEN, + ERR_UNDEFINED_NONTERM, + ERR_UNDEFINED_TERM, + ERR_UNUSED_NONTERM, + ERR_UNUSED_TERM, + ERR_REDUCE_REDUCE, + ERR_SHIFT_REDUCE, + ERR_KEYWORD_ANOMALY, + ERR_UNKNOWN_TARGET_LANG, + ERR_NO_VALUE_TYPE, + 
ERR_OPEN_OUTPUT_FILE, + ERR_OPEN_INPUT_FILE, + ERR_NO_GENERATOR_FILE, + ERR_TAG_NOT_FOUND, + ERR_XML_ERROR, + ERR_XML_INCOMPLETE, + ERR_DUPLICATE_ESCAPE_SEQ, + ERR_CIRCULAR_DEFINITION, + ERR_EMPTY_RECURSION, + ERR_USELESS_RULE, + ERR_NO_EFFECT_IN_MODE, + ERR_NONTERM_WS_NOT_ALLOWED, + ERR_INVALID_CHAR_UNIVERSE, + ERR_CHARCLASS_OVERLAP, + ERR_UNDEFINED_SYMREF, + ERR_UNDEFINED_LHS, + ERR_UNDEFINED_TERMINAL, + ERR_NO_TARGET_TPL_SUPPLY, + ERR_DIRECTIVE_ALREADY_USED } ERRORCODE; #include "proto.h" diff --git a/src/utils.c b/src/utils.c index 1676b56..0580593 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: utils.c -Author: Jan Max Meyer -Usage: Utility functions ------------------------------------------------------------------------------ */ +/* Utility functions */ #include "unicc.h" @@ -21,33 +12,33 @@ Returns the derived name; New allocated memory, must be freed! */ char* derive_name( char* name, char append_char ) { - char* ret; - size_t len; - - ret = (char*)pmalloc( ( strlen( name ) + 1 + 1 ) * sizeof( char ) ); - strcpy( ret, name ); - - len = strlen( ret ); - ret[ len ] = append_char; - ret[ len + 1 ] = '\0'; - - /* Some name styling - this is currently onle for one case, the whitespace - symbol ... other cases should not appear... 
*/ - switch( append_char ) - { - case P_OPTIONAL_CLOSURE: - if( ret[ len - 1 ] == P_POSITIVE_CLOSURE ) - { - ret[ len - 1 ] = P_KLEENE_CLOSURE; - ret[ len ] = '\0'; - } - break; - - default: - break; - } - - return ret; + char* ret; + size_t len; + + ret = (char*)pmalloc( ( strlen( name ) + 1 + 1 ) * sizeof( char ) ); + strcpy( ret, name ); + + len = strlen( ret ); + ret[ len ] = append_char; + ret[ len + 1 ] = '\0'; + + /* Some name styling - this is currently onle for one case, the whitespace + symbol ... other cases should not appear... */ + switch( append_char ) + { + case P_OPTIONAL_CLOSURE: + if( ret[ len - 1 ] == P_POSITIVE_CLOSURE ) + { + ret[ len - 1 ] = P_KLEENE_CLOSURE; + ret[ len ] = '\0'; + } + break; + + default: + break; + } + + return ret; } /** Parses a single character, even escaped ones. @@ -59,149 +50,148 @@ character definition. Returns the character value */ int unescape_char( char* str, char** strfix ) { - char* ptr = str; - int ch = 0; - short cnt = 0; - BOOLEAN neg = FALSE; - - /* - 18.07.2009 Jan Max Meyer: - Negative escaped characters - */ - - if( *ptr == '\\' ) - { - ptr++; - switch( *ptr ) - { - case 'n': - ch = '\n'; - ptr++; - break; - case 'r': - ch = '\r'; - ptr++; - break; - case 't': - ch = '\t'; - ptr++; - break; - case 'v': - ch = '\v'; - ptr++; - break; - case 'a': - ch = '\a'; - ptr++; - break; - case 'b': - ch = '\b'; - ptr++; - break; - case 'f': - ch = '\f'; - ptr++; - break; - case '\'': - ch = '\''; - ptr++; - break; - case '\"': - ch = '\"'; - ptr++; - break; - case '\\': - ch = '\\'; - ptr++; - break; - case '\?': - ch = '\?'; - ptr++; - break; - case 'x': - ptr++; - - while( ( ( *ptr >= '0' && *ptr <= '9' ) - || ( *ptr >= 'A' && *ptr <= 'F' ) - || ( *ptr >= 'a' && *ptr <= 'f' ) ) - && cnt < 2 ) - { - ch *= 16; - - if( ( *ptr >= 'A' && *ptr <= 'F' ) - || ( *ptr >= 'a' && *ptr <= 'f' ) ) - ch += ( *ptr & 7 ) + 9; - else - ch += ( *ptr - '0' ); - - ptr++; - cnt++; - } - /* printf( "ch = %d\n", ch ); */ - break; - - 
default: - if( *ptr == '-' ) - { - neg = TRUE; - ptr++; - } - - while( *ptr >= '0' && *ptr <= '9' ) - { - ch *= 10; - ch += ( *ptr - '0' ); - ptr++; - } - - if( neg && ch ) - ch *= -1; - - /* if( *ptr != '\0' ) - ptr++; */ - break; - } - } - else if( *ptr != '\0' ) - { - ch = *ptr; - ptr++; - } - - if( strfix ) - *strfix = ptr; - - return ch; + char* ptr = str; + int ch = 0; + short cnt = 0; + BOOLEAN neg = FALSE; + + /* + 18.07.2009 Jan Max Meyer: + Negative escaped characters + */ + + if( *ptr == '\\' ) + { + ptr++; + switch( *ptr ) + { + case 'n': + ch = '\n'; + ptr++; + break; + case 'r': + ch = '\r'; + ptr++; + break; + case 't': + ch = '\t'; + ptr++; + break; + case 'v': + ch = '\v'; + ptr++; + break; + case 'a': + ch = '\a'; + ptr++; + break; + case 'b': + ch = '\b'; + ptr++; + break; + case 'f': + ch = '\f'; + ptr++; + break; + case '\'': + ch = '\''; + ptr++; + break; + case '\"': + ch = '\"'; + ptr++; + break; + case '\\': + ch = '\\'; + ptr++; + break; + case '\?': + ch = '\?'; + ptr++; + break; + case 'x': + ptr++; + + while( ( ( *ptr >= '0' && *ptr <= '9' ) + || ( *ptr >= 'A' && *ptr <= 'F' ) + || ( *ptr >= 'a' && *ptr <= 'f' ) ) + && cnt < 2 ) + { + ch *= 16; + + if( ( *ptr >= 'A' && *ptr <= 'F' ) + || ( *ptr >= 'a' && *ptr <= 'f' ) ) + ch += ( *ptr & 7 ) + 9; + else + ch += ( *ptr - '0' ); + + ptr++; + cnt++; + } + /* printf( "ch = %d\n", ch ); */ + break; + + default: + if( *ptr == '-' ) + { + neg = TRUE; + ptr++; + } + + while( *ptr >= '0' && *ptr <= '9' ) + { + ch *= 10; + ch += ( *ptr - '0' ); + ptr++; + } + + if( neg && ch ) + ch *= -1; + + /* if( *ptr != '\0' ) + ptr++; */ + break; + } + } + else if( *ptr != '\0' ) + { + ch = *ptr; + ptr++; + } + + if( strfix ) + *strfix = ptr; + + return ch; } /** Finds out the base symbol for a possibly derived symbol, and returns it. 
*/ SYMBOL* find_base_symbol( SYMBOL* sym ) { - while( sym->derived_from ) - sym = sym->derived_from; + while( sym->derived_from ) + sym = sym->derived_from; - return sym; + return sym; } /** Construct a C-identifier from a file-name. */ char* c_identifier( char* str, BOOLEAN to_upper ) { - char* p; - - if( !( str = pstrdup( str ) ) ) - OUTOFMEM; - - for( p = str; *p; p++ ) - { - if( isalnum( *p ) ) - { - if( to_upper ) - *p = toupper( *p ); - } - else - *p = '_'; - } - - return str; + char* p; + + if( !( str = pstrdup( str ) ) ) + OUTOFMEM; + + for( p = str; *p; p++ ) + { + if( isalnum( *p ) ) + { + if( to_upper ) + *p = toupper( *p ); + } + else + *p = '_'; + } + + return str; } - diff --git a/src/virtual.c b/src/virtual.c index cc5c777..5e86ac9 100644 --- a/src/virtual.c +++ b/src/virtual.c @@ -1,13 +1,4 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. See LICENSE for more information. - -File: virtual.c -Author: Jan Max Meyer -Usage: Virtual production generation functions ------------------------------------------------------------------------------ */ +/* Virtual production construction functions */ #include "unicc.h" @@ -19,43 +10,43 @@ Usage: Virtual production generation functions Returns a SYMBOL* sointer to the symbol representing the closure nonterminal. 
*/ SYMBOL* positive_closure( PARSER* parser, SYMBOL* base ) { - char* deriv_str; - PROD* p; - SYMBOL* s = (SYMBOL*)NULL; - - /* - 30.09.2010 Jan Max Meyer: - Inherit defined_at information - */ - - if( base ) - { - deriv_str = derive_name( base->name, P_POSITIVE_CLOSURE ); - - if( !( s = get_symbol( parser, deriv_str, - SYM_NON_TERMINAL, FALSE ) ) ) - { - s = get_symbol( parser, deriv_str, - SYM_NON_TERMINAL, TRUE ); - s->generated = TRUE; - s->used = TRUE; - s->defined = TRUE; - s->vtype = base->vtype; - s->derived_from = base; - s->line = base->line; - - p = create_production( parser, s ); - append_to_production( p, s, (char*)NULL ); - append_to_production( p, base, (char*)NULL ); - - p = create_production( parser, s ); - append_to_production( p, base, (char*)NULL ); - } - - pfree( deriv_str ); - } - - return s; + char* deriv_str; + PROD* p; + SYMBOL* s = (SYMBOL*)NULL; + + /* + 30.09.2010 Jan Max Meyer: + Inherit defined_at information + */ + + if( base ) + { + deriv_str = derive_name( base->name, P_POSITIVE_CLOSURE ); + + if( !( s = get_symbol( parser, deriv_str, + SYM_NON_TERMINAL, FALSE ) ) ) + { + s = get_symbol( parser, deriv_str, + SYM_NON_TERMINAL, TRUE ); + s->generated = TRUE; + s->used = TRUE; + s->defined = TRUE; + s->vtype = base->vtype; + s->derived_from = base; + s->line = base->line; + + p = create_production( parser, s ); + append_to_production( p, s, (char*)NULL ); + append_to_production( p, base, (char*)NULL ); + + p = create_production( parser, s ); + append_to_production( p, base, (char*)NULL ); + } + + pfree( deriv_str ); + } + + return s; } /** Creates a kleene closure for a symbol. @@ -66,59 +57,59 @@ SYMBOL* positive_closure( PARSER* parser, SYMBOL* base ) Returns a SYMBOL* Pointer to symbol representing the closure nonterminal. 
*/ SYMBOL* kleene_closure( PARSER* parser, SYMBOL* base ) { - char* deriv_str; - PROD* p; - SYMBOL* s = (SYMBOL*)NULL; - SYMBOL* pos_s = (SYMBOL*)NULL; + char* deriv_str; + PROD* p; + SYMBOL* s = (SYMBOL*)NULL; + SYMBOL* pos_s = (SYMBOL*)NULL; - /* - 14.05.2008 Jan Max Meyer - Modified rework. Instead of + /* + 14.05.2008 Jan Max Meyer + Modified rework. Instead of - s* -> s* base | ; + s* -> s* base | ; - this will now create + this will now create - s+ -> s+ base | base; - s* -> s+ | ; + s+ -> s+ base | base; + s* -> s+ | ; - 30.09.2010 Jan Max Meyer - Inherit defined_at information - */ + 30.09.2010 Jan Max Meyer + Inherit defined_at information + */ - if( base ) - { - pos_s = positive_closure( parser, base ); - if( !pos_s ) - return s; + if( base ) + { + pos_s = positive_closure( parser, base ); + if( !pos_s ) + return s; - deriv_str = derive_name( base->name, P_KLEENE_CLOSURE ); + deriv_str = derive_name( base->name, P_KLEENE_CLOSURE ); - if( !( s = get_symbol( parser, deriv_str, - SYM_NON_TERMINAL, FALSE ) ) ) - { - s = get_symbol( parser, deriv_str, - SYM_NON_TERMINAL, TRUE ); - s->generated = TRUE; - s->used = TRUE; - s->defined = TRUE; - s->vtype = base->vtype; - s->derived_from = base; - s->line = base->line; + if( !( s = get_symbol( parser, deriv_str, + SYM_NON_TERMINAL, FALSE ) ) ) + { + s = get_symbol( parser, deriv_str, + SYM_NON_TERMINAL, TRUE ); + s->generated = TRUE; + s->used = TRUE; + s->defined = TRUE; + s->vtype = base->vtype; + s->derived_from = base; + s->line = base->line; - p = create_production( parser, s ); - /*append_to_production( p, s, (char*)NULL ); - append_to_production( p, base, (char*)NULL );*/ - append_to_production( p, pos_s, (char*)NULL ); + p = create_production( parser, s ); + /*append_to_production( p, s, (char*)NULL ); + append_to_production( p, base, (char*)NULL );*/ + append_to_production( p, pos_s, (char*)NULL ); - p = create_production( parser, s ); - } + p = create_production( parser, s ); + } - pfree( deriv_str ); - } 
+ pfree( deriv_str ); + } - return s; + return s; } /** Creates an optional closure for a symbol. @@ -130,40 +121,39 @@ Returns a SYMBOL* Pointer to symbol representing the closure nonterminal. */ SYMBOL* optional_closure( PARSER* parser, SYMBOL* base ) { - char* deriv_str; - PROD* p; - SYMBOL* s = (SYMBOL*)NULL; - - /* - 30.09.2010 Jan Max Meyer: - Inherit defined_at information - */ - - if( base ) - { - deriv_str = derive_name( base->name, P_OPTIONAL_CLOSURE ); - - if( !(s = get_symbol( parser, deriv_str, - SYM_NON_TERMINAL, FALSE ) ) ) - { - s = get_symbol( parser, deriv_str, - SYM_NON_TERMINAL, TRUE ); - s->generated = TRUE; - s->used = TRUE; - s->defined = TRUE; - s->vtype = base->vtype; - s->derived_from = base; - s->line = base->line; - - p = create_production( parser, s ); - append_to_production( p, base, (char*)NULL ); - - p = create_production( parser, s ); - } - - pfree( deriv_str ); - } - - return s; + char* deriv_str; + PROD* p; + SYMBOL* s = (SYMBOL*)NULL; + + /* + 30.09.2010 Jan Max Meyer: + Inherit defined_at information + */ + + if( base ) + { + deriv_str = derive_name( base->name, P_OPTIONAL_CLOSURE ); + + if( !(s = get_symbol( parser, deriv_str, + SYM_NON_TERMINAL, FALSE ) ) ) + { + s = get_symbol( parser, deriv_str, + SYM_NON_TERMINAL, TRUE ); + s->generated = TRUE; + s->used = TRUE; + s->defined = TRUE; + s->vtype = base->vtype; + s->derived_from = base; + s->line = base->line; + + p = create_production( parser, s ); + append_to_production( p, base, (char*)NULL ); + + p = create_production( parser, s ); + } + + pfree( deriv_str ); + } + + return s; } - diff --git a/src/xml.c b/src/xml.c index 64c05aa..253609f 100644 --- a/src/xml.c +++ b/src/xml.c @@ -1,17 +1,8 @@ -/* -MODULE---------------------------------------------------------------------- -UniCC LALR(1) Parser Generator -Copyright (C) 2006-2019 by Phorward Software Technologies, Jan Max Meyer -https://phorward.info ++ uniccphorwardsoftwarecom -All rights reserved. 
See LICENSE for more information. - -File: xml.c -Author: Aaron Voisine, contributions and code-formatting by Jan Max Meyer -Usage: XML processing functions (based on ezXML) ------------------------------------------------------------------------------ */ +/* XML processing functions (based on ezXML) */ /* xml.c * - * Copyright 2004-2006 Aaron Voisine + * Initial Copyright 2004-2006 Aaron Voisine * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the @@ -41,22 +32,22 @@ Usage: XML processing functions (based on ezXML) typedef struct xml_root* xml_root_t; struct xml_root { /* additional data for the root tag */ - struct xml xml; /* is a super-struct built on - top of xml struct */ - XML_T cur; /* current xml tree insertion point */ - char* m; /* original xml string */ - size_t len; /* length of allocated memory for mmap, -1 for - * pmalloc */ - int lines; /* All lines of the root file */ - char* u; /* UTF-8 conversion of string if original was - * UTF-16 */ - char* s; /* start of work area */ - char* e; /* end of work area */ - char** ent; /* general entities (ampersand sequences) */ - char*** attr; /* default attributes */ - char*** pi; /* processing instructions */ - short standalone; /* non-zero if */ - char err[XML_ERRL]; /* error string */ + struct xml xml; /* is a super-struct built on + top of xml struct */ + XML_T cur; /* current xml tree insertion point */ + char* m; /* original xml string */ + size_t len; /* length of allocated memory for mmap, -1 for + * pmalloc */ + int lines; /* All lines of the root file */ + char* u; /* UTF-8 conversion of string if original was + * UTF-16 */ + char* s; /* start of work area */ + char* e; /* end of work area */ + char** ent; /* general entities (ampersand sequences) */ + char*** attr; /* default attributes */ + char*** pi; /* processing instructions */ + short standalone; /* non-zero if */ + char err[XML_ERRL]; /* error string */ }; 
char* XML_NIL[] = { NULL }; /* empty, null terminated array of strings */ @@ -66,12 +57,12 @@ char* XML_NIL[] = { NULL }; /* empty, null terminated array of strings */ ============================================================================ */ XML_T xml_child( XML_T xml, char* name ) { - xml = ( xml ) ? xml->child : NULL; + xml = ( xml ) ? xml->child : NULL; - while( xml && strcmp( name, xml->name ) ) - xml = xml->sibling; + while( xml && strcmp( name, xml->name ) ) + xml = xml->sibling; - return xml; + return xml; } /* ============================================================================= @@ -80,10 +71,10 @@ XML_T xml_child( XML_T xml, char* name ) ============================================================================ */ XML_T xml_idx( XML_T xml, int idx ) { - for( ; xml && idx; idx-- ) - xml = xml->next; + for( ; xml && idx; idx-- ) + xml = xml->next; - return xml; + return xml; } /* ============================================================================= @@ -91,33 +82,33 @@ XML_T xml_idx( XML_T xml, int idx ) ============================================================================ */ char* xml_attr( XML_T xml, char* attr ) { - int i = 0, j = 1; - xml_root_t root = (xml_root_t)xml; + int i = 0, j = 1; + xml_root_t root = (xml_root_t)xml; - if( !xml || !xml->attr ) - return (char*)NULL; + if( !xml || !xml->attr ) + return (char*)NULL; - while( xml->attr[i] && strcmp( attr, xml->attr[i] ) ) - i += 2; + while( xml->attr[i] && strcmp( attr, xml->attr[i] ) ) + i += 2; - if( xml->attr[i] ) - return xml->attr[i + 1]; /* found attribute */ + if( xml->attr[i] ) + return xml->attr[i + 1]; /* found attribute */ - while( root->xml.parent ) - root = (xml_root_t)root->xml.parent; /* root tag */ + while( root->xml.parent ) + root = (xml_root_t)root->xml.parent; /* root tag */ - for( i = 0; root->attr[i] && strcmp( xml->name, root->attr[i][0] ); i++ ) - ; + for( i = 0; root->attr[i] && strcmp( xml->name, root->attr[i][0] ); i++ ) + ; - if( 
!root->attr[i] ) - return (char*)NULL; /* no matching default attributes */ + if( !root->attr[i] ) + return (char*)NULL; /* no matching default attributes */ - while( root->attr[i][j] && strcmp( attr, root->attr[i][j] ) ) - j += 3; + while( root->attr[i][j] && strcmp( attr, root->attr[i][j] ) ) + j += 3; - /* found default */ - return ( root->attr[i][j] ) ? root->attr[i][j + 1] : (char*)NULL; + /* found default */ + return ( root->attr[i][j] ) ? root->attr[i][j + 1] : (char*)NULL; } /* ============================================================================= @@ -125,12 +116,12 @@ char* xml_attr( XML_T xml, char* attr ) ============================================================================ */ long xml_int_attr( XML_T xml, char* attr ) { - char* v; + char* v; - if( !( v = xml_attr( xml, attr ) ) ) - return 0; + if( !( v = xml_attr( xml, attr ) ) ) + return 0; - return strtol( v, (char**)NULL, 0 ); + return strtol( v, (char**)NULL, 0 ); } /* ============================================================================= @@ -138,12 +129,12 @@ long xml_int_attr( XML_T xml, char* attr ) ============================================================================ */ double xml_float_attr( XML_T xml, char* attr ) { - char* v; + char* v; - if( !( v = xml_attr( xml, attr ) ) ) - return (double)0.0; + if( !( v = xml_attr( xml, attr ) ) ) + return (double)0.0; - return (double)strtod( v, (char**)NULL ); + return (double)strtod( v, (char**)NULL ); } /* ============================================================================= @@ -151,16 +142,16 @@ double xml_float_attr( XML_T xml, char* attr ) ============================================================================ */ XML_T xml_vget( XML_T xml, va_list ap ) { - char* name = va_arg( ap, char * ); - int idx = -1; + char* name = va_arg( ap, char * ); + int idx = -1; - if( name && *name ) - { - idx = va_arg( ap, int ); - xml = xml_child( xml, name ); - } + if( name && *name ) + { + idx = va_arg( ap, int ); + xml = 
xml_child( xml, name ); + } - return( idx < 0 ) ? xml : xml_vget( xml_idx( xml, idx ), ap ); + return( idx < 0 ) ? xml : xml_vget( xml_idx( xml, idx ), ap ); } /* ============================================================================= @@ -175,14 +166,14 @@ XML_T xml_vget( XML_T xml, va_list ap ) XML_T xml_get( XML_T xml, ... ) { - va_list ap; - XML_T r; + va_list ap; + XML_T r; - va_start( ap, xml ); - r = xml_vget( xml, ap ); - va_end( ap ); - return r; + va_start( ap, xml ); + r = xml_vget( xml, ap ); + va_end( ap ); + return r; } /* ============================================================================= @@ -192,20 +183,20 @@ XML_T xml_get( XML_T xml, ... ) char ** xml_pi( XML_T xml, char* target ) { - xml_root_t root = (xml_root_t)xml; - int i = 0; + xml_root_t root = (xml_root_t)xml; + int i = 0; - if( !root ) - return (char**)XML_NIL; + if( !root ) + return (char**)XML_NIL; - while( root->xml.parent ) root = ( xml_root_t ) - root->xml.parent; + while( root->xml.parent ) root = ( xml_root_t ) + root->xml.parent; - while( root->pi[i] && strcmp( target, root->pi[i][0] ) ) - i++; + while( root->pi[i] && strcmp( target, root->pi[i][0] ) ) + i++; - return( char ** ) ( ( root->pi[i] ) ? root->pi[i] + 1 : XML_NIL ); + return( char ** ) ( ( root->pi[i] ) ? root->pi[i] + 1 : XML_NIL ); } /* ============================================================================= @@ -213,26 +204,26 @@ char ** xml_pi( XML_T xml, char* target ) ============================================================================ */ static XML_T xml_err( xml_root_t root, char* s, char* err, ... 
) { - va_list ap; - int line = 1; - char fmt[XML_ERRL]; + va_list ap; + int line = 1; + char fmt[XML_ERRL]; #ifdef _WIN32 - _snprintf + _snprintf #else - snprintf + snprintf #endif - ( fmt, XML_ERRL, "[error near line %d]: %s", line, err ); - va_start( ap, err ); + ( fmt, XML_ERRL, "[error near line %d]: %s", line, err ); + va_start( ap, err ); #ifdef _WIN32 - _vsnprintf + _vsnprintf #else - vsnprintf + vsnprintf #endif - ( root->err, XML_ERRL, fmt, ap ); - va_end( ap ); + ( root->err, XML_ERRL, fmt, ap ); + va_end( ap ); - return &root->xml; + return &root->xml; } /* ============================================================================= @@ -249,99 +240,99 @@ static XML_T xml_err( xml_root_t root, char* s, char* err, ... ) char* xml_decode( char* s, char ** ent, char t ) { - char* e, *r = s, *m = s; - long b, c, d, l; - - - for( ; *s; s++ ) - { /* normalize line endings */ - while( *s == '\r' ) - { - *( s++ ) = '\n'; - if( *s == '\n' ) memmove( s, ( s + 1 ), strlen( s ) ); - } - } - - for( s = r;; ) - { - while( *s && *s != '&' && ( *s != '%' || t != '%' ) && !isspace( *s ) ) - s++; - - if( !*s ) - break; - else if( t != 'c' && !strncmp( s, "&#", 2 ) ) - { /* character reference */ - if( s[2] == 'x' ) c = strtol( s + 3, &e, 16 ); /* base 16 */ - else - c = strtol( s + 2, &e, 10 ); /* base 10 */ - if( !c || *e != ';' ) - { - s++; - continue; - } /* not a character ref */ - - if( c < 0x80 ) - *( s++ ) = (char)c; /* US-ASCII subset */ - else - { /* multi-byte UTF-8 sequence */ - for( b = 0, d = c; d; d /= 2 ) b++; /* number of bits in c */ - b = ( b - 2 ) / 5; /* number of bytes in payload */ - *( s++ ) = (char)( ( 0xFF << (7 - b) ) | ( c >> (6 * b) ) ); - - while( b ) - *( s++ ) = (char)( 0x80 | ( (c >> (6 * --b)) & 0x3F ) ); - } - - memmove( s, strchr( s, ';' ) + 1, strlen( strchr(s, ';') ) ); - } - else if - ( - ( *s == '&' && (t == '&' || t == ' ' || t == '*') ) - || ( *s == '%' && t == '%' ) - ) - { /* entity reference */ - for( b = 0; ent[b] && strncmp( s 
+ 1, ent[b], strlen(ent[b]) ); - b += 2 ) - ; - - /* find entity in entity list */ - - if( ent[b++] ) - { - /* found a match */ - if( ( c = strlen(ent[b]) ) - 1 > ( e = strchr(s, ';') ) - s ) - { - l = ( d = ( s - r ) ) + c + strlen( e ); - r = ( r == m ) ? strcpy( pmalloc( l ), r ) : - prealloc( r, l ); - e = strchr( ( s = r + d ), ';' ); - } - - memmove( s + c, e + 1, strlen( e ) ); /* shift rest of string */ - strncpy( s, ent[b], c ); /* copy in replacement text */ - } - else - s++; /* not a known entity */ - } - else if( ( t == ' ' || t == '*' ) && isspace( *s ) ) - *( s++ ) = ' '; - else - s++; /* no decoding needed */ - } - - if( t == '*' ) - { /* normalize spaces for non-cdata attributes */ - for( s = r; *s; s++ ) - { - if( ( l = strspn(s, " ") ) ) - memmove( s, s + l, strlen( s + l ) + 1 ); - while( *s && *s != ' ' ) s++; - } - - if( --s >= r && *s == ' ' ) *s = '\0'; /* trim any trailing space */ - } - - return r; + char* e, *r = s, *m = s; + long b, c, d, l; + + + for( ; *s; s++ ) + { /* normalize line endings */ + while( *s == '\r' ) + { + *( s++ ) = '\n'; + if( *s == '\n' ) memmove( s, ( s + 1 ), strlen( s ) ); + } + } + + for( s = r;; ) + { + while( *s && *s != '&' && ( *s != '%' || t != '%' ) && !isspace( *s ) ) + s++; + + if( !*s ) + break; + else if( t != 'c' && !strncmp( s, "&#", 2 ) ) + { /* character reference */ + if( s[2] == 'x' ) c = strtol( s + 3, &e, 16 ); /* base 16 */ + else + c = strtol( s + 2, &e, 10 ); /* base 10 */ + if( !c || *e != ';' ) + { + s++; + continue; + } /* not a character ref */ + + if( c < 0x80 ) + *( s++ ) = (char)c; /* US-ASCII subset */ + else + { /* multi-byte UTF-8 sequence */ + for( b = 0, d = c; d; d /= 2 ) b++; /* number of bits in c */ + b = ( b - 2 ) / 5; /* number of bytes in payload */ + *( s++ ) = (char)( ( 0xFF << (7 - b) ) | ( c >> (6 * b) ) ); + + while( b ) + *( s++ ) = (char)( 0x80 | ( (c >> (6 * --b)) & 0x3F ) ); + } + + memmove( s, strchr( s, ';' ) + 1, strlen( strchr(s, ';') ) ); + } + else if + ( + ( 
*s == '&' && (t == '&' || t == ' ' || t == '*') ) + || ( *s == '%' && t == '%' ) + ) + { /* entity reference */ + for( b = 0; ent[b] && strncmp( s + 1, ent[b], strlen(ent[b]) ); + b += 2 ) + ; + + /* find entity in entity list */ + + if( ent[b++] ) + { + /* found a match */ + if( ( c = strlen(ent[b]) ) - 1 > ( e = strchr(s, ';') ) - s ) + { + l = ( d = ( s - r ) ) + c + strlen( e ); + r = ( r == m ) ? strcpy( pmalloc( l ), r ) : + prealloc( r, l ); + e = strchr( ( s = r + d ), ';' ); + } + + memmove( s + c, e + 1, strlen( e ) ); /* shift rest of string */ + strncpy( s, ent[b], c ); /* copy in replacement text */ + } + else + s++; /* not a known entity */ + } + else if( ( t == ' ' || t == '*' ) && isspace( *s ) ) + *( s++ ) = ' '; + else + s++; /* no decoding needed */ + } + + if( t == '*' ) + { /* normalize spaces for non-cdata attributes */ + for( s = r; *s; s++ ) + { + if( ( l = strspn(s, " ") ) ) + memmove( s, s + l, strlen( s + l ) + 1 ); + while( *s && *s != ' ' ) s++; + } + + if( --s >= r && *s == ' ' ) *s = '\0'; /* trim any trailing space */ + } + + return r; } /* ============================================================================= @@ -350,18 +341,18 @@ char* xml_decode( char* s, char ** ent, char t ) static void xml_open_tag( xml_root_t root, char* name, char ** attr ) { - XML_T xml = root->cur; + XML_T xml = root->cur; - if( xml->name ) - xml = xml_add_child( xml, name, strlen( xml->txt ) ); - else - xml->name = name; /* first open tag */ + if( xml->name ) + xml = xml_add_child( xml, name, strlen( xml->txt ) ); + else + xml->name = name; /* first open tag */ - xml->attr = attr; - xml->line = root->lines; + xml->attr = attr; + xml->line = root->lines; - root->cur = xml; /* update tag insertion point */ + root->cur = xml; /* update tag insertion point */ } /* ============================================================================= @@ -370,34 +361,34 @@ static void xml_open_tag( xml_root_t root, char* name, char ** attr ) static void 
xml_char_content( xml_root_t root, char* s, size_t len, char t ) { - XML_T xml = root->cur; - char* m = s; - size_t l; - - - if( !xml || !xml->name || !len ) return; /* sanity check */ - - s[len] = '\0'; /* null terminate text - (calling functions anticipate this) */ - len = strlen( s = xml_decode( s, root->ent, t ) ) + 1; - - if( !*( xml->txt ) ) - xml->txt = s; /* initial character content */ - else - { /* allocate our own memory and make a copy */ - xml->txt = ( xml->flags & XML_TXTM ) /* allocate some space */ - ? prealloc( xml->txt, ( l = strlen(xml->txt) ) + len ) : strcpy - ( - pmalloc( (l = strlen(xml->txt)) + len ), - xml->txt - ); - strcpy( xml->txt + l, s ); /* add new char content */ - if( s != m ) - pfree( s ); /* free s if it was pmalloced by xml_decode() */ - } - - if( xml->txt != m ) - xml_set_flag( xml, XML_TXTM ); + XML_T xml = root->cur; + char* m = s; + size_t l; + + + if( !xml || !xml->name || !len ) return; /* sanity check */ + + s[len] = '\0'; /* null terminate text + (calling functions anticipate this) */ + len = strlen( s = xml_decode( s, root->ent, t ) ) + 1; + + if( !*( xml->txt ) ) + xml->txt = s; /* initial character content */ + else + { /* allocate our own memory and make a copy */ + xml->txt = ( xml->flags & XML_TXTM ) /* allocate some space */ + ? 
prealloc( xml->txt, ( l = strlen(xml->txt) ) + len ) : strcpy + ( + pmalloc( (l = strlen(xml->txt)) + len ), + xml->txt + ); + strcpy( xml->txt + l, s ); /* add new char content */ + if( s != m ) + pfree( s ); /* free s if it was pmalloced by xml_decode() */ + } + + if( xml->txt != m ) + xml_set_flag( xml, XML_TXTM ); } /* ============================================================================= @@ -405,11 +396,11 @@ static void xml_char_content( xml_root_t root, char* s, size_t len, char t ) ============================================================================ */ static XML_T xml_close_tag( xml_root_t root, char* name, char* s ) { - if( !root->cur || !root->cur->name || strcmp( name, root->cur->name ) ) - return xml_err( root, s, "unexpected closing tag ", name ); + if( !root->cur || !root->cur->name || strcmp( name, root->cur->name ) ) + return xml_err( root, s, "unexpected closing tag ", name ); - root->cur = root->cur->parent; - return NULL; + root->cur = root->cur->parent; + return NULL; } /* ============================================================================= @@ -419,27 +410,27 @@ static XML_T xml_close_tag( xml_root_t root, char* name, char* s ) static int xml_ent_ok( char* name, char* s, char ** ent ) { - int i; + int i; - for( ;; s++ ) - { - while( *s && *s != '&' ) - s++; /* find next entity reference */ + for( ;; s++ ) + { + while( *s && *s != '&' ) + s++; /* find next entity reference */ - if( !*s ) - return 1; + if( !*s ) + return 1; - if( !strncmp( s + 1, name, strlen(name) ) ) - return 0; /* circular ref. */ + if( !strncmp( s + 1, name, strlen(name) ) ) + return 0; /* circular ref. 
*/ - for( i = 0; ent[i] && strncmp( ent[i], s + 1, strlen(ent[i]) ); - i += 2 ) - ; + for( i = 0; ent[i] && strncmp( ent[i], s + 1, strlen(ent[i]) ); + i += 2 ) + ; - if( ent[i] && !xml_ent_ok( name, ent[i + 1], ent ) ) - return 0; - } + if( ent[i] && !xml_ent_ok( name, ent[i + 1], ent ) ) + return 0; + } } /* ============================================================================= @@ -448,52 +439,52 @@ static int xml_ent_ok( char* name, char* s, char ** ent ) static void xml_proc_inst( xml_root_t root, char* s, size_t len ) { - int i = 0, j = 1; - char* target = s; + int i = 0, j = 1; + char* target = s; - s[len] = '\0'; /* null terminate instruction */ - if( *( s += strcspn(s, XML_WS) ) ) - { - *s = '\0'; /* null terminate target */ - s += strspn( s + 1, XML_WS ) + 1; /* skip whitespace after target */ - } + s[len] = '\0'; /* null terminate instruction */ + if( *( s += strcspn(s, XML_WS) ) ) + { + *s = '\0'; /* null terminate target */ + s += strspn( s + 1, XML_WS ) + 1; /* skip whitespace after target */ + } - if( !strcmp( target, "xml" ) ) - { /* */ - if( ( s = strstr(s, "standalone") ) - && !strncmp( s + strspn(s + 10, XML_WS "='\"") + 10, - "yes", 3 ) ) - root->standalone = 1; + if( !strcmp( target, "xml" ) ) + { /* */ + if( ( s = strstr(s, "standalone") ) + && !strncmp( s + strspn(s + 10, XML_WS "='\"") + 10, + "yes", 3 ) ) + root->standalone = 1; - return; - } + return; + } - if( !root->pi[0] ) - *( root->pi = pmalloc( sizeof(char **) ) ) = NULL; + if( !root->pi[0] ) + *( root->pi = pmalloc( sizeof(char **) ) ) = NULL; - while( root->pi[i] && strcmp( target, root->pi[i][0] ) ) - i++; + while( root->pi[i] && strcmp( target, root->pi[i][0] ) ) + i++; - if( !root->pi[i] ) - { /* new target */ - root->pi = prealloc( root->pi, sizeof( char ** ) * ( i + 2 ) ); - root->pi[i] = pmalloc( sizeof( char * ) * 3 ); - root->pi[i][0] = target; - root->pi[i][1] = ( char * ) ( root->pi[i + 1] = NULL ); - root->pi[i][2] = strdup( "" ); /* empty document position list */ 
- } + if( !root->pi[i] ) + { /* new target */ + root->pi = prealloc( root->pi, sizeof( char ** ) * ( i + 2 ) ); + root->pi[i] = pmalloc( sizeof( char * ) * 3 ); + root->pi[i][0] = target; + root->pi[i][1] = ( char * ) ( root->pi[i + 1] = NULL ); + root->pi[i][2] = strdup( "" ); /* empty document position list */ + } - while( root->pi[i][j] ) j++; /* find end of instruction list for - * this target */ - root->pi[i] = prealloc( root->pi[i], sizeof( char * ) * ( j + 3 ) ); - root->pi[i][j + 2] = prealloc( root->pi[i][j + 1], j + 1 ); + while( root->pi[i][j] ) j++; /* find end of instruction list for + * this target */ + root->pi[i] = prealloc( root->pi[i], sizeof( char * ) * ( j + 3 ) ); + root->pi[i][j + 2] = prealloc( root->pi[i][j + 1], j + 1 ); - strcpy( root->pi[i][j + 2] + j - 1, ( root->xml.name ) ? ">" : "<" ); + strcpy( root->pi[i][j + 2] + j - 1, ( root->xml.name ) ? ">" : "<" ); - root->pi[i][j + 1] = NULL; /* null terminate pi list for this - * target */ - root->pi[i][j] = s; /* set instruction */ + root->pi[i][j + 1] = NULL; /* null terminate pi list for this + * target */ + root->pi[i][j] = s; /* set instruction */ } /* ============================================================================= @@ -502,152 +493,152 @@ static void xml_proc_inst( xml_root_t root, char* s, size_t len ) static short xml_internal_dtd( xml_root_t root, char* s, size_t len ) { - char q, *c, *t, *n = NULL, *v, **ent, **pe; - int i, j; - - - pe = memcpy( pmalloc( sizeof(XML_NIL) ), XML_NIL, sizeof( XML_NIL ) ); - - for( s[len] = '\0'; s; ) - { - while( *s && *s != '<' && *s != '%' ) s++; /* find next declaration */ - - if( !*s ) - break; - else if( !strncmp( s, "' ); - continue; - } - - for( i = 0, ent = ( *c == '%' ) ? 
pe : root->ent; ent[i]; i++ ) - ; - ent = prealloc( ent, ( i + 3 ) * sizeof( char * ) ); /* space - * for next - * ent */ - if( *c == '%' ) - pe = ent; - else - root->ent = ent; - - *( ++s ) = '\0'; /* null terminate name */ - if( ( s = strchr(v, q) ) ) *( s++ ) = '\0'; /* null terminate val */ - ent[i + 1] = xml_decode( v, pe, '%' ); /* set value */ - ent[i + 2] = NULL; /* null terminate entity list */ - if( !xml_ent_ok( n, ent[i + 1], ent ) ) - { /* circular reference */ - if( ent[i + 1] != v ) pfree( ent[i + 1] ); - xml_err( root, v, "circular entity declaration &%s", n ); - break; - } else - ent[i] = n; /* set entity name */ - } - else if( !strncmp( s, "") ) == '>' ) - continue; - else - *s = '\0'; /* null terminate tag name */ - - for( i = 0; root->attr[i] && strcmp( n, root->attr[i][0] ); i++ ) - ; - - while( *( ++s, n = s + strspn(s, XML_WS) ) && *n != '>' ) - { - if( *( s = n + strcspn(n, XML_WS) ) ) *s = '\0'; /* attr name */ - else - { - xml_err( root, t, "malformed " ) - 1; - if( *c == ' ' ) continue; /* cdata is default, - * nothing to do */ - v = NULL; - } - else if( ( *s == '"' || *s == '\'' ) /* default value */ - && ( s = strchr(v = s + 1, *s) ) ) - *s = '\0'; - else - { - xml_err( root, t, "malformed attr[i] ) - { /* new tag name */ - root->attr = ( !i ) ? pmalloc( 2 * sizeof( char ** ) ) - : prealloc( root->attr, - ( i + 2 ) * sizeof( char ** ) ); - root->attr[i] = pmalloc( 2 * sizeof( char * ) ); - root->attr[i][0] = t; /* set tag name */ - root->attr[i][1] = ( char * ) ( root->attr[i + 1] = NULL ); - } - - for( j = 1; root->attr[i][j]; j += 3 ) - ; - - /* find end of list */ - root->attr[i] = prealloc( root->attr[i], - ( j + 4 ) * sizeof( char * ) ); - - root->attr[i][j + 3] = NULL; /* null terminate list */ - root->attr[i][j + 2] = c; /* is it cdata? */ - root->attr[i][j + 1] = ( v ) ? 
- xml_decode( v, root->ent, *c ) : NULL; - - root->attr[i][j] = n; /* attribute name */ - } - } - else if( !strncmp( s, "" ); - else if( !strncmp( s, "") ) ) - xml_proc_inst( root, c, s++ -c ); - } - else if( *s == '<' ) - s = strchr( s, '>' ); /* skip other declarations */ - else if( *( s++ ) == '%' && !root->standalone ) - break; - } - - pfree( pe ); - return !*root->err; + char q, *c, *t, *n = NULL, *v, **ent, **pe; + int i, j; + + + pe = memcpy( pmalloc( sizeof(XML_NIL) ), XML_NIL, sizeof( XML_NIL ) ); + + for( s[len] = '\0'; s; ) + { + while( *s && *s != '<' && *s != '%' ) s++; /* find next declaration */ + + if( !*s ) + break; + else if( !strncmp( s, "' ); + continue; + } + + for( i = 0, ent = ( *c == '%' ) ? pe : root->ent; ent[i]; i++ ) + ; + ent = prealloc( ent, ( i + 3 ) * sizeof( char * ) ); /* space + * for next + * ent */ + if( *c == '%' ) + pe = ent; + else + root->ent = ent; + + *( ++s ) = '\0'; /* null terminate name */ + if( ( s = strchr(v, q) ) ) *( s++ ) = '\0'; /* null terminate val */ + ent[i + 1] = xml_decode( v, pe, '%' ); /* set value */ + ent[i + 2] = NULL; /* null terminate entity list */ + if( !xml_ent_ok( n, ent[i + 1], ent ) ) + { /* circular reference */ + if( ent[i + 1] != v ) pfree( ent[i + 1] ); + xml_err( root, v, "circular entity declaration &%s", n ); + break; + } else + ent[i] = n; /* set entity name */ + } + else if( !strncmp( s, "") ) == '>' ) + continue; + else + *s = '\0'; /* null terminate tag name */ + + for( i = 0; root->attr[i] && strcmp( n, root->attr[i][0] ); i++ ) + ; + + while( *( ++s, n = s + strspn(s, XML_WS) ) && *n != '>' ) + { + if( *( s = n + strcspn(n, XML_WS) ) ) *s = '\0'; /* attr name */ + else + { + xml_err( root, t, "malformed " ) - 1; + if( *c == ' ' ) continue; /* cdata is default, + * nothing to do */ + v = NULL; + } + else if( ( *s == '"' || *s == '\'' ) /* default value */ + && ( s = strchr(v = s + 1, *s) ) ) + *s = '\0'; + else + { + xml_err( root, t, "malformed attr[i] ) + { /* new tag name */ + 
root->attr = ( !i ) ? pmalloc( 2 * sizeof( char ** ) ) + : prealloc( root->attr, + ( i + 2 ) * sizeof( char ** ) ); + root->attr[i] = pmalloc( 2 * sizeof( char * ) ); + root->attr[i][0] = t; /* set tag name */ + root->attr[i][1] = ( char * ) ( root->attr[i + 1] = NULL ); + } + + for( j = 1; root->attr[i][j]; j += 3 ) + ; + + /* find end of list */ + root->attr[i] = prealloc( root->attr[i], + ( j + 4 ) * sizeof( char * ) ); + + root->attr[i][j + 3] = NULL; /* null terminate list */ + root->attr[i][j + 2] = c; /* is it cdata? */ + root->attr[i][j + 1] = ( v ) ? + xml_decode( v, root->ent, *c ) : NULL; + + root->attr[i][j] = n; /* attribute name */ + } + } + else if( !strncmp( s, "" ); + else if( !strncmp( s, "") ) ) + xml_proc_inst( root, c, s++ -c ); + } + else if( *s == '<' ) + s = strchr( s, '>' ); /* skip other declarations */ + else if( *( s++ ) == '%' && !root->standalone ) + break; + } + + pfree( pe ); + return !*root->err; } /* ============================================================================= @@ -658,40 +649,40 @@ static short xml_internal_dtd( xml_root_t root, char* s, size_t len ) char* xml_str2utf8( char ** s, size_t* len ) { - char* u; - size_t l = 0, sl, max = *len; - long c, d; - int b, be = ( **s == '\xFE' ) ? 1 : ( **s == '\xFF' ) ? 0 : -1; - - - if( be == -1 ) return NULL; /* not UTF-16 */ - - u = pmalloc( max ); - for( sl = 2; sl < *len - 1; sl += 2 ) - { - c = ( be ) ? ( ( (*s)[sl] & 0xFF ) << 8 ) | ( ( *s )[sl + 1] & 0xFF ) - : ( ( (*s)[sl + 1] & 0xFF ) << 8 ) | ( ( *s )[sl] & 0xFF ); - if( c >= 0xD800 && c <= 0xDFFF && ( sl += 2 ) < *len - 1 ) - { /* high-half */ - d = ( be ) ? 
( ( (*s)[sl] & 0xFF ) << 8 ) | - ( ( *s )[sl + 1] & 0xFF ) : ( ( (*s)[sl + 1] & 0xFF ) << 8 ) | - ( ( *s )[sl] & 0xFF ); - c = ( ( (c & 0x3FF) << 10 ) | ( d & 0x3FF ) ) + 0x10000; - } - - while( l + 6 > max ) u = prealloc( u, max += XML_BUFSIZE ); - if( c < 0x80 ) u[l++] = (char)c; /* US-ASCII subset */ - else - { /* multi-byte UTF-8 sequence */ - for( b = 0, d = c; d; d /= 2 ) b++; /* bits in c */ - b = ( b - 2 ) / 5; /* bytes in payload */ - u[l++] = (char)( ( 0xFF << (7 - b) ) | ( c >> (6 * b) ) ); - while( b ) - u[l++] = (char)( 0x80 | ( (c >> (6 * --b)) & 0x3F ) ); - } - } - - return *s = prealloc( u, *len = l ); + char* u; + size_t l = 0, sl, max = *len; + long c, d; + int b, be = ( **s == '\xFE' ) ? 1 : ( **s == '\xFF' ) ? 0 : -1; + + + if( be == -1 ) return NULL; /* not UTF-16 */ + + u = pmalloc( max ); + for( sl = 2; sl < *len - 1; sl += 2 ) + { + c = ( be ) ? ( ( (*s)[sl] & 0xFF ) << 8 ) | ( ( *s )[sl + 1] & 0xFF ) + : ( ( (*s)[sl + 1] & 0xFF ) << 8 ) | ( ( *s )[sl] & 0xFF ); + if( c >= 0xD800 && c <= 0xDFFF && ( sl += 2 ) < *len - 1 ) + { /* high-half */ + d = ( be ) ? 
( ( (*s)[sl] & 0xFF ) << 8 ) | + ( ( *s )[sl + 1] & 0xFF ) : ( ( (*s)[sl + 1] & 0xFF ) << 8 ) | + ( ( *s )[sl] & 0xFF ); + c = ( ( (c & 0x3FF) << 10 ) | ( d & 0x3FF ) ) + 0x10000; + } + + while( l + 6 > max ) u = prealloc( u, max += XML_BUFSIZE ); + if( c < 0x80 ) u[l++] = (char)c; /* US-ASCII subset */ + else + { /* multi-byte UTF-8 sequence */ + for( b = 0, d = c; d; d /= 2 ) b++; /* bits in c */ + b = ( b - 2 ) / 5; /* bytes in payload */ + u[l++] = (char)( ( 0xFF << (7 - b) ) | ( c >> (6 * b) ) ); + while( b ) + u[l++] = (char)( 0x80 | ( (c >> (6 * --b)) & 0x3F ) ); + } + } + + return *s = prealloc( u, *len = l ); } /* ============================================================================= @@ -700,28 +691,28 @@ char* xml_str2utf8( char ** s, size_t* len ) void xml_free_attr( char ** attr ) { - int i = 0; - char* m; + int i = 0; + char* m; - if( !attr || attr == XML_NIL ) - return; /* nothing to pfree */ + if( !attr || attr == XML_NIL ) + return; /* nothing to pfree */ - while( attr[i] ) - i += 2; /* find end of attribute list */ + while( attr[i] ) + i += 2; /* find end of attribute list */ - m = attr[i + 1]; /* list of which names and values are taken */ + m = attr[i + 1]; /* list of which names and values are taken */ - for( i = 0; m[i]; i++ ) - { - if( m[i] & XML_NAMEM ) - pfree( attr[i * 2] ); - if( m[i] & XML_TXTM ) - pfree( attr[( i * 2 ) + 1] ); - } + for( i = 0; m[i]; i++ ) + { + if( m[i] & XML_NAMEM ) + pfree( attr[i * 2] ); + if( m[i] & XML_TXTM ) + pfree( attr[( i * 2 ) + 1] ); + } - pfree( m ); - pfree( attr ); + pfree( m ); + pfree( attr ); } /* ============================================================================= @@ -730,220 +721,220 @@ void xml_free_attr( char ** attr ) XML_T xml_parse_str( char* s, size_t len ) { - xml_root_t root = (xml_root_t)xml_new( NULL ); - char q, e, *d, **attr, **a = NULL; /* initialize a to avoid - * compile warning */ - int l, i, j; - char* z; - char* last = s; - - - root->m = s; - if( !len ) return 
xml_err( root, NULL, "root tag missing" ); - root->u = xml_str2utf8( &s, &len ); /* convert utf-16 to utf-8 */ - root->e = ( root->s = s ) + len; /* record start and end of work - * area */ - root->lines = 1; - - e = s[len - 1]; /* save end char* s[len - 1] - * '\0'; - * turn end char into null terminator */ - - while( *s && *s != '<' ) - s++; /* find first tag */ - if( !*s ) - return xml_err( root, s, "root tag missing" ); - - for( ;; ) - { - /* - fprintf( stderr, "loop = >%s<\n", s ); - */ - attr = (char**)XML_NIL; - d = ++s; - - if( isalpha( *s ) || *s == '_' || *s == ':' || *s < '\0' ) - { /* new tag */ - if( !root->cur ) - return xml_err( root, d, "markup outside of root element" ); - - s += strcspn( s, XML_WS "/>" ); - while( isspace( *s ) ) *( s++ ) = '\0'; /* null terminate tagname */ - - if( *s && *s != '/' && *s != '>' ) /* find tag in default attr - * list */ - for( i = 0; ( a = root->attr[i] ) && strcmp( a[0], d ); i++ ) - ; - - for( l = 0; *s && *s != '/' && *s != '>'; l += 2 ) - { /* new attrib */ - attr = ( l ) ? prealloc - ( - attr, - ( l + 4 ) * sizeof( char * ) - ) : pmalloc( 4 * sizeof( char * ) ); /* allocate space */ - attr[l + 3] = ( l ) ? 
prealloc - ( - attr[l + 1], - ( l / 2 ) + 2 - ) : pmalloc( 2 ); /* mem for list of maloced vals */ - strcpy( attr[l + 3] + ( l / 2 ), " " ); /* value is not - * pmalloced */ - attr[l + 2] = NULL; /* null terminate list */ - attr[l + 1] = ""; /* temporary attribute value */ - attr[l] = s; /* set attribute name */ - - s += strcspn( s, XML_WS "=/>" ); - if( *s == '=' || isspace( *s ) ) - { - *( s++ ) = '\0'; /* null terminate tag attribute name */ - q = *( s += strspn( s, XML_WS "=" ) ); - if( q == '"' || q == '\'' ) - { /* attribute value */ - attr[l + 1] = ++s; - while( *s && *s != q ) s++; - if( *s ) *( s++ ) = '\0'; /* null terminate - * attribute val */ - else - { - xml_free_attr( attr ); - return xml_err( root, d, "missing %c", q ); - } - - for - ( - j = 1; - a && a[j] && strcmp( a[j], attr[l] ); - j += 3 - ) - ; - attr[l + 1] = xml_decode - ( - attr[l + 1], - root->ent, - ( a && a[j] ) ? *a[j + 2] : ' ' - ); - if( attr[l + 1] < d || attr[l + 1] > s ) - attr[l + 3][l / 2] = XML_TXTM;/* value pmalloced */ - } - } - - while( isspace( *s ) ) s++; - } - - for( z = s - 1; z >= last; z-- ) - if( *z == '\n' ) - root->lines++; - last = s; - - if( *s == '/' ) - { /* self closing tag */ - *( s++ ) = '\0'; - if( ( *s && *s != '>' ) || ( !*s && e != '>' ) ) - { - if( l ) xml_free_attr( attr ); - return xml_err( root, d, "missing >" ); - } - - xml_open_tag( root, d, attr ); - xml_close_tag( root, d, s ); - } - else if( ( q = *s ) == '>' || ( !*s && e == '>' ) ) - { /* open tag */ - *s = '\0'; /* temporarily null terminate tag name */ - xml_open_tag( root, d, attr ); - *s = q; - } else - { - if( l ) xml_free_attr( attr ); - return xml_err( root, d, "missing >" ); - } - } - else if( *s == '/' ) - { /* close tag */ - s += strcspn( d = s + 1, XML_WS ">" ) + 1; - if( !( q = *s ) && e != '>' ) - return xml_err( root, d, "missing >" ); - *s = '\0'; /* temporarily null terminate tag name */ - if( xml_close_tag( root, d, s ) ) return &root->xml; - if( isspace( *s = q ) ) s += strspn( 
s, XML_WS ); - } - else if( !strncmp( s, "!--", 3 ) ) - { /* xml comment */ - if( !( s = strstr(s + 3, "--") ) - || ( *(s += 2) != '>' && *s ) - || ( !*s && e != '>' ) ) - return xml_err( root, d, "unclosed