From 29806ff0f16d54537ad2a59ca8771e7942038e3c Mon Sep 17 00:00:00 2001 From: matthieu Date: Sat, 12 Jul 2014 16:23:43 +0000 Subject: [PATCH] Import a copy of ucpp, lightweight cpp that doesn't depend on the 'comp' set. Moving from xenocara with a new name. ok deraadt@. --- libexec/auxcpp/CHANGELOG | 21 + libexec/auxcpp/Makefile | 115 ++ libexec/auxcpp/README | 877 +++++++++++++ libexec/auxcpp/arith.c | 1462 ++++++++++++++++++++++ libexec/auxcpp/arith.h | 255 ++++ libexec/auxcpp/assert.c | 420 +++++++ libexec/auxcpp/atest.c | 236 ++++ libexec/auxcpp/config.h | 352 ++++++ libexec/auxcpp/cpp.c | 2565 ++++++++++++++++++++++++++++++++++++++ libexec/auxcpp/cpp.h | 317 +++++ libexec/auxcpp/eval.c | 699 +++++++++++ libexec/auxcpp/hash.c | 329 +++++ libexec/auxcpp/hash.h | 58 + libexec/auxcpp/lexer.c | 1020 +++++++++++++++ libexec/auxcpp/macro.c | 1921 ++++++++++++++++++++++++++++ libexec/auxcpp/mem.c | 328 +++++ libexec/auxcpp/mem.h | 155 +++ libexec/auxcpp/nhash.c | 481 +++++++ libexec/auxcpp/nhash.h | 132 ++ libexec/auxcpp/sample.c | 114 ++ libexec/auxcpp/tune.h | 422 +++++++ libexec/auxcpp/ucpp.1 | 212 ++++ libexec/auxcpp/ucppi.h | 196 +++ 23 files changed, 12687 insertions(+) create mode 100644 libexec/auxcpp/CHANGELOG create mode 100644 libexec/auxcpp/Makefile create mode 100644 libexec/auxcpp/README create mode 100644 libexec/auxcpp/arith.c create mode 100644 libexec/auxcpp/arith.h create mode 100644 libexec/auxcpp/assert.c create mode 100644 libexec/auxcpp/atest.c create mode 100644 libexec/auxcpp/config.h create mode 100644 libexec/auxcpp/cpp.c create mode 100644 libexec/auxcpp/cpp.h create mode 100644 libexec/auxcpp/eval.c create mode 100644 libexec/auxcpp/hash.c create mode 100644 libexec/auxcpp/hash.h create mode 100644 libexec/auxcpp/lexer.c create mode 100644 libexec/auxcpp/macro.c create mode 100644 libexec/auxcpp/mem.c create mode 100644 libexec/auxcpp/mem.h create mode 100644 libexec/auxcpp/nhash.c create mode 100644 libexec/auxcpp/nhash.h create mode 
100644 libexec/auxcpp/sample.c create mode 100644 libexec/auxcpp/tune.h create mode 100644 libexec/auxcpp/ucpp.1 create mode 100644 libexec/auxcpp/ucppi.h diff --git a/libexec/auxcpp/CHANGELOG b/libexec/auxcpp/CHANGELOG new file mode 100644 index 00000000000..071b4682db4 --- /dev/null +++ b/libexec/auxcpp/CHANGELOG @@ -0,0 +1,21 @@ +ucpp-1.3.2 +* Fixed Issue 8, Included files missing a "terminating carriage + return character" will interrupt preprocessing in sample.c/LEXER + mode. + (http://code.google.com/p/ucpp/issues/detail?id=8) + +ucpp-1.3.1 +* Fixed Issue 5, "\r\n" carriage return characters are double + counted. + (http://code.google.com/p/ucpp/issues/detail?id=5) +* Fixed Issue 6, Included files missing a "terminating carriage + return character" will interrupt preprocessing in ucpp + (STAND_ALONE mode). + (http://code.google.com/p/ucpp/issues/detail?id=6) +* Fixed Issue 7, STD_MACROS & STD_ASSERTS undefined when trying to + build ucpp -DSTAND_ALONE. + (http://code.google.com/p/ucpp/issues/detail?id=7) +* Build ucpp & libucpp with 'make'. + +ucpp-1.3 +* Original import into svn at code.google.com/p/ucpp diff --git a/libexec/auxcpp/Makefile b/libexec/auxcpp/Makefile new file mode 100644 index 00000000000..c37f0b438c7 --- /dev/null +++ b/libexec/auxcpp/Makefile @@ -0,0 +1,115 @@ +# Makefile for ucpp +# +# (c) Thomas Pornin 1999 - 2002 +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 4. 
The name of the authors may not be used to endorse or promote +# products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +.POSIX: + +# ----- user configurable part ----- + +# Edit the variables to suit your system. +# +# use -DAUDIT to enable some internal sanity checks +# use -DMEM_CHECK to check the return value of malloc() +# (superseded by AUDIT) +# use -DMEM_DEBUG to enable memory leak research (warning: this +# slows down ucpp a bit, and greatly increases memory consumption) +# use -DINLINE=foobar to enable use of the 'foobar' +# non standard qualifier, as an equivalent to the C99 'inline' +# qualifier. See tune.h for details. +# +# Two FLAGS lines are given for each system type; choose the first one for +# debug, the second one for a fast binary. 
+ +# for a generic compiler called cc +#CC = cc +#FLAGS = -DAUDIT +#FLAGS = -O -DMEM_CHECK + +# for Minix-86 +#CC = cc +#LDFLAGS = -i +#FLAGS = -m -DAUDIT +#FLAGS = -O -m -DMEM_CHECK + +# for gcc +CC = gcc +FLAGS = -O3 -W -Wall -ansi +#FLAGS = -g -W -Wall -ansi -DAUDIT -DMEM_DEBUG +#FLAGS = -O3 -mcpu=pentiumpro -fomit-frame-pointer -W -Wall -ansi -DMEM_CHECK +#FLAGS = -O -pg -W -Wall -ansi -DMEM_CHECK +#LDFLAGS = -pg + +# for the Compaq C compiler on Alpha/Linux +#CC = ccc +#FLAGS = -w0 -g -DAUDIT +#FLAGS = -w0 -fast -DMEM_CHECK + +# for the Sun Workshop C Compiler +#CC = cc +#FLAGS = -g -Xa -DAUDIT +#FLAGS = -Xa -fast -DMEM_CHECK + +# flags for the link step +LIBS = +#LIBS = libefence.a +#LIBS = -lgc_dbg + +STAND_ALONE = -DSTAND_ALONE + +ifdef STAND_ALONE + CSRC = mem.c nhash.c cpp.c lexer.c assert.c macro.c eval.c + FINAL_STEP = $(CC) $(LDFLAGS) -DUCPP_CONFIG $(STAND_ALONE) -o ucpp $(CSRC) $(LIBS) +endif + +# ----- nothing should be changed below this line ----- + +COBJ = mem.o nhash.o cpp.o lexer.o assert.o macro.o eval.o +CFLAGS = $(FLAGS) + +all: ucpp + @ar cq libucpp.a *.o + +clean: + @rm -f *.o ucpp core *.a + +ucpp: $(COBJ) + @$(FINAL_STEP) + +assert.o: tune.h ucppi.h cpp.h nhash.h mem.h + @$(CC) $(CFLAGS) -c assert.c +cpp.o: tune.h ucppi.h cpp.h nhash.h mem.h + @$(CC) $(CFLAGS) -c cpp.c +eval.o: tune.h ucppi.h cpp.h nhash.h mem.h arith.c arith.h + @$(CC) $(CFLAGS) -c eval.c +lexer.o: tune.h ucppi.h cpp.h nhash.h mem.h + @$(CC) $(CFLAGS) -c lexer.c +macro.o: tune.h ucppi.h cpp.h nhash.h mem.h + @$(CC) $(CFLAGS) -c macro.c +mem.o: mem.h + @$(CC) $(CFLAGS) -c mem.c +nhash.o: nhash.h mem.h + @$(CC) $(CFLAGS) -c nhash.c diff --git a/libexec/auxcpp/README b/libexec/auxcpp/README new file mode 100644 index 00000000000..f47ba9f3e71 --- /dev/null +++ b/libexec/auxcpp/README @@ -0,0 +1,877 @@ +ucpp-1.3 is a C preprocessor compliant to ISO-C99. 
+ +Author: Thomas Pornin +Main site: http://pornin.nerim.net/ucpp/ + + + +INTRODUCTION +------------ + +A C preprocessor is a part of a C compiler responsible for macro +replacement, conditional compilation and inclusion of header files. +It is often found as a stand-alone program on Unix systems. + +ucpp is such a preprocessor; it is designed to be quick and light, +but anyway fully compliant to the ISO standard 9899:1999, also known +as C99. ucpp can be compiled as a stand-alone program, or linked to +some other code; in the latter case, ucpp will output tokens, one +at a time, on demand, as an integrated lexer. + +ucpp operates in two modes: +-- lexer mode: ucpp is linked to some other code and outputs a stream of +tokens (each call to the lex() function will yield one token) +-- non-lexer mode: ucpp preprocesses text and outputs the resulting text +to a file descriptor; if linked to some other code, the cpp() function +must be called repeatedly, otherwise ucpp is a stand-alone binary. + + + +INSTALLATION +------------ + +1. Uncompress the archive file and extract the source files. + +2. Edit tune.h. Here is a short explanation of compile-time options: + + LOW_MEM + Enable memory-saving functions; this is for low-end and old systems, + but seems to be good for larger systems too. Keep it. + NO_LIBC_BUF + NO_UCPP_BUF + Two options used to disable the two bufferings inside ucpp. Define + both options for maximum memory savings but you will probably want + to keep libc buffering for decent performance. Define none on large + systems (modern 32 or 64-bit systems). + UCPP_MMAP + With this option, if ucpp internal buffering is active, ucpp will + try to mmap() the input files. This might yield a slight performance + improvement, but will work only on a limited set of architectures. 
+ PRAGMA_TOKENIZE + Make ucpp generate tokenized PRAGMA tokens on #pragma and _Pragma(); + tokenization is made this way: tokens are assembled as a null + terminated array of unsigned chars; if a token has a string value + (as defined by the STRING_TOKEN macro), the value follows the token, + terminated by PRAGMA_TOKEN_END (by default, a newline character cast + to unsigned char). Whitespace tokens are skipped. The "name" value + of the PRAGMA token is a pointer to that array. This setting is + irrelevant in non-lexer mode. + PRAGMA_DUMP + In non-lexer mode, keep #pragma in output; non-void _Pragma() are + translated to the equivalent #pragma. Irrelevant in lexer mode. + NO_PRAGMA_IN_DIRECTIVE + Do not evaluate _Pragma() inside #if, #include, #include_next and #line + directives; instead, emit an error (since the remaining _Pragma will + surely imply a syntax error). + DSHARP_TOKEN_MERGE + When two tokens are to be merged with the `##' operator, but fail + because they do not merge into a single valid token, ucpp keeps those + two tokens separate by adding an extra space between them in text + output. With this option on, that extra space is not added, which means + that some tokens may merge partially if the text output is preprocessed + again. See tune.h for details. + INMACRO_FLAG + In lexer mode, set the inmacro flag to 1 if the current token comes + from a macro replacement, 0 otherwise. macro_count maintains an + increasing counter of such replacements. CONTEXT tokens count as + one macro replacement each. #pragma, and _Pragma() that do not come + from a macro replacement, also count as one macro replacement each. + This setting is irrelevant in non-lexer mode. + STD_INCLUDE_PATH + Default include path in stand-alone ucpp. + STD_MACROS + Default predefined macros in stand-alone ucpp. + STD_ASSERT + Default assertions in stand-alone ucpp. 
+ NATIVE_SIGNED + NATIVE_UNSIGNED + NATIVE_UNSIGNED_BITS + NATIVE_SIGNED_MIN + NATIVE_SIGNED_MAX + SIMUL_ARITH_SUBTYPE + SIMUL_SUBTYPE_BITS + SIMUL_NUMBITS + WCHAR_SIGNEDNESS + Those options define how #if expressions are evaluated; see the + cross-compilation section of this file for more info, and the + comments in tune.h. Extra info is found in arith.h and arith.c, + at the possible expense of your mental health. + DEFAULT_LEXER_FLAGS + DEFAULT_CPP_FLAGS + Default flags in respectively lexer and non-lexer modes. + POSIX_JMP + Define this if your architecture defines sigsetjmp() and + siglongjmp(); it is known to (very slightly) improve performance + on AIX systems. + MAX_CHAR_VAL + ucpp will consider characters whose value is equal or above + MAX_CHAR_VAL as outside the C source charset (so they will be + treated just like '@', for instance). For ASCII systems, 128 + is fine. 256 is a safer value, but uses more (static) memory. + For performance reasons, use a power of two. If MAX_CHAR_VAL is + correctly adjusted, ucpp should be compatible with any character + set. + UNBREAKABLE_SPACE + If you want an extra-whitespace character, define this macro to that + character. For instance, define this to 160 on an ISO-8859-1 system + if you want the 'unbreakable space' to be considered as whitespace. + SEMPER_FIDELIS + With this option set, ucpp, when used as a lexer, will pass + whitespace tokens to its caller, and those tokens will have their + true content; this is intended for reconstruction of the source + line. Beware that some comments may have embedded newlines. + COPY_LINE_LENGTH + ucpp can maintain a copy of the current source line, up to that + length. Irrelevant to stand-alone version. + *_MEMG + Those settings modify ucpp behaviour, wrt memory allocations. With + higher values, ucpp will perform less malloc() calls and will run + faster, but it will use more memory. 
Reduce INPUT_BUF_MEMG and + OUTPUT_BUF_MEMG on low-memory systems, if you kept ucpp buffering + (see NO_UCPP_BUF option). + +3. Edit the Makefile. You should define the variables CC and FLAGS; + there are the following options: + + -DAUDIT + Enable internal sanity checks; this slows down a bit ucpp. Do not + define unless you plan to debug ucpp. + -DMEM_CHECK + With this setting, ucpp will check for the return value of malloc() + and exit with a diagnostic when out of memory. MEM_CHECK is implied + by AUDIT. + -DMEM_DEBUG + Enable memory debug code. This will track memory leaks and several + occurrences of memory management errors; it will also slow down + things and increase memory consumption, so you probably do not + want to use this option. + -DINLINE=foobar + The ucpp code uses "inline" qualifier for some functions; by + default, that qualifier is macro-replaced with nothing. Define + INLINE to the correct replacement for your compiler, if supported. + Note that all "inline" functions in ucpp are also "static". For any + C99-compliant compiler, the GNU compiler (gcc), and the Compaq C + compiler under Linux/Alpha, no -DINLINE is needed (see tune.h for + details). + +4. Compile by typing "make". This should produce the ucpp executable + file. You might see some warning messages, especially with gcc: + gcc believes some variables might be used prior to their + initialization; ignore those messages. + +5. Install wherever you want the binary and the man page ucpp.1. I + have not provided an install sequence because I didn't bother. + +6. If you do not have the make utility, compile each file separately + and link them together. The exact details depend on your compiler. + You must define the macro STAND_ALONE when compiling cpp.c (there + is such a definition, commented out, in cpp.c, line 34). + +There is no "configure" script because: +-- I do not like the very idea of a "configure" script. +-- ucpp is written in ANSI-C and should be fairly portable. 
+-- There is no such thing as "standard" settings for a C preprocessor. + The predefined system macros, standard assertions,... must be tuned + by the sysadmin. +-- The primary goal of ucpp is to be included in compilers. The + stand-alone version is mainly a debugging tool. + +Please note that you need an ISO-C90 (formerly ANSI) C compiler suite +(including the standard library) to compile ucpp. If your compiler is +not C99 (or later), read the cross-compilation section in this README +file. + +The C90 and C99 standards state that external linkage names might be +considered equal or different based upon only their first 6 characters; +this rule might make ucpp not compile on a conformant C implementation. +I have yet to see such an implementation, however. + +If you want to use ucpp as an integrated preprocessor and lexer, see the +section REUSE. Compiling ucpp as a library is an exercise left to the +reader. + +With the LOW_MEM code enabled, ucpp can run on a Minix-i86 or Msdos +16-bit small-memory-model machine. It will not be fully compliant +on such an architecture to C99, since C99 states that at least one +source code with 4095 simultaneously defined macros must be processed; +ucpp will be limited to about 1500 macros (at most) due to memory +restrictions. At least ucpp can preprocess its own code in these +conditions. LOW_MEM is on by default because it seems to improve +performance on large systems. + + + +LICENSE +------- + +The copyright notice and license is at the beginning of the Makefile and +each source file. It is basically a BSD license, without the advertising +subclause (which BSD dropped recently anyway) and with no reference to +Berkeley (since the code is all mine, written from scratch). Informally, +this means that you can reuse and redistribute the code as you want, +provided that you state in the documentation (or any substantial part of +the software) of redistributed code that I am the original author. 
(If +you press a cdrom with 200 software packages, I do not insist on having +my name on the cover of the cdrom -- just keep a Readme file somewhere +on the cdrom, with the copyright notice included.) + +As a courteous gesture, if you reuse my code, please drop me a mail. +It raises my self-esteem. + + + +REUSE +----- + +The code has been thought as part of a bigger project; it might be +used as an integrated lexer, that will read files, process them as a +C preprocessor, and output a stream of C tokens. To include this code +into a project, compile with STAND_ALONE undefined. + +To use the preprocessor and lexer, several steps should be performed. +See the file 'sample.c' for an example. + +1. call init_cpp(). This function initializes the lexer automaton. + +2. set the following global variables: + no_special_macros + non-zero if the special macros (__FILE__ and others) + should not be defined. This is a global flag since + it affects the redefinition of such macros (which are + allowed if the special macros are not defined) + c99_compliant + if non-zero, define __STDC_VERSION__ to 199901L; this + is the default; otherwise, do not define __STDC_VERSION__. + Note that ucpp will accept to undefine __STDC_VERSION__ + with a #undef directive. + c99_hosted + if strictly positive, define __STDC_HOSTED__ to 1. + If zero, define __STDC_HOSTED__ to 0. If negative, + do not define __STDC_HOSTED__. The default is 1. + emit_defines and emit_assertions should be set to 0 for + the step 3. + +3. call init_tables(). This function initializes the macro table + and other things; it will initialize assertions if it has a non-zero + argument. + +4. call init_include_path(). This function will reset the include + path to the list of paths given as argument. + +5. 
set the following global variables + emit_dependencies + set to 1 if dependencies should be emitted during + preprocessing + set to 2 if dependencies should also be emitted for + system include files + emit_defines + set to non-zero if #define macro definitions should be + emitted when macros are defined + emit_assertions + set to non-zero if #assert assertion definitions should be + emitted when assertions are made + emit_output + the FILE * where the above items are sent if one of the + three emit_ variables is set to non zero + transient_characters + this is for some cross-compilation; see the relevant + part in this README file for details + +6. call set_init_filename() with the initial filename as argument; + the second argument indicates whether the filename is real or + conventional ("real" means "an fopen() on it will work"). + +7. initialize your struct lexer_state: + call init_lexer_state() + call init_lexer_mode() if the preprocessor is supposed to + output a list of tokens, otherwise set the flags field + to DEFAULT_CPP_FLAGS and set the output field to the + FILE * where output should be sent + (init_lexer_mode(), if called at all, must be called after + init_lexer_state()) + adjust the flags field; here is the meaning of flags: + +WARN_STANDARD + emit the standard warnings +WARN_ANNOYING + emit the useless and annoying warnings +WARN_TRIGRAPHS + count trigraphs encountered; it is up to the caller to emit + a warning if some trigraphs were indeed encountered; the count + is stored in the count_trigraphs field of the struct lexer_state +WARN_TRIGRAPHS_MORE + emit a warning for each trigraph encountered +WARN_PRAGMA + emit a warning for each non-void _Pragma encountered in non-lexer + mode (because these are dumped as #pragma in the output) and for each + #pragma too, if ucpp was compiled without PRAGMA_DUMP +FAIL_SHARP + emit errors on '#' tokens beginning a line and not followed + by a valid cpp directive +CCHARSET + emit errors when non-C characters are 
encountered; if this flag + is not set, each non-C character will be considered as a BUNCH + token (since C99 states that non-C characters are allowed as + long as they "disappear" during preprocessing [through macro + replacement and stringification for instance], this flag must + not be set, for maximum C99 compliance) +DISCARD_COMMENTS + do not keep comments in output (irrelevant in lexer mode) +CPLUSPLUS_COMMENTS + understand new style comments (//) (mandatory for C99) +LINE_NUM + emit #line directives when entering a file, if not in lexer mode; + emit CONTEXT token in lexer mode for #line and new files +GCC_LINE_NUM + if LINE_NUM is set, emit gcc-like directives instead of #line +HANDLE_ASSERTIONS + understand assertions in #if expressions (and #assert, #unassert) +HANDLE_PRAGMA + make PRAGMA tokens for #pragma; irrelevant in non-lexer mode + (handling of some pragmas is required in C99 but is not of + the competence of the preprocessor; without this flag, ucpp will + ignore the contents of #pragma and _Pragma directives) +MACRO_VAARG + understand macros with a variable number of arguments (mandatory + for C99) +UTF8_SOURCE + understand UTF-8 encoding: multibyte characters are considered + equivalent to letters as far as syntax is concerned (they can + be used in identifiers) +LEXER + act as a lexer, outputting tokens +TEXT_OUTPUT + this flag should be set to 0 if ucpp works as a lexer, 1 otherwise. + It is somehow redundant with the LEXER flag, but the presence of + those two different flags is needed in ucpp. +KEEP_OUTPUT + in non-lexer mode, emit the result of preprocessing +COPY_LINE + maintain a copy of the last read line in the copy_line field of + the struct lexer_state ; see below for how to use this buffer +HANDLE_TRIGRAPHS + understand trigraphs, such as ??/ for \. This option should be + set by default, except for some legacy code. + + There are other flags, but they are for private usage of ucpp. + +8. 
adjust the input field in the lexer_state to the FILE * from where + source file is read. If you use the UCPP_MMAP compile-time option, + and your input file is eligible to mmap(), then you can call + fopen_mmap_file() to open it, then set_input_file() to set ls->input + and some other internal options. Do not call set_input_file() unless + you just called fopen_mmap_file() just before on the same file. + +9. call add_incpath() to add an include path, define_macro() and + undef_macro() to add or remove macros, make_assertion() and + destroy_assertion() to add or remove assertions. + +10. call enter_file() (this is needed only in non-lexer mode, or if + LINE_NUM is set). + + +Afterwards: + +-- if you are in lexer mode, call lex(); each call will make the ctok + field point to the next token. A non-zero return value is an error. + lex() skips whitespace tokens. The memory used by the string value + of some tokens (identifiers, numbers...) is automatically freed, + so copy the contents of each such token if you want to keep it + (tokens with a string content are identified by the STRING_TOKEN + macro applied to their type). + When lex() returned a non-zero value: if it is CPPERR_EOF, then + end-of-input was reached. Otherwise, it is a genuine error and + ls->ctok is an undefined token; skip it and call lex() again to + ignore the error. + +-- otherwise, call cpp(); each call will analyze one or more tokens + (one token if it did find neither a cpp directive nor a macro name). + A positive return value is an error. + +For both functions, if the return value is CPPERR_EOF (which is a +strictly positive value), then it means that the end of file was +reached. Call check_cpp_errors() after end of file for pending errors +(unfinished #if constructions for instance). In non-lexer mode, +call flush_output(). 
+ +In the struct lexer_state, the following fields might be read: + line the current input line number + oline the current output line number (in non-lexer mode) + flags the flags described above + count_trigraphs the number of trigraphs encountered + inmacro the current token comes from a macro + macro_count the current macro counter +"flags" is an unsigned long and might be modified; the three others +are of long type. + + +To perform another preprocessing: use free_lexer_state() to release +memory used by the buffers referenced in lexer_state, and go back to +step 2. The different tables (macros, assertions...) should be reset to +their respective initial contents. + +There is also the wipeout() function: when called, it should release +(almost) all memory blocks allocated dynamically. After a wipeout(), +ucpp should be back to its state at step 2 (init_cpp() initializes only +static tables, that are never freed nor modified afterwards). + + +The COPY_LINE buffer: the struct lexer_state contains two interesting +fields, copy_line[] and cli. If the COPY_LINE flag is on, each read +line is stored in this buffer, up to (at most) COPY_LINE_LENGTH - 1 +characters (COPY_LINE_LENGTH is defined in tune.h). The last character +of the buffer is always a zero, and if the line was read entirely, it is +zero terminated; the trailing newline is not included. + +The purpose of this buffer is error-reporting. When an error occurs +(cpp() returns a strictly positive value, or lex() returns a non-zero +value), if your struct lexer_state is called ls, use this code: + + if (ls.cli != 0) ls.copy_line[ls.cli] = 0; + +This will add a trailing 0 if the line was not read entirely. + + +ucpp may be configured at runtime to accept alternate characters as +possible parts of identifiers. Typical intended usage is for the '$' +and '@' characters. The two relevant functions are set_identifier_char() +and unset_identifier_char(). 
When this call is issued: + set_identifier_char('$'); +then for all the remaining input, the '$' character will be considered +as just another letter, as far as identifier tokenizing is concerned. This +is for identifiers only; numeric constants are not modified by that setting. +This call resets things back: + unset_identifier_char('$'); +Those two functions modify the static table which is initialized by +init_cpp(). You may call init_cpp() at any time to restore the table +to its standard state. + +When using this feature, take care of the following points: + +-- Do NOT use a character whose numeric value (as an `unsigned char' +cast into an `int') is greater than or equal to MAX_CHAR_VAL (in tune.h). +This would lead to unpredictable results, including an abrupt crash of +ucpp. ucpp makes absolutely no check whatsoever on that matter: this is +the programmer's responsibility. + +-- If you use a standard character such as '+' or '{', tokens which +begin with those characters cease to exist. This can be troublesome. +If you use set_identifier_char() on the '<' character, the handling of +#include directives will be greatly disturbed. Therefore the use of any +standard C character in set_identifier_char() or unset_identifier_char() +is declared unsupported, forbidden and altogether unwise. + +-- Stricto sensu, when an extra character is declared as part of an +identifier, ucpp behaviour ceases to conform to C99, which mandates that +characters such as '$' or '@' must be treated as independent tokens of +their own. Therefore, if your purpose is to use ucpp in a conformant +C implementation, the use of set_identifier_char() should be made at +least a runtime option. + +-- When enabling a new character in the middle of a macro replacement, +the effect of that replacement may be delayed up to the end of that +macro (but this is a "may" !). 
If you wish to trigger this feature with +a custom #pragma or _Pragma(), you should remember it (for instance, +using _Pragma() in a macro replacement, and then the extra character +in the same macro replacement, is not reliable). + + + +COMPATIBILITY NOTES +------------------- + +The C language has a lengthening history. Nowadays, C comes in three +flavours: + +-- Traditional C, aka "K&R". This is the language first described by +Brian Kernighan and Dennis Ritchie, and implemented in the first C +compiler that was ever coded. There are actually several dialects of +K&R, and all of them are considered deprecated. + +-- ISO 9899:1990, aka C90, aka C89, aka ANSI-C. Formalized by ANSI +in 1989 and adopted by ISO the next year, it is the C flavour many C +compilers understand. It is mostly backward compatible with K&R C, but +with enhancements, clarifications and several new features. + +-- ISO 9899:1999, aka C99. This is an evolution on C90, almost fully +backward compatible with C90. C99 introduces many new and useful +features, however, including in the preprocessor. + +There was also a normative addendum in 1995, that added a few features +to C90 (for instance, digraphs) that are also present in C99. It is +sometimes referred to as "C95" or "AMD 1". + + +ucpp implements the C99 standard, but can be used in a stricter mode, +to enforce C90 compatibility (it will, however, still recognize some +constructions that are not in plain C90). + +ucpp also knows about several extensions to C99: + +-- Assertions: this is an extension to the defined() operator, with + its own namespace. Assertions seem to be used in several places, + therefore ucpp knows about them. It is recommended to enable + assertions by default on Solaris systems. +-- Unicode: the C99 norm specifies that extended characters, from + the ISO-10646 charset (aka "unicode") can be used in identifiers + with the notations \u and \U. 
ucpp also accepts (with the proper + flag) the UTF-8 encoding in the source file for such characters. +-- #include_next directive: it works as a #include, but will look + for files only in the directories specified in the include path + after the one the current file was found. This is a GNU-ism that + is useful for writing transparent wrappers around header files. + +Assertions and unicode are activated by specific flags; the #include_next +support is always active. + +The ucpp code itself should be compatible with any ISO-C90 compiler. +The cpp.c file is rather big (~ 64kB), it might confuse old 16-bit C +compilers; the macro.c file is somewhat large also (~ 47kB). + +The evaluation of #if expressions is subject to some subtleties, see the +section "cross-compilation". + +The lexer code makes no assumption about the source character set, but +the following: source characters (those which have a syntactic value in +C; comment and string literal contents are not concerned) must have a +strictly positive value that is strictly lower than MAX_CHAR_VAL. The +strict positivity is already assured by the C standard, so you just need +to adjust MAX_CHAR_VAL. + +ucpp has been tested successfully on ASCII/ISO-8859-1 and EBCDIC systems. +Beware that UTF-8 is NOT compatible with EBCDIC. + +Pragma handling: when used in non-lexer mode, ucpp tries to output a +source text that, when read again, will yield the exact same stream of +tokens. This is not completely true with regards to line numbering in +some tricky macro replacements, but it should work correctly otherwise, +especially with pragma directives if the compile-time option PRAGMA_DUMP +was set: #pragma are dumped, non-void _Pragma() are converted to the +corresponding #pragma and dumped also. 
+ +ucpp does not macro-replace the contents of #pragma and _Pragma(); +If you want a macro-replaced pragma, use this: + +#define pragma_(x) _Pragma(#x) +#define pragma(x) pragma_(x) + +Anyway, pragmas do not nest (an _Pragma() cannot be evaluated if it is +inside a #pragma or another _Pragma). + + +I wrote ucpp according to what is found in "The C Programming Language" +from Brian Kernighan and Dennis Ritchie (2nd edition) and the C99 +standard; but I could have misinterpreted some points. On some tricky +points I got help from the helpful people from the comp.std.c newsgroup. +For assertions and #include_next, I mimicked the behaviour of GNU cpp, +as is stated in the GNU cpp info documentation. An open question is +related to the following code: + +#define undefined ! +#define makeun(x) un ## x +#if makeun(defined foo) +qux +#else +bar +#endif + +ucpp will replace 'defined foo' with 0 first (since foo is not defined), +then it will replace the macro makeun, and the expression will become +'un0', which is replaced by 0 since this is a remaining identifier. The +expression evaluates to false, and 'bar' is emitted. +However, some other preprocessors will replace makeun first, considering +that it is not part of a 'defined' operator application; this will +produce the macro 'undefined', which is replaced, and the expression +becomes '!foo'. 'foo' is replaced by 0, the expression evaluates to +true, and 'qux' is emitted. + +My opinion is that the behaviour is undefined, because use of the +'defined' operator does not match an allowed form prior to macro +replacement (I mean, its syntax matches, but its use is reconverted +to nonexistent and therefore is not anymore matching). Other people +think that the behaviour is well-specified, and contrary to what ucpp +does. The only thing clear to me is that the wording of the standard +(paragraph 6.10.1.3) is unclear. 
+ +Since the ucpp behaviour makes ucpp code simpler and cleaner, and +that it is unlikely that any real-life code would ever be disturbed +by that interpretation of the standard, ucpp will keep its current +behaviour until convincing evidence of my misinterpretation of the +standard is given to me. The problem can only occur if one uses ## to +make a 'defined' operator disappear from a #if expression (everybody +agrees that the generation of a 'defined' operator triggers undefined +behaviour). + + +Another point about macro replacement has been discussed at length on +several occasions. It is about the following code: + +#define CAT(a, b) CAT_(a, b) +#define CAT_(a, b) a ## b +#define AB(x, y) CAT(x, y) +CAT(A, B)(X, Y) + +ucpp will produce `CAT(X,Y)' as replacement for the last line, whereas +some other preprocessors output `XY'. The answer to the question +"which behaviour is correct" seems to be "this is not defined by the +C standard". It is the answer that has been actually given by the C +standardization committee in 1992, to the defect report #017, question +23, which asked that very same question. Since the wording of the +standard has not changed in these parts from the 1990 to the 1999 +version, the preprocessor behaviour on the above-stated code should +still be considered as undefined. + +It seems, however, that there used to be a time (around 1988) when the +committee members agreed upon a precise macro-replacement algorithm, +which specified quite clearly the preprocessor behaviour in such +situations. ucpp behaviour is occasionally claimed as "incorrect" with +regards to that algorithm. Since that macro replacement algorithm has +never been published, and the committee itself backed out from it in +1992, I decided to disregard those feeble claims. + +It is possible, however, that at some point in the future I rewrite the +ucpp macro replacement code, since that code is a bit messy and might be +made to use less memory on some occasions. 
It is then possible that, in +the aftermath of such a rewrite, the ucpp behaviour for the above stated +code become tunable. Don't hold your breath, though. + + +About _Pragma: the standard is not clear about when this operator is +evaluated, and if it is allowed inside #if directives and such. For +ucpp, I coded _Pragma as a special macro with lazy replacement: it will +be evaluated wherever a macro could be replaced, and only at the end of +the macro replacement (for practical purposes, _Pragma can be considered +as a macro taking one argument, and being replaced by nothing, except +for some tricky uses of the # and ## operators). This means that, by +default, ucpp will evaluate _Pragma inside some directives (mainly, #if, +#include, #include_next and #line), but it can be taught not to do so by +defining NO_PRAGMA_IN_DIRECTIVE in tune.h. + + + +CROSS-COMPILATION +----------------- + +If compiled with a C99 development suite, ucpp should be fully +C99-compliant on the host platform (up to my own understanding of the +standard -- remember that this software is distributed as-is, without +any guarantee). However, if a pre-C99 compiler is used, or if the +target machine is not the host machine (for instance when you build a +cross-compiler), the evaluation of #if expressions is subject to some +cross-compiling issues: + + +-- character constants: when evaluating expressions, character constants +are interpreted in the source character set context; this is allowed +by the standard but this can lead to problems with code that expects +this interpretation to match the one made in the C code. To ease +cross-compilation, you can define a conversion array, and make the +global variable transient_characters point to it. The array should +contain 256 int; transient_characters[x] is the value of the character +whose value is x in the source character set. 
+ +This facility is provided for inclusion of ucpp inside another code; +if you want a stand-alone ucpp with that conversion, hard-code the +conversion table into eval.c and make transient_characters[] statically +point to it. Alternatively, you could provide an option syntax to +provide such a table on command-line, if you feel like it. + + +-- wide character constants signedness: by default, ucpp makes wide +characters as signed as what plain chars are on the build host. To +force wide character constant signedness, define WCHAR_SIGNEDNESS to 0 +(for unsigned) or 1 (for signed). Beware, however, that "native" wide +character constants, even signed, are considered positive. Non-wide +character constants are, according to the C99 standard, of type int, and +therefore always signed. + + +-- evaluation type: C90 states that all constants in #if expressions +are considered as either long or unsigned long, and that the evaluation +is performed with operands of that size. In C99, the situation is +equivalent, except that the types used are intmax_t and uintmax_t, as +defined in <stdint.h>. + +ucpp can use two expression evaluators: one uses native integer types +(one signed and one unsigned), the other evaluator emulates big integer +numbers by representing them with two values of some unsigned type. The +emulated type handles signed values in two's complement representation, +and can be any width ranging from 2 bits to twice the size of the +underlying native unsigned type used. An odd width is allowed. When +right shifting an emulated signed negative value, it is left-padded with +bits set to 1 (this is sign extension). + +When the ARITHMETIC_CHECKS macro is defined in tune.h, all occurrences +of implementation-defined or undefined behaviour during arithmetic +evaluation are reported as errors or warned upon. This includes all +overflows and underflows on signed quantities, constants too large, +and so on. 
Errors (which terminate immediately evaluation) are emitted +for division by 0 (on / and % operators) and overflow (on / operator); +otherwise, warnings are emitted and the faulty evaluation takes place. +This prevents ucpp from crashing on typical x86 machines, while still +allowing to use some extensions. + + + +FUTURE EVOLUTIONS +----------------- + +ucpp is quite complete now. There was a longstanding project of +"traditional" preprocessing, but I dropped it because it would not +map cleanly on the token-based ucpp structure. Maybe I will code a +string-based preprocessor one day; it would certainly use some of the +code from lexer.c, eval.c, mem.c and nhash.c. However, making such a +tool is almost irrelevant nowadays. If one wants to handle such project, +using ucpp as code base, I would happily provide some help, if needed. + + + +CHANGES +------- + +From 1.2 to 1.3: + +* brand new integer evaluation code, with precise evaluation and checks +* new hash table implementation, with binary trees +* relaxed attitude on failed `##' operators +* bugfix on macro definition on command-line wrt nesting macros +* support for up to 32766 macro arguments in LOW_MEM code +* support for optional additional "identifier" characters such as '$' or '@' +* bugfix: memory leak on void #assert + +From 1.1 to 1.2: + +* bugfix: numerous memory leaks +* new function: wipeout(); this should release all malloc() blocks +* bugfix: missing "newline" and trailing "context" tokens +* improved included files name caching +* included memory leak detection code + +From 1.0 to 1.1: + +* bugfix: missing newline when exiting from a non-newline-terminated file +* bugfix: crash when resetting due to definition of the _Pragma pseudo-macro +* bugfix: handling of additional "optional" whitespace with SEMPER_FIDELIS +* improved handling of unreplaced arg macros wrt output line +* tricky handling of utterly tricky #include +* bugfix: spurious token `~=' eliminated + +From 0.9 to 1.0: + +* bugfix: 
crash after erroneous #assert +* changed ERR_SHARP to FAIL_SHARP, EMUL_UINTMAX to SIMUL_UINTMAX +* made "inline" default on gcc and DEC ccc (Linux/Alpha) +* semantic of -I is now Unix-like (added directories are looked first) +* added -J flag (to add include directories after the system ones) +* cleaned up non-ascii issues +* bugfix: missing brace in no-LOW_MEM code +* bugfix: argument number check in variadic macros +* bugfix: crash in non-lexer mode after some cases of unreplaced macro +* bugfix: _Pragma() handling wrt # and ## +* made evaluation of _Pragma() optional in #if, #include and #line +* bugfix: re-dump of multiline #pragma +* added the inmacro and macro_count flags +* added mmap() support +* added option to retain whitespace content in lexer mode + +From 0.8 to 0.9: + +* added check for division by 0 in #if evaluation +* added check for non-standard line numbers +* added check for trailing garbage in most directives +* corrected signedness of char constants (always int, therefore always signed) +* made LOW_MEM code, so that ucpp runs smoothly on low memory architectures +* multiple bugfixes (using the GNU cpp testsuite) +* added handling of _Pragma (as a macro) +* added tokenization of pragma directives +* added conservation of pragma directives in text output +* produced Msdos 16-bit small memory model executable +* produced Minix-86 executable + +From 0.7 to 0.8: + +* added some support for Amiga systems +* fixed extra spacing in stringified tokens +* fixed bug related to %:% and tolerated rogue sharps +* namespace cleanup +* bugfix for macro redefinition +* added warning for evaluated comma operators in #if (ISO requirement) +* -Dfoo now defines foo with content 1 (and not void content) +* trigraphs can be disabled (for incorrect but legacy code) +* fixed semantics for #include "file" (local directory) +* fixed detection of protected files +* produced a Msdos 16-bit executable + +From 0.6 to 0.7: + +* officially changed the goal to full C99 
compliance +* added the CONTEXT token and let NEWLINE tokens go +* added report_context() for error reporting +* enforced matching of #if/#endif (file-global nesting level = 0) +* added support of C99 digraphs +* added UTF-8 encoding support +* added universal character names +* rewrote #if expressions (sizes fixed, bignum, signed/unsigned fixed) +* fixed incomplete evaluation of #if expressions +* added transient_characters[] + +From 0.5 to 0.6: + +* disappearance of error_nonl() +* added extra optional warnings for trigraphs +* some bugfixes, especially in lexer mode +* handled MacIntosh files correctly + +From 0.4 to 0.5: + +* nicer #pragma handling (a token can be emitted) +* bugfix in lexer mode after #line and #error +* sample.c an example of code linked with ucpp +* made #if expressions conforming to standard signed/unsigned handling +* added the copy_line[] buffer feature + +From 0.3 to 0.4: + +* relaxed interpretation of '#include foo' when foo ends up, after macro + substitution, with a '' content +* corrected the 'double-dot' bug +* corrected two bugs related to the treatment of macro aborted calls (due + to lack of arguments) +* some namespaces cleanup, to ease integration into other code +* documented the way to include ucpp into another program +* made newlines embedded into strings illegal (and reported as such) + +From 0.2 to 0.3: + +* added support for system predefined macros +* made several bugfixes +* checked C99 compliance for most of the features +* ucpp now accepts non-C characters on standard when used stand-alone +* removed many useless spaces in the output + +From 0.1 to 0.2: + +* added support for assertions +* added support for macros with variable arguments +* split the pharaonic cpp.c file into many +* made several bugfixes +* relaxed the behaviour with regards to the void arguments +* made C++-like comments an option + + + +THANKS TO +--------- + +Volker Barthelmann, Neil Booth, Stephen Davies, Stéphane Ecolivet, +Marc Espie, Marcus 
Holland-Moritz, Antoine Leca, Cyrille Lefevre, +Dave Rivers, Loic Tortay and Laurent Wacrenier, for suggestions and +beta-testing. + +Paul Eggert, Douglas A. Gwyn, Clive D.W. Feather, and the other guys from +comp.std.c, for explanations about the standard. + +Dave Brolley, Jamie Lokier and Neil Booth, for discussion about tricky +points on nesting macros. + +Brian Kernighan and Dennis Ritchie, for bringing C to mortal Men. diff --git a/libexec/auxcpp/arith.c b/libexec/auxcpp/arith.c new file mode 100644 index 00000000000..bef258052a1 --- /dev/null +++ b/libexec/auxcpp/arith.c @@ -0,0 +1,1462 @@ +/* + * Integer arithmetic evaluation. + * + * (c) Thomas Pornin 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include "arith.h" + +#define ARITH_OCTAL(x) ((x) >= '0' && (x) <= '7') +#define ARITH_OVAL(x) ((x) - '0') +#define ARITH_DECIM(x) ((x) >= '0' && (x) <= '9') +#define ARITH_DVAL(x) ((x) - '0') +#define ARITH_HEXAD(x) (ARITH_DECIM(x) \ + || (x) == 'a' || (x) == 'A' \ + || (x) == 'b' || (x) == 'B' \ + || (x) == 'c' || (x) == 'C' \ + || (x) == 'd' || (x) == 'D' \ + || (x) == 'e' || (x) == 'E' \ + || (x) == 'f' || (x) == 'F') +#define ARITH_HVAL(x) (ARITH_DECIM(x) ? ARITH_DVAL(x) \ + : (x) == 'a' || (x) == 'A' ? 10 \ + : (x) == 'b' || (x) == 'B' ? 11 \ + : (x) == 'c' || (x) == 'C' ? 12 \ + : (x) == 'd' || (x) == 'D' ? 13 \ + : (x) == 'e' || (x) == 'E' ? 14 : 15) + +#ifdef NATIVE_SIGNED +/* ====================================================================== */ +/* Arithmetics with native types */ +/* ====================================================================== */ + +/* + * The following properties are imposed by the C standard: + * + * -- Arithmetics on the unsigned type should never overflow; every + * result is reduced modulo some power of 2. The macro NATIVE_UNSIGNED_BITS + * should have been defined to that specific exponent. + * + * -- The signed type should use either two's complement, one's complement + * or a sign bit and a magnitude. There should be an integer N such that + * the maximum signed value is (2^N)-1 and the minimum signed value is + * either -(2^N) or -((2^N)-1). 
-(2^N) is possible only for two's complement. + * + * -- The maximum signed value is at most equal to the maximum unsigned + * value. + * + * -- Trap representations can only be: + * ** In two's complement, 1 as sign bit and 0 for all value bits. + * This can happen only if the minimum signed value is -((2^N)-1). + * ** In one's complement, all bits set to 1. + * ** In mantissa + sign, sign bit to 1 and 0 for all value bits. + * Unsigned values have no trap representation achievable with numerical + * operators. Only signed values can have such representations, with + * operators &, |, ^, ~, << and >>. If trap representations are possible, + * such occurrences are reported as warnings. + * + * -- The operators +, -, * and << may overflow or underflow on signed + * quantities, which is potentially an error. A warning is emitted. + * + * -- The operator >> yields an implementation-defined result on + * signed negative quantities. Usually, the sign is extended, but this + * is not guaranteed. A warning is emitted. + * + * -- The operators / and % used with a second operand of 0 cannot work. + * An error is emitted when such a call is performed. Furthermore, in + * two's complement representation, with NATIVE_SIGNED_MIN == -(2^N) + * for some N, the expression `NATIVE_SIGNED_MIN / (-1)' yields an + * unrepresentable result, which is also an error. + * + * + * For the value checks, we need to consider those different cases. So + * we calculate the following macros: + * -- TWOS_COMPLEMENT: is 1 if representation is two's complement, 0 + * otherwise. + * -- ONES_COMPLEMENT: is 1 if representation is one's complement, 0 + * otherwise. + * -- SIGNED_IS_BIGGER: 1 if the maximum signed value is equal to the + * maximum unsigned value, 0 otherwise. NATIVE_SIGNED_MAX cannot + * exceed the maximum unsigned value. If SIGNED_IS_BIGGER is 0, then + * the maximum unsigned value is strictly greater than twice the + * value of NATIVE_SIGNED_MAX (e.g. 65535 to 32767). 
+ * -- TRAP_REPRESENTATION: 1 if a trap representation is possible, 0 + * otherwise. The only way trap representations are guaranteed + * impossible is when TWOS_COMPLEMENT is set, and NATIVE_SIGNED_MIN + * is equal to -NATIVE_SIGNED_MAX - 1. + * + * Those macros are calculated by some preprocessor directives. This + * supposes that the implementation conforms to C99. Rules on preprocessing + * were considerably looser in C90, and it could be that an old compiler, used + * for a cross-compiling task, does not get those right. Therefore, if + * ARCH_DEFINED is defined prior to the inclusion of this file, those + * four macros are supposed to be already defined. Otherwise they are + * (re)defined. The macro ARCH_TRAP_DEFINED has the same meaning, but + * is limited to the TRAP_REPRESENTATION macro (if ARCH_TRAP_DEFINED is + * defined, the macro TRAP_REPRESENTATION is supposed to be already + * defined; the three other macros are recalculated). + * + * + * To sum up: + * -- Whenever a division operator (/ or %) is invoked and would yield + * an unrepresentable result, ARITH_ERROR() is invoked. + * -- With ARITHMETIC_CHECKS undefined, ARITH_WARNING() is never invoked. + * -- With ARITHMETIC_CHECKS defined: + * ** If ARCH_DEFINED is defined, the including context must provide + * the macros TWOS_COMPLEMENT, ONES_COMPLEMENT, SIGNED_IS_BIGGER + * and TRAP_REPRESENTATION. + * ** Otherwise, if ARCH_TRAP_DEFINED is defined, the including context + * must provide the macro TRAP_REPRESENTATION. + * The code then detects all operator invocations that would yield an + * overflow, underflow, trap representation, or any implementation + * defined result or undefined behaviour. The macro ARITH_WARNING() is + * invoked for each detection. + * -- Trap representation detection code supposes that the operands are + * _not_ trap representations. 
+ */ + +#ifndef ARCH_DEFINED + +#undef TWOS_COMPLEMENT +#undef ONES_COMPLEMENT +#undef SIGNED_IS_BIGGER +#ifndef ARCH_TRAP_DEFINED +#undef TRAP_REPRESENTATION +#endif + +#if (-1) & 3 == 3 +/* + * Two's complement. + */ +#define TWOS_COMPLEMENT 1 +#define ONES_COMPLEMENT 0 +#ifndef ARCH_TRAP_DEFINED +#if NATIVE_SIGNED_MIN < -NATIVE_SIGNED_MAX +#define TRAP_REPRESENTATION 0 +#else +#define TRAP_REPRESENTATION 1 +#endif +#endif + +#elif (-1) & 3 == 2 +/* + * One's complement. + */ +#define TWOS_COMPLEMENT 0 +#define ONES_COMPLEMENT 1 +#ifndef ARCH_TRAP_DEFINED +#define TRAP_REPRESENTATION 1 +#endif + +#else +/* + * Mantissa + sign. + */ +#define TWOS_COMPLEMENT 0 +#define ONES_COMPLEMENT 0 +#ifndef ARCH_TRAP_DEFINED +#define TRAP_REPRESENTATION 1 +#endif + +#endif + +/* + * Maximum native unsigned value. The first macro is for #if directives, + * the second macro is for use as constant expression in C code. + */ +#define NATIVE_UNSIGNED_MAX ((((1U << (NATIVE_UNSIGNED_BITS - 1)) - 1U) \ + << 1) + 1U) +#define NATIVE_UNSIGNED_MAX_A (((((arith_u)1 << (NATIVE_UNSIGNED_BITS - 1)) \ + - (arith_u)1) << 1) + (arith_u)1) + +#if NATIVE_SIGNED_MAX == NATIVE_UNSIGNED_MAX +#define SIGNED_IS_BIGGER 1 +#else +#define SIGNED_IS_BIGGER 0 +#endif + +#endif + +#undef NEGATIVE_IS_BIGGER +#if NATIVE_SIGNED_MIN < -NATIVE_SIGNED_MAX +#define NEGATIVE_IS_BIGGER 1 +#else +#define NEGATIVE_IS_BIGGER 0 +#endif + +/* sanity check: we cannot have a trap representation if we have + two's complement with NATIVE_SIGNED_MIN < -NATIVE_SIGNED_MAX */ +#if TRAP_REPRESENTATION && NEGATIVE_IS_BIGGER +#error Impossible to get trap representations. 
+#endif
+
+/*
+ * operations on the unsigned type
+ *
+ * Every function below is introduced by an ARITH_DECL_* macro. Those
+ * macros are not defined in this file (presumably in arith.h -- confirm);
+ * they provide the return type and the parameter list, which is why the
+ * bodies use operands named x and y without declaring them.
+ */
+
+ARITH_DECL_MONO_S_U(to_u) { return (arith_u)x; }	/* cast only */
+ARITH_DECL_MONO_I_U(fromint) { return (arith_u)x; }	/* cast only */
+ARITH_DECL_MONO_L_U(fromulong) { return (arith_u)x; }	/* cast only */
+
+ARITH_DECL_MONO_U_I(toint)
+{
+#if NATIVE_UNSIGNED_MAX > INT_MAX
+	if (x > (arith_u)INT_MAX) return INT_MAX;	/* saturate, no wrap */
+#endif
+	return (int)x;
+}
+/*
+ * Same saturating conversion as toint, with LONG_MAX as the bound.
+ * NOTE(review): the function is named toulong but clamps to LONG_MAX and
+ * casts to long. If the ARITH_DECL_MONO_U_L macro declares an unsigned
+ * long return type (not visible from this file), ULONG_MAX would be the
+ * expected bound -- confirm against arith.h.
+ */
+ARITH_DECL_MONO_U_L(toulong)
+{
+#if NATIVE_UNSIGNED_MAX > LONG_MAX
+	if (x > (arith_u)LONG_MAX) return LONG_MAX;
+#endif
+	return (long)x;
+}
+/*
+ * Operators that need no check: unsigned arithmetic is reduced modulo a
+ * power of 2 and therefore cannot overflow.
+ */
+ARITH_DECL_MONO_U_U(neg) { return -x; }
+ARITH_DECL_MONO_U_U(not) { return ~x; }
+ARITH_DECL_MONO_U_I(lnot) { return !x; }
+ARITH_DECL_MONO_U_I(lval) { return x != 0; }
+
+ARITH_DECL_BI_UU_U(plus) { return x + y; }
+ARITH_DECL_BI_UU_U(minus) { return x - y; }
+ARITH_DECL_BI_UU_I(lt) { return x < y; }
+ARITH_DECL_BI_UU_I(leq) { return x <= y; }
+ARITH_DECL_BI_UU_I(gt) { return x > y; }
+ARITH_DECL_BI_UU_I(geq) { return x >= y; }
+ARITH_DECL_BI_UU_I(same) { return x == y; }
+ARITH_DECL_BI_UU_I(neq) { return x != y; }
+ARITH_DECL_BI_UU_U(and) { return x & y; }
+ARITH_DECL_BI_UU_U(xor) { return x ^ y; }
+ARITH_DECL_BI_UU_U(or) { return x | y; }
+ARITH_DECL_BI_UU_U(star) { return x * y; }
+/*
+ * Shifts: when ARITHMETIC_CHECKS is defined, a count that is at least
+ * NATIVE_UNSIGNED_BITS, or negative, is reported through ARITH_WARNING().
+ * The shift expression follows the check unconditionally.
+ */
+ARITH_DECL_BI_UI_U(lsh)
+{
+#ifdef ARITHMETIC_CHECKS
+	if (y >= NATIVE_UNSIGNED_BITS)
+		ARITH_WARNING(ARITH_EXCEP_LSH_W);
+	else if (y < 0)
+		ARITH_WARNING(ARITH_EXCEP_LSH_C);
+#endif
+	return x << y;
+}
+
+ARITH_DECL_BI_UI_U(rsh)
+{
+#ifdef ARITHMETIC_CHECKS
+	if (y >= NATIVE_UNSIGNED_BITS)
+		ARITH_WARNING(ARITH_EXCEP_RSH_W);
+	else if (y < 0)
+		ARITH_WARNING(ARITH_EXCEP_RSH_C);
+#endif
+	return x >> y;
+}
+/*
+ * Division and remainder: a zero divisor is reported through
+ * ARITH_ERROR() whether or not ARITHMETIC_CHECKS is defined.
+ * ARITH_ERROR is defined outside this file; it presumably does not
+ * return, since the division follows it directly -- confirm.
+ */
+ARITH_DECL_BI_UU_U(slash)
+{
+	if (y == 0) ARITH_ERROR(ARITH_EXCEP_SLASH_D);
+	return x / y;
+}
+
+ARITH_DECL_BI_UU_U(pct)
+{
+	if (y == 0) ARITH_ERROR(ARITH_EXCEP_PCT_D);
+	return x % y;
+}
+
+/* operations on the signed type */
+
+ARITH_DECL_MONO_U_S(to_s)	/* unsigned to signed conversion */
+{
+#ifdef ARITHMETIC_CHECKS
+#if !SIGNED_IS_BIGGER
+	if (x > (arith_u)NATIVE_SIGNED_MAX)	/* not representable as signed */
+		ARITH_WARNING(ARITH_EXCEP_CONV_O);
+#endif
+#endif
+	return (arith_s)x;
+}
+
+ARITH_DECL_MONO_I_S(fromint) { return (arith_s)x; }	/* cast only */
+ARITH_DECL_MONO_L_S(fromlong) { return (arith_s)x; }	/* cast only */
+/*
+ * toint / tolong: conversions that saturate at the bounds of the target
+ * type. Each bound test is compiled in only when the native signed type
+ * actually extends beyond that bound.
+ */
+ARITH_DECL_MONO_S_I(toint)
+{
+#if NATIVE_SIGNED_MIN < INT_MIN
+	if (x < (arith_s)INT_MIN) return INT_MIN;
+#endif
+#if NATIVE_SIGNED_MAX > INT_MAX
+	if (x > (arith_s)INT_MAX) return INT_MAX;
+#endif
+	return (int)x;
+}
+
+ARITH_DECL_MONO_S_L(tolong)
+{
+#if NATIVE_SIGNED_MIN < LONG_MIN
+	if (x < (arith_s)LONG_MIN) return LONG_MIN;
+#endif
+#if NATIVE_SIGNED_MAX > LONG_MAX
+	if (x > (arith_s)LONG_MAX) return LONG_MAX;
+#endif
+	return (long)x;
+}
+/*
+ * Negation: it can only overflow when the negative range is one value
+ * larger than the positive range (NEGATIVE_IS_BIGGER) and x is that
+ * extra value, NATIVE_SIGNED_MIN.
+ */
+ARITH_DECL_MONO_S_S(neg)
+{
+#ifdef ARITHMETIC_CHECKS
+#if NEGATIVE_IS_BIGGER
+	if (x == NATIVE_SIGNED_MIN)
+		ARITH_WARNING(ARITH_EXCEP_NEG_O);
+#endif
+#endif
+	return -x;
+}
+/*
+ * Bitwise complement: warn when ~x would be the trap representation of
+ * the detected encoding. The operand tested for is NATIVE_SIGNED_MAX in
+ * two's complement and in mantissa + sign, and 0 in one's complement.
+ */
+ARITH_DECL_MONO_S_S(not)
+{
+#ifdef ARITHMETIC_CHECKS
+#if TRAP_REPRESENTATION
+	if (
+#if TWOS_COMPLEMENT
+		(x == NATIVE_SIGNED_MAX)
+#elif ONES_COMPLEMENT
+		(x == 0)
+#else
+		(x == NATIVE_SIGNED_MAX)
+#endif
+	    ) ARITH_WARNING(ARITH_EXCEP_NOT_T);
+#endif
+#endif
+	return ~x;
+}
+
+ARITH_DECL_MONO_S_I(lnot) { return !x; }	/* logical not */
+ARITH_DECL_MONO_S_I(lval) { return x != 0; }	/* truth value, 0 or 1 */
+
+/*
+ * Addition of signed values:
+ * -- overflows occur only when both operands are strictly positive
+ * -- underflows occur only when both operands are strictly negative
+ * -- overflow check (both operands > 0):
+ *    ** if SIGNED_IS_BIGGER == 1, overflows are kept as such in the
+ *       unsigned world (if the signed addition overflows, so does the
+ *       unsigned, and vice versa)
+ *    ** if SIGNED_IS_BIGGER == 0, no overflow can happen in the unsigned
+ *       world
+ * -- underflow check (both operands < 0):
+ *    ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement)
+ *       ++ we have a guaranteed underflow if one of the operand is equal
+ *          to NATIVE_SIGNED_MIN; otherwise, -x and -y are valid integers,
+ *          and we cast them into the unsigned world
+ *       ++ if SIGNED_IS_BIGGER == 1, underflows become unsigned overflows
+ *          with a non-zero
result
+ *       ++ if SIGNED_IS_BIGGER == 0, no overflow happens in the unsigned
+ *          world; we use the fact that -NATIVE_SIGNED_MIN is then
+ *          exactly 1 more than NATIVE_SIGNED_MAX
+ *    ** if NEGATIVE_IS_BIGGER == 0, underflow check is identical to
+ *       overflow check on (signed) -x and -y.
+ */
+ARITH_DECL_BI_SS_S(plus)
+{
+#ifdef ARITHMETIC_CHECKS	/* the checks below only raise warnings */
+	if (x > 0 && y > 0 && (
+#if SIGNED_IS_BIGGER
+		((arith_u)((arith_u)x + (arith_u)y) < (arith_u)x)
+#else
+		(((arith_u)x + (arith_u)y) > (arith_u)NATIVE_SIGNED_MAX)
+#endif
+		)) ARITH_WARNING(ARITH_EXCEP_PLUS_O);	/* overflow */
+	else if (x < 0 && y < 0 && (
+#if NEGATIVE_IS_BIGGER
+		(x == NATIVE_SIGNED_MIN || y == NATIVE_SIGNED_MIN) ||
+#if SIGNED_IS_BIGGER
+		(((arith_u)(-x) + (arith_u)(-y) != 0)
+			&& (arith_u)((arith_u)(-x) + (arith_u)(-y))
+			< (arith_u)(-x))
+#else
+		(((arith_u)(-x) + (arith_u)(-y))
+			> ((arith_u)1 + (arith_u)NATIVE_SIGNED_MAX))
+#endif
+#else
+#if SIGNED_IS_BIGGER
+		((arith_u)((arith_u)(-x) + (arith_u)(-y)) < (arith_u)(-x))
+#else
+		(((arith_u)(-x) + (arith_u)(-y))
+			> (arith_u)NATIVE_SIGNED_MAX)
+#endif
+#endif
+		)) ARITH_WARNING(ARITH_EXCEP_PLUS_U);	/* underflow */
+#endif
+	return x + y;
+}
+
+/*
+ * Subtraction of signed values:
+ * -- overflow: only if x > 0 and y < 0
+ *    ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement) and
+ *       y == NATIVE_SIGNED_MIN then overflow
+ *    ** otherwise, cast x and -y to unsigned, then add and check
+ *       for overflows
+ * -- underflow: only if x < 0 and y > 0
+ *    ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement):
+ *       ++ if x == NATIVE_SIGNED_MIN then underflow
+ *       ++ cast -x and y to unsigned, then add. If SIGNED_IS_BIGGER == 0,
+ *          just check. Otherwise, check for overflow with non-zero result.
+ *    ** if NEGATIVE_IS_BIGGER == 0: cast -x and y to unsigned, then
+ *       add. Overflow check as in addition.
+ */
+ARITH_DECL_BI_SS_S(minus)
+{
+#ifdef ARITHMETIC_CHECKS	/* the checks below only raise warnings */
+	if (x > 0 && y < 0 && (
+#if NEGATIVE_IS_BIGGER
+		(y == NATIVE_SIGNED_MIN) ||	/* -y is then not representable */
+#endif
+#if SIGNED_IS_BIGGER
+		((arith_u)((arith_u)x + (arith_u)(-y)) < (arith_u)x)
+#else
+		(((arith_u)x + (arith_u)(-y)) > (arith_u)NATIVE_SIGNED_MAX)
+#endif
+		)) ARITH_WARNING(ARITH_EXCEP_MINUS_O);	/* overflow */
+	else if (x < 0 && y > 0 && (
+#if NEGATIVE_IS_BIGGER
+		(x == NATIVE_SIGNED_MIN) ||	/* -x is then not representable */
+#if SIGNED_IS_BIGGER
+		((((arith_u)(-x) + (arith_u)y) != 0) &&
+		 ((arith_u)((arith_u)(-x) + (arith_u)y) < (arith_u)(-x)))
+#else
+		(((arith_u)(-x) + (arith_u)y) >
+		 ((arith_u)1 + (arith_u)NATIVE_SIGNED_MAX))
+#endif
+#else
+#if SIGNED_IS_BIGGER
+		((arith_u)((arith_u)(-x) + (arith_u)y) < (arith_u)(-x))
+#else
+		(((arith_u)(-x) + (arith_u)y) > (arith_u)NATIVE_SIGNED_MAX)
+#endif
+#endif
+		)) ARITH_WARNING(ARITH_EXCEP_MINUS_U);	/* underflow */
+#endif
+	return x - y;
+}
+/*
+ * Comparison operators: no check is needed, each one yields 0 or 1.
+ */
+ARITH_DECL_BI_SS_I(lt) { return x < y; }
+ARITH_DECL_BI_SS_I(leq) { return x <= y; }
+ARITH_DECL_BI_SS_I(gt) { return x > y; }
+ARITH_DECL_BI_SS_I(geq) { return x >= y; }
+ARITH_DECL_BI_SS_I(same) { return x == y; }
+ARITH_DECL_BI_SS_I(neq) { return x != y; }
+
+/*
+ * Provided neither x nor y is a trap representation:
+ * -- one's complement: impossible to get a trap representation
+ * -- two's complement and sign + mantissa: trap representation if and
+ *    only if x and y are strictly negative and (-x) & (-y) == 0
+ *    (in two's complement, -x is safe because overflow would occur only
+ *    if x was already a trap representation).
+ */
+ARITH_DECL_BI_SS_S(and)
+{
+#ifdef ARITHMETIC_CHECKS
+#if TRAP_REPRESENTATION && !ONES_COMPLEMENT
+	if (x < 0 && y < 0 && ((-x) & (-y)) == 0)
+		ARITH_WARNING(ARITH_EXCEP_AND_T);	/* result would be a trap */
+#endif
+#endif
+	return x & y;
+}
+
+/*
+ * Provided neither x nor y is a trap representation:
+ * -- two's complement: trap if and only if x != NATIVE_SIGNED_MAX && ~x == y
+ * -- one's complement: trap if and only if x != 0 && ~x == y
+ * -- mantissa + sign: trap if and only if x != 0 && -x == y
+ *
+ * The first half of each test excludes the operand whose complement (or
+ * negation) is not safe to compute, so the second half is only evaluated
+ * on a valid value.
+ */
+ARITH_DECL_BI_SS_S(xor)
+{
+#ifdef ARITHMETIC_CHECKS
+#if TRAP_REPRESENTATION
+	if (
+#if TWOS_COMPLEMENT
+		(x != NATIVE_SIGNED_MAX && ~x == y)
+#elif ONES_COMPLEMENT
+		(x != 0 && ~x == y)
+#else
+		(x != 0 && -x == y)
+#endif
+	    ) ARITH_WARNING(ARITH_EXCEP_XOR_T);
+#endif
+#endif
+	return x ^ y;
+}
+
+/*
+ * Provided neither x nor y is a trap representation:
+ * -- two's complement: impossible to trap
+ * -- one's complement: trap if and only if x != 0 && y != 0 && (~x & ~y) == 0
+ * -- mantissa + sign: impossible to trap
+ */
+ARITH_DECL_BI_SS_S(or)
+{
+#ifdef ARITHMETIC_CHECKS
+#if TRAP_REPRESENTATION
+#if ONES_COMPLEMENT
+	if (x != 0 && y != 0 && (~x & ~y) == 0)
+		ARITH_WARNING(ARITH_EXCEP_OR_T);	/* all bits would be set */
+#endif
+#endif
+#endif
+	return x | y;
+}
+
+/*
+ * Left-shifting by a negative or greater than type width count is
+ * forbidden. Left-shifting a negative value is forbidden (underflow).
+ * Left-shifting a positive value can trigger an overflow. We check it
+ * by casting into the unsigned world and simulating a truncation.
+ *
+ * If SIGNED_IS_BIGGER is set, then the signed type width is 1 more
+ * than the unsigned type width (the sign bit is included in the width);
+ * otherwise, if W is the signed type width, 1U << (W-1) is equal to
+ * NATIVE_SIGNED_MAX + 1.
+ */
+ARITH_DECL_BI_SI_S(lsh)
+{
+#ifdef ARITHMETIC_CHECKS	/* at most one warning is raised per call */
+	if (y < 0) ARITH_WARNING(ARITH_EXCEP_LSH_C);	/* negative count */
+	else if (
+#if SIGNED_IS_BIGGER
+		y > NATIVE_UNSIGNED_BITS
+#else
+		y >= NATIVE_UNSIGNED_BITS
+		|| (y > 0 && (((arith_u)1 << (y - 1))
+		> (arith_u)NATIVE_SIGNED_MAX))
+#endif
+		) ARITH_WARNING(ARITH_EXCEP_LSH_W);	/* count too large */
+	else if (x < 0) ARITH_WARNING(ARITH_EXCEP_LSH_U);	/* negative value */
+	else if (x > 0 && ((((arith_u)x << y) & NATIVE_SIGNED_MAX) >> y)
+		!= (arith_u)x) ARITH_WARNING(ARITH_EXCEP_LSH_O);	/* bits lost */
+#endif
+	return x << y;
+}
+
+/*
+ * Right-shifting is handled as left-shifting, except that the problem
+ * is somehow simpler: there is no possible overflow or underflow. Only
+ * right-shifting a negative value yields an implementation defined
+ * result (_not_ an undefined behaviour).
+ */
+ARITH_DECL_BI_SI_S(rsh)
+{
+#ifdef ARITHMETIC_CHECKS	/* same count tests as in lsh above */
+	if (y < 0) ARITH_WARNING(ARITH_EXCEP_RSH_C);
+	else if (
+#if SIGNED_IS_BIGGER
+		y > NATIVE_UNSIGNED_BITS
+#else
+		y >= NATIVE_UNSIGNED_BITS
+		|| (y > 0 && (((arith_u)1 << (y - 1))
+		> (arith_u)NATIVE_SIGNED_MAX))
+#endif
+		) ARITH_WARNING(ARITH_EXCEP_RSH_W);
+	else if (x < 0) ARITH_WARNING(ARITH_EXCEP_RSH_N);	/* impl.-defined */
+#endif
+	return x >> y;
+}
+
+/*
+ * Overflow can happen only if both operands have the same sign.
+ * Underflow can happen only if both operands have opposite signs.
+ *
+ * Overflow checking: this is done quite inefficiently by performing
+ * a division on the result and check if it matches the initial operand.
+ */
+ARITH_DECL_BI_SS_S(star)
+{
+#ifdef ARITHMETIC_CHECKS
+	if (x == 0 || y == 0) return 0;	/* also keeps the divisions below safe */
+	if (x > 0 && y > 0) {
+		if ((((arith_u)x * (arith_u)y) & (arith_u)NATIVE_SIGNED_MAX)
+			/ (arith_u)y != (arith_u)x)
+			ARITH_WARNING(ARITH_EXCEP_STAR_O);
+	} else if (x < 0 && y < 0) {
+		if (
+#if NEGATIVE_IS_BIGGER
+			(x == NATIVE_SIGNED_MIN || y == NATIVE_SIGNED_MIN) ||
+#endif
+			(((arith_u)(-x) * (arith_u)(-y))
+			& (arith_u)NATIVE_SIGNED_MAX) / (arith_u)(-y)
+			!= (arith_u)(-x))
+			ARITH_WARNING(ARITH_EXCEP_STAR_O);
+	} else if (x > 0 && y < 0) {	/* opposite signs: underflow only */
+		if ((arith_u)x > (arith_u)1 && (
+#if NEGATIVE_IS_BIGGER
+			y == NATIVE_SIGNED_MIN ||
+#endif
+			(((arith_u)x * (arith_u)(-y)) & (arith_u)NATIVE_SIGNED_MAX)
+			/ (arith_u)(-y) != (arith_u)x))
+			ARITH_WARNING(ARITH_EXCEP_STAR_U);
+	} else {	/* x < 0 and y > 0: mirror of the previous case */
+		if ((arith_u)y > (arith_u)1 && (
+#if NEGATIVE_IS_BIGGER
+			x == NATIVE_SIGNED_MIN ||
+#endif
+			(((arith_u)y * (arith_u)(-x)) & (arith_u)NATIVE_SIGNED_MAX)
+			/ (arith_u)(-x) != (arith_u)y))
+			ARITH_WARNING(ARITH_EXCEP_STAR_U);
+	}
+#endif
+	return x * y;
+}
+
+/*
+ * Division by 0 is an error. The only other possible problem is an
+ * overflow of the result. Such an overflow can only happen in two's
+ * complement representation, when NEGATIVE_IS_BIGGER is set, and
+ * one attempts to divide NATIVE_SIGNED_MIN by -1: the result is then
+ * -NATIVE_SIGNED_MIN, which is not representable by the type. This is
+ * considered as an error, not a warning, because it actually triggers
+ * an exception on modern Pentium-based PC.
+ *
+ * Both tests are compiled in whether or not ARITHMETIC_CHECKS is defined.
+ */
+ARITH_DECL_BI_SS_S(slash)
+{
+	if (y == 0) ARITH_ERROR(ARITH_EXCEP_SLASH_D);
+#if NEGATIVE_IS_BIGGER
+	else if (x == NATIVE_SIGNED_MIN && y == (arith_s)(-1))
+		ARITH_ERROR(ARITH_EXCEP_SLASH_O);
+#endif
+	return x / y;
+}
+
+/*
+ * Only division by 0 needs to be checked.
+ */ +ARITH_DECL_BI_SS_S(pct) +{ + if (y == 0) ARITH_ERROR(ARITH_EXCEP_PCT_D); + return x % y; +} + +ARITH_DECL_MONO_ST_US(octconst) +{ + arith_u z = 0; + + for (; ARITH_OCTAL(*c); c ++) { + arith_u w = ARITH_OVAL(*c); + if (z > (NATIVE_UNSIGNED_MAX_A / 8)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); + z *= 8; +#if 0 +/* obsolete */ +/* NATIVE_UNSIGNED_MAX_A is 2^N - 1, 0 <= w <= 7 and 8 divides z */ + if (z > (NATIVE_UNSIGNED_MAX_A - w)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); +#endif + z += w; + } + *ru = z; +#if SIGNED_IS_BIGGER + *rs = z; + *sp = 1; +#else + if (z > NATIVE_SIGNED_MAX) { + *sp = 0; + } else { + *rs = z; + *sp = 1; + } +#endif + return c; +} + +ARITH_DECL_MONO_ST_US(decconst) +{ + arith_u z = 0; + + for (; ARITH_DECIM(*c); c ++) { + arith_u w = ARITH_DVAL(*c); + if (z > (NATIVE_UNSIGNED_MAX_A / 10)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); + z *= 10; + if (z > (NATIVE_UNSIGNED_MAX_A - w)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); + z += w; + } + *ru = z; +#if SIGNED_IS_BIGGER + *rs = z; + *sp = 1; +#else + if (z > NATIVE_SIGNED_MAX) { + *sp = 0; + } else { + *rs = z; + *sp = 1; + } +#endif + return c; +} + +ARITH_DECL_MONO_ST_US(hexconst) +{ + arith_u z = 0; + + for (; ARITH_HEXAD(*c); c ++) { + arith_u w = ARITH_HVAL(*c); + if (z > (NATIVE_UNSIGNED_MAX_A / 16)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); + z *= 16; +#if 0 +/* obsolete */ +/* NATIVE_UNSIGNED_MAX_A is 2^N - 1, 0 <= w <= 15 and 16 divides z */ + if (z > (NATIVE_UNSIGNED_MAX_A - w)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); +#endif + z += w; + } + *ru = z; +#if SIGNED_IS_BIGGER + *rs = z; + *sp = 1; +#else + if (z > NATIVE_SIGNED_MAX) { + *sp = 0; + } else { + *rs = z; + *sp = 1; + } +#endif + return c; +} + +#else +/* ====================================================================== */ +/* Arithmetics with a simple simulated type */ +/* ====================================================================== */ + +/* + * We simulate a type with the following characteristics: + * -- the signed type width is equal to 
the unsigned type width (which + * means that there is one less value bit in the signed type); + * -- the signed type uses two's complement representation; + * -- there is no trap representation; + * -- overflows and underflows are truncated (but a warning is emitted + * if ARITHMETIC_CHECKS is defined); + * -- overflow on integer division is still an error; + * -- right-shifting of a negative value extends the sign; + * -- the shift count value is first cast to unsigned, then reduced modulo + * the type size. + * + * These characteristics follow what is usually found on modern + * architectures. + * + * The maximum emulated type size is twice the size of the unsigned native + * type which is used to emulate the type. + */ + +#undef SIMUL_ONE_TMP +#undef SIMUL_MSW_TMP1 +#undef SIMUL_MSW_MASK +#undef SIMUL_LSW_TMP1 +#undef SIMUL_LSW_MASK + +#define SIMUL_ONE_TMP ((SIMUL_ARITH_SUBTYPE)1) +#define SIMUL_MSW_TMP1 (SIMUL_ONE_TMP << (SIMUL_MSW_WIDTH - 1)) +#define SIMUL_MSW_MASK (SIMUL_MSW_TMP1 | (SIMUL_MSW_TMP1 - SIMUL_ONE_TMP)) +#define SIMUL_LSW_TMP1 (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1)) +#define SIMUL_LSW_MASK (SIMUL_LSW_TMP1 | (SIMUL_LSW_TMP1 - SIMUL_ONE_TMP)) + +#undef TMSW +#undef TLSW + +#define TMSW(x) ((x) & SIMUL_MSW_MASK) +#define TLSW(x) ((x) & SIMUL_LSW_MASK) + +#undef SIMUL_ZERO +#undef SIMUL_ONE + +#define SIMUL_ZERO arith_strc(ARITH_TYPENAME, _zero) +#define SIMUL_ONE arith_strc(ARITH_TYPENAME, _one) + +static arith_u SIMUL_ZERO = { 0, 0 }; +static arith_u SIMUL_ONE = { 0, 1 }; + +/* + * We use the fact that both the signed and unsigned type are the same + * structure. The difference between the signed and the unsigned type + * is a type information, and, as such, is considered compile-time and + * not maintained in the value structure itself. This is a job for + * the programmer / compiler. 
+ */ +ARITH_DECL_MONO_S_U(to_u) { return x; } + +ARITH_DECL_MONO_I_U(fromint) +{ + arith_u z; + + if (x < 0) return arith_op_u(neg)(arith_op_u(fromint)(-x)); + /* + * This code works because types smaller than int are promoted + * by the C compiler before evaluating the >> operator. + */ + z.msw = TMSW(((SIMUL_ARITH_SUBTYPE)x >> (SIMUL_LSW_WIDTH - 1)) >> 1); + z.lsw = TLSW((SIMUL_ARITH_SUBTYPE)x); + return z; +} + +ARITH_DECL_MONO_L_U(fromulong) +{ + arith_u z; + +#if (ULONG_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1 == 0 + z.msw = 0; + z.lsw = x; +#else + z.msw = TMSW(x >> SIMUL_LSW_WIDTH); + z.lsw = TLSW((SIMUL_ARITH_SUBTYPE)x); +#endif + return z; +} + +ARITH_DECL_MONO_U_I(toint) +{ +#if ((INT_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1) == 0 + if (x.msw != 0 || x.lsw > (SIMUL_ARITH_SUBTYPE)INT_MAX) + return INT_MAX; + return (int)x.lsw; +#else +#if (INT_MAX >> (SIMUL_SUBTYPE_BITS - 1)) == 0 + if (x.msw > (SIMUL_ARITH_SUBTYPE)(INT_MAX >> SIMUL_LSW_WIDTH)) + return INT_MAX; +#endif + return ((int)x.msw << SIMUL_LSW_WIDTH) | (int)x.lsw; +#endif +} + +ARITH_DECL_MONO_U_L(toulong) +{ +#if ((ULONG_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1) == 0 + if (x.msw != 0 || x.lsw > (SIMUL_ARITH_SUBTYPE)ULONG_MAX) + return ULONG_MAX; + return (unsigned long)x.lsw; +#else +#if (ULONG_MAX >> (SIMUL_SUBTYPE_BITS - 1)) == 0 + if (x.msw > (SIMUL_ARITH_SUBTYPE)(ULONG_MAX >> SIMUL_LSW_WIDTH)) + return ULONG_MAX; +#endif + return ((unsigned long)x.msw << SIMUL_LSW_WIDTH) | (unsigned long)x.lsw; +#endif +} + +ARITH_DECL_MONO_U_U(neg) +{ + x = arith_op_u(not)(x); + return arith_op_u(plus)(x, SIMUL_ONE); +} + +ARITH_DECL_MONO_U_U(not) +{ + x.msw = TMSW(~x.msw); + x.lsw = TLSW(~x.lsw); + return x; +} + +ARITH_DECL_MONO_U_I(lnot) +{ + return x.msw == 0 && x.lsw == 0; +} + +ARITH_DECL_MONO_U_I(lval) +{ + return x.msw != 0 || x.lsw != 0; +} + +ARITH_DECL_BI_UU_U(plus) +{ + x.lsw = TLSW(x.lsw + y.lsw); + x.msw = TMSW(x.msw + y.msw); + if (x.lsw < y.lsw) x.msw = TMSW(x.msw + 1); + return x; +} + 
+ARITH_DECL_BI_UU_U(minus) +{ + return arith_op_u(plus)(x, arith_op_u(neg)(y)); +} + +ARITH_DECL_BI_UI_U(lsh) +{ + if (y == 0) return x; +#ifdef ARITHMETIC_CHECKS + if (y < 0) ARITH_WARNING(ARITH_EXCEP_LSH_C); + else if (y >= SIMUL_NUMBITS) ARITH_WARNING(ARITH_EXCEP_LSH_W); +#endif + y = (unsigned)y % SIMUL_NUMBITS; + if (y >= SIMUL_LSW_WIDTH) { + /* + * We use here the fact that the LSW size is always + * equal to or greater than the MSW size. + */ + x.msw = TMSW(x.lsw << (y - SIMUL_LSW_WIDTH)); + x.lsw = 0; + return x; + } + x.msw = TMSW((x.msw << y) | (x.lsw >> (SIMUL_LSW_WIDTH - y))); + x.lsw = TLSW(x.lsw << y); + return x; +} + +ARITH_DECL_BI_UI_U(rsh) +{ +#ifdef ARITHMETIC_CHECKS + if (y < 0) ARITH_WARNING(ARITH_EXCEP_RSH_C); + else if (y >= SIMUL_NUMBITS) ARITH_WARNING(ARITH_EXCEP_RSH_W); +#endif + y = (unsigned)y % SIMUL_NUMBITS; + if (y >= SIMUL_LSW_WIDTH) { + x.lsw = x.msw >> (y - SIMUL_LSW_WIDTH); + x.msw = 0; + return x; + } + x.lsw = TLSW((x.lsw >> y) | (x.msw << (SIMUL_LSW_WIDTH - y))); + x.msw >>= y; + return x; +} + +ARITH_DECL_BI_UU_I(lt) +{ + return x.msw < y.msw || (x.msw == y.msw && x.lsw < y.lsw); +} + +ARITH_DECL_BI_UU_I(leq) +{ + return x.msw < y.msw || (x.msw == y.msw && x.lsw <= y.lsw); +} + +ARITH_DECL_BI_UU_I(gt) +{ + return arith_op_u(lt)(y, x); +} + +ARITH_DECL_BI_UU_I(geq) +{ + return arith_op_u(leq)(y, x); +} + +ARITH_DECL_BI_UU_I(same) +{ + return x.msw == y.msw && x.lsw == y.lsw; +} + +ARITH_DECL_BI_UU_I(neq) +{ + return !arith_op_u(same)(x, y); +} + +ARITH_DECL_BI_UU_U(and) +{ + x.msw &= y.msw; + x.lsw &= y.lsw; + return x; +} + +ARITH_DECL_BI_UU_U(xor) +{ + x.msw ^= y.msw; + x.lsw ^= y.lsw; + return x; +} + +ARITH_DECL_BI_UU_U(or) +{ + x.msw |= y.msw; + x.lsw |= y.lsw; + return x; +} + +#undef SIMUL_LSW_ODDLEN +#undef SIMUL_LSW_HALFLEN +#undef SIMUL_LSW_HALFMASK + +#define SIMUL_LSW_ODDLEN (SIMUL_LSW_WIDTH & 1) +#define SIMUL_LSW_HALFLEN (SIMUL_LSW_WIDTH / 2) +#define SIMUL_LSW_HALFMASK (~(~(SIMUL_ARITH_SUBTYPE)0 << 
/*
 * Multiplication of two simulated unsigned values, truncated to the
 * simulated width.
 *
 * The low words of both operands are split into halves of
 * SIMUL_LSW_HALFLEN bits and the four partial products t00, t01, t10
 * and t11 are computed. The low word of the result is accumulated
 * step by step; each time an addition wraps around (the new value is
 * smaller than the previous one) the carry counter `c' is incremented.
 * The high word is then built from the cross products of low and high
 * words, the carries, and the upper parts of the partial products.
 *
 * When the low word has an odd number of bits (SIMUL_LSW_ODDLEN), the
 * top bit of y.lsw is stripped from `b' and kept in `bms', and extra
 * steps account for it.
 */
ARITH_DECL_BI_UU_U(star)
{
	arith_u z;
	SIMUL_ARITH_SUBTYPE a = x.lsw, b = y.lsw, t00, t01, t10, t11, c = 0, t;
#if SIMUL_LSW_ODDLEN
	/* bms: top bit of the low word of y; b keeps the remaining bits */
	SIMUL_ARITH_SUBTYPE bms = b & (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1));

	b &= ~(SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1));
#endif

	/* partial products of the half-words: tXY = a-half X * b-half Y */
	t00 = (a & SIMUL_LSW_HALFMASK) * (b & SIMUL_LSW_HALFMASK);
	t01 = (a & SIMUL_LSW_HALFMASK) * (b >> SIMUL_LSW_HALFLEN);
	t10 = (a >> SIMUL_LSW_HALFLEN) * (b & SIMUL_LSW_HALFMASK);
	t11 = (a >> SIMUL_LSW_HALFLEN) * (b >> SIMUL_LSW_HALFLEN);
	/* accumulate the low word; `t > z.lsw' detects a wrap-around */
	t = z.lsw = t00;
	z.lsw = TLSW(z.lsw + (t01 << SIMUL_LSW_HALFLEN));
	if (t > z.lsw) c ++;
	t = z.lsw;
	z.lsw = TLSW(z.lsw + (t10 << SIMUL_LSW_HALFLEN));
	if (t > z.lsw) c ++;
#if SIMUL_LSW_ODDLEN
	/* with an odd width, the lowest bit of t11 still lands in the LSW */
	t = z.lsw;
	z.lsw = TLSW(z.lsw + (t11 << (2 * SIMUL_LSW_HALFLEN)));
	if (t > z.lsw) c ++;
	/*
	 * NOTE(review): this step is meant to account for the top bit of
	 * y.lsw that was stripped above. The partial product a * bms
	 * would contribute `bms' to the low word when a is odd, and
	 * a >> 1 to the high word; the code adds `b' instead and adds
	 * nothing to the high word. This looks wrong -- confirm with a
	 * test using an odd SIMUL_NUMBITS.
	 */
	if (bms && (a & SIMUL_ONE_TMP)) {
		t = z.lsw;
		z.lsw = TLSW(z.lsw + b);
		if (t > z.lsw) c ++;
	}
#endif
	/* high word: cross products, carries, and partial product overflow */
	z.msw = TMSW(x.lsw * y.msw + x.msw * y.lsw + c
		+ (t01 >> (SIMUL_LSW_WIDTH - SIMUL_LSW_HALFLEN))
		+ (t10 >> (SIMUL_LSW_WIDTH - SIMUL_LSW_HALFLEN))
		+ (t11 >> (SIMUL_LSW_WIDTH - (2 * SIMUL_LSW_HALFLEN))));
	return z;
}
+ */ +static void arith_op_u(udiv)(arith_u x, arith_u y, arith_u *q, arith_u *r) +{ + int i, j; + arith_u a; + + *q = SIMUL_ZERO; + for (i = SIMUL_NUMBITS - 1; i >= 0; i --) { + if (i >= (int)SIMUL_LSW_WIDTH + && (y.msw & (SIMUL_ONE_TMP << (i - SIMUL_LSW_WIDTH)))) + break; + if (i < (int)SIMUL_LSW_WIDTH && (y.lsw & (SIMUL_ONE_TMP << i))) + break; + } + a = arith_op_u(lsh)(y, SIMUL_NUMBITS - 1 - i); + for (j = SIMUL_NUMBITS - 1 - i; j >= SIMUL_LSW_WIDTH; j --) { + if (arith_op_u(leq)(a, x)) { + x = arith_op_u(minus)(x, a); + q->msw |= SIMUL_ONE_TMP << (j - SIMUL_LSW_WIDTH); + } + a = arith_op_u(rsh)(a, 1); + } + for (; j >= 0; j --) { + if (arith_op_u(leq)(a, x)) { + x = arith_op_u(minus)(x, a); + q->lsw |= SIMUL_ONE_TMP << j; + } + a = arith_op_u(rsh)(a, 1); + } + *r = x; +} + +ARITH_DECL_BI_UU_U(slash) +{ + arith_u q, r; + + if (arith_op_u(same)(y, SIMUL_ZERO)) + ARITH_ERROR(ARITH_EXCEP_SLASH_D); + arith_op_u(udiv)(x, y, &q, &r); + return q; +} + +ARITH_DECL_BI_UU_U(pct) +{ + arith_u q, r; + + if (arith_op_u(same)(y, SIMUL_ZERO)) + ARITH_ERROR(ARITH_EXCEP_PCT_D); + arith_op_u(udiv)(x, y, &q, &r); + return r; +} + +#undef SIMUL_TRAP +#undef SIMUL_TRAPL +#define SIMUL_TRAP (SIMUL_ONE_TMP << (SIMUL_MSW_WIDTH - 1)) +#define SIMUL_TRAPL (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1)) + +ARITH_DECL_MONO_U_S(to_s) +{ +#ifdef ARITHMETIC_CHECKS + if (x.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_CONV_O); +#endif + return x; +} + +ARITH_DECL_MONO_I_S(fromint) { return arith_op_u(fromint)(x); } +ARITH_DECL_MONO_L_S(fromlong) +{ + if (x < 0) return arith_op_u(neg)( + arith_op_u(fromulong)((unsigned long)(-x))); + return arith_op_u(fromulong)((unsigned long)x); +} + +ARITH_DECL_MONO_S_I(toint) +{ + if (x.msw & SIMUL_TRAP) return -arith_op_u(toint)(arith_op_u(neg)(x)); + return arith_op_u(toint)(x); +} + +ARITH_DECL_MONO_S_L(tolong) +{ + if (x.msw & SIMUL_TRAP) + return -(long)arith_op_u(toulong)(arith_op_u(neg)(x)); + return (long)arith_op_u(toulong)(x); +} + 
/*
 * Left shift of a simulated signed value.
 *
 * Since signed and unsigned widths are equal for the simulated type,
 * we can use the unsigned left shift function, which performs the
 * checks on the shift count against the type width.
 *
 * When ARITHMETIC_CHECKS is defined, shifting a negative value is
 * reported as ARITH_EXCEP_LSH_U, and a shift of a non-negative value
 * which loses bits or reaches the sign bit as ARITH_EXCEP_LSH_O. The
 * shifted value is returned in all cases.
 */
ARITH_DECL_BI_SI_S(lsh)
{
	arith_s z = arith_op_u(lsh)(x, y);

#ifdef ARITHMETIC_CHECKS
	if (x.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_LSH_U);
	else {
		/*
		 * To check for possible overflow, we right shift the
		 * result. The shift count is reduced here so that the
		 * unsigned right shift does not emit a second warning
		 * about the count. Besides, the left shift could have
		 * lost no bit and yet affect the sign bit, so we must
		 * test this explicitly.
		 */
		arith_s w = arith_op_u(rsh)(z, (unsigned)y % SIMUL_NUMBITS);

		if ((z.msw & SIMUL_TRAP) || w.msw != x.msw || w.lsw != x.lsw)
			ARITH_WARNING(ARITH_EXCEP_LSH_O);
	}
#endif
	return z;
}
+ */ +ARITH_DECL_BI_SI_S(rsh) +{ + int xn = (x.msw & SIMUL_TRAP) != 0; + arith_s z = arith_op_u(rsh)(x, y); + int gy = (unsigned)y % SIMUL_NUMBITS; + +#ifdef ARITHMETIC_CHECKS + if (xn) ARITH_WARNING(ARITH_EXCEP_RSH_N); +#endif + if (xn && gy > 0) { + if (gy <= SIMUL_MSW_WIDTH) { + z.msw |= TMSW(~(SIMUL_MSW_MASK >> gy)); + } else { + z.msw = SIMUL_MSW_MASK; + z.lsw |= TLSW(~(SIMUL_LSW_MASK + >> (gy - SIMUL_MSW_WIDTH))); + } + } + return z; +} + +ARITH_DECL_BI_SS_I(lt) +{ + int xn = (x.msw & SIMUL_TRAP) != 0; + int yn = (y.msw & SIMUL_TRAP) != 0; + + if (xn == yn) { + return x.msw < y.msw || (x.msw == y.msw && x.lsw < y.lsw); + } else { + return xn; + } +} + +ARITH_DECL_BI_SS_I(leq) +{ + int xn = (x.msw & SIMUL_TRAP) != 0; + int yn = (y.msw & SIMUL_TRAP) != 0; + + if (xn == yn) { + return x.msw < y.msw || (x.msw == y.msw && x.lsw <= y.lsw); + } else { + return xn; + } +} + +ARITH_DECL_BI_SS_I(gt) +{ + return arith_op_s(lt)(y, x); +} + +ARITH_DECL_BI_SS_I(geq) +{ + return arith_op_s(leq)(y, x); +} + +ARITH_DECL_BI_SS_I(same) +{ + return x.msw == y.msw && x.lsw == y.lsw; +} + +ARITH_DECL_BI_SS_I(neq) +{ + return !arith_op_s(same)(x, y); +} + +ARITH_DECL_BI_SS_S(and) +{ + return arith_op_u(and)(x, y); +} + +ARITH_DECL_BI_SS_S(xor) +{ + return arith_op_u(xor)(x, y); +} + +ARITH_DECL_BI_SS_S(or) +{ + return arith_op_u(or)(x, y); +} + +/* + * This function calculates the signed integer division, yielding + * both quotient and remainder. The divider (y) MUST be non-zero. 
+ */ +static void arith_op_s(sdiv)(arith_s x, arith_s y, arith_s *q, arith_s *r) +{ + arith_u a = x, b = y, c, d; + int xn = 0, yn = 0; + + if (x.msw & SIMUL_TRAP) { a = arith_op_u(neg)(x); xn = 1; } + if (y.msw & SIMUL_TRAP) { b = arith_op_u(neg)(y); yn = 1; } + arith_op_u(udiv)(a, b, &c, &d); + if (xn != yn) *q = arith_op_u(neg)(c); else *q = c; + if (xn != yn) *r = arith_op_u(neg)(d); else *r = d; +} + +/* + * Overflow/underflow check is done the following way: obvious cases + * are checked (both upper words non-null, both upper words null...) + * and border-line occurrences are verified with an unsigned division + * (which is quite computationaly expensive). + */ +ARITH_DECL_BI_SS_S(star) +{ +#ifdef ARITHMETIC_CHECKS + arith_s z = arith_op_u(star)(x, y); + int warn = 0; + + if (x.msw > 0) { + if (y.msw > 0 +#if SIMUL_LSW_ODDLEN + || (y.lsw & SIMUL_TRAPL) +#endif + ) warn = 1; + } +#if SIMUL_LSW_ODDLEN + else if (y.msw > 0 && (x.lsw & SIMUL_TRAPL)) warn = 1; +#endif + if (!warn && (x.msw > 0 || y.msw > 0 +#if SIMUL_LSW_ODDLEN + || ((x.lsw | y.lsw) & SIMUL_TRAPL) +#endif + )) { + if (x.msw == SIMUL_MSW_MASK && x.lsw == SIMUL_LSW_MASK) { + if (y.msw == SIMUL_TRAP && y.lsw == 0) warn = 1; + } else if (!(x.msw == 0 && x.lsw == 0) + && !arith_op_s(same)(arith_op_s(slash)(z, x), y)) { + } warn = 1; + } + if (warn) ARITH_WARNING(((x.msw ^ y.msw) & SIMUL_TRAP) + ? 
ARITH_EXCEP_STAR_U : ARITH_EXCEP_STAR_O); + return z; +#else + return arith_op_u(star)(x, y); +#endif +} + +ARITH_DECL_BI_SS_S(slash) +{ + arith_s q, r; + + if (arith_op_s(same)(y, SIMUL_ZERO)) + ARITH_ERROR(ARITH_EXCEP_SLASH_D); + else if (x.msw == SIMUL_TRAP && x.lsw == 0 + && y.msw == SIMUL_MSW_MASK && y.lsw == SIMUL_LSW_MASK) + ARITH_ERROR(ARITH_EXCEP_SLASH_O); + arith_op_s(sdiv)(x, y, &q, &r); + return q; +} + +ARITH_DECL_BI_SS_S(pct) +{ + arith_s q, r; + + if (arith_op_s(same)(y, SIMUL_ZERO)) + ARITH_ERROR(ARITH_EXCEP_PCT_D); + arith_op_s(sdiv)(x, y, &q, &r); + return r; +} + +ARITH_DECL_MONO_ST_US(octconst) +{ + arith_u z = { 0, 0 }; + + for (; ARITH_OCTAL(*c); c ++) { + unsigned w = ARITH_OVAL(*c); + if (z.msw > (SIMUL_MSW_MASK / 8)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); + z = arith_op_u(lsh)(z, 3); + z.lsw |= w; + } + *ru = z; + if (z.msw & SIMUL_TRAP) { + *sp = 0; + } else { + *rs = z; + *sp = 1; + } + return c; +} + +ARITH_DECL_MONO_ST_US(decconst) +{ +#define ARITH_ALPHA_TRAP (1U << (SIMUL_MSW_WIDTH - 1)) +#define ARITH_ALPHA_MASK (ARITH_ALPHA_TRAP | (ARITH_ALPHA_TRAP - 1)) +#define ARITH_ALPHA ((ARITH_ALPHA_MASK - 10 * (ARITH_ALPHA_TRAP / 5)) + 1) +#define ARITH_ALPHA_A ((SIMUL_MSW_MASK - 10 * (SIMUL_TRAP / 5)) + 1) + + arith_u z = { 0, 0 }; + + for (; ARITH_DECIM(*c); c ++) { + unsigned w = ARITH_DVAL(*c); + SIMUL_ARITH_SUBTYPE t; + + if (z.msw > (SIMUL_MSW_MASK / 10) + || (z.msw == (SIMUL_MSW_MASK / 10) && +/* ARITH_ALPHA is between 1 and 9, inclusive. 
*/ +#if ARITH_ALPHA == 5 + z.lsw >= SIMUL_TRAPL +#else + z.lsw > ((SIMUL_TRAPL / 5) * ARITH_ALPHA_A + + ((SIMUL_TRAPL % 5) * ARITH_ALPHA_A) / 5) +#endif + )) ARITH_ERROR(ARITH_EXCEP_CONST_O); + z = arith_op_u(plus)(arith_op_u(lsh)(z, 3), + arith_op_u(lsh)(z, 1)); + t = TLSW(z.lsw + w); + if (t < z.lsw) z.msw ++; + z.lsw = t; + } + *ru = z; + if (z.msw & SIMUL_TRAP) { + *sp = 0; + } else { + *rs = z; + *sp = 1; + } + return c; + +#undef ARITH_ALPHA_A +#undef ARITH_ALPHA +#undef ARITH_ALPHA_TRAP +#undef ARITH_ALPHA_MASK +} + +ARITH_DECL_MONO_ST_US(hexconst) +{ + arith_u z = { 0, 0 }; + + for (; ARITH_HEXAD(*c); c ++) { + unsigned w = ARITH_HVAL(*c); + if (z.msw > (SIMUL_MSW_MASK / 16)) + ARITH_ERROR(ARITH_EXCEP_CONST_O); + z = arith_op_u(lsh)(z, 4); + z.lsw |= w; + } + *ru = z; + if (z.msw & SIMUL_TRAP) { + *sp = 0; + } else { + *rs = z; + *sp = 1; + } + return c; +} + +#endif + +#undef ARITH_HVAL +#undef ARITH_HEXAD +#undef ARITH_DVAL +#undef ARITH_DECIM +#undef ARITH_OVAL +#undef ARITH_OCTAL diff --git a/libexec/auxcpp/arith.h b/libexec/auxcpp/arith.h new file mode 100644 index 00000000000..ae64e5cdabc --- /dev/null +++ b/libexec/auxcpp/arith.h @@ -0,0 +1,255 @@ +/* + * Integer arithmetic evaluation, header file. + * + * (c) Thomas Pornin 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This arithmetic evaluator uses two files: this header file (arith.h) + * and the source file (arith.c). To use this code, the source file should + * be included from another .c file which defines some macros (see below). + * Then the functions defined in the arith.c file become available to the + * including source file. If those functions are defined with external + * linkage (that is, `ARITH_FUNCTION_HEADER' does not contain `static'), + * it is possible for other source files to use the arithmetic functions + * by including the arith.h header only. The source file which includes + * arith.c should *not* include arith.h. 
+ * + * If the #include is for arith.h, the following macros should be + * defined: + * + * -- If the evaluator is supposed to use a native type: + * NATIVE_SIGNED the native signed integer type + * NATIVE_UNSIGNED the native unsigned integer type + * + * -- If the evaluator is supposed to use an emulated type: + * SIMUL_ARITH_SUBTYPE the native unsigned type used for the simulation + * SIMUL_SUBTYPE_BITS the native unsigned type size + * SIMUL_NUMBITS the emulated type size + * + * -- For both cases: + * ARITH_TYPENAME the central arithmetic type name + * ARITH_FUNCTION_HEADER the qualifiers to add to function definitions + * + * The presence (respectively absence) of the NATIVE_SIGNED macro triggers + * the use of the native type evaluator (respectively simulated type + * evaluator). + * + * If the #include is for arith.c, the macros for arith.h should be defined, + * and the following should be defined as well: + * + * -- If the evaluator is supposed to use a native type: + * NATIVE_UNSIGNED_BITS the native unsigned type size + * NATIVE_SIGNED_MIN the native signed minimum value + * NATIVE_SIGNED_MAX the native signed maximum value + * (the last two macros must evaluate to signed constant expressions) + * + * -- For both cases: + * ARITH_WARNING(type) code to perform on warning + * ARITH_ERROR(type) code to perform on error + * + * The macro ARITH_WARNING() and ARITH_ERROR() are invoked with a + * numerical argument which is one of the enumeration constants + * defined below (ARITH_EXCEP_*) that identifies the specific problem. + * + * If the #include is for arith.c, the macro ARITHMETIC_CHECKS may be + * defined. When this macro is defined, checks are performed so that all + * operation which would lead to undefined or implementation-defined + * behaviour are first reported through ARITH_WARNING(). Code is smaller + * and faster without these checks, of course. 
Regardless of the status + * of that macro, divisions by 0 and overflows on signed division are + * reported as errors through ARITH_ERROR(). + * + */ + +#ifndef ARITH_H__ +#define ARITH_H__ + +enum { + /* Warnings */ + ARITH_EXCEP_CONV_O, /* overflow on conversion */ + ARITH_EXCEP_NEG_O, /* overflow on unary minus */ + ARITH_EXCEP_NOT_T, /* trap representation on bitwise inversion */ + ARITH_EXCEP_PLUS_O, /* overflow on addition */ + ARITH_EXCEP_PLUS_U, /* underflow on addition */ + ARITH_EXCEP_MINUS_O, /* overflow on subtraction */ + ARITH_EXCEP_MINUS_U, /* underflow on subtraction */ + ARITH_EXCEP_AND_T, /* trap representation on bitwise and */ + ARITH_EXCEP_XOR_T, /* trap representation on bitwise xor */ + ARITH_EXCEP_OR_T, /* trap representation on bitwise or */ + ARITH_EXCEP_LSH_W, /* left shift by type width or more */ + ARITH_EXCEP_LSH_C, /* left shift by negative count */ + ARITH_EXCEP_LSH_O, /* overflow on left shift */ + ARITH_EXCEP_LSH_U, /* underflow on left shift */ + ARITH_EXCEP_RSH_W, /* right shift by type width or more */ + ARITH_EXCEP_RSH_C, /* right shift by negative count */ + ARITH_EXCEP_RSH_N, /* right shift of negative value */ + ARITH_EXCEP_STAR_O, /* overflow on multiplication */ + ARITH_EXCEP_STAR_U, /* underflow on multiplication */ + + /* Errors */ + ARITH_EXCEP_SLASH_D, /* division by 0 */ + ARITH_EXCEP_SLASH_O, /* overflow on division */ + ARITH_EXCEP_PCT_D, /* division by 0 on modulus operator */ + ARITH_EXCEP_CONST_O /* constant too large */ +}; + +#define arith_strc_(x, y) x ## y +#define arith_strc(x, y) arith_strc_(x, y) + +#define arith_u arith_strc(u_, ARITH_TYPENAME) +#define arith_s arith_strc(s_, ARITH_TYPENAME) +#define arith_op_u(op) arith_strc(ARITH_TYPENAME, arith_strc(_u_, op)) +#define arith_op_s(op) arith_strc(ARITH_TYPENAME, arith_strc(_s_, op)) + +#define ARITH_DECL_MONO_U_U(op) ARITH_FUNCTION_HEADER arith_u \ + arith_op_u(op)(arith_u x) +#define ARITH_DECL_MONO_U_S(op) ARITH_FUNCTION_HEADER arith_s \ + 
arith_op_u(op)(arith_u x) +#define ARITH_DECL_MONO_U_I(op) ARITH_FUNCTION_HEADER int \ + arith_op_u(op)(arith_u x) +#define ARITH_DECL_MONO_U_L(op) ARITH_FUNCTION_HEADER unsigned long \ + arith_op_u(op)(arith_u x) +#define ARITH_DECL_MONO_S_U(op) ARITH_FUNCTION_HEADER arith_u \ + arith_op_s(op)(arith_s x) +#define ARITH_DECL_MONO_S_S(op) ARITH_FUNCTION_HEADER arith_s \ + arith_op_s(op)(arith_s x) +#define ARITH_DECL_MONO_S_I(op) ARITH_FUNCTION_HEADER int \ + arith_op_s(op)(arith_s x) +#define ARITH_DECL_MONO_S_L(op) ARITH_FUNCTION_HEADER long \ + arith_op_s(op)(arith_s x) +#define ARITH_DECL_MONO_I_U(op) ARITH_FUNCTION_HEADER arith_u \ + arith_op_u(op)(int x) +#define ARITH_DECL_MONO_L_U(op) ARITH_FUNCTION_HEADER arith_u \ + arith_op_u(op)(unsigned long x) +#define ARITH_DECL_MONO_I_S(op) ARITH_FUNCTION_HEADER arith_s \ + arith_op_s(op)(int x) +#define ARITH_DECL_MONO_L_S(op) ARITH_FUNCTION_HEADER arith_s \ + arith_op_s(op)(long x) +#define ARITH_DECL_MONO_ST_US(op) ARITH_FUNCTION_HEADER char *arith_op_u(op) \ + (char *c, arith_u *ru, arith_s *rs, int *sp) + +#define ARITH_DECL_BI_UU_U(op) ARITH_FUNCTION_HEADER arith_u \ + arith_op_u(op)(arith_u x, arith_u y) +#define ARITH_DECL_BI_UI_U(op) ARITH_FUNCTION_HEADER arith_u \ + arith_op_u(op)(arith_u x, int y) +#define ARITH_DECL_BI_UU_I(op) ARITH_FUNCTION_HEADER int \ + arith_op_u(op)(arith_u x, arith_u y) +#define ARITH_DECL_BI_SS_S(op) ARITH_FUNCTION_HEADER arith_s \ + arith_op_s(op)(arith_s x, arith_s y) +#define ARITH_DECL_BI_SI_S(op) ARITH_FUNCTION_HEADER arith_s \ + arith_op_s(op)(arith_s x, int y) +#define ARITH_DECL_BI_SS_I(op) ARITH_FUNCTION_HEADER int \ + arith_op_s(op)(arith_s x, arith_s y) + +#endif + +#ifdef NATIVE_SIGNED + +typedef NATIVE_SIGNED arith_s; +typedef NATIVE_UNSIGNED arith_u; + +#else + +#if SIMUL_NUMBITS > (2 * SIMUL_SUBTYPE_BITS) +#error Native subtype too small for arithmetic simulation. 
+#endif + +#define SIMUL_MSW_WIDTH (SIMUL_NUMBITS / 2) +#define SIMUL_LSW_WIDTH ((SIMUL_NUMBITS + 1) / 2) + +typedef struct { + SIMUL_ARITH_SUBTYPE msw, lsw; +} arith_u, arith_s; + +#endif + +/* functions with the unsigned type */ + +ARITH_DECL_MONO_S_U(to_u); +ARITH_DECL_MONO_I_U(fromint); +ARITH_DECL_MONO_L_U(fromulong); +ARITH_DECL_MONO_U_I(toint); +ARITH_DECL_MONO_U_L(toulong); + +ARITH_DECL_MONO_U_U(neg); +ARITH_DECL_MONO_U_U(not); +ARITH_DECL_MONO_U_I(lnot); +ARITH_DECL_MONO_U_I(lval); + +ARITH_DECL_BI_UU_U(plus); +ARITH_DECL_BI_UU_U(minus); +ARITH_DECL_BI_UI_U(lsh); +ARITH_DECL_BI_UI_U(rsh); +ARITH_DECL_BI_UU_I(lt); +ARITH_DECL_BI_UU_I(leq); +ARITH_DECL_BI_UU_I(gt); +ARITH_DECL_BI_UU_I(geq); +ARITH_DECL_BI_UU_I(same); +ARITH_DECL_BI_UU_I(neq); +ARITH_DECL_BI_UU_U(and); +ARITH_DECL_BI_UU_U(xor); +ARITH_DECL_BI_UU_U(or); +ARITH_DECL_BI_UU_U(star); +ARITH_DECL_BI_UU_U(slash); +ARITH_DECL_BI_UU_U(pct); + +/* functions with the signed type */ + +ARITH_DECL_MONO_U_S(to_s); +ARITH_DECL_MONO_I_S(fromint); +ARITH_DECL_MONO_L_S(fromlong); +ARITH_DECL_MONO_S_I(toint); +ARITH_DECL_MONO_S_L(tolong); + +ARITH_DECL_MONO_S_S(neg); +ARITH_DECL_MONO_S_S(not); +ARITH_DECL_MONO_S_I(lnot); +ARITH_DECL_MONO_S_I(lval); + +ARITH_DECL_BI_SS_S(plus); +ARITH_DECL_BI_SS_S(minus); +ARITH_DECL_BI_SI_S(lsh); +ARITH_DECL_BI_SI_S(rsh); +ARITH_DECL_BI_SS_I(lt); +ARITH_DECL_BI_SS_I(leq); +ARITH_DECL_BI_SS_I(gt); +ARITH_DECL_BI_SS_I(geq); +ARITH_DECL_BI_SS_I(same); +ARITH_DECL_BI_SS_I(neq); +ARITH_DECL_BI_SS_S(and); +ARITH_DECL_BI_SS_S(xor); +ARITH_DECL_BI_SS_S(or); +ARITH_DECL_BI_SS_S(star); +ARITH_DECL_BI_SS_S(slash); +ARITH_DECL_BI_SS_S(pct); + +/* conversions from string */ +ARITH_DECL_MONO_ST_US(octconst); +ARITH_DECL_MONO_ST_US(hexconst); +ARITH_DECL_MONO_ST_US(decconst); diff --git a/libexec/auxcpp/assert.c b/libexec/auxcpp/assert.c new file mode 100644 index 00000000000..579d47e0a0a --- /dev/null +++ b/libexec/auxcpp/assert.c @@ -0,0 +1,420 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * 
+ * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "tune.h" +#include +#include +#include +#include +#include +#include "ucppi.h" +#include "mem.h" +#include "nhash.h" + +/* + * Assertion support. Each assertion is indexed by its predicate, and + * the list of 'questions' which yield a true answer. 
+ */ + +static HTT assertions; +static int assertions_init_done = 0; + +static struct assert *new_assertion(void) +{ + struct assert *a = getmem(sizeof(struct assert)); + + a->nbval = 0; + return a; +} + +static void del_token_fifo(struct token_fifo *tf) +{ + size_t i; + + for (i = 0; i < tf->nt; i ++) + if (S_TOKEN(tf->t[i].type)) freemem(tf->t[i].name); + if (tf->nt) freemem(tf->t); +} + +static void del_assertion(void *va) +{ + struct assert *a = va; + size_t i; + + for (i = 0; i < a->nbval; i ++) del_token_fifo(a->val + i); + if (a->nbval) freemem(a->val); + freemem(a); +} + +/* + * print the contents of a token list + */ +static void print_token_fifo(struct token_fifo *tf) +{ + size_t i; + + for (i = 0; i < tf->nt; i ++) + if (ttMWS(tf->t[i].type)) fputc(' ', emit_output); + else fputs(token_name(tf->t + i), emit_output); +} + +/* + * print all assertions related to a given name + */ +static void print_assert(void *va) +{ + struct assert *a = va; + size_t i; + + for (i = 0; i < a->nbval; i ++) { + fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(a)); + print_token_fifo(a->val + i); + fprintf(emit_output, ")\n"); + } +} + +/* + * compare two token_fifo, return 0 if they are identical, 1 otherwise. + * All whitespace tokens are considered identical, but sequences of + * whitespace are not shrinked. + */ +int cmp_token_list(struct token_fifo *f1, struct token_fifo *f2) +{ + size_t i; + + if (f1->nt != f2->nt) return 1; + for (i = 0; i < f1->nt; i ++) { + if (ttMWS(f1->t[i].type) && ttMWS(f2->t[i].type)) continue; + if (f1->t[i].type != f2->t[i].type) return 1; + if (f1->t[i].type == MACROARG + && f1->t[i].line != f2->t[i].line) return 1; + if (S_TOKEN(f1->t[i].type) + && strcmp(f1->t[i].name, f2->t[i].name)) return 1; + } + return 0; +} + +/* + * for #assert + * Assertions are not part of the ISO-C89 standard, but they are sometimes + * encountered, for instance in Solaris standard include files. 
+ */ +int handle_assert(struct lexer_state *ls) +{ + int ina = 0, ltww; + struct token t; + struct token_fifo *atl = 0; + struct assert *a; + char *aname; + int ret = -1; + long l = ls->line; + int nnp; + size_t i; + + while (!next_token(ls)) { + if (ls->ctok->type == NEWLINE) break; + if (ttMWS(ls->ctok->type)) continue; + if (ls->ctok->type == NAME) { + if (!(a = HTT_get(&assertions, ls->ctok->name))) { + a = new_assertion(); + aname = sdup(ls->ctok->name); + ina = 1; + } + goto handle_assert_next; + } + error(l, "illegal assertion name for #assert"); + goto handle_assert_warp_ign; + } + goto handle_assert_trunc; + +handle_assert_next: + while (!next_token(ls)) { + if (ls->ctok->type == NEWLINE) break; + if (ttMWS(ls->ctok->type)) continue; + if (ls->ctok->type != LPAR) { + error(l, "syntax error in #assert"); + goto handle_assert_warp_ign; + } + goto handle_assert_next2; + } + goto handle_assert_trunc; + +handle_assert_next2: + atl = getmem(sizeof(struct token_fifo)); + atl->art = atl->nt = 0; + for (nnp = 1, ltww = 1; nnp && !next_token(ls);) { + if (ls->ctok->type == NEWLINE) break; + if (ltww && ttMWS(ls->ctok->type)) continue; + ltww = ttMWS(ls->ctok->type); + if (ls->ctok->type == LPAR) nnp ++; + else if (ls->ctok->type == RPAR) { + if (!(-- nnp)) goto handle_assert_next3; + } + t.type = ls->ctok->type; + if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); + aol(atl->t, atl->nt, t, TOKEN_LIST_MEMG); + } + goto handle_assert_trunc; + +handle_assert_next3: + while (!next_token(ls) && ls->ctok->type != NEWLINE) { + if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { + warning(l, "trailing garbage in #assert"); + } + } + if (atl->nt && ttMWS(atl->t[atl->nt - 1].type) && (-- atl->nt) == 0) + freemem(atl->t); + if (atl->nt == 0) { + error(l, "void assertion in #assert"); + goto handle_assert_error; + } + for (i = 0; i < a->nbval && cmp_token_list(atl, a->val + i); i ++); + if (i != a->nbval) { + /* we already have it */ + ret = 0; + goto 
handle_assert_error; + } + + /* This is a new assertion. Let's keep it. */ + aol(a->val, a->nbval, *atl, TOKEN_LIST_MEMG); + if (ina) { + HTT_put(&assertions, a, aname); + freemem(aname); + } + if (emit_assertions) { + fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(a)); + print_token_fifo(atl); + fputs(")\n", emit_output); + } + freemem(atl); + return 0; + +handle_assert_trunc: + error(l, "unfinished #assert"); +handle_assert_error: + if (atl) { + del_token_fifo(atl); + freemem(atl); + } + if (ina) { + freemem(aname); + freemem(a); + } + return ret; +handle_assert_warp_ign: + while (!next_token(ls) && ls->ctok->type != NEWLINE); + if (ina) { + freemem(aname); + freemem(a); + } + return ret; +} + +/* + * for #unassert + */ +int handle_unassert(struct lexer_state *ls) +{ + int ltww; + struct token t; + struct token_fifo atl; + struct assert *a; + int ret = -1; + long l = ls->line; + int nnp; + size_t i; + + atl.art = atl.nt = 0; + while (!next_token(ls)) { + if (ls->ctok->type == NEWLINE) break; + if (ttMWS(ls->ctok->type)) continue; + if (ls->ctok->type == NAME) { + if (!(a = HTT_get(&assertions, ls->ctok->name))) { + ret = 0; + goto handle_unassert_warp; + } + goto handle_unassert_next; + } + error(l, "illegal assertion name for #unassert"); + goto handle_unassert_warp; + } + goto handle_unassert_trunc; + +handle_unassert_next: + while (!next_token(ls)) { + if (ls->ctok->type == NEWLINE) break; + if (ttMWS(ls->ctok->type)) continue; + if (ls->ctok->type != LPAR) { + error(l, "syntax error in #unassert"); + goto handle_unassert_warp; + } + goto handle_unassert_next2; + } + if (emit_assertions) + fprintf(emit_output, "#unassert %s\n", HASH_ITEM_NAME(a)); + HTT_del(&assertions, HASH_ITEM_NAME(a)); + return 0; + +handle_unassert_next2: + for (nnp = 1, ltww = 1; nnp && !next_token(ls);) { + if (ls->ctok->type == NEWLINE) break; + if (ltww && ttMWS(ls->ctok->type)) continue; + ltww = ttMWS(ls->ctok->type); + if (ls->ctok->type == LPAR) nnp ++; + else if 
(ls->ctok->type == RPAR) { + if (!(-- nnp)) goto handle_unassert_next3; + } + t.type = ls->ctok->type; + if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); + aol(atl.t, atl.nt, t, TOKEN_LIST_MEMG); + } + goto handle_unassert_trunc; + +handle_unassert_next3: + while (!next_token(ls) && ls->ctok->type != NEWLINE) { + if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { + warning(l, "trailing garbage in #unassert"); + } + } + if (atl.nt && ttMWS(atl.t[atl.nt - 1].type) && (-- atl.nt) == 0) + freemem(atl.t); + if (atl.nt == 0) { + error(l, "void assertion in #unassert"); + return ret; + } + for (i = 0; i < a->nbval && cmp_token_list(&atl, a->val + i); i ++); + if (i != a->nbval) { + /* we have it, undefine it */ + del_token_fifo(a->val + i); + if (i < (a->nbval - 1)) + mmvwo(a->val + i, a->val + i + 1, (a->nbval - i - 1) + * sizeof(struct token_fifo)); + if ((-- a->nbval) == 0) freemem(a->val); + if (emit_assertions) { + fprintf(emit_output, "#unassert %s(", + HASH_ITEM_NAME(a)); + print_token_fifo(&atl); + fputs(")\n", emit_output); + } + } + ret = 0; + goto handle_unassert_finish; + +handle_unassert_trunc: + error(l, "unfinished #unassert"); +handle_unassert_finish: + if (atl.nt) del_token_fifo(&atl); + return ret; +handle_unassert_warp: + while (!next_token(ls) && ls->ctok->type != NEWLINE); + return ret; +} + +/* + * Add the given assertion (as string). + */ +int make_assertion(char *aval) +{ + struct lexer_state lls; + size_t n = strlen(aval) + 1; + char *c = sdup(aval); + int ret; + + *(c + n - 1) = '\n'; + init_buf_lexer_state(&lls, 0); + lls.flags = DEFAULT_LEXER_FLAGS; + lls.input = 0; + lls.input_string = (unsigned char *)c; + lls.pbuf = 0; + lls.ebuf = n; + lls.line = -1; + ret = handle_assert(&lls); + freemem(c); + free_lexer_state(&lls); + return ret; +} + +/* + * Remove the given assertion (as string). 
+ */ +int destroy_assertion(char *aval) +{ + struct lexer_state lls; + size_t n = strlen(aval) + 1; + char *c = sdup(aval); + int ret; + + *(c + n - 1) = '\n'; + init_buf_lexer_state(&lls, 0); + lls.flags = DEFAULT_LEXER_FLAGS; + lls.input = 0; + lls.input_string = (unsigned char *)c; + lls.pbuf = 0; + lls.ebuf = n; + lls.line = -1; + ret = handle_unassert(&lls); + freemem(c); + free_lexer_state(&lls); + return ret; +} + +/* + * erase the assertion table + */ +void wipe_assertions(void) +{ + if (assertions_init_done) HTT_kill(&assertions); + assertions_init_done = 0; +} + +/* + * initialize the assertion table + */ +void init_assertions(void) +{ + wipe_assertions(); + HTT_init(&assertions, del_assertion); + assertions_init_done = 1; +} + +/* + * retrieve an assertion from the hash table + */ +struct assert *get_assertion(char *name) +{ + return HTT_get(&assertions, name); +} + +/* + * print already defined assertions + */ +void print_assertions(void) +{ + HTT_scan(&assertions, print_assert); +} diff --git a/libexec/auxcpp/atest.c b/libexec/auxcpp/atest.c new file mode 100644 index 00000000000..7137d930f00 --- /dev/null +++ b/libexec/auxcpp/atest.c @@ -0,0 +1,236 @@ +#include +#include +#include +#include + +#if defined TEST_NATIVE + +#define NATIVE_SIGNED int +#define NATIVE_UNSIGNED unsigned + +#define NATIVE_UNSIGNED_BITS 32 +#define NATIVE_SIGNED_MIN LONG_MIN +#define NATIVE_SIGNED_MAX LONG_MAX + +#elif defined TEST_SIMUL + +#define SIMUL_ARITH_SUBTYPE unsigned short +#define SIMUL_SUBTYPE_BITS 16 +#define SIMUL_NUMBITS 31 + +#else + +#error ====== Either TEST_NATIVE or TEST_SIMUL must be defined. 
+ +#endif + +#define ARITH_TYPENAME zoinx +#define ARITH_FUNCTION_HEADER static inline + +#define ARITH_WARNING(type) z_warn(type) +#define ARITH_ERROR(type) z_error(type) + +void z_warn(int type); +void z_error(int type); + +#include "arith.c" + +#if defined TEST_NATIVE + +static inline u_zoinx unsigned_to_uz(unsigned x) +{ + return (u_zoinx)x; +} + +static inline s_zoinx int_to_sz(int x) +{ + return (s_zoinx)x; +} + +static inline void print_uz(u_zoinx x) +{ + printf("%u", x); +} + +static inline void print_sz(s_zoinx x) +{ + printf("%d", x); +} + +#else + +static inline u_zoinx unsigned_to_uz(unsigned x) +{ + u_zoinx v; + v.msw = (x >> 16) & 0x7FFFU; + v.lsw = x & 0xFFFFU; + return v; +} + +static inline s_zoinx int_to_sz(int x) +{ + return unsigned_to_uz((unsigned)x); +} + +static inline void print_uz(u_zoinx x) +{ + printf("%u", ((unsigned)(x.msw) << 16) + (unsigned)(x.lsw)); +} + +static inline void print_sz(s_zoinx x) +{ + if (x.msw & 0x4000U) { + putchar('-'); + x = zoinx_u_neg(x); + } + print_uz(x); +} + +#endif + +static inline void print_int(int x) +{ + printf("%d", x); +} + +static jmp_buf jbuf; + +void z_warn(int type) +{ + switch (type) { + case ARITH_EXCEP_CONV_O: + fputs("[overflow on conversion] ", stdout); break; + case ARITH_EXCEP_NEG_O: + fputs("[overflow on unary minus] ", stdout); break; + case ARITH_EXCEP_NOT_T: + fputs("[trap representation on bitwise inversion] ", stdout); + break; + case ARITH_EXCEP_PLUS_O: + fputs("[overflow on addition] ", stdout); break; + case ARITH_EXCEP_PLUS_U: + fputs("[underflow on addition] ", stdout); break; + case ARITH_EXCEP_MINUS_O: + fputs("[overflow on subtraction] ", stdout); break; + case ARITH_EXCEP_MINUS_U: + fputs("[underflow on subtraction] ", stdout); break; + case ARITH_EXCEP_AND_T: + fputs("[trap representation on bitwise and] ", stdout); break; + case ARITH_EXCEP_XOR_T: + fputs("[trap representation on bitwise xor] ", stdout); break; + case ARITH_EXCEP_OR_T: + fputs("[trap representation on bitwise 
or] ", stdout); break; + case ARITH_EXCEP_LSH_W: + fputs("[left shift by type width or more] ", stdout); break; + case ARITH_EXCEP_LSH_C: + fputs("[left shift by negative count] ", stdout); break; + case ARITH_EXCEP_LSH_O: + fputs("[overflow on left shift] ", stdout); break; + case ARITH_EXCEP_LSH_U: + fputs("[underflow on left shift] ", stdout); break; + case ARITH_EXCEP_RSH_W: + fputs("[right shift by type width or more] ", stdout); break; + case ARITH_EXCEP_RSH_C: + fputs("[right shift by negative count] ", stdout); break; + case ARITH_EXCEP_RSH_N: + fputs("[right shift of negative value] ", stdout); break; + case ARITH_EXCEP_STAR_O: + fputs("[overflow on multiplication] ", stdout); break; + case ARITH_EXCEP_STAR_U: + fputs("[underflow on multiplication] ", stdout); break; + default: + fprintf(stdout, "UNKNOWN WARNING TYPE: %d\n", type); + exit(EXIT_FAILURE); + } +} + +void z_error(int type) +{ + switch (type) { + case ARITH_EXCEP_SLASH_D: + fputs("division by 0\n", stdout); + break; + case ARITH_EXCEP_SLASH_O: + fputs("overflow on division\n", stdout); + break; + case ARITH_EXCEP_PCT_D: + fputs("division by 0 on modulus operator\n", stdout); + break; + default: + fprintf(stdout, "UNKNOWN ERROR TYPE: %d\n", type); + exit(EXIT_FAILURE); + } + longjmp(jbuf, 1); +} + +int main(void) +{ + +#define OPTRY_GEN(op, x, y, convx, convy, printz) do { \ + printf("%s %s %s -> ", #x, #op, #y); \ + if (!setjmp(jbuf)) { \ + printz(zoinx_ ## op (convx(x), convy(y))); \ + putchar('\n'); \ + } \ + } while (0) + +#define IDENT(x) x + +#define OPTRY_UU_U(op, x, y) \ + OPTRY_GEN(op, x, y, unsigned_to_uz, unsigned_to_uz, print_uz) + +#define OPTRY_UI_U(op, x, y) \ + OPTRY_GEN(op, x, y, unsigned_to_uz, IDENT, print_uz) + +#define OPTRY_UU_I(op, x, y) \ + OPTRY_GEN(op, x, y, unsigned_to_uz, unsigned_to_uz, print_int) + +#define OPTRY_SS_S(op, x, y) \ + OPTRY_GEN(op, x, y, int_to_sz, int_to_sz, print_sz) + +#define OPTRY_SI_S(op, x, y) \ + OPTRY_GEN(op, x, y, int_to_sz, IDENT, print_sz) 
+ +#define OPTRY_SS_I(op, x, y) \ + OPTRY_GEN(op, x, y, int_to_sz, int_to_sz, print_int) + + OPTRY_UU_U(u_plus, 3, 4); + OPTRY_UU_U(u_plus, 1549587182, 1790478233); + OPTRY_UU_U(u_minus, 1549587182, 1790478233); + OPTRY_UU_U(u_minus, 1790478233, 1549587182); + OPTRY_UU_U(u_star, 432429875, 347785487); + OPTRY_UU_U(u_slash, 432429875, 34487); + OPTRY_UU_U(u_pct, 432429875, 34487); + OPTRY_UI_U(u_lsh, 1783, 19); + OPTRY_UI_U(u_lsh, 1783, 20); + OPTRY_UI_U(u_lsh, 1783, 21); + OPTRY_UI_U(u_rsh, 475902857, 7); + OPTRY_UI_U(u_rsh, 475902857, 17); + OPTRY_UI_U(u_rsh, 475902857, 38); + + OPTRY_SS_S(s_plus, 3, 4); + OPTRY_SS_S(s_plus, 1549587182, 1790478233); + OPTRY_SS_S(s_plus, -1549587182, -1790478233); + OPTRY_SS_S(s_minus, 1549587182, 1790478233); + OPTRY_SS_S(s_minus, 1790478233, 1549587182); + OPTRY_SS_S(s_minus, -1790478233, -1549587182); + OPTRY_SS_S(s_minus, -1790478233, 1549587182); + OPTRY_SS_S(s_star, 432429875, 347785487); + OPTRY_SS_S(s_star, 432429875, -347785487); + OPTRY_SS_S(s_slash, 432429875, 34487); + OPTRY_SS_S(s_slash, -432429875, 34487); + OPTRY_SS_S(s_slash, 432429875, -34487); + OPTRY_SS_S(s_slash, -432429875, -34487); + OPTRY_SS_S(s_slash, 432429875, 0); + OPTRY_SS_S(s_slash, -2147483647 - 1, -1); + OPTRY_SS_S(s_pct, 432429875, 34487); + OPTRY_SS_S(s_pct, 432429875, 0); + OPTRY_SI_S(s_lsh, -1, 10); + OPTRY_SI_S(s_lsh, 1783, 19); + OPTRY_SI_S(s_lsh, 1783, 20); + OPTRY_SI_S(s_lsh, 1783, 21); + OPTRY_SI_S(s_rsh, -1024, 8); + OPTRY_SI_S(s_rsh, 475902857, 7); + OPTRY_SI_S(s_rsh, 475902857, 17); + + return 0; +} diff --git a/libexec/auxcpp/config.h b/libexec/auxcpp/config.h new file mode 100644 index 00000000000..a1bbe993bc5 --- /dev/null +++ b/libexec/auxcpp/config.h @@ -0,0 +1,352 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * (c) Louis P. 
Santillan 2011 + * This file is derived from tune.h + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* ====================================================================== */ +/* + * The LOW_MEM macro triggers the use of macro storage which uses less + * memory. It actually also improves performance on large, modern machines + * (due to less cache pressure). This option implies no limitation (except + * on the number of arguments a macro may take, which is then limited to 32766) + * so it is on by default. Non-LOW_MEM code is considered deprecated.
+ */ +#define LOW_MEM + +/* ====================================================================== */ +/* + * Define AMIGA for systems using "drive letters" at the beginning of + * some paths; define MSDOS on systems with drive letters and using + * backslashes to separate directory components. + */ +/* #define AMIGA */ +/* #define MSDOS */ + +/* ====================================================================== */ +/* + * Define this if your compiler does not know the strftime() function; + * TurboC 2.01 under Msdos does not know strftime(). + */ +/* #define NOSTRFTIME */ + +/* ====================================================================== */ +/* + * Buffering: there are two levels of buffering on input and output streams: + * the standard libc buffering (manageable with setbuf() and setvbuf()) + * and some buffering provided by ucpp itself. The ucpp buffering uses + * two buffers, of size respectively INPUT_BUF_MEMG and OUTPUT_BUF_MEMG + * (as defined below). + * You can disable one or both of these bufferings by defining the macros + * NO_LIBC_BUF and NO_UCPP_BUF. + */ +/* #define NO_LIBC_BUF */ +/* #define NO_UCPP_BUF */ + +/* + * On Unix stations, the system call mmap() might be used on input files. + * This option is a subclause of ucpp internal buffering. On one station, + * a 10% speed improvement was observed. Do not define this unless the + * host architecture has the following characteristics: + * -- Posix / Single Unix compliance + * -- Text files correspond one to one with memory representation + * If a file is not seekable or not mmapable, ucpp will revert to the + * standard fread() solution. + * + * This feature is still considered beta quality. On some systems where + * files can be bigger than memory address space (mainly, 32-bit systems + * with files bigger than 4 GB), this option makes ucpp fail to operate + * on those extremely large files.
+ */ +#define UCPP_MMAP + +/* + * Performance issues: + * -- On memory-starved systems, such as Minix-i86, do not use ucpp + * buffering; keep only libc buffering. + * -- If you do not use libc buffering, activate the UCPP_MMAP option. + * Note that the UCPP_MMAP option is ignored if ucpp buffering is not + * activated. + * + * On an Athlon 1200 running FreeBSD 4.7, the best performances are + * achieved when libc buffering is activated and/or UCPP_MMAP is on. + */ + +/* ====================================================================== */ +/* + * Define this if you want ucpp to generate tokenized PRAGMA tokens; + * otherwise, it will generate raw string contents. This setting is + * irrelevant to the stand-alone version of ucpp. + */ +#define PRAGMA_TOKENIZE + +/* + * Define this to the special character that marks the end of tokens with + * a string value inside a tokenized PRAGMA token. The #pragma and _Pragma() + * directives which use this character will be a bit more difficult to + * decode (but ucpp will not mind). 0 cannot be used. '\n' is fine because + * it cannot appear inside a #pragma or _Pragma(), since newlines cannot be + * embedded inside tokens, neither directly nor by macro substitution and + * stringization. Besides, '\n' is portable. + */ +#define PRAGMA_TOKEN_END ((unsigned char)'\n') + +/* + * Define this if you want ucpp to include encountered #pragma directives + * in its output in non-lexer mode; _Pragma() are translated to equivalent + * #pragma directives. + */ +#define PRAGMA_DUMP + +/* + * According to my interpretation of the C99 standard, _Pragma() are + * evaluated wherever macro expansion could take place. However, Neil Booth, + * whose mother language is English (contrary to me) and who is well aware + * of the C99 standard (and especially the C preprocessor) told me that + * it was unclear whether _Pragma() are evaluated inside directives such + * as #if, #include and #line. 
If you want to disable the evaluation of + * _Pragma() inside such directives, define the following macro. + */ +/* #define NO_PRAGMA_IN_DIRECTIVE */ + +/* + * The C99 standard mandates that the operator `##' must yield a single, + * valid token, lest undefined behaviour befall upon thy head. Hence, + * for instance, `+ ## +=' is forbidden, because `++=' is not a valid + * token (although it is a valid list of two tokens, `++' and `='). + * However, ucpp only emits a warning for such sin, and unmerges the + * tokens (thus emitting `+' then `+=' for that example). When ucpp + * produces text output, those two tokens will be separated by a space + * character so that the basic rule of text output is preserved: when + * parsed again, text output yields the exact same stream of tokens. + * That extra space is virtual: it does not count as a true whitespace + * token for stringization. + * + * However, it might be desirable, for some uses other than preprocessing + * C source code, not to emit that extra space at all. To make ucpp behave + * that way, define the DSHARP_TOKEN_MERGE macro. Please note that this + * can trigger spurious token merging. For instance, with that macro + * activated, `+ ## +=' will be output as `++=' which, if preprocessed + * again, will read as `++' followed by `='. + * + * All this is irrelevant to lexer mode; and trying to merge incompatible + * tokens is a shooting offence, anyway. + */ +/* #define DSHARP_TOKEN_MERGE */ + +/* ====================================================================== */ +/* + * Define INMACRO_FLAG to include two flags to the structure lexer_state, + * that tell whether tokens come from a macro-replacement, and count those + * macro-replacements. + */ +/* #define INMACRO_FLAG */ + +/* ====================================================================== */ +/* + * Paths where files are looked for by default, when #include is used. + * Typical path is /usr/local/include and /usr/include, in that order. 
+ * If you want to set up no path, define the macro to 0. + * + * For Linux, get gcc includes too, or you will miss things like stddef.h. + * The exact path varies much, depending on the distribution. + */ +#define STD_INCLUDE_PATH "/usr/local/include", "/usr/include" + +/* ====================================================================== */ +/* + * Arithmetic code for evaluation of #if expressions. Evaluation + * uses either a native machine type, or an emulated two's complement + * type. Division by 0 and overflow on division are considered as errors + * and reported as such. If ARITHMETIC_CHECKS is defined, all other + * operations that imply undefined or implementation-defined behaviour + * are reported as warnings but otherwise performed nonetheless. + * + * For native type evaluation, the following macros should be defined: + * NATIVE_SIGNED the native signed type + * NATIVE_UNSIGNED the native corresponding unsigned type + * NATIVE_UNSIGNED_BITS the native unsigned type width, in bits + * NATIVE_SIGNED_MIN the native signed type minimum value + * NATIVE_SIGNED_MAX the native signed type maximum value + * + * The code in the arith.c file performs some tricky detection + * operations on the native type representation and possible existence + * of a trap representation. These operations assume a C99-compliant + * compiler; on a C90-only compiler, the operations are valid but may + * yield incorrect results. You may force those settings with some + * more macros: see the comments in arith.c (look for "ARCH_DEFINED"). + * Remember that this is mostly a non-issue, unless you are building + * ucpp with a pre-C99 cross-compiler and either the host or target + * architecture uses a non-two's complement representation of signed + * integers. Such a combination is pretty rare nowadays, so the best + * you can do is forgetting completely this paragraph and live in peace. 
+ * + * + * If you do not have a handy native type (for instance, you compile ucpp + * with a C90 compiler which lacks the "long long" type, or you compile + * ucpp for a cross-compiler which should support an evaluation integer + * type of a size that is not available on the host machine), you may use + * a simulated type. The type uses two's complement representation and + * may have any width from 2 bits to twice the underlying native type + * width, inclusive (odd widths are allowed). To use an emulated type, + * make sure that NATIVE_SIGNED is not defined, and define the following + * macros: + * SIMUL_ARITH_SUBTYPE the native underlying type to use + * SIMUL_SUBTYPE_BITS the native underlying type width + * SIMUL_NUMBITS the emulated type width + * + * Undefined and implementation-defined behaviours are warned upon, if + * ARITHMETIC_CHECKS is defined. Results are truncated to the type + * width; shift count for the << and >> operators is reduced modulo the + * emulated type width; right shifting of a signed negative value performs + * sign extension (the result is left-padded with bits set to 1). + */ + +/* + * For native type evaluation with a 64-bit "long long" type. + */ +#define NATIVE_SIGNED long long +#define NATIVE_UNSIGNED unsigned long long +#define NATIVE_UNSIGNED_BITS 64 +#define NATIVE_SIGNED_MIN (-9223372036854775807LL - 1) +#define NATIVE_SIGNED_MAX 9223372036854775807LL + +/* + * For emulation of a 64-bit type using a native 32-bit "unsigned long" + * type. +#undef NATIVE_SIGNED +#define SIMUL_ARITH_SUBTYPE unsigned long +#define SIMUL_SUBTYPE_BITS 32 +#define SIMUL_NUMBITS 64 + */ + +/* + * Comment out the following line if you want to deactivate arithmetic + * checks (warnings upon undefined and implementation-defined + * behaviour). Arithmetic checks slow down arithmetic operations a bit, + * especially multiplications, but this should not be an issue with + * typical C source code.
+ */ +#define ARITHMETIC_CHECKS + +/* ====================================================================== */ +/* + * To force signedness of wide character constants, define WCHAR_SIGNEDNESS + * to 0 for unsigned, 1 for signed. By default, wide character constants + * are signed if the native `char' type is signed, and unsigned otherwise. +#define WCHAR_SIGNEDNESS 0 + */ + +/* + * Standard assertions. They should include one cpu() assertion, one machine() + * assertion (identical to cpu()), and one or more system() assertions. + * + * for Linux/PC: cpu(i386), machine(i386), system(unix), system(linux) + * for Linux/Alpha: cpu(alpha), machine(alpha), system(unix), system(linux) + * for Sparc/Solaris: cpu(sparc), machine(sparc), system(unix), system(solaris) + * + * These are only suggestions. On Solaris, machine() should be defined + * for i386 or sparc (standard system header use such an assertion). For + * cross-compilation, define assertions related to the target architecture. + * + * If you want no standard assertion, define STD_ASSERT to 0. + */ +#define STD_ASSERT 0 +/* +#define STD_ASSERT "cpu(i386)", "machine(i386)", "system(unix)", \ + "system(freebsd)" +*/ + +/* ====================================================================== */ +/* + * System predefined macros. Nothing really mandatory, but some programs + * might rely on those. + * Each string must be either "name" or "name=token-list". If you want + * no predefined macro, define STD_MACROS to 0. + */ +#define STD_MACROS 0 +/* +#define STD_MACROS "__FreeBSD=4", "__unix", "__i386", \ + "__FreeBSD__=4", "__unix__", "__i386__" +*/ + +/* ====================================================================== */ +/* + * Default flags; HANDLE_ASSERTIONS is required for Solaris system headers. + * See cpp.h for the definition of these flags. 
+ */ +#define DEFAULT_CPP_FLAGS (DISCARD_COMMENTS | WARN_STANDARD \ + | WARN_PRAGMA | FAIL_SHARP | MACRO_VAARG \ + | CPLUSPLUS_COMMENTS | LINE_NUM | TEXT_OUTPUT \ + | KEEP_OUTPUT | HANDLE_TRIGRAPHS \ + | HANDLE_ASSERTIONS) +#define DEFAULT_LEXER_FLAGS (DISCARD_COMMENTS | WARN_STANDARD | FAIL_SHARP \ + | MACRO_VAARG | CPLUSPLUS_COMMENTS | LEXER \ + | HANDLE_TRIGRAPHS | HANDLE_ASSERTIONS) + +/* ====================================================================== */ +/* + * Define this to use sigsetjmp()/siglongjmp() instead of setjmp()/longjmp(). + * This is non-ANSI, but it improves performance on some POSIX systems. + * On typical C source code, such improvement is completely negligible. + */ +/* #define POSIX_JMP */ + +/* ====================================================================== */ +/* + * Maximum value (plus one) of a character handled by the lexer; 128 is + * alright for ASCII native source code, but 256 is needed for EBCDIC. + * 256 is safe in both cases; you will have big problems if you set + * this value to INT_MAX or above. On Minix-i86 or Msdos (small memory + * model), define MAX_CHAR_VAL to 128. + * + * Set MAX_CHAR_VAL to a power of two to increase lexing speed. Beware + * that lexer.c defines a static array of size MSTATE * MAX_CHAR_VAL + * values of type int (MSTATE is defined in lexer.c and is about 40). + */ +#define MAX_CHAR_VAL 128 + +/* + * If you want some extra character to be considered as whitespace, + * define this macro to that space. On ISO-8859-1 machines, 160 is + * the code for the unbreakable space. + */ +/* #define UNBREAKABLE_SPACE 160 */ + +/* + * If you want whitespace tokens contents to be recorded (making them + * tokens with a string content), define this. The macro STRING_TOKEN + * will be adjusted accordingly. + * Without this option, whitespace tokens are not even returned by the + * lex() function.
This is irrelevant for the non-lexer mode (almost -- + * it might slow down a bit ucpp, and with this option, comments will be + * kept inside #pragma directives). + */ +/* #define SEMPER_FIDELIS */ + +/* End of options overridable by UCPP_CONFIG and config.h */ diff --git a/libexec/auxcpp/cpp.c b/libexec/auxcpp/cpp.c new file mode 100644 index 00000000000..7cdc358cac6 --- /dev/null +++ b/libexec/auxcpp/cpp.c @@ -0,0 +1,2565 @@ +/* + * C and T preprocessor, and integrated lexer + * (c) Thomas Pornin 1999 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#define VERS_MAJ 1 +#define VERS_MIN 3 +/* uncomment the following if you cannot set it with a compiler flag */ +/* #define STAND_ALONE */ + +#include "tune.h" +#include +#include +#include +#include +#include +#include +#include +#include "ucppi.h" +#include "mem.h" +#include "nhash.h" +#ifdef UCPP_MMAP +#include +#include +#include +#include +#endif + +/* + * The standard path where includes are looked for. + */ +#ifdef STAND_ALONE +static char *include_path_std[] = { STD_INCLUDE_PATH, 0 }; +#endif +static char **include_path; +static size_t include_path_nb = 0; + +int no_special_macros = 0; +int emit_dependencies = 0, emit_defines = 0, emit_assertions = 0; +FILE *emit_output; + +#ifdef STAND_ALONE +static char *system_macros_def[] = { STD_MACROS, 0 }; +static char *system_assertions_def[] = { STD_ASSERT, 0 }; +#endif + +char *current_filename = 0, *current_long_filename = 0; +static int current_incdir = -1; + +#ifndef NO_UCPP_ERROR_FUNCTIONS +/* + * "ouch" is the name for an internal ucpp error. If AUDIT is not defined, + * no code calling this function will be generated; a "ouch" may still be + * emitted by getmem() (in mem.c) if MEM_CHECK is defined, but this "ouch" + * does not use this function. + */ +void ucpp_ouch(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%s: ouch, ", current_filename); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + die(); +} + +/* + * report an error, with current_filename, line, and printf-like syntax + */ +void ucpp_error(long line, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (line > 0) + fprintf(stderr, "%s: line %ld: ", current_filename, line); + else if (line == 0) fprintf(stderr, "%s: ", current_filename); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + if (line >= 0) { + struct stack_context *sc = report_context(); + size_t i; + + for (i = 0; sc[i].line >= 0; i ++) + fprintf(stderr, "\tincluded from %s:%ld\n", + sc[i].long_name ? 
sc[i].long_name : sc[i].name, + sc[i].line); + freemem(sc); + } + va_end(ap); +} + +/* + * like error(), with the mention "warning" + */ +void ucpp_warning(long line, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (line > 0) + fprintf(stderr, "%s: warning: line %ld: ", + current_filename, line); + else if (line == 0) + fprintf(stderr, "%s: warning: ", current_filename); + else fprintf(stderr, "warning: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + if (line >= 0) { + struct stack_context *sc = report_context(); + size_t i; + + for (i = 0; sc[i].line >= 0; i ++) + fprintf(stderr, "\tincluded from %s:%ld\n", + sc[i].long_name ? sc[i].long_name : sc[i].name, + sc[i].line); + freemem(sc); + } + va_end(ap); +} +#endif /* NO_UCPP_ERROR_FUNCTIONS */ + +/* + * Some memory allocations are manually garbage-collected; essentially, + * strings duplicated in the process of macro replacement. Each such + * string is referenced in the garbage_fifo, which is cleared when all + * nested macros have been resolved. + */ + +struct garbage_fifo { + char **garbage; + size_t ngarb, memgarb; +}; + +/* + * throw_away() marks a string to be collected later + */ +void throw_away(struct garbage_fifo *gf, char *n) +{ + wan(gf->garbage, gf->ngarb, n, gf->memgarb); +} + +/* + * free marked strings + */ +void garbage_collect(struct garbage_fifo *gf) +{ + size_t i; + + for (i = 0; i < gf->ngarb; i ++) freemem(gf->garbage[i]); + gf->ngarb = 0; +} + +static void init_garbage_fifo(struct garbage_fifo *gf) +{ + gf->garbage = getmem((gf->memgarb = GARBAGE_LIST_MEMG) + * sizeof(char *)); + gf->ngarb = 0; +} + +static void free_garbage_fifo(struct garbage_fifo *gf) +{ + garbage_collect(gf); + freemem(gf->garbage); + freemem(gf); +} + +/* + * order is important: it must match the token-constants declared as an + * enum in the header file. 
+ */ +char *operators_name[] = { + " ", "\n", " ", + "0000", "name", "bunch", "pragma", "context", + "\"dummy string\"", "'dummy char'", + "/", "/=", "-", "--", "-=", "->", "+", "++", "+=", "<", "<=", "<<", + "<<=", ">", ">=", ">>", ">>=", "=", "==", +#ifdef CAST_OP + "=>", +#endif + "~", "!=", "&", "&&", "&=", "|", "||", "|=", "%", "%=", "*", "*=", + "^", "^=", "!", + "{", "}", "[", "]", "(", ")", ",", "?", ";", + ":", ".", "...", "#", "##", " ", "ouch", "<:", ":>", "<%", "%>", + "%:", "%:%:" +}; + +/* the ascii representation of a token */ +#ifdef SEMPER_FIDELIS +#define tname(x) (ttWHI((x).type) ? " " : S_TOKEN((x).type) \ + ? (x).name : operators_name[(x).type]) +#else +#define tname(x) (S_TOKEN((x).type) ? (x).name \ + : operators_name[(x).type]) +#endif + +char *token_name(struct token *t) +{ + return tname(*t); +} + +/* + * To speed up deeply nested and repeated inclusions, we: + * -- use a hash table to remember where we found each file + * -- remember when the file is protected by a #ifndef/#define/#endif + * construction; we can then avoid including several times a file + * when this is not necessary. + * -- remember in which directory, in the include path, the file was found. + */ +struct found_file { + hash_item_header head; /* first field */ + char *name; + char *protect; +}; + +/* + * For files from system include path. 
+ */ +struct found_file_sys { + hash_item_header head; /* first field */ + struct found_file *rff; + int incdir; +}; + +static HTT found_files, found_files_sys; +static int found_files_init_done = 0, found_files_sys_init_done = 0; + +static struct found_file *new_found_file(void) +{ + struct found_file *ff = getmem(sizeof(struct found_file)); + + ff->name = 0; + ff->protect = 0; + return ff; +} + +static void del_found_file(void *m) +{ + struct found_file *ff = (struct found_file *)m; + + if (ff->name) freemem(ff->name); + if (ff->protect) freemem(ff->protect); + freemem(ff); +} + +static struct found_file_sys *new_found_file_sys(void) +{ + struct found_file_sys *ffs = getmem(sizeof(struct found_file_sys)); + + ffs->rff = 0; + ffs->incdir = -1; + return ffs; +} + +static void del_found_file_sys(void *m) +{ + struct found_file_sys *ffs = (struct found_file_sys *)m; + + freemem(ffs); +} + +/* + * To keep up with the #ifndef/#define/#endif protection mechanism + * detection. + */ +struct protect protect_detect; +static struct protect *protect_detect_stack = 0; + +void set_init_filename(char *x, int real_file) +{ + if (current_filename) freemem(current_filename); + current_filename = sdup(x); + current_long_filename = 0; + current_incdir = -1; + if (real_file) { + protect_detect.macro = 0; + protect_detect.state = 1; + protect_detect.ff = new_found_file(); + protect_detect.ff->name = sdup(x); + HTT_put(&found_files, protect_detect.ff, x); + } else { + protect_detect.state = 0; + } +} + +static void init_found_files(void) +{ + if (found_files_init_done) HTT_kill(&found_files); + HTT_init(&found_files, del_found_file); + found_files_init_done = 1; + if (found_files_sys_init_done) HTT_kill(&found_files_sys); + HTT_init(&found_files_sys, del_found_file_sys); + found_files_sys_init_done = 1; +} + +/* + * Set the lexer state at the beginning of a file. + */ +static void reinit_lexer_state(struct lexer_state *ls, int wb) +{ +#ifndef NO_UCPP_BUF + ls->input_buf = wb ? 
getmem(INPUT_BUF_MEMG) : 0; +#ifdef UCPP_MMAP + ls->from_mmap = 0; +#endif +#endif + ls->input = 0; + ls->ebuf = ls->pbuf = 0; + ls->nlka = 0; + ls->macfile = 0; + ls->discard = 1; + ls->last = 0; /* we suppose '\n' is not 0 */ + ls->line = 1; + ls->ltwnl = 1; + ls->oline = 1; + ls->pending_token = 0; + ls->cli = 0; + ls->copy_line[COPY_LINE_LENGTH - 1] = 0; + ls->ifnest = 0; + ls->condf[0] = ls->condf[1] = 0; +} + +/* + * Initialize the struct lexer_state, with optional input and output buffers. + */ +void init_buf_lexer_state(struct lexer_state *ls, int wb) +{ + reinit_lexer_state(ls, wb); +#ifndef NO_UCPP_BUF + ls->output_buf = wb ? getmem(OUTPUT_BUF_MEMG) : 0; +#endif + ls->sbuf = 0; + ls->output_fifo = 0; + + ls->ctok = getmem(sizeof(struct token)); + ls->ctok->name = getmem(ls->tknl = TOKEN_NAME_MEMG); + ls->pending_token = 0; + + ls->flags = 0; + ls->count_trigraphs = 0; + ls->gf = getmem(sizeof(struct garbage_fifo)); + init_garbage_fifo(ls->gf); + ls->condcomp = 1; + ls->condnest = 0; +#ifdef INMACRO_FLAG + ls->inmacro = 0; + ls->macro_count = 0; +#endif +} + +/* + * Initialize the (complex) struct lexer_state. + */ +void init_lexer_state(struct lexer_state *ls) +{ + init_buf_lexer_state(ls, 1); + ls->input = 0; +} + +/* + * Restore what is needed from a lexer_state. This is used for #include. 
+ */ +static void restore_lexer_state(struct lexer_state *ls, + struct lexer_state *lsbak) +{ +#ifndef NO_UCPP_BUF + freemem(ls->input_buf); + ls->input_buf = lsbak->input_buf; +#ifdef UCPP_MMAP + ls->from_mmap = lsbak->from_mmap; + ls->input_buf_sav = lsbak->input_buf_sav; +#endif +#endif + ls->input = lsbak->input; + ls->ebuf = lsbak->ebuf; + ls->pbuf = lsbak->pbuf; + ls->nlka = lsbak->nlka; + ls->discard = lsbak->discard; + ls->line = lsbak->line; + ls->oline = lsbak->oline; + ls->ifnest = lsbak->ifnest; + ls->condf[0] = lsbak->condf[0]; + ls->condf[1] = lsbak->condf[1]; +} + +/* + * close input file operations on a struct lexer_state + */ +static void close_input(struct lexer_state *ls) +{ +#ifdef UCPP_MMAP + if (ls->from_mmap) { + munmap((void *)ls->input_buf, ls->ebuf); + ls->from_mmap = 0; + ls->input_buf = ls->input_buf_sav; + } +#endif + if (ls->input) { + fclose(ls->input); + ls->input = 0; + } +} + +/* + * file_context (and the two functions push_ and pop_) are used to save + * all that is needed when including a file. 
+ */ +static struct file_context { + struct lexer_state ls; + char *name, *long_name; + int incdir; +} *ls_stack; +static size_t ls_depth = 0; + +static void push_file_context(struct lexer_state *ls) +{ + struct file_context fc; + + fc.name = current_filename; + fc.long_name = current_long_filename; + fc.incdir = current_incdir; + mmv(&(fc.ls), ls, sizeof(struct lexer_state)); + aol(ls_stack, ls_depth, fc, LS_STACK_MEMG); + ls_depth --; + aol(protect_detect_stack, ls_depth, protect_detect, LS_STACK_MEMG); + protect_detect.macro = 0; +} + +static void pop_file_context(struct lexer_state *ls) +{ +#ifdef AUDIT + if (ls_depth <= 0) ouch("prepare to meet thy creator"); +#endif + close_input(ls); + restore_lexer_state(ls, &(ls_stack[-- ls_depth].ls)); + if (protect_detect.macro) freemem(protect_detect.macro); + protect_detect = protect_detect_stack[ls_depth]; + if (current_filename) freemem(current_filename); + current_filename = ls_stack[ls_depth].name; + current_long_filename = ls_stack[ls_depth].long_name; + current_incdir = ls_stack[ls_depth].incdir; + if (ls_depth == 0) { + freemem(ls_stack); + freemem(protect_detect_stack); + } +} + +/* + * report_context() returns the list of successive includers of the + * current file, ending with a dummy entry with a negative line number. + * The caller is responsible for freeing the returned pointer. 
+ */ +struct stack_context *report_context(void) +{ + struct stack_context *sc; + size_t i; + + sc = getmem((ls_depth + 1) * sizeof(struct stack_context)); + for (i = 0; i < ls_depth; i ++) { + sc[i].name = ls_stack[ls_depth - i - 1].name; + sc[i].long_name = ls_stack[ls_depth - i - 1].long_name; + sc[i].line = ls_stack[ls_depth - i - 1].ls.line - 1; + } + sc[ls_depth].line = -1; + return sc; +} + +/* + * init_lexer_mode() is used to end initialization of a struct lexer_state + * if it must be used for a lexer + */ +void init_lexer_mode(struct lexer_state *ls) +{ + ls->flags = DEFAULT_LEXER_FLAGS; + ls->output_fifo = getmem(sizeof(struct token_fifo)); + ls->output_fifo->art = ls->output_fifo->nt = 0; + ls->toplevel_of = ls->output_fifo; + ls->save_ctok = ls->ctok; +} + +/* + * release memory used by a struct lexer_state; this implies closing + * any input stream held by this structure. + */ +void free_lexer_state(struct lexer_state *ls) +{ + close_input(ls); +#ifndef NO_UCPP_BUF + if (ls->input_buf) { + freemem(ls->input_buf); + ls->input_buf = 0; + } + if (ls->output_buf) { + freemem(ls->output_buf); + ls->output_buf = 0; + } +#endif + if (ls->ctok && (!ls->output_fifo || ls->output_fifo->nt == 0)) { + freemem(ls->ctok->name); + freemem(ls->ctok); + ls->ctok = 0; + } + if (ls->gf) { + free_garbage_fifo(ls->gf); + ls->gf = 0; + } + if (ls->output_fifo) { + freemem(ls->output_fifo); + ls->output_fifo = 0; + } +} + +/* + * Print line information. + */ +static void print_line_info(struct lexer_state *ls, unsigned long flags) +{ + char *fn = current_long_filename ? + current_long_filename : current_filename; + char *b, *d; + + b = getmem(50 + strlen(fn)); + if (flags & GCC_LINE_NUM) { + sprintf(b, "# %ld \"%s\"\n", ls->line, fn); + } else { + sprintf(b, "#line %ld \"%s\"\n", ls->line, fn); + } + for (d = b; *d; d ++) put_char(ls, (unsigned char)(*d)); + freemem(b); +} + +/* + * Enter a file; this implies the possible emission of a #line directive. 
+ * The flags used are passed as second parameter instead of being + * extracted from the struct lexer_state. + * + * As a command-line option, gcc-like directives (with only a '#', + * without 'line') may be produced. + * + * enter_file() returns 1 if a (CONTEXT) token was produced, 0 otherwise. + */ +int enter_file(struct lexer_state *ls, unsigned long flags) +{ + char *fn = current_long_filename ? + current_long_filename : current_filename; + + if (!(flags & LINE_NUM)) return 0; + if ((flags & LEXER) && !(flags & TEXT_OUTPUT)) { + struct token t; + + t.type = CONTEXT; + t.line = ls->line; + t.name = fn; + print_token(ls, &t, 0); + return 1; + } + print_line_info(ls, flags); + ls->oline --; /* emitted #line troubled oline */ + return 0; +} + +#ifdef UCPP_MMAP +/* + * We open() the file, then fdopen() it and fseek() to its end. If the + * fseek() worked, we try to mmap() the file, up to the point where we + * arrived. + * On an architecture where end-of-lines are multibytes and translated + * into single '\n', bad things could happen. We strongly hope that, if + * we could fseek() to the end but could not mmap(), then we can get back. + */ +static void *find_file_map; +static size_t map_length; + +FILE *fopen_mmap_file(char *name) +{ + FILE *f; + int fd; + long l; + + find_file_map = 0; + fd = open(name, O_RDONLY, 0); + if (fd < 0) return 0; + l = lseek(fd, 0, SEEK_END); + f = fdopen(fd, "r"); + if (!f) { + close(fd); + return 0; + } + if (l < 0) return f; /* not seekable */ + map_length = l; + if ((find_file_map = mmap(0, map_length, PROT_READ, + MAP_PRIVATE, fd, 0)) == MAP_FAILED) { + /* we could not mmap() the file; get back */ + find_file_map = 0; + if (fseek(f, 0, SEEK_SET)) { + /* bwaah... can't get back. This file is cursed. 
*/ + fclose(f); + return 0; + } + } + return f; +} + +void set_input_file(struct lexer_state *ls, FILE *f) +{ + ls->input = f; + if (find_file_map) { + ls->from_mmap = 1; + ls->input_buf_sav = ls->input_buf; + ls->input_buf = find_file_map; + ls->pbuf = 0; + ls->ebuf = map_length; + } else { + ls->from_mmap = 0; + } +} +#endif + +/* + * Find a file by looking through the include path. + * return value: a FILE * on the file, opened in "r" mode, or 0. + * + * find_file_error will contain: + * FF_ERROR on error (file not found or impossible to read) + * FF_PROTECT file is protected and therefore useless to read + * FF_KNOWN file is already known + * FF_UNKNOWN file was not already known + */ +static int find_file_error; + +enum { FF_ERROR, FF_PROTECT, FF_KNOWN, FF_UNKNOWN }; + +static FILE *find_file(char *name, int localdir) +{ + FILE *f; + int i, incdir = -1; + size_t nl = strlen(name); + char *s = 0; + struct found_file *ff = 0, *nff; + int lf = 0; + int nffa = 0; + + find_file_error = FF_ERROR; + protect_detect.state = -1; + protect_detect.macro = 0; + if (localdir) { + int i; + char *rfn = current_long_filename ? current_long_filename + : current_filename; + + for (i = strlen(rfn) - 1; i >= 0; i --) +#ifdef MSDOS + if (rfn[i] == '\\') break; +#else + if (rfn[i] == '/') break; +#endif +#if defined MSDOS + if (i >= 0 && *name != '\\' && (nl < 2 || name[1] != ':')) +#elif defined AMIGA + if (i >= 0 && *name != '/' && (nl < 2 || name[1] != ':')) +#else + if (i >= 0 && *name != '/') +#endif + { + /* + * current file is somewhere else, and the provided + * file name is not absolute, so we must adjust the + * base for looking for the file; besides, + * found_files and found_files_loc are irrelevant + * for this search. 
+ */ + s = getmem(i + 2 + nl); + mmv(s, rfn, i); +#ifdef MSDOS + s[i] = '\\'; +#else + s[i] = '/'; +#endif + mmv(s + i + 1, name, nl); + s[i + 1 + nl] = 0; + ff = HTT_get(&found_files, s); + } else ff = HTT_get(&found_files, name); + } + if (!ff) { + struct found_file_sys *ffs = HTT_get(&found_files_sys, name); + + if (ffs) { + ff = ffs->rff; + incdir = ffs->incdir; + } + } + /* + * At that point: if the file was found in the cache, ff points to + * the cached descriptive structure; its name is s if s is not 0, + * name otherwise. + */ + if (ff) goto found_file_cache; + + /* + * This is the first time we find the file, or it was not protected. + */ + protect_detect.ff = new_found_file(); + nffa = 1; + if (localdir && +#ifdef UCPP_MMAP + (f = fopen_mmap_file(s ? s : name)) +#else + (f = fopen(s ? s : name, "r")) +#endif + ) { + lf = 1; + goto found_file; + } + /* + * If s contains a name, that name is now irrelevant: it was a + * filename for a search in the current directory, and the file + * was not found. + */ + if (s) { + freemem(s); + s = 0; + } + for (i = 0; (size_t)i < include_path_nb; i ++) { + size_t ni = strlen(include_path[i]); + + s = getmem(ni + nl + 2); + mmv(s, include_path[i], ni); +#ifdef AMIGA + /* contributed by Volker Barthelmann */ + if (ni == 1 && *s == '.') { + *s = 0; + ni = 0; + } + if (ni > 0 && s[ni - 1] != ':' && s[ni - 1] != '/') { + s[ni] = '/'; + mmv(s + ni + 1, name, nl + 1); + } else { + mmv(s + ni, name, nl + 1); + } +#else + s[ni] = '/'; + mmv(s + ni + 1, name, nl + 1); +#endif +#ifdef MSDOS + /* on msdos systems, replace all / by \ */ + { + char *c; + + for (c = s; *c; c ++) if (*c == '/') *c = '\\'; + } +#endif + incdir = i; + if ((ff = HTT_get(&found_files, s)) != 0) { + /* + * The file is known, but not as a system include + * file under the name provided. 
+ */ + struct found_file_sys *ffs = new_found_file_sys(); + + ffs->rff = ff; + ffs->incdir = incdir; + HTT_put(&found_files_sys, ffs, name); + freemem(s); + s = 0; + if (nffa) { + del_found_file(protect_detect.ff); + protect_detect.ff = 0; + nffa = 0; + } + goto found_file_cache; + } +#ifdef UCPP_MMAP + f = fopen_mmap_file(s); +#else + f = fopen(s, "r"); +#endif + if (f) goto found_file; + freemem(s); + s = 0; + } +zero_out: + if (s) freemem(s); + if (nffa) { + del_found_file(protect_detect.ff); + protect_detect.ff = 0; + nffa = 0; + } + return 0; + + /* + * This part is invoked when the file was found in the + * cache. + */ +found_file_cache: + if (ff->protect) { + if (get_macro(ff->protect)) { + /* file is protected, do not include it */ + find_file_error = FF_PROTECT; + goto zero_out; + } + /* file is protected but the guardian macro is + not available; disable guardian detection. */ + protect_detect.state = 0; + } + protect_detect.ff = ff; +#ifdef UCPP_MMAP + f = fopen_mmap_file(HASH_ITEM_NAME(ff)); +#else + f = fopen(HASH_ITEM_NAME(ff), "r"); +#endif + if (!f) goto zero_out; + find_file_error = FF_KNOWN; + goto found_file_2; + + /* + * This part is invoked when we found a new file, which was not + * yet referenced. If lf == 1, then the file was found directly, + * otherwise it was found in some system include directory. + * A new found_file structure has been allocated and is in + * protect_detect.ff + */ +found_file: + if (f && ((emit_dependencies == 1 && lf && current_incdir == -1) + || emit_dependencies == 2)) { + fprintf(emit_output, " %s", s ? s : name); + } + nff = protect_detect.ff; + nff->name = sdup(name); +#ifdef AUDIT + if ( +#endif + HTT_put(&found_files, nff, s ? 
s : name) +#ifdef AUDIT + ) ouch("filename collided with a wraith") +#endif + ; + if (!lf) { + struct found_file_sys *ffs = new_found_file_sys(); + + ffs->rff = nff; + ffs->incdir = incdir; + HTT_put(&found_files_sys, ffs, name); + } + if (s) freemem(s); + s = 0; + find_file_error = FF_UNKNOWN; + ff = nff; + +found_file_2: + if (s) freemem(s); + current_long_filename = HASH_ITEM_NAME(ff); +#ifdef NO_LIBC_BUF + setbuf(f, 0); +#endif + current_incdir = incdir; + return f; +} + +/* + * Find the named file by looking through the end of the include path. + * This is for #include_next directives. + * #include_next and #include_next "foo" are considered identical, + * for all practical purposes. + */ +static FILE *find_file_next(char *name) +{ + int i; + size_t nl = strlen(name); + FILE *f; + struct found_file *ff; + + find_file_error = FF_ERROR; + protect_detect.state = -1; + protect_detect.macro = 0; + for (i = current_incdir + 1; (size_t)i < include_path_nb; i ++) { + char *s; + size_t ni = strlen(include_path[i]); + + s = getmem(ni + nl + 2); + mmv(s, include_path[i], ni); + s[ni] = '/'; + mmv(s + ni + 1, name, nl + 1); +#ifdef MSDOS + /* on msdos systems, replace all / by \ */ + { + char *c; + + for (c = s; *c; c ++) if (*c == '/') *c = '\\'; + } +#endif + ff = HTT_get(&found_files, s); + if (ff) { + /* file was found in the cache */ + if (ff->protect) { + if (get_macro(ff->protect)) { + find_file_error = FF_PROTECT; + freemem(s); + return 0; + } + /* file is protected but the guardian macro is + not available; disable guardian detection. */ + protect_detect.state = 0; + } + protect_detect.ff = ff; +#ifdef UCPP_MMAP + f = fopen_mmap_file(HASH_ITEM_NAME(ff)); +#else + f = fopen(HASH_ITEM_NAME(ff), "r"); +#endif + if (!f) { + /* file is referenced but yet unavailable. 
*/ + freemem(s); + return 0; + } + find_file_error = FF_KNOWN; + freemem(s); + s = HASH_ITEM_NAME(ff); + } else { +#ifdef UCPP_MMAP + f = fopen_mmap_file(s); +#else + f = fopen(s, "r"); +#endif + if (f) { + if (emit_dependencies == 2) { + fprintf(emit_output, " %s", s); + } + ff = protect_detect.ff = new_found_file(); + ff->name = sdup(s); +#ifdef AUDIT + if ( +#endif + HTT_put(&found_files, ff, s) +#ifdef AUDIT + ) ouch("filename collided with a wraith") +#endif + ; + find_file_error = FF_UNKNOWN; + freemem(s); + s = HASH_ITEM_NAME(ff); + } + } + if (f) { + current_long_filename = s; + current_incdir = i; + return f; + } + freemem(s); + } + return 0; +} + +/* + * The #if directive. This function parse the expression, performs macro + * expansion (and handles the "defined" operator), and call eval_expr. + * return value: 1 if the expression is true, 0 if it is false, -1 on error. + */ +static int handle_if(struct lexer_state *ls) +{ + struct token_fifo tf, tf1, tf2, tf3, *save_tf; + long l = ls->line; + unsigned long z; + int ret = 0, ltww = 1; + + /* first, get the whole line */ + tf.art = tf.nt = 0; + while (!next_token(ls) && ls->ctok->type != NEWLINE) { + struct token t; + + if (ltww && ttMWS(ls->ctok->type)) continue; + ltww = ttMWS(ls->ctok->type); + t.type = ls->ctok->type; + t.line = l; + if (S_TOKEN(ls->ctok->type)) { + t.name = sdup(ls->ctok->name); + throw_away(ls->gf, t.name); + } + aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); + } + if (ltww && tf.nt) if ((-- tf.nt) == 0) freemem(tf.t); + if (tf.nt == 0) { + error(l, "void condition for a #if/#elif"); + return -1; + } + /* handle the "defined" operator */ + tf1.art = tf1.nt = 0; + while (tf.art < tf.nt) { + struct token *ct, rt; + struct macro *m; + size_t nidx, eidx; + + ct = tf.t + (tf.art ++); + if (ct->type == NAME && !strcmp(ct->name, "defined")) { + if (tf.art >= tf.nt) goto store_token; + nidx = tf.art; + if (ttMWS(tf.t[nidx].type)) + if (++ nidx >= tf.nt) goto store_token; + if (tf.t[nidx].type == 
NAME) { + eidx = nidx; + goto check_macro; + } + if (tf.t[nidx].type != LPAR) goto store_token; + if (++ nidx >= tf.nt) goto store_token; + if (ttMWS(tf.t[nidx].type)) + if (++ nidx >= tf.nt) goto store_token; + if (tf.t[nidx].type != NAME) goto store_token; + eidx = nidx + 1; + if (eidx >= tf.nt) goto store_token; + if (ttMWS(tf.t[eidx].type)) + if (++ eidx >= tf.nt) goto store_token; + if (tf.t[eidx].type != RPAR) goto store_token; + goto check_macro; + } + store_token: + aol(tf1.t, tf1.nt, *ct, TOKEN_LIST_MEMG); + continue; + + check_macro: + m = get_macro(tf.t[nidx].name); + rt.type = NUMBER; + rt.name = m ? "1L" : "0L"; + aol(tf1.t, tf1.nt, rt, TOKEN_LIST_MEMG); + tf.art = eidx + 1; + } + freemem(tf.t); + if (tf1.nt == 0) { + error(l, "void condition (after expansion) for a #if/#elif"); + return -1; + } + + /* perform all macro substitutions */ + tf2.art = tf2.nt = 0; + save_tf = ls->output_fifo; + ls->output_fifo = &tf2; + while (tf1.art < tf1.nt) { + struct token *ct; + + ct = tf1.t + (tf1.art ++); + if (ct->type == NAME) { + struct macro *m = get_macro(ct->name); + + if (m) { + if (substitute_macro(ls, m, &tf1, 0, +#ifdef NO_PRAGMA_IN_DIRECTIVE + 1, +#else + 0, +#endif + ct->line)) { + ls->output_fifo = save_tf; + goto error1; + } + continue; + } + } else if ((ct->type == SHARP || ct->type == DIG_SHARP) + && (ls->flags & HANDLE_ASSERTIONS)) { + /* we have an assertion; parse it */ + int nnp, ltww = 1; + size_t i = tf1.art; + struct token_fifo atl; + char *aname; + struct assert *a; + int av = 0; + struct token rt; + + atl.art = atl.nt = 0; + while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++; + if (i >= tf1.nt) goto assert_error; + if (tf1.t[i].type != NAME) goto assert_error; + aname = tf1.t[i ++].name; + while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++; + if (i >= tf1.nt) goto assert_generic; + if (tf1.t[i].type != LPAR) goto assert_generic; + i ++; + for (nnp = 1; nnp && i < tf1.nt; i ++) { + if (ltww && ttMWS(tf1.t[i].type)) continue; + if (tf1.t[i].type == 
LPAR) nnp ++; + else if (tf1.t[i].type == RPAR + && (-- nnp) == 0) { + tf1.art = i + 1; + break; + } + ltww = ttMWS(tf1.t[i].type); + aol(atl.t, atl.nt, tf1.t[i], TOKEN_LIST_MEMG); + } + if (nnp) goto assert_error; + if (ltww && atl.nt && (-- atl.nt) == 0) freemem(atl.t); + if (atl.nt == 0) goto assert_error; + + /* the assertion is in aname and atl; check it */ + a = get_assertion(aname); + if (a) for (i = 0; i < a->nbval; i ++) + if (!cmp_token_list(&atl, a->val + i)) { + av = 1; + break; + } + rt.type = NUMBER; + rt.name = av ? "1" : "0"; + aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG); + if (atl.nt) freemem(atl.t); + continue; + + assert_generic: + tf1.art = i; + rt.type = NUMBER; + rt.name = get_assertion(aname) ? "1" : "0"; + aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG); + continue; + + assert_error: + error(l, "syntax error for assertion in #if"); + ls->output_fifo = save_tf; + goto error1; + } + aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG); + } + ls->output_fifo = save_tf; + freemem(tf1.t); + if (tf2.nt == 0) { + error(l, "void condition (after expansion) for a #if/#elif"); + return -1; + } + + /* + * suppress whitespace and replace rogue identifiers by 0 + */ + tf3.art = tf3.nt = 0; + while (tf2.art < tf2.nt) { + struct token *ct = tf2.t + (tf2.art ++); + + if (ttMWS(ct->type)) continue; + if (ct->type == NAME) { + /* + * a rogue identifier; we replace it with "0". + */ + struct token rt; + + rt.type = NUMBER; + rt.name = "0"; + aol(tf3.t, tf3.nt, rt, TOKEN_LIST_MEMG); + continue; + } + aol(tf3.t, tf3.nt, *ct, TOKEN_LIST_MEMG); + } + freemem(tf2.t); + + if (tf3.nt == 0) { + error(l, "void condition (after expansion) for a #if/#elif"); + return -1; + } + eval_line = l; + z = eval_expr(&tf3, &ret, (ls->flags & WARN_STANDARD) != 0); + freemem(tf3.t); + if (ret) return -1; + return (z != 0); + +error1: + if (tf1.nt) freemem(tf1.t); + if (tf2.nt) freemem(tf2.t); + return -1; +} + +/* + * A #include was found; parse the end of line, replace macros if + * necessary. 
+ * + * If nex is set to non-zero, the directive is considered as a #include_next + * (extension to C99, mimicked from GNU) + */ +static int handle_include(struct lexer_state *ls, unsigned long flags, int nex) +{ + int c, string_fname = 0; + char *fname; + unsigned char *fname2; + size_t fname_ptr = 0; + long l = ls->line; + int x, y; + FILE *f; + struct token_fifo tf, tf2, *save_tf; + size_t nl; + int tgd; + struct lexer_state alt_ls; + +#define left_angle(t) ((t) == LT || (t) == LEQ || (t) == LSH \ + || (t) == ASLSH || (t) == DIG_LBRK || (t) == LBRA) +#define right_angle(t) ((t) == GT || (t) == RSH || (t) == ARROW \ + || (t) == DIG_RBRK || (t) == DIG_RBRA) + + while ((c = grap_char(ls)) >= 0 && c != '\n') { + if (space_char(c)) { + discard_char(ls); + continue; + } + if (c == '<') { + discard_char(ls); + while ((c = grap_char(ls)) >= 0) { + discard_char(ls); + if (c == '\n') goto include_last_chance; + if (c == '>') break; + aol(fname, fname_ptr, (char)c, FNAME_MEMG); + } + aol(fname, fname_ptr, (char)0, FNAME_MEMG); + string_fname = 0; + goto do_include; + } else if (c == '"') { + discard_char(ls); + while ((c = grap_char(ls)) >= 0) { + discard_char(ls); + if (c == '\n') { + /* macro replacements won't save that one */ + if (fname_ptr) freemem(fname); + goto include_error; + } + if (c == '"') break; + aol(fname, fname_ptr, (char)c, FNAME_MEMG); + } + aol(fname, fname_ptr, (char)0, FNAME_MEMG); + string_fname = 1; + goto do_include; + } + goto include_macro; + } + +include_last_chance: + /* + * We found a '<' but not the trailing '>'; so we tokenize the + * line, and try to act upon it. The standard lets us free in that + * matter, and no sane programmer would use such a construct, but + * it is no reason not to support it. 
+ */ + if (fname_ptr == 0) goto include_error; + fname2 = getmem(fname_ptr + 1); + mmv(fname2 + 1, fname, fname_ptr); + fname2[0] = '<'; + /* + * We merely copy the lexer_state structure; this should be ok, + * since we do want to share the memory structure (garbage_fifo), + * and do not touch any other context-full thing. + */ + alt_ls = *ls; + alt_ls.input = 0; + alt_ls.input_string = fname2; + alt_ls.pbuf = 0; + alt_ls.ebuf = fname_ptr + 1; + tf.art = tf.nt = 0; + while (!next_token(&alt_ls)) { + if (!ttMWS(alt_ls.ctok->type)) { + struct token t; + + t.type = alt_ls.ctok->type; + t.line = l; + if (S_TOKEN(alt_ls.ctok->type)) { + t.name = sdup(alt_ls.ctok->name); + throw_away(alt_ls.gf, t.name); + } + aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); + } + } + freemem(fname2); + if (alt_ls.pbuf < alt_ls.ebuf) goto include_error; + /* tokenizing failed */ + goto include_macro2; + +include_error: + error(l, "invalid '#include'"); + return 1; + +include_macro: + tf.art = tf.nt = 0; + while (!next_token(ls) && ls->ctok->type != NEWLINE) { + if (!ttMWS(ls->ctok->type)) { + struct token t; + + t.type = ls->ctok->type; + t.line = l; + if (S_TOKEN(ls->ctok->type)) { + t.name = sdup(ls->ctok->name); + throw_away(ls->gf, t.name); + } + aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); + } + } +include_macro2: + tf2.art = tf2.nt = 0; + save_tf = ls->output_fifo; + ls->output_fifo = &tf2; + while (tf.art < tf.nt) { + struct token *ct; + + ct = tf.t + (tf.art ++); + if (ct->type == NAME) { + struct macro *m = get_macro(ct->name); + if (m) { + if (substitute_macro(ls, m, &tf, 0, +#ifdef NO_PRAGMA_IN_DIRECTIVE + 1, +#else + 0, +#endif + ct->line)) { + ls->output_fifo = save_tf; + return -1; + } + continue; + } + } + aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG); + } + freemem(tf.t); + ls->output_fifo = save_tf; + for (x = 0; (size_t)x < tf2.nt && ttWHI(tf2.t[x].type); x ++); + for (y = tf2.nt - 1; y >= 0 && ttWHI(tf2.t[y].type); y --); + if ((size_t)x >= tf2.nt) goto include_macro_err; + if (tf2.t[x].type 
== STRING) { + if (y != x) goto include_macro_err; + if (tf2.t[x].name[0] == 'L') { + if (ls->flags & WARN_STANDARD) + warning(l, "wide string for #include"); + fname = sdup(tf2.t[x].name); + nl = strlen(fname); + *(fname + nl - 1) = 0; + mmvwo(fname, fname + 2, nl - 2); + } else { + fname = sdup(tf2.t[x].name); + nl = strlen(fname); + *(fname + nl - 1) = 0; + mmvwo(fname, fname + 1, nl - 1); + } + string_fname = 1; + } else if (left_angle(tf2.t[x].type) && right_angle(tf2.t[y].type)) { + int i, j; + + if (ls->flags & WARN_ANNOYING) warning(l, "reconstruction " + "of in #include"); + for (j = 0, i = x; i <= y; i ++) if (!ttWHI(tf2.t[i].type)) + j += strlen(tname(tf2.t[i])); + fname = getmem(j + 1); + for (j = 0, i = x; i <= y; i ++) { + if (ttWHI(tf2.t[i].type)) continue; + strcpy(fname + j, tname(tf2.t[i])); + j += strlen(tname(tf2.t[i])); + } + *(fname + j - 1) = 0; + mmvwo(fname, fname + 1, j); + string_fname = 0; + } else goto include_macro_err; + freemem(tf2.t); + goto do_include_next; + +include_macro_err: + error(l, "macro expansion did not produce a valid filename " + "for #include"); + if (tf2.nt) freemem(tf2.t); + return 1; + +do_include: + tgd = 1; + while (!next_token(ls)) { + if (tgd && !ttWHI(ls->ctok->type) + && (ls->flags & WARN_STANDARD)) { + warning(l, "trailing garbage in #include"); + tgd = 0; + } + if (ls->ctok->type == NEWLINE) break; + } + + /* the increment of ls->line is intended so that the line + numbering is reported correctly in report_context() even if + the #include is at the end of the file with no trailing newline */ + if (ls->ctok->type != NEWLINE) ls->line ++; +do_include_next: + if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT)) + put_char(ls, '\n'); + push_file_context(ls); + reinit_lexer_state(ls, 1); +#ifdef MSDOS + /* on msdos systems, replace all / by \ */ + { + char *d; + + for (d = fname; *d; d ++) if (*d == '/') *d = '\\'; + } +#endif + f = nex ? 
find_file_next(fname) : find_file(fname, string_fname); + if (!f) { + current_filename = 0; + pop_file_context(ls); + if (find_file_error == FF_ERROR) { + error(l, "file '%s' not found", fname); + freemem(fname); + return 1; + } + /* file was found, but it is useless to include it again */ + freemem(fname); + return 0; + } +#ifdef UCPP_MMAP + set_input_file(ls, f); +#else + ls->input = f; +#endif + current_filename = fname; + enter_file(ls, flags); + return 0; + +#undef left_angle +#undef right_angle +} + +/* + * for #line directives + */ +static int handle_line(struct lexer_state *ls, unsigned long flags) +{ + char *fname; + long l = ls->line; + struct token_fifo tf, tf2, *save_tf; + size_t nl, j; + unsigned long z; + + tf.art = tf.nt = 0; + while (!next_token(ls) && ls->ctok->type != NEWLINE) { + if (!ttMWS(ls->ctok->type)) { + struct token t; + + t.type = ls->ctok->type; + t.line = l; + if (S_TOKEN(ls->ctok->type)) { + t.name = sdup(ls->ctok->name); + throw_away(ls->gf, t.name); + } + aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); + } + } + tf2.art = tf2.nt = 0; + save_tf = ls->output_fifo; + ls->output_fifo = &tf2; + while (tf.art < tf.nt) { + struct token *ct; + + ct = tf.t + (tf.art ++); + if (ct->type == NAME) { + struct macro *m = get_macro(ct->name); + if (m) { + if (substitute_macro(ls, m, &tf, 0, +#ifdef NO_PRAGMA_IN_DIRECTIVE + 1, +#else + 0, +#endif + ct->line)) { + ls->output_fifo = save_tf; + return -1; + } + continue; + } + } + aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG); + } + freemem(tf.t); + for (tf2.art = 0; tf2.art < tf2.nt && ttWHI(tf2.t[tf2.art].type); + tf2.art ++); + ls->output_fifo = save_tf; + if (tf2.art == tf2.nt || (tf2.t[tf2.art].type != NUMBER + && tf2.t[tf2.art].type != CHAR)) { + error(l, "not a valid number for #line"); + goto line_macro_err; + } + for (j = 0; tf2.t[tf2.art].name[j]; j ++) + if (tf2.t[tf2.art].name[j] < '0' + || tf2.t[tf2.art].name[j] > '9') + if (ls->flags & WARN_STANDARD) + warning(l, "non-standard line number in #line"); + 
if (catch(eval_exception)) goto line_macro_err; + z = strtoconst(tf2.t[tf2.art].name); + if (j > 10 || z > 2147483647U) { + error(l, "out-of-bound line number for #line"); + goto line_macro_err; + } + ls->oline = ls->line = z; + if ((++ tf2.art) < tf2.nt) { + size_t i; + + for (i = tf2.art; i < tf2.nt && ttMWS(tf2.t[i].type); i ++); + if (i < tf2.nt) { + if (tf2.t[i].type != STRING) { + error(l, "not a valid filename for #line"); + goto line_macro_err; + } + if (tf2.t[i].name[0] == 'L') { + if (ls->flags & WARN_STANDARD) { + warning(l, "wide string for #line"); + } + fname = sdup(tf2.t[i].name); + nl = strlen(fname); + *(fname + nl - 1) = 0; + mmvwo(fname, fname + 2, nl - 2); + } else { + fname = sdup(tf2.t[i].name); + nl = strlen(fname); + *(fname + nl - 1) = 0; + mmvwo(fname, fname + 1, nl - 1); + } + if (current_filename) freemem(current_filename); + current_filename = fname; + } + for (i ++; i < tf2.nt && ttMWS(tf2.t[i].type); i ++); + if (i < tf2.nt && (ls->flags & WARN_STANDARD)) { + warning(l, "trailing garbage in #line"); + } + } + freemem(tf2.t); + enter_file(ls, flags); + return 0; + +line_macro_err: + if (tf2.nt) freemem(tf2.t); + return 1; +} + +/* + * a #error directive: we emit the message without any modification + * (except the usual backslash+newline and trigraphs) + */ +static void handle_error(struct lexer_state *ls) +{ + int c; + size_t p = 0, lp = 128; + long l = ls->line; + unsigned char *buf = getmem(lp); + + while ((c = grap_char(ls)) >= 0 && c != '\n') { + discard_char(ls); + wan(buf, p, (unsigned char)c, lp); + } + wan(buf, p, 0, lp); + error(l, "#error%s", buf); + freemem(buf); +} + +/* + * convert digraph tokens to their standard equivalent. + */ +static int undig(int type) +{ + static int ud[6] = { LBRK, RBRK, LBRA, RBRA, SHARP, DSHARP }; + + return ud[type - DIG_LBRK]; +} + +#ifdef PRAGMA_TOKENIZE +/* + * Make a compressed representation of a token list; the contents of + * the token_fifo are freed. 
Values equal to 0 are replaced by + * PRAGMA_TOKEN_END (by default, (unsigned char)'\n') and the compressed + * string is padded by a 0 (so that it may be * handled like a string). + * Digraph tokens are replaced by their non-digraph equivalents. + */ +struct comp_token_fifo compress_token_list(struct token_fifo *tf) +{ + struct comp_token_fifo ct; + size_t l; + + for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) { + l ++; + if (S_TOKEN(tf->t[tf->art].type)) + l += strlen(tf->t[tf->art].name) + 1; + } + ct.t = getmem((ct.length = l) + 1); + for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) { + int tt = tf->t[tf->art].type; + + if (tt == 0) tt = PRAGMA_TOKEN_END; + if (tt > DIGRAPH_TOKENS && tt < DIGRAPH_TOKENS_END) + tt = undig(tt); + ct.t[l ++] = tt; + if (S_TOKEN(tt)) { + char *tn = tf->t[tf->art].name; + size_t sl = strlen(tn); + + mmv(ct.t + l, tn, sl); + l += sl; + ct.t[l ++] = PRAGMA_TOKEN_END; + freemem(tn); + } + } + ct.t[l] = 0; + if (tf->nt) freemem(tf->t); + ct.rp = 0; + return ct; +} +#endif + +/* + * A #pragma directive: we make a PRAGMA token containing the rest of + * the line. + * + * We strongly hope that we are called only in LEXER mode. 
+ */ +static void handle_pragma(struct lexer_state *ls) +{ + unsigned char *buf; + struct token t; + long l = ls->line; + +#ifdef PRAGMA_TOKENIZE + struct token_fifo tf; + + tf.art = tf.nt = 0; + while (!next_token(ls) && ls->ctok->type != NEWLINE) + if (!ttMWS(ls->ctok->type)) break; + if (ls->ctok->type != NEWLINE) { + do { + struct token t; + + t.type = ls->ctok->type; + if (ttMWS(t.type)) continue; + if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); + aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); + } while (!next_token(ls) && ls->ctok->type != NEWLINE); + } + if (tf.nt == 0) { + /* void pragma are silently ignored */ + return; + } + buf = (compress_token_list(&tf)).t; +#else + int c, x = 1, y = 32; + + while ((c = grap_char(ls)) >= 0 && c != '\n') { + discard_char(ls); + if (!space_char(c)) break; + } + /* void #pragma are ignored */ + if (c == '\n') return; + buf = getmem(y); + buf[0] = c; + while ((c = grap_char(ls)) >= 0 && c != '\n') { + discard_char(ls); + wan(buf, x, c, y); + } + for (x --; x >= 0 && space_char(buf[x]); x --); + x ++; + wan(buf, x, 0, y); +#endif + t.type = PRAGMA; + t.line = l; + t.name = (char *)buf; + aol(ls->output_fifo->t, ls->output_fifo->nt, t, TOKEN_LIST_MEMG); + throw_away(ls->gf, (char *)buf); +} + +/* + * We saw a # at the beginning of a line (or preceeded only by whitespace). + * We check the directive name and act accordingly. 
+ */ +static int handle_cpp(struct lexer_state *ls, int sharp_type) +{ +#define condfset(x) do { \ + ls->condf[(x) / 32] |= 1UL << ((x) % 32); \ + } while (0) +#define condfclr(x) do { \ + ls->condf[(x) / 32] &= ~(1UL << ((x) % 32)); \ + } while (0) +#define condfval(x) ((ls->condf[(x) / 32] & (1UL << ((x) % 32))) != 0) + + long l = ls->line; + unsigned long save_flags = ls->flags; + int ret = 0; + + save_flags = ls->flags; + ls->flags |= LEXER; + while (!next_token(ls)) { + int t = ls->ctok->type; + + switch (t) { + case COMMENT: + if (ls->flags & WARN_ANNOYING) { + warning(l, "comment in the middle of " + "a cpp directive"); + } + /* fall through */ + case NONE: + continue; + case NEWLINE: + /* null directive */ + if (ls->flags & WARN_ANNOYING) { + /* truly an annoying warning; null directives + are rare but may increase readability of + some source files, and they are legal */ + warning(l, "null cpp directive"); + } + if (!(ls->flags & LEXER)) put_char(ls, '\n'); + goto handle_exit2; + case NAME: + break; + default: + if (ls->flags & FAIL_SHARP) { + /* LPS 20050602 - ignores '#!' 
if on the first line */ + if( ( l == 1 ) && + ( ls->condcomp ) ) + { + ret = 1; + } + else + /* LPS 20050602 */ + if (ls->condcomp) { + error(l, "rogue '#'"); + ret = 1; + } else { + if (ls->flags & WARN_STANDARD) { + warning(l, "rogue '#' in code " + "compiled out"); + ret = 0; + } + } + ls->flags = save_flags; + goto handle_warp_ign; + } else { + struct token u; + + u.type = sharp_type; + u.line = l; + ls->flags = save_flags; + print_token(ls, &u, 0); + print_token(ls, ls->ctok, 0); + if (ls->flags & WARN_ANNOYING) { + warning(l, "rogue '#' dumped"); + } + goto handle_exit3; + } + } + if (ls->condcomp) { + if (!strcmp(ls->ctok->name, "define")) { + ret = handle_define(ls); + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "undef")) { + ret = handle_undef(ls); + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "if")) { + if ((++ ls->ifnest) > 63) goto too_many_if; + condfclr(ls->ifnest - 1); + ret = handle_if(ls); + if (ret > 0) ret = 0; + else if (ret == 0) { + ls->condcomp = 0; + ls->condmet = 0; + ls->condnest = ls->ifnest - 1; + } + else ret = 1; + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "ifdef")) { + if ((++ ls->ifnest) > 63) goto too_many_if; + condfclr(ls->ifnest - 1); + ret = handle_ifdef(ls); + if (ret > 0) ret = 0; + else if (ret == 0) { + ls->condcomp = 0; + ls->condmet = 0; + ls->condnest = ls->ifnest - 1; + } + else ret = 1; + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "ifndef")) { + if ((++ ls->ifnest) > 63) goto too_many_if; + condfclr(ls->ifnest - 1); + ret = handle_ifndef(ls); + if (ret > 0) ret = 0; + else if (ret == 0) { + ls->condcomp = 0; + ls->condmet = 0; + ls->condnest = ls->ifnest - 1; + } + else ret = 1; + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "else")) { + if (ls->ifnest == 0 + || condfval(ls->ifnest - 1)) { + error(l, "rogue #else"); + ret = 1; + goto handle_warp; + } + condfset(ls->ifnest - 1); + if (ls->ifnest == 1) protect_detect.state = 0; + ls->condcomp = 0; + ls->condmet = 1; 
+ ls->condnest = ls->ifnest - 1; + goto handle_warp; + } else if (!strcmp(ls->ctok->name, "elif")) { + if (ls->ifnest == 0 + || condfval(ls->ifnest - 1)) { + error(l, "rogue #elif"); + ret = 1; + goto handle_warp_ign; + } + if (ls->ifnest == 1) protect_detect.state = 0; + ls->condcomp = 0; + ls->condmet = 1; + ls->condnest = ls->ifnest - 1; + goto handle_warp_ign; + } else if (!strcmp(ls->ctok->name, "endif")) { + if (ls->ifnest == 0) { + error(l, "unmatched #endif"); + ret = 1; + goto handle_warp; + } + if ((-- ls->ifnest) == 0 + && protect_detect.state == 2) { + protect_detect.state = 3; + } + goto handle_warp; + } else if (!strcmp(ls->ctok->name, "include")) { + ret = handle_include(ls, save_flags, 0); + goto handle_exit3; + } else if (!strcmp(ls->ctok->name, "include_next")) { + ret = handle_include(ls, save_flags, 1); + goto handle_exit3; + } else if (!strcmp(ls->ctok->name, "pragma")) { + if (!(save_flags & LEXER)) { +#ifdef PRAGMA_DUMP + /* dump #pragma in output */ + struct token u; + + u.type = sharp_type; + u.line = l; + ls->flags = save_flags; + print_token(ls, &u, 0); + print_token(ls, ls->ctok, 0); + while (ls->flags |= LEXER, + !next_token(ls)) { + long save_line; + + ls->flags &= ~LEXER; + save_line = ls->line; + ls->line = l; + print_token(ls, ls->ctok, 0); + ls->line = save_line; + if (ls->ctok->type == NEWLINE) + break; + } + goto handle_exit3; +#else + if (ls->flags & WARN_PRAGMA) + warning(l, "#pragma ignored " + "and not dumped"); + goto handle_warp_ign; +#endif + } + if (!(ls->flags & HANDLE_PRAGMA)) + goto handle_warp_ign; + handle_pragma(ls); + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "error")) { + ret = 1; + handle_error(ls); + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "line")) { + ret = handle_line(ls, save_flags); + goto handle_exit; + } else if ((ls->flags & HANDLE_ASSERTIONS) + && !strcmp(ls->ctok->name, "assert")) { + ret = handle_assert(ls); + goto handle_exit; + } else if ((ls->flags & HANDLE_ASSERTIONS) + 
&& !strcmp(ls->ctok->name, "unassert")) { + ret = handle_unassert(ls); + goto handle_exit; + } + } else { + if (!strcmp(ls->ctok->name, "else")) { + if (condfval(ls->ifnest - 1) + && (ls->flags & WARN_STANDARD)) { + warning(l, "rogue #else in code " + "compiled out"); + } + if (ls->condnest == ls->ifnest - 1) { + if (!ls->condmet) ls->condcomp = 1; + } + condfset(ls->ifnest - 1); + if (ls->ifnest == 1) protect_detect.state = 0; + goto handle_warp; + } else if (!strcmp(ls->ctok->name, "elif")) { + if (condfval(ls->ifnest - 1) + && (ls->flags & WARN_STANDARD)) { + warning(l, "rogue #elif in code " + "compiled out"); + } + if (ls->condnest != ls->ifnest - 1 + || ls->condmet) + goto handle_warp_ign; + if (ls->ifnest == 1) protect_detect.state = 0; + ret = handle_if(ls); + if (ret > 0) { + ls->condcomp = 1; + ls->condmet = 1; + ret = 0; + } else if (ret < 0) ret = 1; + goto handle_exit; + } else if (!strcmp(ls->ctok->name, "endif")) { + if ((-- ls->ifnest) == ls->condnest) { + if (ls->ifnest == 0 && + protect_detect.state == 2) + protect_detect.state = 3; + ls->condcomp = 1; + } + goto handle_warp; + } else if (!strcmp(ls->ctok->name, "if") + || !strcmp(ls->ctok->name, "ifdef") + || !strcmp(ls->ctok->name, "ifndef")) { + if ((++ ls->ifnest) > 63) goto too_many_if; + condfclr(ls->ifnest - 1); + } + goto handle_warp_ign; + } + /* + * Unrecognized directive. We emit either an error or + * an annoying warning, depending on a command-line switch. 
+ */ + if (ls->flags & FAIL_SHARP) { + error(l, "unknown cpp directive '#%s'", + ls->ctok->name); + goto handle_warp_ign; + } else { + struct token u; + + u.type = sharp_type; + u.line = l; + ls->flags = save_flags; + print_token(ls, &u, 0); + print_token(ls, ls->ctok, 0); + if (ls->flags & WARN_ANNOYING) { + warning(l, "rogue '#' dumped"); + } + } + } + return 1; + +handle_warp_ign: + while (!next_token(ls)) if (ls->ctok->type == NEWLINE) break; + goto handle_exit; +handle_warp: + while (!next_token(ls)) { + if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { + warning(l, "trailing garbage in " + "preprocessing directive"); + } + if (ls->ctok->type == NEWLINE) break; + } +handle_exit: + if (!(ls->flags & LEXER)) put_char(ls, '\n'); +handle_exit3: + if (protect_detect.state == 1) { + protect_detect.state = 0; + } else if (protect_detect.state == -1) { + /* just after the #include */ + protect_detect.state = 1; + } +handle_exit2: + ls->flags = save_flags; + return ret; +too_many_if: + error(l, "too many levels of conditional inclusion (max 63)"); + ret = 1; + goto handle_warp; +#undef condfset +#undef condfclr +#undef condfval +} + +/* + * This is the main entry function. It maintains count of #, and call the + * appropriate functions when it encounters a cpp directive or a macro + * name. + * return value: positive on error; CPPERR_EOF means "end of input reached" + */ +int cpp(struct lexer_state *ls) +{ + int r = 0; + + while (next_token(ls)) { + if (protect_detect.state == 3) { + /* + * At that point, protect_detect.ff->protect might + * be non-zero, if the file has been recursively + * included, and a guardian detected. + */ + if (!protect_detect.ff->protect) { + /* Cool ! A new guardian has been detected. */ + protect_detect.ff->protect = + protect_detect.macro; + } else if (protect_detect.macro) { + /* We found a guardian but an old one. 
*/ + freemem(protect_detect.macro); + } + protect_detect.macro = 0; + } + if (ls->ifnest) { + error(ls->line, "unterminated #if construction " + "(depth %ld)", ls->ifnest); + r = CPPERR_NEST; + } + if (ls_depth == 0) return CPPERR_EOF; + close_input(ls); + if (!(ls->flags & LEXER) && !ls->ltwnl) { + put_char(ls, '\n'); + ls->ltwnl = 1; + } + pop_file_context(ls); + ls->oline ++; + if (enter_file(ls, ls->flags)) { + ls->ctok->type = NEWLINE; + ls->ltwnl = 1; + break; + } + } + if (!(ls->ltwnl && (ls->ctok->type == SHARP + || ls->ctok->type == DIG_SHARP)) + && protect_detect.state == 1 && !ttWHI(ls->ctok->type)) { + /* the first non-whitespace token encountered is not + a sharp introducing a cpp directive */ + protect_detect.state = 0; + } + if (protect_detect.state == 3 && !ttWHI(ls->ctok->type)) { + /* a non-whitespace token encountered after the #endif */ + protect_detect.state = 0; + } + if (ls->condcomp) { + if (ls->ltwnl && (ls->ctok->type == SHARP + || ls->ctok->type == DIG_SHARP)) { + int x = handle_cpp(ls, ls->ctok->type); + + ls->ltwnl = 1; + return r ? r : x; + } + if (ls->ctok->type == NAME) { + struct macro *m; + + if ((m = get_macro(ls->ctok->name)) != 0) { + int x; + + x = substitute_macro(ls, m, 0, 1, 0, + ls->ctok->line); + if (!(ls->flags & LEXER)) + garbage_collect(ls->gf); + return r ? r : x; + } + if (!(ls->flags & LEXER)) + print_token(ls, ls->ctok, 0); + } + } else { + if (ls->ltwnl && (ls->ctok->type == SHARP + || ls->ctok->type == DIG_SHARP)) { + int x = handle_cpp(ls, ls->ctok->type); + + ls->ltwnl = 1; + return r ? r : x; + } + } + if (ls->ctok->type == NEWLINE) ls->ltwnl = 1; + else if (!ttWHI(ls->ctok->type)) ls->ltwnl = 0; + return r ? r : -1; +} + +#ifndef STAND_ALONE +/* + * llex() and lex() are the lexing functions, when the preprocessor is + * linked to another code. llex() should be called only by lex(). 
+ */ +static int llex(struct lexer_state *ls) +{ + struct token_fifo *tf = ls->output_fifo; + int r; + + if (tf->nt != 0) { + if (tf->art < tf->nt) { +#ifdef INMACRO_FLAG + if (!ls->inmacro) { + ls->inmacro = 1; + ls->macro_count ++; + } +#endif + ls->ctok = tf->t + (tf->art ++); + if (ls->ctok->type > DIGRAPH_TOKENS + && ls->ctok->type < DIGRAPH_TOKENS_END) { + ls->ctok->type = undig(ls->ctok->type); + } + return 0; + } else { +#ifdef INMACRO_FLAG + ls->inmacro = 0; +#endif + freemem(tf->t); + tf->art = tf->nt = 0; + garbage_collect(ls->gf); + ls->ctok = ls->save_ctok; + } + } + r = cpp(ls); + if (ls->ctok->type > DIGRAPH_TOKENS + && ls->ctok->type < LAST_MEANINGFUL_TOKEN) { + ls->ctok->type = undig(ls->ctok->type); + } + if (r > 0) return r; + if (r < 0) return 0; + return llex(ls); +} + +/* + * lex() reads the next token from the processed stream and stores it + * into ls->ctok. + * return value: non zero on error (including CPPERR_EOF, which is not + * quite an error) + */ +int lex(struct lexer_state *ls) +{ + int r; + + do { + r = llex(ls); +#ifdef SEMPER_FIDELIS + } while (!r && !ls->condcomp); +#else + } while (!r && (!ls->condcomp || (ttWHI(ls->ctok->type) && + (!(ls->flags & LINE_NUM) || ls->ctok->type != NEWLINE)))); +#endif + return r; +} +#endif + +/* + * check_cpp_errors() must be called when the end of input is reached; + * it checks pending errors due to truncated constructs (actually none, + * this is reserved for future evolutions). + */ +int check_cpp_errors(struct lexer_state *ls) +{ + if (ls->flags & KEEP_OUTPUT) { + put_char(ls, '\n'); + } + if (emit_dependencies) fputc('\n', emit_output); +#ifndef NO_UCPP_BUF + if (!(ls->flags & LEXER)) { + flush_output(ls); + } +#endif + if ((ls->flags & WARN_TRIGRAPHS) && ls->count_trigraphs) + warning(0, "%ld trigraph(s) encountered", ls->count_trigraphs); + return 0; +} + +/* + * init_cpp() initializes static tables inside ucpp. It needs not be + * called more than once. 
+ */ +void init_cpp(void) +{ + init_cppm(); +} + +/* + * (re)init the global tables. + * If standard_assertions is non 0, init the assertions table. + */ +void init_tables(int with_assertions) +{ + time_t t; + struct tm *ct; + + init_buf_lexer_state(&dsharp_lexer, 0); +#ifdef PRAGMA_TOKENIZE + init_buf_lexer_state(&tokenize_lexer, 0); +#endif + time(&t); + ct = localtime(&t); +#ifdef NOSTRFTIME + /* we have a quite old compiler, that does not know the + (standard since 1990) strftime() function. */ + { + char *c = asctime(ct); + + compile_time[0] = '"'; + mmv(compile_time + 1, c + 11, 8); + compile_time[9] = '"'; + compile_time[10] = 0; + compile_date[0] = '"'; + mmv(compile_date + 1, c + 4, 7); + mmv(compile_date + 8, c + 20, 4); + compile_date[12] = '"'; + compile_date[13] = 0; + } +#else + strftime(compile_time, 12, "\"%H:%M:%S\"", ct); + strftime(compile_date, 24, "\"%b %d %Y\"", ct); +#endif + init_macros(); + if (with_assertions) init_assertions(); + init_found_files(); +} + +/* + * Resets the include path. + */ +void init_include_path(char *incpath[]) +{ + if (include_path_nb) { + size_t i; + + for (i = 0; i < include_path_nb; i ++) + freemem(include_path[i]); + freemem(include_path); + include_path_nb = 0; + } + if (incpath) { + int i; + + for (i = 0; incpath[i]; i ++) + aol(include_path, include_path_nb, + sdup(incpath[i]), INCPATH_MEMG); + } +} + +/* + * add_incpath() adds "path" to the standard include path. + */ +void add_incpath(char *path) +{ + aol(include_path, include_path_nb, sdup(path), INCPATH_MEMG); +} + +/* + * This function cleans the memory. It should release all allocated + * memory structures and may be called even if the current pre-processing + * is not finished or reported an error. 
+ */ +void wipeout() +{ + struct lexer_state ls; + + if (include_path_nb > 0) { + size_t i; + + for (i = 0; i < include_path_nb; i ++) + freemem(include_path[i]); + freemem(include_path); + include_path = 0; + include_path_nb = 0; + } + if (current_filename) freemem(current_filename); + current_filename = 0; + current_long_filename = 0; + current_incdir = -1; + protect_detect.state = 0; + if (protect_detect.macro) freemem(protect_detect.macro); + protect_detect.macro = 0; + protect_detect.ff = 0; + init_lexer_state(&ls); + while (ls_depth > 0) pop_file_context(&ls); + free_lexer_state(&ls); + free_lexer_state(&dsharp_lexer); +#ifdef PRAGMA_TOKENIZE + free_lexer_state(&tokenize_lexer); +#endif + if (found_files_init_done) HTT_kill(&found_files); + found_files_init_done = 0; + if (found_files_sys_init_done) HTT_kill(&found_files_sys); + found_files_sys_init_done = 0; + wipe_macros(); + wipe_assertions(); +} + +#ifdef STAND_ALONE +/* + * print some help + */ +static void usage(char *command_name) +{ + fprintf(stderr, + "Usage: %s [options] [file]\n" + "language options:\n" + " -C keep comments in output\n" + " -s keep '#' when no cpp directive is recognized\n" + " -l do not emit line numbers\n" + " -lg emit gcc-like line numbers\n" + " -CC disable C++-like comments\n" + " -a, -na, -a0 handle (or not) assertions\n" + " -V disable macros with extra arguments\n" + " -u understand UTF-8 in source\n" + " -X enable -a, -u and -Y\n" + " -c90 mimic C90 behaviour\n" + " -t disable trigraph support\n" + "warning options:\n" + " -wt emit a final warning when trigaphs are encountered\n" + " -wtt emit warnings for each trigaph encountered\n" + " -wa emit warnings that are usually useless\n" + " -w0 disable standard warnings\n" + "directory options:\n" + " -I directory add 'directory' before the standard include path\n" + " -J directory add 'directory' after the standard include path\n" + " -zI do not use the standard include path\n" + " -M emit Makefile-like dependencies instead 
of normal " + "output\n" + " -Ma emit also dependancies for system files\n" + " -o file store output in file\n" + "macro and assertion options:\n" + " -Dmacro predefine 'macro'\n" + " -Dmacro=def predefine 'macro' with 'def' content\n" + " -Umacro undefine 'macro'\n" + " -Afoo(bar) assert foo(bar)\n" + " -Bfoo(bar) unassert foo(bar)\n" + " -Y predefine system-dependant macros\n" + " -Z do not predefine special macros\n" + " -d emit defined macros\n" + " -e emit assertions\n" + "misc options:\n" + " -v print version number and settings\n" + " -h show this help\n", + command_name); +} + +/* + * print version and compile-time settings + */ +static void version(void) +{ + size_t i; + + fprintf(stderr, "ucpp version %d.%d\n", VERS_MAJ, VERS_MIN); + fprintf(stderr, "search path:\n"); + for (i = 0; i < include_path_nb; i ++) + fprintf(stderr, " %s\n", include_path[i]); +} + +/* + * parse_opt() initializes many things according to the command-line + * options. + * Return values: + * 0 on success + * 1 on semantic error (redefinition of a special macro, for instance) + * 2 on syntaxic error (unknown options for instance) + */ +static int parse_opt(int argc, char *argv[], struct lexer_state *ls) +{ + int i, ret = 0; + char *filename = 0; + int with_std_incpath = 1; + int print_version = 0, print_defs = 0, print_asserts = 0; + int system_macros = 0, standard_assertions = 1; + + init_lexer_state(ls); + ls->flags = DEFAULT_CPP_FLAGS; + emit_output = ls->output = stdout; + for (i = 1; i < argc; i ++) if (argv[i][0] == '-') { + if (!strcmp(argv[i], "-h")) { + return 2; + } else if (!strcmp(argv[i], "-C")) { + ls->flags &= ~DISCARD_COMMENTS; + } else if (!strcmp(argv[i], "-CC")) { + ls->flags &= ~CPLUSPLUS_COMMENTS; + } else if (!strcmp(argv[i], "-a")) { + ls->flags |= HANDLE_ASSERTIONS; + } else if (!strcmp(argv[i], "-na")) { + ls->flags |= HANDLE_ASSERTIONS; + standard_assertions = 0; + } else if (!strcmp(argv[i], "-a0")) { + ls->flags &= ~HANDLE_ASSERTIONS; + } else if 
(!strcmp(argv[i], "-V")) { + ls->flags &= ~MACRO_VAARG; + } else if (!strcmp(argv[i], "-u")) { + ls->flags |= UTF8_SOURCE; + } else if (!strcmp(argv[i], "-X")) { + ls->flags |= HANDLE_ASSERTIONS; + ls->flags |= UTF8_SOURCE; + system_macros = 1; + } else if (!strcmp(argv[i], "-c90")) { + ls->flags &= ~MACRO_VAARG; + ls->flags &= ~CPLUSPLUS_COMMENTS; + c99_compliant = 0; + c99_hosted = -1; + } else if (!strcmp(argv[i], "-t")) { + ls->flags &= ~HANDLE_TRIGRAPHS; + } else if (!strcmp(argv[i], "-wt")) { + ls->flags |= WARN_TRIGRAPHS; + } else if (!strcmp(argv[i], "-wtt")) { + ls->flags |= WARN_TRIGRAPHS_MORE; + } else if (!strcmp(argv[i], "-wa")) { + ls->flags |= WARN_ANNOYING; + } else if (!strcmp(argv[i], "-w0")) { + ls->flags &= ~WARN_STANDARD; + ls->flags &= ~WARN_PRAGMA; + } else if (!strcmp(argv[i], "-s")) { + ls->flags &= ~FAIL_SHARP; + } else if (!strcmp(argv[i], "-l")) { + ls->flags &= ~LINE_NUM; + } else if (!strcmp(argv[i], "-lg")) { + ls->flags |= GCC_LINE_NUM; + } else if (!strcmp(argv[i], "-M")) { + ls->flags &= ~KEEP_OUTPUT; + emit_dependencies = 1; + } else if (!strcmp(argv[i], "-Ma")) { + ls->flags &= ~KEEP_OUTPUT; + emit_dependencies = 2; + } else if (!strcmp(argv[i], "-Y")) { + system_macros = 1; + } else if (!strcmp(argv[i], "-Z")) { + no_special_macros = 1; + } else if (!strcmp(argv[i], "-d")) { + ls->flags &= ~KEEP_OUTPUT; + print_defs = 1; + } else if (!strcmp(argv[i], "-e")) { + ls->flags &= ~KEEP_OUTPUT; + print_asserts = 1; + } else if (!strcmp(argv[i], "-zI")) { + with_std_incpath = 0; + } else if (!strcmp(argv[i], "-I") || !strcmp(argv[i], "-J")) { + i ++; + } else if (!strcmp(argv[i], "-o")) { + if ((++ i) >= argc) { + error(-1, "missing filename after -o"); + return 2; + } + if (argv[i][0] == '-' && argv[i][1] == 0) { + emit_output = ls->output = stdout; + } else { + ls->output = fopen(argv[i], "w"); + if (!ls->output) { + error(-1, "failed to open for " + "writing: %s", argv[i]); + return 2; + } + emit_output = ls->output; + } + } else if 
(!strcmp(argv[i], "-v")) { + print_version = 1; + } else if (argv[i][1] != 'I' && argv[i][1] != 'J' + && argv[i][1] != 'D' && argv[i][1] != 'U' + && argv[i][1] != 'A' && argv[i][1] != 'B') + warning(-1, "unknown option '%s'", argv[i]); + } else { + if (filename != 0) { + error(-1, "spurious filename '%s'", argv[i]); + return 2; + } + filename = argv[i]; + } + init_tables(ls->flags & HANDLE_ASSERTIONS); + init_include_path(0); + if (filename) { +#ifdef UCPP_MMAP + FILE *f = fopen_mmap_file(filename); + + ls->input = 0; + if (f) set_input_file(ls, f); +#else + ls->input = fopen(filename, "r"); +#endif + if (!ls->input) { + error(-1, "file '%s' not found", filename); + return 1; + } +#ifdef NO_LIBC_BUF + setbuf(ls->input, 0); +#endif + set_init_filename(filename, 1); + } else { + ls->input = stdin; + set_init_filename("", 0); + } + for (i = 1; i < argc; i ++) + if (argv[i][0] == '-' && argv[i][1] == 'I') + add_incpath(argv[i][2] ? argv[i] + 2 : argv[i + 1]); + if (system_macros) for (i = 0; system_macros_def[i]; i ++) + ret = ret || define_macro(ls, system_macros_def[i]); + for (i = 1; i < argc; i ++) + if (argv[i][0] == '-' && argv[i][1] == 'D') + ret = ret || define_macro(ls, argv[i] + 2); + for (i = 1; i < argc; i ++) + if (argv[i][0] == '-' && argv[i][1] == 'U') + ret = ret || undef_macro(ls, argv[i] + 2); + if (ls->flags & HANDLE_ASSERTIONS) { + if (standard_assertions) + for (i = 0; system_assertions_def[i]; i ++) + make_assertion(system_assertions_def[i]); + for (i = 1; i < argc; i ++) + if (argv[i][0] == '-' && argv[i][1] == 'A') + ret = ret || make_assertion(argv[i] + 2); + for (i = 1; i < argc; i ++) + if (argv[i][0] == '-' && argv[i][1] == 'B') + ret = ret || destroy_assertion(argv[i] + 2); + } else { + for (i = 1; i < argc; i ++) + if (argv[i][0] == '-' + && (argv[i][1] == 'A' || argv[i][1] == 'B')) + warning(-1, "assertions disabled"); + } + if (with_std_incpath) { + for (i = 0; include_path_std[i]; i ++) + add_incpath(include_path_std[i]); + } + for (i = 
1; i < argc; i ++) + if (argv[i][0] == '-' && argv[i][1] == 'J') + add_incpath(argv[i][2] ? argv[i] + 2 : argv[i + 1]); + + if (print_version) { + version(); + return 1; + } + if (print_defs) { + print_defines(); + emit_defines = 1; + } + if (print_asserts && (ls->flags & HANDLE_ASSERTIONS)) { + print_assertions(); + emit_assertions = 1; + } + return ret; +} + +int main(int argc, char *argv[]) +{ + struct lexer_state ls; + int r, fr = 0; + + init_cpp(); + if ((r = parse_opt(argc, argv, &ls)) != 0) { + if (r == 2) usage(argv[0]); + return EXIT_FAILURE; + } + enter_file(&ls, ls.flags); + while ((r = cpp(&ls)) < CPPERR_EOF) fr = fr || (r > 0); + fr = fr || check_cpp_errors(&ls); + free_lexer_state(&ls); + wipeout(); +#ifdef MEM_DEBUG + report_leaks(); +#endif + return fr ? EXIT_FAILURE : EXIT_SUCCESS; +} +#endif diff --git a/libexec/auxcpp/cpp.h b/libexec/auxcpp/cpp.h new file mode 100644 index 00000000000..2bb707a324c --- /dev/null +++ b/libexec/auxcpp/cpp.h @@ -0,0 +1,317 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef UCPP__CPP__ +#define UCPP__CPP__ + +/* + * Uncomment the following if you want ucpp to use externally provided + * error-reporting functions (ucpp_warning(), ucpp_error() and ucpp_ouch()) + */ +/* #define NO_UCPP_ERROR_FUNCTIONS */ + +/* + * Tokens (do not change the order unless checking operators_name[] in cpp.c) + * + * It is important that the token NONE is 0 + * Check the STRING_TOKEN macro + */ +#define CPPERR 512 +enum { + NONE, /* whitespace */ + NEWLINE, /* newline */ + COMMENT, /* comment */ + NUMBER, /* number constant */ + NAME, /* identifier */ + BUNCH, /* non-C characters */ + PRAGMA, /* a #pragma directive */ + CONTEXT, /* new file or #line */ + STRING, /* constant "xxx" */ + CHAR, /* constant 'xxx' */ + SLASH, /* / */ + ASSLASH, /* /= */ + MINUS, /* - */ + MMINUS, /* -- */ + ASMINUS, /* -= */ + ARROW, /* -> */ + PLUS, /* + */ + PPLUS, /* ++ */ + ASPLUS, /* += */ + LT, /* < */ + LEQ, /* <= */ + LSH, /* << */ + ASLSH, /* <<= */ + GT, /* > */ + GEQ, /* >= */ + RSH, /* >> */ + ASRSH, /* >>= */ + ASGN, /* = */ + SAME, /* == */ +#ifdef CAST_OP + CAST, /* => */ +#endif + NOT, /* ~ */ + NEQ, /* != */ + AND, /* & */ + LAND, /* && */ + ASAND, /* &= */ + OR, /* | */ + LOR, /* || */ + ASOR, /* |= */ + PCT, /* % */ + ASPCT, /* %= */ + STAR, /* * */ + ASSTAR, /* *= */ + CIRC, /* ^ */ + ASCIRC, /* ^= */ + LNOT, /* ! 
*/ + LBRA, /* { */ + RBRA, /* } */ + LBRK, /* [ */ + RBRK, /* ] */ + LPAR, /* ( */ + RPAR, /* ) */ + COMMA, /* , */ + QUEST, /* ? */ + SEMIC, /* ; */ + COLON, /* : */ + DOT, /* . */ + MDOTS, /* ... */ + SHARP, /* # */ + DSHARP, /* ## */ + + OPT_NONE, /* optional space to separate tokens in text output */ + + DIGRAPH_TOKENS, /* there begin digraph tokens */ + + /* for DIG_*, do not change order, unless checking undig() in cpp.c */ + DIG_LBRK, /* <: */ + DIG_RBRK, /* :> */ + DIG_LBRA, /* <% */ + DIG_RBRA, /* %> */ + DIG_SHARP, /* %: */ + DIG_DSHARP, /* %:%: */ + + DIGRAPH_TOKENS_END, /* digraph tokens end here */ + + LAST_MEANINGFUL_TOKEN, /* reserved words will go there */ + + MACROARG, /* special token for representing macro arguments */ + + UPLUS = CPPERR, /* unary + */ + UMINUS /* unary - */ +}; + +#include "tune.h" +#include +#include + +struct token { + int type; + long line; + char *name; +}; + +struct token_fifo { + struct token *t; + size_t nt, art; +}; + +struct lexer_state { + /* input control */ + FILE *input; +#ifndef NO_UCPP_BUF + unsigned char *input_buf; +#ifdef UCPP_MMAP + int from_mmap; + unsigned char *input_buf_sav; +#endif +#endif + unsigned char *input_string; + size_t ebuf; + size_t pbuf; + int lka[2]; + int nlka; + int macfile; + int last; + int discard; + unsigned long utf8; + unsigned char copy_line[COPY_LINE_LENGTH]; + int cli; + + /* output control */ + FILE *output; + struct token_fifo *output_fifo, *toplevel_of; +#ifndef NO_UCPP_BUF + unsigned char *output_buf; +#endif + size_t sbuf; + + /* token control */ + struct token *ctok; + struct token *save_ctok; + size_t tknl; + int ltwnl; + int pending_token; +#ifdef INMACRO_FLAG + int inmacro; + long macro_count; +#endif + + /* lexer options */ + long line; + long oline; + unsigned long flags; + long count_trigraphs; + struct garbage_fifo *gf; + int ifnest; + int condnest; + int condcomp; + int condmet; + unsigned long condf[2]; +}; + +/* + * Flags for struct lexer_state + */ +/* warning 
flags */ +#define WARN_STANDARD 0x000001UL /* emit standard warnings */ +#define WARN_ANNOYING 0x000002UL /* emit annoying warnings */ +#define WARN_TRIGRAPHS 0x000004UL /* warn when trigraphs are used */ +#define WARN_TRIGRAPHS_MORE 0x000008UL /* extra-warn for trigraphs */ +#define WARN_PRAGMA 0x000010UL /* warn for pragmas in non-lexer mode */ + +/* error flags */ +#define FAIL_SHARP 0x000020UL /* emit errors on rogue '#' */ +#define CCHARSET 0x000040UL /* emit errors on non-C characters */ + +/* emission flags */ +#define DISCARD_COMMENTS 0x000080UL /* discard comments from text output */ +#define CPLUSPLUS_COMMENTS 0x000100UL /* understand C++-like comments */ +#define LINE_NUM 0x000200UL /* emit #line directives in output */ +#define GCC_LINE_NUM 0x000400UL /* same as #line, with gcc-syntax */ + +/* language flags */ +#define HANDLE_ASSERTIONS 0x000800UL /* understand assertions */ +#define HANDLE_PRAGMA 0x001000UL /* emit PRAGMA tokens in lexer mode */ +#define MACRO_VAARG 0x002000UL /* understand macros with '...' 
*/ +#define UTF8_SOURCE 0x004000UL /* identifiers are in UTF8 encoding */ +#define HANDLE_TRIGRAPHS 0x008000UL /* handle trigraphs */ + +/* global ucpp behaviour */ +#define LEXER 0x010000UL /* behave as a lexer */ +#define KEEP_OUTPUT 0x020000UL /* emit the result of preprocessing */ +#define COPY_LINE 0x040000UL /* make a copy of the parsed line */ + +/* internal flags */ +#define READ_AGAIN 0x080000UL /* emit again the last token */ +#define TEXT_OUTPUT 0x100000UL /* output text */ + +/* + * Public function prototypes + */ + +#ifndef NO_UCPP_BUF +void flush_output(struct lexer_state *); +#endif + +void init_assertions(void); +int make_assertion(char *); +int destroy_assertion(char *); +void print_assertions(void); + +void init_macros(void); +int define_macro(struct lexer_state *, char *); +int undef_macro(struct lexer_state *, char *); +void print_defines(void); + +void set_init_filename(char *, int); +void init_cpp(void); +void init_include_path(char *[]); +void init_lexer_state(struct lexer_state *); +void init_lexer_mode(struct lexer_state *); +void free_lexer_state(struct lexer_state *); +void wipeout(void); +int lex(struct lexer_state *); +int check_cpp_errors(struct lexer_state *); +void add_incpath(char *); +void init_tables(int); +int enter_file(struct lexer_state *, unsigned long); +int cpp(struct lexer_state *); +void set_identifier_char(int c); +void unset_identifier_char(int c); + +#ifdef UCPP_MMAP +FILE *fopen_mmap_file(char *); +void set_input_file(struct lexer_state *, FILE *); +#endif + +struct stack_context { + char *long_name, *name; + long line; +}; +struct stack_context *report_context(void); + +extern int no_special_macros, system_macros, + emit_dependencies, emit_defines, emit_assertions; +extern int c99_compliant, c99_hosted; +extern FILE *emit_output; +extern char *current_filename, *current_long_filename; +extern char *operators_name[]; +extern struct protect { + char *macro; + int state; + struct found_file *ff; +} protect_detect; + 
+void ucpp_ouch(char *, ...); +void ucpp_error(long, char *, ...); +void ucpp_warning(long, char *, ...); + +extern int *transient_characters; + +/* + * Errors from CPPERR_EOF and above are not real errors, only show-stoppers. + * Errors below CPPERR_EOF are real ones. + */ +#define CPPERR_NEST 900 +#define CPPERR_EOF 1000 + +/* + * This macro tells whether the name field of a given token type is + * relevant, or not. Irrelevant name field means that it might point + * to outerspace. + */ +#ifdef SEMPER_FIDELIS +#define STRING_TOKEN(x) ((x) == NONE || ((x) >= COMMENT && (x) <= CHAR)) +#else +#define STRING_TOKEN(x) ((x) >= NUMBER && (x) <= CHAR) +#endif + +#endif diff --git a/libexec/auxcpp/eval.c b/libexec/auxcpp/eval.c new file mode 100644 index 00000000000..82971973f75 --- /dev/null +++ b/libexec/auxcpp/eval.c @@ -0,0 +1,699 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "tune.h" +#include +#include +#include +#include +#include "ucppi.h" +#include "mem.h" + +JMP_BUF eval_exception; +long eval_line; +static int emit_eval_warnings; + +/* + * If you want to hardcode a conversion table, define a static array + * of 256 int, and make transient_characters point to it. + */ +int *transient_characters = 0; + +#define OCTAL(x) ((x) >= '0' && (x) <= '7') +#define DECIM(x) ((x) >= '0' && (x) <= '9') +#define HEXAD(x) (DECIM(x) \ + || (x) == 'a' || (x) == 'b' || (x) == 'c' \ + || (x) == 'd' || (x) == 'e' || (x) == 'f' \ + || (x) == 'A' || (x) == 'B' || (x) == 'C' \ + || (x) == 'D' || (x) == 'E' || (x) == 'F') +#define OVAL(x) ((int)((x) - '0')) +#define DVAL(x) ((int)((x) - '0')) +#define HVAL(x) (DECIM(x) ? DVAL(x) \ + : (x) == 'a' || (x) == 'A' ? 10 \ + : (x) == 'b' || (x) == 'B' ? 11 \ + : (x) == 'c' || (x) == 'C' ? 12 \ + : (x) == 'd' || (x) == 'D' ? 13 \ + : (x) == 'e' || (x) == 'E' ? 
14 : 15) + +#define ARITH_TYPENAME big +#define ARITH_FUNCTION_HEADER static inline + +#define ARITH_ERROR(type) z_error(type) +static void z_error(int type); + +#ifdef ARITHMETIC_CHECKS +#define ARITH_WARNING(type) z_warn(type) +static void z_warn(int type); +#endif + +#include "arith.c" + +static void z_error(int type) +{ + switch (type) { + case ARITH_EXCEP_SLASH_D: + error(eval_line, "division by 0"); + break; + case ARITH_EXCEP_SLASH_O: + error(eval_line, "overflow on division"); + break; + case ARITH_EXCEP_PCT_D: + error(eval_line, "division by 0 on modulus operator"); + break; + case ARITH_EXCEP_CONST_O: + error(eval_line, "constant too large for destination type"); + break; +#ifdef AUDIT + default: + ouch("erroneous integer error: %d", type); +#endif + } + throw(eval_exception); +} + +#ifdef ARITHMETIC_CHECKS +static void z_warn(int type) +{ + switch (type) { + case ARITH_EXCEP_CONV_O: + warning(eval_line, "overflow on integer conversion"); + break; + case ARITH_EXCEP_NEG_O: + warning(eval_line, "overflow on unary minus"); + break; + case ARITH_EXCEP_NOT_T: + warning(eval_line, + "bitwise inversion yields trap representation"); + break; + case ARITH_EXCEP_PLUS_O: + warning(eval_line, "overflow on addition"); + break; + case ARITH_EXCEP_PLUS_U: + warning(eval_line, "underflow on addition"); + break; + case ARITH_EXCEP_MINUS_O: + warning(eval_line, "overflow on subtraction"); + break; + case ARITH_EXCEP_MINUS_U: + warning(eval_line, "underflow on subtraction"); + break; + case ARITH_EXCEP_AND_T: + warning(eval_line, + "bitwise AND yields trap representation"); + break; + case ARITH_EXCEP_XOR_T: + warning(eval_line, + "bitwise XOR yields trap representation"); + break; + case ARITH_EXCEP_OR_T: + warning(eval_line, + "bitwise OR yields trap representation"); + break; + case ARITH_EXCEP_LSH_W: + warning(eval_line, "left shift count greater than " + "or equal to type width"); + break; + case ARITH_EXCEP_LSH_C: + warning(eval_line, "left shift count negative"); + 
break; + case ARITH_EXCEP_LSH_O: + warning(eval_line, "overflow on left shift"); + break; + case ARITH_EXCEP_RSH_W: + warning(eval_line, "right shift count greater than " + "or equal to type width"); + break; + case ARITH_EXCEP_RSH_C: + warning(eval_line, "right shift count negative"); + break; + case ARITH_EXCEP_RSH_N: + warning(eval_line, "right shift of negative value"); + break; + case ARITH_EXCEP_STAR_O: + warning(eval_line, "overflow on multiplication"); + break; + case ARITH_EXCEP_STAR_U: + warning(eval_line, "underflow on multiplication"); + break; +#ifdef AUDIT + default: + ouch("erroneous integer warning: %d", type); +#endif + } +} +#endif + +typedef struct { + int sign; + union { + u_big uv; + s_big sv; + } u; +} ppval; + +static int boolval(ppval x) +{ + return x.sign ? big_s_lval(x.u.sv) : big_u_lval(x.u.uv); +} + +#if !defined(WCHAR_SIGNEDNESS) +# if CHAR_MIN == 0 +# define WCHAR_SIGNEDNESS 0 +# else +# define WCHAR_SIGNEDNESS 1 +# endif +#endif + +/* + * Check the suffix, return 1 if it is signed, 0 otherwise. 1 is + * returned for a void suffix. 
Legal suffixes are: + * unsigned: u U ul uL Ul UL lu Lu lU LU ull uLL Ull ULL llu LLu llU LLU + * signed: l L ll LL + */ +static int pp_suffix(char *d, char *refc) +{ + if (!*d) return 1; + if (*d == 'u' || *d == 'U') { + if (!*(++ d)) return 0; + if (*d == 'l' || *d == 'L') { + char *e = d + 1; + + if (*e && *e != *d) goto suffix_error; + if (!*e || !*(e + 1)) return 0; + goto suffix_error; + } + goto suffix_error; + } + if (*d == 'l' || *d == 'L') { + if (!*(++ d)) return 1; + if (*d == *(d - 1)) { + d ++; + if (!*d) return 1; + } + if (*d == 'u' || *d == 'U') { + d ++; + if (!*d) return 0; + } + goto suffix_error; + } +suffix_error: + error(eval_line, "invalid integer constant '%s'", refc); + throw(eval_exception); + return 666; +} + +static unsigned long pp_char(char *c, char *refc) +{ + unsigned long r = 0; + + c ++; + if (*c == '\\') { + int i; + + c ++; + switch (*c) { + case 'n': r = '\n'; c ++; break; + case 't': r = '\t'; c ++; break; + case 'v': r = '\v'; c ++; break; + case 'b': r = '\b'; c ++; break; + case 'r': r = '\r'; c ++; break; + case 'f': r = '\f'; c ++; break; + case 'a': r = '\a'; c ++; break; + case '\\': r = '\\'; c ++; break; + case '\?': r = '\?'; c ++; break; + case '\'': r = '\''; c ++; break; + case '\"': r = '\"'; c ++; break; + case 'u': + for (i = 0, c ++; i < 4 && HEXAD(*c); i ++, c ++) { + r = (r * 16) + HVAL(*c); + } + if (i != 4) { + error(eval_line, "malformed UCN in %s", refc); + throw(eval_exception); + } + break; + case 'U': + for (i = 0, c ++; i < 8 && HEXAD(*c); i ++, c ++) { + r = (r * 16) + HVAL(*c); + } + if (i != 8) { + error(eval_line, "malformed UCN in %s", refc); + throw(eval_exception); + } + break; + case 'x': + for (c ++; HEXAD(*c); c ++) r = (r * 16) + HVAL(*c); + break; + default: + if (OCTAL(*c)) { + r = OVAL(*(c ++)); + if (OCTAL(*c)) r = (r * 8) + OVAL(*(c ++)); + if (OCTAL(*c)) r = (r * 8) + OVAL(*(c ++)); + } else { + error(eval_line, "invalid escape sequence " + "'\\%c'", *c); + throw(eval_exception); + } 
+ } + } else if (*c == '\'') { + error(eval_line, "empty character constant"); + throw(eval_exception); + } else { + r = *((unsigned char *)(c ++)); + } + + if (transient_characters && r < 256) { + r = transient_characters[(size_t)r]; + } + + if (*c != '\'' && emit_eval_warnings) { + warning(eval_line, "multicharacter constant"); + } + return r; +} + +static ppval pp_strtoconst(char *refc) +{ + ppval q; + char *c = refc, *d; + u_big ru; + s_big rs; + int sp, dec; + + if (*c == '\'' || *c == 'L') { + q.sign = (*c == 'L') ? WCHAR_SIGNEDNESS : 1; + if (*c == 'L' && *(++ c) != '\'') { + error(eval_line, + "invalid wide character constant: %s", refc); + throw(eval_exception); + } + if (q.sign) { + q.u.sv = big_s_fromlong(pp_char(c, refc)); + } else { + q.u.uv = big_u_fromulong(pp_char(c, refc)); + } + return q; + } + if (*c == '0') { + /* octal or hexadecimal */ + dec = 0; + c ++; + if (*c == 'x' || *c == 'X') { + c ++; + d = big_u_hexconst(c, &ru, &rs, &sp); + } else { + d = big_u_octconst(c, &ru, &rs, &sp); + } + } else { + dec = 1; + d = big_u_decconst(c, &ru, &rs, &sp); + } + q.sign = pp_suffix(d, refc); + if (q.sign) { + if (!sp) { + if (dec) { + error(eval_line, "constant too large " + "for destination type"); + throw(eval_exception); + } else { + warning(eval_line, "constant is so large " + "that it is unsigned"); + } + q.u.uv = ru; + q.sign = 0; + } else { + q.u.sv = rs; + } + } else { + q.u.uv = ru; + } + return q; +} + +/* + * Used by #line directives -- anything beyond what can be put in an + * unsigned long, is considered absurd. 
+ */ +unsigned long strtoconst(char *c) +{ + ppval q = pp_strtoconst(c); + + if (q.sign) q.u.uv = big_s_to_u(q.u.sv); + return big_u_toulong(q.u.uv); +} + +#define OP_UN(x) ((x) == LNOT || (x) == NOT || (x) == UPLUS \ + || (x) == UMINUS) + +static ppval eval_opun(int op, ppval v) +{ + if (op == LNOT) { + v.sign = 1; + v.u.sv = big_s_fromint(big_s_lnot(v.u.sv)); + return v; + } + if (v.sign) { + switch (op) { + case NOT: v.u.sv = big_s_not(v.u.sv); break; + case UPLUS: break; + case UMINUS: v.u.sv = big_s_neg(v.u.sv); break; + } + } else { + switch (op) { + case NOT: v.u.uv = big_u_not(v.u.uv); break; + case UPLUS: break; + case UMINUS: v.u.uv = big_u_neg(v.u.uv); break; + } + } + return v; +} + +#define OP_BIN(x) ((x) == STAR || (x) == SLASH || (x) == PCT \ + || (x) == PLUS || (x) == MINUS || (x) == LSH \ + || (x) == RSH || (x) == LT || (x) == LEQ \ + || (x) == GT || (x) == GEQ || (x) == SAME \ + || (x) == NEQ || (x) == AND || (x) == CIRC \ + || (x) == OR || (x) == LAND || (x) == LOR \ + || (x) == COMMA) + +static ppval eval_opbin(int op, ppval v1, ppval v2) +{ + ppval r; + int iv2 = 0; + + switch (op) { + case STAR: case SLASH: case PCT: + case PLUS: case MINUS: case AND: + case CIRC: case OR: + /* promote operands, adjust signedness of result */ + if (!v1.sign || !v2.sign) { + if (v1.sign) { + v1.u.uv = big_s_to_u(v1.u.sv); + v1.sign = 0; + } else if (v2.sign) { + v2.u.uv = big_s_to_u(v2.u.sv); + v2.sign = 0; + } + r.sign = 0; + } else { + r.sign = 1; + } + break; + case LT: case LEQ: case GT: + case GEQ: case SAME: case NEQ: + /* promote operands */ + if (!v1.sign || !v2.sign) { + if (v1.sign) { + v1.u.uv = big_s_to_u(v1.u.sv); + v1.sign = 0; + } else if (v2.sign) { + v2.u.uv = big_s_to_u(v2.u.sv); + v2.sign = 0; + } + } + /* fall through */ + case LAND: + case LOR: + /* result is signed anyway */ + r.sign = 1; + break; + case LSH: + case RSH: + /* result is as signed as left operand; convert right + operand to int */ + r.sign = v1.sign; + if (v2.sign) { + iv2 = 
big_s_toint(v2.u.sv); + } else { + iv2 = big_u_toint(v2.u.uv); + } + break; + case COMMA: + if (emit_eval_warnings) { + warning(eval_line, "ISO C forbids evaluated comma " + "operators in #if expressions"); + } + r.sign = v2.sign; + break; +#ifdef AUDIT + default: ouch("a good operator is a dead operator"); +#endif + } + +#define SBINOP(x) if (r.sign) r.u.sv = big_s_ ## x (v1.u.sv, v2.u.sv); \ + else r.u.uv = big_u_ ## x (v1.u.uv, v2.u.uv); + +#define NSSBINOP(x) if (v1.sign) r.u.sv = big_s_fromint(big_s_ ## x \ + (v1.u.sv, v2.u.sv)); else r.u.sv = big_s_fromint( \ + big_u_ ## x (v1.u.uv, v2.u.uv)); + +#define LBINOP(x) if (v1.sign) r.u.sv = big_s_fromint( \ + big_s_lval(v1.u.sv) x big_s_lval(v2.u.sv)); \ + else r.u.sv = big_s_fromint( \ + big_u_lval(v1.u.uv) x big_u_lval(v2.u.uv)); + +#define ABINOP(x) if (r.sign) r.u.sv = big_s_ ## x (v1.u.sv, iv2); \ + else r.u.uv = big_u_ ## x (v1.u.uv, iv2); + + switch (op) { + case STAR: SBINOP(star); break; + case SLASH: SBINOP(slash); break; + case PCT: SBINOP(pct); break; + case PLUS: SBINOP(plus); break; + case MINUS: SBINOP(minus); break; + case LSH: ABINOP(lsh); break; + case RSH: ABINOP(rsh); break; + case LT: NSSBINOP(lt); break; + case LEQ: NSSBINOP(leq); break; + case GT: NSSBINOP(gt); break; + case GEQ: NSSBINOP(geq); break; + case SAME: NSSBINOP(same); break; + case NEQ: NSSBINOP(neq); break; + case AND: SBINOP(and); break; + case CIRC: SBINOP(xor); break; + case OR: SBINOP(or); break; + case LAND: LBINOP(&&); break; + case LOR: LBINOP(||); break; + case COMMA: r = v2; break; + } + return r; +} + +#define ttOP(x) (OP_UN(x) || OP_BIN(x) || (x) == QUEST || (x) == COLON) + +static int op_prec(int op) +{ + switch (op) { + case LNOT: + case NOT: + case UPLUS: + case UMINUS: + return 13; + case STAR: + case SLASH: + case PCT: + return 12; + case PLUS: + case MINUS: + return 11; + case LSH: + case RSH: + return 10; + case LT: + case LEQ: + case GT: + case GEQ: + return 9; + case SAME: + case NEQ: + return 8; + case AND: 
+ return 7; + case CIRC: + return 6; + case OR: + return 5; + case LAND: + return 4; + case LOR: + return 3; + case QUEST: + return 2; + case COMMA: + return 1; + } +#ifdef AUDIT + ouch("an unknown species should have a higher precedence"); +#endif + return 666; +} + +/* + * Perform the hard work of evaluation. + * + * This function works because: + * -- all unary operators are right to left associative, and with + * identical precedence + * -- all binary operators are left to right associative + * -- there is only one non-unary and non-binary operator: the quest-colon + * + * If do_eval is 0, the evaluation of operators is not done. This is + * for sequence point operators (&&, || and ?:). + */ +static ppval eval_shrd(struct token_fifo *tf, int minprec, int do_eval) +{ + ppval top; + struct token *ct; + + top.sign = 1; + if (tf->art == tf->nt) goto trunc_err; + ct = tf->t + (tf->art ++); + if (ct->type == LPAR) { + top = eval_shrd(tf, 0, do_eval); + if (tf->art == tf->nt) goto trunc_err; + ct = tf->t + (tf->art ++); + if (ct->type != RPAR) { + error(eval_line, "a right parenthesis was expected"); + throw(eval_exception); + } + } else if (ct->type == NUMBER || ct->type == CHAR) { + top = pp_strtoconst(ct->name); + } else if (OP_UN(ct->type)) { + top = eval_opun(ct->type, eval_shrd(tf, + op_prec(ct->type), do_eval)); + goto eval_loop; + } else if (ttOP(ct->type)) goto rogue_op_err; + else { + goto invalid_token_err; + } + +eval_loop: + if (tf->art == tf->nt) { + return top; + } + ct = tf->t + (tf->art ++); + if (OP_BIN(ct->type)) { + int bp = op_prec(ct->type); + + if (bp > minprec) { + ppval tr; + + if ((ct->type == LOR && boolval(top)) + || (ct->type == LAND && !boolval(top))) { + tr = eval_shrd(tf, bp, 0); + if (do_eval) { + top.sign = 1; + if (ct->type == LOR) + top.u.sv = big_s_fromint(1); + if (ct->type == LAND) + top.u.sv = big_s_fromint(0); + } + } else { + tr = eval_shrd(tf, bp, do_eval); + if (do_eval) + top = eval_opbin(ct->type, top, tr); + } + goto 
eval_loop; + } + } else if (ct->type == QUEST) { + int bp = op_prec(QUEST); + ppval r1, r2; + + if (bp >= minprec) { + int qv = boolval(top); + + r1 = eval_shrd(tf, bp, qv ? do_eval : 0); + if (tf->art == tf->nt) goto trunc_err; + ct = tf->t + (tf->art ++); + if (ct->type != COLON) { + error(eval_line, "a colon was expected"); + throw(eval_exception); + } + r2 = eval_shrd(tf, bp, qv ? 0 : do_eval); + if (do_eval) { + if (qv) top = r1; else top = r2; + } + goto eval_loop; + } + } + tf->art --; + return top; + +trunc_err: + error(eval_line, "truncated constant integral expression"); + throw(eval_exception); +rogue_op_err: + error(eval_line, "rogue operator '%s' in constant integral " + "expression", operators_name[ct->type]); + throw(eval_exception); +invalid_token_err: + error(eval_line, "invalid token in constant integral expression"); + throw(eval_exception); +} + +#define UNARY(x) ((x) != NUMBER && (x) != NAME && (x) != CHAR \ + && (x) != RPAR) + +/* + * Evaluate the integer expression contained in the given token_fifo. + * Evaluation is made by precedence of operators, as described in the + * Dragon Book. The unary + and - are distinguished from their binary + * counterparts using the Fortran way: a + or a - is considered unary + * if it does not follow a constant, an identifier or a right parenthesis. 
+ */ +unsigned long eval_expr(struct token_fifo *tf, int *ret, int ew) +{ + size_t sart; + ppval r; + + emit_eval_warnings = ew; + if (catch(eval_exception)) goto eval_err; + /* first, distinguish unary + and - from binary + and - */ + for (sart = tf->art; tf->art < tf->nt; tf->art ++) { + if (tf->t[tf->art].type == PLUS) { + if (sart == tf->art || UNARY(tf->t[tf->art - 1].type)) + tf->t[tf->art].type = UPLUS; + } else if (tf->t[tf->art].type == MINUS) { + if (sart == tf->art || UNARY(tf->t[tf->art - 1].type)) + tf->t[tf->art].type = UMINUS; + } + } + tf->art = sart; + r = eval_shrd(tf, 0, 1); + if (tf->art < tf->nt) { + error(eval_line, "trailing garbage in constant integral " + "expression"); + goto eval_err; + } + *ret = 0; + return boolval(r); +eval_err: + *ret = 1; + return 0; +} diff --git a/libexec/auxcpp/hash.c b/libexec/auxcpp/hash.c new file mode 100644 index 00000000000..60bd2124137 --- /dev/null +++ b/libexec/auxcpp/hash.c @@ -0,0 +1,329 @@ +/* + * Generic hash table routines. + * (c) Thomas Pornin 1998, 1999, 2000 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include "hash.h" +#include "mem.h" +#include "tune.h" + +/* + * hash_string() is a sample hash function for strings + */ +int hash_string(char *s) +{ +#ifdef FAST_HASH + unsigned h = 0, g; + + while (*s) { + h = (h << 4) + *(unsigned char *)(s ++); + if ((g = h & 0xF000U) != 0) h ^= (g >> 12); + h &= ~g; + } + return (h ^ (h >> 9)) & 127U; +#else + unsigned char h = 0; + + for (; *s; s ++) h ^= (unsigned char)(*s); + return ((int)h); +#endif +} + +/* + * struct hash_item is the basic data type to internally handle hash tables + */ +struct hash_item { + void *data; + struct hash_item *next; +}; + +/* + * This function adds an entry to the struct hash_item list + */ +static struct hash_item *add_entry(struct hash_item *blist, void *data) +{ + struct hash_item *t = getmem(sizeof(struct hash_item)); + + t->data = data; + t->next = blist; + return t; +} + +/* + * This function finds a struct hash_item in a list, using the + * comparison function provided as cmpdata (*cmpdata() returns + * non-zero if the two parameters are to be considered identical). + * + * It returns 0 if the item is not found. 
+ */ +static struct hash_item *get_entry(struct hash_item *blist, void *data, + int (*cmpdata)(void *, void *)) +{ + while (blist) { + if ((*cmpdata)(data, blist->data)) return blist; + blist = blist->next; + } + return 0; +} + +/* + * This function acts like get_entry but deletes the found item, using + * the provided function deldata(); it returns 0 if the given data was + * not found. + */ +static struct hash_item *del_entry(struct hash_item *blist, void *data, + int (*cmpdata)(void *, void *), void (*deldata)(void *)) +{ + struct hash_item *prev = 0, *save = blist; + + while (blist) { + if ((*cmpdata)(data, blist->data)) { + if (deldata) (*deldata)(blist->data); + if (prev) prev->next = blist->next; + if (save == blist) save = blist->next; + freemem(blist); + return save; + } + prev = blist; + blist = blist->next; + } + return 0; +} + +/* + * This function creates a new hashtable, with the hashing and comparison + * functions given as parameters + */ +struct HT *newHT(int n, int (*cmpdata)(void *, void *), int (*hash)(void *), + void (*deldata)(void *)) +{ + struct HT *t = getmem(sizeof(struct HT)); + int i; + + t->lists = getmem(n * sizeof(struct hash_item *)); + for (i = 0; i < n; i ++) t->lists[i] = 0; + t->nb_lists = n; + t->cmpdata = cmpdata; + t->hash = hash; + t->deldata = deldata; + return t; +} + +/* + * This function adds a new entry in the hashtable ht; it returns 0 + * on success, or a pointer to the already present item otherwise. + */ +void *putHT(struct HT *ht, void *data) +{ + int h; + struct hash_item *d; + + h = ((*(ht->hash))(data)); +#ifndef FAST_HASH + h %= ht->nb_lists; +#endif + if ((d = get_entry(ht->lists[h], data, ht->cmpdata))) + return d->data; + ht->lists[h] = add_entry(ht->lists[h], data); + return 0; +} + +/* + * This function adds a new entry in the hashtable ht, even if an equal + * entry is already there. Exercise caution ! 
+ * The new entry will "hide" the old one, which means that the new will be + * found upon lookup/delete, not the old one. + */ +void *forceputHT(struct HT *ht, void *data) +{ + int h; + + h = ((*(ht->hash))(data)); +#ifndef FAST_HASH + h %= ht->nb_lists; +#endif + ht->lists[h] = add_entry(ht->lists[h], data); + return 0; +} + +/* + * This function finds the entry corresponding to *data in the + * hashtable ht (using the comparison function given as argument + * to newHT) + */ +void *getHT(struct HT *ht, void *data) +{ + int h; + struct hash_item *t; + + h = ((*(ht->hash))(data)); +#ifndef FAST_HASH + h %= ht->nb_lists; +#endif + if ((t = get_entry(ht->lists[h], data, ht->cmpdata)) == 0) + return 0; + return (t->data); +} + +/* + * This function finds and delete the entry corresponding to *data + * in the hashtable ht (using the comparison function given as + * argument to newHT). + */ + +int delHT(struct HT *ht, void *data) +{ + int h; + + h = ((*(ht->hash))(data)); +#ifndef FAST_HASH + h %= ht->nb_lists; +#endif + ht->lists[h] = del_entry(ht->lists[h], data, ht->cmpdata, ht->deldata); + return 1; +} + +/* + * This function completely eradicates from memory a given hash table, + * releasing all objects + */ +void killHT(struct HT *ht) +{ + int i; + struct hash_item *t, *n; + void (*dd)(void *) = ht->deldata; + + for (i = 0; i < ht->nb_lists; i ++) for (t = ht->lists[i]; t;) { + n = t->next; + if (dd) (*dd)(t->data); + freemem(t); + t = n; + } + freemem(ht->lists); + freemem(ht); +} + +/* + * This function stores a backup of the hash table, for context stacking. + */ +void saveHT(struct HT *ht, void **buffer) +{ + struct hash_item **b = (struct hash_item **)buffer; + + mmv(b, ht->lists, ht->nb_lists * sizeof(struct hash_item *)); +} + +/* + * This function restores the saved state of the hash table. + * Do NOT use if some of the entries that were present before the backup + * have been removed (even temporarily). 
+ */ +void restoreHT(struct HT *ht, void **buffer) +{ + struct hash_item **b = (struct hash_item **)buffer; + int i; + + for (i = 0; i < ht->nb_lists; i ++) { + struct hash_item *t = ht->lists[i], *n; + + while (t != b[i]) { + n = t->next; + (*(ht->deldata))(t->data); + freemem(t); + t = n; + } + ht->lists[i] = b[i]; + } +} + +/* + * This function is evil. It inserts a new item in a saved hash table, + * tweaking the save buffer and the hash table in order to keep things + * stable. There are no checks. + */ +void tweakHT(struct HT *ht, void **buffer, void *data) +{ + int h; + struct hash_item *d, *e; + + h = ((*(ht->hash))(data)); +#ifndef FAST_HASH + h %= ht->nb_lists; +#endif + for (d = ht->lists[h]; d != buffer[h]; d = d->next); + d = add_entry(buffer[h], data); + if (buffer[h] == ht->lists[h]) { + buffer[h] = ht->lists[h] = d; + return; + } + for (e = ht->lists[h]; e->next != buffer[h]; e = e->next); + e->next = d; + buffer[h] = d; +} + +/* + * This function scans the whole table and calls the given function on + * each entry. + */ +void scanHT(struct HT *ht, void (*action)(void *)) +{ + int i; + + for (i = 0; i < ht->nb_lists; i ++) { + struct hash_item *t = ht->lists[i]; + + while (t) { + (*action)(t->data); + t = t->next; + } + } +} + +/* + * The two following fonctions are generic for storing structures + * uniquely identified by their name, which must be the first + * field of the structure. 
+ */ +int hash_struct(void *m) +{ + char *n = *(char **)m; + +#ifdef FAST_HASH + return hash_string(n); +#else + return hash_string(n) & 127; +#endif +} + +int cmp_struct(void *m1, void *m2) +{ + char *n1 = *(char **)m1, *n2 = *(char **)m2; + + return !strcmp(n1, n2); +} diff --git a/libexec/auxcpp/hash.h b/libexec/auxcpp/hash.h new file mode 100644 index 00000000000..3adbb777df9 --- /dev/null +++ b/libexec/auxcpp/hash.h @@ -0,0 +1,58 @@ +/* + * (c) Thomas Pornin 1998, 1999, 2000 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef UCPP__HASH__ +#define UCPP__HASH__ + +struct hash_item; + +struct HT { + struct hash_item **lists; + int nb_lists; + int (*cmpdata)(void *, void *); + int (*hash)(void *); + void (*deldata)(void *); +}; + +int hash_string(char *); +struct HT *newHT(int, int (*)(void *, void *), int (*)(void *), + void (*)(void *)); +void *putHT(struct HT *, void *); +void *forceputHT(struct HT *, void *); +void *getHT(struct HT *, void *); +int delHT(struct HT *, void *); +void killHT(struct HT *); +void saveHT(struct HT *, void **); +void restoreHT(struct HT *, void **); +void tweakHT(struct HT *, void **, void *); +void scanHT(struct HT *, void (*)(void *)); +int hash_struct(void *); +int cmp_struct(void *, void *); + +#endif diff --git a/libexec/auxcpp/lexer.c b/libexec/auxcpp/lexer.c new file mode 100644 index 00000000000..38125676318 --- /dev/null +++ b/libexec/auxcpp/lexer.c @@ -0,0 +1,1020 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "tune.h" +#include <stdio.h> +#include <string.h> +#include <stddef.h> +#include <limits.h> +#include "ucppi.h" +#include "mem.h" +#ifdef UCPP_MMAP +#include <unistd.h> +#include <sys/types.h> +#include <sys/mman.h> +#endif + +/* + * Character classes for description of the automaton. + * The characters used for representing classes should not appear + * explicitly in an automaton rule. + */ +#define SPC ' ' /* whitespace characters */ +#define ALP 'Z' /* A-Z, a-z, _ */ +#define NUM '9' /* 0-9 */ +#define ANY 'Y' /* any character */ +#define VCH 'F' /* void character (for end of input) */ + +/* + * flags and macros to test those flags + * STO: the currently read string is a complete token + * PUT: the currently read character must be added to the string + * FRZ: the currently read character must be kept and read again + */ +#define MOD_MK 255 +#define noMOD(x) ((x) & 255) +#define STO(x) ((x) | 256) +#define ttSTO(x) ((x) & 256) +#define FRZ(x) ((x) | 512) +#define ttFRZ(x) ((x) & 512) +#define PUT(x) ((x) | 1024) +#define ttPUT(x) ((x) & 1024) + +/* order is important */ +enum { + S_START, S_SPACE, S_BANG, S_STRING, S_STRING2, S_COLON, + S_SHARP, S_PCT, S_PCT2, S_PCT3, S_AMPER, S_CHAR, S_CHAR2, S_STAR, + S_PLUS, S_MINUS, S_DOT, S_DOT2, S_SLASH, S_NUMBER, S_NUMBER2, S_LT, + S_LT2, S_EQ, S_GT, S_GT2, S_CIRC, S_PIPE, S_BACKSLASH, + S_COMMENT, S_COMMENT2, S_COMMENT3, S_COMMENT4, S_COMMENT5, + S_NAME, S_NAME_BS, S_LCHAR, + MSTATE, + S_ILL, S_DDOT, S_DDSHARP, S_BS, S_ROGUE_BS, S_BEHEAD, S_DECAY, +
S_TRUNC, S_TRUNCC, S_OUCH +}; + +#define CMT(x) ((x) >= S_COMMENT && (x) <= S_COMMENT5) + +#define CMCR 2 + +/* + * This is the description of the automaton. It is not used "as is" + * but copied at execution time into a table. + * + * To my utmost displeasure, there are a few hacks in read_token() + * (which uses the transformed automaton) about the special handling + * of slashes, sharps, and the letter L. + */ +static struct machine_state { + int state; + unsigned char input[CMCR]; + int new_state; +} cppms[] = { + /* S_START is the generic beginning state */ + { S_START, { ANY }, S_ILL }, +#ifdef SEMPER_FIDELIS + { S_START, { SPC }, PUT(S_SPACE) }, +#else + { S_START, { SPC }, S_SPACE }, +#endif + { S_START, { '\n' }, STO(NEWLINE) }, + { S_START, { '!' }, S_BANG }, + { S_START, { '"' }, PUT(S_STRING) }, + { S_START, { '#' }, S_SHARP }, + { S_START, { '%' }, S_PCT }, + { S_START, { '&' }, S_AMPER }, + { S_START, { '\'' }, PUT(S_CHAR) }, + { S_START, { '(' }, STO(LPAR) }, + { S_START, { ')' }, STO(RPAR) }, + { S_START, { '*' }, S_STAR }, + { S_START, { '+' }, S_PLUS }, + { S_START, { ',' }, STO(COMMA) }, + { S_START, { '-' }, S_MINUS }, + { S_START, { '.' }, PUT(S_DOT) }, +#ifdef SEMPER_FIDELIS + { S_START, { '/' }, PUT(S_SLASH) }, +#else + { S_START, { '/' }, S_SLASH }, +#endif + { S_START, { NUM }, PUT(S_NUMBER) }, + { S_START, { ':' }, S_COLON }, + { S_START, { ';' }, STO(SEMIC) }, + { S_START, { '<' }, S_LT }, + { S_START, { '=' }, S_EQ }, + { S_START, { '>' }, S_GT }, + { S_START, { '?' 
}, STO(QUEST) }, + { S_START, { ALP }, PUT(S_NAME) }, + { S_START, { 'L' }, PUT(S_LCHAR) }, + { S_START, { '[' }, STO(LBRK) }, + { S_START, { ']' }, STO(RBRK) }, + { S_START, { '^' }, S_CIRC }, + { S_START, { '{' }, STO(LBRA) }, + { S_START, { '|' }, S_PIPE }, + { S_START, { '}' }, STO(RBRA) }, + { S_START, { '~' }, STO(NOT) }, + { S_START, { '\\' }, S_BACKSLASH }, + + /* after a space */ + { S_SPACE, { ANY }, FRZ(STO(NONE)) }, +#ifdef SEMPER_FIDELIS + { S_SPACE, { SPC }, PUT(S_SPACE) }, +#else + { S_SPACE, { SPC }, S_SPACE }, +#endif + + /* after a ! */ + { S_BANG, { ANY }, FRZ(STO(LNOT)) }, + { S_BANG, { '=' }, STO(NEQ) }, + + /* after a " */ + { S_STRING, { ANY }, PUT(S_STRING) }, + { S_STRING, { VCH }, FRZ(S_TRUNC) }, + { S_STRING, { '\n' }, FRZ(S_BEHEAD) }, + { S_STRING, { '\\' }, PUT(S_STRING2) }, + { S_STRING, { '"' }, PUT(STO(STRING)) }, + + { S_STRING2, { ANY }, PUT(S_STRING) }, + { S_STRING2, { VCH }, FRZ(S_TRUNC) }, + + /* after a # */ + { S_SHARP, { ANY }, FRZ(STO(SHARP)) }, + { S_SHARP, { '#' }, STO(DSHARP) }, + + /* after a : */ + { S_COLON, { ANY }, FRZ(STO(COLON)) }, + { S_COLON, { '>' }, STO(DIG_RBRK) }, + + /* after a % */ + { S_PCT, { ANY }, FRZ(STO(PCT)) }, + { S_PCT, { '=' }, STO(ASPCT) }, + { S_PCT, { '>' }, STO(DIG_RBRA) }, + { S_PCT, { ':' }, S_PCT2 }, + + /* after a %: */ + { S_PCT2, { ANY }, FRZ(STO(DIG_SHARP)) }, + { S_PCT2, { '%' }, S_PCT3 }, + + /* after a %:% */ + { S_PCT3, { ANY }, FRZ(S_DDSHARP) }, + { S_PCT3, { ':' }, STO(DIG_DSHARP) }, + + /* after a & */ + { S_AMPER, { ANY }, FRZ(STO(AND)) }, + { S_AMPER, { '=' }, STO(ASAND) }, + { S_AMPER, { '&' }, STO(LAND) }, + + /* after a ' */ + { S_CHAR, { ANY }, PUT(S_CHAR) }, + { S_CHAR, { VCH }, FRZ(S_TRUNC) }, + { S_CHAR, { '\'' }, PUT(STO(CHAR)) }, + { S_CHAR, { '\\' }, PUT(S_CHAR2) }, + + /* after a \ in a character constant + useful only for '\'' */ + { S_CHAR2, { ANY }, PUT(S_CHAR) }, + { S_CHAR2, { VCH }, FRZ(S_TRUNC) }, + + /* after a * */ + { S_STAR, { ANY }, FRZ(STO(STAR)) }, + { 
S_STAR, { '=' }, STO(ASSTAR) }, + + /* after a + */ + { S_PLUS, { ANY }, FRZ(STO(PLUS)) }, + { S_PLUS, { '+' }, STO(PPLUS) }, + { S_PLUS, { '=' }, STO(ASPLUS) }, + + /* after a - */ + { S_MINUS, { ANY }, FRZ(STO(MINUS)) }, + { S_MINUS, { '-' }, STO(MMINUS) }, + { S_MINUS, { '=' }, STO(ASMINUS) }, + { S_MINUS, { '>' }, STO(ARROW) }, + + /* after a . */ + { S_DOT, { ANY }, FRZ(STO(DOT)) }, + { S_DOT, { NUM }, PUT(S_NUMBER) }, + { S_DOT, { '.' }, S_DOT2 }, + + /* after .. */ + { S_DOT2, { ANY }, FRZ(S_DDOT) }, + { S_DOT2, { '.' }, STO(MDOTS) }, + + /* after a / */ + { S_SLASH, { ANY }, FRZ(STO(SLASH)) }, + { S_SLASH, { '=' }, STO(ASSLASH) }, +#ifdef SEMPER_FIDELIS + { S_SLASH, { '*' }, PUT(S_COMMENT) }, + { S_SLASH, { '/' }, PUT(S_COMMENT5) }, +#else + { S_SLASH, { '*' }, S_COMMENT }, + { S_SLASH, { '/' }, S_COMMENT5 }, +#endif + /* + * There is a little hack in read_token() to disable + * this last rule, if C++ (C99) comments are not enabled. + */ + + /* after a number */ + { S_NUMBER, { ANY }, FRZ(STO(NUMBER)) }, + { S_NUMBER, { ALP, NUM }, PUT(S_NUMBER) }, + { S_NUMBER, { '.' 
}, PUT(S_NUMBER) }, + { S_NUMBER, { 'E', 'e' }, PUT(S_NUMBER2) }, + { S_NUMBER, { 'P', 'p' }, PUT(S_NUMBER2) }, + + { S_NUMBER2, { ANY }, FRZ(STO(NUMBER)) }, + { S_NUMBER2, { ALP, NUM }, PUT(S_NUMBER) }, + { S_NUMBER2, { '+', '-' }, PUT(S_NUMBER) }, + + /* after a < */ + { S_LT, { ANY }, FRZ(STO(LT)) }, + { S_LT, { '=' }, STO(LEQ) }, + { S_LT, { '<' }, S_LT2 }, + { S_LT, { ':' }, STO(DIG_LBRK) }, + { S_LT, { '%' }, STO(DIG_LBRA) }, + + { S_LT2, { ANY }, FRZ(STO(LSH)) }, + { S_LT2, { '=' }, STO(ASLSH) }, + + /* after a > */ + { S_GT, { ANY }, FRZ(STO(GT)) }, + { S_GT, { '=' }, STO(GEQ) }, + { S_GT, { '>' }, S_GT2 }, + + { S_GT2, { ANY }, FRZ(STO(RSH)) }, + { S_GT2, { '=' }, STO(ASRSH) }, + + /* after a = */ + { S_EQ, { ANY }, FRZ(STO(ASGN)) }, + { S_EQ, { '=' }, STO(SAME) }, +#ifdef CAST_OP + { S_EQ, { '>' }, STO(CAST) }, +#endif + + /* after a \ */ + { S_BACKSLASH, { ANY }, FRZ(S_BS) }, + { S_BACKSLASH, { 'U', 'u' }, FRZ(S_NAME_BS) }, + + /* after a letter */ + { S_NAME, { ANY }, FRZ(STO(NAME)) }, + { S_NAME, { ALP, NUM }, PUT(S_NAME) }, + { S_NAME, { '\\' }, S_NAME_BS }, + + /* after a \ in an identifier */ + { S_NAME_BS, { ANY }, FRZ(S_ROGUE_BS) }, + { S_NAME_BS, { 'u', 'U' }, PUT(S_NAME) }, + + /* after a L */ + { S_LCHAR, { ANY }, FRZ(S_NAME) }, + { S_LCHAR, { '"' }, PUT(S_STRING) }, + { S_LCHAR, { '\'' }, PUT(S_CHAR) }, + + /* after a ^ */ + { S_CIRC, { ANY }, FRZ(STO(CIRC)) }, + { S_CIRC, { '=' }, STO(ASCIRC) }, + + /* after a | */ + { S_PIPE, { ANY }, FRZ(STO(OR)) }, + { S_PIPE, { '=' }, STO(ASOR) }, + { S_PIPE, { '|' }, STO(LOR) }, + + /* after a / and * */ +#ifdef SEMPER_FIDELIS + { S_COMMENT, { ANY }, PUT(S_COMMENT) }, + { S_COMMENT, { VCH }, FRZ(S_TRUNCC) }, + { S_COMMENT, { '*' }, PUT(S_COMMENT2) }, + + { S_COMMENT2, { ANY }, FRZ(S_COMMENT) }, + { S_COMMENT2, { VCH }, FRZ(S_TRUNCC) }, + { S_COMMENT2, { '*' }, PUT(S_COMMENT2) }, + { S_COMMENT2, { '/' }, STO(PUT(COMMENT)) }, + + { S_COMMENT5, { ANY }, PUT(S_COMMENT5) }, + { S_COMMENT5, { VCH }, 
FRZ(S_DECAY) }, + { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT)) }, +#else + { S_COMMENT, { ANY }, S_COMMENT }, + { S_COMMENT, { VCH }, FRZ(S_TRUNCC) }, + { S_COMMENT, { '*' }, S_COMMENT2 }, + + { S_COMMENT2, { ANY }, FRZ(S_COMMENT) }, + { S_COMMENT2, { VCH }, FRZ(S_TRUNCC) }, + { S_COMMENT2, { '*' }, S_COMMENT2 }, + { S_COMMENT2, { '/' }, STO(COMMENT) }, + + { S_COMMENT5, { ANY }, S_COMMENT5 }, + { S_COMMENT5, { VCH }, FRZ(S_DECAY) }, + { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT)) }, +#endif + + /* dummy end of machine description */ + { 0, { 0 }, 0 } +}; + +/* + * cppm is the table used to store the automaton: if we are in state s + * and we read character c, we apply the action cppm[s][c] (jumping to + * another state, or emitting a token). + * cppm_vch is the table for the special virtual character "end of input" + */ +static int cppm[MSTATE][MAX_CHAR_VAL]; +static int cppm_vch[MSTATE]; + +/* + * init_cppm() fills cppm[][] with the information stored in cppms[]. + * It must be called before beginning the lexing process. 
+ */ +void init_cppm(void) +{ + int i, j, k, c; + static unsigned char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + static unsigned char lower[] = "abcdefghijklmnopqrstuvwxyz"; + unsigned char *cp; + + for (i = 0; i < MSTATE; i ++) { + for (j = 0; j < MAX_CHAR_VAL; j ++) cppm[i][j] = S_OUCH; + cppm_vch[i] = S_OUCH; + } + for (i = 0; cppms[i].input[0]; i ++) for (k = 0; k < CMCR; k ++) { + int s = cppms[i].state; + int ns = cppms[i].new_state; + + switch (c = cppms[i].input[k]) { + case 0: + break; + case SPC: + /* see space_char() also */ + cppm[s][' '] = ns; + cppm[s]['\t'] = ns; + cppm[s]['\v'] = ns; + cppm[s]['\f'] = ns; +#ifdef UNBREAKABLE_SPACE + if (MAX_CHAR_VAL > UNBREAKABLE_SPACE) + cppm[s][UNBREAKABLE_SPACE] = ns; +#endif + break; + case ALP: + for (cp = upper; *cp; cp ++) cppm[s][(int)*cp] = ns; + for (cp = lower; *cp; cp ++) cppm[s][(int)*cp] = ns; + cppm[s]['_'] = ns; + break; + case NUM: + for (j = '0'; j <= '9'; j ++) cppm[s][j] = ns; + break; + case ANY: + for (j = 0; j < MAX_CHAR_VAL; j ++) cppm[s][j] = ns; + cppm_vch[s] = ns; + break; + case VCH: + cppm_vch[s] = ns; + break; + default: + cppm[s][c] = ns; + break; + } + } +} + +/* + * Make some character as equivalent to a letter for identifiers. + */ +void set_identifier_char(int c) +{ + cppm[S_START][c] = PUT(S_NAME); + cppm[S_NAME][c] = PUT(S_NAME); +} + +/* + * Remove the "identifier" status from a character. 
+ */ +void unset_identifier_char(int c) +{ + cppm[S_START][c] = S_ILL; + cppm[S_NAME][c] = FRZ(STO(NAME)); +} + +int space_char(int c) +{ + if (c == ' ' || c == '\t' || c == '\v' || c == '\f' +#ifdef UNBREAKABLE_SPACE + || c == UNBREAKABLE_SPACE +#endif + ) return 1; + return 0; +} + +#ifndef NO_UCPP_BUF +/* + * our output buffer is full, flush it + */ +void flush_output(struct lexer_state *ls) +{ + size_t x = ls->sbuf, y = 0, z; + + if (ls->sbuf == 0) return; + do { + z = fwrite(ls->output_buf + y, 1, x, ls->output); + x -= z; + y += z; + } while (z && x > 0); + if (!y) { + error(ls->line, "could not flush output (disk full ?)"); + die(); + } + ls->sbuf = 0; +} +#endif + +/* + * Output one character; flush the buffer if needed. + * This function should not be called, except by put_char(). + */ +static inline void write_char(struct lexer_state *ls, unsigned char c) +{ +#ifndef NO_UCPP_BUF + ls->output_buf[ls->sbuf ++] = c; + if (ls->sbuf == OUTPUT_BUF_MEMG) flush_output(ls); +#else + if (putc((int)c, ls->output) == EOF) { + error(ls->line, "output write error (disk full ?)"); + die(); + } +#endif + if (c == '\n') { + ls->oline ++; + } +} + +/* + * schedule a character for output + */ +void put_char(struct lexer_state *ls, unsigned char c) +{ + if (ls->flags & KEEP_OUTPUT) write_char(ls, c); +} + +/* + * get next raw input character + */ +static inline int read_char(struct lexer_state *ls) +{ + unsigned char c; + + if (!ls->input) { + return ((ls->pbuf ++) < ls->ebuf) ? 
+ ls->input_string[ls->pbuf - 1] : -1; + } + while (1) { +#ifndef NO_UCPP_BUF + if (ls->pbuf == ls->ebuf) { +#ifdef UCPP_MMAP + if (ls->from_mmap) { + munmap((void *)ls->input_buf, ls->ebuf); + ls->from_mmap = 0; + ls->input_buf = ls->input_buf_sav; + } +#endif + ls->ebuf = fread(ls->input_buf, 1, + INPUT_BUF_MEMG, ls->input); + ls->pbuf = 0; + } + if (ls->ebuf == 0) return -1; + c = ls->input_buf[ls->pbuf ++]; +#else + int x = getc(ls->input); + + if (x == EOF) return -1; + c = x; +#endif + if (ls->flags & COPY_LINE) { + if (c == '\n') { + ls->copy_line[ls->cli] = 0; + ls->cli = 0; + } else if (ls->cli < (COPY_LINE_LENGTH - 1)) { + ls->copy_line[ls->cli ++] = c; + } + } + if (ls->macfile && c == '\n') { + ls->macfile = 0; + continue; + } + ls->macfile = 0; + if (c == '\r') { + /* + * We found a '\r'; we handle it as a newline + * and ignore the next newline. This should work + * with all combinations of Msdos, MacIntosh and + * Unix files on these three platforms. On other + * platforms, native file formats are always + * supported. 
+ */ + ls->macfile = 1; + c = '\n'; + } + break; + } + return c; +} + +/* + * next_fifo_char(), char_lka1() and char_lka2() give a two character + * look-ahead on the input stream; this is needed for trigraphs + */ +static inline int next_fifo_char(struct lexer_state *ls) +{ + int c; + + if (ls->nlka != 0) { + c = ls->lka[0]; + ls->lka[0] = ls->lka[1]; + ls->nlka --; + } else c = read_char(ls); + return c; +} + +static inline int char_lka1(struct lexer_state *ls) +{ + if (ls->nlka == 0) { + ls->lka[0] = read_char(ls); + ls->nlka ++; + } + return ls->lka[0]; +} + +static inline int char_lka2(struct lexer_state *ls) +{ +#ifdef AUDIT + if (ls->nlka == 0) ouch("always in motion future is"); +#endif + if (ls->nlka == 1) { + ls->lka[1] = read_char(ls); + ls->nlka ++; + } + return ls->lka[1]; +} + +static struct trigraph { + int old, new; +} trig[9] = { + { '=', '#' }, + { '/', '\\' }, + { '\'', '^' }, + { '(', '[' }, + { ')', ']' }, + { '!', '|' }, + { '<', '{' }, + { '>', '}' }, + { '-', '~' } +}; + +/* + * Returns the next character, after treatment of trigraphs and terminating + * backslashes. Return value is -1 if there is no more input. + */ +static inline int next_char(struct lexer_state *ls) +{ + int c; + + if (!ls->discard) return ls->last; + ls->discard = 0; + do { + c = next_fifo_char(ls); + /* check trigraphs */ + if (c == '?' && char_lka1(ls) == '?' 
+ && (ls->flags & HANDLE_TRIGRAPHS)) { + int i, d; + + d = char_lka2(ls); + for (i = 0; i < 9; i ++) if (d == trig[i].old) { + if (ls->flags & WARN_TRIGRAPHS) { + ls->count_trigraphs ++; + } + if (ls->flags & WARN_TRIGRAPHS_MORE) { + warning(ls->line, "trigraph ?""?%c " + "encountered", d); + } + next_fifo_char(ls); + next_fifo_char(ls); + c = trig[i].new; + break; + } + } + if (c == '\\' && char_lka1(ls) == '\n') { + ls->line ++; + next_fifo_char(ls); + } else if (c == '\r' && char_lka1(ls) == '\n') { + ls->line ++; + next_fifo_char(ls); + c = '\n'; + return c; + } else { + ls->last = c; + return c; + } + } while (1); +} + +/* + * wrapper for next_char(), to be called from outside + * (used by #error, #include directives) + */ +int grap_char(struct lexer_state *ls) +{ + return next_char(ls); +} + +/* + * Discard the current character, so that the next call to next_char() + * will step into the input stream. + */ +void discard_char(struct lexer_state *ls) +{ +#ifdef AUDIT + if (ls->discard) ouch("overcollecting garbage"); +#endif + ls->discard = 1; + ls->utf8 = 0; + if (ls->last == '\n') ls->line ++; +} + +/* + * Convert an UTF-8 encoded character to a Universal Character Name + * using \u (or \U when appropriate). 
+ */ +static int utf8_to_string(unsigned char buf[], unsigned long utf8) +{ + unsigned long val = 0; /* decoded code point */ + static char hex[16] = "0123456789abcdef"; + + if (utf8 & 0x80UL) { /* multi-byte sequence: raw bytes packed 8 bits apart, last byte lowest */ + unsigned long x1, x2, x3, x4; + + x1 = (utf8 >> 24) & 0x7fUL; + x2 = (utf8 >> 16) & 0x7fUL; + x3 = (utf8 >> 8) & 0x7fUL; + x4 = (utf8) & 0x3fUL; + x1 &= 0x07UL; /* 4-byte lead 11110xxx: 3 payload bits */ + if (x2 & 0x40UL) x2 &= 0x0fUL; /* x2 is a 3-byte lead 1110xxxx, not a continuation */ + if (x3 & 0x40UL) x3 &= 0x1fUL; /* x3 is a 2-byte lead 110xxxxx, not a continuation */ + val = x4 | (x3 << 6) | (x2 << 12) | (x1 << 18); /* x1 sits above three 6-bit groups, hence 18 */ + } else val = utf8; + if (val < 128) { + buf[0] = val; + buf[1] = 0; + return 1; + } else if (val <= 0xffffUL) { /* everything that fits four hex digits uses the short \u form */ + buf[0] = '\\'; + buf[1] = 'u'; + buf[2] = hex[(size_t)(val >> 12)]; + buf[3] = hex[(size_t)((val >> 8) & 0xfU)]; + buf[4] = hex[(size_t)((val >> 4) & 0xfU)]; + buf[5] = hex[(size_t)(val & 0xfU)]; + buf[6] = 0; + return 6; + } + buf[0] = '\\'; + buf[1] = 'U'; + buf[2] = '0'; + buf[3] = '0'; + buf[4] = hex[(size_t)(val >> 20)]; + buf[5] = hex[(size_t)((val >> 16) & 0xfU)]; + buf[6] = hex[(size_t)((val >> 12) & 0xfU)]; + buf[7] = hex[(size_t)((val >> 8) & 0xfU)]; + buf[8] = hex[(size_t)((val >> 4) & 0xfU)]; + buf[9] = hex[(size_t)(val & 0xfU)]; + buf[10] = 0; + return 10; +} + +/* + * Scan the identifier and put it in canonical form: + * -- transform \U0000xxxx into \uxxxx + * -- inside \u and \U, make letters lower case + * -- report (some) incorrect use of UCN + */ +static void canonize_id(struct lexer_state *ls, char *id) +{ + char *c, *d; + + for (c = d = id; *c;) { + if (*c == '\\') { + int i; + + if (!*(c + 1)) goto canon_error; + if (*(c + 1) == 'U') { + for (i = 0; i < 8 && *(c + i + 2); i ++); + if (i != 8) goto canon_error; + *(d ++) = '\\'; + c += 2; + for (i = 0; i < 4 && *(c + i) == '0'; i ++); + if (i == 4) { + *(d ++) = 'u'; + c += 4; + } else { + *(d ++) = 'U'; + i = 8; + } + for (; i > 0; i --) { + switch (*c) { + case 'A': *(d ++) = 'a'; break; + case 'B': *(d ++) = 'b'; break; + case 'C': *(d ++) = 'c'; break; + case 'D': *(d ++) = 'd'; break; + case 'E': *(d ++) = 'e'; break; + case 'F': *(d ++) =
'f'; break; + default: *(d ++) = *c; break; + } + c ++; + } + } else if (*(c + 1) == 'u') { + for (i = 0; i < 4 && *(c + i + 2); i ++); + if (i != 4) goto canon_error; + *(d ++) = '\\'; + *(d ++) = 'u'; + c += 2; + for (; i > 0; i --) { + switch (*c) { + case 'A': *(d ++) = 'a'; break; + case 'B': *(d ++) = 'b'; break; + case 'C': *(d ++) = 'c'; break; + case 'D': *(d ++) = 'd'; break; + case 'E': *(d ++) = 'e'; break; + case 'F': *(d ++) = 'f'; break; + default: *(d ++) = *c; break; + } + c ++; + } + } else goto canon_error; + continue; + } + *(d ++) = *(c ++); + } + *d = 0; + return; + +canon_error: + for (; *c; *(d ++) = *(c ++)); + if (ls->flags & WARN_STANDARD) { + warning(ls->line, "malformed identifier with UCN: '%s'", id); + } + *d = 0; +} + +/* + * Run the automaton, in order to get the next token. + * This function should not be called, except by next_token() + * + * return value: 1 on error, 2 on end-of-file, 0 otherwise. + */ +static inline int read_token(struct lexer_state *ls) +{ + int cstat = S_START, nstat; + size_t ltok = 0; + int c, outc = 0, ucn_in_id = 0; + int shift_state; + unsigned long utf8; + long l = ls->line; + + ls->ctok->line = l; + if (ls->pending_token) { + if ((ls->ctok->type = ls->pending_token) == BUNCH) { + ls->ctok->name[0] = '\\'; + ls->ctok->name[1] = 0; + } + ls->pending_token = 0; + return 0; + } + if (ls->flags & UTF8_SOURCE) { + utf8 = ls->utf8; + shift_state = 0; + } + if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT)) + for (; ls->line > ls->oline;) put_char(ls, '\n'); + do { + c = next_char(ls); + if (c < 0) { + if ((ls->flags & UTF8_SOURCE) && shift_state) { + if (ls->flags & WARN_STANDARD) + warning(ls->line, "truncated UTF-8 " + "character"); + shift_state = 0; + utf8 = 0; + } + if (cstat == S_START) return 2; + nstat = cppm_vch[cstat]; + } else { + if (ls->flags & UTF8_SOURCE) { + if (shift_state) { + if ((c & 0xc0) != 0x80) { + if (ls->flags & WARN_STANDARD) + warning(ls->line, + "truncated " + "UTF-8 " + 
"character"); + shift_state = 0; + utf8 = 0; + c = '_'; + } else { + utf8 = (utf8 << 8) | c; + if (-- shift_state) { + ls->discard = 1; + continue; + } + c = '_'; + } + } else if ((c & 0xc0) == 0xc0) { + if ((c & 0x30) == 0x30) { + shift_state = 3; + } else if (c & 0x20) { + shift_state = 2; + } else { + shift_state = 1; + } + utf8 = c; + ls->discard = 1; + continue; + } else utf8 = 0; + } + nstat = cppm[cstat][c < MAX_CHAR_VAL ? c : 0]; + } +#ifdef AUDIT + if (nstat == S_OUCH) { + ouch("bad move..."); + } +#endif + /* + * disable C++-like comments + */ + if (nstat == S_COMMENT5 && !(ls->flags & CPLUSPLUS_COMMENTS)) + nstat = FRZ(STO(SLASH)); + + if (noMOD(nstat) >= MSTATE && !ttSTO(nstat)) + switch (noMOD(nstat)) { + case S_ILL: + if (ls->flags & CCHARSET) { + error(ls->line, "illegal character '%c'", c); + return 1; + } + nstat = PUT(STO(BUNCH)); + break; + case S_BS: + ls->ctok->name[0] = '\\'; + ltok ++; + nstat = FRZ(STO(BUNCH)); + if (!(ls->flags & LEXER)) put_char(ls, '\\'); + break; + case S_ROGUE_BS: + ls->pending_token = BUNCH; + nstat = FRZ(STO(NAME)); + break; + case S_DDOT: + ls->pending_token = DOT; + nstat = FRZ(STO(DOT)); + break; + case S_DDSHARP: + ls->pending_token = PCT; + nstat = FRZ(STO(DIG_SHARP)); + break; + case S_BEHEAD: + error(l, "unfinished string at end of line"); + return 1; + case S_DECAY: + warning(l, "unterminated // comment"); + nstat = FRZ(STO(COMMENT)); + break; + case S_TRUNC: + error(l, "truncated token"); + return 1; + case S_TRUNCC: + error(l, "truncated comment"); + return 1; +#ifdef AUDIT + case S_OUCH: + ouch("machine went out of control"); + break; +#endif + } + if (!ttFRZ(nstat)) { + discard_char(ls); + if (!(ls->flags & LEXER) && ls->condcomp) { + int z = ttSTO(nstat) ? 
S_ILL : noMOD(nstat); + + if (cstat == S_NAME || z == S_NAME + || ((CMT(cstat) || CMT(z)) + && (ls->flags & DISCARD_COMMENTS))) { + outc = 0; + } else if (z == S_LCHAR || z == S_SLASH + || (z == S_SHARP && ls->ltwnl) + || (z == S_PCT && ls->ltwnl) + || (z == S_BACKSLASH)) { + outc = c; + } else if (z == S_PCT2 && ls->ltwnl) { + outc = -1; + } else if (z == S_PCT3 && ls->ltwnl) { + /* we have %:% but this still might + not be a %:%: */ + outc = -2; + } else { + if (outc < 0) { + put_char(ls, '%'); + put_char(ls, ':'); + if (outc == -2) + put_char(ls, '%'); + outc = 0; + } else if (outc) { + put_char(ls, outc); + outc = 0; + } + put_char(ls, c); + } + } + } else if (outc == '/' && !(ls->flags & LEXER) + && ls->condcomp) { + /* this is a hack: we need to dump a pending slash */ + put_char(ls, outc); + outc = 0; + } + if (ttPUT(nstat)) { + if (cstat == S_NAME_BS) { + ucn_in_id = 1; + wan(ls->ctok->name, ltok, '\\', ls->tknl); + } + if ((ls->flags & UTF8_SOURCE) && utf8) { + unsigned char buf[11]; + int i, j; + + for (i = 0, j = utf8_to_string(buf, utf8); + i < j; i ++) + wan(ls->ctok->name, ltok, buf[i], + ls->tknl); + /* if (j > 1) ucn_in_id = 1; */ + } else wan(ls->ctok->name, ltok, + (unsigned char)c, ls->tknl); + } + if (ttSTO(nstat)) { + if (S_TOKEN(noMOD(nstat))) { + wan(ls->ctok->name, ltok, + (unsigned char)0, ls->tknl); + } + ls->ctok->type = noMOD(nstat); + break; + } + cstat = noMOD(nstat); + } while (1); + if (!(ls->flags & LEXER) && (ls->flags & DISCARD_COMMENTS) + && ls->ctok->type == COMMENT) put_char(ls, ' '); + if (ucn_in_id && ls->ctok->type == NAME) + canonize_id(ls, ls->ctok->name); + return 0; +} + +/* + * fills ls->ctok with the next token + */ +int next_token(struct lexer_state *ls) +{ + if (ls->flags & READ_AGAIN) { + ls->flags &= ~READ_AGAIN; + if (!(ls->flags & LEXER)) { + char *c = S_TOKEN(ls->ctok->type) ? 
+ ls->ctok->name : token_name(ls->ctok); + if (ls->ctok->type == OPT_NONE) { + ls->ctok->type = NONE; +#ifdef SEMPER_FIDELIS + ls->ctok->name[0] = ' '; + ls->ctok->name[1] = 0; +#endif + put_char(ls, ' '); + } else if (ls->ctok->type != NAME && + !(ls->ltwnl && (ls->ctok->type == SHARP + || ls->ctok->type == DIG_SHARP))) + for (; *c; c ++) put_char(ls, *c); + } + return 0; + } + return read_token(ls); +} diff --git a/libexec/auxcpp/macro.c b/libexec/auxcpp/macro.c new file mode 100644 index 00000000000..5b9540c67c2 --- /dev/null +++ b/libexec/auxcpp/macro.c @@ -0,0 +1,1921 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "tune.h" +#include <stdio.h> +#include <string.h> +#include <stddef.h> +#include <limits.h> +#include "ucppi.h" +#include "mem.h" +#include "nhash.h" + +/* + * we store macros in a hash table, and retrieve them using their name + * as identifier. + */ +static HTT macros; +static int macros_init_done = 0; + +static void del_macro(void *m) +{ + struct macro *n = m; + size_t i; + + for (i = 0; (int)i < n->narg; i ++) freemem(n->arg[i]); + if (n->narg > 0) freemem(n->arg); +#ifdef LOW_MEM + if (n->cval.length) freemem(n->cval.t); +#else + if (n->val.nt) { + for (i = 0; i < n->val.nt; i ++) + if (S_TOKEN(n->val.t[i].type)) + freemem(n->val.t[i].name); + freemem(n->val.t); + } +#endif + freemem(n); +} + +static inline struct macro *new_macro(void) +{ + struct macro *m = getmem(sizeof(struct macro)); + + m->narg = -1; + m->nest = 0; +#ifdef LOW_MEM + m->cval.length = 0; +#else + m->val.nt = m->val.art = 0; +#endif + m->vaarg = 0; + return m; +} + +/* + * for special macros, and the "defined" operator + */ +enum { + MAC_NONE, MAC_DEFINED, + MAC_LINE, MAC_FILE, MAC_DATE, MAC_TIME, MAC_STDC, MAC_PRAGMA +}; +#define MAC_SPECIAL MAC_LINE + +/* + * returns 1 for "defined" + * returns x > 1 for a special macro such as __FILE__ + * returns 0 otherwise + */ +static inline int check_special_macro(char *name) +{ + if (!strcmp(name, "defined")) return MAC_DEFINED; + if (*name != '_') return MAC_NONE; + if (*(name + 1) == 'P') { + if (!strcmp(name, "_Pragma")) return MAC_PRAGMA; +
return MAC_NONE; + } else if (*(name + 1) != '_') return MAC_NONE; + if (no_special_macros) return MAC_NONE; + if (!strcmp(name, "__LINE__")) return MAC_LINE; + else if (!strcmp(name, "__FILE__")) return MAC_FILE; + else if (!strcmp(name, "__DATE__")) return MAC_DATE; + else if (!strcmp(name, "__TIME__")) return MAC_TIME; + else if (!strcmp(name, "__STDC__")) return MAC_STDC; + return MAC_NONE; +} + +int c99_compliant = 1; +int c99_hosted = 1; + +/* + * add the special macros to the macro table + */ +static void add_special_macros(void) +{ + struct macro *m; + + HTT_put(&macros, new_macro(), "__LINE__"); + HTT_put(&macros, new_macro(), "__FILE__"); + HTT_put(&macros, new_macro(), "__DATE__"); + HTT_put(&macros, new_macro(), "__TIME__"); + HTT_put(&macros, new_macro(), "__STDC__"); + m = new_macro(); m->narg = 1; + m->arg = getmem(sizeof(char *)); m->arg[0] = sdup("foo"); + HTT_put(&macros, m, "_Pragma"); + if (c99_compliant) { +#ifndef LOW_MEM + struct token t; +#endif + + m = new_macro(); +#ifdef LOW_MEM + m->cval.t = getmem(9); + m->cval.t[0] = NUMBER; + mmv(m->cval.t + 1, "199901L", 8); + m->cval.length = 9; +#else + t.type = NUMBER; + t.line = 0; + t.name = sdup("199901L"); + aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG); +#endif + HTT_put(&macros, m, "__STDC_VERSION__"); + } + if (c99_hosted) { +#ifndef LOW_MEM + struct token t; +#endif + + m = new_macro(); +#ifdef LOW_MEM + m->cval.t = getmem(3); + m->cval.t[0] = NUMBER; + mmv(m->cval.t + 1, "1", 2); + m->cval.length = 3; +#else + t.type = NUMBER; + t.line = 0; + t.name = sdup("1"); + aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG); +#endif + HTT_put(&macros, m, "__STDC_HOSTED__"); + } +} + +#ifdef LOW_MEM +/* + * We store macro arguments as a single-byte token MACROARG, followed + * by the argument number as a one or two-byte value. If the argument + * number is between 0 and 127 (inclusive), it is stored as such in + * a single byte.
Otherwise, it is supposed to be a 14-bit number, with + * the 7 upper bits stored in the first byte (with the high bit set to 1) + * and the 7 lower bits in the second byte. + */ +#endif + +/* + * print the content of a macro, in #define form + */ +static void print_macro(void *vm) +{ + struct macro *m = vm; + char *mname = HASH_ITEM_NAME(m); + int x = check_special_macro(mname); + size_t i; + + if (x != MAC_NONE) { + fprintf(emit_output, "/* #define %s */ /* special */\n", + mname); + return; + } + fprintf(emit_output, "#define %s", mname); + if (m->narg >= 0) { + fprintf(emit_output, "("); + for (i = 0; i < (size_t)(m->narg); i ++) { + fprintf(emit_output, i ? ", %s" : "%s", m->arg[i]); + } + if (m->vaarg) { + fputs(m->narg ? ", ..." : "...", emit_output); + } + fprintf(emit_output, ")"); + } +#ifdef LOW_MEM + if (m->cval.length == 0) { + fputc('\n', emit_output); + return; + } + fputc(' ', emit_output); + for (i = 0; i < m->cval.length;) { + int tt = m->cval.t[i ++]; + + if (tt == MACROARG) { + unsigned anum = m->cval.t[i]; + + if (anum >= 128) anum = ((anum & 127U) << 8) + | m->cval.t[++ i]; + if (anum == (unsigned)m->narg) + fputs("__VA_ARGS__", emit_output); + else + fputs(m->arg[anum], emit_output); + i ++; + } + else if (S_TOKEN(tt)) { + fputs((char *)(m->cval.t + i), emit_output); + i += 1 + strlen((char *)(m->cval.t + i)); + } else fputs(operators_name[tt], emit_output); + } +#else + if (m->val.nt == 0) { + fputc('\n', emit_output); + return; + } + fputc(' ', emit_output); + for (i = 0; i < m->val.nt; i ++) { + if (m->val.t[i].type == MACROARG) { + if (m->val.t[i].line == m->narg) + fputs("__VA_ARGS__", emit_output); + else + fputs(m->arg[(size_t)(m->val.t[i].line)], + emit_output); + } else fputs(token_name(m->val.t + i), emit_output); + } +#endif + fputc('\n', emit_output); +} + +/* + * Send a token to the output (a token_fifo in lexer mode, the output + * buffer in stand alone mode). 
+ */ +void print_token(struct lexer_state *ls, struct token *t, long uz_line) +{ + char *x = t->name; + + if (uz_line && t->line < 0) t->line = uz_line; + if (ls->flags & LEXER) { + struct token at; + + at = *t; + if (S_TOKEN(t->type)) { + at.name = sdup(at.name); + throw_away(ls->gf, at.name); + } + aol(ls->output_fifo->t, ls->output_fifo->nt, at, + TOKEN_LIST_MEMG); + return; + } + if (ls->flags & KEEP_OUTPUT) { + for (; ls->oline < ls->line;) put_char(ls, '\n'); + } + if (!S_TOKEN(t->type)) x = operators_name[t->type]; + for (; *x; x ++) put_char(ls, *x); +} + +/* + * Send a token to the output at a given line (this is for text output + * and unreplaced macros due to lack of arguments). + */ +static void print_token_nailed(struct lexer_state *ls, struct token *t, + long nail_line) +{ + char *x = t->name; + + if (ls->flags & LEXER) { + print_token(ls, t, 0); + return; + } + if (ls->flags & KEEP_OUTPUT) { + for (; ls->oline < nail_line;) put_char(ls, '\n'); + } + if (!S_TOKEN(t->type)) x = operators_name[t->type]; + for (; *x; x ++) put_char(ls, *x); +} + +/* + * send a reduced whitespace token to the output + */ +#define print_space(ls) do { \ + struct token lt; \ + lt.type = OPT_NONE; \ + lt.line = (ls)->line; \ + print_token((ls), &lt, 0); \ + } while (0) + +/* + * We found a #define directive; parse the end of the line, perform + * sanity checks, store the new macro into the "macros" hash table. + * + * In case of a redefinition of a macro: we enforce the rule that a + * macro should be redefined identically, including the spelling of + * parameters. We emit an error on offending code; dura lex, sed lex. + * After all, it is easy to avoid such problems, with a #undef directive.
+ */
+int handle_define(struct lexer_state *ls)
+{
+	struct macro *m = 0, *n;
+#ifdef LOW_MEM
+	struct token_fifo mv;
+#endif
+	int ltwws = 1, redef = 0;
+	char *mname = 0;
+	int narg;
+	size_t nt = 0;
+	long l = ls->line;
+
+#ifdef LOW_MEM
+	mv.art = mv.nt = 0;
+#endif
+	/* find the next non-white token on the line, this should be
+	   the macro name */
+	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+		if (ttMWS(ls->ctok->type)) continue;
+		if (ls->ctok->type == NAME) mname = sdup(ls->ctok->name);
+		break;
+	}
+	if (mname == 0) {
+		error(l, "missing macro name");
+		return 1;
+	}
+	if (check_special_macro(mname)) {
+		error(l, "trying to redefine the special macro %s", mname);
+		goto warp_error;
+	}
+	/*
+	 * If a macro with this name was already defined: the K&R
+	 * states that the new macro should be identical to the old one
+	 * (with some arcane rule of equivalence of whitespace); otherwise,
+	 * redefining the macro is an error. Most preprocessors would
+	 * only emit a warning (or nothing at all) on an unidentical
+	 * redefinition.
+	 *
+	 * Since it is easy to avoid this error (with a #undef directive),
+	 * we choose to enforce the rule and emit an error.
+	 */
+	if ((n = HTT_get(&macros, mname)) != 0) {
+		/* redefinition of a macro: we must check that we define
+		   it identical */
+		redef = 1;
+#ifdef LOW_MEM
+		n->cval.rp = 0;
+#endif
+		freemem(mname);
+		mname = 0;
+	}
+	/*
+	 * From here on: in the redefinition case, m stays null and the
+	 * new text is only compared against n; otherwise m is the macro
+	 * being built and mval is its token list.
+	 */
+	if (!redef) {
+		m = new_macro();
+		m->narg = -1;
+#ifdef LOW_MEM
+#define mval	mv
+#else
+#define mval	(m->val)
+#endif
+	}
+	if (next_token(ls)) {
+		/*
+		 * Input ended right after the macro name. For a
+		 * redefinition there is no macro under construction, so
+		 * define_end must not be reached: the new definition is
+		 * object-like with an empty body, compare it as such.
+		 */
+		if (redef) {
+			if (n->narg >= 0) goto redef_error_2;
+			goto redef_end;
+		}
+		goto define_end;
+	}
+	/*
+	 * Check if the token immediately following the macro name is
+	 * a left parenthesis; if so, then this is a macro with arguments.
+	 * Collect their names and try to match the next parenthesis.
+	 */
+	if (ls->ctok->type == LPAR) {
+		int i, j;
+		int need_comma = 0, saw_mdots = 0;
+
+		narg = 0;
+		while (!next_token(ls)) {
+			if (ls->ctok->type == NEWLINE) {
+				error(l, "truncated macro definition");
+				goto define_error;
+			}
+			if (ls->ctok->type == COMMA) {
+				if (saw_mdots) {
+					error(l, "'...' must end the macro "
+						"argument list");
+					goto warp_error;
+				}
+				if (!need_comma) {
+					error(l, "void macro argument");
+					goto warp_error;
+				}
+				need_comma = 0;
+				continue;
+			} else if (ls->ctok->type == NAME) {
+				if (saw_mdots) {
+					error(l, "'...' must end the macro "
+						"argument list");
+					goto warp_error;
+				}
+				if (need_comma) {
+					error(l, "missing comma in "
+						"macro argument list");
+					goto warp_error;
+				}
+				if (!redef) {
+					aol(m->arg, narg,
+						sdup(ls->ctok->name), 8);
+					/* we must keep track of m->narg
+					   so that cleanup in case of
+					   error works. */
+					m->narg = narg;
+					if (narg == 128
+						&& (ls->flags & WARN_STANDARD))
+						warning(l, "more arguments to "
+							"macro than the ISO "
+							"limit (127)");
+#ifdef LOW_MEM
+					if (narg == 32767) {
+						error(l, "too many arguments "
+							"in macro definition "
+							"(max 32766)");
+						goto warp_error;
+					}
+#endif
+				} else {
+					/* this is a redefinition of the
+					   macro; check equality between
+					   old and new definitions */
+					if (narg >= n->narg) goto redef_error;
+					if (strcmp(ls->ctok->name,
+						n->arg[narg ++]))
+						goto redef_error;
+				}
+				need_comma = 1;
+				continue;
+			} else if ((ls->flags & MACRO_VAARG)
+				&& ls->ctok->type == MDOTS) {
+				if (need_comma) {
+					error(l, "missing comma before '...'");
+					goto warp_error;
+				}
+				if (redef && !n->vaarg) goto redef_error;
+				if (!redef) m->vaarg = 1;
+				saw_mdots = 1;
+				need_comma = 1;
+				continue;
+			} else if (ls->ctok->type == RPAR) {
+				if (narg > 0 && !need_comma) {
+					error(l, "void macro argument");
+					goto warp_error;
+				}
+				if (redef && n->vaarg && !saw_mdots)
+					goto redef_error;
+				break;
+			} else if (ttMWS(ls->ctok->type)) {
+				continue;
+			}
+			error(l, "invalid macro argument");
+			goto warp_error;
+		}
+		if (!redef) {
+			for (i = 1; i < narg; i ++) for (j = 0; j < i; j ++)
+				if (!strcmp(m->arg[i], m->arg[j])) {
+					error(l, "duplicate macro "
+						"argument");
+					goto warp_error;
+				}
+			m->narg = narg;
+		} else if (narg != n->narg) {
+			/* fewer parameters than the old definition, or the
+			   old macro was object-like: not identical */
+			goto redef_error;
+		}
+	} else {
+		if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD))
+			warning(ls->line, "identifier not followed by "
+				"whitespace in #define");
+		/* an object-like definition cannot match a function-like
+		   macro, whatever the replacement list is */
+		if (redef && n->narg >= 0) goto redef_error;
+		ls->flags |= READ_AGAIN;
+		narg = 0;
+	}
+	if (redef) nt = 0;
+
+	/* now, we have the arguments. Let's get the macro contents. */
+	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+		struct token t;
+
+		t.type = ls->ctok->type;
+		if (ltwws && ttMWS(t.type)) continue;
+		t.line = 0;
+		if (t.type == NAME) {
+			int i;
+
+			if ((ls->flags & MACRO_VAARG)
+				&& !strcmp(ls->ctok->name, "__VA_ARGS__")) {
+				if (redef) {
+					if (!n->vaarg) goto redef_error;
+				} else if (!m->vaarg) {
+					error(l, "'__VA_ARGS__' is forbidden "
+						"in macros with a fixed "
+						"number of arguments");
+					goto warp_error;
+				}
+				t.type = MACROARG;
+				t.line = redef ? n->narg : m->narg;
+			}
+			for (i = 0; i < narg; i ++)
+				if (!strcmp(redef ? n->arg[i] : m->arg[i],
+					ls->ctok->name)) {
+					t.type = MACROARG;
+					/* this is a hack: we store the
+					   argument number in the line field */
+					t.line = i;
+					break;
+				}
+		}
+		if (!redef && S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
+		if (ttMWS(t.type)) {
+			if (ltwws) continue;
+#ifdef SEMPER_FIDELIS
+			t.type = OPT_NONE;
+#else
+			t.type = NONE;
+#endif
+			ltwws = 1;
+		} else ltwws = 0;
+		if (!redef) {
+			/* we ensure that each macro token has a correct
+			   line number */
+			if (t.type != MACROARG) t.line = 1;
+			aol(mval.t, mval.nt, t, TOKEN_LIST_MEMG);
+		} else {
+#ifdef LOW_MEM
+			if (n->cval.rp >= n->cval.length) {
+				/* past the end of the old body: only one
+				   trailing whitespace token is tolerated */
+#ifdef SEMPER_FIDELIS
+				if (t.type != OPT_NONE) goto redef_error;
+#else
+				if (t.type != NONE) goto redef_error;
+#endif
+			} else {
+				int tt;
+
+				if (t.type != n->cval.t[n->cval.rp]) {
+					goto redef_error;
+				} else if (t.type == MACROARG) {
+					unsigned anum =
+						n->cval.t[n->cval.rp + 1];
+
+					/* m is null here: the stored
+					   body belongs to n */
+					if (anum >= 128U)
+						anum = ((anum & 127U) << 8)
+						| n->cval.t[n->cval.rp + 2];
+					if (anum != (unsigned)t.line)
+						goto redef_error;
+				} else if (S_TOKEN(t.type)
+					&& strcmp(ls->ctok->name,
+					(char *)(n->cval.t
+					+ n->cval.rp + 1))) {
+					goto redef_error;
+				}
+				/* step over the stored token */
+				tt = n->cval.t[n->cval.rp ++];
+				if (S_TOKEN(tt)) n->cval.rp += 1
+					+ strlen((char *)(n->cval.t
+					+ n->cval.rp));
+				else if (tt == MACROARG) {
+					/* the first byte of the argument
+					   number tells whether a second
+					   one follows */
+					if (n->cval.t[n->cval.rp ++] >= 128)
+						n->cval.rp ++;
+				}
+			}
+#else
+			if (nt >= n->val.nt) {
+#ifdef SEMPER_FIDELIS
+				if (t.type != OPT_NONE) goto redef_error;
+#else
+				if (t.type != NONE) goto redef_error;
+#endif
+			} else if (t.type != n->val.t[nt].type
+				|| (t.type == MACROARG
+				&& t.line != n->val.t[nt].line)
+				|| (S_TOKEN(t.type) && strcmp(ls->ctok->name,
+				n->val.t[nt].name))) {
+				goto redef_error;
+			}
+#endif
+			nt ++;
+		}
+	}
+
+redef_end:
+	if (redef) {
+		/* the old body must have been matched completely */
+#ifdef LOW_MEM
+		if (n->cval.rp < n->cval.length) goto redef_error_2;
+#else
+		if (nt < n->val.nt) goto redef_error_2;
+#endif
+		return 0;
+	}
+
+	/* now we have the complete macro; perform some checks about
+	   the operators # and ##, and, if everything is ok,
+	   store the macro into the hash table */
+define_end:
+#ifdef SEMPER_FIDELIS
+	if (mval.nt && mval.t[mval.nt - 1].type == OPT_NONE) {
+#else
+	if (mval.nt && mval.t[mval.nt - 1].type == NONE) {
+#endif
+		mval.nt --;
+		if (mval.nt == 0) freemem(mval.t);
+	}
+	if (mval.nt != 0) {
+		size_t i;
+
+		/* some checks about the macro */
+		if (mval.t[0].type == DSHARP
+			|| mval.t[0].type == DIG_DSHARP
+			|| mval.t[mval.nt - 1].type == DSHARP
+			|| mval.t[mval.nt - 1].type == DIG_DSHARP) {
+			error(l, "operator '##' may neither begin "
+				"nor end a macro");
+			goto define_error;
+		}
+		if (m->narg >= 0) for (i = 0; i < mval.nt; i ++)
+			if ((mval.t[i].type == SHARP
+				|| mval.t[i].type == DIG_SHARP) &&
+				(i == (mval.nt - 1)
+				|| (ttMWS(mval.t[i + 1].type) &&
+				(i == mval.nt - 2
+				|| mval.t[i + 2].type != MACROARG))
+				|| (!ttMWS(mval.t[i + 1].type)
+				&& mval.t[i + 1].type != MACROARG))) {
+				error(l, "operator '#' not followed "
+					"by a macro argument");
+				goto define_error;
+			}
+	}
+#ifdef LOW_MEM
+	{
+		/* pack the token list into the compact byte form: one
+		   type byte, then either the NUL-terminated text or the
+		   one/two-byte argument number */
+		size_t i, l;
+
+		for (i = 0, l = 0; i < mval.nt; i ++) {
+			l ++;
+			if (S_TOKEN(mval.t[i].type))
+				l += 1 + strlen(mval.t[i].name);
+			else if (mval.t[i].type == MACROARG) {
+				l ++;
+				if (mval.t[i].line >= 128) l ++;
+			}
+		}
+		m->cval.length = l;
+		if (l) m->cval.t = getmem(l);
+		for (i = 0, l = 0; i < mval.nt; i ++) {
+			m->cval.t[l ++] = mval.t[i].type;
+			if (S_TOKEN(mval.t[i].type)) {
+				size_t x = 1 + strlen(mval.t[i].name);
+
+				mmv(m->cval.t + l, mval.t[i].name, x);
+				l += x;
+				freemem(mval.t[i].name);
+			}
+			else if (mval.t[i].type == MACROARG) {
+				unsigned anum = mval.t[i].line;
+
+				if (anum >= 128) {
+					m->cval.t[l ++] = 128 | (anum >> 8);
+					m->cval.t[l ++] = anum & 0xFF;
+				} else {
+					m->cval.t[l ++] = anum;
+				}
+			}
+		}
+		if (mval.nt) freemem(mval.t);
+	}
+#endif
+	HTT_put(&macros, m, mname);
+	freemem(mname);
+	if (emit_defines) print_macro(m);
+	return 0;
+
+	/* the *_error labels that come first skip the rest of the line */
+redef_error:
+	while (ls->ctok->type != NEWLINE && !next_token(ls));
+redef_error_2:
+	error(l, "macro '%s' redefined unidentically", HASH_ITEM_NAME(n));
+	return 1;
+warp_error:
+	while (ls->ctok->type != NEWLINE && !next_token(ls));
+define_error:
+	if (m) del_macro(m);
+	if (mname) freemem(mname);
+#ifdef LOW_MEM
+	if (mv.nt) {
+		size_t i;
+
+		for (i = 0; i < mv.nt; i ++)
+			if (S_TOKEN(mv.t[i].type)) freemem(mv.t[i].name);
+		freemem(mv.t);
+	}
+#endif
+	return 1;
+#undef mval
+}
+
+/*
+ * Get the arguments for a macro. This code is tricky because there can
+ * be multiple sources for these arguments, if we are in the middle of
+ * a macro replacement; arguments are macro-replaced before inclusion
+ * into the macro replacement.
+ *
+ * Tokens are taken from tfi first (if not null); when tfi is null or
+ * penury is non-zero, they are then taken from ls. The collected
+ * arguments are stored in atl[0..narg-1], plus atl[narg] for the
+ * variable part if vaarg is set. *wr is set to 1 if whitespace was
+ * skipped before the opening parenthesis (or its absence) was found.
+ *
+ * return value:
+ *   0    arguments collected
+ *   1    no argument (last token read from next_token())
+ *   2    no argument (last token read from tfi)
+ *   3    no argument (nothing read)
+ *   4    error
+ *
+ * Void arguments are allowed in C99.
+ */
+static int collect_arguments(struct lexer_state *ls, struct token_fifo *tfi,
+	int penury, struct token_fifo *atl, int narg, int vaarg, int *wr)
+{
+	int ltwws = 1, npar = 0, i;
+	struct token *ct = 0;
+	int read_from_fifo = 0;
+	long begin_line = ls->line;
+
+	/* fetch the next token into ct; evaluates to non-zero when no
+	   token could be obtained */
+#define unravel(ls)	(read_from_fifo = 0, !((tfi && tfi->art < tfi->nt \
+	&& (read_from_fifo = 1) != 0 && (ct = tfi->t + (tfi->art ++))) \
+	|| ((!tfi || penury) && !next_token(ls) && (ct = (ls)->ctok))))
+
+	/*
+	 * collect_arguments() is assumed to setup correctly atl
+	 * (this is not elegant, but it works)
+	 */
+	for (i = 0; i < narg; i ++) atl[i].art = atl[i].nt = 0;
+	if (vaarg) atl[narg].art = atl[narg].nt = 0;
+	*wr = 0;
+	/* look for the opening parenthesis, skipping whitespace */
+	while (!unravel(ls)) {
+		if (!read_from_fifo && ct->type == NEWLINE) ls->ltwnl = 1;
+		if (ttWHI(ct->type)) {
+			*wr = 1;
+			continue;
+		}
+		if (ct->type == LPAR) {
+			npar = 1;
+		}
+		break;
+	}
+	if (!npar) {
+		if (ct == ls->ctok) return 1;
+		if (read_from_fifo) return 2;
+		return 3;
+	}
+	if (!read_from_fifo && ct == ls->ctok) ls->ltwnl = 0;
+	i = 0;
+	if ((narg + vaarg) == 0) {
+		/* no parameter at all: only ')' may follow */
+		while(!unravel(ls)) {
+			if (ttWHI(ct->type)) continue;
+			if (ct->type == RPAR) goto harvested;
+			npar = 1;
+			goto too_many_args;
+		}
+	}
+	while (!unravel(ls)) {
+		struct token t;
+
+		if (ct->type == LPAR) npar ++;
+		else if (ct->type == RPAR && (-- npar) == 0) {
+			if (atl[i].nt != 0
+				&& ttMWS(atl[i].t[atl[i].nt - 1].type))
+					atl[i].nt --;
+			i ++;
+			/*
+			 * C99 standard states that at least one argument
+			 * should be present for the ... part; to relax
+			 * this behaviour, change 'narg + vaarg' to 'narg'.
+			 */
+			if (i < (narg + vaarg)) {
+				error(begin_line, "not enough arguments "
+					"to macro");
+				return 4;
+			}
+			if (i > narg) {
+				if (!(ls->flags & MACRO_VAARG) || !vaarg)
+					goto too_many_args;
+			}
+			goto harvested;
+		} else if (ct->type == COMMA && npar <= 1 && i < narg) {
+			/* top-level comma: switch to the next argument */
+			if (atl[i].nt != 0
+				&& ttMWS(atl[i].t[atl[i].nt - 1].type))
+					atl[i].nt --;
+			if (++ i == narg) {
+				if (!(ls->flags & MACRO_VAARG) || !vaarg)
+					goto too_many_args;
+			}
+			if (i > 30000) goto too_many_args;
+			ltwws = 1;
+			continue;
+		} else if (ltwws && ttWHI(ct->type)) continue;
+
+		t.type = ct->type;
+		if (!read_from_fifo) t.line = ls->line; else t.line = ct->line;
+		/*
+		 * Stringification applies only to macro arguments;
+		 * so we handle here OPT_NONE.
+		 * OPT_NONE is kept, but does not count as whitespace,
+		 * and merges with other whitespace to give a fully
+		 * qualified NONE token. Two OPT_NONE tokens merge.
+		 * Initial and final OPT_NONE are discarded (initial
+		 * is already done, as OPT_NONE is matched by ttWHI).
+		 */
+		if (ttWHI(t.type)) {
+			if (t.type != OPT_NONE) {
+				t.type = NONE;
+#ifdef SEMPER_FIDELIS
+				t.name = sdup(" ");
+				throw_away(ls->gf, t.name);
+#endif
+				ltwws = 1;
+			}
+			if (atl[i].nt > 0
+				&& atl[i].t[atl[i].nt - 1].type == OPT_NONE)
+					atl[i].nt --;
+		} else {
+			ltwws = 0;
+			if (S_TOKEN(t.type)) {
+				t.name = ct->name;
+				/* the text of the lexer's current token is
+				   duplicated before being stored */
+				if (ct == (ls)->ctok) {
+					t.name = sdup(t.name);
+					throw_away(ls->gf, t.name);
+				}
+			}
+		}
+		aol(atl[i].t, atl[i].nt, t, TOKEN_LIST_MEMG);
+	}
+	error(begin_line, "unfinished macro call");
+	return 4;
+too_many_args:
+	error(begin_line, "too many arguments to macro");
+	/* swallow the rest of the call, up to the matching ')' */
+	while (npar && !unravel(ls)) {
+		if (ct->type == LPAR) npar ++;
+		else if (ct->type == RPAR) npar --;
+	}
+	return 4;
+harvested:
+	if (i > 127 && (ls->flags & WARN_STANDARD))
+		warning(begin_line, "macro call with %d arguments (ISO "
+			"specifies 127 max)", i);
+	return 0;
+#undef unravel
+}
+
+/*
+ * concat_token() is called when the ## operator is used. It uses
+ * the struct lexer_state dsharp_lexer to parse the result of the
+ * concatenation.
+ *
+ * Law enforcement: if the whole string does not produce a valid
+ * single token, an error (non-zero result) is returned.
+ */
+struct lexer_state dsharp_lexer;
+
+static inline int concat_token(struct token *t1, struct token *t2)
+{
+	char *left = token_name(t1), *right = token_name(t2);
+	size_t llen = strlen(left), rlen = strlen(right);
+	size_t total = llen + rlen;
+	unsigned char *joined = getmem(total + 1);
+	int r;
+
+	/* build the pasted spelling */
+	mmv(joined, left, llen);
+	mmv(joined + llen, right, rlen);
+	joined[total] = 0;
+
+	/* and lex it from memory with the dedicated lexer state */
+	dsharp_lexer.input = 0;
+	dsharp_lexer.input_string = joined;
+	dsharp_lexer.pbuf = 0;
+	dsharp_lexer.ebuf = total;
+	dsharp_lexer.discard = 1;
+	dsharp_lexer.flags = DEFAULT_LEXER_FLAGS;
+	dsharp_lexer.pending_token = 0;
+	r = next_token(&dsharp_lexer);
+	freemem(joined);
+
+	/* failure: no token, input not fully consumed, a second token
+	   pending, or a character still held back at the end */
+	if (r == 1) return 1;
+	if (dsharp_lexer.pbuf < total) return 1;
+	if (dsharp_lexer.pending_token) return 1;
+	return (dsharp_lexer.pbuf == total && !dsharp_lexer.discard);
+}
+
+#ifdef PRAGMA_TOKENIZE
+/*
+ * tokenize_string() takes a string as input, and splits it into tokens,
+ * reassembling the tokens into a single compressed string generated by
+ * compress_token_list(); this function is used for _Pragma processing.
+ * A null pointer is returned if the string could not be lexed entirely.
+ */
+struct lexer_state tokenize_lexer;
+
+static char *tokenize_string(struct lexer_state *ls, char *buf)
+{
+	struct token_fifo tf;
+	size_t bl = strlen(buf);
+	size_t k;
+
+	tokenize_lexer.input = 0;
+	tokenize_lexer.input_string = (unsigned char *)buf;
+	tokenize_lexer.pbuf = 0;
+	tokenize_lexer.ebuf = bl;
+	tokenize_lexer.discard = 1;
+	tokenize_lexer.flags = ls->flags | LEXER;
+	tokenize_lexer.pending_token = 0;
+	tf.art = tf.nt = 0;
+	for (;;) {
+		struct token t;
+
+		if (next_token(&tokenize_lexer)) break;
+		/* whitespace is not kept in the tokenized pragma */
+		if (ttWHI(tokenize_lexer.ctok->type)) continue;
+		t = *(tokenize_lexer.ctok);
+		if (S_TOKEN(t.type)) t.name = sdup(t.name);
+		aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
+	}
+	if (tokenize_lexer.pbuf >= bl)
+		return (char *)((compress_token_list(&tf)).t);
+
+	/* the lexer stopped before the end of the string: drop what
+	   was collected so far */
+	if (tf.nt) {
+		for (k = 0; k < tf.nt; k ++)
+			if (S_TOKEN(tf.t[k].type))
+				freemem(tf.t[k].name);
+		freemem(tf.t);
+	}
+	return 0;
+}
+#endif
+
+/*
+ * stringify_string() has a self-explanatory
name. It is called when
+ * the # operator is used in a macro and a string constant must be
+ * stringified.
+ */
+static inline char *stringify_string(char *x)
+{
+	/*
+	 * The result is x enclosed in double quotes, with a backslash
+	 * inserted before each '"' and '\' found inside a string literal
+	 * or a character constant (and before the quotes delimiting a
+	 * string literal).
+	 *
+	 * Worst case: every character gets a backslash, plus the two
+	 * enclosing quotes and the terminating 0. Allocating for that
+	 * bound allows a single pass, so that size and contents cannot
+	 * disagree.
+	 */
+	char *d = getmem(2 * strlen(x) + 3);
+	char *w = d;
+	char *y;
+	int inside_str = 0, inside_cc = 0, escaped = 0;
+
+	*(w ++) = '"';
+	for (y = x; *y; y ++) {
+		int must_quote = 0;
+
+		if (inside_str || inside_cc) {
+			if (*y == '"' || *y == '\\') must_quote = 1;
+			if (escaped) {
+				/* this character was introduced by a
+				   backslash: it cannot end the literal */
+				escaped = 0;
+			} else if (*y == '\\') {
+				escaped = 1;
+			} else if (inside_str ? (*y == '"') : (*y == '\'')) {
+				inside_str = inside_cc = 0;
+			}
+		} else if (*y == '"') {
+			inside_str = 1;
+			must_quote = 1;
+		} else if (*y == '\'') {
+			inside_cc = 1;
+		}
+		if (must_quote) *(w ++) = '\\';
+		*(w ++) = *y;
+	}
+	*(w ++) = '"';
+	*w = 0;
+	return d;
+}
+
+/*
+ * stringify() produces a constant string, result of the # operator
+ * on a list of tokens. Tokens whose type is OPT_NONE, or CPPERR and
+ * above, do not contribute to the result. The returned string is
+ * allocated; the caller disposes of it.
+ */
+static char *stringify(struct token_fifo *tf)
+{
+	size_t tlen;
+	size_t i;
+	char *x, *y;
+
+	/* first pass: total length of the spellings */
+	for (tlen = 0, i = 0; i < tf->nt; i ++)
+		if (tf->t[i].type < CPPERR && tf->t[i].type != OPT_NONE)
+			tlen += strlen(token_name(tf->t + i));
+	if (tlen == 0) return sdup("\"\"");
+	x = getmem(tlen + 1);
+	/* second pass: concatenate them */
+	for (tlen = 0, i = 0; i < tf->nt; i ++) {
+		char *tn;
+
+		if (tf->t[i].type >= CPPERR || tf->t[i].type == OPT_NONE)
+			continue;
+		tn = token_name(tf->t + i);
+		strcpy(x + tlen, tn);
+		tlen += strlen(tn);
+	}
+	/* no need to add a trailing 0: strcpy() did that (and the string
+	   is not empty) */
+	y = stringify_string(x);
+	freemem(x);
+	return y;
+}
+
+/*
+ * Two strings evaluated at initialization time, to handle the __TIME__
+ * and __DATE__ special macros.
+ *
+ * C99 specifies that these macros should remain constant throughout
+ * the whole preprocessing.
+ */
+char compile_time[12], compile_date[24];
+
+/*
+ * substitute_macro() performs the macro substitution. It is called when
+ * an identifier recognized as a macro name has been found; this function
+ * tries to collect the arguments (if needed), applies # and ## operators
+ * and performs recursive and nested macro expansions.
+ *
+ * In the substitution of a macro, we remove all newlines that were in the
+ * arguments. This might confuse error reporting (which could report
+ * erroneous line numbers) or have worse effect if the preprocessor is
+ * used for another language pickier than C. Since the interface between
+ * the preprocessor and the compiler is not fully specified, I believe
+ * that this is no violation of the standard. Comments welcome.
+ *
+ * We take tokens from tfi. If tfi has no more tokens to give: we may
+ * take some tokens from ls to complete a call (fetch arguments) if
+ * and only if penury is non zero.
+ *
+ * reject_nested is the nesting level above which a macro is considered
+ * to be already under replacement; l is the line number given to the
+ * emitted tokens. The function returns 0 on success (including the case
+ * where the name is emitted unreplaced) and 1 on error.
+ */
+int substitute_macro(struct lexer_state *ls, struct macro *m,
+	struct token_fifo *tfi, int penury, int reject_nested, long l)
+{
+	char *mname = HASH_ITEM_NAME(m);
+	struct token_fifo *atl = 0, etl;
+	struct token t, *ct;
+	int i, save_nest = m->nest;
+	size_t save_art, save_tfi = 0, etl_limit;
+	int ltwds, ntwds, ltwws;
+	int pragma_op = 0;
+
+	/*
+	 * Reject the replacement, if we are already inside the macro.
+	 */
+	if (m->nest > reject_nested) {
+		t.type = NAME;
+		t.line = ls->line;
+		t.name = mname;
+		print_token(ls, &t, 0);
+		return 0;
+	}
+
+	/*
+	 * put a separation from preceding tokens
+	 */
+	print_space(ls);
+
+	/*
+	 * Check if the macro is a special one.
+	 */
+	if ((i = check_special_macro(mname)) >= MAC_SPECIAL) {
+		/* we have a special macro */
+		switch (i) {
+			char buf[30], *bbuf, *cfn;
+
+		case MAC_LINE:
+			t.type = NUMBER;
+			t.line = l;
+			sprintf(buf, "%ld", l);
+			t.name = buf;
+			print_space(ls);
+			print_token(ls, &t, 0);
+			break;
+		case MAC_FILE:
+			t.type = STRING;
+			t.line = l;
+			cfn = current_long_filename ?
+				current_long_filename : current_filename;
+			/* room for every character doubled, the two
+			   quotes and the terminating 0 */
+			bbuf = getmem(2 * strlen(cfn) + 3);
+			{
+				char *c, *d;
+				int lcwb = 0;
+
+				/* lcwb: "last character was a backslash";
+				   each run of backslashes in the file name
+				   comes out as exactly two of them */
+				bbuf[0] = '"';
+				for (c = cfn, d = bbuf + 1; *c; c ++) {
+					if (*c == '\\') {
+						if (lcwb) continue;
+						*(d ++) = '\\';
+						lcwb = 1;
+					} else lcwb = 0;
+					*(d ++) = *c;
+				}
+				*(d ++) = '"';
+				*(d ++) = 0;
+			}
+			t.name = bbuf;
+			print_space(ls);
+			print_token(ls, &t, 0);
+			freemem(bbuf);
+			break;
+		case MAC_DATE:
+			t.type = STRING;
+			t.line = l;
+			t.name = compile_date;
+			print_space(ls);
+			print_token(ls, &t, 0);
+			break;
+		case MAC_TIME:
+			t.type = STRING;
+			t.line = l;
+			t.name = compile_time;
+			print_space(ls);
+			print_token(ls, &t, 0);
+			break;
+		case MAC_STDC:
+			t.type = NUMBER;
+			t.line = l;
+			t.name = "1";
+			print_space(ls);
+			print_token(ls, &t, 0);
+			break;
+		case MAC_PRAGMA:
+			if (reject_nested > 0) {
+				/* do not replace _Pragma() unless toplevel */
+				t.type = NAME;
+				t.line = ls->line;
+				t.name = mname;
+				print_token(ls, &t, 0);
+				return 0;
+			}
+			pragma_op = 1;
+			goto collect_args;
+#ifdef AUDIT
+		default:
+			ouch("unbekanntes fliegendes macro");
+#endif
+		}
+		return 0;
+	}
+
+	/*
+	 * If the macro has arguments, collect them.
+	 */
+collect_args:
+	if (m->narg >= 0) {
+		unsigned long save_flags = ls->flags;
+		int wr = 0;
+
+		ls->flags |= LEXER;
+		if (m->narg > 0 || m->vaarg)
+			atl = getmem((m->narg + m->vaarg)
+				* sizeof(struct token_fifo));
+		switch (collect_arguments(ls, tfi, penury, atl,
+			m->narg, m->vaarg, &wr)) {
+		case 1:
+			/* the macro expected arguments, but we did not
+			   find any; the last read token should be read
+			   again. */
+			ls->flags = save_flags | READ_AGAIN;
+			goto no_argument_next;
+		case 2:
+			tfi->art --;
+			/* fall through */
+		case 3:
+			ls->flags = save_flags;
+		no_argument_next:
+			/* no call: the macro name is output as is */
+			t.type = NAME;
+			t.line = l;
+			t.name = mname;
+			print_token_nailed(ls, &t, l);
+			if (wr) {
+				t.type = NONE;
+				t.line = l;
+#ifdef SEMPER_FIDELIS
+				t.name = " ";
+#endif
+				print_token(ls, &t, 0);
+				goto exit_macro_2;
+			}
+			goto exit_macro_1;
+		case 4:
+			ls->flags = save_flags;
+			goto exit_error_1;
+		}
+		ls->flags = save_flags;
+	}
+
+	/*
+	 * If the macro is _Pragma, and we got here, then we have
+	 * exactly one argument. We check it, unstringize it, and
+	 * emit a PRAGMA token.
+	 */
+	if (pragma_op) {
+		char *pn;
+
+		if (atl[0].nt != 1 || atl[0].t[0].type != STRING) {
+			error(ls->line, "invalid argument to _Pragma");
+			if (atl[0].nt) freemem(atl[0].t);
+			freemem(atl);
+			goto exit_error;
+		}
+		pn = atl[0].t[0].name;
+		if ((pn[0] == '"' && pn[1] == '"') || (pn[0] == 'L'
+			&& pn[1] == '"' && pn[2] == '"')) {
+			/* void pragma -- just ignore it */
+			freemem(atl[0].t);
+			freemem(atl);
+			return 0;
+		}
+		if (ls->flags & TEXT_OUTPUT) {
+#ifdef PRAGMA_DUMP
+	/*
+	 * This code works because we actually evaluate arguments in a
+	 * lazy way: we scan a macro argument only if it appears in the
+	 * output, and exactly as many times as it appears. Therefore,
+	 * _Pragma() will get evaluated just like they should.
+	 */
+			char *c = atl[0].t[0].name, *d;
+
+			for (d = "\n#pragma "; *d; d ++) put_char(ls, *d);
+			d = (*c == 'L') ? c + 2 : c + 1;
+			for (; *d != '"'; d ++) {
+				if (*d == '\\' && (*(d + 1) == '\\'
+					|| *(d + 1) == '"')) {
+					d ++;
+				}
+				put_char(ls, *d);
+			}
+			put_char(ls, '\n');
+			ls->oline = ls->line;
+			enter_file(ls, ls->flags);
+#else
+			if (ls->flags & WARN_PRAGMA)
+				warning(ls->line,
+					"_Pragma() ignored and not dumped");
+#endif
+		} else if (ls->flags & HANDLE_PRAGMA) {
+			char *c = atl[0].t[0].name, *d, *buf;
+			struct token t;
+
+			/* a wide string is a string */
+			if (*c == 'L') c ++;
+			c ++;
+			/* unstringize: \\ and \" lose their backslash; the
+			   closing quote leaves room for the final 0 */
+			for (buf = d = getmem(strlen(c)); *c != '"'; c ++) {
+				if (*c == '\\' && (*(c + 1) == '\\'
+					|| *(c + 1) == '"')) {
+					*(d ++) = *(++ c);
+				} else *(d ++) = *c;
+			}
+			*d = 0;
+			t.type = PRAGMA;
+			t.line = ls->line;
+#ifdef PRAGMA_TOKENIZE
+			t.name = tokenize_string(ls, buf);
+			freemem(buf);
+			buf = t.name;
+			if (!buf) {
+				freemem(atl[0].t);
+				freemem(atl);
+				goto exit_error;
+			}
+#else
+			t.name = buf;
+#endif
+			aol(ls->toplevel_of->t, ls->toplevel_of->nt,
+				t, TOKEN_LIST_MEMG);
+			throw_away(ls->gf, buf);
+		}
+		freemem(atl[0].t);
+		freemem(atl);
+		return 0;
+	}
+
+	/*
+	 * Now we expand and replace the arguments in the macro; we
+	 * also handle '#' and '##'. If we find an argument, that has
+	 * to be replaced, we expand it in its own token list, then paste
+	 * it. Tricky point: when we paste an argument, we must scan
+	 * again the resulting list for further replacements. This
+	 * implies problems with regards to nesting self-referencing
+	 * macros.
+	 *
+	 * We do then YAUH (yet another ugly hack): if a macro is replaced,
+	 * and nested replacement exhibit the same macro, we mark it with
+	 * a negative line number. All produced negative line numbers
+	 * must be cleaned in the end.
+	 */
+
+#define ZAP_LINE(t)	do { \
+		if ((t).type == NAME) { \
+			struct macro *zlm = HTT_get(&macros, (t).name); \
+			if (zlm && zlm->nest > reject_nested) \
+				(t).line = -1 - (t).line; \
+		} \
+	} while (0)
+
+#ifdef LOW_MEM
+	save_art = m->cval.rp;
+	m->cval.rp = 0;
+#else
+	save_art = m->val.art;
+	m->val.art = 0;
+#endif
+	etl.art = etl.nt = 0;
+	m->nest = reject_nested + 1;
+	ltwds = ntwds = 0;
+#ifdef LOW_MEM
+	while (m->cval.rp < m->cval.length) {
+#else
+	while (m->val.art < m->val.nt) {
+#endif
+		size_t next, z;
+#ifdef LOW_MEM
+		struct token uu;
+
+		/* decode the next token of the compact body into uu */
+		ct = &uu;
+		ct->line = 1;
+		t.type = ct->type = m->cval.t[m->cval.rp ++];
+		if (ct->type == MACROARG) {
+			unsigned anum = m->cval.t[m->cval.rp ++];
+
+			if (anum >= 128U) anum = ((anum & 127U) << 8)
+				| (unsigned)m->cval.t[m->cval.rp ++];
+			ct->line = anum;
+		} else if (S_TOKEN(ct->type)) {
+			t.name = ct->name = (char *)(m->cval.t + m->cval.rp);
+			m->cval.rp += 1 + strlen(ct->name);
+		}
+#ifdef SEMPER_FIDELIS
+		else if (ct->type == OPT_NONE) {
+			t.type = ct->type = NONE;
+			t.name = ct->name = " ";
+		}
+#endif
+		t.line = ls->line;
+		next = m->cval.rp;
+		/* is the next token (possibly after whitespace) a '##' ? */
+		if ((next < m->cval.length && (m->cval.t[z = next] == DSHARP
+			|| m->cval.t[z = next] == DIG_DSHARP))
+			|| ((next + 1) < m->cval.length
+			&& ttWHI(m->cval.t[next])
+			&& (m->cval.t[z = next + 1] == DSHARP
+			|| m->cval.t[z = next + 1] == DIG_DSHARP))) {
+			ntwds = 1;
+			m->cval.rp = z;
+		} else ntwds = 0;
+#else
+		ct = m->val.t + (m->val.art ++);
+		next = m->val.art;
+		t.type = ct->type;
+		t.line = ls->line;
+#ifdef SEMPER_FIDELIS
+		if (t.type == OPT_NONE) {
+			t.type = NONE;
+			t.name = " ";
+		} else
+#endif
+		t.name = ct->name;
+		/* is the next token (possibly after whitespace) a '##' ? */
+		if ((next < m->val.nt && (m->val.t[z = next].type == DSHARP
+			|| m->val.t[z = next].type == DIG_DSHARP))
+			|| ((next + 1) < m->val.nt
+			&& ttWHI(m->val.t[next].type)
+			&& (m->val.t[z = next + 1].type == DSHARP
+			|| m->val.t[z = next + 1].type == DIG_DSHARP))) {
+			ntwds = 1;
+			m->val.art = z;
+		} else ntwds = 0;
+#endif
+		if (ct->type == MACROARG) {
+#ifdef DSHARP_TOKEN_MERGE
+			int need_opt_space = 1;
+#endif
+			z = ct->line;	/* the argument number is there */
+			if (ltwds && atl[z].nt != 0 && etl.nt) {
+				/* paste the last emitted token with the
+				   first token of the argument */
+				if (concat_token(etl.t + (-- etl.nt),
+					atl[z].t)) {
+					warning(ls->line, "operator '##' "
+						"produced the invalid token "
+						"'%s%s'",
+						token_name(etl.t + etl.nt),
+						token_name(atl[z].t));
+					/* keep both tokens, unpasted */
+					etl.nt ++;
+					atl[z].art = 0;
+#ifdef DSHARP_TOKEN_MERGE
+					need_opt_space = 0;
+#endif
+				} else {
+					if (etl.nt == 0) freemem(etl.t);
+					else if (!ttWHI(etl.t[etl.nt - 1]
+						.type)) {
+						t.type = OPT_NONE;
+						t.line = ls->line;
+						aol(etl.t, etl.nt, t,
+							TOKEN_LIST_MEMG);
+					}
+					t.type = dsharp_lexer.ctok->type;
+					t.line = ls->line;
+					if (S_TOKEN(t.type)) {
+						t.name = sdup(dsharp_lexer
+							.ctok->name);
+						throw_away(ls->gf, t.name);
+					}
+					ZAP_LINE(t);
+					aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+					/* first argument token consumed */
+					atl[z].art = 1;
+				}
+			} else atl[z].art = 0;
+			if (
+#ifdef DSHARP_TOKEN_MERGE
+				need_opt_space &&
+#endif
+				atl[z].art < atl[z].nt && (!etl.nt
+				|| !ttWHI(etl.t[etl.nt - 1].type))) {
+				t.type = OPT_NONE;
+				t.line = ls->line;
+				aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+			}
+			if (ltwds || ntwds) {
+				/* operand of '##': the argument is pasted
+				   without being macro-expanded */
+				while (atl[z].art < atl[z].nt) {
+					t = atl[z].t[atl[z].art ++];
+					t.line = ls->line;
+					ZAP_LINE(t);
+					aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+				}
+			} else {
+				struct token_fifo *save_tf;
+				unsigned long save_flags;
+				int ret = 0;
+
+				/* expand the argument on its own, with the
+				   output redirected into etl */
+				atl[z].art = 0;
+				save_tf = ls->output_fifo;
+				ls->output_fifo = &etl;
+				save_flags = ls->flags;
+				ls->flags |= LEXER;
+				while (atl[z].art < atl[z].nt) {
+					struct macro *nm;
+					struct token *cct;
+
+					cct = atl[z].t + (atl[z].art ++);
+					if (cct->type == NAME
+						&& cct->line >= 0
+						&& (nm = HTT_get(&macros,
+						cct->name))
+						&& nm->nest <=
+						(reject_nested + 1)) {
+						ret |= substitute_macro(ls,
+							nm, atl + z, 0,
+							reject_nested + 1, l);
+						continue;
+					}
+					t = *cct;
+					ZAP_LINE(t);
+					aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+				}
+				ls->output_fifo = save_tf;
+				ls->flags = save_flags;
+				if (ret) {
+#ifdef LOW_MEM
+					m->cval.rp = save_art;
+#else
+					m->val.art = save_art;
+#endif
+					goto exit_error_2;
+				}
+			}
+			if (!ntwds && (!etl.nt
+				|| !ttWHI(etl.t[etl.nt - 1].type))) {
+				t.type = OPT_NONE;
+				t.line = ls->line;
+				aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+			}
+			ltwds = 0;
+			continue;
+		}
+		/*
+		 * This code is definitely cursed.
+		 *
+		 * For the extremely brave reader who tries to understand
+		 * what is happening: ltwds is a flag meaning "last token
+		 * was double-sharp" and ntwds means "next token will be
+		 * double-sharp". The tokens are from the macro definition,
+		 * and scanned from left to right. Arguments that are
+		 * not implied into a #/## construction are macro-expanded
+		 * separately, then included into the token stream.
+		 */
+		if (ct->type == DSHARP || ct->type == DIG_DSHARP) {
+			if (ltwds) {
+				error(ls->line, "quad sharp");
+#ifdef LOW_MEM
+				m->cval.rp = save_art;
+#else
+				m->val.art = save_art;
+#endif
+				goto exit_error_2;
+			}
+			/* skip the whitespace that follows the '##' */
+#ifdef LOW_MEM
+			if (m->cval.rp < m->cval.length
+				&& ttMWS(m->cval.t[m->cval.rp]))
+					m->cval.rp ++;
+#else
+			if (m->val.art < m->val.nt
+				&& ttMWS(m->val.t[m->val.art].type))
+					m->val.art ++;
+#endif
+			ltwds = 1;
+			continue;
+		} else if (ltwds && etl.nt != 0) {
+			/* paste the last emitted token with this one */
+			if (concat_token(etl.t + (-- etl.nt), ct)) {
+				warning(ls->line, "operator '##' produced "
+					"the invalid token '%s%s'",
+					token_name(etl.t + etl.nt),
+					token_name(ct));
+				/* keep both tokens, unpasted */
+				etl.nt ++;
+			} else {
+				if (etl.nt == 0) freemem(etl.t);
+				t.type = dsharp_lexer.ctok->type;
+				t.line = ls->line;
+				if (S_TOKEN(t.type)) {
+					t.name = sdup(dsharp_lexer.ctok->name);
+					throw_away(ls->gf, t.name);
+				}
+				ct = &t;
+			}
+		}
+		ltwds = 0;
+#ifdef LOW_MEM
+		if ((ct->type == SHARP || ct->type == DIG_SHARP)
+			&& next < m->cval.length
+			&& (m->cval.t[next] == MACROARG
+			|| (ttMWS(m->cval.t[next])
+			&& (next + 1) < m->cval.length
+			&& m->cval.t[next + 1] == MACROARG))) {
+
+			unsigned anum;
+#else
+		if ((ct->type == SHARP || ct->type == DIG_SHARP)
+			&& next < m->val.nt
+			&& (m->val.t[next].type == MACROARG
+			|| (ttMWS(m->val.t[next].type)
+			&& (next + 1) < m->val.nt
+			&& m->val.t[next + 1].type == MACROARG))) {
+#endif
+			/*
+			 * We have a # operator followed by (an optional
+			 * whitespace and) a macro argument; this means
+			 * stringification. So be it.
+			 */
+#ifdef LOW_MEM
+			if (ttMWS(m->cval.t[next])) m->cval.rp ++;
+#else
+			if (ttMWS(m->val.t[next].type)) m->val.art ++;
+#endif
+			t.type = STRING;
+#ifdef LOW_MEM
+			anum = m->cval.t[++ m->cval.rp];
+			if (anum >= 128U) anum = ((anum & 127U) << 8)
+				| (unsigned)m->cval.t[++ m->cval.rp];
+			t.name = stringify(atl + anum);
+			m->cval.rp ++;
+#else
+			t.name = stringify(atl +
+				(size_t)(m->val.t[m->val.art ++].line));
+#endif
+			throw_away(ls->gf, t.name);
+			ct = &t;
+			/*
+			 * There is no need for extra spaces here.
+			 */
+		}
+		t = *ct;
+		ZAP_LINE(t);
+		aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+	}
+#ifdef LOW_MEM
+	m->cval.rp = save_art;
+#else
+	m->val.art = save_art;
+#endif
+
+	/*
+	 * Now etl contains the expanded macro, to be parsed again for
+	 * further expansions -- much easier, since '#' and '##' have
+	 * already been handled.
+	 * However, we might need some input from tfi. So, we paste
+	 * the contents of tfi after etl, and we put back what was
+	 * not used.
+	 *
+	 * Some adjacent spaces are merged; only unique NONE, or sequences
+	 * OPT_NONE NONE are emitted.
+	 */
+	etl_limit = etl.nt;
+	if (tfi) {
+		save_tfi = tfi->art;
+		while (tfi->art < tfi->nt) aol(etl.t, etl.nt,
+			tfi->t[tfi->art ++], TOKEN_LIST_MEMG);
+	}
+	ltwws = 0;
+	while (etl.art < etl_limit) {
+		struct macro *nm;
+
+		ct = etl.t + (etl.art ++);
+		if (ct->type == NAME && ct->line >= 0
+			&& (nm = HTT_get(&macros, ct->name))) {
+			if (substitute_macro(ls, nm, &etl,
+				penury, reject_nested, l)) {
+				m->nest = save_nest;
+				goto exit_error_2;
+			}
+			ltwws = 0;
+			continue;
+		}
+		/* ltwws: 0 = last token was not whitespace, 1 = it was
+		   an OPT_NONE, 2 = a NONE was already emitted */
+		if (ttMWS(ct->type)) {
+			if (ltwws == 1) {
+				if (ct->type == OPT_NONE) continue;
+				ltwws = 2;
+			} else if (ltwws == 2) continue;
+			else if (ct->type == OPT_NONE) ltwws = 1;
+			else ltwws = 2;
+		} else ltwws = 0;
+		if (ct->line >= 0) ct->line = l;
+		print_token(ls, ct, reject_nested ? 0 : l);
+	}
+	if (etl.nt) freemem(etl.t);
+	if (tfi) {
+		/* give back to tfi what the rescan did not consume */
+		tfi->art = save_tfi + (etl.art - etl_limit);
+	}
+
+exit_macro_1:
+	print_space(ls);
+exit_macro_2:
+	for (i = 0; i < (m->narg + m->vaarg); i ++)
+		if (atl[i].nt) freemem(atl[i].t);
+	if (m->narg > 0 || m->vaarg) freemem(atl);
+	m->nest = save_nest;
+	return 0;
+
+exit_error_2:
+	if (etl.nt) freemem(etl.t);
+exit_error_1:
+	for (i = 0; i < (m->narg + m->vaarg); i ++)
+		if (atl[i].nt) freemem(atl[i].t);
+	if (m->narg > 0 || m->vaarg) freemem(atl);
+	m->nest = save_nest;
+exit_error:
+	return 1;
+}
+
+/*
+ * print already defined macros
+ */
+void print_defines(void)
+{
+	HTT_scan(&macros, print_macro);
+}
+
+/*
+ * define_macro() defines a new macro, whose definition is given in
+ * the command-line syntax: macro=def
+ * The '=def' part is optional.
+ *
+ * It returns non-zero on error.
+ */ +int define_macro(struct lexer_state *ls, char *def) +{ + char *c = sdup(def), *d; + int with_def = 0; + int ret = 0; + + for (d = c; *d && *d != '='; d ++); + if (*d) { + *d = ' '; + with_def = 1; + } + if (with_def) { + struct lexer_state lls; + size_t n = strlen(c) + 1; + + if (c == d) { + error(-1, "void macro name"); + ret = 1; + } else { + *(c + n - 1) = '\n'; + init_buf_lexer_state(&lls, 0); + lls.flags = ls->flags | LEXER; + lls.input = 0; + lls.input_string = (unsigned char *)c; + lls.pbuf = 0; + lls.ebuf = n; + lls.line = -1; + ret = handle_define(&lls); + free_lexer_state(&lls); + } + } else { + struct macro *m; + + if (!*c) { + error(-1, "void macro name"); + ret = 1; + } else if ((m = HTT_get(¯os, c)) +#ifdef LOW_MEM + && (m->cval.length != 3 + || m->cval.t[0] != NUMBER + || strcmp((char *)(m->cval.t + 1), "1"))) { +#else + && (m->val.nt != 1 + || m->val.t[0].type != NUMBER + || strcmp(m->val.t[0].name, "1"))) { +#endif + error(-1, "macro %s already defined", c); + ret = 1; + } else { +#ifndef LOW_MEM + struct token t; +#endif + + m = new_macro(); +#ifdef LOW_MEM + m->cval.length = 3; + m->cval.t = getmem(3); + m->cval.t[0] = NUMBER; + m->cval.t[1] = '1'; + m->cval.t[2] = 0; +#else + t.type = NUMBER; + t.name = sdup("1"); + aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG); +#endif + HTT_put(¯os, m, c); + } + } + freemem(c); + return ret; +} + +/* + * undef_macro() undefines the macro whom name is given as "def"; + * it is not an error to try to undef a macro that does not exist. + * + * It returns non-zero on error (undefinition of a special macro, + * void macro name). + */ +int undef_macro(struct lexer_state *ls, char *def) +{ + char *c = def; + + if (!*c) { + error(-1, "void macro name"); + return 1; + } + if (HTT_get(¯os, c)) { + if (check_special_macro(c)) { + error(-1, "trying to undef special macro %s", c); + return 1; + } else HTT_del(¯os, c); + } + return 0; +} + +/* + * We saw a #ifdef directive. Parse the line. 
+ * return value: 1 if the macro is defined, 0 if it is not, -1 on error + */ +int handle_ifdef(struct lexer_state *ls) +{ + while (!next_token(ls)) { + int tgd = 1; + + if (ls->ctok->type == NEWLINE) break; + if (ttMWS(ls->ctok->type)) continue; + if (ls->ctok->type == NAME) { + int x = (HTT_get(¯os, ls->ctok->name) != 0); + while (!next_token(ls) && ls->ctok->type != NEWLINE) + if (tgd && !ttWHI(ls->ctok->type) + && (ls->flags & WARN_STANDARD)) { + warning(ls->line, "trailing garbage " + "in #ifdef"); + tgd = 0; + } + return x; + } + error(ls->line, "illegal macro name for #ifdef"); + while (!next_token(ls) && ls->ctok->type != NEWLINE) + if (tgd && !ttWHI(ls->ctok->type) + && (ls->flags & WARN_STANDARD)) { + warning(ls->line, "trailing garbage in " + "#ifdef"); + tgd = 0; + } + return -1; + } + error(ls->line, "unfinished #ifdef"); + return -1; +} + +/* + * for #undef + * return value: 1 on error, 0 on success. Undefining a macro that was + * already not defined is not an error. + */ +int handle_undef(struct lexer_state *ls) +{ + while (!next_token(ls)) { + if (ls->ctok->type == NEWLINE) break; + if (ttMWS(ls->ctok->type)) continue; + if (ls->ctok->type == NAME) { + struct macro *m = HTT_get(¯os, ls->ctok->name); + int tgd = 1; + + if (m != 0) { + if (check_special_macro(ls->ctok->name)) { + error(ls->line, "trying to undef " + "special macro %s", + ls->ctok->name); + goto undef_error; + } + if (emit_defines) + fprintf(emit_output, "#undef %s\n", + ls->ctok->name); + HTT_del(¯os, ls->ctok->name); + } + while (!next_token(ls) && ls->ctok->type != NEWLINE) + if (tgd && !ttWHI(ls->ctok->type) + && (ls->flags & WARN_STANDARD)) { + warning(ls->line, "trailing garbage " + "in #undef"); + tgd = 0; + } + return 0; + } + error(ls->line, "illegal macro name for #undef"); + undef_error: + while (!next_token(ls) && ls->ctok->type != NEWLINE); + return 1; + } + error(ls->line, "unfinished #undef"); + return 1; +} + +/* + * for #ifndef + * return value: 0 if the macro is 
defined, 1 if it is not, -1 on error. + */ +int handle_ifndef(struct lexer_state *ls) +{ + while (!next_token(ls)) { + int tgd = 1; + + if (ls->ctok->type == NEWLINE) break; + if (ttMWS(ls->ctok->type)) continue; + if (ls->ctok->type == NAME) { + int x = (HTT_get(¯os, ls->ctok->name) == 0); + + while (!next_token(ls) && ls->ctok->type != NEWLINE) + if (tgd && !ttWHI(ls->ctok->type) + && (ls->flags & WARN_STANDARD)) { + warning(ls->line, "trailing garbage " + "in #ifndef"); + tgd = 0; + } + if (protect_detect.state == 1) { + protect_detect.state = 2; + protect_detect.macro = sdup(ls->ctok->name); + } + return x; + } + error(ls->line, "illegal macro name for #ifndef"); + while (!next_token(ls) && ls->ctok->type != NEWLINE) + if (tgd && !ttWHI(ls->ctok->type) + && (ls->flags & WARN_STANDARD)) { + warning(ls->line, "trailing garbage in " + "#ifndef"); + tgd = 0; + } + return -1; + } + error(ls->line, "unfinished #ifndef"); + return -1; +} + +/* + * erase the macro table. + */ +void wipe_macros(void) +{ + if (macros_init_done) HTT_kill(¯os); + macros_init_done = 0; +} + +/* + * initialize the macro table + */ +void init_macros(void) +{ + wipe_macros(); + HTT_init(¯os, del_macro); + macros_init_done = 1; + if (!no_special_macros) add_special_macros(); +} + +/* + * find a macro from its name + */ +struct macro *get_macro(char *name) +{ + return HTT_get(¯os, name); +} diff --git a/libexec/auxcpp/mem.c b/libexec/auxcpp/mem.c new file mode 100644 index 00000000000..dabde952e32 --- /dev/null +++ b/libexec/auxcpp/mem.c @@ -0,0 +1,328 @@ +/* + * Memory manipulation routines + * (c) Thomas Pornin 1998 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "mem.h" +#include +#include +#include + +/* + * Shifting a pointer of that some bytes is supposed to satisfy + * alignment requirements. This is *not* guaranteed by the standard + * but should work everywhere anyway. + */ +#define ALIGNSHIFT (sizeof(long) > sizeof(long double) \ + ? sizeof(long) : sizeof(long double)) + +#ifdef AUDIT +void die(void) +{ + abort(); +} + +static void suicide(unsigned long e) +{ + fprintf(stderr, "ouch: Schrodinger's beef is not dead ! 
%lx\n", e); + die(); +} +#else +void die(void) +{ + exit(EXIT_FAILURE); +} +#endif + +#if defined AUDIT || defined MEM_CHECK || defined MEM_DEBUG +/* + * This function is equivalent to a malloc(), but will display an error + * message and exit if the wanted memory is not available + */ +#ifdef MEM_DEBUG +static void *getmem_raw(size_t x) +#else +void *(getmem)(size_t x) +#endif +{ + void *m; + +#ifdef AUDIT + m = malloc(x + ALIGNSHIFT); +#else + m = malloc(x); +#endif + if (m == 0) { + fprintf(stderr, "ouch: malloc() failed\n"); + die(); + } +#ifdef AUDIT + *((unsigned long *)m) = 0xdeadbeefUL; + return (void *)(((char *)m) + ALIGNSHIFT); +#else + return m; +#endif +} +#endif + +#ifndef MEM_DEBUG +/* + * This function is equivalent to a realloc(); if the realloc() call + * fails, it will try a malloc() and a memcpy(). If not enough memory is + * available, the program exits with an error message + */ +void *(incmem)(void *m, size_t x, size_t nx) +{ + void *nm; + +#ifdef AUDIT + m = (void *)(((char *)m) - ALIGNSHIFT); + if (*((unsigned long *)m) != 0xdeadbeefUL) + suicide(*((unsigned long *)m)); + x += ALIGNSHIFT; nx += ALIGNSHIFT; +#endif + if (!(nm = realloc(m, nx))) { + if (x > nx) x = nx; + nm = (getmem)(nx); + memcpy(nm, m, x); + /* free() and not freemem(), because of the Schrodinger beef */ + free(m); + } +#ifdef AUDIT + return (void *)(((char *)nm) + ALIGNSHIFT); +#else + return nm; +#endif +} +#endif + +#if defined AUDIT || defined MEM_DEBUG +/* + * This function frees the given block + */ +#ifdef MEM_DEBUG +static void freemem_raw(void *x) +#else +void (freemem)(void *x) +#endif +{ +#ifdef AUDIT + void *y = (void *)(((char *)x) - ALIGNSHIFT); + + if ((*((unsigned long *)y)) != 0xdeadbeefUL) + suicide(*((unsigned long *)y)); + *((unsigned long *)y) = 0xfeedbabeUL; + free(y); +#else + free(x); +#endif +} +#endif + +#ifdef AUDIT +/* + * This function copies n bytes from src to dest + */ +void *mmv(void *dest, void *src, size_t n) +{ + return memcpy(dest, src, 
n); +} + +/* + * This function copies n bytes from src to dest + */ +void *mmvwo(void *dest, void *src, size_t n) +{ + return memmove(dest, src, n); +} +#endif + +#ifndef MEM_DEBUG +/* + * This function creates a new char * and fills it with a copy of src + */ +char *(sdup)(char *src) +{ + size_t n = 1 + strlen(src); + char *x = getmem(n); + + mmv(x, src, n); + return x; +} +#endif + +#ifdef MEM_DEBUG +/* + * We include here special versions of getmem(), freemem() and incmem() + * that track allocations and are used to detect memory leaks. + * + * Each allocation is referenced in a list, with a serial number. + */ + +/* + * Define "true" functions for applications that need pointers + * to such functions. + */ +void *(getmem)(size_t n) +{ + return getmem(n); +} + +void (freemem)(void *x) +{ + freemem(x); +} + +void *(incmem)(void *x, size_t s, size_t ns) +{ + return incmem(x, s, ns); +} + +char *(sdup)(char *s) +{ + return sdup(s); +} + +static long current_serial = 0L; + +/* must be a power of two */ +#define MEMDEBUG_MEMG 128U + +static struct mem_track { + void *block; + long serial; + char *file; + int line; +} *mem = 0; + +static size_t meml = 0; + +static unsigned int current_ptr = 0; + +static void *true_incmem(void *x, size_t old_size, size_t new_size) +{ + void * y = realloc(x, new_size); + + if (y == 0) { + y = malloc(new_size); + if (y == 0) { + fprintf(stderr, "ouch: malloc() failed\n"); + die(); + } + mmv(y, x, old_size < new_size ? 
old_size : new_size); + free(x); + } + return y; +} + +static long find_free_block(void) +{ + unsigned int n; + size_t i; + + for (i = 0, n = current_ptr; i < meml; i ++) { + if (mem[n].block == 0) { + current_ptr = n; + return n; + } + n = (n + 1) & (meml - 1U); + } + if (meml == 0) { + size_t j; + + meml = MEMDEBUG_MEMG; + mem = malloc(meml * sizeof(struct mem_track)); + current_ptr = 0; + for (j = 0; j < meml ; j ++) mem[j].block = 0; + } else { + size_t j; + + mem = true_incmem(mem, meml * sizeof(struct mem_track), + 2 * meml * sizeof(struct mem_track)); + current_ptr = meml; + for (j = meml; j < 2 * meml ; j ++) mem[j].block = 0; + meml *= 2; + } + return current_ptr; +} + +void *getmem_debug(size_t n, char *file, int line) +{ + void *x = getmem_raw(n + ALIGNSHIFT); + long i = find_free_block(); + + *(long *)x = i; + mem[i].block = x; + mem[i].serial = current_serial ++; + mem[i].file = file; + mem[i].line = line; + return (void *)((unsigned char *)x + ALIGNSHIFT); +} + +void freemem_debug(void *x, char *file, int line) +{ + void *y = (unsigned char *)x - ALIGNSHIFT; + long i = *(long *)y; + + if (i < 0 || (size_t)i >= meml || mem[i].block != y) { + fprintf(stderr, "ouch: freeing free people (from %s:%d)\n", + file, line); + die(); + } + mem[i].block = 0; + freemem_raw(y); +} + +void *incmem_debug(void *x, size_t ol, size_t nl, char *file, int line) +{ + void *y = getmem_debug(nl, file, line); + mmv(y, x, ol < nl ? 
ol : nl); + freemem_debug(x, file, line); + return y; +} + +char *sdup_debug(char *src, char *file, int line) +{ + size_t n = 1 + strlen(src); + char *x = getmem_debug(n, file, line); + + mmv(x, src, n); + return x; +} + +void report_leaks(void) +{ + size_t i; + + for (i = 0; i < meml; i ++) { + if (mem[i].block) fprintf(stderr, "leak: serial %ld, %s:%d\n", + mem[i].serial, mem[i].file, mem[i].line); + } +} + +#endif diff --git a/libexec/auxcpp/mem.h b/libexec/auxcpp/mem.h new file mode 100644 index 00000000000..4403c2fc476 --- /dev/null +++ b/libexec/auxcpp/mem.h @@ -0,0 +1,155 @@ +/* + * (c) Thomas Pornin 1998 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef UCPP__MEM__ +#define UCPP__MEM__ + +#include + +void die(void); + +#if defined AUDIT || defined MEM_CHECK || defined MEM_DEBUG +void *getmem(size_t); +#else +#define getmem malloc +#endif + +#if defined MEM_DEBUG +void *getmem_debug(size_t, char *, int); +#undef getmem +#define getmem(x) getmem_debug(x, __FILE__, __LINE__) +#endif + +#if defined AUDIT || defined MEM_DEBUG +void freemem(void *); +#else +#define freemem free +#endif + +#if defined MEM_DEBUG +void freemem_debug(void *, char *, int); +#undef freemem +#define freemem(x) freemem_debug(x, __FILE__, __LINE__) +#endif + +void *incmem(void *, size_t, size_t); +char *sdup(char *); + +#if defined MEM_DEBUG +void *incmem_debug(void *, size_t, size_t, char *, int); +#undef incmem +#define incmem(x, y, z) incmem_debug(x, y, z, __FILE__, __LINE__) +void report_leaks(void); +char *sdup_debug(char *, char *, int); +#define sdup(x) sdup_debug(x, __FILE__, __LINE__) +#endif + +#ifdef AUDIT +void *mmv(void *, void *, size_t); +void *mmvwo(void *, void *, size_t); +#else +#define mmv memcpy +#define mmvwo memmove +#endif + +/* + * this macro adds the object obj at the end of the array list, handling + * memory allocation when needed; ptr contains the number of elements in + * the array, and memg is the granularity of memory allocations (a power + * of 2 is recommanded, for optimization reasons). + * + * list and ptr may be updated, and thus need to be lvalues. 
+ */ +#define aol(list, ptr, obj, memg) do { \ + if (((ptr) % (memg)) == 0) { \ + if ((ptr) != 0) { \ + (list) = incmem((list), (ptr) * sizeof(obj), \ + ((ptr) + (memg)) * sizeof(obj)); \ + } else { \ + (list) = getmem((memg) * sizeof(obj)); \ + } \ + } \ + (list)[(ptr) ++] = (obj); \ + } while (0) + +/* + * bol() does the same as aol(), but adds the new item at the beginning + * of the list; beware, the computational cost is greater. + */ +#define bol(list, ptr, obj, memg) do { \ + if (((ptr) % (memg)) == 0) { \ + if ((ptr) != 0) { \ + (list) = incmem((list), (ptr) * sizeof(obj), \ + ((ptr) + (memg)) * sizeof(obj)); \ + } else { \ + (list) = getmem((memg) * sizeof(obj)); \ + } \ + } \ + if ((ptr) != 0) \ + mmvwo((list) + 1, (list), (ptr) * sizeof(obj)); \ + (ptr) ++; \ + (list)[0] = (obj); \ + } while (0) + +/* + * mbol() does the same as bol(), but adds the new item at the given + * emplacement; bol() is equivalent to mbol with 0 as last argument. + */ +#define mbol(list, ptr, obj, memg, n) do { \ + if (((ptr) % (memg)) == 0) { \ + if ((ptr) != 0) { \ + (list) = incmem((list), (ptr) * sizeof(obj), \ + ((ptr) + (memg)) * sizeof(obj)); \ + } else { \ + (list) = getmem((memg) * sizeof(obj)); \ + } \ + } \ + if ((ptr) > n) \ + mmvwo((list) + n + 1, (list) + n, \ + ((ptr) - n) * sizeof(obj)); \ + (ptr) ++; \ + (list)[n] = (obj); \ + } while (0) + +/* + * this macro adds the object obj at the end of the array list, doubling + * the size of list when needed; as for aol(), ptr and list must be + * lvalues, and so must be llng + */ + +#define wan(list, ptr, obj, llng) do { \ + if ((ptr) == (llng)) { \ + (llng) += (llng); \ + (list) = incmem((list), (ptr) * sizeof(obj), \ + (llng) * sizeof(obj)); \ + } \ + (list)[(ptr) ++] = (obj); \ + } while (0) + +#endif diff --git a/libexec/auxcpp/nhash.c b/libexec/auxcpp/nhash.c new file mode 100644 index 00000000000..6e5e4f7f8eb --- /dev/null +++ b/libexec/auxcpp/nhash.c @@ -0,0 +1,481 @@ +/* + * Mixed hash table / binary tree code. 
+ * (c) Thomas Pornin 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <stddef.h> +#include <string.h> +#include <limits.h> +#include "nhash.h" +#include "mem.h" + +/* + * Hash a string into an `unsigned' value. This function is derived + * from the hash function used in the ELF binary object file format + * hash tables. The result size is a 32-bit number if the `unsigned' + * type is big enough to hold 32-bit arbitrary numbers, a 16-bit number + * otherwise. 
/*
 * hash_string(): fold the NUL-terminated string `name' into an `unsigned'.
 * Every byte is read as an unsigned char. After each step the top four
 * bits of the working width (32 bits when UINT_MAX allows it, 16 bits
 * otherwise) are XORed back into lower bits and then cleared. The empty
 * string hashes to 0.
 */
static unsigned hash_string(char *name)
{
	unsigned h = 0;

	for (; *name; name ++) {
		unsigned g;

		h = (h << 4) + *(unsigned char *)name;
#if UINT_MAX >= 0xffffffffU
		g = h & 0xF0000000U;
		h ^= (g >> 24);
#else
		g = h & 0xF000U;
		h ^= (g >> 12);
#endif
		/* drop the bits which have just been folded back in */
		h &= ~g;
	}
	return h;
}

/*
 * Each item in the table is a structure beginning with a `hash_item_header'
 * structure. Those headers define binary trees such that all left-descendants
 * (respectively right-descendants) of a given tree node have an associated
 * hash value strictly smaller (respectively greater) than the hash value
 * associated with this node.
 *
 * The `ident' field points to an array of char. The `sizeof(unsigned)'
 * first `char' contain a copy of an `unsigned' value which is the hashed
 * string, except the least significant bit. When this bit is set to 0,
 * the node contains the unique item using that hash value. If the bit
 * is set to 1, then there are several items with that hash value.
 *
 * When several items share the same hash value, they are linked together
 * in a linked list by their `left' field. The node contains no data;
 * it is a "fake item".
 *
 * The `char' following the hash value encode the item name for true items.
 * For fake items, they contain the pointer to the first true item of the
 * corresponding linked list (suitably aligned).
 *
 * There are HTT_NUM_TREES trees; the items are sorted among trees by the
 * least significant bits of their hash value.
 */
+ */ + +static void internal_init(HTT *htt, void (*deldata)(void *), int reduced) +{ + htt->deldata = deldata; + if (reduced) { + HTT2 *htt2 = (HTT2 *)htt; + + htt2->tree[0] = htt2->tree[1] = NULL; + } else { + unsigned u; + + for (u = 0; u < HTT_NUM_TREES; u ++) htt->tree[u] = NULL; + } +} + +/* see nhash.h */ +void HTT_init(HTT *htt, void (*deldata)(void *)) +{ + internal_init(htt, deldata, 0); +} + +/* see nhash.h */ +void HTT2_init(HTT2 *htt, void (*deldata)(void *)) +{ + internal_init((HTT *)htt, deldata, 1); +} + +#define PTR_SHIFT (sizeof(hash_item_header *) * \ + ((sizeof(unsigned) + sizeof(hash_item_header *) - 1) / \ + sizeof(hash_item_header *))) + +#define TREE(u) (*(reduced ? ((HTT2 *)htt)->tree + ((u) & 1) \ + : htt->tree + ((u) & (HTT_NUM_TREES - 1)))) + +/* + * Find a node for the given hash value. If `father' is not NULL, fill + * `*father' with a pointer to the node's father. + * If the return value is NULL, then no existing node was found; if `*father' + * is also NULL, the tree is empty. If the return value is not NULL but + * `*father' is NULL, then the found node is the tree root. + * + * If `father' is not NULL, then `*leftson' is filled with 1 if the node + * was looked for as the father left son, 0 otherwise. 
+ */ +static hash_item_header *find_node(HTT *htt, unsigned u, + hash_item_header **father, int *leftson, int reduced) +{ + hash_item_header *node = TREE(u); + hash_item_header *nodef = NULL; + int ls; + + u &= ~1U; + while (node != NULL) { + unsigned v = *(unsigned *)(node->ident); + unsigned w = v & ~1U; + + if (u == w) break; + nodef = node; + if (u < w) { + node = node->left; + ls = 1; + } else { + node = node->right; + ls = 0; + } + } + if (father != NULL) { + *father = nodef; + *leftson = ls; + } + return node; +} + +static void *internal_get(HTT *htt, char *name, int reduced) +{ + unsigned u = hash_string(name), v; + hash_item_header *node = find_node(htt, u, NULL, NULL, reduced); + + if (node == NULL) return NULL; + v = *(unsigned *)(node->ident); + if ((v & 1U) == 0) { + return (strcmp(HASH_ITEM_NAME(node), name) == 0) ? node : NULL; + } + node = *(hash_item_header **)(node->ident + PTR_SHIFT); + while (node != NULL) { + if (strcmp(HASH_ITEM_NAME(node), name) == 0) return node; + node = node->left; + } + return NULL; +} + +/* see nhash.h */ +void *HTT_get(HTT *htt, char *name) +{ + return internal_get(htt, name, 0); +} + +/* see nhash.h */ +void *HTT2_get(HTT2 *htt, char *name) +{ + return internal_get((HTT *)htt, name, 1); +} + +/* + * Make an item identifier from its name and its hash value. + */ +static char *make_ident(char *name, unsigned u) +{ + size_t n = strlen(name) + 1; + char *ident = getmem(n + sizeof(unsigned)); + + *(unsigned *)ident = u & ~1U; + memcpy(ident + sizeof(unsigned), name, n); + return ident; +} + +/* + * Make an identifier for a fake item, pointing to a true item. + */ +static char *make_fake_ident(unsigned u, hash_item_header *next) +{ + char *ident = getmem(PTR_SHIFT + sizeof(hash_item_header *)); + + *(unsigned *)ident = u | 1U; + *(hash_item_header **)(ident + PTR_SHIFT) = next; + return ident; +} + +/* + * Adding an item is straightforward: + * 1. look for its emplacement + * 2. 
if no node is found, use the item as a new node and link it to the tree + * 3. if a node is found: + * 3.1. if the node is real, check for name inequality, then create a + * fake node and assemble the two-element linked list + * 3.2. if the node is fake, look for the name in the list; if not found, + * add the node at the list end + */ +static void *internal_put(HTT *htt, void *item, char *name, int reduced) +{ + unsigned u = hash_string(name), v; + int ls; + hash_item_header *father; + hash_item_header *node = find_node(htt, u, &father, &ls, reduced); + hash_item_header *itemg = item, *pnode; + + if (node == NULL) { + itemg->left = itemg->right = NULL; + itemg->ident = make_ident(name, u); + if (father == NULL) { + TREE(u) = itemg; + } else if (ls) { + father->left = itemg; + } else { + father->right = itemg; + } + return NULL; + } + v = *(unsigned *)(node->ident); + if ((v & 1U) == 0) { + if (strcmp(HASH_ITEM_NAME(node), name) == 0) + return node; + pnode = getmem(sizeof *pnode); + pnode->left = node->left; + pnode->right = node->right; + pnode->ident = make_fake_ident(u, node); + node->left = itemg; + node->right = NULL; + itemg->left = itemg->right = NULL; + itemg->ident = make_ident(name, u); + if (father == NULL) { + TREE(u) = pnode; + } else if (ls) { + father->left = pnode; + } else { + father->right = pnode; + } + return NULL; + } + node = *(hash_item_header **)(node->ident + PTR_SHIFT); + while (node != NULL) { + if (strcmp(HASH_ITEM_NAME(node), name) == 0) return node; + pnode = node; + node = node->left; + } + itemg->left = itemg->right = NULL; + itemg->ident = make_ident(name, u); + pnode->left = itemg; + return NULL; +} + +/* see nhash.h */ +void *HTT_put(HTT *htt, void *item, char *name) +{ + return internal_put(htt, item, name, 0); +} + +/* see nhash.h */ +void *HTT2_put(HTT2 *htt, void *item, char *name) +{ + return internal_put((HTT *)htt, item, name, 1); +} + +/* + * A fake node subnode list has shrunk to one item only; make the + * node real 
again. + * fnode the fake node + * node the last remaining node + * father the fake node father (NULL if the fake node is root) + * leftson 1 if the fake node is a left son, 0 otehrwise + * u the hash value for this node + */ +static void shrink_node(HTT *htt, hash_item_header *fnode, + hash_item_header *node, hash_item_header *father, int leftson, + unsigned u, int reduced) +{ + node->left = fnode->left; + node->right = fnode->right; + if (father == NULL) { + TREE(u) = node; + } else if (leftson) { + father->left = node; + } else { + father->right = node; + } + freemem(fnode->ident); + freemem(fnode); +} + +/* + * Deletion algorithm: + * 1. look for the node; if not found, exit + * 2. if the node is real: + * 2.1. check for equality; exit otherwise + * 2.2. delete the node + * 2.3. promote the leftest of right descendants or rightest of left + * descendants + * 3. if the node is fake: + * 3.1. check the list items for equality; exit otherwise + * 3.2. delete the correct item + * 3.3. if there remains only one item, supress the fake node + */ +static int internal_del(HTT *htt, char *name, int reduced) +{ + unsigned u = hash_string(name), v; + int ls; + hash_item_header *father; + hash_item_header *node = find_node(htt, u, &father, &ls, reduced); + hash_item_header *pnode, *fnode, *znode; + char *tmp; + + if (node == NULL) return 0; + v = *(unsigned *)(node->ident); + if ((v & 1U) != 0) { + fnode = node; + node = znode = *(hash_item_header **)(node->ident + PTR_SHIFT); + pnode = NULL; + while (node != NULL) { + if (strcmp(HASH_ITEM_NAME(node), name) == 0) break; + pnode = node; + node = node->left; + } + if (node == NULL) return 0; + if (pnode == NULL) { + /* + * We supress the first item in the list. 
+ */ + *(hash_item_header **)(fnode->ident + PTR_SHIFT) = + node->left; + if (node->left->left == NULL) { + shrink_node(htt, fnode, node->left, + father, ls, u, reduced); + } + } else { + pnode->left = node->left; + if (pnode->left == NULL && znode == pnode) { + shrink_node(htt, fnode, pnode, + father, ls, u, reduced); + } + } + } else { + if (strcmp(HASH_ITEM_NAME(node), name) != 0) return 0; + if (node->left != NULL) { + for (znode = node, pnode = node->left; pnode->right; + znode = pnode, pnode = pnode->right); + if (znode != node) { + znode->right = pnode->left; + pnode->left = node->left; + } + pnode->right = node->right; + } else if (node->right != NULL) { + for (znode = node, pnode = node->right; pnode->left; + znode = pnode, pnode = pnode->left); + if (znode != node) { + znode->left = pnode->right; + pnode->right = node->right; + } + pnode->left = node->left; + } else pnode = NULL; + if (father == NULL) { + TREE(u) = pnode; + } else if (ls) { + father->left = pnode; + } else { + father->right = pnode; + } + } + tmp = node->ident; + htt->deldata(node); + freemem(tmp); + return 1; +} + +/* see nhash.h */ +int HTT_del(HTT *htt, char *name) +{ + return internal_del(htt, name, 0); +} + +/* see nhash.h */ +int HTT2_del(HTT2 *htt, char *name) +{ + return internal_del((HTT *)htt, name, 1); +} + +/* + * Apply `action()' on all nodes of the tree whose root is given as + * parameter `node'. If `wipe' is non-zero, the nodes are removed + * from memory. 
+ */ +static void scan_node(hash_item_header *node, void (*action)(void *), int wipe) +{ + unsigned v; + + if (node == NULL) return; + scan_node(node->left, action, wipe); + scan_node(node->right, action, wipe); + v = *(unsigned *)(node->ident); + if ((v & 1U) != 0) { + hash_item_header *pnode, *nnode; + + for (pnode = *(hash_item_header **)(node->ident + PTR_SHIFT); + pnode != NULL; pnode = nnode) { + char *tmp = pnode->ident; + + nnode = pnode->left; + action(pnode); + if (wipe) freemem(tmp); + } + if (wipe) { + freemem(node->ident); + freemem(node); + } + } else { + char *tmp = node->ident; + + action(node); + if (wipe) freemem(tmp); + } +} + +/* see nhash.h */ +void HTT_scan(HTT *htt, void (*action)(void *)) +{ + unsigned u; + + for (u = 0; u < HTT_NUM_TREES; u ++) { + scan_node(htt->tree[u], action, 0); + } +} + +/* see nhash.h */ +void HTT2_scan(HTT2 *htt, void (*action)(void *)) +{ + scan_node(htt->tree[0], action, 0); + scan_node(htt->tree[1], action, 0); +} + +/* see nhash.h */ +void HTT_kill(HTT *htt) +{ + unsigned u; + + for (u = 0; u < HTT_NUM_TREES; u ++) { + scan_node(htt->tree[u], htt->deldata, 1); + } +} + +/* see nhash.h */ +void HTT2_kill(HTT2 *htt) +{ + scan_node(htt->tree[0], htt->deldata, 1); + scan_node(htt->tree[1], htt->deldata, 1); +} diff --git a/libexec/auxcpp/nhash.h b/libexec/auxcpp/nhash.h new file mode 100644 index 00000000000..00156f57b8e --- /dev/null +++ b/libexec/auxcpp/nhash.h @@ -0,0 +1,132 @@ +/* + * (c) Thomas Pornin 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef UCPP__NHASH__ +#define UCPP__NHASH__ + +/* + * Each item stored in the hash table should be a structure beginning + * with the following header. + */ +typedef struct hash_item_header_ { + char *ident; + struct hash_item_header_ *left, *right; +} hash_item_header; + +/* + * This macro takes as argument a pointer to a hash table item (a + * structure beginning with `hash_item_header') and returns a pointer to + * the item name. This name should be considered as read-only. The + * retrieved pointer can become invalid whenever a new item is inserted + * in or removed from the table. + */ +#define HASH_ITEM_NAME(s) (((hash_item_header *)(s))->ident + sizeof(unsigned)) + +/* + * Number of lists for the primary hash step. Can be reduced to save more + * memory, or increased to speed things up. It should be a power of 2 + * greater or equal than 2 and smaller than UINT_MAX. + */ +#define HTT_NUM_TREES 128 + +/* + * Type for a hash table. 
+ */ +typedef struct { + void (*deldata)(void *); + hash_item_header *tree[HTT_NUM_TREES]; +} HTT; + +/* + * Type for a reduced version of HTT with only two binary trees. That + * version has a lower initialization time and is suitable for situation + * where only a limited number of elements will be stored, but new tables + * need frequent initializations. + */ +typedef struct { + void (*deldata)(void *); + hash_item_header *tree[2]; +} HTT2; + +/* + * Initialize a hash table. The `deldata' parameter should point to a + * function which will be invoked on any item removed from the table; + * that function should take care of the release of memory allocated for + * that item (except the hash_item_header contents, which are handled + * internally). + */ +void HTT_init(HTT *htt, void (*deldata)(void *)); + +/* + * Link an item into the hash table under the given name. If another + * item of identical name is already present in the table, a pointer to + * that item is returned; otherwise, the new item is linked into the + * table and NULL is returned. The object pointed to by `item' is + * linked from the table, but not the string pointed to by `name'. + */ +void *HTT_put(HTT *htt, void *item, char *name); + +/* + * Retrieve an item by name from the hash table. NULL is returned if + * the object is not found. + */ +void *HTT_get(HTT *htt, char *name); + +/* + * Remove an item from the hash table. 1 is returned if the item was + * removed, 0 if it was not found. + */ +int HTT_del(HTT *htt, char *name); + +/* + * For all items stored within the hash table, invoke the provided + * function with the item as parameter. The function may abort the + * scan by performing a longjmp() to a context encapsulating the + * call to that function. + */ +void HTT_scan(HTT *htt, void (*action)(void *)); + +/* + * Release the whole table contents. After a call to this function, + * the table is ready to accept new items. 
+ */ +void HTT_kill(HTT *htt); + +/* + * The following functions are identical to the HTT_*() functions, except + * that they operate on the reduced HTT2 tables. + */ +void HTT2_init(HTT2 *htt, void (*deldata)(void *)); +void *HTT2_put(HTT2 *htt, void *item, char *name); +void *HTT2_get(HTT2 *htt, char *name); +int HTT2_del(HTT2 *htt, char *name); +void HTT2_scan(HTT2 *htt, void (*action)(void *)); +void HTT2_kill(HTT2 *htt); + +#endif diff --git a/libexec/auxcpp/sample.c b/libexec/auxcpp/sample.c new file mode 100644 index 00000000000..f94f5c9d7ec --- /dev/null +++ b/libexec/auxcpp/sample.c @@ -0,0 +1,114 @@ +/* + * Sample code showing how to use ucpp as an integrated lexer. + * This file is public domain. + */ + +/* + * This is an example of how to use ucpp as a preprocessor and lexer + * into another project. The steps are those described in ucpp README + * file. To use this code, compile the ucpp source files with + * STAND_ALONE not defined, and link them with this code. The resulting + * binary will take a C source file as standard input, preprocess it, + * and output each non-whitespace token on stdout, with its numerical + * value (defined as an enum in cpp.h) and its contents. This code + * defines no system include path. + * + * This code supposes that the ucpp files are compiled with PRAGMA_TOKENIZE + * enabled (see the tune.h file). 
+ */ + +#include +#include +#include +#include "mem.h" +#include "cpp.h" + +/* + * Sample driver: sets up ucpp in lexer mode with standard input as the + * source, adds every command-line argument as an include path, then + * prints the tokens returned by lex() on standard output until + * end-of-input. Lexing errors are skipped; the return value is always 0. + */ +int main(int argc, char *argv[]) +{ + int i, r; + struct lexer_state ls; + + /* step 1 */ + init_cpp(); + + /* step 2 */ + no_special_macros = 0; + emit_defines = emit_assertions = 0; + + /* step 3 -- with assertions */ + init_tables(1); + + /* step 4 -- no default include path */ + init_include_path(0); + + /* step 5 -- no need to reset the two emit_* variables set in 2 */ + emit_dependencies = 0; + + /* step 6 -- we work with stdin, this is not a real filename */ + set_init_filename("[stdin]", 0); + + /* step 7 -- we make sure that assertions are on, and pragma are + handled */ + init_lexer_state(&ls); + init_lexer_mode(&ls); + ls.flags |= HANDLE_ASSERTIONS | HANDLE_PRAGMA | LINE_NUM; + + /* step 8 -- input is from stdin */ + ls.input = stdin; + + /* step 9 -- we do not have any macro to define, but we add any + argument as an include path */ + for (i = 1; i < argc; i ++) add_incpath(argv[i]); + + /* step 10 -- we are a lexer and we want CONTEXT tokens */ + enter_file(&ls, ls.flags); + + /* read tokens until end-of-input is reached -- errors (non-zero + return values different from CPPERR_EOF) are ignored */ + while ((r = lex(&ls)) < CPPERR_EOF) { + if (r) { + /* error condition -- no token was retrieved */ + continue; + } + /* we print each token: its numerical value, and its + string content; if this is a PRAGMA token, the + string content is in fact a compressed token list, + that we uncompress and print. 
*/ + if (ls.ctok->type == PRAGMA) { + unsigned char *c = (unsigned char *)(ls.ctok->name); + + printf("line %ld: <#pragma>\n", ls.line); + for (; *c; c ++) { + int t = *c; + + /* a string token is followed by its bytes, up to + the PRAGMA_TOKEN_END marker; other tokens are + printed through operators_name[] */ + if (STRING_TOKEN(t)) { + printf(" <%2d> ", t); + for (c ++; *c != PRAGMA_TOKEN_END; + c ++) putchar(*c); + putchar('\n'); + } else { + printf(" <%2d> `%s'\n", t, + operators_name[t]); + } + } + } else if (ls.ctok->type == CONTEXT) { + printf("new context: file '%s', line %ld\n", + ls.ctok->name, ls.ctok->line); + } else if (ls.ctok->type == NEWLINE) { + printf("[newline]\n"); + } else { + printf("line %ld: <%2d> `%s'\n", ls.ctok->line, + ls.ctok->type, + STRING_TOKEN(ls.ctok->type) ? ls.ctok->name + : operators_name[ls.ctok->type]); + } + } + + /* give back memory and exit */ + wipeout(); + free_lexer_state(&ls); +#ifdef MEM_DEBUG + report_leaks(); +#endif + return 0; +} diff --git a/libexec/auxcpp/tune.h b/libexec/auxcpp/tune.h new file mode 100644 index 00000000000..e4afc31f9c4 --- /dev/null +++ b/libexec/auxcpp/tune.h @@ -0,0 +1,422 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef UCPP__TUNE__ +#define UCPP__TUNE__ + +#ifdef UCPP_CONFIG +#include "config.h" +#else + +/* ====================================================================== */ +/* + * The LOW_MEM macro triggers the use of macro storage which uses less + * memory. It actually also improves performance on large, modern machines + * (due to less cache pressure). This option implies no limitation (except + * on the number of arguments a macro may have, which is then limited to 32766) + * so it is on by default. Non-LOW_MEM code is considered deprecated. + */ +#define LOW_MEM + +/* ====================================================================== */ +/* + * Define AMIGA for systems using "drive letters" at the beginning of + * some paths; define MSDOS on systems with drive letters and using + * backslashes to separate directory components. + */ +/* #define AMIGA */ +/* #define MSDOS */ + +/* ====================================================================== */ +/* + * Define this if your compiler does not know the strftime() function; + * TurboC 2.01 under Msdos does not know strftime(). + */ +/* #define NOSTRFTIME */ + +/* ====================================================================== */ +/* + * Buffering: there are two levels of buffering on input and output streams: + * the standard libc buffering (manageable with setbuf() and setvbuf()) + * and some buffering provided by ucpp itself. 
The ucpp buffering uses + * two buffers, of size respectively INPUT_BUF_MEMG and OUTPUT_BUF_MEMG + * (as defined below). + * You can disable one or both of these bufferings by defining the macros + * NO_LIBC_BUF and NO_UCPP_BUF. + */ +/* #define NO_LIBC_BUF */ +/* #define NO_UCPP_BUF */ + +/* + * On Unix stations, the system call mmap() might be used on input files. + * This option is a subclause of ucpp internal buffering. On one station, + * a 10% speed improvement was observed. Do not define this unless the + * host architecture has the following characteristics: + * -- Posix / Single Unix compliance + * -- Text files correspond one to one with memory representation + * If a file is not seekable or not mmapable, ucpp will revert to the + * standard fread() solution. + * + * This feature is still considered beta quality. On some systems where + * files can be bigger than memory address space (mainly, 32-bit systems + * with files bigger than 4 GB), this option makes ucpp fail to operate + * on those extremely large files. + */ +#define UCPP_MMAP + +/* + * Performance issues: + * -- On memory-starved systems, such as Minix-i86, do not use ucpp + * buffering; keep only libc buffering. + * -- If you do not use libc buffering, activate the UCPP_MMAP option. + * Note that the UCPP_MMAP option is ignored if ucpp buffering is not + * activated. + * + * On an Athlon 1200 running FreeBSD 4.7, the best performances are + * achieved when libc buffering is activated and/or UCPP_MMAP is on. + */ + +/* ====================================================================== */ +/* + * Define this if you want ucpp to generate tokenized PRAGMA tokens; + * otherwise, it will generate raw string contents. This setting is + * irrelevant to the stand-alone version of ucpp. + */ +#define PRAGMA_TOKENIZE + +/* + * Define this to the special character that marks the end of tokens with + * a string value inside a tokenized PRAGMA token. 
The #pragma and _Pragma() + * directives which use this character will be a bit more difficult to + * decode (but ucpp will not mind). 0 cannot be used. '\n' is fine because + * it cannot appear inside a #pragma or _Pragma(), since newlines cannot be + * embedded inside tokens, neither directly nor by macro substitution and + * stringization. Besides, '\n' is portable. + */ +#define PRAGMA_TOKEN_END ((unsigned char)'\n') + +/* + * Define this if you want ucpp to include encountered #pragma directives + * in its output in non-lexer mode; _Pragma() are translated to equivalent + * #pragma directives. + */ +#define PRAGMA_DUMP + +/* + * According to my interpretation of the C99 standard, _Pragma() are + * evaluated wherever macro expansion could take place. However, Neil Booth, + * whose mother language is English (contrary to me) and who is well aware + * of the C99 standard (and especially the C preprocessor) told me that + * it was unclear whether _Pragma() are evaluated inside directives such + * as #if, #include and #line. If you want to disable the evaluation of + * _Pragma() inside such directives, define the following macro. + */ +/* #define NO_PRAGMA_IN_DIRECTIVE */ + +/* + * The C99 standard mandates that the operator `##' must yield a single, + * valid token, lest undefined behaviour befall upon thy head. Hence, + * for instance, `+ ## +=' is forbidden, because `++=' is not a valid + * token (although it is a valid list of two tokens, `++' and `='). + * However, ucpp only emits a warning for such sin, and unmerges the + * tokens (thus emitting `+' then `+=' for that example). When ucpp + * produces text output, those two tokens will be separated by a space + * character so that the basic rule of text output is preserved: when + * parsed again, text output yields the exact same stream of tokens. + * That extra space is virtual: it does not count as a true whitespace + * token for stringization. 
+ * + * However, it might be desirable, for some uses other than preprocessing + * C source code, not to emit that extra space at all. To make ucpp behave + * that way, define the DSHARP_TOKEN_MERGE macro. Please note that this + * can trigger spurious token merging. For instance, with that macro + * activated, `+ ## +=' will be output as `++=' which, if preprocessed + * again, will read as `++' followed by `='. + * + * All this is irrelevant to lexer mode; and trying to merge incompatible + * tokens is a shooting offence, anyway. + */ +/* #define DSHARP_TOKEN_MERGE */ + +/* ====================================================================== */ +/* + * Define INMACRO_FLAG to include two flags to the structure lexer_state, + * that tell whether tokens come from a macro-replacement, and count those + * macro-replacements. + */ +/* #define INMACRO_FLAG */ + +/* ====================================================================== */ +/* + * Paths where files are looked for by default, when #include is used. + * Typical path is /usr/local/include and /usr/include, in that order. + * If you want to set up no path, define the macro to 0. + * + * For Linux, get gcc includes too, or you will miss things like stddef.h. + * The exact path varies much, depending on the distribution. + */ +#define STD_INCLUDE_PATH "/usr/local/include", "/usr/include" + +/* ====================================================================== */ +/* + * Arithmetic code for evaluation of #if expressions. Evaluation + * uses either a native machine type, or an emulated two's complement + * type. Division by 0 and overflow on division are considered as errors + * and reported as such. If ARITHMETIC_CHECKS is defined, all other + * operations that imply undefined or implementation-defined behaviour + * are reported as warnings but otherwise performed nonetheless. 
+ * + * For native type evaluation, the following macros should be defined: + * NATIVE_SIGNED the native signed type + * NATIVE_UNSIGNED the native corresponding unsigned type + * NATIVE_UNSIGNED_BITS the native unsigned type width, in bits + * NATIVE_SIGNED_MIN the native signed type minimum value + * NATIVE_SIGNED_MAX the native signed type maximum value + * + * The code in the arith.c file performs some tricky detection + * operations on the native type representation and possible existence + * of a trap representation. These operations assume a C99-compliant + * compiler; on a C90-only compiler, the operations are valid but may + * yield incorrect results. You may force those settings with some + * more macros: see the comments in arith.c (look for "ARCH_DEFINED"). + * Remember that this is mostly a non-issue, unless you are building + * ucpp with a pre-C99 cross-compiler and either the host or target + * architecture uses a non-two's complement representation of signed + * integers. Such a combination is pretty rare nowadays, so the best + * you can do is forgetting completely this paragraph and live in peace. + * + * + * If you do not have a handy native type (for instance, you compile ucpp + * with a C90 compiler which lacks the "long long" type, or you compile + * ucpp for a cross-compiler which should support an evaluation integer + * type of a size that is not available on the host machine), you may use + * a simulated type. The type uses two's complement representation and + * may have any width from 2 bits to twice the underlying native type + * width, inclusive (odd widths are allowed). 
To use an emulated type, + * make sure that NATIVE_SIGNED is not defined, and define the following + * macros: + * SIMUL_ARITH_SUBTYPE the native underlying type to use + * SIMUL_SUBTYPE_BITS the native underlying type width + * SIMUL_NUMBITS the emulated type width + * + * Undefined and implementation-defined behaviours are warned upon, if + * ARITHMETIC_CHECKS is defined. Results are truncated to the type + * width; shift count for the << and >> operators is reduced modulo the + * emulated type width; right shifting of a signed negative value performs + * sign extension (the result is left-padded with bits set to 1). + */ + +/* + * For native type evaluation with a 64-bit "long long" type. + */ +#define NATIVE_SIGNED long long +#define NATIVE_UNSIGNED unsigned long long +#define NATIVE_UNSIGNED_BITS 64 +#define NATIVE_SIGNED_MIN (-9223372036854775807LL - 1) +#define NATIVE_SIGNED_MAX 9223372036854775807LL + +/* + * For emulation of a 64-bit type using a native 32-bit "unsigned long" + * type. +#undef NATIVE_SIGNED +#define SIMUL_ARITH_SUBTYPE unsigned long +#define SIMUL_SUBTYPE_BITS 32 +#define SIMUL_NUMBITS 64 + */ + +/* + * Comment out the following line if you want to deactivate arithmetic + * checks (warnings upon undefined and implementation-defined + * behaviour). Arithmetic checks slow down a bit arithmetic operations, + * especially multiplications, but this should not be an issue with + * typical C source code. + */ +#define ARITHMETIC_CHECKS + +/* ====================================================================== */ +/* + * To force signedness of wide character constants, define WCHAR_SIGNEDNESS + * to 0 for unsigned, 1 for signed. By default, wide character constants + * are signed if the native `char' type is signed, and unsigned otherwise. +#define WCHAR_SIGNEDNESS 0 + */ + +/* + * Standard assertions. They should include one cpu() assertion, one machine() + * assertion (identical to cpu()), and one or more system() assertions. 
+ * + * for Linux/PC: cpu(i386), machine(i386), system(unix), system(linux) + * for Linux/Alpha: cpu(alpha), machine(alpha), system(unix), system(linux) + * for Sparc/Solaris: cpu(sparc), machine(sparc), system(unix), system(solaris) + * + * These are only suggestions. On Solaris, machine() should be defined + * for i386 or sparc (standard system header use such an assertion). For + * cross-compilation, define assertions related to the target architecture. + * + * If you want no standard assertion, define STD_ASSERT to 0. + */ +/* +#define STD_ASSERT "cpu(i386)", "machine(i386)", "system(unix)", \ + "system(freebsd)" +*/ + +/* ====================================================================== */ +/* + * System predefined macros. Nothing really mandatory, but some programs + * might rely on those. + * Each string must be either "name" or "name=token-list". If you want + * no predefined macro, define STD_MACROS to 0. + */ +/* +#define STD_MACROS "__FreeBSD=4", "__unix", "__i386", \ + "__FreeBSD__=4", "__unix__", "__i386__" +*/ + +/* ====================================================================== */ +/* + * Default flags; HANDLE_ASSERTIONS is required for Solaris system headers. + * See cpp.h for the definition of these flags. + */ +#define DEFAULT_CPP_FLAGS (DISCARD_COMMENTS | WARN_STANDARD \ + | WARN_PRAGMA | FAIL_SHARP | MACRO_VAARG \ + | CPLUSPLUS_COMMENTS | LINE_NUM | TEXT_OUTPUT \ + | KEEP_OUTPUT | HANDLE_TRIGRAPHS \ + | HANDLE_ASSERTIONS) +#define DEFAULT_LEXER_FLAGS (DISCARD_COMMENTS | WARN_STANDARD | FAIL_SHARP \ + | MACRO_VAARG | CPLUSPLUS_COMMENTS | LEXER \ + | HANDLE_TRIGRAPHS | HANDLE_ASSERTIONS) + +/* ====================================================================== */ +/* + * Define this to use sigsetjmp()/siglongjmp() instead of setjmp()/longjmp(). + * This is non-ANSI, but it improves performance on some POSIX system. + * On typical C source code, such improvement is completely negligeable. 
+ */ +/* #define POSIX_JMP */ + +/* ====================================================================== */ +/* + * Maximum value (plus one) of a character handled by the lexer; 128 is + * alright for ASCII native source code, but 256 is needed for EBCDIC. + * 256 is safe in both cases; you will have big problems if you set + * this value to INT_MAX or above. On Minix-i86 or Msdos (small memory + * model), define MAX_CHAR_VAL to 128. + * + * Set MAX_CHAR_VAL to a power of two to increase lexing speed. Beware + * that lexer.c defines a static array of size MSTATE * MAX_CHAR_VAL + * values of type int (MSTATE is defined in lexer.c and is about 40). + */ +#define MAX_CHAR_VAL 128 + +/* + * If you want some extra character to be considered as whitespace, + * define this macro to that space. On ISO-8859-1 machines, 160 is + * the code for the unbreakable space. + */ +/* #define UNBREAKABLE_SPACE 160 */ + +/* + * If you want whitespace tokens contents to be recorded (making them + * tokens with a string content), define this. The macro STRING_TOKEN + * will be adjusted accordingly. + * Without this option, whitespace tokens are not even returned by the + * lex() function. This is irrelevant for the non-lexer mode (almost -- + * it might slow down a bit ucpp, and with this option, comments will be + * kept inside #pragma directives). + */ +/* #define SEMPER_FIDELIS */ + +#endif +/* End of options overridable by UCPP_CONFIG and config.h */ + +/* ====================================================================== */ +/* + * Some constants used for memory increment granularity. Increasing these + * values reduces the number of calls to malloc() but increases memory + * consumption. + * + * Values should be powers of 2. 
+ */ + +/* for cpp.c */ +#define COPY_LINE_LENGTH 80 +#define INPUT_BUF_MEMG 8192 +#define OUTPUT_BUF_MEMG 8192 +#define TOKEN_NAME_MEMG 64 /* must be at least 4 */ +#define TOKEN_LIST_MEMG 32 +#define INCPATH_MEMG 16 +#define GARBAGE_LIST_MEMG 32 +#define LS_STACK_MEMG 4 +#define FNAME_MEMG 32 + +/* ====================================================================== */ + +/* To protect the innocent. */ +#if defined(NO_UCPP_BUF) && defined(UCPP_MMAP) +#undef UCPP_MMAP +#endif + +#if defined(UCPP_MMAP) || defined(POSIX_JMP) +#ifndef _POSIX_SOURCE +#define _POSIX_SOURCE 1 +#endif +#endif + +/* + * C90 does not know about the "inline" keyword, but C99 does know, + * and some C90 compilers know it as an extension. This part detects + * these occurrences. + */ + +#ifndef INLINE + +#if __STDC__ && __STDC_VERSION__ >= 199901L +/* this is a C99 compiler, keep inline unchanged */ +#elif defined(__GNUC__) +/* this is GNU gcc; modify inline. The semantics is not identical to C99 + but the differences are irrelevant as long as inline functions are static */ +#undef inline +#define inline __inline__ +#elif defined(__DECC) && defined(__linux__) +/* this is Compaq C under Linux, use __inline__ */ +#undef inline +#define inline __inline__ +#else +/* unknown compiler -> deactivate inline */ +#undef inline +#define inline +#endif + +#else +/* INLINE has been set, use its value */ +#undef inline +#define inline INLINE +#endif + +#endif diff --git a/libexec/auxcpp/ucpp.1 b/libexec/auxcpp/ucpp.1 new file mode 100644 index 00000000000..c6c30515056 --- /dev/null +++ b/libexec/auxcpp/ucpp.1 @@ -0,0 +1,212 @@ +.TH UCPP 1 "Oct 21 2000" +.SH NAME +ucpp \- C preprocessor +.SH SYNOPSIS +.B ucpp +[ +.I options +] +[ +.I file +] +.SH DESCRIPTION +.LP +.B ucpp +is a C preprocessor mostly compatible with ISO-C99. +It is rather strict and uses only a small amount of memory. It uses +standard input as primary input if no file argument is given. +.SH OPTIONS +There are several classes of options. 
+.TP +.B Language Options +.TP +.BI \-C +keep comments in the output. +.TP +.BI \-s +if a rogue '#' is encountered, do not emit an error and keep it in +the output. +.TP +.BI \-l +suppress the emission of '#line' directives in the output. +.TP +.BI \-lg +convert the '#line' to the gcc-style equivalent. +.TP +.BI \-CC +disable C++-like comments (a '//' begins a comment, up to the end +of the line). Use this option to get closer to C90 behaviour. +.TP +.B \-a, \-na +handle assertions (defined with #assert); +.B \-a +also defines the standard assertions +.I #machine +, +.I #cpu +and +.I #system +(see +.B \-e +to get the local definition of such assertions). +.TP +.BI \-a0 +disable assertion support. +.TP +.BI \-V +disable support for macros with a variable number of arguments: in C99, +a macro may be declared with +.I ... +as the last argument; inside the replacement list, +.I __VA_ARGS__ +is replaced with the optional extra arguments given in the call to the macro. +Use this option to get closer to C90 behaviour. +.TP +.BI \-u +enable UTF-8 support: with this option, the source is considered as +an ISO/10646 source, encoded in UTF-8. Characters represented as two bytes +or more are considered as alphabetic characters, like letters, and +therefore usable in identifiers. These characters hold the same +syntactic value as the corresponding Universal Character Names. +.TP +.BI \-X +enable +.B \-a, \-u +and +.B \-Y. +This should make +.B ucpp +behave closer to what is requested from a "modern" C preprocessor. +.TP +.BI \-c90 +enable +.B \-V +and +.B \-CC, +and do not define +.B __STDC_VERSION__. +This should make +.B ucpp +mimic older C90 behaviour. +.TP +.BI \-t +disable trigraph support; this seems to be required for some legacy code. +.TP +.B Warning Options +.TP +.BI \-wt +emit a final warning when trigraphs are encountered. +.TP +.BI \-wtt +emit warnings for each trigraph encountered. +.TP +.BI \-wa +emit annoying warnings (these are usually useless). 
+.TP +.BI \-w0 +suppress standard warnings. +.TP +.B Directory Options +.TP +.BI \-I directory +.TP +.BI "\-I " directory +add +.I directory +to the include path, before the standard include path. +.TP +.BI \-J directory +.TP +.BI "\-J " directory +add +.I directory +to the include path, after the standard include path. +.TP +.BI \-zI +do not use the standard (compile-time) include path. +.TP +.BI \-M +emit only the names of encountered files, separated by spaces; this is +intended for automatic generation of Makefile dependencies. +.TP +.BI \-Ma +do the same as +.B \-M +but also for system files. +.TP +.BI "\-o " file +direct the output to +.I file +instead of standard output. +.TP +.B Macro Options +.TP +.BI \-D macro +predefine +.I macro +with content +.B 1. +.TP +.BI \-D macro=def +predefine +.I macro +with the content +.I def. +.TP +.BI \-U macro +undefine +.I macro. +.TP +.BI \-Y +predefine system-dependent macros. +.TP +.BI \-Z +do not predefine special macros such as +.B __TIME__. +.TP +.BI \-A foo(bar) +add +.I foo(bar) +to the list of assertions. +.TP +.BI \-B foo(bar) +remove +.I foo(bar) +from the list of assertions; you may also use +.BI \-B foo +to remove all +.BI \-B foo(xxx) +from the list of assertions. +.TP +.BI \-d +instead of normal output, emit '#define' directives representing all +macros defined during processing. +.TP +.BI \-e +instead of normal output, emit '#assert' directives representing all +assertions defined during processing. +.TP +.B Miscellaneous Options +.TP +.BI \-v +print version number, include path and (optionally) defined assertions. +.TP +.BI \-h +print some help. +.SH ENVIRONMENT +.PP +.B ucpp +is not itself affected by environment variables. However, it uses +library functions that might be affected, depending on the system. +.SH AUTHOR +Thomas Pornin +.SH BUGS +.PP +.B ucpp +is considered stable software. 
However improbable it is, please report +bugs to the author (possibly with a file that exhibits the problem) if +the latest version, available from this site: +.TP +http://pornin.nerim.net/ucpp/ +.PP +has the bug. diff --git a/libexec/auxcpp/ucppi.h b/libexec/auxcpp/ucppi.h new file mode 100644 index 00000000000..ce4df74be52 --- /dev/null +++ b/libexec/auxcpp/ucppi.h @@ -0,0 +1,196 @@ +/* + * (c) Thomas Pornin 1999 - 2002 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef UCPP__UCPPI__ +#define UCPP__UCPPI__ + +#include "tune.h" +#include "cpp.h" +#include "nhash.h" + +/* + * A macro represented in a compact form; simple tokens are represented + * by one byte, containing their number. Tokens with a string value are + * followed by the value (string finished by a 0). Macro arguments are + * followed by the argument number (in one byte -- thus implying a hard + * limit of 254 arguments (number 255 is for __VA_ARGS__). + */ +struct comp_token_fifo { + size_t length; + size_t rp; + unsigned char *t; +}; + +/* These declarations are used only internally by ucpp */ + +/* + * S_TOKEN(x) checks whether x is a token type with an embedded string + * ttMWS(x) checks whether x is macro whitespace (space, comment...) + * ttWHI(x) checks whether x is whitespace (MWS or newline) + */ +#define S_TOKEN(x) STRING_TOKEN(x) +#define ttMWS(x) ((x) == NONE || (x) == COMMENT || (x) == OPT_NONE) +#define ttWHI(x) (ttMWS(x) || (x) == NEWLINE) + +/* + * Function prototypes + */ +/* + * from lexer.c + */ +#define init_cppm ucpp_init_cppm +#define put_char ucpp_put_char +#define discard_char ucpp_discard_char +#define next_token ucpp_next_token +#define grap_char ucpp_grap_char +#define space_char ucpp_space_char + +void init_cppm(void); +void put_char(struct lexer_state *, unsigned char); +void discard_char(struct lexer_state *); +int next_token(struct lexer_state *); +int grap_char(struct lexer_state *); +int space_char(int); + +/* + * from assert.c + */ +struct assert { + hash_item_header head; /* first field */ + size_t nbval; + struct token_fifo *val; +}; + +#define cmp_token_list ucpp_cmp_token_list +#define handle_assert ucpp_handle_assert +#define handle_unassert ucpp_handle_unassert +#define get_assertion ucpp_get_assertion +#define wipe_assertions ucpp_wipe_assertions + +int cmp_token_list(struct token_fifo *, struct token_fifo *); +int handle_assert(struct lexer_state *); +int handle_unassert(struct lexer_state *); +struct assert 
*get_assertion(char *); +void wipe_assertions(void); + +/* + * from macro.c + */ +struct macro { + hash_item_header head; /* first field */ + int narg; + char **arg; + int nest; + int vaarg; +#ifdef LOW_MEM + struct comp_token_fifo cval; +#else + struct token_fifo val; +#endif +}; + +#define print_token ucpp_print_token +#define handle_define ucpp_handle_define +#define handle_undef ucpp_handle_undef +#define handle_ifdef ucpp_handle_ifdef +#define handle_ifndef ucpp_handle_ifndef +#define substitute_macro ucpp_substitute_macro +#define get_macro ucpp_get_macro +#define wipe_macros ucpp_wipe_macros +#define dsharp_lexer ucpp_dsharp_lexer +#define compile_time ucpp_compile_time +#define compile_date ucpp_compile_date +#ifdef PRAGMA_TOKENIZE +#define tokenize_lexer ucpp_tokenize_lexer +#endif + +void print_token(struct lexer_state *, struct token *, long); +int handle_define(struct lexer_state *); +int handle_undef(struct lexer_state *); +int handle_ifdef(struct lexer_state *); +int handle_ifndef(struct lexer_state *); +int substitute_macro(struct lexer_state *, struct macro *, + struct token_fifo *, int, int, long); +struct macro *get_macro(char *); +void wipe_macros(void); + +extern struct lexer_state dsharp_lexer; +extern char compile_time[], compile_date[]; +#ifdef PRAGMA_TOKENIZE +extern struct lexer_state tokenize_lexer; +#endif + +/* + * from eval.c + */ +#define strtoconst ucpp_strtoconst +#define eval_expr ucpp_eval_expr +#define eval_line ucpp_eval_line + +unsigned long strtoconst(char *); +unsigned long eval_expr(struct token_fifo *, int *, int); +extern long eval_line; + +#define eval_exception ucpp_eval_exception + +#ifdef POSIX_JMP +#define JMP_BUF sigjmp_buf +#define catch(x) sigsetjmp((x), 0) +#define throw(x) siglongjmp((x), 1) +#else +#define JMP_BUF jmp_buf +#define catch(x) setjmp((x)) +#define throw(x) longjmp((x), 1) +#endif +extern JMP_BUF eval_exception; + +/* + * from cpp.c + */ +#define token_name ucpp_token_name +#define throw_away 
ucpp_throw_away +#define garbage_collect ucpp_garbage_collect +#define init_buf_lexer_state ucpp_init_buf_lexer_state +#ifdef PRAGMA_TOKENIZE +#define compress_token_list ucpp_compress_token_list +#endif + +char *token_name(struct token *); +void throw_away(struct garbage_fifo *, char *); +void garbage_collect(struct garbage_fifo *); +void init_buf_lexer_state(struct lexer_state *, int); +#ifdef PRAGMA_TOKENIZE +struct comp_token_fifo compress_token_list(struct token_fifo *); +#endif + +#define ouch ucpp_ouch +#define error ucpp_error +#define warning ucpp_warning + +#endif -- 2.20.1