C Program Representation Library
This is the Monday cpr
library, providing an
abstract syntax tree language for representing programs in the
C programming language.
Copyright ©2004–2007 Peter S. Housel.
This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The C programming language serves as a popular language for systems and applications programming, as well as a “least common denominator” language for describing system interfaces, and as a “portable assembly language” that compilers for other languages can emit.
This library provides a means for Dylan programs to represent C program code and declarations, and to parse and unparse C language programs.
<Module Dylan-user
>=
define library cpr
use Common-Dylan;
use Source-Location;
use IO;
use System;
<Import external libraries used by cpr
>
export cpr-preprocessor,
cpr;
end library;
define module cpr-preprocessor <create
identifiers exported by thecpr-preprocessor
module> end module;
define module cpr use cpr-preprocessor, export: all; <<Utility module definitions>create
identifiers exported by thecpr
module> end module;
define module cpr-internals
use Common-Dylan, exclude: { format-to-string };
use Locators;
use cpr-preprocessor;
use cpr;
<Modules imported by the cpr-internals
module>
end module;
The C preprocessor FIXME
<create
identifiers exported by the cpr-preprocessor
module>=
// Preprocessor dialects create <C-preprocessor-dialect>, $C90-C-preprocessor-dialect, $C99-C-preprocessor-dialect, $MSVC6-C-preprocessor-dialect, $gnu89-C-preprocessor-dialect, $gnu99-C-preprocessor-dialect;
|
[Open Class] |
The class of representations of C preprocessor dialects.
<object>
None.
FIXME
<Module cpr-internals
>=
define open class <C-preprocessor-dialect> (<object>)
<Slots in the <C-preprocessor-dialect>
class>
end class;
<Define an initialize
method on <C-preprocessor-dialect>
><Define the preprocessor-directive-productions
macro>
|
[Constant] |
The C preprocessor dialect representing ISO/IEC 9899:1990.
<C-preprocessor-dialect>
FIXME
<Module cpr-internals
>+=
define constant $C90-C-preprocessor-dialect :: <C-preprocessor-dialect> = make(<C-preprocessor-dialect>);
|
[Constant] |
The C preprocessor dialect representing ISO/IEC 9899:1999.
<C-preprocessor-dialect>
FIXME
<Module cpr-internals
>+=
define constant $C99-C-preprocessor-dialect :: <C-preprocessor-dialect> = make(<C-preprocessor-dialect>, parent: $C90-C-preprocessor-dialect);
|
[Constant] |
The C preprocessor dialect representing Microsoft Visual C version 6.
<C-preprocessor-dialect>
FIXME
<Module cpr-internals
>+=
define constant $MSVC6-C-preprocessor-dialect :: <C-preprocessor-dialect> = make(<C-preprocessor-dialect>, parent: $C90-C-preprocessor-dialect);
|
[Constant] |
The C preprocessor dialect representing ISO/IEC 9899:1990 C with GNU extensions.
<C-preprocessor-dialect>
FIXME
<Module cpr-internals
>+=
define constant $gnu89-C-preprocessor-dialect :: <C-preprocessor-dialect> = make(<C-preprocessor-dialect>, parent: $C90-C-preprocessor-dialect);
|
[Constant] |
The C preprocessor dialect representing ISO/IEC 9899:1999 with GNU extensions.
<C-preprocessor-dialect>
FIXME
<Module cpr-internals
>+=
define constant $gnu99-C-preprocessor-dialect :: <C-preprocessor-dialect> = make(<C-preprocessor-dialect>, parent: $C99-C-preprocessor-dialect);
<create
identifiers exported by the cpr-preprocessor
module>+=
// Translation units create <C-preprocessing-translation-unit-representation>, preprocessing-header-search-path, preprocessing-system-header-search-path, preprocessing-macros, preprocessor-define, preprocessor-undefine, preprocess-C-source-file, preprocess-C-header-file, preprocess-C-system-header-file, preprocess-C-stream, preprocessor-token-string;
|
[Class] |
The class of representations of entire C preprocessing translation units.
<object>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-preprocessing-translation-unit-representation> (<object>)
constant slot preprocessing-header-search-path :: <deque>
= make(<deque>);
constant slot preprocessing-system-header-search-path :: <deque>
= make(<deque>);
constant slot preprocessing-trigraph-support? :: <boolean> = #f,
init-keyword: trigraph-support:;
<Slots in <C-preprocessing-translation-unit-representation>
>
end class;
preprocessor-define |
[Generic Function] |
Adds a preprocessor macro definition to a preprocessor translation unit.
preprocessor-define
preprocessing-translation-unit
identifier
replacement
⇒
preprocessing-translation-unit
<C-preprocessing-translation-unit-representation>
.identifier
<byte-string>
.replacement
<byte-string>
.None.
FIXME
<Module cpr-internals
>+=
define method preprocessor-define
(preprocessing-translation-unit
:: <C-preprocessing-translation-unit-representation>,
identifier :: <byte-string>,
replacement :: <byte-string>)
=> ();
<Define identifier
as an object-like preprocessing macro>
end method;
preprocessor-undefine |
[Generic Function] |
Removes a preprocessor macro definition to a preprocessor translation unit.
preprocessor-define
preprocessing-translation-unit
identifier
⇒
preprocessing-translation-unit
<C-preprocessing-translation-unit-representation>
.identifier
<byte-string>
.None.
FIXME
<Module cpr-internals
>+=
define method preprocessor-undefine
(preprocessing-translation-unit
:: <C-preprocessing-translation-unit-representation>,
identifier :: <byte-string>)
=> ();
<Undefine identifier
>
end method;
preprocess-C-source-file |
[Generic Function] |
Preprocesses a C source file.
preprocess-C-source-file
preprocessing-translation-unit
locator
consumer
consumer-data
⇒
preprocessing-translation-unit
<C-preprocessing-translation-unit-representation>
.locator
<file-locator>
.consumer
<function>
.consumer-data
<object>
.None.
FIXME
<Module cpr-internals
>+=
define method preprocess-C-source-file
(preprocessing-translation-unit
:: <C-preprocessing-translation-unit-representation>,
locator :: <file-locator>,
consumer :: <function>,
consumer-data,
#key dialect = $C99-C-preprocessor-dialect)
=> ();
<Preprocess source file locator
>
end method;
preprocess-C-header-file |
[Generic Function] |
Preprocesses a C header file.
preprocess-C-header-file
preprocessing-translation-unit
locator
search-directory
consumer
consumer-data
⇒
preprocessing-translation-unit
<C-preprocessing-translation-unit-representation>
.locator
<file-locator>
.search-directory
<directory-locator>
.consumer
<function>
.consumer-data
<object>
.None.
FIXME
<Module cpr-internals
>+=
define method preprocess-C-header-file
(preprocessing-translation-unit
:: <C-preprocessing-translation-unit-representation>,
locator :: <file-locator>,
search-directory :: <directory-locator>,
consumer :: <function>,
consumer-data,
#key dialect = $C99-C-preprocessor-dialect)
=> ();
<Preprocess the header file locator
>
end method;
preprocess-C-system-header-file |
[Generic Function] |
Preprocesses a C system header file.
preprocess-C-system-header-file
preprocessing-translation-unit
locator
consumer
consumer-data
⇒
preprocessing-translation-unit
<C-preprocessing-translation-unit-representation>
.locator
<file-locator>
.consumer
<function>
.consumer-data
<object>
.None.
FIXME
<Module cpr-internals
>+=
define method preprocess-C-system-header-file
(preprocessing-translation-unit
:: <C-preprocessing-translation-unit-representation>,
locator :: <file-locator>,
consumer :: <function>,
consumer-data,
#key dialect = $C99-C-preprocessor-dialect)
=> ();
<Preprocess the system header file locator
>
end method;
preprocess-C-stream |
[Generic Function] |
Preprocesses C code from an input stream.
preprocess-C-stream
preprocessing-translation-unit
stream
directory
rangemap
consumer
consumer-data
⇒
preprocessing-translation-unit
<C-preprocessing-translation-unit-representation>
.stream
<stream>
.rangemap
<source-location-rangemap>
.consumer
<function>
.consumer-data
<object>
.None.
FIXME
<Module cpr-internals
>+=
define generic preprocess-C-stream (preprocessing-translation-unit :: <C-preprocessing-translation-unit-representation>, stream :: <stream>, directory :: false-or(<directory-locator>), rangemap :: <source-location-rangemap>, consumer :: <function>, consumer-data, #key start-position, dialect = $C99-C-preprocessor-dialect) => (end-position :: <integer>);
preprocessor-token-string |
[Function] |
Returns a string corresponding to a preprocessing token.
preprocessor-token-string
token-name
token-value
⇒
string
token-name
<symbol>
.token-value
<object>
.string
<string>
.FIXME
<Module cpr-internals
>+=
define function preprocessor-token-string
(token-name :: <symbol>, token-value :: <object>)
=> (string :: <byte-string>);
select (token-name)
<Cases for preprocessor-token-string
>
otherwise =>
error("preprocessor-token-string called for unknown token %s(%=)",
token-name, token-value);
end select
end function;
The C language FIXME
<create
identifiers exported by the cpr
module>=
// Language dialects create <C-language-dialect>, $C90-C-language-dialect, $C99-C-language-dialect, $MSVC6-C-language-dialect;
|
[Open Class] |
The class of representations of C language dialects.
<object>
None.
FIXME
<Module cpr-internals
>+=
define open class <C-language-dialect> (<object>) // FIXME end class;
|
[Constant] |
The C language dialect representing ISO/IEC 9899:1990.
<C-language-dialect>
FIXME
<Module cpr-internals
>+=
define constant $C90-C-language-dialect :: <C-language-dialect> = make(<C-language-dialect>);
|
[Constant] |
The C language dialect representing ISO/IEC 9899:1999.
<C-language-dialect>
FIXME
<Module cpr-internals
>+=
define constant $C99-C-language-dialect :: <C-language-dialect> = make(<C-language-dialect>);
|
[Constant] |
The C language dialect representing Microsoft Visual C version 6.
<C-language-dialect>
FIXME
<Module cpr-internals
>+=
define constant $MSVC6-C-language-dialect :: <C-language-dialect> = make(<C-language-dialect>);
<create
identifiers exported by the cpr
module>+=
// Platforms create <C-translation-platform>, platform-system-include-path, platform-char-bits, platform-char-signed?, platform-type-size, platform-type-alignment;
<create
identifiers exported by the cpr
module>+=
// Translation units create <C-translation-unit-representation>, translation-unit-external-declarations, parse-C-source-file, parse-C-header-file, parse-C-system-header-file;
|
[Class] |
The class of representations of entire C translation units.
<object>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-translation-unit-representation> (<object>) constant slot translation-unit-preprocessing-translation-unit :: <C-preprocessing-translation-unit-representation>, required-init-keyword: preprocessing-translation-unit:; <> end class;
parse-C-source-file |
[Generic Function] |
Parses a C source file and adds a representation of its contents to a translation unit object.
parse-C-source-file
translation-unit
locator
⇒
translation-unit
<C-translation-unit-representation>
.locator
<file-locator>
.None.
FIXME
<Module cpr-internals
>+=
define method parse-C-source-file (translation-unit :: <C-translation-unit-representation>, locator :: <file-locator>) => (); <Parse the source filelocator
intotranslation-unit
> end method;
parse-C-header-file |
[Generic Function] |
Parses a C header file and adds a representation of its contents to a translation unit object.
parse-C-header-file
translation-unit
locator
⇒
translation-unit
<C-translation-unit-representation>
.locator
<file-locator>
.None.
FIXME
<Module cpr-internals
>+=
define method parse-C-header-file (translation-unit :: <C-translation-unit-representation>, locator :: <file-locator>) => (); <> end method;
parse-C-system-header-file |
[Generic Function] |
Parses a C system header file and adds a representation of its contents to a translation unit object.
parse-C-system-header-file
translation-unit
locator
⇒
translation-unit
<C-translation-unit-representation>
.locator
<file-locator>
.None.
FIXME
<Module cpr-internals
>+=
define method parse-C-system-header-file (translation-unit :: <C-translation-unit-representation>, locator :: <file-locator>) => (); <> end method;
<create
identifiers exported by the cpr
module>+=
// Types create <C-type-representation>, <C-integer-type-representation>, <C-enum-type-representation>, <C-float-type-representation>, <C-array-type-representation>, <C-function-type-representation>, <C-struct/union-type-representation>, <C-pointer-type-representation>, $C-void*-representation, $C-char*-representation, $C-const-char*-representation;
|
[Abstract Class] |
The class of representations of C types.
<object>
FIXME
FIXME
<Module cpr-internals
>+=
define abstract class <C-type-representation> (<object>) <> end class;
|
[Class] |
The class of representations of C integer types.
<C-type-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-integer-type-representation> (<C-type-representation>) <> end class;
|
[Class] |
The class of representations of C enumeration types.
<C-type-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-enum-type-representation> (<C-type-representation>) <> end class;
|
[Class] |
The class of representations of C floating-point types.
<C-type-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-float-type-representation> (<C-type-representation>) <> end class;
|
[Class] |
The class of representations of C array types.
<C-type-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-array-type-representation> (<C-type-representation>) <> end class;
|
[Class] |
The class of representations of C function types.
<C-type-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-function-type-representation> (<C-type-representation>) <> end class;
|
[Class] |
The class of representations of C struct
and union
types.
<C-type-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-struct/union-type-representation> (<C-type-representation>) <> end class;
|
[Class] |
The class of representations of C array types.
<C-type-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-pointer-type-representation> (<C-type-representation>) <> end class;
<create
identifiers exported by the cpr
module>+=
// Expressions create <C-expression-representation>, expression-type, <C-constant-expression-representation>, <C-string-literal-expression-representation>, expression-value, <C-variable-reference-expression-representation>, expression-variable, <C-function-reference-expression-representation>, expression-operator, <C-unary-expression-representation>, expression-unary-operand, <C-binary-expression-representation>, expression-binary-left, expression-binary-right, <C-conditional-expression-representation>, expression-conditional-condition, expression-conditional-true, expression-conditional-false, <C-cast-expression-representation>, expression-cast-operand, <C-sizeof-type-expression-representation>, expression-sizeof-type, <C-function-call-expression-representation>, expression-call-function, expression-call-arguments, <C-member-expression-representation>, expression-member-operand, expression-member-name, print-C-expression, $precedence-level-assignment-expression, $precedence-level-expression;
|
[Abstract Class] |
The class of representations of C expressions.
<source-location-mixin>
FIXME
FIXME
<Module cpr-internals
>+=
define abstract class <C-expression-representation> (<source-location-mixin>) <> end class;
define sealed domain initialize(<C-expression-representation>);
expression-type |
[Generic Function] |
Returns the type of a C expression.
expression-type
expression
⇒
type
expression
<C-expression-representation>
.type
<C-type-representation>
.FIXME
<Module cpr-internals
>+=
define generic expression-type (expression :: <C-expression-representation>) => (type :: <C-type-representation>);
|
[Class] |
The class of representations of C constant expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-constant-expression-representation> (<C-expression-representation>) constant slot expression-value :: <object>, required-init-keyword: value:; end class;
define sealed domain make(singleton(<C-constant-expression-representation>));
|
[Class] |
The class of representations of C string literal expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-string-literal-expression-representation> (<C-expression-representation>) constant slot expression-value :: <object>, required-init-keyword: value:; end class;
define sealed domain make(singleton(<C-string-literal-expression-representation>));
|
[Class] |
The class of representations of C variable reference expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-variable-reference-expression-representation> (<C-expression-representation>) constant slot expression-variable :: <string>, // FIXME required-init-keyword: variable:; end class;
define sealed domain make(singleton(<C-variable-reference-expression-representation>));
|
[Class] |
The class of representations of C function reference expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-function-reference-expression-representation> (<C-expression-representation>) // end class;
define sealed domain make(singleton(<C-function-reference-expression-representation>));
|
[Class] |
The class of representations of C unary expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define constant <C-unary-expression-operator> = one-of(#"POSTINC", #"POSTDEC", #"PREINC", #"PREDEC", #"ADDROF", #"DEREF", #"PLUS", #"MINUS", #"BITWISE-NOT", #"LOGICAL-NOT", #"SIZEOF", #"ALIGNOF");
define class <C-unary-expression-representation> (<C-expression-representation>) slot expression-type :: <C-type-representation>, init-keyword: type:; constant slot expression-operator :: <C-unary-expression-operator>, required-init-keyword: operator:; constant slot expression-unary-operand :: <C-expression-representation>, required-init-keyword: operand:; end class;
define sealed domain make(singleton(<C-unary-expression-representation>));
|
[Class] |
The class of representations of C binary expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define constant <C-binary-expression-operator> = one-of(#"MUL", #"DIV", #"MOD", #"ADD", #"SUB", #"SHL", #"SHR", #"LT", #"LE", #"GT", #"GE", #"EQ", #"NE", #"BITWISE-AND", #"BITWISE-XOR", #"BITWISE-OR", #"LOGICAL-AND", #"LOGICAL-OR", #"ASSIGN", #"COMMA");
define class <C-binary-expression-representation> (<C-expression-representation>) slot expression-type :: <C-type-representation>, init-keyword: type:; constant slot expression-operator :: <C-binary-expression-operator>, required-init-keyword: operator:; constant slot expression-binary-right :: <C-expression-representation>, required-init-keyword: right:; constant slot expression-binary-left :: <C-expression-representation>, required-init-keyword: left:; end class;
define sealed domain make(singleton(<C-binary-expression-representation>));
|
[Class] |
The class of representations of C binary expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-conditional-expression-representation> (<C-expression-representation>) constant slot expression-conditional-condition :: <C-expression-representation>, required-init-keyword: condition:; constant slot expression-conditional-true :: <C-expression-representation>, required-init-keyword: true:; constant slot expression-conditional-false :: <C-expression-representation>, required-init-keyword: false:; end class;
define sealed domain make(singleton(<C-conditional-expression-representation>));
|
[Class] |
The class of representations of C cast expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-cast-expression-representation> (<C-expression-representation>) slot expression-type :: <C-type-representation>, init-keyword: type:; constant slot expression-cast-operand :: <C-expression-representation>, required-init-keyword: operand:; end class;
define sealed domain make(singleton(<C-cast-expression-representation>));
|
[Class] |
The class of representations of C sizeof
expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-sizeof-type-expression-representation> (<C-expression-representation>) constant slot expression-sizeof-type :: <C-type-representation>, required-init-keyword: sizeof-type:; end class;
define sealed domain make(singleton(<C-sizeof-type-expression-representation>));
|
[Class] |
The class of representations of C function call expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-function-call-expression-representation> (<C-expression-representation>) constant slot expression-call-function :: <C-expression-representation>, required-init-keyword: function:; constant slot expression-call-arguments :: <sequence>, required-init-keyword: arguments:; end class;
define sealed domain make(singleton(<C-function-call-expression-representation>));
|
[Class] |
The class of representations of C
struct
/union
field member
expressions.
<C-expression-representation>
FIXME
FIXME
<Module cpr-internals
>+=
define class <C-member-expression-representation> (<C-expression-representation>) constant slot expression-member-operand :: <C-expression-representation>, required-init-keyword: operand:; constant slot expression-member-name :: <string>, required-init-keyword: name:; end class;
define sealed domain make(singleton(<C-member-expression-representation>));
print-C-expression |
[Generic Function] |
Prints a C expression using C syntax.
print-C-expression
expression
steam
⇒
expression
<C-expression-representation>
.stream
<stream>
.None.
FIXME
<Module cpr-internals
>+=
define generic print-C-expression (expression :: <C-expression-representation>, stream :: <stream>, #key level) => ();
$precedence-level-assignment-expression |
[Constant] |
FIXME
FIXME
<Module cpr-internals
>+=
define constant $precedence-level-assignment-expression :: <integer> = 15;
$precedence-level-expression |
[Constant] |
FIXME
FIXME
<Module cpr-internals
>+=
define constant $precedence-level-expression :: <integer> = 16;
We'll be representing C identifiers using
interned strings; in particular, for any
particular identifier name, there will be a canonical
<byte-string>
object that represents its
name.
<Utility module definitions>=
define module interned-string use common-dylan; use byte-vector; export intern-string; end module;
We'll keep track of interned strings in an ordinary chained hash table.
<Module interned-string
>=
define variable *interned-string-buckets* :: <simple-object-vector> = make(<simple-object-vector>, size: 53);
define variable *interned-string-count* :: <integer> = 0;
The string table is made up of entries, each of type
<interned-string-entry>
:
<Module interned-string
>+=
define class <interned-string-entry> (<object>) constant slot interned-string :: <byte-string>, required-init-keyword: interned-string:; constant slot interned-string-hash-value :: <integer>, required-init-keyword: hash:; slot interned-string-entry-next :: false-or(<interned-string-entry>), init-value: #f, init-keyword: next:; end class;
define sealed domain make(singleton(<interned-string-entry>));
define sealed domain initialize(<interned-string-entry>);
The following hash function is modeled after the one in SML/NJ.
<Module interned-string
>+=
define function interned-string-hash (name :: <byte-string>, start :: <integer>, _end :: <integer>) => (hash-value :: <integer>); for (i :: <integer> from start below _end, hash :: <integer> = 0 then logand(ash(hash, 5) + hash + 720 + as(<integer>, name[i]), #x3FFFFF)) finally hash end for end function;
We'll use the intern-string
method to return
canonical strings.
<Module interned-string
>+=
define method intern-string (name :: <byte-string>, #key start: start :: false-or(<integer>) = 0, end: _end :: false-or(<integer>) = name.size) => (canonical-string :: <byte-string>); let hash-value = interned-string-hash(name, start, _end); let bucket-value = modulo(hash-value, *interned-string-buckets*.size); block (return) for (entry = *interned-string-buckets*[bucket-value] then entry.interned-string-entry-next, while: entry) if (entry.interned-string-hash-value = hash-value & entry.interned-string.size = _end - start & block (next) for (i :: <integer> from start below _end, j :: <integer> from 0) if (name[i] ~== entry.interned-string[j]) next(#f); end if; end for; #t; end block) return(entry.interned-string); end if; end for; <Return a new canonical string> end block; end method;
We insert a new entry into the table when necessary, insuring that the hash table maintains a loading factor of no more than unity.
<Return a new canonical string>=
*interned-string-count* := *interned-string-count* + 1; if (*interned-string-count* > *interned-string-buckets*.size) <Resize the hash table> bucket-value := modulo(hash-value, *interned-string-buckets*.size); end if; let new-string = make(<byte-string>, size: _end - start); copy-bytes(new-string, 0, name, start, _end - start); let new-entry = make(<interned-string-entry>, interned-string: new-string, hash: hash-value, next: *interned-string-buckets*[bucket-value]); *interned-string-buckets*[bucket-value] := new-entry; new-string
If we have to grow the hash table, we grow it to a larger prime number of buckets. The table of primes was borrowed from the Gwydion Dylan d2c runtime library; that version borrowed the same table from somewhere else.
<Module interned-string
>+=
define constant $prime-table = #[/* 53, */ 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457];
<Resize the hash table>=
let new-size = block(done) for (prime in $prime-table) if (prime > *interned-string-count*) done(prime); end if; end for; end block; let new-buckets = make(<simple-object-vector>, size: new-size); for (bucket in *interned-string-buckets*) iterate loop (entry = bucket) if (entry) let next = entry.interned-string-entry-next; let bucket-value = modulo(entry.interned-string-hash-value, new-size); entry.interned-string-entry-next := new-buckets[bucket-value]; new-buckets[bucket-value] := entry; loop(next); end if; end iterate; end for; *interned-string-buckets* := new-buckets;
Given a canonicalized string representation, the only thing we need to implement symbol tables for the representation is a hierarchical lookup table facility.
<Utility module definitions>+=
define module hierarchical-table use common-dylan; export <hierarchical-table>; end module;
<Module hierarchical-table
>=
define class <hierarchical-table> (<mutable-explicit-key-collection>, <stretchy-collection>) slot parent-table :: false-or(<hierarchical-table>), init-value: #f, init-keyword: parent:; constant slot object-table :: <object-table> = make(<object-table>); end class;
define sealed domain make(singleton(<hierarchical-table>));
define sealed domain initialize(<hierarchical-table>);
The element-setter
method just delegates to
the contained object table.
<Module hierarchical-table
>+=
define method element-setter (value, table :: <hierarchical-table>, key) => (value); element-setter(value, table.object-table, key) end method;
The getter method retrieves the value for the given key (if there is one), and memoizes the lookup in the top-level table.
<Module hierarchical-table
>+=
define method element (table :: <hierarchical-table>, key, #key default = $unsupplied) => (value); block (return) for (search-table = table then search-table.parent-table, while: search-table) let value = element(search-table.object-table, key, default: $unfound); if (found?(value)) if (search-table == table) return(value) else return(element(table.object-table, key) := value) end if; end; end for; if (supplied?(default)) default else error("element %= not found"); end if end block end method;
<Module hierarchical-table
>+=
define method remove-key! (table :: <hierarchical-table>, key) => (present? :: <boolean>); let present? = #f; for (search-table = table then search-table.parent-table, while: search-table) present? := remove-key!(search-table, key) | present?; end for; present? end method;
We can implement forward-iteration-protocol
on
<hierarchical-table>
by first flattening
the hierarchy, copying entries from parent tables in the
top-level table, and then iterating through the top-level
table.
<Module hierarchical-table
>+=
define method forward-iteration-protocol (table :: <hierarchical-table>) => (initial-state, limit, next-state :: <function>, finished-state? :: <function>, current-key :: <function>, current-element :: <function>, current-element-setter :: <function>, copy-state :: <function>); for (other-table = table.parent-table then other-table.parent-table, while: other-table) for (item keyed-by key in other-table.object-table) unless (found?(element(table.object-table, key, default: $unfound))) table.object-table[key] := item; end unless; end for; end for; table.parent-table := #f; forward-iteration-protocol(table.object-table) end method;
The translation of a C program consists of several phases. The first few phases take one or more C source or header files and combine them to form a preprocessing translation unit. After the preprocessing phases are complete, the combined text of the program forms a translation unit, which is then translated by the remaining phases.
The preprocess-C-source-file
method
opens the given locator
, marks the beginning of
the file in a new rangemap, and preprocesses the file using
preprocess-C-stream
.
<Modules imported by the cpr-internals
module>=
use streams; use file-system; use source-location; use source-location-rangemap; use source-location-conditions;
<Preprocess source file locator
>=
let rangemap = make(<source-location-rangemap>); rangemap-add-line-file(rangemap, 0, 1, locator); with-open-file(stream = locator) preprocess-C-stream(preprocessing-translation-unit, stream, locator.locator-directory, rangemap, consumer, consumer-data, dialect: dialect); end;
For #include
files we maintain two search paths,
one for ordinary header files and one for system header
files. Normally these paths are identical but they are kept
separate for full generality.
<Module cpr-internals
>+=
define method do-with-C-header-file (locator :: <file-locator>, search-directory :: false-or(<directory-locator>), search-path :: <sequence>, function :: <function>) let found-locator = if (~locator-relative?(locator)) locator elseif (search-directory & file-exists?(merge-locators(locator, search-directory))) merge-locators(locator, search-directory) else block (found) for (directory in search-path) let merged = merge-locators(locator, directory); if (file-exists?(merged)) found(merged) end if; finally locator end for end block end if; let stream = make(<file-stream>, direction: #"input", locator: found-locator); block () function(found-locator, stream); cleanup close(stream); end block; end method;
<Preprocess the header file locator
>=
do-with-C-header-file (locator, search-directory, preprocessing-translation-unit.preprocessing-header-search-path, method (found-locator :: <file-locator>, stream :: <stream>) => (); let rangemap = make(<source-location-rangemap>); rangemap-add-line-file(rangemap, 0, 1, found-locator); preprocess-C-stream(preprocessing-translation-unit, stream, found-locator.locator-directory, rangemap, consumer, consumer-data, dialect: dialect); end);
<Preprocess the system header file locator
>=
do-with-C-header-file (locator, #f, preprocessing-translation-unit.preprocessing-system-header-search-path, method (found-locator :: <file-locator>, stream :: <stream>) => (); let rangemap = make(<source-location-rangemap>); rangemap-add-line-file(rangemap, 0, 1, found-locator); preprocess-C-stream(preprocessing-translation-unit, stream, found-locator.locator-directory, rangemap, consumer, consumer-data, dialect: dialect); end);
The first thing the preprocessor does is to read the
input file, and to splice source lines that end with
\
together with the lines that follow them.
Our main file-reading loop reads the source file one input buffer at a time and tokenizes it.
<Module cpr-internals
>+=
define method preprocess-C-stream (preprocessing-translation-unit :: <C-preprocessing-translation-unit-representation>, stream :: <buffered-stream>, directory :: false-or(<directory-locator>), rangemap :: <source-location-rangemap>, consumer :: <function>, consumer-data, #key start-position :: <integer> = 0, dialect :: <C-preprocessor-dialect> = $C99-C-preprocessor-dialect) => (end-position :: <integer>); <Initialize the line-splicing state> <Initialize the trigraph state> <Initialize thescanner
> <Initialize the preprocessor andpreprocessor-token-dispatcher
> let buf = get-input-buffer(stream); let text :: <byte-string> = make(<byte-string>, size: if (buf) buf.buffer-size else 0 end + 2); iterate buf-loop (buf :: false-or(<buffer>) = buf) if (buf) let text-size :: <integer> = buf.buffer-end - buf.buffer-next; if (preprocessing-translation-unit.preprocessing-trigraph-support?) <Copy text and translate trigraphs frombuf
totext
> else copy-bytes(text, 0, buf, buf.buffer-next, text-size); end if; <Line-splice and tokenize the program text intext
> buf.buffer-next := buf.buffer-end; buf-loop(next-input-buffer(stream)); end if; end; release-input-buffer(stream); <Finalize the trigraph state> <Finalize the scanner and preprocessor and return the end position> end method;
Line splice processing is slightly complicated by the fact that a line splice may consist of a backslash followed by a CR, an LF, or a CR+LF pair, and the fact that one of these sequences might cross a buffer boundary.
<Initialize the line-splicing state>=
let splice-state :: one-of(#f, #"backslash", #"backslash-cr") = #f;
Our approach involves sending segments of the buffer, as large as possible, to the scanner. If we encounter a line splice, we end the current segment and begin a new one after the splice.
This is compliated slightly by the case when we see a backslash at the end of the buffer, and can't tell whether the backslash needs to be included or not. We take the approach of scanning the buffer without the backslash, and scan a new backslash before the beginning of the next buffer if it turns out not to be part of a splice.
<Line-splice and tokenize the program text in text
>=
iterate char-loop (start :: <buffer-index> = 0,
i :: <buffer-index> = 0)
if (i < text-size)
let c :: <byte-character> = as(<character>, text[i]);
if (c == '\\')
splice-state := #"backslash";
char-loop(start, i + 1);
elseif (splice-state == #f)
char-loop(start, i + 1);
elseif (splice-state == #"backslash")
if (c == '\r')
scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
text, start, i - 1);
splice-state := #"backslash-cr";
char-loop(i + 1, i + 1);
elseif (c == '\n')
if (i > start)
scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
text, start, i - 1);
end if;
rangemap-add-line(rangemap, scanner.scanner-source-position, #f);
splice-state := #f;
char-loop(i + 1, i + 1);
elseif (i = 0)
scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
"\\", 0, 1);
if (c ~== '\\')
splice-state := #f;
end if;
char-loop(i, i + 1);
else
if (c ~== '\\')
splice-state := #f;
end if;
char-loop(start, i + 1);
end if;
else // (splice-state == #"backslash-cr"
)
splice-state := #f;
rangemap-add-line(rangemap, scanner.scanner-source-position, #f);
if (c == '\n')
char-loop(i + 1, i + 1);
else
if (c == '\\')
splice-state := #"backslash";
end if;
char-loop(i, i + 1);
end if;
end if;
else
scan-C-preprocessing-tokens
(preprocessor-token-dispatcher, scanner, text, start,
if (splice-state == #"backslash") i - 1 else i end);
end if;
end iterate;
The next phase separates the program text into preprocessing tokens. The set of preprocessing tokens is similar to the set of C tokens, with a few additions.
We use the simple-parser
library to do
tokenization and parsing of C program text.
<Import external libraries used by cpr
>=
use simple-parser;
<Modules imported by the cpr-internals
module>+=
use byte-vector; use simple-parser; use simple-lexical-scanner;
The extract-action
method is used as the
semantic value function for various preprocessing tokens to
extract the original recognized token string.
<Module cpr-internals
>+=
define function extract-action (token-string :: <byte-string>, token-start :: <integer>, token-end :: <integer>) => (result :: <byte-string>); let result = make(<byte-string>, size: token-end - token-start); copy-bytes(result, 0, token-string, token-start, token-end - token-start); result end;
<Module cpr-internals
>+=
define constant $C-tokens = simple-lexical-definition token EOF; <Token definitions> token UNKNOWN-TOKEN = ".", priority: -1, semantic-value-function: extract-action; end;
<Cases for preprocessor-token-string
>=
#"UNKNOWN-TOKEN" => token-value;
For example, whitespace, comments, and line breaks are significant during preprocessing. We will eliminate these before the C parser sees them.
<Token definitions>=
// Whitespace and comments token WHITESPACE = "([ \t\f]|/\\*([^*]|\\*+[^*/])*\\*+/)+"; // New-line characters and line-end comments token NEW-LINE = "(//[^\r\n]*)?(\n|\r|\r\n)";
The canonical string representation of whitespace is a single space character.
<Cases for preprocessor-token-string
>+=
#"WHITESPACE" => " ";
Recognition of identifiers is straightforward.
<Modules imported by the cpr-internals
module>+=
use interned-string;
// Identifiers token IDENTIFIER :: <string> = "[a-zA-Z_][0-9a-zA-Z_]*", semantic-value-function: method (token-string :: <byte-string>, token-start :: <integer>, token-end :: <integer>); intern-string(token-string, start: token-start, end: token-end) end;
<Cases for preprocessor-token-string
>+=
#"IDENTIFIER" => token-value;
During preprocessing, numbers are not interpreted completely, but are recognized as preprocessing numbers. The grammar for preprocessing numbers is a superset of the various kinds of numeric tokens, and also includes a few things that are not valid numeric tokens at all.
// Preprocessing numbers token PP-NUMBER = "\\.?[0-9]([.0-9a-zA-Z_]|[eEpP][-+])*", semantic-value-function: extract-action;
<Cases for preprocessor-token-string
>+=
#"PP-NUMBER" => token-value;
Character constants and string literals are described by similar regular expressions.
// Character constants name c-char = "[^'\\\\\r\n]|\\\\(['\"?\\abfnrtv]|[0-7]([0-7][0-7]?)?|x[0-9a-fA-F]+)"; token CHARACTER-CONSTANT :: <string> = "L?'{c-char}+'", // FIXME semantic-value-function: extract-action; // String literals name s-char = "[^\"\\\\\r\n]|\\\\(['\"?\\abfnrtv]|[0-7]([0-7][0-7]?)?|x[0-9a-fA-F]+)"; token STRING-LITERAL :: <string> = "L?\"{s-char}*\"", // FIXME semantic-value-function: extract-action;
<Cases for preprocessor-token-string
>+=
#"CHARACTER-CONSTANT" => token-value; #"STRING-LITERAL" => token-value;
The remaining tokens at the preprocessing stage are considered punctuation. We provide semantic value functions for some of the punctuation tokens so we can distingush between the alternate spellings for stringization purposes. In all current cases looking at the length of the recognized token is sufficient.
<Semantic value function definitions>=
define function punctuation-value-function (length :: <integer>) => (func :: <function>); method (token-string :: <byte-string>, token-start :: <integer>, token-end :: <integer>) => (result :: <boolean>); token-end - token-start = length end end function;
// Punctuation token SHARP = "#|%:", semantic-value-function: punctuation-value-function(1); token SHARPSHARP = "##|%:%:", semantic-value-function: punctuation-value-function(2); token LBRACK = "\\[|<:", semantic-value-function: punctuation-value-function(1); token RBRACK = "\\]|:>", semantic-value-function: punctuation-value-function(1); token LBRACE = "\\{|<%", semantic-value-function: punctuation-value-function(1); token RBRACE = "\\}|%>", semantic-value-function: punctuation-value-function(1); token LPAREN = "\\("; token RPAREN = "\\)"; token DOT = "\\."; token ARROW = "->"; token INC = "\\+\\+"; token DEC = "-" "-"; token AMP = "&"; token STAR = "\\*"; token PLUS = "\\+"; token MINUS = "-"; token TILDE = "~"; token BANG = "!"; token SLASH = "/"; token PERCENT = "%"; token SHL = "<<"; token SHR = ">>"; token LT = "<"; token GT = ">"; token LE = "<="; token GE = ">="; token EQ = "=="; token NE = "!="; token HAT = "^"; token OR = "\\|"; token AMPAMP = "&&"; token OROR = "\\|\\|"; token QUEST = "\\?"; token COLON = ":"; token SEMI = ";"; token ELIPSIS = "\\.\\.\\."; token EQUALS = "="; token STAR-EQUALS = "\\*="; token SLASH-EQUALS = "/="; token PERCENT-EQUALS = "%="; token PLUS-EQUALS = "\\+="; token MINUS-EQUALS = "-="; token SHL-EQUALS = "<<="; token SHR-EQUALS = ">>="; token AMP-EQUALS = "&="; token HAT-EQUALS = "^="; token OR-EQUALS = "\\|="; token COMMA = ",";
<Cases for preprocessor-token-string
>+=
#"SHARP" => if (token-value) "#" else "%:" end; #"SHARPSHARP" => if (token-value) "##" else "%:%:" end; #"LBRACK" => if (token-value) "[" else "<:" end; #"RBRACK" => if (token-value) "]" else ":>" end; #"LBRACE" => if (token-value) "{" else "<%" end; #"RBRACE" => if (token-value) "}" else "%>" end; #"LPAREN" => "("; #"RPAREN" => ")"; #"DOT" => "."; #"ARROW" => "->"; #"INC" => "++"; #"DEC" => "--"; #"AMP" => "&"; #"STAR" => "*"; #"PLUS" => "+"; #"MINUS" => "-"; #"TILDE" => "~"; #"BANG" => "!"; #"SLASH" => "/"; #"PERCENT" => "%"; #"SHL" => "<<"; #"SHR" => ">>"; #"LT" => "<"; #"GT" => ">"; #"LE" => "<="; #"GE" => ">="; #"EQ" => "=="; #"NE" => "!="; #"HAT" => "^"; #"OR" => "|"; #"AMPAMP" => "&&"; #"OROR" => "||"; #"QUEST" => "?"; #"COLON" => ":"; #"SEMI" => ";"; #"ELIPSIS" => "..."; #"EQUALS" => "="; #"STAR-EQUALS" => "*="; #"SLASH-EQUALS" => "/="; #"PERCENT-EQUALS" => "%="; #"PLUS-EQUALS" => "+="; #"MINUS-EQUALS" => "-="; #"SHL-EQUALS" => "<<="; #"SHR-EQUALS" => ">>="; #"AMP-EQUALS" => "&="; #"HAT-EQUALS" => "^="; #"OR-EQUALS" => "|="; #"COMMA" => ",";
So, that's the scanner we'll use.
<Initialize the scanner
>=
let scanner = make(<simple-lexical-scanner>, definition: $C-tokens, rangemap: rangemap); scanner.scanner-source-position := start-position;
<Module cpr-internals
>+=
define function scan-C-preprocessing-tokens (preprocessor-token-dispatcher :: <C-preprocessor-token-dispatcher>, scanner :: <simple-lexical-scanner>, text :: <byte-string>, start :: <buffer-index>, _end :: <buffer-index>) => (); scan-tokens(scanner, C-preprocessor-dispatch-token, preprocessor-token-dispatcher, text, start: start, end: _end, partial?: #t); end function;
<Finalize the scanner and preprocessor and return the end position>=
scan-tokens(scanner, C-preprocessor-dispatch-token, preprocessor-token-dispatcher, "", partial?: #f); let end-position = scanner.scanner-source-position; C-preprocessor-dispatch-token(preprocessor-token-dispatcher, 0, #"EOF", #f, end-position, end-position); <Finalize the preprocessor> end-position
At various points we'll need to further interpret preprocessing numbers, character constants, and string constants beyond just treating them as preprocessing tokens.
Preprocessing numbers can be interpreted as integer or floating-point constants.
<Module cpr-internals
>+=
define function C-preprocessing-number-value
(string :: <byte-string>)
=> (value :: <number>, type :: <symbol>);
let string-size = string.size;
local
<Local methods in C-preprocessing-number-value
>;
select (string[0])
'0' =>
initial-zero-part();
'1' =>
decimal-integer-part(1, 1);
'2' =>
decimal-integer-part(1, 2);
'3' =>
decimal-integer-part(1, 3);
'4' =>
decimal-integer-part(1, 4);
'5' =>
decimal-integer-part(1, 5);
'6' =>
decimal-integer-part(1, 6);
'7' =>
decimal-integer-part(1, 7);
'8' =>
decimal-integer-part(1, 8);
'9' =>
decimal-integer-part(1, 9);
'.' =>
error("floating constants are not yet supported");
otherwise =>
error("unrecognized constant %s", string);
end select
end function;
An initial 0
could indicate an octal or
hexadecimal integer constant, or a hexadecimal
floating-point constant.
<Local methods in C-preprocessing-number-value
>=
method initial-zero-part () => (value :: <number>, type :: <symbol>); if (1 >= string-size) values(0, #"int") else select (string[1]) 'x', 'X' => hexadecimal-integer-part(2, 0); '0' => octal-integer-part(2, 0); '1' => octal-integer-part(2, 1); '2' => octal-integer-part(2, 2); '3' => octal-integer-part(2, 3); '4' => octal-integer-part(2, 4); '5' => octal-integer-part(2, 5); '6' => octal-integer-part(2, 6); '7' => octal-integer-part(2, 7); 'l', 'L', 'u', 'U' => integer-suffix-part(1, 0, #t); otherwise => error("unrecognized octal constant %s", string) end select end if end method,
<Local methods in C-preprocessing-number-value
>+=
method hexadecimal-integer-part (i :: <integer>, value :: <integer>) => (value :: <number>, type :: <symbol>); if (i >= string-size) values(value, #"int") else select (string[i]) '0' => hexadecimal-integer-part(i + 1, value * 16 + 0); '1' => hexadecimal-integer-part(i + 1, value * 16 + 1); '2' => hexadecimal-integer-part(i + 1, value * 16 + 2); '3' => hexadecimal-integer-part(i + 1, value * 16 + 3); '4' => hexadecimal-integer-part(i + 1, value * 16 + 4); '5' => hexadecimal-integer-part(i + 1, value * 16 + 5); '6' => hexadecimal-integer-part(i + 1, value * 16 + 6); '7' => hexadecimal-integer-part(i + 1, value * 16 + 7); '8' => hexadecimal-integer-part(i + 1, value * 16 + 8); '9' => hexadecimal-integer-part(i + 1, value * 16 + 9); 'a', 'A' => hexadecimal-integer-part(i + 1, value * 16 + 10); 'b', 'B' => hexadecimal-integer-part(i + 1, value * 16 + 11); 'c', 'C' => hexadecimal-integer-part(i + 1, value * 16 + 12); 'd', 'D' => hexadecimal-integer-part(i + 1, value * 16 + 13); 'e', 'E' => hexadecimal-integer-part(i + 1, value * 16 + 14); 'f', 'F' => hexadecimal-integer-part(i + 1, value * 16 + 15); 'l', 'L', 'u', 'U' => integer-suffix-part(i, value, #t); '.', 'p', 'P' => error("floating hexadecimal constants are not yet supported"); otherwise => error("unrecognized hexadecimal constant %s", string) end end if end method,
<Local methods in C-preprocessing-number-value
>+=
method octal-integer-part (i :: <integer>, value :: <integer>) => (value :: <number>, type :: <symbol>); if (i >= string-size) values(value, #"int") else select (string[i]) '0' => octal-integer-part(i + 1, value * 8 + 0); '1' => octal-integer-part(i + 1, value * 8 + 1); '2' => octal-integer-part(i + 1, value * 8 + 2); '3' => octal-integer-part(i + 1, value * 8 + 3); '4' => octal-integer-part(i + 1, value * 8 + 4); '5' => octal-integer-part(i + 1, value * 8 + 5); '6' => octal-integer-part(i + 1, value * 8 + 6); '7' => octal-integer-part(i + 1, value * 8 + 7); 'l', 'L', 'u', 'U' => integer-suffix-part(i, value, #t); otherwise => error("unrecognized octal constant %s", string) end select end if end method,
<Local methods in C-preprocessing-number-value
>+=
method decimal-integer-part (i :: <integer>, value :: <integer>) => (value :: <number>, type :: <symbol>); if (i >= string-size) values(value, #"int") else select (string[i]) '0' => decimal-integer-part(i + 1, value * 10 + 0); '1' => decimal-integer-part(i + 1, value * 10 + 1); '2' => decimal-integer-part(i + 1, value * 10 + 2); '3' => decimal-integer-part(i + 1, value * 10 + 3); '4' => decimal-integer-part(i + 1, value * 10 + 4); '5' => decimal-integer-part(i + 1, value * 10 + 5); '6' => decimal-integer-part(i + 1, value * 10 + 6); '7' => decimal-integer-part(i + 1, value * 10 + 7); '8' => decimal-integer-part(i + 1, value * 10 + 8); '9' => decimal-integer-part(i + 1, value * 10 + 9); 'l', 'L', 'u', 'U' => integer-suffix-part(i, value, #f); '.', 'e', 'E', 'f', 'F' => error("floating constants are not yet supported"); otherwise => error("unrecognized decimal constant %s", string); end select end if end method,
<Local methods in C-preprocessing-number-value
>+=
method integer-suffix-part (i :: <integer>, value :: <integer>, flag? :: <boolean>) => (value :: <number>, type :: <symbol>); values(value, #"signed-int") end method
<Module cpr-internals
>+=
define function C-character-constant-value (string :: <byte-string>) => (value :: <number>); let _start = if (string[0] == 'L') 2 else 1 end; let _end = string.size - 1; iterate loop(i = _start, value = 0) if (i >= _end) value elseif (string[i] == '\\') let (character-value, next-index) = C-character-escape-value(string, i, _end); loop(next-index, logior(ash(value, 8), character-value)); else loop(i + 1, logior(ash(value, 8), as(<integer>, string[i]))); end if end iterate end function;
<Module cpr-internals
>+=
define function C-character-escape-value (string :: <byte-string>, i :: <integer>, _end :: <integer>) => (value :: <integer>, next-index :: <integer>); select (string[i]) '\'', '\"', '?', '\\' => values(as(<integer>, string[i]), i + 1); 'a' => values(#x07, i + 1); 'b' => values(#x08, i + 1); 'f' => values(#x0C, i + 1); 'n' => values(#x0A, i + 1); 'r' => values(#x0D, i + 1); 't' => values(#x09, i + 1); 'v' => values(#x0B, i + 1); '0', '1', '2', '3', '4', '5', '6', '7' => error("Octal escape"); 'x' => error("Hex escape"); otherwise => error("Illegal escape"); end end function;
The scanner needs to pass various information to the
token consumer, including the
preprocessing-translation-unit
, and the
rangemap
. This needs to be encapsulated into a
single object.
<Module cpr-internals
>+=
define class <C-preprocessor-token-dispatcher> (<object>)
constant slot token-dispatcher-preprocessing-translation-unit
:: <C-preprocessing-translation-unit-representation>,
required-init-keyword: preprocessing-translation-unit:;
constant slot token-dispatcher-scanner :: <simple-lexical-scanner>,
required-init-keyword: scanner:;
constant slot token-dispatcher-directory :: false-or(<directory-locator>),
required-init-keyword: directory:;
constant slot token-dispatcher-rangemap :: <source-location-rangemap>,
required-init-keyword: rangemap:;
<Additional slots in <C-preprocessor-token-dispatcher>
>
end class;
define sealed domain make(singleton(<C-preprocessor-token-dispatcher>));
define sealed domain initialize(<C-preprocessor-token-dispatcher>);
We'll implement our preprocessor as a state machine, with
states represented by token consumer functions. The
C-preprocessor-dispatch-token
function will
serve as a trampoline, dispatching to the various state
functions depending on the current state.
<Additional slots in <C-preprocessor-token-dispatcher>
>=
slot token-dispatcher-state-function :: <function>, required-init-keyword: consumer-state-function:;
<Module cpr-internals
>+=
define function C-preprocessor-dispatch-token (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name, token-value, start-position, end-position) => (); token-dispatcher.token-dispatcher-state-function (token-dispatcher, token-number, token-name, token-value, start-position, end-position); end function;
For brevity's sake, we'll provide macros for defining state functions and for making state transitions.
<Module cpr-internals
>+=
define macro preprocessor-state-function-definer { define preprocessor-state-function ?state:name (?token-dispatcher:variable, ?token-number:variable, ?token-name:variable, ?token-value:variable, ?start-position:variable, ?end-position:variable) ?:body end } => { define function "C-preprocess-token-" ## ?state (?token-dispatcher, ?token-number, ?token-name, ?token-value, ?start-position, ?end-position) => (); ?body end } end macro;
define macro enter-state { enter-state(?token-dispatcher:expression, ?state:name) } => { ?token-dispatcher.token-dispatcher-state-function := "C-preprocess-token-" ## ?state } end macro;
The preprocessor-error
method can be used by
preprocessor state functions to signal errors.
<Module cpr-internals
>+=
define function preprocessor-error (token-dispatcher :: <C-preprocessor-token-dispatcher>, start-position :: <integer>, end-position :: <integer>, string :: <string>, #rest arguments) => (no-return :: <bottom>); let srcloc = range-source-location(token-dispatcher.token-dispatcher-rangemap, start-position, end-position); apply(source-error, srcloc, string, arguments); end function;
The
<Module cpr-internals
>+=
define macro with-preprocessing-directive-restart { with-preprocessing-directive-restart (?dispatcher:expression, ?token-name:expression) ?:body end } => { block (exit) ?body; exception (restart :: <source-error-recovery-restart>) if (?token-name == #"NEW-LINE") enter-state(?dispatcher, linestart); else enter-state(?dispatcher, sharp-skipping); end if; exit(); end block } end macro;
The preprocessor needs to keep various types of state information concerning active conditionals, as well as directives that are in the process of being parsed. The most important of the state variables are the consumer function receiving tokens from the preprocessor and its state data, and an indication of whether or not the current input is being skipped due to a false conditional.
<Module cpr-internals
>+=
define class <C-preprocessor-state> (<object>)
constant slot preprocessor-dialect :: <C-preprocessor-dialect>,
required-init-keyword: dialect:;
constant slot preprocessor-consumer :: <function>,
required-init-keyword: consumer:;
constant slot preprocessor-consumer-data,
required-init-keyword: consumer-data:;
slot preprocessor-skipping? :: <boolean>,
init-value: #f, init-keyword: skipping?:;
<Additional slots in <C-preprocessor-state>
>
end class;
define sealed domain make(singleton(<C-preprocessor-state>));
define sealed domain initialize(<C-preprocessor-state>);
<Additional slots in <C-preprocessor-token-dispatcher>
>+=
constant slot token-dispatcher-state :: <C-preprocessor-state>, required-init-keyword: consumer-state:;
<Initialize the preprocessor and preprocessor-token-dispatcher
>=
let preprocessor-state = make(<C-preprocessor-state>, dialect: dialect, consumer: consumer, consumer-data: consumer-data); let preprocessor-token-dispatcher = make(<C-preprocessor-token-dispatcher>, preprocessing-translation-unit: preprocessing-translation-unit, scanner: scanner, directory: directory, rangemap: rangemap, consumer-state-function: C-preprocess-token-linestart, consumer-state: preprocessor-state);
The C-preprocessor-output-token
function
sends a token to the preprocessor's consumer.
<Module cpr-internals
>+=
define inline function C-preprocessor-output-token (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) => (); let preprocessor-state = token-dispatcher.token-dispatcher-state; let rangemap = token-dispatcher.token-dispatcher-rangemap; preprocessor-state.preprocessor-consumer (preprocessor-state.preprocessor-consumer-data, token-name, token-value, rangemap, start-position, end-position); end function;
The exact set of supported preprocessor directives are
parameterizable through
<C-preprocessor-dialect>
objects. Since
preprocessor-dialects can inherit from other dialects, we
use a <hierarchical-table>
to store
information about these directives.
<Modules imported by the cpr-internals
module>+=
use hierarchical-table;
<Slots in the <C-preprocessor-dialect>
class>=
constant slot preprocessor-dialect-parent :: false-or(<C-preprocessor-dialect>) = #f, init-keyword: parent:; slot preprocessor-dialect-directives :: <hierarchical-table>;
<Define an initialize
method on <C-preprocessor-dialect>
>=
define method initialize (dialect :: <C-preprocessor-dialect>, #key parent :: false-or(<C-preprocessor-dialect>), #all-keys) => (); next-method(); dialect.preprocessor-dialect-directives := make(<hierarchical-table>, parent: parent & parent.preprocessor-dialect-directives); end method;
The <C-preprocessor-directive>
class
stores information about these individual directives.
Information we need includes:
#include
directive uses a slightly different set of preprocessing
tokens.)<Module cpr-internals
>+=
define class <C-preprocessor-directive> (<object>) constant slot directive-skipping-function :: <function>, init-value: method (dispatcher) #f end, init-keyword: skipping-function:; constant slot directive-macro-replaced?, init-value: #f, init-keyword: macro-replaced?:; constant slot directive-lexical-definition :: false-or(<simple-lexical-definition>), init-value: #f, init-keyword: lexical-definition:; end class;
define sealed domain make(singleton(<C-preprocessor-directive>));
define sealed domain initialize(<C-preprocessor-directive>);
<Module cpr-internals
>+=
define method define-C-preprocessor-directive (dialect :: <C-preprocessor-dialect>, directive :: <string>, #rest keys) => (); dialect.preprocessor-dialect-directives[intern-string(directive)] := apply(make, <C-preprocessor-directive>, keys); end method;
We'll use a LR parser to recognize C preprocessor directives. The grammar productions defining these directives, and the resulting parser automaton, will be stored with the preprocessor dialect.
<Slots in the <C-preprocessor-dialect>
class>+=
slot preprocessor-dialect-productions :: <sequence>, init-value: #[], init-keyword: productions:;
We'll compute the parser automaton, recognizing a
nonterminal called directive
, as soon as it is
needed.
<Slots in the <C-preprocessor-dialect>
class>+=
slot %preprocessor-dialect-automaton;
<Module cpr-internals
>+=
define method preprocessor-dialect-automaton (dialect :: <C-preprocessor-dialect>) => (automaton); if (slot-initialized?(dialect, %preprocessor-dialect-automaton)) dialect.%preprocessor-dialect-automaton else for (the-dialect = dialect then the-dialect.preprocessor-dialect-parent, productions = #[] then concatenate(productions, the-dialect.preprocessor-dialect-productions), while: the-dialect) finally dialect.%preprocessor-dialect-automaton := simple-parser-automaton($C-tokens, productions, #[#"directive"], end-symbol: #"NEW-LINE") end for end if end;
For convenience's sake, we'll provide a specialized macro
for defining the syntax of directives. Note that the
#
and the directive name are included in the
right-hand side of the production, because we want the
source location information for the reduced production to
include the entire directive.
<Modules imported by the cpr-internals
module>+=
use simple-parser-automaton;
<Define the preprocessor-directive-productions
macro>=
define macro preprocessor-directive-productions { preprocessor-directive-productions ?clauses end } => { vector(?clauses) } clauses: { } => { } { directive ?name:token [?symbols] (?variables:*) ?:body ... } => { make(<simple-production>, nonterminal: #"directive", derives: preprocessor-directive-derives(intern-string(?name), ?symbols), action: production-user-reduce-action-function([]; [SHARP d ?symbols]; [?variables]; ?body)), ... } symbols: { } => { } { ?symbol:name ... } => { ?symbol ... } end macro;
define macro preprocessor-directive-derives { preprocessor-directive-derives(?name:expression, ?symbols) } => { vector(#"SHARP", ?name, ?symbols) } symbols: { } => { } { ?symbol:name ... } => { ?#"symbol", ... } end macro;
<Module cpr-internals
>+=
$C90-C-preprocessor-dialect.preprocessor-dialect-productions
:= concatenate(preprocessor-directive-productions
<C90 preprocessor directive grammar productions>
end,
simple-grammar-productions
<C90 preprocessor directive auxiliary grammar productions>
end,
<Productions for the preprocessing-token
>);
<Module cpr-internals
>+=
$C99-C-preprocessor-dialect.preprocessor-dialect-productions := concatenate(preprocessor-directive-productions <C99 preprocessor directive grammar productions> end);
<Module cpr-internals
>+=
$gnu89-C-preprocessor-dialect.preprocessor-dialect-productions := concatenate(preprocessor-directive-productions <GNU C preprocessor directive grammar productions> end, simple-grammar-productions <GNU C preprocessor directive auxiliary grammar productions> end);
$gnu99-C-preprocessor-dialect.preprocessor-dialect-productions := concatenate(preprocessor-directive-productions <> end);
Along with the dispatcher state machine, we'll use a
<simple-parser>
to recognize preprocessor
directives.
<Additional slots in <C-preprocessor-state>
>=
slot preprocessor-directive-parser :: <simple-parser>, init-keyword: directive-parser:;
<Initialize the preprocessor and preprocessor-token-dispatcher
>+=
preprocessor-state.preprocessor-directive-parser := make(<simple-parser>, automaton: dialect.preprocessor-dialect-automaton, start-symbol: #"directive", rangemap: rangemap, consumer-data: preprocessor-token-dispatcher);
Initially the preprocessor input is considered to be at the start of a line. Preprocessing directives are only recognized in this state.
When we see a #
in this state, we reset the
parser and feed the token to it.
<Module cpr-internals
>+=
define preprocessor-state-function linestart (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) let preprocessor-state = token-dispatcher.token-dispatcher-state; select (token-name) #"NEW-LINE", #"WHITESPACE" => ; // stay in this state #"SHARP" => enter-state(token-dispatcher, sharp); let parser = preprocessor-state.preprocessor-directive-parser; simple-parser-reset(parser, start-symbol: #"directive"); simple-parser-consume-token(parser, 0, #"SHARP", #f, start-position, end-position); #"EOF" => ; // EOF is okay here #"IDENTIFIER" => enter-state(token-dispatcher, linemid); unless (preprocessor-state.preprocessor-skipping?) <Expand the identifier if it has a macro definition, and send it directly to the output otherwise> end unless; #"BLUE-IDENTIFIER" => enter-state(token-dispatcher, linemid); unless (preprocessor-state.preprocessor-skipping?) C-preprocessor-output-token(token-dispatcher, token-number, #"IDENTIFIER", token-value, start-position, end-position); end unless; otherwise => enter-state(token-dispatcher, linemid); unless (preprocessor-state.preprocessor-skipping?) C-preprocessor-output-token(token-dispatcher, token-number, token-name, token-value, start-position, end-position); end unless; end select; end preprocessor-state-function;
Processing in the middle of a line is similar, except that we return to the line-start state if we see a newline.
<Module cpr-internals
>+=
define preprocessor-state-function linemid (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) let preprocessor-state = token-dispatcher.token-dispatcher-state; select (token-name) #"NEW-LINE" => enter-state(token-dispatcher, linestart); #"WHITESPACE" => ; // stay in this state #"EOF" => signal("File does not end in a newline"); #"IDENTIFIER" => unless (preprocessor-state.preprocessor-skipping?) <Expand the identifier if it has a macro definition, and send it directly to the output otherwise> end unless; #"BLUE-IDENTIFIER" => unless (preprocessor-state.preprocessor-skipping?) C-preprocessor-output-token(token-dispatcher, token-number, #"IDENTIFIER", token-value, start-position, end-position); end unless; otherwise => unless (preprocessor-state.preprocessor-skipping?) C-preprocessor-output-token(token-dispatcher, token-number, token-name, token-value, start-position, end-position); end unless; end select; end preprocessor-state-function;
After seeing #
at the beginning of a line, we
look for a preprocessor directive. Unrecognized directives
are okay if we're skipping due to a false conditional. Note
that we explicitly ignore the null directive (#
immediately followed by a newline).
<Module cpr-internals
>+=
define preprocessor-state-function sharp (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) let preprocessor-state = token-dispatcher.token-dispatcher-state; select (token-name) #"IDENTIFIER" => let dialect = preprocessor-state.preprocessor-dialect; let directive = element(dialect.preprocessor-dialect-directives, token-value, default: #f); if (directive) let parser = preprocessor-state.preprocessor-directive-parser; simple-parser-consume-token(parser, 0, token-value, #f, start-position, end-position); if (preprocessor-state.preprocessor-skipping?) if (~directive.directive-skipping-function(token-dispatcher)) enter-state(token-dispatcher, sharp-skipping); elseif (directive.directive-macro-replaced?) enter-state(token-dispatcher, sharp-directive-expanded); else enter-state(token-dispatcher, sharp-directive); end if; else if (directive.directive-lexical-definition) token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition := directive.directive-lexical-definition; end if; if (directive.directive-macro-replaced?) enter-state(token-dispatcher, sharp-directive-expanded); else enter-state(token-dispatcher, sharp-directive); end if; end if; elseif (preprocessor-state.preprocessor-skipping?) enter-state(token-dispatcher, sharp-skipping); else with-preprocessing-directive-restart (token-dispatcher, token-name) preprocessor-error(token-dispatcher, start-position, end-position, "invalid preprocessing directive #%s", token-value); end; end if; #"NEW-LINE" => // Null directive enter-state(token-dispatcher, linestart); #"WHITESPACE" => ; // remain in this state otherwise => if (preprocessor-state.preprocessor-skipping?) enter-state(token-dispatcher, sharp-skipping); else with-preprocessing-directive-restart (token-dispatcher, token-name) preprocessor-error(token-dispatcher, start-position, end-position, "invalid preprocessing directive"); end; end if; end select; end preprocessor-state-function;
While processing a directive, we feed the tokens into the directive parser. Whitespace is recognized as a token in one particular spot, as we'll see later, so we feed whitespace tokens into the parser only if they can be recognized at that point.
Line end constitutes the end of the directive, and as treated as the end-of-input marker by the parser. Once we feed this token into the parser (causing the directive's reduce action to be triggered), we return to the initial state.
<Module cpr-internals
>+=
define preprocessor-state-function sharp-directive (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) let preprocessor-state = token-dispatcher.token-dispatcher-state; let parser = preprocessor-state.preprocessor-directive-parser; select (token-name) #"WHITESPACE" => if (simple-parser-can-consume-token?(parser, token-number, token-name)) simple-parser-consume-token(parser, token-number, token-name, #f, start-position, end-position); end if; #"NEW-LINE" => with-preprocessing-directive-restart (token-dispatcher, token-name) simple-parser-consume-token(parser, token-number, token-name, token-value, start-position, end-position); enter-state(token-dispatcher, linestart); end; #"EOF" => signal("File does not end in a newline"); simple-parser-consume-token(parser, 0, #"NEW-LINE", #f, start-position, end-position); otherwise => with-preprocessing-directive-restart (token-dispatcher, token-name) simple-parser-consume-token(parser, token-number, token-name, token-value, start-position, end-position); end; end select; end preprocessor-state-function;
If we're skipping the rest of a preprocessor directive
line (due either to a false conditional or to a syntax
error) then we enter the
C-preprocess-token-sharp-skipping
state.
<Module cpr-internals
>+=
define preprocessor-state-function sharp-skipping (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) if (token-name == #"NEW-LINE") enter-state(token-dispatcher, linestart); end if; end preprocessor-state-function;
Definitions of macros defined using #define
are
stored in a preprocessing-macro-definitions
table of the preprocessing translation unit.
<Slots in <C-preprocessing-translation-unit-representation>
>=
constant slot preprocessing-macro-definitions :: <object-table> = make(<object-table>);
Each macro definition consists of a list of macro
parameters and a variable argument parameter for
function-like macros, and a sequence containing the
replacement list of the macro. There is also a mutable
flag, preprocessing-macro-suppressed?
, which is
set during expansion of the macro to prevent it from being
recursively expanded.
<Module cpr-internals
>+=
define class <C-preprocessing-macro> (<source-location-mixin>) constant slot preprocessing-macro-parameters :: false-or(<sequence>), init-value: #f, init-keyword: parameters:; constant slot preprocessing-macro-varargs-parameter :: false-or(<byte-string>), init-value: #f, init-keyword: varargs-parameter:; constant slot preprocessing-macro-replacement :: <sequence>, required-init-keyword: replacement:; constant slot preprocessing-macro-expander :: false-or(<function>), init-value: #f, init-keyword: expander:; slot preprocessing-macro-suppressed? :: <boolean>, init-value: #f; end class;
define sealed domain make(singleton(<C-preprocessing-macro>));
define sealed domain initialize(<C-preprocessing-macro>);
<Module cpr-internals
>+=
define macro with-macro-suppressed { with-macro-suppressed (?definition:expression) ?:body end } => { block () ?definition.preprocessing-macro-suppressed? := #t; ?body cleanup ?definition.preprocessing-macro-suppressed? := #f; end block } end macro;
Macros can be redefined without complaint if the new definition is equivalent to the previous one:
<Module cpr-internals
>+=
define sealed method \= (macro1 :: <C-preprocessing-macro>, macro2 :: <C-preprocessing-macro>) => (equal? :: <boolean>); (macro1.preprocessing-macro-parameters = macro2.preprocessing-macro-parameters) & (macro1.preprocessing-macro-varargs-parameter = macro2.preprocessing-macro-varargs-parameter) & (macro1.preprocessing-macro-replacement = macro2.preprocessing-macro-replacement) end method;
The elements in the replacement list are the individual preprocessing tokens.
<Module cpr-internals
>+=
define class <C-preprocessing-token> (<object>) constant slot preprocessing-token-name :: <symbol>, required-init-keyword: name:; constant slot preprocessing-token-value, init-value: #f, init-keyword: value:; /* constant slot preprocessing-token-start-position :: false-or(<integer>), init-value: #f, init-keyword: start-position:; constant slot preprocessing-token-end-position :: false-or(<integer>), init-value: #f, init-keyword: end-position:; */ end class;
<Module cpr-internals
>+=
define sealed method \= (token1 :: <C-preprocessing-token>, token2 :: <C-preprocessing-token>) => (result :: <boolean>); token1.preprocessing-token-name == token2.preprocessing-token-name & token1.preprocessing-token-value = token2.preprocessing-token-value end method;
Given a sequence of preprocessor tokens, we sometimes need to determine whether there is (already) whitespace at the end.
<Module cpr-internals
>+=
define function trailing-whitespace? (tokens :: <sequence>) => (result :: <boolean>); if (empty?(tokens)) #f else tokens[tokens.size - 1].preprocessing-token-name == #"WHITESPACE" end if end function;
Whitespace following a replacement list is not considered
part of the replacement list. The
trim-whitespace!
and
trim-whitespace
functions are used to strip off
such whitespace.
<Module cpr-internals
>+=
define function trim-whitespace! (tokens :: <stretchy-sequence>) => (tokens :: <stretchy-sequence>); if (trailing-whitespace?(tokens)) tokens.size := tokens.size - 1; end if; tokens end function;
define method trim-whitespace (tokens :: <sequence>) => (result :: <sequence>); if (trailing-whitespace?(tokens)) copy-sequence(tokens, end: tokens.size - 1) else tokens end if end method;
define method trim-whitespace (tokens :: <stretchy-sequence>) => (result :: <stretchy-sequence>); trim-whitespace!(tokens); end method;
#define
Directive<Module cpr-internals
>+=
define function preprocessor-add-macro-definition (preprocessing-translation-unit :: <C-preprocessing-translation-unit-representation>, macro-name :: <byte-string>, definition :: <C-preprocessing-macro>) => (); let previous-definition = element(preprocessing-translation-unit.preprocessing-macro-definitions, macro-name, default: #f); if (previous-definition) if (definition ~= previous-definition) source-warning(definition, "redefined macro %s", macro-name); source-warning(previous-definition, "the previous definition was here"); end if; end if; preprocessing-translation-unit.preprocessing-macro-definitions[macro-name] := definition; end function;
<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "define");
<C90 preprocessor directive grammar productions>=
directive "define" [IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let definition = make(<C-preprocessing-macro>, source-location: srcloc, replacement: #[]); preprocessor-add-macro-definition (dispatcher.token-dispatcher-preprocessing-translation-unit, IDENTIFIER, definition); directive "define" [IDENTIFIER WHITESPACE pp-tokens-opt] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let definition = make(<C-preprocessing-macro>, source-location: srcloc, replacement: trim-whitespace(pp-tokens-opt)); preprocessor-add-macro-definition (dispatcher.token-dispatcher-preprocessing-translation-unit, IDENTIFIER, definition); directive "define" [IDENTIFIER LPAREN identifier-list-opt RPAREN pp-tokens-opt] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let definition = make(<C-preprocessing-macro>, source-location: srcloc, parameters: identifier-list-opt, replacement: trim-whitespace(pp-tokens-opt)); preprocessor-add-macro-definition (dispatcher.token-dispatcher-preprocessing-translation-unit, IDENTIFIER, definition);
Function-like macros with variable arguments were added in C99.
<Module cpr-internals
>+=
define constant $C99-varargs-parameter :: <string> = intern-string("__VA_ARGS__");
<C99 preprocessor directive grammar productions>=
directive "define" [IDENTIFIER LPAREN ELIPSIS RPAREN pp-tokens-opt] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let definition = make(<C-preprocessing-macro>, source-location: srcloc, parameters: #[], varargs-parameter: $C99-varargs-parameter, replacement: trim-whitespace(pp-tokens-opt)); preprocessor-add-macro-definition (dispatcher.token-dispatcher-preprocessing-translation-unit, IDENTIFIER, definition); directive "define" [IDENTIFIER LPAREN identifier-list COMMA ELIPSIS RPAREN pp-tokens-opt] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let definition = make(<C-preprocessing-macro>, source-location: srcloc, parameters: identifier-list, varargs-parameter: $C99-varargs-parameter, replacement: trim-whitespace(pp-tokens-opt)); preprocessor-add-macro-definition (dispatcher.token-dispatcher-preprocessing-translation-unit, IDENTIFIER, definition);
GNU C has its own version of varargs function-like macros.
<GNU C preprocessor directive grammar productions>=
directive "define" [IDENTIFIER LPAREN gnu-varargs RPAREN pp-tokens-opt] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let definition = make(<C-preprocessing-macro>, source-location: srcloc, parameters: #[], varargs-parameter: gnu-varargs, replacement: trim-whitespace(pp-tokens-opt)); preprocessor-add-macro-definition (dispatcher.token-dispatcher-preprocessing-translation-unit, IDENTIFIER, definition); directive "define" [IDENTIFIER LPAREN identifier-list COMMA gnu-varargs RPAREN pp-tokens-opt] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let definition = make(<C-preprocessing-macro>, source-location: srcloc, parameters: identifier-list, varargs-parameter: gnu-varargs, replacement: trim-whitespace(pp-tokens-opt)); preprocessor-add-macro-definition (dispatcher.token-dispatcher-preprocessing-translation-unit, IDENTIFIER, definition);
<C90 preprocessor directive auxiliary grammar productions>=
production identifier-list-opt :: <sequence> => [/* empty */] () #[]; production identifier-list-opt :: <sequence> => [identifier-list]; production identifier-list => [IDENTIFIER]; production identifier-list => [identifier-list COMMA IDENTIFIER]; production pp-tokens-opt :: <sequence> => [/* empty */] () #[]; production pp-tokens-opt :: <sequence> => [pp-tokens]; production pp-tokens => [preprocessing-token]; production pp-tokens => [pp-tokens preprocessing-token];
<GNU C preprocessor directive auxiliary grammar productions>=
production gnu-varargs => [IDENTIFIER ELIPSIS];
To define the preprocessing-token
nonterminal,
we need to have one production for each possible
preprocessing token. The reduce action for each
production constructs a corresponding
<C-preprocessing-token>
object.
<Productions for the preprocessing-token
>=
map(method (token-name :: <symbol>) make(<simple-production>, nonterminal: #"preprocessing-token", nonterminal-type: <C-preprocessing-token>, derives: vector(token-name), action: method (p :: <simple-parser>, data, s, e) => (token :: <C-preprocessing-token>); make(<C-preprocessing-token>, name: token-name, value: p[0]) end) end, #[#"IDENTIFIER", #"PP-NUMBER", #"CHARACTER-CONSTANT", #"STRING-LITERAL", #"SHARP", #"SHARPSHARP", #"LBRACK", #"RBRACK", #"LBRACE", #"RBRACE", #"LPAREN", #"RPAREN", #"DOT", #"ARROW", #"INC", #"DEC", #"AMP", #"STAR", #"PLUS", #"MINUS", #"TILDE", #"BANG", #"SLASH", #"PERCENT", #"SHL", #"SHR", #"LT", #"GT", #"LE", #"GE", #"EQ", #"NE", #"HAT", #"OR", #"AMPAMP", #"OROR", #"QUEST", #"COLON", #"SEMI", #"ELIPSIS", #"EQUALS", #"STAR-EQUALS", #"SLASH-EQUALS", #"PERCENT-EQUALS", #"PLUS-EQUALS", #"MINUS-EQUALS", #"SHL-EQUALS", #"SHR-EQUALS", #"AMP-EQUALS", #"HAT-EQUALS", #"OR-EQUALS", #"COMMA"])
Whitespace is not s preprocessing token, but it can appear as a separator within replacement lists and macro arguments.. Multiple adjacent whitespace separators need to be collapsed into a single whitespace marker.
<C90 preprocessor directive auxiliary grammar productions>+=
production pp-tokens :: <sequence> => [pp-tokens WHITESPACE] () unless (trailing-whitespace?(pp-tokens)) add!(pp-tokens, make(<C-preprocessing-token>, name: #"WHITESPACE")); end; pp-tokens;
#undef
DirectiveThe #undef
directive deletes the macro
definition for the given identifier from the definitions
table.
<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "undef");
<C90 preprocessor directive grammar productions>+=
directive "undef" [IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>) let pp-translation-unit = dispatcher.token-dispatcher-preprocessing-translation-unit; remove-key!(pp-translation-unit.preprocessing-macro-definitions, IDENTIFIER);
<Define identifier
as an object-like preprocessing macro>=
let rangemap = make(<source-location-rangemap>); let scanner = make(<simple-lexical-scanner>, definition: $C-tokens, rangemap: rangemap); let tokens = make(<stretchy-object-vector>); scan-tokens(scanner, method (tokens :: <stretchy-object-vector>, token-number, token-name, token-value, start-position, end-position) if (token-name == #"WHITESPACE" | token-name == #"NEW-LINE") unless (empty?(tokens) | trailing-whitespace?(tokens)) add!(tokens, make(<C-preprocessing-token>, name: #"WHITESPACE")); end; else add!(tokens, make(<C-preprocessing-token>, name: token-name, value: token-value)) end if; end, tokens, replacement); let definition = make(<C-preprocessing-macro>, replacement: trim-whitespace!(tokens)); preprocessor-add-macro-definition (preprocessing-translation-unit, intern-string(identifier), definition);
<Undefine identifier
>=
remove-key!(preprocessing-translation-unit.preprocessing-macro-definitions, intern-string(identifier));
When we encounter an identifier during preprocessing, we
determine whether or not it has a macro definition. If it
does, and it's not currently being suppressed, then we begin
the process of expanding the macro by calling
C-preprocessing-macro-expand
.
<Expand the identifier if it has a macro definition, and send it directly to the output otherwise>=
let pp-translation-unit = token-dispatcher.token-dispatcher-preprocessing-translation-unit; let definition = element(pp-translation-unit.preprocessing-macro-definitions, token-value, default: #f); if (definition & ~definition.preprocessing-macro-suppressed?) if (C-preprocessing-macro-expand(token-dispatcher, definition, token-value, start-position, end-position, #t)) enter-state(token-dispatcher, linemid-macro-name) end if; else C-preprocessor-output-token(token-dispatcher, token-number, token-name, token-value, start-position, end-position); end if;
The C-preprocessing-macro-expand
function
begins the process of expanding the given identifier. It
returns true if the macro is a function-like macro,
indicating that the macro arguments need to be collected
before the actual macro expansion begins.
<Module cpr-internals
>+=
define function C-preprocessing-macro-expand (token-dispatcher :: <C-preprocessor-token-dispatcher>, definition :: <C-preprocessing-macro>, token-value, start-position, end-position, newline-permitted? :: <boolean>) => (function-like? :: <boolean>); if (definition.preprocessing-macro-parameters) <Expand a function-like macro> #t elseif (definition.preprocessing-macro-expander) // Built-in expansion: expand directly definition.preprocessing-macro-expander(token-dispatcher, token-value, start-position, end-position); #f else // Object-like macro: expand directly C-expand-replacement-list (token-dispatcher, token-value, definition, #f, start-position, end-position); #f end if; end function;
Expanding function-like macros is a much more involved
process. We'll need to collect the arguments for
substitution into the replacement list before performing the
expansion. We'll do this in an instance of
<C-preprocessing-macro-invocation>
.
<Module cpr-internals
>+=
define class <C-preprocessing-macro-invocation> (<object>) // Name of the invoked macro constant slot invocation-name :: <byte-string>, required-init-keyword: name:; // Starting position of the macro name in the source constant slot invocation-start-position :: <integer>, required-init-keyword: start-position:; // Ending position of the macro name in the source constant slot invocation-end-position :: <integer>, required-init-keyword: end-position:; // Macro definition constant slot invocation-macro-definition :: <C-preprocessing-macro>, required-init-keyword: macro-definition:; // State from which macro expansion state was entered constant slot invocation-state-function :: <function>, required-init-keyword: state-function:; // True of newline is permitted within the macro invocation constant slot invocation-newline-permitted? :: <boolean>, required-init-keyword: newline-permitted?:; // Tokens saved for the current argument constant slot invocation-saved-tokens :: <stretchy-vector> = make(<stretchy-vector>); // Table (indexed by parameter name) of argument values constant slot invocation-arguments :: <object-table> = make(<object-table>); // Count of nested parentheses (beyond the first opening parenthesis) slot invocation-parens :: <integer>, init-value: 0; // Number of arguments processed so far slot invocation-argument-count :: <integer>, init-value: 0; end class;
<Additional slots in <C-preprocessor-token-dispatcher>
>+=
slot token-dispatcher-macro-invocation :: false-or(<C-preprocessing-macro-invocation>), init-value: #f;
<Expand a function-like macro>=
token-dispatcher.token-dispatcher-macro-invocation := make(<C-preprocessing-macro-invocation>, name: token-value, start-position: start-position, end-position: end-position, macro-definition: definition, state-function: token-dispatcher.token-dispatcher-state-function, newline-permitted?: newline-permitted?);
If a macro name is followed by something other than an open parenthesis, we know it's not really a macro invocation, and we put the macro name back into the input stream.
<Module cpr-internals
>+=
define function abandon-macro-invocation (token-dispatcher :: <C-preprocessor-token-dispatcher>) => (token-value, start-position, end-position); let invocation = token-dispatcher.token-dispatcher-macro-invocation; token-dispatcher.token-dispatcher-macro-invocation := #f; token-dispatcher.token-dispatcher-state-function := invocation.invocation-state-function; values(invocation.invocation-name, invocation.invocation-start-position, invocation.invocation-end-position) end function;
After seeing the macro name, we wait until we see the opening parenthesis (or something else).
<Module cpr-internals
>+=
define preprocessor-state-function linemid-macro-name (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) select (token-name) #"LPAREN" => enter-state(token-dispatcher, macro-arguments); #"WHITESPACE" => ; // remain in this state #"NEW-LINE" => enter-state(token-dispatcher, linestart-macro-name); otherwise => let (identifier-name, identifier-start-position, identifier-end-position) = abandon-macro-invocation(token-dispatcher); C-preprocessor-output-token(token-dispatcher, token-number, #"IDENTIFIER", identifier-name, identifier-start-position, identifier-end-position); C-preprocessor-dispatch-token (token-dispatcher, token-number, token-name, token-value, start-position, end-position); end select; end preprocessor-state-function;
One “corner case” is when the macro name is followed by a newline before the opening parenthesis (if any).
<Module cpr-internals
>+=
define preprocessor-state-function linestart-macro-name (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) select (token-name) #"LPAREN" => enter-state(token-dispatcher, macro-arguments); #"WHITESPACE", #"NEW-LINE" => ; // remain in this state otherwise => let (identifier-name, identifier-start-position, identifier-end-position) = abandon-macro-invocation(token-dispatcher); C-preprocessor-output-token(token-dispatcher, token-number, #"IDENTIFIER", identifier-name, identifier-start-position, identifier-end-position); enter-state(token-dispatcher, linestart); token-dispatcher.token-dispatcher-state-function (token-dispatcher, token-number, token-name, token-value, start-position, end-position); end select; end preprocessor-state-function;
When we see the macro arguments, we save them in the
<C-preprocessing-macro-invocation>
instance. Whitespace tokens are saved (because they are
significant for stringization), but adjacent whitespace
tokens are collapsed into a single token.
<Module cpr-internals
>+=
define preprocessor-state-function macro-arguments (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) let invocation = token-dispatcher.token-dispatcher-macro-invocation; local method save-token () => (); let token = make(<C-preprocessing-token>, name: token-name, value: token-value); add!(invocation.invocation-saved-tokens, token); end method; select (token-name) #"COMMA" => if (zero?(invocation.invocation-parens)) <Handle a top-level comma within the macro arguments> else save-token(); end if; #"LPAREN" => invocation.invocation-parens := invocation.invocation-parens + 1; save-token(); #"RPAREN" => if (zero?(invocation.invocation-parens)) <Finish processing of the macro invocation> else invocation.invocation-parens := invocation.invocation-parens - 1; save-token(); end if; #"WHITESPACE", #"NEW-LINE" => if (token-name == #"NEW-LINE" & ~invocation.invocation-newline-permitted?) preprocessor-error(token-dispatcher, start-position, end-position, "newline within invocation of %s macro", invocation.invocation-name); end if; let saved-tokens = invocation.invocation-saved-tokens; unless (empty?(saved-tokens) | trailing-whitespace?(saved-tokens)) add!(saved-tokens, make(<C-preprocessing-token>, name: #"WHITESPACE")); end unless; #"EOF" => preprocessor-error(token-dispatcher, start-position, end-position, "end of input within invocation of %s macro", invocation.invocation-name); otherwise => save-token(); end select; end preprocessor-state-function;
When we see a comma within the macro arguments, we store the argument under the corresponding parameter name.
<Handle a top-level comma within the macro arguments>=
let definition = invocation.invocation-macro-definition; let index = invocation.invocation-argument-count; if (index < definition.preprocessing-macro-parameters.size) let parameter = definition.preprocessing-macro-parameters[index]; trim-whitespace!(invocation.invocation-saved-tokens); invocation.invocation-arguments[parameter] := as(<simple-object-vector>, invocation.invocation-saved-tokens); invocation.invocation-saved-tokens.size := 0; invocation.invocation-argument-count := index + 1; elseif (definition.preprocessing-macro-varargs-parameter) save-token(); else preprocessor-error(token-dispatcher, start-position, end-position, "too many arguments to %s macro", invocation.invocation-name); end if;
When we see the closing parenthesis, we store the last argument in the same way.
<Finish processing of the macro invocation>=
trim-whitespace!(invocation.invocation-saved-tokens); let definition = invocation.invocation-macro-definition; let index = invocation.invocation-argument-count; if (index < definition.preprocessing-macro-parameters.size) let parameter = definition.preprocessing-macro-parameters[index]; invocation.invocation-arguments[parameter] := as(<simple-object-vector>, invocation.invocation-saved-tokens); if (index + 1 ~= definition.preprocessing-macro-parameters.size) preprocessor-error(token-dispatcher, start-position, end-position, "too few arguments to %s macro (expected %d)", invocation.invocation-name, definition.preprocessing-macro-parameters.size); end if; elseif (definition.preprocessing-macro-varargs-parameter) let parameter = definition.preprocessing-macro-varargs-parameter; invocation.invocation-arguments[parameter] := as(<simple-object-vector>, invocation.invocation-saved-tokens); elseif (~empty?(invocation.invocation-saved-tokens)) preprocessor-error(token-dispatcher, start-position, end-position, "too many arguments to %s macro", invocation.invocation-name); end if;
Then we expand the macro's replacement list, substituting the parameter values for the parameter names where they occur.
<Finish processing of the macro invocation>+=
token-dispatcher.token-dispatcher-macro-invocation := #f; token-dispatcher.token-dispatcher-state-function := invocation.invocation-state-function; C-expand-replacement-list (token-dispatcher, invocation.invocation-name, definition, invocation.invocation-arguments, invocation.invocation-start-position, end-position);
When expanding a replacement list, we delay the
processing of each item in order to allow it to be pasted
onto the following item using the ##
operator.
<Module cpr-internals
>+=
define function C-expand-replacement-list (token-dispatcher :: <C-preprocessor-token-dispatcher>, macro-name :: <byte-string>, definition :: <C-preprocessing-macro>, parameters :: false-or(<object-table>), start-position, end-position) => (); let replacement-list = definition.preprocessing-macro-replacement; let limit = replacement-list.size; let result-list = make(<stretchy-object-vector>); <Establishexpansion-token-dispatcher
> local <Local methods ofC-expand-replacement-list
> method loop (prev-token :: <C-preprocessing-token>, prev-whitespace-token :: false-or(<C-preprocessing-token>), i :: <integer>) if (i < limit) let (token, next) = item(i, #f); if (token.preprocessing-token-name == #"SHARPSHARP" & i + 1 < limit) let (paste-token, next) = item(i + 1, #t); let placemarker = paste(prev-token.preprocessing-token-name, prev-token.preprocessing-token-value, paste-token.preprocessing-token-name, paste-token.preprocessing-token-value); loop(make(<C-preprocessing-token>, name: #"PLACEMARKER", value: placemarker), #f, next); elseif (token.preprocessing-token-name == #"WHITESPACE") loop(prev-token, token, next) else output(prev-token, prev-whitespace-token); loop(token, #f, next); end if; else output(prev-token, prev-whitespace-token); end if; end method; if (limit > 0) let (token, next) = item(0, #f); loop(token, #f, next); end if; <Dispatch the tokens inresult-list
for re-expansion> end function;
A replacement list item consists of a single
preprocessing token, or a #
operator followed by a
parameter name. References to parameter names are also
replaced with #"PARAMETER"
markers.
<Local methods of C-expand-replacement-list
>=
method item (i :: <integer>, skip-whitespace? :: <boolean>) => (token :: <C-preprocessing-token>, next :: <integer>); let token :: <C-preprocessing-token> = replacement-list[i]; let token-name = token.preprocessing-token-name; select (token-name) #"IDENTIFIER", #"BLUE-IDENTIFIER" => let token-value = token.preprocessing-token-value; let expansion = parameters & element(parameters, token-value, default: #f); if (expansion) values(make(<C-preprocessing-token>, name: #"PARAMETER", value: expansion), i + 1) else values(token, i + 1); end if; #"SHARP" => if (i + 1 < limit) let (pname-token :: <C-preprocessing-token>, next :: <integer>) = item(i + 1, #t); if (pname-token.preprocessing-token-name == #"PARAMETER") <Stringify the parameter value> else source-error(definition, "A macro parameter name must follow the # operator"); end if; else source-error(definition, "A macro parameter name must follow the # operator"); end if; #"WHITESPACE" => if (skip-whitespace?) item(i + 1, #t) else values(token, i + 1) end if; otherwise => values(token, i + 1) end select end,
<Stringify the parameter value>=
let stringified = with-output-to-string (str) write-element(str, '\"'); for (param-token :: <C-preprocessing-token> in pname-token.preprocessing-token-value) let param-token-name = param-token.preprocessing-token-name; let param-token-value = param-token.preprocessing-token-value; if (param-token-name == #"STRING-LITERAL" | param-token-name == #"CHARACTER-CONSTANT") for (c in param-token-value) if (c == '\\' | c == '\"') write-element(str, '\\'); end if; write-element(str, c); end for; else write(str, aux-token-string(param-token-name, param-token-value)); end if; end for; write-element(str, '\"'); end; values(make(<C-preprocessing-token>, name: #"STRING-LITERAL", value: stringified), next);
Token pasting placemarkers are represented as
<string-stream>
instances to which we can
write the string representations of the tokens to be
pasted.
<Local methods of C-expand-replacement-list
>+=
method paste (token-name, token-value, paste-token-name, paste-token-value) => (placemarker); let placemarker = if (token-name == #"PLACEMARKER") token-value else paste-aux(make(<string-stream>, direction: #"output"), token-name, token-value); end if; paste-aux(placemarker, paste-token-name, paste-token-value); end method,
method paste-aux (placemarker, token-name, token-value) => (placemarker); if (token-name == #"PARAMETER") for (param-token :: <C-preprocessing-token> in token-value) write(placemarker, aux-token-string(param-token.preprocessing-token-name, param-token.preprocessing-token-value)); end for; else write(placemarker, aux-token-string(token-name, token-value)); end if; placemarker end method,
It is possible for “painted” identifiers to appear during stringization or token pasting, so we handle that case specially.
<Local methods of C-expand-replacement-list
>+=
method aux-token-string(token-name :: <symbol>, token-value) if (token-name == #"BLUE-IDENTIFIER") token-value else preprocessor-token-string(token-name, token-value) end if end method,
Most tokens, excepting parameter names, are copied directly into the result list. If the macro name itself appears, we need to paint it blue to prevent its further expansion.
<Local methods of C-expand-replacement-list
>+=
method output (token :: <C-preprocessing-token>, whitespace-token :: false-or(<C-preprocessing-token>)) => (); if (token.preprocessing-token-name == #"PARAMETER") <Output the expanded argument for this parameter> elseif (token.preprocessing-token-name == #"IDENTIFIER" & token.preprocessing-token-value == macro-name) add!(result-list, make(<C-preprocessing-token>, name: #"BLUE-IDENTIFIER", value: macro-name)); else add!(result-list, token); end if; if (whitespace-token) add!(result-list, whitespace-token); end if; end method,
Argument values are re-expanded as if they were the the
entire remainder of the preprocessing file input. We do
this by creating a separate dispatcher to perform the
expansion and store the resulting tokens into
result-list
.
<Establish expansion-token-dispatcher
>=
let pp-translation-unit
= token-dispatcher.token-dispatcher-preprocessing-translation-unit;
let macro-definitions = pp-translation-unit.preprocessing-macro-definitions;
let preprocessor-state = token-dispatcher.token-dispatcher-state;
let expansion-preprocessor-state
= make(<C-preprocessor-state>,
dialect: preprocessor-state.preprocessor-dialect,
consumer:
method (result-list, token-name, token-value,
rangemap, start-position, end-position) => ();
<Store the expanded tokens in result-list
>
end,
consumer-data: result-list);
let expansion-token-dispatcher
= make(<C-preprocessor-token-dispatcher>,
preprocessing-translation-unit:
token-dispatcher.token-dispatcher-preprocessing-translation-unit,
scanner: token-dispatcher.token-dispatcher-scanner,
directory: token-dispatcher.token-dispatcher-directory,
rangemap: token-dispatcher.token-dispatcher-rangemap,
consumer-state-function: C-preprocess-token-expand-argument,
consumer-state: expansion-preprocessor-state);
<Module cpr-internals
>+=
define preprocessor-state-function expand-argument (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) let preprocessor-state = token-dispatcher.token-dispatcher-state; select (token-name) #"IDENTIFIER" => <Expand the identifier if it has a macro definition, and send it directly to the output otherwise> #"EOF" => ; // Filter out EOF markers at the end of parameter expansions otherwise => C-preprocessor-output-token(token-dispatcher, token-number, token-name, token-value, start-position, end-position); end select; end preprocessor-state-function;
If we encounter an identifier and its expansion is
being suppressed, then we “paint it blue”
(changing the token name to #"BLUE-IDENTIFIER"
) to
prevent expansion in the future.
<Store the expanded tokens in result-list
>=
select (token-name) #"IDENTIFIER" => let definition = element(macro-definitions, token-value, default: #f); add!(result-list, make(<C-preprocessing-token>, name: if (definition & definition.preprocessing-macro-suppressed?) #"BLUE-IDENTIFIER" else #"IDENTIFIER" end if, value: token-value)); otherwise => add!(result-list, make(<C-preprocessing-token>, name: token-name, value: token-value)); end select;
We feed the argument tokens into the this dispatcher so
they get expanded, with the expansion added to
result-list
.
<Output the expanded argument for this parameter>=
for (param-token :: <C-preprocessing-token> in token.preprocessing-token-value) let param-token-name = param-token.preprocessing-token-name; let param-token-value = param-token.preprocessing-token-value; C-preprocessor-dispatch-token (expansion-token-dispatcher, 0, param-token-name, param-token-value, start-position, end-position); end for; C-preprocessor-dispatch-token (expansion-token-dispatcher, 0, #"EOF", #f, end-position, end-position);
<Dispatch the tokens in result-list
for re-expansion>=
with-macro-suppressed (definition)
for (token :: <C-preprocessing-token> in result-list)
let token-name = token.preprocessing-token-name;
if (token-name == #"PLACEMARKER")
<Output the token paste placemarker token-value
>
else
C-preprocessor-dispatch-token
(token-dispatcher, 0,
token-name, token.preprocessing-token-value,
start-position, end-position);
end if;
end for;
end with-macro-suppressed;
To output the result of a token paste, we re-scan the resulting string. The resulting source location information is only approximate, but it's the best we can do for now.
<Output the token paste placemarker token-value
>=
let scanner = make(<simple-lexical-scanner>, definition: $C-tokens, rangemap: token-dispatcher.token-dispatcher-rangemap); scanner.scanner-source-position := start-position; scan-tokens(scanner, C-preprocessor-dispatch-token, token-dispatcher, stream-contents(token.preprocessing-token-value));
The C standard specifies several built-in object-like
preprocessor macros. We add these definitions in the
initializer function for the
<C-preprocessing-translation-unit-representation>
class.
<Modules imported by the cpr-internals
module>+=
use date;
<Module cpr-internals
>+=
define constant $month-abbreviations = #["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
define method initialize (instance :: <C-preprocessing-translation-unit-representation>, #key, #all-keys) => (); let now = current-date(); let date-string = format-to-string("\"%s %2d %4d\"", $month-abbreviations[now.date-month - 1], now.date-day, now.date-year); preprocessor-define(instance, "__DATE__", date-string); let time-string = format-to-string("\"%02d:%02d:%02d\"", now.date-hours, now.date-minutes, now.date-seconds); preprocessor-define(instance, "__TIME__", time-string); preprocessor-add-macro-definition (instance, intern-string("__FILE__"), make(<C-preprocessing-macro>, replacement: #[], expander: method (dispatcher, name, start-position, end-position) let srcloc = range-source-location(dispatcher.token-dispatcher-rangemap, start-position, end-position); let literal = concatenate("\"", as(<string>, srcloc.source-file), "\""); dispatcher.token-dispatcher-state-function (dispatcher, 0, #"STRING-LITERAL", literal, start-position, end-position); end)); preprocessor-add-macro-definition (instance, intern-string("__LINE__"), make(<C-preprocessing-macro>, replacement: #[], expander: method (dispatcher, name, start-position, end-position) let srcloc = range-source-location(dispatcher.token-dispatcher-rangemap, start-position, end-position); dispatcher.token-dispatcher-state-function (dispatcher, 0, #"PP-NUMBER", integer-to-string(srcloc.source-start-line), start-position, end-position); end)); preprocessor-define(instance, "__STDC__", "1"); preprocessor-define(instance, "__STDC_HOSTED__", "1"); preprocessor-define(instance, "__STDC_VERSION__", "199901L"); end method;
Within certain preprocessor directives, macro invocations
are expanded when they occur. The defined
operator
also needs to be handled specially.
<Module cpr-internals
>+=
define constant $defined-operator :: <string> = intern-string("defined");
define preprocessor-state-function sharp-directive-expanded
(token-dispatcher :: <C-preprocessor-token-dispatcher>,
token-number, token-name :: <symbol>, token-value,
start-position, end-position)
let preprocessor-state = token-dispatcher.token-dispatcher-state;
let parser = preprocessor-state.preprocessor-directive-parser;
select (token-name)
#"WHITESPACE" =>
if (simple-parser-can-consume-token?(parser, token-number, token-name))
with-preprocessing-directive-restart (token-dispatcher, token-name)
simple-parser-consume-token(parser, token-number, token-name, #f,
start-position, end-position);
end;
end if;
#"NEW-LINE" =>
with-preprocessing-directive-restart (token-dispatcher, token-name)
simple-parser-consume-token(parser, token-number, token-name,
token-value,
start-position, end-position);
end;
token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition
:= $C-tokens;
enter-state(token-dispatcher, linestart);
#"IDENTIFIER" =>
token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition
:= $C-tokens;
let pp-translation-unit
= token-dispatcher.token-dispatcher-preprocessing-translation-unit;
let definition
= element(pp-translation-unit.preprocessing-macro-definitions,
token-value, default: #f);
if (definition & ~definition.preprocessing-macro-suppressed?)
if (C-preprocessing-macro-expand(token-dispatcher, definition,
token-value,
start-position, end-position, #f))
enter-state(token-dispatcher, sharp-directive-macro-name)
end if;
elseif (token-value == $defined-operator)
<Handle the defined
operator>
else
with-preprocessing-directive-restart (token-dispatcher, token-name)
simple-parser-consume-token(parser,
token-number, token-name, token-value,
start-position, end-position);
end;
end if;
#"BLUE-IDENTIFIER" =>
with-preprocessing-directive-restart (token-dispatcher, token-name)
simple-parser-consume-token(parser, 0, #"IDENTIFIER", token-value,
start-position, end-position);
end;
#"EOF" =>
signal("File does not end in a newline");
simple-parser-consume-token(parser, 0, #"NEW-LINE", #f,
start-position, end-position);
otherwise =>
with-preprocessing-directive-restart (token-dispatcher, token-name)
simple-parser-consume-token(parser, token-number, token-name,
token-value,
start-position, end-position);
end;
end select;
end preprocessor-state-function;
After we've seen the name of a function-like macro, we wait to see if what follows is an actual macro invocation. If it is not, we abandon the invocation and place the macro name into the output.
<Module cpr-internals
>+=
define preprocessor-state-function sharp-directive-macro-name (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) select (token-name) #"LPAREN" => enter-state(token-dispatcher, macro-arguments); #"WHITESPACE" => ; // remain in this state otherwise => let preprocessor-state = token-dispatcher.token-dispatcher-state; let parser = preprocessor-state.preprocessor-directive-parser; let (identifier-name, identifier-start-position, identifier-end-position) = abandon-macro-invocation(token-dispatcher); simple-parser-consume-token(parser, 0, #"IDENTIFIER", identifier-name, identifier-start-position, identifier-end-position); token-dispatcher.token-dispatcher-state-function (token-dispatcher, token-number, token-name, token-value, start-position, end-position); end select; end preprocessor-state-function;
The defined
operator is treated as a token by
the directive parser.
token DEFINED;
<Handle the defined
operator>=
simple-parser-consume-token(parser, 0, #"DEFINED", #f, start-position, end-position); enter-state(token-dispatcher, sharp-directive-defined);
The defined
operator suppresses the expansion of
the next following identifier.
<Module cpr-internals
>+=
define preprocessor-state-function sharp-directive-defined (token-dispatcher :: <C-preprocessor-token-dispatcher>, token-number, token-name :: <symbol>, token-value, start-position, end-position) let preprocessor-state = token-dispatcher.token-dispatcher-state; let parser = preprocessor-state.preprocessor-directive-parser; select (token-name) #"WHITESPACE" => #f; #"NEW-LINE" => simple-parser-consume-token(parser, token-number, token-name, token-value, start-position, end-position); enter-state(token-dispatcher, linestart); #"IDENTIFIER", #"BLUE-IDENTIFIER" => simple-parser-consume-token(parser, 0, #"IDENTIFIER", token-value, start-position, end-position); enter-state(token-dispatcher, sharp-directive-expanded); #"EOF" => signal("File does not end in a newline"); simple-parser-consume-token(parser, 0, #"NEW-LINE", #f, start-position, end-position); otherwise => simple-parser-consume-token(parser, token-number, token-name, token-value, start-position, end-position); end select; end preprocessor-state-function;
Conditional groups can be nested pretty much arbitrarily deep. To keep track of whether a group is being skipped or not, we need to maintain a stack of flags.
<Additional slots in <C-preprocessor-state>
>+=
constant slot preprocessor-skipping?-stack :: <stretchy-object-vector> = make(<stretchy-object-vector>);
To implement the #else
and #elif
directives, we need to keep track, at each stack level, of
whether or not a true value has been seen in the current
series of conditional directives. We also need to note when
#else
has been seen to prevent #else
or
#elif
from being used again in the current
series.
<Additional slots in <C-preprocessor-state>
>+=
constant slot preprocessor-true-seen?-stack :: <stretchy-object-vector> = make(<stretchy-object-vector>); constant slot preprocessor-else-seen?-stack :: <stretchy-object-vector> = make(<stretchy-object-vector>);
The preprocessor-begin-conditional
function starts a
new conditional group level and sets the
preprocessor-skipping?
slot of the preprocessor
state.
<Module cpr-internals
>+=
define function preprocessor-begin-conditional (token-dispatcher :: <C-preprocessor-token-dispatcher>, flag) => (); let preprocessor-state = token-dispatcher.token-dispatcher-state; preprocessor-state.preprocessor-skipping? := ~flag; add!(preprocessor-state.preprocessor-skipping?-stack, ~flag); add!(preprocessor-state.preprocessor-true-seen?-stack, flag); add!(preprocessor-state.preprocessor-else-seen?-stack, #f); end function;
At the end of a preprocessing translation unit all conditional groups must be closed.
<Finalize the preprocessor>=
if (preprocessor-state.preprocessor-skipping?-stack.size ~= 0) preprocessor-error(preprocessor-token-dispatcher, end-position, end-position, "Conditional directive left unterminated at end-of-file"); end if;
#ifdef
Directive<Module cpr-internals
>+=
define function if-skipping-function (token-dispatcher :: <C-preprocessor-token-dispatcher>) => (parse-directive? :: <boolean>); let preprocessor-state = token-dispatcher.token-dispatcher-state; add!(preprocessor-state.preprocessor-skipping?-stack, #t); add!(preprocessor-state.preprocessor-true-seen?-stack, #f); add!(preprocessor-state.preprocessor-else-seen?-stack, #f); #f; end function;
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "ifdef", skipping-function: if-skipping-function);
<C90 preprocessor directive grammar productions>+=
directive "ifdef" [IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>) let preprocessing-translation-unit = dispatcher.token-dispatcher-preprocessing-translation-unit; let definition = element(preprocessing-translation-unit.preprocessing-macro-definitions, IDENTIFIER, default: #f); preprocessor-begin-conditional(dispatcher, definition ~== #f);
#ifndef
Directive<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "ifndef", skipping-function: if-skipping-function);
<C90 preprocessor directive grammar productions>+=
directive "ifndef" [IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>) let preprocessing-translation-unit = dispatcher.token-dispatcher-preprocessing-translation-unit; let definition = element(preprocessing-translation-unit.preprocessing-macro-definitions, IDENTIFIER, default: #f); preprocessor-begin-conditional(dispatcher, ~definition);
#if
Directive<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "if", skipping-function: if-skipping-function, macro-replaced?: #t);
<C90 preprocessor directive grammar productions>+=
directive "if" [constant-expression] (dispatcher :: <C-preprocessor-token-dispatcher>) preprocessor-begin-conditional(dispatcher, ~zero?(constant-expression()));
Preprocessor constant expressions are similar to
ordinary C language constant expressions, except that
there are no casts, and there are only two types: a signed
type equivalent to intmax_t
, and an unsigned type
equivalent to uintmax_t
. There is no need to
construct a parse tree for preprocessor constant
expressions, but we do use closures (thunks, in
particular) to delay evaluation for the benefit of the
short-circuited logical operators.
Since preprocessing macros are expanded before they are
seen by the #if
directive parser, if an
identifier is seen at this point it means it had no macro
definition, and should therefore be replaced with
0.
<C90 preprocessor directive auxiliary grammar productions>+=
production primary-expression :: <function> => [IDENTIFIER] () method() 0 end;
production primary-expression :: <function> => [PP-NUMBER] (data, srcloc) block () let (value, type) = C-preprocessing-number-value(PP-NUMBER); always(value); exception (e :: <simple-error>) source-error(srcloc, "%s", e); end block; production primary-expression :: <function> => [CHARACTER-CONSTANT] (data, srcloc) block () let (value, type) = C-character-constant-value(CHARACTER-CONSTANT); always(value); exception (e :: <simple-error>) source-error(srcloc, "%s", e); end block;
production primary-expression :: <function> => [LPAREN constant-expression RPAREN];
production unary-expression :: <function> => [primary-expression]; production unary-expression :: <function> => [PLUS unary-expression] () unary-expression; production unary-expression :: <function> => [MINUS unary-expression] () method() negative(unary-expression()) end; production unary-expression :: <function> => [TILDE unary-expression] () method () lognot(unary-expression()) end; production unary-expression :: <function> => [BANG unary-expression] () method () if (zero?(unary-expression())) 1 else 0 end end; production unary-expression :: <function> => [DEFINED IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>) let preprocessing-translation-unit = dispatcher.token-dispatcher-preprocessing-translation-unit; let definition = element(preprocessing-translation-unit.preprocessing-macro-definitions, IDENTIFIER, default: #f); always(if (definition) 1 else 0 end); production unary-expression :: <function> => [DEFINED LPAREN IDENTIFIER RPAREN] (dispatcher :: <C-preprocessor-token-dispatcher>) let preprocessing-translation-unit = dispatcher.token-dispatcher-preprocessing-translation-unit; let definition = element(preprocessing-translation-unit.preprocessing-macro-definitions, IDENTIFIER, default: #f); always(if (definition) 1 else 0 end);
production multiplicative-expression :: <function> => [unary-expression]; production multiplicative-expression :: <function> => [multiplicative-expression STAR unary-expression] () method () multiplicative-expression() * unary-expression() end; production multiplicative-expression :: <function> => [multiplicative-expression SLASH unary-expression] (dispatcher, srcloc :: <source-location>) method () let divisor = unary-expression(); if (zero?(divisor)) source-error(srcloc, "division by zero") end if; truncate/(multiplicative-expression(), divisor) end; production multiplicative-expression :: <function> => [multiplicative-expression PERCENT unary-expression] (dispatcher, srcloc :: <source-location>) method () let divisor = unary-expression(); if (zero?(divisor)) source-error(srcloc, "division by zero") end if; remainder(multiplicative-expression(), divisor) end;
production additive-expression :: <function> => [multiplicative-expression]; production additive-expression :: <function> => [additive-expression PLUS multiplicative-expression] () method () additive-expression() + multiplicative-expression() end; production additive-expression :: <function> => [additive-expression MINUS multiplicative-expression] () method () additive-expression() - multiplicative-expression() end;
production shift-expression :: <function> => [additive-expression]; production shift-expression :: <function> => [shift-expression SHL additive-expression] () method() ash(shift-expression(), additive-expression()) end; production shift-expression :: <function> => [shift-expression SHR additive-expression] () method () ash(shift-expression(), -additive-expression()) end;
production relational-expression :: <function> => [shift-expression]; production relational-expression :: <function> => [relational-expression LT shift-expression] () method () if (relational-expression() < shift-expression()) 1 else 0 end end; production relational-expression :: <function> => [relational-expression GT shift-expression] () method () if (relational-expression() > shift-expression()) 1 else 0 end end; production relational-expression :: <function> => [relational-expression LE shift-expression] () method () if (relational-expression() <= shift-expression()) 1 else 0 end end;
production relational-expression :: <function> => [relational-expression GE shift-expression] () method () if (relational-expression() <= shift-expression()) 1 else 0 end end;
production equality-expression :: <function> => [relational-expression]; production equality-expression :: <function> => [equality-expression EQ relational-expression] () method () if (equality-expression() = relational-expression()) 1 else 0 end end; production equality-expression :: <function> => [equality-expression NE relational-expression] () method () if (equality-expression() ~= relational-expression()) 1 else 0 end end;
production AND-expression :: <function> => [equality-expression]; production AND-expression :: <function> => [AND-expression AMP equality-expression] () method () logand(AND-expression(), equality-expression()) end;
production exclusive-OR-expression :: <function> => [AND-expression]; production exclusive-OR-expression :: <function> => [exclusive-OR-expression HAT AND-expression] () method () logxor(exclusive-OR-expression(), AND-expression()) end;
production inclusive-OR-expression :: <function> => [exclusive-OR-expression]; production inclusive-OR-expression :: <function> => [inclusive-OR-expression OR exclusive-OR-expression] () method () logior(inclusive-OR-expression(), exclusive-OR-expression()) end;
production logical-AND-expression :: <function> => [inclusive-OR-expression]; production logical-AND-expression :: <function> => [logical-AND-expression AMPAMP inclusive-OR-expression] () method () if (~zero?(logical-AND-expression()) & ~zero?(inclusive-OR-expression())) 1 else 0 end end;
production logical-OR-expression :: <function> => [logical-AND-expression]; production logical-OR-expression :: <function> => [logical-OR-expression OROR logical-AND-expression] () method () if (~zero?(logical-OR-expression()) | ~zero?(logical-AND-expression())) 1 else 0 end end;
production conditional-expression :: <function> => [logical-OR-expression]; production conditional-expression :: <function> => [logical-OR-expression QUEST constant-expression COLON conditional-expression] () method () if (~zero?(logical-OR-expression())) constant-expression() else conditional-expression() end end;
production constant-expression :: <function> => [conditional-expression]; production constant-expression :: <function> => [constant-expression COMMA conditional-expression] () conditional-expression;
#elif
Directive<Module cpr-internals
>+=
define function else-skipping-function (token-dispatcher :: <C-preprocessor-token-dispatcher>) => (parse-directive? :: <boolean>); let preprocessor-state = token-dispatcher.token-dispatcher-state; let index = preprocessor-state.preprocessor-skipping?-stack.size - 1; index = 0 | ~preprocessor-state.preprocessor-skipping?-stack[index - 1] end function;
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "elif", skipping-function: else-skipping-function, macro-replaced?: #t);
<C90 preprocessor directive grammar productions>+=
directive "elif" [constant-expression] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let preprocessor-state = dispatcher.token-dispatcher-state; let index = preprocessor-state.preprocessor-skipping?-stack.size - 1; if (index >= 0) if (preprocessor-state.preprocessor-else-seen?-stack[index]) source-error(srcloc, "#else has already appeared in this if-section"); end if; let flag = ~zero?(constant-expression()) & ~preprocessor-state.preprocessor-true-seen?-stack[index]; preprocessor-state.preprocessor-skipping?-stack[index] := ~flag; preprocessor-state.preprocessor-skipping? := ~flag; if (flag) preprocessor-state.preprocessor-true-seen?-stack[index] := #t; end if; else source-error(srcloc, "#elif outside of #ifdef/#ifndef/#if"); end if;
#else
Directive<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "else", skipping-function: else-skipping-function);
<C90 preprocessor directive grammar productions>+=
directive "else" [] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let preprocessor-state = dispatcher.token-dispatcher-state; let index = preprocessor-state.preprocessor-skipping?-stack.size - 1; if (index >= 0) if (preprocessor-state.preprocessor-else-seen?-stack[index]) source-error(srcloc, "#else has already appeared in this if-section"); end if; let skipping = preprocessor-state.preprocessor-true-seen?-stack[index]; preprocessor-state.preprocessor-skipping?-stack[index] := skipping; preprocessor-state.preprocessor-skipping? := skipping; if (~skipping) preprocessor-state.preprocessor-true-seen?-stack[index] := #t; end if; preprocessor-state.preprocessor-else-seen?-stack[index] := #t; else source-error(srcloc, "#else outside of #ifdef/#ifndef/#if"); end if;
#endif
DirectiveThe #endif
directive terminates the current
if-section, popping the stack and resetting
preprocessor-skipping?
for the new stack
top.
<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "endif", skipping-function: method(dispatcher) #t end);
<C90 preprocessor directive grammar productions>+=
directive "endif" [] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let preprocessor-state = dispatcher.token-dispatcher-state; let index = preprocessor-state.preprocessor-skipping?-stack.size - 1; if (index >= 0) preprocessor-state.preprocessor-skipping?-stack.size := index; preprocessor-state.preprocessor-true-seen?-stack.size := index; preprocessor-state.preprocessor-else-seen?-stack.size := index; if (index = 0) preprocessor-state.preprocessor-skipping? := #f; else preprocessor-state.preprocessor-skipping? := preprocessor-state.preprocessor-skipping?-stack[index - 1]; end if; else source-error(srcloc, "#endif outside of #ifdef/#ifndef/#if"); end if;
#include
DirectiveThe #include
directive is slightly special,
because the lexical tokens for header names can only be
recognized after the include
.
<Module cpr-internals
>+=
define constant $C-include-directive-tokens = simple-lexical-definition token EOF; // Whitespace and comments token WHITESPACE = "([ \t\f]|/\\*([^*]|\\*+[^*/])*\\*+/)+"; // New-line characters and line-end comments token NEW-LINE = "(//[^\r\n]*)?(\n|\r|\r\n)"; // Identifiers token IDENTIFIER :: <string> = "[a-zA-Z_][0-9a-zA-Z_]*", semantic-value-function: method (token-string :: <byte-string>, token-start :: <integer>, token-end :: <integer>); intern-string(token-string, start: token-start, end: token-end) end; token HEADER-NAME :: <string> = "<[^\n\r>]+>", semantic-value-function: method (token-string :: <byte-string>, token-start :: <integer>, token-end :: <integer>); as(<file-locator>, copy-sequence(token-string, start: token-start + 1, end: token-end - 1)) end; token SOURCE-NAME :: <string> = "\"[^\n\r\"]+\"", semantic-value-function: method (token-string :: <byte-string>, token-start :: <integer>, token-end :: <integer>); as(<file-locator>, copy-sequence(token-string, start: token-start + 1, end: token-end - 1)) end; end;
<Module cpr-internals
>+=
define-C-preprocessor-directive ($C90-C-preprocessor-dialect, "include", macro-replaced?: #t, lexical-definition: $C-include-directive-tokens);
<C90 preprocessor directive grammar productions>+=
directive "include" [HEADER-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) do-include(dispatcher, srcloc, #"HEADER-NAME", HEADER-NAME);
directive "include" [SOURCE-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) do-include(dispatcher, srcloc, #"SOURCE-NAME", SOURCE-NAME);
As a result of macro expansion, string literal tokens can also appear.
<C90 preprocessor directive grammar productions>+=
directive "include" [STRING-LITERAL] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let filename // FIXME = copy-sequence(STRING-LITERAL, start: 1, end: STRING-LITERAL.size - 1); do-include(dispatcher, srcloc, #"SOURCE-NAME", as(<file-locator>, filename));
<Module cpr-internals
>+=
define function do-include
(token-dispatcher :: <C-preprocessor-token-dispatcher>,
srcloc :: <source-location>,
kind :: one-of(#"HEADER-NAME", #"SOURCE-NAME"),
name :: <file-locator>)
=> ();
token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition
:= $C-tokens;
let preprocessing-translation-unit
= token-dispatcher.token-dispatcher-preprocessing-translation-unit;
let preprocessor-state = token-dispatcher.token-dispatcher-state;
let rangemap = token-dispatcher.token-dispatcher-rangemap;
<Save the current source location as here
>
block ()
<Preprocess the given include file and >
<Update the scanner to use the new source-position, and reset the source location>
exception (e :: <file-system-error>)
source-error(srcloc, "%s", e);
end block;
end function;
Our idea of the current source location will change
once we start preprocessing the included file, so we save
the source location immediately following the
#include
directive so we can restore it later.
<Save the current source location as here
>=
let end-position = token-dispatcher.token-dispatcher-scanner.scanner-source-position; let here = range-source-location(rangemap, end-position, end-position);
We then use do-with-C-header-file
to
locate the file to be included, reset the current source
location to be at the beginning of line 1 of
that file, and preprocess it.
<Preprocess the given include file and >=
let (directory, search-path) = select (kind) #"HEADER-NAME" => values(#f, preprocessing-translation-unit .preprocessing-system-header-search-path); #"SOURCE-NAME" => values(token-dispatcher.token-dispatcher-directory, preprocessing-translation-unit .preprocessing-header-search-path); end select; let new-source-position = do-with-C-header-file (name, directory, search-path, method (found-locator :: <file-locator>, stream :: <stream>) => (new-end-position :: <integer>); rangemap-add-line-file(rangemap, end-position, 1, found-locator); preprocess-C-stream(preprocessing-translation-unit, stream, found-locator.locator-directory, rangemap, preprocessor-state.preprocessor-consumer, preprocessor-state.preprocessor-consumer-data, start-position: end-position + 1); end);
When preprocessing of the file completes we re-adjust
our current scanner's idea of the current source position,
and map the position to correspond to the source location
immediately following the #include
directive.
<Update the scanner to use the new source-position, and reset the source location>=
token-dispatcher.token-dispatcher-scanner.scanner-source-position := new-source-position; rangemap-add-line-file(rangemap, new-source-position, here.source-start-line, here.source-file);
#line
DirectiveThe #line
directive ammends the rangemap to
change its idea of the line number (and possibly the
filename) represented by the current place in the
input.
<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "line", macro-replaced?: #t);
<C90 preprocessor directive grammar productions>+=
directive "line" [PP-NUMBER] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let line = string-to-integer(PP-NUMBER); if (line <= 0) source-error(srcloc, "#line number out of range (must be 1 or greater)") end if; let here = dispatcher.token-dispatcher-scanner.scanner-source-position; rangemap-add-line(dispatcher.token-dispatcher-rangemap, here, line);
directive "line" [PP-NUMBER STRING-LITERAL] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>) let line = string-to-integer(PP-NUMBER); let filename // FIXME = copy-sequence(STRING-LITERAL, start: 1, end: STRING-LITERAL.size - 1); if (line <= 0) source-error(srcloc, "#line number must be 1 or greater") end if; let here = dispatcher.token-dispatcher-scanner.scanner-source-position; rangemap-add-line-file(dispatcher.token-dispatcher-rangemap, here, line, as(<file-locator>, filename));
#error
DirectiveThe #error
signals an error with the specified
preprocessing tokens in the error message.
<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "error");
<C90 preprocessor directive grammar productions>+=
directive "error" [pp-tokens-opt] (dispatcher, srcloc :: <source-location>) let string = with-output-to-string (str) for (token :: <C-preprocessing-token> in pp-tokens-opt) write-element(str, ' '); write(str, preprocessor-token-string(token.preprocessing-token-name, token.preprocessing-token-value)); end for; end; source-error(srcloc, "#error%s", string);
#pragma
Directive<Module cpr-internals
>+=
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "pragma");
<C90 preprocessor directive grammar productions>+=
directive "pragma" [pp-tokens-opt] () #f;
In addition to the additional varargs syntax described below, the GNU C preprocessor supports a number of additional extensions.
#ident
DirectiveThe #ident
directive, originally from System V:
<Module cpr-internals
>+=
define-C-preprocessor-directive($gnu89-C-preprocessor-dialect, "ident");
<GNU C preprocessor directive grammar productions>+=
directive "ident" [STRING-LITERAL] () #f;
#warning
DirectiveThe #warning
signals an warning with the specified
preprocessing tokens in the warningmessage.
<Module cpr-internals
>+=
define-C-preprocessor-directive($gnu89-C-preprocessor-dialect, "warning");
<GNU C preprocessor directive grammar productions>+=
directive "warning" [pp-tokens-opt] (dispatcher, srcloc :: <source-location>) let string = with-output-to-string (str) for (token :: <C-preprocessing-token> in pp-tokens-opt) write-element(str, ' '); write(str, preprocessor-token-string(token.preprocessing-token-name, token.preprocessing-token-value)); end for; end; source-warning(srcloc, "#warning%s", string);
#include_next
Directive<Module cpr-internals
>+=
define-C-preprocessor-directive ($gnu89-C-preprocessor-dialect, "include_next", macro-replaced?: #t, lexical-definition: $C-include-directive-tokens);
<GNU C preprocessor directive grammar productions>+=
directive "include_next" [HEADER-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>) do-include-next(dispatcher, #"HEADER-NAME", HEADER-NAME);
directive "include_next" [SOURCE-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>) do-include-next(dispatcher, #"SOURCE-NAME", SOURCE-NAME);
<Module cpr-internals
>+=
define function do-include-next (token-dispatcher :: <C-preprocessor-token-dispatcher>, kind :: one-of(#"HEADER-NAME", #"SOURCE-NAME"), name :: <file-locator>) => (); error("#include_next is not yet supported"); end function;
We include support for trigraphs more in the interest of standards compliance than any actual usefulness in practice.
Trigraph processing requires a simple state machine that
keeps track of how many consecutive ?
questions we
have encountered.
<Initialize the trigraph state>=
let trigraph-state = #f;
<Copy text and translate trigraphs from buf
to text
>=
trigraph-state := iterate trigraph-loop (buf-index :: <integer> = buf.buffer-next, text-index :: <integer> = 0, trigraph-state = trigraph-state) if (buf-index < buf.buffer-end) let c = as(<character>, buf[buf-index]); select (trigraph-state) <Trigraph handling state cases> end select; else trigraph-state end if; end iterate;
In the initial trigraph processing state, we copy the
current character to the output buffer unless it is a
?
. If it is a ?
character, we transition
to the next state without copying.
<Trigraph handling state cases>=
#f => if (c == '?') trigraph-loop(buf-index + 1, text-index, #"?"); else text[text-index] := c; trigraph-loop(buf-index + 1, text-index + 1, #f); end if;
The next (single-?
) state is similar. If we see
a non-?
character in the next state, then we need
to copy in the preceding ?
character, since we
skipped copying it in the initial state.
<Trigraph handling state cases>+=
#"?" => if (c == '?') trigraph-loop(buf-index + 1, text-index, #"??"); else text[text-index] := '?'; text[text-index + 1] := c; trigraph-loop(buf-index + 1, text-index + 2, #f); end if;
Once we have seen ??
, we look at the following
character, and replace it if it corresponds to one of the
nine trigraph sequences:
<Trigraph handling state cases>+=
#"??" => select (c) '=' => text[text-index] := '#'; trigraph-loop(buf-index + 1, text-index + 1, #f); '(' => text[text-index] := '['; trigraph-loop(buf-index + 1, text-index + 1, #f); '/' => text[text-index] := '\\'; trigraph-loop(buf-index + 1, text-index + 1, #f); ')' => text[text-index] := ']'; trigraph-loop(buf-index + 1, text-index + 1, #f); '\'' => text[text-index] := '^'; trigraph-loop(buf-index + 1, text-index + 1, #f); '<' => text[text-index] := '{'; trigraph-loop(buf-index + 1, text-index + 1, #f); '!' => text[text-index] := '|'; trigraph-loop(buf-index + 1, text-index + 1, #f); '>' => text[text-index] := '}'; trigraph-loop(buf-index + 1, text-index + 1, #f); '-' => text[text-index] := '~'; trigraph-loop(buf-index + 1, text-index + 1, #f); <Cases for other characters> end select;
If we see a third ?
, we copy the first one into
the output and remain in the ??
state.
<Cases for other characters>=
'?' => text[text-index] := '?'; trigraph-loop(buf-index + 1, text-index + 1, #"??");
For any other character we copy it, preceded by the
deferred ??
characters, and return to the initial
state.
<Cases for other characters>+=
otherwise => text[text-index] := '?'; text[text-index + 1] := '?'; text[text-index + 2] := c; trigraph-loop(buf-index + 1, text-index + 3, #f);
In the unlikely event that trigraph-state
is
not #f
at the end of the input stream, for
completeness we feed the remaining ?
characters
into the scanner.
<Finalize the trigraph state>=
select (trigraph-state) #f => ; #"?" => scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner, "?", 0, 1); #"??" => scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner, "??", 0, 2); end select;
We parse C files by instantiating a
<simple-parser>
object and feeding the
preprocessor into it via the screener.
<Parse the source file locator
into translation-unit
>=
let rangemap = make(<source-location-rangemap>); rangemap-add-line-file(rangemap, 0, 1, locator); let parser = make(<simple-parser>, automaton: $C99-parser-automaton, start-symbol: #"translation-unit-opt", rangemap: rangemap, consumer-data: translation-unit); let end-position = with-open-file(stream = locator) preprocess-C-stream(translation-unit .translation-unit-preprocessing-translation-unit, stream, locator.locator-directory, rangemap, C-screen-token, parser); end; simple-parser-consume-token(parser, 0, #"EOF", #f, end-position, end-position)
The screener takes preprocessing tokens and separates the
general categories IDENTIFIER
and
PP-NUMBER
into more specific token types.
<Module cpr-internals
>+=
define function C-screen-token (parser :: <simple-parser>, token-name, token-value, rangemap, start-position, end-position) => (); select (token-name) #"IDENTIFIER" => <Screen an identifier token> #"PP-NUMBER" => <Screen a preprocessing number token> otherwise => simple-parser-consume-token(parser, 0, token-name, token-value, start-position, end-position) end select; end;
C99 defines the following reserved words.
token AUTO; token BREAK; token _CASE; token CHAR; token CONST; token CONTINUE; token DEFAULT; token DO; token DOUBLE; token _ELSE; token ENUM; token EXTERN; token FLOAT; token _FOR; token GOTO; token _IF; token INLINE; token INT; token LONG; token REGISTER; token RESTRICT; token RETURN; token SHORT; token SIGNED; token SIZEOF; token STATIC; token STRUCT; token SWITCH; token TYPEDEF; token UNION; token UNSIGNED; token VOID; token VOLATILE; token _WHILE; token BOOL; token COMPLEX; token IMAGINARY;
<Module cpr-internals
>+=
define constant $C99-reserved-words :: <object-table> = begin let spec = #[#("auto" . #"AUTO"), #("break" . #"BREAK"), #("case" . #"_CASE"), #("char" . #"CHAR"), #("const" . #"CONST"), #("continue" . #"CONTINUE"), #("default" . #"DEFAULT"), #("do" . #"DO"), #("double" . #"DOUBLE"), #("else" . #"_ELSE"), #("enum" . #"ENUM"), #("extern" . #"EXTERN"), #("float" . #"FLOAT"), #("for" . #"_FOR"), #("goto" . #"GOTO"), #("if" . #"_IF"), #("inline" . #"INLINE"), #("int" . #"INT"), #("long" . #"LONG"), #("register" . #"REGISTER"), #("restrict" . #"RESTRICT"), #("return" . #"RETURN"), #("short" . #"SHORT"), #("signed" . #"SIGNED"), #("sizeof" . #"SIZEOF"), #("static" . #"STATIC"), #("struct" . #"STRUCT"), #("switch" . #"SWITCH"), #("typedef" . #"TYPEDEF"), #("union" . #"UNION"), #("unsigned" . #"UNSIGNED"), #("void" . #"VOID"), #("volatile" . #"VOLATILE"), #("while" . #"_WHILE"), #("_Bool" . #"BOOL"), #("_Complex" . #"COMPLEX"), #("_Imaginary" . #"IMAGINARY")]; let reserved-words = make(<object-table>, size: spec.size); for (word in spec) reserved-words[intern-string(word.head)] := word.tail; end for; reserved-words end;
<Screen an identifier token>=
let reserved-word-token-name = element($C99-reserved-words, token-value, default: #f); if (reserved-word-token-name) simple-parser-consume-token(parser, 0, reserved-word-token-name, #f, start-position, end-position) else simple-parser-consume-token(parser, 0, token-name, token-value, start-position, end-position) end if;
token TYPEDEF-NAME :: <string>; // FIXME token INTEGER-CONSTANT :: <integer>; token FLOATING-CONSTANT :: <string>; // FIXME
FIXME This is bletcherous.
<Screen a preprocessing number token>=
simple-parser-consume-token(parser, 0, #"INTEGER-CONSTANT", string-to-integer(token-value), start-position, end-position);
<Module cpr-internals
>+=
$C99-grammar-productions
>define constant $C99-grammar-productions = simple-grammar-productions <Grammar productions for C99> end;
A C translation unit consists of zero or more external declarations.
<Grammar productions for C99>=
production translation-unit-opt => [/* empty */]; production translation-unit-opt => [translation-unit-opt external-declaration];
production external-declaration => [function-definition]; production external-declaration => [declaration];
<Module cpr-internals
>+=
define constant $C99-parser-automaton = simple-parser-automaton($C-tokens, $C99-grammar-productions, #[#"translation-unit-opt"]);
<Grammar productions for C99>+=
production declaration => [declaration-specifiers init-declarator-list-opt SEMI];
production declaration-specifiers-opt => [/* empty */]; production declaration-specifiers-opt => [declaration-specifiers]; production declaration-specifiers => [storage-class-specifier declaration-specifiers-opt]; production declaration-specifiers => [type-specifier declaration-specifiers-opt]; production declaration-specifiers => [type-qualifier declaration-specifiers-opt]; production declaration-specifiers => [function-specifier declaration-specifiers-opt];
production init-declarator-list-opt :: <sequence> => [/* empty */] () #[]; production init-declarator-list-opt :: <sequence> => [init-declarator-list]; production init-declarator-list => [init-declarator]; production init-declarator-list => [init-declarator-list COMMA init-declarator];
production init-declarator => [declarator]; production init-declarator => [declarator EQUALS initializer];
<Definitions used within $C99-grammar-productions
>=
define constant <C-storage-class-specifier> = one-of(#"TYPEDEF", #"EXTERN", #"STATIC", #"AUTO", #"REGISTER");
<Grammar productions for C99>+=
production storage-class-specifier :: <C-storage-class-specifier> => [TYPEDEF] (builder) #"TYPEDEF"; production storage-class-specifier :: <C-storage-class-specifier> => [EXTERN] (builder) #"EXTERN"; production storage-class-specifier :: <C-storage-class-specifier> => [STATIC] (builder) #"STATIC"; production storage-class-specifier :: <C-storage-class-specifier> => [AUTO] (builder) #"AUTO"; production storage-class-specifier :: <C-storage-class-specifier> => [REGISTER] (builder) #"REGISTER";
<Definitions used within $C99-grammar-productions
>+=
define constant <C-simple-type-specifier> = one-of(#"VOID", #"CHAR", #"SHORT", #"INT", #"LONG", #"FLOAT", #"DOUBLE", #"SIGNED", #"UNSIGNED", #"BOOL", #"COMPLEX", #"IMAGINARY");
<Grammar productions for C99>+=
production type-specifier :: <C-simple-type-specifier> => [VOID] (builder) #"VOID"; production type-specifier :: <C-simple-type-specifier> => [CHAR] (builder) #"CHAR"; production type-specifier :: <C-simple-type-specifier> => [SHORT] (builder) #"SHORT"; production type-specifier :: <C-simple-type-specifier> => [INT] (builder) #"INT"; production type-specifier :: <C-simple-type-specifier> => [LONG] (builder) #"LONG"; production type-specifier :: <C-simple-type-specifier> => [FLOAT] (builder) #"FLOAT"; production type-specifier :: <C-simple-type-specifier> => [DOUBLE] (builder) #"DOUBLE"; production type-specifier :: <C-simple-type-specifier> => [SIGNED] (builder) #"SIGNED"; production type-specifier :: <C-simple-type-specifier> => [UNSIGNED] (builder) #"UNSIGNED"; production type-specifier :: <C-simple-type-specifier> => [BOOL] (builder) #"BOOL"; production type-specifier :: <C-simple-type-specifier> => [COMPLEX] (builder) #"COMPLEX"; production type-specifier :: <C-simple-type-specifier> => [IMAGINARY] (builder) #"IMAGINARY"; production type-specifier => [struct-or-union-specifier]; production type-specifier => [enum-specifier]; production type-specifier => [TYPEDEF-NAME];
<Grammar productions for C99>+=
production struct-or-union-specifier => [struct-or-union LBRACE struct-declaration-list RBRACE]; production struct-or-union-specifier => [struct-or-union IDENTIFIER LBRACE struct-declaration-list RBRACE];
production struct-or-union-specifier => [struct-or-union IDENTIFIER]; production struct-or-union => [STRUCT]; production struct-or-union => [UNION];
production struct-declaration-list => [struct-declaration]; production struct-declaration-list => [struct-declaration-list struct-declaration];
production struct-declaration => [specifier-qualifier-list struct-declarator-list SEMI];
production specifier-qualifier-list-opt => [/* empty */]; production specifier-qualifier-list-opt => [specifier-qualifier-list]; production specifier-qualifier-list => [type-specifier specifier-qualifier-list-opt]; production specifier-qualifier-list => [type-qualifier specifier-qualifier-list-opt];
production struct-declarator-list => [struct-declarator]; production struct-declarator-list => [struct-declarator-list COMMA struct-declarator];
production struct-declarator => [declarator]; production struct-declarator => [declarator-opt COLON constant-expression];
<Grammar productions for C99>+=
production enum-specifier => [ENUM LBRACE enumerator-list RBRACE]; production enum-specifier => [ENUM LBRACE enumerator-list COMMA RBRACE]; production enum-specifier => [ENUM IDENTIFIER LBRACE enumerator-list RBRACE]; production enum-specifier => [ENUM IDENTIFIER LBRACE enumerator-list COMMA RBRACE]; production enum-specifier => [ENUM IDENTIFIER];
production enumerator-list => [enumerator]; production enumerator-list => [enumerator-list COMMA enumerator];
production enumerator => [IDENTIFIER]; production enumerator => [IDENTIFIER EQUALS constant-expression];
<Grammar productions for C99>+=
production type-qualifier :: singleton(#"CONST") => [CONST] () #"CONST"; production type-qualifier :: singleton(#"RESTRICT") => [RESTRICT] () #"RESTRICT"; production type-qualifier :: singleton(#"VOLATILE") => [VOLATILE] () #"VOLATILE";
<Grammar productions for C99>+=
production function-specifier :: singleton(#"INLINE") => [INLINE] () #"INLINE";
<Grammar productions for C99>+=
production declarator-opt => [/* empty */]; production declarator-opt => [declarator];
production declarator => [pointer-opt direct-declarator];
production direct-declarator => [IDENTIFIER]; production direct-declarator => [LPAREN declarator RPAREN]; production direct-declarator => [direct-declarator LBRACK type-qualifier-list-opt assignment-expression-opt RBRACK]; production direct-declarator => [direct-declarator LBRACK STATIC type-qualifier-list-opt assignment-expression RBRACK]; production direct-declarator => [direct-declarator LBRACK type-qualifier-list STATIC assignment-expression RBRACK]; production direct-declarator => [direct-declarator LBRACK type-qualifier-list-opt STAR RBRACK]; production direct-declarator => [direct-declarator LPAREN parameter-type-list RPAREN]; production direct-declarator => [direct-declarator LPAREN identifier-list-opt RPAREN];
production pointer-opt => [/* empty */]; production pointer-opt => [pointer];
production pointer => [STAR type-qualifier-list-opt]; production pointer => [STAR type-qualifier-list-opt pointer];
production type-qualifier-list-opt => [/* empty */]; production type-qualifier-list-opt => [type-qualifier-list]; production type-qualifier-list => [type-qualifier]; production type-qualifier-list => [type-qualifier-list type-qualifier];
production parameter-type-list-opt => [/* empty */]; production parameter-type-list-opt => [parameter-type-list]; production parameter-type-list => [parameter-list]; production parameter-type-list => [parameter-list COMMA ELIPSIS];
production parameter-list => [parameter-declaration]; production parameter-list => [parameter-list COMMA parameter-declaration];
production parameter-declaration => [declaration-specifiers declarator]; production parameter-declaration => [declaration-specifiers abstract-declarator-opt];
production identifier-list-opt => [/* empty */]; production identifier-list-opt => [identifier-list]; production identifier-list => [IDENTIFIER]; production identifier-list => [identifier-list COMMA IDENTIFIER];
<Grammar productions for C99>+=
production type-name => [specifier-qualifier-list abstract-declarator-opt];
production abstract-declarator-opt => [/* empty */]; production abstract-declarator-opt => [abstract-declarator]; production abstract-declarator => [pointer]; production abstract-declarator => [pointer-opt direct-abstract-declarator];
production direct-abstract-declarator => [LPAREN abstract-declarator RPAREN]; production direct-abstract-declarator => [LBRACK assignment-expression-opt RBRACK]; production direct-abstract-declarator => [LBRACK STAR RBRACK]; production direct-abstract-declarator => [LPAREN parameter-type-list-opt RPAREN]; production direct-abstract-declarator => [direct-abstract-declarator LBRACK assignment-expression-opt RBRACK]; production direct-abstract-declarator => [direct-abstract-declarator LBRACK STAR RBRACK]; production direct-abstract-declarator => [direct-abstract-declarator LPAREN parameter-type-list-opt RPAREN];
<Grammar productions for C99>+=
production initializer => [assignment-expression]; production initializer => [LBRACE initializer-list RBRACE]; production initializer => [LBRACE initializer-list COMMA RBRACE];
production initializer-list => [designation-opt initializer]; production initializer-list => [initializer-list COMMA designation-opt initializer];
production designation-opt => [/* empty */]; production designation-opt => [designator-list EQUALS]; production designator-list => [designator]; production designator-list => [designator-list designator];
production designator => [LBRACK constant-expression RBRACK]; production designator => [DOT IDENTIFIER];
<Grammar productions for C99>+=
<Grammar productions for C99>+=
production function-definition => [declaration-specifiers declarator declaration-list-opt compound-statement];
production declaration-list-opt => [/* empty */]; production declaration-list-opt => [declaration-list-opt declaration];
<Grammar productions for C99>+=
make-production constant :: <C-constant-expression-representation> => [INTEGER-CONSTANT], value: INTEGER-CONSTANT; make-production constant :: <C-constant-expression-representation> => [FLOATING-CONSTANT], value: FLOATING-CONSTANT; make-production constant :: <C-constant-expression-representation> => [CHARACTER-CONSTANT], value: CHARACTER-CONSTANT;
production concatenated-string-literal :: <string> => [STRING-LITERAL] (builder) STRING-LITERAL; // FIXME production concatenated-string-literal :: <string> => [concatenated-string-literal STRING-LITERAL] (builder) concatenate(concatenated-string-literal, STRING-LITERAL); // FIXME
<Grammar productions for C99>+=
make-production primary-expression :: <C-variable-reference-expression-representation> => [IDENTIFIER], variable: IDENTIFIER; production primary-expression => [constant]; make-production primary-expression :: <C-string-literal-expression-representation> => [concatenated-string-literal], value: concatenated-string-literal; production primary-expression => [LPAREN expression RPAREN];
<Grammar productions for C99>+=
production postfix-expression => [primary-expression]; production postfix-expression :: <C-unary-expression-representation> => [postfix-expression LBRACK expression RBRACK] (builder, srcloc) make(<C-unary-expression-representation>, source-location: srcloc, operator: #"DEREF", operand: make(<C-binary-expression-representation>, source-location: srcloc, operator: #"ADD", left: postfix-expression, right: expression)); make-production postfix-expression :: <C-function-call-expression-representation> => [postfix-expression LPAREN argument-expression-list-opt RPAREN], function: postfix-expression, arguments: argument-expression-list-opt; make-production postfix-expression :: <C-member-expression-representation> => [postfix-expression DOT IDENTIFIER], operand: postfix-expression, name: IDENTIFIER; production postfix-expression :: <C-member-expression-representation> => [postfix-expression ARROW IDENTIFIER] (builder, srcloc) make(<C-member-expression-representation>, source-location: srcloc, operand: make(<C-unary-expression-representation>, source-location: srcloc, operator: #"DEREF", operand: postfix-expression), name: IDENTIFIER); make-production postfix-expression :: <C-unary-expression-representation> => [postfix-expression INC], operator: #"POSTINC", operand: postfix-expression; make-production postfix-expression :: <C-unary-expression-representation> => [postfix-expression DEC], operator: #"POSTDEC", operand: postfix-expression; production postfix-expression => [LPAREN type-name RPAREN LBRACE initializer-list RBRACE]; production postfix-expression => [LPAREN type-name RPAREN LBRACE initializer-list COMMA RBRACE];
production argument-expression-list-opt :: <sequence> => [/* empty */] () #[]; production argument-expression-list-opt :: <sequence> => [argument-expression-list]; production argument-expression-list => [assignment-expression]; production argument-expression-list => [argument-expression-list COMMA assignment-expression];
<Grammar productions for C99>+=
production unary-expression => [postfix-expression]; make-production unary-expression :: <C-unary-expression-representation> => [INC unary-expression], operator: #"PREINC", operand: unary-expression; make-production unary-expression :: <C-unary-expression-representation> => [DEC unary-expression], operator: #"PREDEC", operand: unary-expression; make-production unary-expression :: <C-unary-expression-representation> => [AMP cast-expression], operator: #"ADDROF", operand: cast-expression; make-production unary-expression :: <C-unary-expression-representation> => [STAR cast-expression], operator: #"DEREF", operand: cast-expression; make-production unary-expression :: <C-unary-expression-representation> => [PLUS cast-expression], operator: #"PLUS", operand: cast-expression; make-production unary-expression :: <C-unary-expression-representation> => [MINUS cast-expression], operator: #"MINUS", operand: cast-expression; make-production unary-expression :: <C-unary-expression-representation> => [TILDE cast-expression], operator: #"BITWISE-NOT", operand: cast-expression; make-production unary-expression :: <C-unary-expression-representation> => [BANG cast-expression], operator: #"LOGICAL-NOT", operand: cast-expression; make-production unary-expression :: <C-unary-expression-representation> => [SIZEOF unary-expression], operator: #"SIZEOF", operand: unary-expression; make-production unary-expression :: <C-sizeof-type-expression-representation> => [SIZEOF LPAREN type-name RPAREN], sizeof-type: type-name;
<Grammar productions for C99>+=
production cast-expression => [unary-expression]; make-production cast-expression :: <C-cast-expression-representation> => [LPAREN type-name RPAREN cast-expression], type: type-name, operand: cast-expression;
<Grammar productions for C99>+=
production multiplicative-expression => [cast-expression]; make-production multiplicative-expression :: <C-binary-expression-representation> => [multiplicative-expression STAR cast-expression], operator: #"MUL", left: multiplicative-expression, right: cast-expression; make-production multiplicative-expression :: <C-binary-expression-representation> => [multiplicative-expression SLASH cast-expression], operator: #"DIV", left: multiplicative-expression, right: cast-expression; make-production multiplicative-expression :: <C-binary-expression-representation> => [multiplicative-expression PERCENT cast-expression], operator: #"MOD", left: multiplicative-expression, right: cast-expression;
<Grammar productions for C99>+=
production additive-expression => [multiplicative-expression]; make-production additive-expression :: <C-binary-expression-representation> => [additive-expression PLUS multiplicative-expression], operator: #"ADD", left: additive-expression, right: multiplicative-expression; make-production additive-expression :: <C-binary-expression-representation> => [additive-expression MINUS multiplicative-expression], operator: #"SUB", left: additive-expression, right: multiplicative-expression;
<Grammar productions for C99>+=
production shift-expression => [additive-expression]; make-production shift-expression :: <C-binary-expression-representation> => [shift-expression SHL additive-expression], operator: #"SHL", left: shift-expression, right: additive-expression; make-production shift-expression :: <C-binary-expression-representation> => [shift-expression SHR additive-expression], operator: #"SHR", left: shift-expression, right: additive-expression;
<Grammar productions for C99>+=
production relational-expression => [shift-expression]; make-production relational-expression :: <C-binary-expression-representation> => [relational-expression LT shift-expression], operator: #"LT", left: relational-expression, right: shift-expression; make-production relational-expression :: <C-binary-expression-representation> => [relational-expression GT shift-expression], operator: #"GT", left: relational-expression, right: shift-expression; make-production relational-expression :: <C-binary-expression-representation> => [relational-expression LE shift-expression], operator: #"LE", left: relational-expression, right: shift-expression; make-production relational-expression :: <C-binary-expression-representation> => [relational-expression GE shift-expression], operator: #"GE", left: relational-expression, right: shift-expression;
<Grammar productions for C99>+=
production equality-expression => [relational-expression]; make-production equality-expression :: <C-binary-expression-representation> => [equality-expression EQ relational-expression], operator: #"EQ", left: equality-expression, right: relational-expression; make-production equality-expression :: <C-binary-expression-representation> => [equality-expression NE relational-expression], operator: #"NE", left: equality-expression, right: relational-expression;
<Grammar productions for C99>+=
production AND-expression => [equality-expression]; make-production AND-expression :: <C-binary-expression-representation> => [AND-expression AMP equality-expression], operator: #"BITWISE-AND", left: AND-expression, right: equality-expression;
production exclusive-OR-expression => [AND-expression]; make-production exclusive-OR-expression :: <C-binary-expression-representation> => [exclusive-OR-expression HAT AND-expression], operator: #"BITWISE-XOR", left: exclusive-OR-expression, right: AND-expression;
production inclusive-OR-expression => [exclusive-OR-expression]; make-production inclusive-OR-expression :: <C-binary-expression-representation> => [inclusive-OR-expression OR exclusive-OR-expression], operator: #"BITWISE-XOR", left: inclusive-OR-expression, right: exclusive-OR-expression;
<Grammar productions for C99>+=
production logical-AND-expression => [inclusive-OR-expression]; make-production logical-AND-expression :: <C-binary-expression-representation> => [logical-AND-expression AMPAMP inclusive-OR-expression], operator: #"LOGICAL-AND", left: logical-AND-expression, right: inclusive-OR-expression;
production logical-OR-expression => [logical-AND-expression]; make-production logical-OR-expression :: <C-binary-expression-representation> => [logical-OR-expression OROR logical-AND-expression], operator: #"LOGICAL-OR", left: logical-OR-expression, right: logical-AND-expression;
<Grammar productions for C99>+=
production conditional-expression => [logical-OR-expression]; make-production conditional-expression :: <C-conditional-expression-representation> => [logical-OR-expression QUEST expression COLON conditional-expression], condition: logical-OR-expression, true: expression, false: conditional-expression;
<Grammar productions for C99>+=
production assignment-expression-opt => [/* empty */]; production assignment-expression-opt => [assignment-expression];
Since all of the compound assignment operators are the composition of a binary arithmetic expression and an assignment expression, we can provide a function to generate the reduce actions for each of these operators.
<Definitions used within $C99-grammar-productions
>+=
define function assignment-action (operator :: <C-binary-expression-operator>) => (function :: <function>); method (p :: <simple-parser>, data, s, e) let srcloc = simple-parser-source-location(p, s, e); make(<C-binary-expression-representation>, source-location: srcloc, operator: #"ASSIGN", left: p[0], right: make(<C-binary-expression-representation>, source-location: srcloc, operator: operator, left: p[0], right: p[2])); end; end function;
<Grammar productions for C99>+=
production assignment-expression => [conditional-expression]; make-production assignment-expression :: <C-binary-expression-representation> => [unary-expression EQUALS assignment-expression], operator: #"ASSIGN", left: unary-expression, right: assignment-expression; production assignment-expression :: <C-binary-expression-representation> => [unary-expression STAR-EQUALS assignment-expression], action: assignment-action(#"MUL"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression SLASH-EQUALS assignment-expression], action: assignment-action(#"DIV"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression PERCENT-EQUALS assignment-expression], action: assignment-action(#"MOD"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression PLUS-EQUALS assignment-expression], action: assignment-action(#"ADD"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression MINUS-EQUALS assignment-expression], action: assignment-action(#"SUB"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression SHL-EQUALS assignment-expression], action: assignment-action(#"SHL"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression SHR-EQUALS assignment-expression], action: assignment-action(#"SHR"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression AMP-EQUALS assignment-expression], action: assignment-action(#"BITWISE-AND"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression HAT-EQUALS assignment-expression], action: assignment-action(#"BITWISE-XOR"); production assignment-expression :: <C-binary-expression-representation> => [unary-expression OR-EQUALS assignment-expression], action: assignment-action(#"BITWISE-OR");
<Grammar productions for C99>+=
production expression-opt => [/* empty */]; production expression-opt => [expression];
<Grammar productions for C99>+=
production expression => [assignment-expression]; make-production expression :: <C-binary-expression-representation> => [expression COMMA assignment-expression], operator: #"COMMA", left: expression, right: assignment-expression;
<Grammar productions for C99>+=
production constant-expression => [conditional-expression];
<Modules imported by the cpr-internals
module>+=
use print; use pprint; use format;
<Module cpr-internals
>+=
define method print-C-expression (expression :: <C-constant-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); print(expression.expression-value, stream); end method;
define method print-C-expression (expression :: <C-string-literal-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); print(expression.expression-value, stream); end method;
define method print-C-expression (expression :: <C-variable-reference-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); write(stream, expression.expression-variable); end method;
define method print-C-expression (expression :: <C-function-reference-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); // FIXME end method;
define method print-C-expression (expression :: <C-unary-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); local method do-operator (name :: <byte-string>, operator-level :: <integer>, operand-level :: <integer>, postfix? :: <boolean>) let parens? = operator-level > level; printing-logical-block(stream, prefix: parens? & "(", suffix: parens? & ")") unless (postfix?) write(stream, name) end; print-C-expression(expression.expression-unary-operand, stream, level: operand-level); if (postfix?) write(stream, name) end; end; end; select (expression.expression-operator) #"POSTINC" => do-operator("++", 1, 1, #t); #"POSTDEC" => do-operator("--", 1, 1, #t); #"PREINC" => do-operator("++", 2, 2, #f); #"PREDEC" => do-operator("--", 2, 2, #f); #"ADDROF" => do-operator("&", 2, 2, #f); #"DEREF" => do-operator("*", 2, 2, #f); #"PLUS" => do-operator("+", 2, 2, #f); #"MINUS" => do-operator("-", 2, 2, #f); #"BITWISE-NOT" => do-operator("~", 2, 2, #f); #"LOGICAL-NOT" => do-operator("!", 2, 2, #f); #"SIZEOF" => do-operator("sizeof ", 2, 2, #f); #"ALIGNOF" => do-operator("__alignof__ ", 2, 2, #f); end; end method;
define method print-C-expression (expression :: <C-binary-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); local method do-operator (name :: <byte-string>, operator-level :: <integer>) let parens? = operator-level > level; printing-logical-block(stream, prefix: parens? & "(", suffix: parens? & ")") print-c-expression(expression.expression-binary-left, stream, level: operator-level); unless (name = ",") write-element(stream, ' ') end; pprint-newline(#"fill", stream); format(stream, "%s ", name); print-C-expression(expression.expression-binary-right, stream, level: operator-level - 1); end; end; select (expression.expression-operator) #"MUL" => do-operator("*", 4); #"DIV" => do-operator("/", 4); #"MOD" => do-operator("%", 4); #"ADD" => do-operator("+", 5); #"SUB" => do-operator("-", 5); #"SHL" => do-operator("<<;", 6); #"SHR" => do-operator(">>", 6); #"LT" => do-operator("<", 7); #"LE" => do-operator("<=", 7); #"GT" => do-operator(">", 7); #"GE" => do-operator(">=", 7); #"EQ" => do-operator("==", 8); #"NE" => do-operator("!=", 8); #"BITWISE-AND" => do-operator("&", 9); #"BITWISE-XOR" => do-operator("^", 10); #"BITWISE-OR" => do-operator("|", 11); #"LOGICAL-AND" => do-operator("&&", 12); #"LOGICAL-OR" => do-operator("||", 13); #"ASSIGN" => do-operator("=", 15); #"COMMA" => do-operator(",", 16); end; end method;
define method print-C-expression (expression :: <C-conditional-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); let parens? = level < 14; printing-logical-block(stream, prefix: parens? & "(", suffix: parens? & ")") print-C-expression(expression.expression-conditional-condition, stream, level: 13); write-element(stream, ' '); pprint-newline(#"fill", stream); write(stream, "? "); print-C-expression(expression.expression-conditional-true, stream, level: 16); write-element(stream, ' '); pprint-newline(#"fill", stream); write(stream, ": "); print-C-expression(expression.expression-conditional-false, stream, level: 14); end; end method;
define method print-C-expression (expression :: <C-cast-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); let parens? = level < 3; printing-logical-block(stream, prefix: parens? & "(", suffix: parens? & ")") write-element(stream, '('); // FIXME write(stream, ") "); print-C-expression(expression.expression-cast-operand, stream, level: 3); end; end method;
define method print-C-expression (expression :: <C-sizeof-type-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); write(stream, "sizeof( "); // FIXME write-element(stream, ')'); end method;
define method print-C-expression (expression :: <C-function-call-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); print-C-expression(expression.expression-call-function, stream, level: 1); printing-logical-block(stream, prefix: "(", suffix: ")") for (argument in expression.expression-call-arguments, first? = #t then #f) unless (first?) write(stream, ", "); pprint-newline(#"fill", stream); end; print-C-expression(argument, stream, level: 15); end for; end; end method;
define method print-C-expression (expression :: <C-member-expression-representation>, stream :: <stream>, #key level :: <integer> = $precedence-level-assignment-expression) => (); print-C-expression(expression.expression-member-operand, stream, level: 1); format(stream, ".%s", expression.expression-member-name); end method;
<Module cpr-internals
>+=
define method print-message (expression :: <C-expression-representation>, stream :: <stream>) => (); print-C-expression(expression, stream, level: $precedence-level-assignment-expression); end method;
The C grammar as described in the ISO standard contains a
single ambiguity that prevents it from being strictly LALR(1),
namely the frequenly-encountered if/else ambiguity. We will
resolve this using the standard techique of splitting
statements into two categories, open (possibly
containing an if
with an unmatched else
) and
closed (containing no unpaired if
statements.)
<Grammar productions for C99>+=
production statement => [open-statement]; production statement => [closed-statement];
<Grammar productions for C99>+=
production open-statement => [open-labeled-statement]; production open-statement => [open-selection-statement]; production open-statement => [open-iteration-statement];
<Grammar productions for C99>+=
production closed-statement => [closed-labeled-statement]; production closed-statement => [compound-statement]; production closed-statement => [expression-statement]; production closed-statement => [closed-selection-statement]; production closed-statement => [closed-iteration-statement]; production closed-statement => [jump-statement];
<Grammar productions for C99>+=
production open-labeled-statement => [IDENTIFIER COLON open-statement]; production open-labeled-statement => [_CASE constant-expression COLON open-statement]; production open-labeled-statement => [DEFAULT COLON open-statement];
production closed-labeled-statement => [IDENTIFIER COLON closed-statement]; production closed-labeled-statement => [_CASE constant-expression COLON closed-statement]; production closed-labeled-statement => [DEFAULT COLON closed-statement];
<Grammar productions for C99>+=
production compound-statement => [LBRACE block-item-list-opt RBRACE];
production block-item-list-opt => [/* empty */]; production block-item-list-opt => [block-item-list-opt block-item];
production block-item => [declaration]; production block-item => [statement];
<Grammar productions for C99>+=
production expression-statement => [expression-opt SEMI];
<Grammar productions for C99>+=
production open-selection-statement => [_IF LPAREN expression RPAREN statement]; production open-selection-statement => [_IF LPAREN expression RPAREN closed-statement _ELSE open-statement]; production open-selection-statement => [SWITCH LPAREN expression RPAREN open-statement];
production closed-selection-statement => [_IF LPAREN expression RPAREN closed-statement _ELSE closed-statement]; production closed-selection-statement => [SWITCH LPAREN expression RPAREN closed-statement];
<Grammar productions for C99>+=
production open-iteration-statement => [_WHILE LPAREN expression RPAREN open-statement]; production open-iteration-statement => [_FOR LPAREN expression-opt SEMI expression-opt SEMI expression-opt RPAREN open-statement]; production open-iteration-statement => [_FOR LPAREN declaration expression-opt SEMI expression-opt RPAREN open-statement];
production closed-iteration-statement => [_WHILE LPAREN expression RPAREN closed-statement]; production closed-iteration-statement => [DO statement _WHILE LPAREN expression RPAREN SEMI]; production closed-iteration-statement => [_FOR LPAREN expression-opt SEMI expression-opt SEMI expression-opt RPAREN closed-statement]; production closed-iteration-statement => [_FOR LPAREN declaration expression-opt SEMI expression-opt RPAREN closed-statement];
<Grammar productions for C99>+=
production jump-statement => [GOTO IDENTIFIER SEMI]; production jump-statement => [CONTINUE SEMI]; production jump-statement => [BREAK SEMI]; production jump-statement => [RETURN expression-opt SEMI];