C Program Representation Library

Peter S. Housel

August 2004–January 2007


Copyright

This is the Monday cpr library, providing an abstract syntax tree language for representing programs in the C programming language.

Copyright ©2004–2007 Peter S. Housel.

This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


Introduction

The C programming language serves as a popular language for systems and applications programming, as well as a “least common denominator” language for describing system interfaces, and as a “portable assembly language” that compilers for other languages can emit.

This library provides a means for Dylan programs to represent C program code and declarations, and to parse and unparse C language programs.

<Module Dylan-user>=

define library cpr
  use Common-Dylan;
  use Source-Location;
  use IO;
  use System;
  <Import external libraries used by cpr>

  export cpr-preprocessor,
         cpr;
end library;
          
define module cpr-preprocessor
  <create identifiers exported by the cpr-preprocessor module>
end module;
          
define module cpr
  use cpr-preprocessor,
    export: all;
  <create identifiers exported by the cpr module>
end module;
          
<Utility module definitions>
define module cpr-internals
  use Common-Dylan, exclude: { format-to-string };
  use Locators;
  use cpr-preprocessor;
  use cpr;
  <Modules imported by the cpr-internals module>
end module;
          

Interfaces

C Preprocessor Dialects

The C preprocessor FIXME

<create identifiers exported by the cpr-preprocessor module>=

  // Preprocessor dialects
  create
    <C-preprocessor-dialect>,
    $C90-C-preprocessor-dialect,
    $C99-C-preprocessor-dialect,
    $MSVC6-C-preprocessor-dialect,
    $gnu89-C-preprocessor-dialect,
    $gnu99-C-preprocessor-dialect;
            

<C-preprocessor-dialect>

[Open Class]


The class of representations of C preprocessor dialects.

Superclasses:

<object>

Init-keywords:

None.

Description:

FIXME

Definition

<Module cpr-internals>=

define open class <C-preprocessor-dialect> (<object>)
  <Slots in the <C-preprocessor-dialect> class>
end class;
                
<Define an initialize method on <C-preprocessor-dialect>><Define the preprocessor-directive-productions macro>

$C90-C-preprocessor-dialect

[Constant]


The C preprocessor dialect representing ISO/IEC 9899:1990.

Type:

<C-preprocessor-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $C90-C-preprocessor-dialect :: <C-preprocessor-dialect>
  = make(<C-preprocessor-dialect>);
                

$C99-C-preprocessor-dialect

[Constant]


The C preprocessor dialect representing ISO/IEC 9899:1999.

Type:

<C-preprocessor-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $C99-C-preprocessor-dialect :: <C-preprocessor-dialect>
  = make(<C-preprocessor-dialect>, parent: $C90-C-preprocessor-dialect);
                

$MSVC6-C-preprocessor-dialect

[Constant]


The C preprocessor dialect representing Microsoft Visual C version 6.

Type:

<C-preprocessor-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $MSVC6-C-preprocessor-dialect :: <C-preprocessor-dialect>
  = make(<C-preprocessor-dialect>, parent: $C90-C-preprocessor-dialect);
                

$gnu89-C-preprocessor-dialect

[Constant]


The C preprocessor dialect representing ISO/IEC 9899:1990 C with GNU extensions.

Type:

<C-preprocessor-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $gnu89-C-preprocessor-dialect :: <C-preprocessor-dialect>
  = make(<C-preprocessor-dialect>, parent: $C90-C-preprocessor-dialect);
                

$gnu99-C-preprocessor-dialect

[Constant]


The C preprocessor dialect representing ISO/IEC 9899:1999 with GNU extensions.

Type:

<C-preprocessor-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $gnu99-C-preprocessor-dialect :: <C-preprocessor-dialect>
  = make(<C-preprocessor-dialect>, parent: $C99-C-preprocessor-dialect);
                

The Preprocessor and Preprocessing Translation Units

<create identifiers exported by the cpr-preprocessor module>+=

  // Translation units
  create
    <C-preprocessing-translation-unit-representation>,
    preprocessing-header-search-path,
    preprocessing-system-header-search-path,
    preprocessing-macros,

    preprocessor-define,
    preprocessor-undefine,

    preprocess-C-source-file,
    preprocess-C-header-file,
    preprocess-C-system-header-file,
    preprocess-C-stream,

    preprocessor-token-string;
            

<C-preprocessing-translation-unit-representation>

[Class]


The class of representations of entire C preprocessing translation units.

Superclasses:

<object>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-preprocessing-translation-unit-representation> (<object>)
  constant slot preprocessing-header-search-path :: <deque>
    = make(<deque>);
  constant slot preprocessing-system-header-search-path :: <deque>
    = make(<deque>);
  constant slot preprocessing-trigraph-support? :: <boolean> = #f,
    init-keyword: trigraph-support:;
  <Slots in <C-preprocessing-translation-unit-representation>>
end class;
                

preprocessor-define

[Generic Function]


Adds a preprocessor macro definition to a preprocessor translation unit.

Signature:

preprocessor-define preprocessing-translation-unit identifier replacement

Arguments:
preprocessing-translation-unit
An instance of <C-preprocessing-translation-unit-representation>.
identifier
An instance of <byte-string>.
replacement
An instance of <byte-string>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method preprocessor-define
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     identifier :: <byte-string>,
     replacement :: <byte-string>)
 => ();
  <Define identifier as an object-like preprocessing macro>
end method;
                

preprocessor-undefine

[Generic Function]


Removes a preprocessor macro definition to a preprocessor translation unit.

Signature:

preprocessor-define preprocessing-translation-unit identifier

Arguments:
preprocessing-translation-unit
An instance of <C-preprocessing-translation-unit-representation>.
identifier
An instance of <byte-string>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method preprocessor-undefine
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     identifier :: <byte-string>)
 => ();
  <Undefine identifier>
end method;
                

preprocess-C-source-file

[Generic Function]


Preprocesses a C source file.

Signature:

preprocess-C-source-file preprocessing-translation-unit locator consumer consumer-data

Arguments:
preprocessing-translation-unit
An instance of <C-preprocessing-translation-unit-representation>.
locator
An instance of <file-locator>.
consumer
An instance of <function>.
consumer-data
An instance of <object>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method preprocess-C-source-file
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     locator :: <file-locator>,
     consumer :: <function>,
     consumer-data,
     #key dialect = $C99-C-preprocessor-dialect)
 => ();
  <Preprocess source file locator>
end method;
                

preprocess-C-header-file

[Generic Function]


Preprocesses a C header file.

Signature:

preprocess-C-header-file preprocessing-translation-unit locator search-directory consumer consumer-data

Arguments:
preprocessing-translation-unit
An instance of <C-preprocessing-translation-unit-representation>.
locator
An instance of <file-locator>.
search-directory
An instance of <directory-locator>.
consumer
An instance of <function>.
consumer-data
An instance of <object>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method preprocess-C-header-file
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     locator :: <file-locator>,
     search-directory :: <directory-locator>,
     consumer :: <function>,
     consumer-data,
     #key dialect = $C99-C-preprocessor-dialect)
 => ();
  <Preprocess the header file locator>
end method;
                

preprocess-C-system-header-file

[Generic Function]


Preprocesses a C system header file.

Signature:

preprocess-C-system-header-file preprocessing-translation-unit locator consumer consumer-data

Arguments:
preprocessing-translation-unit
An instance of <C-preprocessing-translation-unit-representation>.
locator
An instance of <file-locator>.
consumer
An instance of <function>.
consumer-data
An instance of <object>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method preprocess-C-system-header-file
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     locator :: <file-locator>,
     consumer :: <function>,
     consumer-data,
     #key dialect = $C99-C-preprocessor-dialect)
 => ();
  <Preprocess the system header file locator>
end method;
                

preprocess-C-stream

[Generic Function]


Preprocesses C code from an input stream.

Signature:

preprocess-C-stream preprocessing-translation-unit stream directory rangemap consumer consumer-data

Arguments:
preprocessing-translation-unit
An instance of <C-preprocessing-translation-unit-representation>.
stream
An instance of <stream>.
rangemap
An instance of <source-location-rangemap>.
consumer
An instance of <function>.
consumer-data
An instance of <object>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define generic preprocess-C-stream
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     stream :: <stream>,
     directory :: false-or(<directory-locator>),
     rangemap :: <source-location-rangemap>,
     consumer :: <function>,
     consumer-data,
     #key start-position,
          dialect = $C99-C-preprocessor-dialect)
 => (end-position :: <integer>);
                

preprocessor-token-string

[Function]


Returns a string corresponding to a preprocessing token.

Signature:

preprocessor-token-string token-name token-valuestring

Arguments:
token-name
An instance of <symbol>.
token-value
An instance of <object>.
Values:
string
An instance of <string>.
Description:

FIXME

Definition:

<Module cpr-internals>+=

define function preprocessor-token-string
    (token-name :: <symbol>, token-value :: <object>)
 => (string :: <byte-string>);
  select (token-name)
    <Cases for preprocessor-token-string>
    otherwise =>
      error("preprocessor-token-string called for unknown token %s(%=)",
            token-name, token-value);
  end select
end function;
                

C Language Dialects

The C language FIXME

<create identifiers exported by the cpr module>=

  // Language dialects
  create
    <C-language-dialect>,
    $C90-C-language-dialect,
    $C99-C-language-dialect,
    $MSVC6-C-language-dialect;
            

<C-language-dialect>

[Open Class]


The class of representations of C language dialects.

Superclasses:

<object>

Init-keywords:

None.

Description:

FIXME

Definition

<Module cpr-internals>+=

define open class <C-language-dialect> (<object>)
  //  FIXME 
end class;
                

$C90-C-language-dialect

[Constant]


The C language dialect representing ISO/IEC 9899:1990.

Type:

<C-language-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $C90-C-language-dialect :: <C-language-dialect>
  = make(<C-language-dialect>);
                

$C99-C-language-dialect

[Constant]


The C language dialect representing ISO/IEC 9899:1999.

Type:

<C-language-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $C99-C-language-dialect :: <C-language-dialect>
  = make(<C-language-dialect>);
                

$MSVC6-C-language-dialect

[Constant]


The C language dialect representing Microsoft Visual C version 6.

Type:

<C-language-dialect>

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant $MSVC6-C-language-dialect :: <C-language-dialect>
  = make(<C-language-dialect>);
                

Translation Platforms

<create identifiers exported by the cpr module>+=

  // Platforms
  create
    <C-translation-platform>,
    platform-system-include-path,

    platform-char-bits,
    platform-char-signed?,

    platform-type-size,
    platform-type-alignment;
            

Translation Units

<create identifiers exported by the cpr module>+=

  // Translation units
  create
    <C-translation-unit-representation>,
    translation-unit-external-declarations,

    parse-C-source-file,
    parse-C-header-file,
    parse-C-system-header-file;
            

<C-translation-unit-representation>

[Class]


The class of representations of entire C translation units.

Superclasses:

<object>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-translation-unit-representation> (<object>)
  constant slot translation-unit-preprocessing-translation-unit
    :: <C-preprocessing-translation-unit-representation>,
    required-init-keyword: preprocessing-translation-unit:;
  <>
end class;
                

parse-C-source-file

[Generic Function]


Parses a C source file and adds a representation of its contents to a translation unit object.

Signature:

parse-C-source-file translation-unit locator

Arguments:
translation-unit
An instance of <C-translation-unit-representation>.
locator
An instance of <file-locator>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method parse-C-source-file
    (translation-unit :: <C-translation-unit-representation>,
     locator :: <file-locator>)
 => ();
  <Parse the source file locator into translation-unit>
end method;
                

parse-C-header-file

[Generic Function]


Parses a C header file and adds a representation of its contents to a translation unit object.

Signature:

parse-C-header-file translation-unit locator

Arguments:
translation-unit
An instance of <C-translation-unit-representation>.
locator
An instance of <file-locator>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method parse-C-header-file
    (translation-unit :: <C-translation-unit-representation>,
     locator :: <file-locator>)
 => ();
  <>
end method;
                

parse-C-system-header-file

[Generic Function]


Parses a C system header file and adds a representation of its contents to a translation unit object.

Signature:

parse-C-system-header-file translation-unit locator

Arguments:
translation-unit
An instance of <C-translation-unit-representation>.
locator
An instance of <file-locator>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define method parse-C-system-header-file
    (translation-unit :: <C-translation-unit-representation>,
     locator :: <file-locator>)
 => ();
  <>
end method;
                

Types and Declarations

<create identifiers exported by the cpr module>+=

  // Types
  create
    <C-type-representation>,
    <C-integer-type-representation>,
    <C-enum-type-representation>,
    <C-float-type-representation>,
    <C-array-type-representation>,
    <C-function-type-representation>,
    <C-struct/union-type-representation>,
    <C-pointer-type-representation>,
    $C-void*-representation,
    $C-char*-representation,
    $C-const-char*-representation;
            

<C-type-representation>

[Abstract Class]


The class of representations of C types.

Superclasses:

<object>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define abstract class <C-type-representation> (<object>)
  <>
end class;
                

<C-integer-type-representation>

[Class]


The class of representations of C integer types.

Superclasses:

<C-type-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-integer-type-representation> (<C-type-representation>)
  <>
end class;
                

<C-enum-type-representation>

[Class]


The class of representations of C enumeration types.

Superclasses:

<C-type-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-enum-type-representation> (<C-type-representation>)
  <>
end class;
                

<C-float-type-representation>

[Class]


The class of representations of C floating-point types.

Superclasses:

<C-type-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-float-type-representation> (<C-type-representation>)
  <>
end class;
                

<C-array-type-representation>

[Class]


The class of representations of C array types.

Superclasses:

<C-type-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-array-type-representation> (<C-type-representation>)
  <>
end class;
                

<C-function-type-representation>

[Class]


The class of representations of C function types.

Superclasses:

<C-type-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-function-type-representation> (<C-type-representation>)
  <>
end class;
                

<C-struct/union-type-representation>

[Class]


The class of representations of C struct and union types.

Superclasses:

<C-type-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-struct/union-type-representation> (<C-type-representation>)
  <>
end class;
                

<C-pointer-type-representation>

[Class]


The class of representations of C array types.

Superclasses:

<C-type-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-pointer-type-representation> (<C-type-representation>)
  <>
end class;
                

Variables and Scopes

Functions

Expressions

<create identifiers exported by the cpr module>+=

  // Expressions
  create
    <C-expression-representation>,
    expression-type,

    <C-constant-expression-representation>,
    <C-string-literal-expression-representation>,
    expression-value,

    <C-variable-reference-expression-representation>,
    expression-variable,

    <C-function-reference-expression-representation>,

    expression-operator,

    <C-unary-expression-representation>,
    expression-unary-operand,

    <C-binary-expression-representation>,
    expression-binary-left,
    expression-binary-right,

    <C-conditional-expression-representation>,
    expression-conditional-condition,
    expression-conditional-true,
    expression-conditional-false,

    <C-cast-expression-representation>,
    expression-cast-operand,

    <C-sizeof-type-expression-representation>,
    expression-sizeof-type,

    <C-function-call-expression-representation>,
    expression-call-function,
    expression-call-arguments,

    <C-member-expression-representation>,
    expression-member-operand,
    expression-member-name,

    print-C-expression,
    $precedence-level-assignment-expression,
    $precedence-level-expression;
            

<C-expression-representation>

[Abstract Class]


The class of representations of C expressions.

Superclasses:

<source-location-mixin>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define abstract class <C-expression-representation> (<source-location-mixin>)
  <>
end class;
                
define sealed domain initialize(<C-expression-representation>);
                

expression-type

[Generic Function]


Returns the type of a C expression.

Signature:

expression-type expressiontype

Arguments:
expression
An instance of <C-expression-representation>.
Values:
type
An instance of <C-type-representation>.
Description:

FIXME

Definition:

<Module cpr-internals>+=

define generic expression-type
    (expression :: <C-expression-representation>)
 => (type :: <C-type-representation>);
                

<C-constant-expression-representation>

[Class]


The class of representations of C constant expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-constant-expression-representation>
    (<C-expression-representation>)
  constant slot expression-value :: <object>,
    required-init-keyword: value:;
end class;
                
define sealed domain make(singleton(<C-constant-expression-representation>));
                

<C-string-literal-expression-representation>

[Class]


The class of representations of C string literal expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-string-literal-expression-representation>
    (<C-expression-representation>)
  constant slot expression-value :: <object>,
    required-init-keyword: value:;
end class;
                
define sealed domain make(singleton(<C-string-literal-expression-representation>));
                

<C-variable-reference-expression-representation>

[Class]


The class of representations of C variable reference expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-variable-reference-expression-representation>
    (<C-expression-representation>)
  constant slot expression-variable :: <string>, // FIXME
    required-init-keyword: variable:;
end class;
                
define sealed domain make(singleton(<C-variable-reference-expression-representation>));
                

<C-function-reference-expression-representation>

[Class]


The class of representations of C function reference expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-function-reference-expression-representation>
    (<C-expression-representation>)
  //
end class;
                
define sealed domain make(singleton(<C-function-reference-expression-representation>));
                

<C-unary-expression-representation>

[Class]


The class of representations of C unary expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant <C-unary-expression-operator>
  = one-of(#"POSTINC", #"POSTDEC", #"PREINC", #"PREDEC", 
           #"ADDROF", #"DEREF", 
           #"PLUS", #"MINUS", #"BITWISE-NOT", #"LOGICAL-NOT",
           #"SIZEOF", #"ALIGNOF");
                
define class <C-unary-expression-representation>
    (<C-expression-representation>)
  slot expression-type :: <C-type-representation>,
    init-keyword: type:;
  constant slot expression-operator :: <C-unary-expression-operator>,
    required-init-keyword: operator:;
  constant slot expression-unary-operand :: <C-expression-representation>,
    required-init-keyword: operand:;
end class;
                
define sealed domain make(singleton(<C-unary-expression-representation>));
                

<C-binary-expression-representation>

[Class]


The class of representations of C binary expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define constant <C-binary-expression-operator>
  = one-of(#"MUL", #"DIV", #"MOD", #"ADD", #"SUB",
           #"SHL", #"SHR",
           #"LT", #"LE", #"GT", #"GE", #"EQ", #"NE",
           #"BITWISE-AND", #"BITWISE-XOR", #"BITWISE-OR",
           #"LOGICAL-AND", #"LOGICAL-OR",
           #"ASSIGN", #"COMMA");
                
define class <C-binary-expression-representation>
    (<C-expression-representation>)
  slot expression-type :: <C-type-representation>,
    init-keyword: type:;
  constant slot expression-operator :: <C-binary-expression-operator>,
    required-init-keyword: operator:;
  constant slot expression-binary-right :: <C-expression-representation>,
    required-init-keyword: right:;
  constant slot expression-binary-left :: <C-expression-representation>,
    required-init-keyword: left:;
end class;
                
define sealed domain make(singleton(<C-binary-expression-representation>));
                

<C-conditional-expression-representation>

[Class]


The class of representations of C binary expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-conditional-expression-representation>
    (<C-expression-representation>)
  constant slot expression-conditional-condition :: <C-expression-representation>,
    required-init-keyword: condition:;
  constant slot expression-conditional-true :: <C-expression-representation>,
    required-init-keyword: true:;
  constant slot expression-conditional-false :: <C-expression-representation>,
    required-init-keyword: false:;
end class;
                
define sealed domain make(singleton(<C-conditional-expression-representation>));
                

<C-cast-expression-representation>

[Class]


The class of representations of C cast expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-cast-expression-representation>
    (<C-expression-representation>)
  slot expression-type :: <C-type-representation>,
    init-keyword: type:;
  constant slot expression-cast-operand :: <C-expression-representation>,
    required-init-keyword: operand:;
end class;
                
define sealed domain make(singleton(<C-cast-expression-representation>));
                

<C-sizeof-type-expression-representation>

[Class]


The class of representations of C sizeof expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-sizeof-type-expression-representation>
    (<C-expression-representation>)
  constant slot expression-sizeof-type :: <C-type-representation>,
    required-init-keyword: sizeof-type:;
end class;
                
define sealed domain make(singleton(<C-sizeof-type-expression-representation>));
                

<C-function-call-expression-representation>

[Class]


The class of representations of C function call expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-function-call-expression-representation>
    (<C-expression-representation>)
  constant slot expression-call-function :: <C-expression-representation>,
    required-init-keyword: function:;
  constant slot expression-call-arguments :: <sequence>,
    required-init-keyword: arguments:;
end class;
                
define sealed domain make(singleton(<C-function-call-expression-representation>));
                

<C-member-expression-representation>

[Class]


The class of representations of C struct/union field member expressions.

Superclasses:

<C-expression-representation>

Init-keywords:

FIXME

Description:

FIXME

Definition

<Module cpr-internals>+=

define class <C-member-expression-representation>
    (<C-expression-representation>)
  constant slot expression-member-operand :: <C-expression-representation>,
    required-init-keyword: operand:;
  constant slot expression-member-name :: <string>,
    required-init-keyword: name:;
end class;
                
define sealed domain make(singleton(<C-member-expression-representation>));
                

print-C-expression

[Generic Function]


Prints a C expression using C syntax.

Signature:

print-C-expression expression steam

Arguments:
expression
An instance of <C-expression-representation>.
stream
An instance of <stream>.
Values:

None.

Description:

FIXME

Definition:

<Module cpr-internals>+=

define generic print-C-expression
    (expression :: <C-expression-representation>,
     stream :: <stream>,
     #key level)
 => ();
                

$precedence-level-assignment-expression

[Constant]


FIXME

Description:

FIXME

Definition:

<Module cpr-internals>+=

define constant $precedence-level-assignment-expression :: <integer>
  = 15;
                

$precedence-level-expression

[Constant]


FIXME

Description:

FIXME

Definition:

<Module cpr-internals>+=

define constant $precedence-level-expression :: <integer>
  = 16;
                

Statements

Interned Strings

We'll be representing C identifiers using interned strings; in particular, for any particular identifier name, there will be a canonical <byte-string> object that represents its name.

<Utility module definitions>=

define module interned-string
  use common-dylan;
  use byte-vector;
  export intern-string;
end module;
          

We'll keep track of interned strings in an ordinary chained hash table.

<Module interned-string>=

define variable *interned-string-buckets* :: <simple-object-vector>
  = make(<simple-object-vector>, size: 53);
          
define variable *interned-string-count* :: <integer> = 0;
          

The string table is made up of entries, each of type <interned-string-entry>:

<Module interned-string>+=

define class <interned-string-entry> (<object>)
  constant slot interned-string :: <byte-string>,
    required-init-keyword: interned-string:;
  constant slot interned-string-hash-value :: <integer>,
    required-init-keyword: hash:;
  slot interned-string-entry-next :: false-or(<interned-string-entry>),
    init-value: #f, init-keyword: next:;
end class;
          
define sealed domain make(singleton(<interned-string-entry>));
          
define sealed domain initialize(<interned-string-entry>);
          

The following hash function is modeled after the one in SML/NJ.

<Module interned-string>+=

define function interned-string-hash
    (name :: <byte-string>,
     start :: <integer>,
     _end  :: <integer>)
 => (hash-value :: <integer>);
  for (i :: <integer> from start below _end,
       hash :: <integer> = 0
         then logand(ash(hash, 5) + hash + 720 + as(<integer>, name[i]),
                     #x3FFFFF))
  finally
    hash
  end for
end function;
          

We'll use the intern-string method to return canonical strings.

<Module interned-string>+=

define method intern-string
    (name :: <byte-string>,
     #key start: start :: false-or(<integer>) = 0,
          end: _end :: false-or(<integer>) = name.size)
 => (canonical-string :: <byte-string>);
  let hash-value = interned-string-hash(name, start, _end);
  let bucket-value = modulo(hash-value, *interned-string-buckets*.size);
  block (return)
    for (entry = *interned-string-buckets*[bucket-value]
           then entry.interned-string-entry-next,
         while: entry)
      if (entry.interned-string-hash-value = hash-value
            & entry.interned-string.size = _end - start
            & block (next)
                for (i :: <integer> from start below _end,
                     j :: <integer> from 0)
                  if (name[i] ~== entry.interned-string[j])
                    next(#f);
                  end if;
                end for;
                #t;
              end block)
        return(entry.interned-string);
      end if;
    end for;
    <Return a new canonical string>
  end block;
end method;
          

We insert a new entry into the table when necessary, insuring that the hash table maintains a loading factor of no more than unity.

<Return a new canonical string>=

*interned-string-count* := *interned-string-count* + 1;
if (*interned-string-count* > *interned-string-buckets*.size)
  <Resize the hash table>
  bucket-value := modulo(hash-value, *interned-string-buckets*.size);
end if;
let new-string = make(<byte-string>, size: _end - start);
copy-bytes(new-string, 0, name, start, _end - start);
let new-entry = make(<interned-string-entry>,
                     interned-string: new-string,
                     hash: hash-value,
                     next: *interned-string-buckets*[bucket-value]);
*interned-string-buckets*[bucket-value] := new-entry;
new-string
          

If we have to grow the hash table, we grow it to a larger prime number of buckets. The table of primes was borrowed from the Gwydion Dylan d2c runtime library; that version borrowed the same table from somewhere else.

<Module interned-string>+=

define constant $prime-table = 
  #[/* 53, */   97,         193,       389,       769,
    1543,       3079,       6151,      12289,     24593,
    49157,      98317,      196613,    393241,    786433,
    1572869,    3145739,    6291469,   12582917,  25165843,
    50331653,   100663319,  201326611, 402653189, 805306457];
          

<Resize the hash table>=

let new-size =
  block(done)
    for (prime in $prime-table)
      if (prime > *interned-string-count*)
        done(prime);
      end if;
    end for;
  end block;
let new-buckets = make(<simple-object-vector>, size: new-size);

for (bucket in *interned-string-buckets*)
  iterate loop (entry = bucket)
    if (entry)
      let next = entry.interned-string-entry-next;
      let bucket-value = modulo(entry.interned-string-hash-value, new-size);
      entry.interned-string-entry-next := new-buckets[bucket-value];
      new-buckets[bucket-value] := entry;
      loop(next);
    end if;
  end iterate;
end for;
*interned-string-buckets* := new-buckets;
          

Hierarchical Symbol Tables

Given a canonicalized string representation, the only thing we need to implement symbol tables for the representation is a hierarchical lookup table facility.

<Utility module definitions>+=

define module hierarchical-table
  use common-dylan;
  export <hierarchical-table>;
end module;
          

<Module hierarchical-table>=

define class <hierarchical-table> (<mutable-explicit-key-collection>, <stretchy-collection>)
  slot parent-table :: false-or(<hierarchical-table>),
    init-value: #f, init-keyword: parent:;
  constant slot object-table :: <object-table> = make(<object-table>);
end class;
          
define sealed domain make(singleton(<hierarchical-table>));
          
define sealed domain initialize(<hierarchical-table>);
          

The element-setter method just delegates to the contained object table.

<Module hierarchical-table>+=

define method element-setter
    (value, table :: <hierarchical-table>, key)
 => (value);
  element-setter(value, table.object-table, key)
end method;
          

The getter method retrieves the value for the given key (if there is one), and memoizes the lookup in the top-level table.

<Module hierarchical-table>+=

define method element
    (table :: <hierarchical-table>, key, #key default = $unsupplied)
 => (value);
  block (return)
    for (search-table = table then search-table.parent-table,
         while: search-table)
      let value = element(search-table.object-table, key, default: $unfound);
      if (found?(value))
        if (search-table == table)
          return(value)
        else
          return(element(table.object-table, key) := value)
        end if;
      end;
    end for;
    if (supplied?(default))
      default
    else
      error("element %= not found");
    end if
  end block
end method;
          

<Module hierarchical-table>+=

  
define method remove-key!
    (table :: <hierarchical-table>, key)
 => (present? :: <boolean>);
  let present? = #f;
  for (search-table = table then search-table.parent-table,
       while: search-table)
    present? := remove-key!(search-table, key) | present?;
  end for;
  present?
end method;
          

We can implement forward-iteration-protocol on <hierarchical-table> by first flattening the hierarchy, copying entries from parent tables in the top-level table, and then iterating through the top-level table.

<Module hierarchical-table>+=

define method forward-iteration-protocol
    (table :: <hierarchical-table>)
 => (initial-state, limit,
     next-state :: <function>, finished-state? :: <function>,
     current-key :: <function>, current-element :: <function>,
     current-element-setter :: <function>, copy-state :: <function>);
  for (other-table = table.parent-table then other-table.parent-table,
       while: other-table)
    for (item keyed-by key in other-table.object-table)
      unless (found?(element(table.object-table, key, default: $unfound)))
        table.object-table[key] := item;
      end unless;
    end for;
  end for;
  table.parent-table := #f;
  forward-iteration-protocol(table.object-table)
end method;
          

The C Preprocessor

The translation of a C program consists of several phases. The first few phases take one or more C source or header files and combine them to form a preprocessing translation unit. After the preprocessing phases are complete, the combined text of the program forms a translation unit, which is then translated by the remaining phases.

File and Stream Parsing Interfaces

The preprocess-C-source-file method opens the given locator, marks the beginning of the file in a new rangemap, and preprocesses the file using preprocess-C-stream.

<Modules imported by the cpr-internals module>=

use streams;
use file-system;
use source-location;
use source-location-rangemap;
use source-location-conditions;
            

<Preprocess source file locator>=

let rangemap = make(<source-location-rangemap>);
rangemap-add-line-file(rangemap, 0, 1, locator);
with-open-file(stream = locator)
  preprocess-C-stream(preprocessing-translation-unit, stream,
                      locator.locator-directory, rangemap,
                      consumer, consumer-data,
                      dialect: dialect);
end;
            

For #include files we maintain two search paths, one for ordinary header files and one for system header files. Normally these paths are identical but they are kept separate for full generality.

<Module cpr-internals>+=

define method do-with-C-header-file
    (locator :: <file-locator>,
     search-directory :: false-or(<directory-locator>),
     search-path :: <sequence>,
     function :: <function>)
  let found-locator
    = if (~locator-relative?(locator))
        locator
      elseif (search-directory
                & file-exists?(merge-locators(locator, search-directory)))
        merge-locators(locator, search-directory)
      else
        block (found)
          for (directory in search-path)
            let merged = merge-locators(locator, directory);
            if (file-exists?(merged)) found(merged) end if;
          finally
            locator
          end for
        end block
      end if;
  let stream
    = make(<file-stream>, direction: #"input", locator: found-locator);
  block ()
    function(found-locator, stream);
  cleanup
    close(stream);
  end block;
end method;
            

<Preprocess the header file locator>=

do-with-C-header-file
  (locator, search-directory,
   preprocessing-translation-unit.preprocessing-header-search-path,
   method (found-locator :: <file-locator>, stream :: <stream>) => ();
     let rangemap = make(<source-location-rangemap>);
     rangemap-add-line-file(rangemap, 0, 1, found-locator);
     preprocess-C-stream(preprocessing-translation-unit,
                         stream, found-locator.locator-directory, rangemap,
                         consumer, consumer-data,
                         dialect: dialect);
   end);
            

<Preprocess the system header file locator>=

do-with-C-header-file
  (locator, #f,
   preprocessing-translation-unit.preprocessing-system-header-search-path,
   method (found-locator :: <file-locator>, stream :: <stream>) => ();
     let rangemap = make(<source-location-rangemap>);
     rangemap-add-line-file(rangemap, 0, 1, found-locator);
     preprocess-C-stream(preprocessing-translation-unit,
                         stream, found-locator.locator-directory, rangemap,
                         consumer, consumer-data,
                         dialect: dialect);
   end);
            

Source File Reading and Line Splicing

The first thing the preprocessor does is to read the input file, and to splice source lines that end with \ together with the lines that follow them.

Our main file-reading loop reads the source file one input buffer at a time and tokenizes it.

<Module cpr-internals>+=

define method preprocess-C-stream
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     stream :: <buffered-stream>,
     directory :: false-or(<directory-locator>),
     rangemap :: <source-location-rangemap>,
     consumer :: <function>,
     consumer-data,
     #key start-position :: <integer> = 0,
          dialect :: <C-preprocessor-dialect> = $C99-C-preprocessor-dialect)
 => (end-position :: <integer>);
  <Initialize the line-splicing state>
  <Initialize the trigraph state>
  <Initialize the scanner>
  <Initialize the preprocessor and preprocessor-token-dispatcher>
  let buf = get-input-buffer(stream);
  let text :: <byte-string>
    = make(<byte-string>, size: if (buf) buf.buffer-size else 0 end + 2);
  iterate buf-loop (buf :: false-or(<buffer>) = buf)
    if (buf)
      let text-size :: <integer> = buf.buffer-end - buf.buffer-next;
      if (preprocessing-translation-unit.preprocessing-trigraph-support?)
        <Copy text and translate trigraphs from buf to text>
      else
        copy-bytes(text, 0, buf, buf.buffer-next, text-size);
      end if;
      <Line-splice and tokenize the program text in text>
      buf.buffer-next := buf.buffer-end;
      buf-loop(next-input-buffer(stream));
    end if;
  end;
  release-input-buffer(stream);
  <Finalize the trigraph state>
  <Finalize the scanner and preprocessor and return the end position>
end method;
            

Line splice processing is slightly complicated by the fact that a line splice may consist of a backslash followed by a CR, an LF, or a CR+LF pair, and the fact that one of these sequences might cross a buffer boundary.

<Initialize the line-splicing state>=

let splice-state :: one-of(#f, #"backslash", #"backslash-cr") = #f;
            

Our approach involves sending segments of the buffer, as large as possible, to the scanner. If we encounter a line splice, we end the current segment and begin a new one after the splice.

This is compliated slightly by the case when we see a backslash at the end of the buffer, and can't tell whether the backslash needs to be included or not. We take the approach of scanning the buffer without the backslash, and scan a new backslash before the beginning of the next buffer if it turns out not to be part of a splice.

<Line-splice and tokenize the program text in text>=

iterate char-loop (start :: <buffer-index> = 0,
                   i :: <buffer-index> = 0)
  if (i < text-size)
    let c :: <byte-character> = as(<character>, text[i]);
    if (c == '\\')
      splice-state := #"backslash";
      char-loop(start, i + 1);
    elseif (splice-state == #f)
      char-loop(start, i + 1);
    elseif (splice-state == #"backslash")
      if (c == '\r')
        scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
                                    text, start, i - 1);
        splice-state := #"backslash-cr";
        char-loop(i + 1, i + 1);
      elseif (c == '\n')
        if (i > start)
          scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
                                      text, start, i - 1);
        end if;
        rangemap-add-line(rangemap, scanner.scanner-source-position, #f);
        splice-state := #f;
        char-loop(i + 1, i + 1);
      elseif (i = 0)
        scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
                                    "\\", 0, 1);
        if (c ~== '\\')
          splice-state := #f;
        end if;
        char-loop(i, i + 1);
      else
        if (c ~== '\\')
          splice-state := #f;
        end if;
        char-loop(start, i + 1);  
      end if;
    else // (splice-state == #"backslash-cr")
      splice-state := #f;
      rangemap-add-line(rangemap, scanner.scanner-source-position, #f);
      if (c == '\n')
        char-loop(i + 1, i + 1);
      else
        if (c == '\\')
          splice-state := #"backslash";
        end if;
        char-loop(i, i + 1);
      end if;
    end if;
  else
    scan-C-preprocessing-tokens
      (preprocessor-token-dispatcher, scanner, text, start,
       if (splice-state == #"backslash") i - 1 else i end);
  end if;
end iterate;        
            

Separation into Preprocessing Tokens

The next phase separates the program text into preprocessing tokens. The set of preprocessing tokens is similar to the set of C tokens, with a few additions.

We use the simple-parser library to do tokenization and parsing of C program text.

<Import external libraries used by cpr>=

  use simple-parser;
            

<Modules imported by the cpr-internals module>+=

  use byte-vector;
  use simple-parser;
  use simple-lexical-scanner;
            

The extract-action method is used as the semantic value function for various preprocessing tokens to extract the original recognized token string.

<Module cpr-internals>+=

define function extract-action
    (token-string :: <byte-string>,
     token-start :: <integer>,
     token-end :: <integer>)
 => (result :: <byte-string>);
  let result = make(<byte-string>, size: token-end - token-start);
  copy-bytes(result, 0, token-string, token-start, token-end - token-start);
  result
end;
            

<Module cpr-internals>+=

<Semantic value function definitions>
define constant $C-tokens
  = simple-lexical-definition
      token EOF;
      <Token definitions>
      token UNKNOWN-TOKEN = ".",
        priority: -1,
        semantic-value-function: extract-action;
    end;
            

<Cases for preprocessor-token-string>=

#"UNKNOWN-TOKEN" => token-value;
            

For example, whitespace, comments, and line breaks are significant during preprocessing. We will eliminate these before the C parser sees them.

<Token definitions>=

// Whitespace and comments
token WHITESPACE = "([ \t\f]|/\\*([^*]|\\*+[^*/])*\\*+/)+";

// New-line characters and line-end comments
token NEW-LINE = "(//[^\r\n]*)?(\n|\r|\r\n)";
            

The canonical string representation of whitespace is a single space character.

<Cases for preprocessor-token-string>+=

#"WHITESPACE" => " ";
            

Recognition of identifiers is straightforward.

<Modules imported by the cpr-internals module>+=

use interned-string;
            

<Token definitions>+=

// Identifiers
token IDENTIFIER :: <string> = "[a-zA-Z_][0-9a-zA-Z_]*",
  semantic-value-function:
    method (token-string :: <byte-string>,
            token-start :: <integer>,
            token-end :: <integer>);
      intern-string(token-string, start: token-start, end: token-end)
    end;
            

<Cases for preprocessor-token-string>+=

#"IDENTIFIER" => token-value;
            

During preprocessing, numbers are not interpreted completely, but are recognized as preprocessing numbers. The grammar for preprocessing numbers is a superset of the various kinds of numeric tokens, and also includes a few things that are not valid numeric tokens at all.

<Token definitions>+=

// Preprocessing numbers
token PP-NUMBER = "\\.?[0-9]([.0-9a-zA-Z_]|[eEpP][-+])*",
  semantic-value-function: extract-action;
            

<Cases for preprocessor-token-string>+=

#"PP-NUMBER" => token-value;
            

Character constants and string literals are described by similar regular expressions.

<Token definitions>+=

// Character constants
name c-char
  = "[^'\\\\\r\n]|\\\\(['\"?\\abfnrtv]|[0-7]([0-7][0-7]?)?|x[0-9a-fA-F]+)";
token CHARACTER-CONSTANT :: <string> = "L?'{c-char}+'", // FIXME
  semantic-value-function: extract-action;

// String literals
name s-char
  = "[^\"\\\\\r\n]|\\\\(['\"?\\abfnrtv]|[0-7]([0-7][0-7]?)?|x[0-9a-fA-F]+)";
token STRING-LITERAL :: <string> = "L?\"{s-char}*\"", // FIXME
  semantic-value-function: extract-action;
            

<Cases for preprocessor-token-string>+=

#"CHARACTER-CONSTANT" => token-value;
#"STRING-LITERAL" => token-value;
            

The remaining tokens at the preprocessing stage are considered punctuation. We provide semantic value functions for some of the punctuation tokens so we can distingush between the alternate spellings for stringization purposes. In all current cases looking at the length of the recognized token is sufficient.

<Semantic value function definitions>=

define function punctuation-value-function
    (length :: <integer>) => (func :: <function>);
  method
      (token-string :: <byte-string>,
       token-start :: <integer>,
       token-end :: <integer>)
   => (result :: <boolean>);
    token-end - token-start = length
  end
end function;
            

<Token definitions>+=

// Punctuation
token SHARP = "#|%:",
  semantic-value-function: punctuation-value-function(1);
token SHARPSHARP = "##|%:%:",
  semantic-value-function: punctuation-value-function(2);
token LBRACK = "\\[|<:",
  semantic-value-function: punctuation-value-function(1);
token RBRACK = "\\]|:>",
  semantic-value-function: punctuation-value-function(1);
token LBRACE = "\\{|<%",
  semantic-value-function: punctuation-value-function(1);
token RBRACE = "\\}|%>",
  semantic-value-function: punctuation-value-function(1);
token LPAREN = "\\(";
token RPAREN = "\\)";
token DOT = "\\.";
token ARROW = "->";
token INC = "\\+\\+";
token DEC = "-" "-";
token AMP = "&";
token STAR = "\\*";
token PLUS = "\\+";
token MINUS = "-";
token TILDE = "~";
token BANG = "!";
token SLASH = "/";
token PERCENT = "%";
token SHL = "<<";
token SHR = ">>";
token LT = "<";
token GT = ">";
token LE = "<=";
token GE = ">=";
token EQ = "==";
token NE = "!=";
token HAT = "^";
token OR = "\\|";
token AMPAMP = "&&";
token OROR = "\\|\\|";
token QUEST = "\\?";
token COLON = ":";
token SEMI = ";";
token ELIPSIS = "\\.\\.\\.";
token EQUALS = "=";
token STAR-EQUALS = "\\*=";
token SLASH-EQUALS = "/=";
token PERCENT-EQUALS = "%=";
token PLUS-EQUALS = "\\+=";
token MINUS-EQUALS = "-=";
token SHL-EQUALS = "<<=";
token SHR-EQUALS = ">>=";
token AMP-EQUALS = "&=";
token HAT-EQUALS = "^=";
token OR-EQUALS = "\\|=";
token COMMA = ",";
            

<Cases for preprocessor-token-string>+=

#"SHARP" => if (token-value) "#" else "%:" end;
#"SHARPSHARP" => if (token-value) "##" else "%:%:" end;
#"LBRACK" => if (token-value) "[" else "<:" end;
#"RBRACK" => if (token-value) "]" else ":>" end;
#"LBRACE" => if (token-value) "{" else "<%" end;
#"RBRACE" => if (token-value) "}" else "%>" end;
#"LPAREN" => "(";
#"RPAREN" => ")";
#"DOT" => ".";
#"ARROW" => "->";
#"INC" => "++";
#"DEC" => "--";
#"AMP" => "&";
#"STAR" => "*";
#"PLUS" => "+";
#"MINUS" => "-";
#"TILDE" => "~";
#"BANG" => "!";
#"SLASH" => "/";
#"PERCENT" => "%";
#"SHL" => "<<";
#"SHR" => ">>";
#"LT" => "<";
#"GT" => ">";
#"LE" => "<=";
#"GE" => ">=";
#"EQ" => "==";
#"NE" => "!=";
#"HAT" => "^";
#"OR" => "|";
#"AMPAMP" => "&&";
#"OROR" => "||";
#"QUEST" => "?";
#"COLON" => ":";
#"SEMI" => ";";
#"ELIPSIS" => "...";
#"EQUALS" => "=";
#"STAR-EQUALS" => "*=";
#"SLASH-EQUALS" => "/=";
#"PERCENT-EQUALS" => "%=";
#"PLUS-EQUALS" => "+=";
#"MINUS-EQUALS" => "-=";
#"SHL-EQUALS" => "<<=";
#"SHR-EQUALS" => ">>=";
#"AMP-EQUALS" => "&=";
#"HAT-EQUALS" => "^=";
#"OR-EQUALS" => "|=";
#"COMMA" => ",";
            

So, that's the scanner we'll use.

<Initialize the scanner>=

let scanner
  = make(<simple-lexical-scanner>, definition: $C-tokens, rangemap: rangemap);
scanner.scanner-source-position := start-position;
            

<Module cpr-internals>+=

define function scan-C-preprocessing-tokens
    (preprocessor-token-dispatcher :: <C-preprocessor-token-dispatcher>,
     scanner :: <simple-lexical-scanner>,
     text :: <byte-string>,
     start :: <buffer-index>,
     _end :: <buffer-index>)
 => ();
  scan-tokens(scanner, C-preprocessor-dispatch-token, preprocessor-token-dispatcher,
              text, start: start, end: _end, partial?: #t);
end function;
            

<Finalize the scanner and preprocessor and return the end position>=

scan-tokens(scanner, C-preprocessor-dispatch-token, preprocessor-token-dispatcher,
            "", partial?: #f);
let end-position = scanner.scanner-source-position;
C-preprocessor-dispatch-token(preprocessor-token-dispatcher, 0, #"EOF", #f,
                              end-position, end-position);
<Finalize the preprocessor>
end-position
            

Interpreting Preprocessor Tokens

At various points we'll need to further interpret preprocessing numbers, character constants, and string constants beyond just treating them as preprocessing tokens.

Preprocessing Numbers

Preprocessing numbers can be interpreted as integer or floating-point constants.

<Module cpr-internals>+=

define function C-preprocessing-number-value
    (string :: <byte-string>)
 => (value :: <number>, type :: <symbol>);
  let string-size = string.size;
  local
    <Local methods in C-preprocessing-number-value>;
  select (string[0])
    '0' =>
      initial-zero-part();
    '1' =>
      decimal-integer-part(1, 1);
    '2' =>
      decimal-integer-part(1, 2);
    '3' =>
      decimal-integer-part(1, 3);
    '4' =>
      decimal-integer-part(1, 4);
    '5' =>
      decimal-integer-part(1, 5);
    '6' =>
      decimal-integer-part(1, 6);
    '7' =>
      decimal-integer-part(1, 7);
    '8' =>
      decimal-integer-part(1, 8);
    '9' =>
      decimal-integer-part(1, 9);
    '.' =>
      error("floating constants are not yet supported");
    otherwise =>
      error("unrecognized constant %s", string); 
  end select
end function;
	      

An initial 0 could indicate an octal or hexadecimal integer constant, or a hexadecimal floating-point constant.

<Local methods in C-preprocessing-number-value>=

method initial-zero-part () => (value :: <number>, type :: <symbol>);
  if (1 >= string-size)
    values(0, #"int")
  else
    select (string[1])
      'x', 'X' =>
        hexadecimal-integer-part(2, 0);
      '0' =>
        octal-integer-part(2, 0);
      '1' =>
        octal-integer-part(2, 1);
      '2' =>
        octal-integer-part(2, 2);
      '3' =>
        octal-integer-part(2, 3);
      '4' =>
        octal-integer-part(2, 4);
      '5' =>
        octal-integer-part(2, 5);
      '6' =>
        octal-integer-part(2, 6);
      '7' =>
        octal-integer-part(2, 7);
      'l', 'L', 'u', 'U' =>
        integer-suffix-part(1, 0, #t);
      otherwise =>
        error("unrecognized octal constant %s", string)
    end select
  end if
end method,
              

<Local methods in C-preprocessing-number-value>+=

method hexadecimal-integer-part
    (i :: <integer>, value :: <integer>)
 => (value :: <number>, type :: <symbol>);
  if (i >= string-size)
    values(value, #"int")
  else
    select (string[i])
      '0' =>
        hexadecimal-integer-part(i + 1, value * 16 + 0);
      '1' =>
        hexadecimal-integer-part(i + 1, value * 16 + 1);
      '2' =>
        hexadecimal-integer-part(i + 1, value * 16 + 2);
      '3' =>
        hexadecimal-integer-part(i + 1, value * 16 + 3);
      '4' =>
        hexadecimal-integer-part(i + 1, value * 16 + 4);
      '5' =>
        hexadecimal-integer-part(i + 1, value * 16 + 5);
      '6' =>
        hexadecimal-integer-part(i + 1, value * 16 + 6);
      '7' =>
        hexadecimal-integer-part(i + 1, value * 16 + 7);
      '8' =>
        hexadecimal-integer-part(i + 1, value * 16 + 8);
      '9' =>
        hexadecimal-integer-part(i + 1, value * 16 + 9);
      'a', 'A' =>
        hexadecimal-integer-part(i + 1, value * 16 + 10);
      'b', 'B' =>
        hexadecimal-integer-part(i + 1, value * 16 + 11);
      'c', 'C' =>
        hexadecimal-integer-part(i + 1, value * 16 + 12);
      'd', 'D' =>
        hexadecimal-integer-part(i + 1, value * 16 + 13);
      'e', 'E' =>
        hexadecimal-integer-part(i + 1, value * 16 + 14);
      'f', 'F' =>
        hexadecimal-integer-part(i + 1, value * 16 + 15);
      'l', 'L', 'u', 'U' =>
        integer-suffix-part(i, value, #t);
      '.', 'p', 'P' =>
        error("floating hexadecimal constants are not yet supported");
      otherwise =>
        error("unrecognized hexadecimal constant %s", string)
    end 
  end if
end method,
              

<Local methods in C-preprocessing-number-value>+=

method octal-integer-part
    (i :: <integer>, value :: <integer>)
 => (value :: <number>, type :: <symbol>);
  if (i >= string-size)
    values(value, #"int")
  else
    select (string[i])
      '0' =>
        octal-integer-part(i + 1, value * 8 + 0);
      '1' =>
        octal-integer-part(i + 1, value * 8 + 1);
      '2' =>
        octal-integer-part(i + 1, value * 8 + 2);
      '3' =>
        octal-integer-part(i + 1, value * 8 + 3);
      '4' =>
        octal-integer-part(i + 1, value * 8 + 4);
      '5' =>
        octal-integer-part(i + 1, value * 8 + 5);
      '6' =>
        octal-integer-part(i + 1, value * 8 + 6);
      '7' =>
        octal-integer-part(i + 1, value * 8 + 7);
      'l', 'L', 'u', 'U' =>
        integer-suffix-part(i, value, #t);
      otherwise =>
        error("unrecognized octal constant %s", string)
    end select
  end if
end method,
              

<Local methods in C-preprocessing-number-value>+=

method decimal-integer-part
    (i :: <integer>, value :: <integer>)
 => (value :: <number>, type :: <symbol>);
  if (i >= string-size)
    values(value, #"int")
  else
    select (string[i])
      '0' =>
        decimal-integer-part(i + 1, value * 10 + 0);
      '1' =>
        decimal-integer-part(i + 1, value * 10 + 1);
      '2' =>
        decimal-integer-part(i + 1, value * 10 + 2);
      '3' =>
        decimal-integer-part(i + 1, value * 10 + 3);
      '4' =>
        decimal-integer-part(i + 1, value * 10 + 4);
      '5' =>
        decimal-integer-part(i + 1, value * 10 + 5);
      '6' =>
        decimal-integer-part(i + 1, value * 10 + 6);
      '7' =>
        decimal-integer-part(i + 1, value * 10 + 7);
      '8' =>
        decimal-integer-part(i + 1, value * 10 + 8);
      '9' =>
        decimal-integer-part(i + 1, value * 10 + 9);
      'l', 'L', 'u', 'U' =>
        integer-suffix-part(i, value, #f);
      '.', 'e', 'E', 'f', 'F' =>
        error("floating constants are not yet supported");
      otherwise =>
        error("unrecognized decimal constant %s", string);
    end select
  end if
end method,
              

<Local methods in C-preprocessing-number-value>+=

method integer-suffix-part
    (i :: <integer>, value :: <integer>, flag? :: <boolean>)
 => (value :: <number>, type :: <symbol>);
  values(value, #"signed-int")
end method
	      

Character Constants

<Module cpr-internals>+=

define function C-character-constant-value
    (string :: <byte-string>)
 => (value :: <number>);
  let _start = if (string[0] == 'L') 2 else 1 end;
  let _end = string.size - 1;
  iterate loop(i = _start, value = 0)
    if (i >= _end)
      value
    elseif (string[i] == '\\')
      let (character-value, next-index)
        = C-character-escape-value(string, i, _end);
      loop(next-index, logior(ash(value, 8), character-value));
    else
      loop(i + 1, logior(ash(value, 8), as(<integer>, string[i])));
    end if
  end iterate
end function;
              

<Module cpr-internals>+=

define function C-character-escape-value
    (string :: <byte-string>, i :: <integer>, _end :: <integer>)
 => (value :: <integer>, next-index :: <integer>);
  select (string[i])
    '\'', '\"', '?', '\\' =>
      values(as(<integer>, string[i]), i + 1);
    'a' =>
      values(#x07, i + 1);
    'b' =>
      values(#x08, i + 1);
    'f' =>
      values(#x0C, i + 1);
    'n' =>
      values(#x0A, i + 1);
    'r' =>
      values(#x0D, i + 1);
    't' =>
      values(#x09, i + 1);
    'v' =>
      values(#x0B, i + 1);
    '0', '1', '2', '3', '4', '5', '6', '7' =>
      error("Octal escape");
    'x' =>
      error("Hex escape");
    otherwise =>
      error("Illegal escape");
  end
end function;
              

The Preprocessor Token Dispatcher

The scanner needs to pass various information to the token consumer, including the preprocessing-translation-unit, and the rangemap. This needs to be encapsulated into a single object.

<Module cpr-internals>+=

define class <C-preprocessor-token-dispatcher> (<object>)
  constant slot token-dispatcher-preprocessing-translation-unit
      :: <C-preprocessing-translation-unit-representation>,
    required-init-keyword: preprocessing-translation-unit:;
  constant slot token-dispatcher-scanner :: <simple-lexical-scanner>,
    required-init-keyword: scanner:;
  constant slot token-dispatcher-directory :: false-or(<directory-locator>),
    required-init-keyword: directory:;
  constant slot token-dispatcher-rangemap :: <source-location-rangemap>,
    required-init-keyword: rangemap:;
  <Additional slots in <C-preprocessor-token-dispatcher>>
end class;
            
define sealed domain make(singleton(<C-preprocessor-token-dispatcher>));
            
define sealed domain initialize(<C-preprocessor-token-dispatcher>);
            

We'll implement our preprocessor as a state machine, with states represented by token consumer functions. The C-preprocessor-dispatch-token function will serve as a trampoline, dispatching to the various state functions depending on the current state.

<Additional slots in <C-preprocessor-token-dispatcher>>=

slot token-dispatcher-state-function :: <function>,
  required-init-keyword: consumer-state-function:;
            

<Module cpr-internals>+=

define function C-preprocessor-dispatch-token
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name, token-value, start-position, end-position)
 => ();
  token-dispatcher.token-dispatcher-state-function
    (token-dispatcher, token-number, token-name, token-value,
     start-position, end-position);
end function;
            

For brevity's sake, we'll provide macros for defining state functions and for making state transitions.

<Module cpr-internals>+=

define macro preprocessor-state-function-definer
  { define preprocessor-state-function ?state:name
        (?token-dispatcher:variable,
         ?token-number:variable, ?token-name:variable, ?token-value:variable,
         ?start-position:variable, ?end-position:variable)
      ?:body
    end }
    => { define function "C-preprocess-token-" ## ?state
             (?token-dispatcher, ?token-number, ?token-name, ?token-value,
              ?start-position, ?end-position)
          => ();
           ?body
         end }
end macro;
            
define macro enter-state
  { enter-state(?token-dispatcher:expression, ?state:name) }
  => { ?token-dispatcher.token-dispatcher-state-function
         := "C-preprocess-token-" ## ?state }
end macro;
            

The preprocessor-error method can be used by preprocessor state functions to signal errors.

<Module cpr-internals>+=

define function preprocessor-error
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     start-position :: <integer>, end-position :: <integer>,
     string :: <string>, #rest arguments)
 => (no-return :: <bottom>);
  let srcloc
    = range-source-location(token-dispatcher.token-dispatcher-rangemap,
                            start-position, end-position);
  apply(source-error, srcloc, string, arguments);
end function;
            

The

<Module cpr-internals>+=

define macro with-preprocessing-directive-restart
  { with-preprocessing-directive-restart (?dispatcher:expression,
                                          ?token-name:expression)
      ?:body
    end }
    => { block (exit)
           ?body;
         exception (restart :: <source-error-recovery-restart>)
           if (?token-name == #"NEW-LINE")
             enter-state(?dispatcher, linestart);
           else
             enter-state(?dispatcher, sharp-skipping);
           end if;
           exit();
         end block }
end macro;
            

The preprocessor needs to keep various types of state information concerning active conditionals, as well as directives that are in the process of being parsed. The most important of the state variables are the consumer function receiving tokens from the preprocessor and its state data, and an indication of whether or not the current input is being skipped due to a false conditional.

<Module cpr-internals>+=

define class <C-preprocessor-state> (<object>)
  constant slot preprocessor-dialect :: <C-preprocessor-dialect>,
    required-init-keyword: dialect:;
  constant slot preprocessor-consumer :: <function>,
    required-init-keyword: consumer:;
  constant slot preprocessor-consumer-data,
    required-init-keyword: consumer-data:;
  slot preprocessor-skipping? :: <boolean>,
    init-value: #f, init-keyword: skipping?:;
  <Additional slots in <C-preprocessor-state>>
end class;
            
define sealed domain make(singleton(<C-preprocessor-state>));
            
define sealed domain initialize(<C-preprocessor-state>);
            

<Additional slots in <C-preprocessor-token-dispatcher>>+=

constant slot token-dispatcher-state :: <C-preprocessor-state>,
  required-init-keyword: consumer-state:;
            

<Initialize the preprocessor and preprocessor-token-dispatcher>=

let preprocessor-state
  = make(<C-preprocessor-state>,
         dialect: dialect,
         consumer: consumer,
         consumer-data: consumer-data);
let preprocessor-token-dispatcher
  = make(<C-preprocessor-token-dispatcher>,
         preprocessing-translation-unit: preprocessing-translation-unit,
         scanner: scanner,
         directory: directory,
         rangemap: rangemap,
         consumer-state-function: C-preprocess-token-linestart,
         consumer-state: preprocessor-state);
            

The C-preprocessor-output-token function sends a token to the preprocessor's consumer.

<Module cpr-internals>+=

define inline function C-preprocessor-output-token
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
 => ();
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  let rangemap = token-dispatcher.token-dispatcher-rangemap;

  preprocessor-state.preprocessor-consumer
    (preprocessor-state.preprocessor-consumer-data,
     token-name, token-value,
     rangemap, start-position, end-position);
end function;
            

Preprocessor Dialects

The exact set of supported preprocessor directives are parameterizable through <C-preprocessor-dialect> objects. Since preprocessor-dialects can inherit from other dialects, we use a <hierarchical-table> to store information about these directives.

<Modules imported by the cpr-internals module>+=

use hierarchical-table;
            

<Slots in the <C-preprocessor-dialect> class>=

  constant slot preprocessor-dialect-parent
      :: false-or(<C-preprocessor-dialect>) = #f,
    init-keyword: parent:;
  slot preprocessor-dialect-directives :: <hierarchical-table>;
            

<Define an initialize method on <C-preprocessor-dialect>>=

define method initialize
    (dialect :: <C-preprocessor-dialect>,
     #key parent :: false-or(<C-preprocessor-dialect>), #all-keys)
 => ();
  next-method();
  dialect.preprocessor-dialect-directives
    := make(<hierarchical-table>,
            parent: parent & parent.preprocessor-dialect-directives);
end method;
            

The <C-preprocessor-directive> class stores information about these individual directives. Information we need includes:

<Module cpr-internals>+=

define class <C-preprocessor-directive> (<object>)
  constant slot directive-skipping-function :: <function>,
    init-value: method (dispatcher) #f end, init-keyword: skipping-function:;
  constant slot directive-macro-replaced?,
    init-value: #f, init-keyword: macro-replaced?:;
  constant slot directive-lexical-definition
      :: false-or(<simple-lexical-definition>),
    init-value: #f, init-keyword: lexical-definition:;
end class;
            
define sealed domain make(singleton(<C-preprocessor-directive>));
            
define sealed domain initialize(<C-preprocessor-directive>);
            

<Module cpr-internals>+=

define method define-C-preprocessor-directive
    (dialect :: <C-preprocessor-dialect>,
     directive :: <string>,
     #rest keys)
 => ();
  dialect.preprocessor-dialect-directives[intern-string(directive)]
    := apply(make, <C-preprocessor-directive>, keys);
end method;
            

We'll use a LR parser to recognize C preprocessor directives. The grammar productions defining these directives, and the resulting parser automaton, will be stored with the preprocessor dialect.

<Slots in the <C-preprocessor-dialect> class>+=

  slot preprocessor-dialect-productions :: <sequence>,
    init-value: #[], init-keyword: productions:;
            

We'll compute the parser automaton, recognizing a nonterminal called directive, as soon as it is needed.

<Slots in the <C-preprocessor-dialect> class>+=

  slot %preprocessor-dialect-automaton;
            

<Module cpr-internals>+=

define method preprocessor-dialect-automaton
   (dialect :: <C-preprocessor-dialect>)
=> (automaton);
  if (slot-initialized?(dialect, %preprocessor-dialect-automaton))
    dialect.%preprocessor-dialect-automaton
  else
    for (the-dialect = dialect
           then the-dialect.preprocessor-dialect-parent,
         productions = #[]
           then concatenate(productions,
                            the-dialect.preprocessor-dialect-productions),
         while: the-dialect)
    finally
      dialect.%preprocessor-dialect-automaton
        := simple-parser-automaton($C-tokens, productions,
                                   #[#"directive"],
                                   end-symbol: #"NEW-LINE")
    end for
  end if
end;
            

For convenience's sake, we'll provide a specialized macro for defining the syntax of directives. Note that the # and the directive name are included in the right-hand side of the production, because we want the source location information for the reduced production to include the entire directive.

<Modules imported by the cpr-internals module>+=

  use simple-parser-automaton;
            

<Define the preprocessor-directive-productions macro>=

define macro preprocessor-directive-productions
  { preprocessor-directive-productions ?clauses end }
    => { vector(?clauses) }

clauses:
  { } => { }

  { directive ?name:token [?symbols] (?variables:*)
      ?:body ... }
    => { make(<simple-production>,
              nonterminal: #"directive",
              derives: preprocessor-directive-derives(intern-string(?name),
                                                      ?symbols),
              action: production-user-reduce-action-function([];
                                                             [SHARP d ?symbols];
                                                             [?variables];
                                                             ?body)), ... }
symbols:
  { } => { }
  { ?symbol:name ... } => { ?symbol ... }
end macro;
            
define macro preprocessor-directive-derives
  { preprocessor-directive-derives(?name:expression, ?symbols) }
    => { vector(#"SHARP", ?name, ?symbols) }
symbols:
  { } => { }
  { ?symbol:name ... } => { ?#"symbol", ... }
end macro;
            

<Module cpr-internals>+=

$C90-C-preprocessor-dialect.preprocessor-dialect-productions
  := concatenate(preprocessor-directive-productions
                   <C90 preprocessor directive grammar productions>
                 end,
                 simple-grammar-productions
                   <C90 preprocessor directive auxiliary grammar productions>
                 end,
                 <Productions for the preprocessing-token>);
             

<Module cpr-internals>+=

$C99-C-preprocessor-dialect.preprocessor-dialect-productions
  := concatenate(preprocessor-directive-productions
                   <C99 preprocessor directive grammar productions>
                 end);
             

<Module cpr-internals>+=

$gnu89-C-preprocessor-dialect.preprocessor-dialect-productions
  := concatenate(preprocessor-directive-productions
                   <GNU C preprocessor directive grammar productions>
                 end,
                 simple-grammar-productions
                   <GNU C preprocessor directive auxiliary grammar productions>
                 end);
             
$gnu99-C-preprocessor-dialect.preprocessor-dialect-productions
  := concatenate(preprocessor-directive-productions
                   <>
                 end);
             

Recognizing Preprocessor Directives

Along with the dispatcher state machine, we'll use a <simple-parser> to recognize preprocessor directives.

<Additional slots in <C-preprocessor-state>>=

slot preprocessor-directive-parser :: <simple-parser>,
  init-keyword: directive-parser:;
            

<Initialize the preprocessor and preprocessor-token-dispatcher>+=

preprocessor-state.preprocessor-directive-parser
  := make(<simple-parser>,
          automaton: dialect.preprocessor-dialect-automaton,
          start-symbol: #"directive",
          rangemap: rangemap,
          consumer-data: preprocessor-token-dispatcher);
            

Initially the preprocessor input is considered to be at the start of a line. Preprocessing directives are only recognized in this state.

When we see a # in this state, we reset the parser and feed the token to it.

<Module cpr-internals>+=

define preprocessor-state-function linestart
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  select (token-name)
    #"NEW-LINE", #"WHITESPACE" =>
      ; // stay in this state
    #"SHARP" =>
      enter-state(token-dispatcher, sharp);
      let parser = preprocessor-state.preprocessor-directive-parser;
      simple-parser-reset(parser, start-symbol: #"directive");
      simple-parser-consume-token(parser, 0, #"SHARP", #f,
                                  start-position, end-position);
    #"EOF" =>
      ; // EOF is okay here
    #"IDENTIFIER" =>
      enter-state(token-dispatcher, linemid);
      unless (preprocessor-state.preprocessor-skipping?)
        <Expand the identifier if it has a macro definition, and send it directly to the output otherwise>
      end unless;
    #"BLUE-IDENTIFIER" =>
      enter-state(token-dispatcher, linemid);
      unless (preprocessor-state.preprocessor-skipping?)
        C-preprocessor-output-token(token-dispatcher,
                                    token-number, #"IDENTIFIER", token-value,
                                    start-position, end-position);
      end unless;
    otherwise =>
      enter-state(token-dispatcher, linemid);
      unless (preprocessor-state.preprocessor-skipping?)
        C-preprocessor-output-token(token-dispatcher, 
                                    token-number, token-name, token-value,
                                    start-position, end-position);
      end unless;
  end select;
end preprocessor-state-function;
            

Processing in the middle of a line is similar, except that we return to the line-start state if we see a newline.

<Module cpr-internals>+=

define preprocessor-state-function linemid
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  select (token-name)
    #"NEW-LINE" =>
      enter-state(token-dispatcher, linestart);
    #"WHITESPACE" =>
      ; // stay in this state
    #"EOF" =>
      signal("File does not end in a newline");
    #"IDENTIFIER" =>
      unless (preprocessor-state.preprocessor-skipping?)
        <Expand the identifier if it has a macro definition, and send it directly to the output otherwise>
      end unless;
    #"BLUE-IDENTIFIER" =>
      unless (preprocessor-state.preprocessor-skipping?)
        C-preprocessor-output-token(token-dispatcher,
                                    token-number, #"IDENTIFIER", token-value,
                                    start-position, end-position);
      end unless;
    otherwise =>
      unless (preprocessor-state.preprocessor-skipping?)
        C-preprocessor-output-token(token-dispatcher, 
                                    token-number, token-name, token-value,
                                    start-position, end-position);
      end unless;
  end select;
end preprocessor-state-function;
            

After seeing # at the beginning of a line, we look for a preprocessor directive. Unrecognized directives are okay if we're skipping due to a false conditional. Note that we explicitly ignore the null directive (# immediately followed by a newline).

<Module cpr-internals>+=

define preprocessor-state-function sharp
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  select (token-name)
    #"IDENTIFIER" =>
      let dialect = preprocessor-state.preprocessor-dialect;
      let directive
        = element(dialect.preprocessor-dialect-directives, token-value,
                  default: #f);
      if (directive)
        let parser = preprocessor-state.preprocessor-directive-parser;
        simple-parser-consume-token(parser, 0, token-value, #f,
                                    start-position, end-position);

        if (preprocessor-state.preprocessor-skipping?)
          if (~directive.directive-skipping-function(token-dispatcher))
            enter-state(token-dispatcher, sharp-skipping);
          elseif (directive.directive-macro-replaced?)
            enter-state(token-dispatcher, sharp-directive-expanded);
          else
            enter-state(token-dispatcher, sharp-directive);
          end if;
        else
          if (directive.directive-lexical-definition)
            token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition
             := directive.directive-lexical-definition;
          end if;

          if (directive.directive-macro-replaced?)
            enter-state(token-dispatcher, sharp-directive-expanded);
          else
            enter-state(token-dispatcher, sharp-directive);
          end if;
        end if;
      elseif (preprocessor-state.preprocessor-skipping?)
        enter-state(token-dispatcher, sharp-skipping);
      else
        with-preprocessing-directive-restart (token-dispatcher, token-name)
          preprocessor-error(token-dispatcher, start-position, end-position,
                             "invalid preprocessing directive #%s",
                              token-value);
        end;
      end if;

    #"NEW-LINE" =>
      // Null directive
      enter-state(token-dispatcher, linestart);

    #"WHITESPACE" =>
      ; // remain in this state

    otherwise =>
      if (preprocessor-state.preprocessor-skipping?)
        enter-state(token-dispatcher, sharp-skipping);
      else
        with-preprocessing-directive-restart (token-dispatcher, token-name)
          preprocessor-error(token-dispatcher, start-position, end-position,
                             "invalid preprocessing directive");
        end;
      end if;
  end select;
end preprocessor-state-function;
            

While processing a directive, we feed the tokens into the directive parser. Whitespace is recognized as a token in one particular spot, as we'll see later, so we feed whitespace tokens into the parser only if they can be recognized at that point.

Line end constitutes the end of the directive, and as treated as the end-of-input marker by the parser. Once we feed this token into the parser (causing the directive's reduce action to be triggered), we return to the initial state.

<Module cpr-internals>+=

define preprocessor-state-function sharp-directive
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  let parser = preprocessor-state.preprocessor-directive-parser;

  select (token-name)
    #"WHITESPACE" =>
      if (simple-parser-can-consume-token?(parser, token-number, token-name))
        simple-parser-consume-token(parser, token-number, token-name, #f,
                                    start-position, end-position);
      end if;

    #"NEW-LINE" =>
      with-preprocessing-directive-restart (token-dispatcher, token-name)
        simple-parser-consume-token(parser, token-number, token-name,
                                    token-value,
                                    start-position, end-position);
        enter-state(token-dispatcher, linestart);
      end;

    #"EOF" =>
      signal("File does not end in a newline");
      simple-parser-consume-token(parser, 0, #"NEW-LINE", #f,
                                  start-position, end-position);

    otherwise =>
      with-preprocessing-directive-restart (token-dispatcher, token-name)
        simple-parser-consume-token(parser, token-number, token-name,
                                    token-value,
                                    start-position, end-position);
      end;
  end select;
end preprocessor-state-function;
            

If we're skipping the rest of a preprocessor directive line (due either to a false conditional or to a syntax error) then we enter the C-preprocess-token-sharp-skipping state.

<Module cpr-internals>+=

define preprocessor-state-function sharp-skipping
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  if (token-name == #"NEW-LINE")
    enter-state(token-dispatcher, linestart);
  end if;  
end preprocessor-state-function;
            

Preprocessor Macros

Definitions of macros defined using #define are stored in a preprocessing-macro-definitions table of the preprocessing translation unit.

<Slots in <C-preprocessing-translation-unit-representation>>=

constant slot preprocessing-macro-definitions :: <object-table>
 = make(<object-table>);
            

Each macro definition consists of a list of macro parameters and a variable argument parameter for function-like macros, and a sequence containing the replacement list of the macro. There is also a mutable flag, preprocessing-macro-suppressed?, which is set during expansion of the macro to prevent it from being recursively expanded.

<Module cpr-internals>+=

define class <C-preprocessing-macro> (<source-location-mixin>)
  constant slot preprocessing-macro-parameters :: false-or(<sequence>),
    init-value: #f, init-keyword: parameters:;
  constant slot preprocessing-macro-varargs-parameter
    :: false-or(<byte-string>),
    init-value: #f, init-keyword: varargs-parameter:;
  constant slot preprocessing-macro-replacement :: <sequence>,
    required-init-keyword: replacement:;
  constant slot preprocessing-macro-expander :: false-or(<function>),
    init-value: #f, init-keyword: expander:;
  slot preprocessing-macro-suppressed? :: <boolean>,
    init-value: #f;
end class;
            
define sealed domain make(singleton(<C-preprocessing-macro>));
            
define sealed domain initialize(<C-preprocessing-macro>);
            

<Module cpr-internals>+=

define macro with-macro-suppressed
  { with-macro-suppressed (?definition:expression) ?:body end }
    => { block ()
           ?definition.preprocessing-macro-suppressed? := #t;
           ?body
         cleanup
           ?definition.preprocessing-macro-suppressed? := #f;
          end block }
end macro;           
            

Macros can be redefined without complaint if the new definition is equivalent to the previous one:

<Module cpr-internals>+=

define sealed method \=
    (macro1 :: <C-preprocessing-macro>,
     macro2 :: <C-preprocessing-macro>)
 => (equal? :: <boolean>);
  (macro1.preprocessing-macro-parameters
     = macro2.preprocessing-macro-parameters)
    & (macro1.preprocessing-macro-varargs-parameter
         = macro2.preprocessing-macro-varargs-parameter)
    & (macro1.preprocessing-macro-replacement
         = macro2.preprocessing-macro-replacement)
end method;    
            

The elements in the replacement list are the individual preprocessing tokens.

<Module cpr-internals>+=

define class <C-preprocessing-token> (<object>)
  constant slot preprocessing-token-name :: <symbol>,
    required-init-keyword: name:;
  constant slot preprocessing-token-value,
    init-value: #f, init-keyword: value:;
/*
  constant slot preprocessing-token-start-position :: false-or(<integer>),
    init-value: #f, init-keyword: start-position:;
  constant slot preprocessing-token-end-position :: false-or(<integer>),
    init-value: #f, init-keyword: end-position:;
*/
end class;
            

<Module cpr-internals>+=

define sealed method \=
    (token1 :: <C-preprocessing-token>,
     token2 :: <C-preprocessing-token>)
 => (result :: <boolean>);
  token1.preprocessing-token-name == token2.preprocessing-token-name
    & token1.preprocessing-token-value = token2.preprocessing-token-value
end method;
            

Given a sequence of preprocessor tokens, we sometimes need to determine whether there is (already) whitespace at the end.

<Module cpr-internals>+=

define function trailing-whitespace?
    (tokens :: <sequence>)
 => (result :: <boolean>);
  if (empty?(tokens))
    #f
  else
    tokens[tokens.size - 1].preprocessing-token-name == #"WHITESPACE"
  end if
end function;
            

Whitespace following a replacement list is not considered part of the replacement list. The trim-whitespace! and trim-whitespace functions are used to strip off such whitespace.

<Module cpr-internals>+=

define function trim-whitespace!
    (tokens :: <stretchy-sequence>)
 => (tokens :: <stretchy-sequence>);
  if (trailing-whitespace?(tokens))
    tokens.size := tokens.size - 1;
  end if;
  tokens
end function;
            
define method trim-whitespace
    (tokens :: <sequence>)
 => (result :: <sequence>);
  if (trailing-whitespace?(tokens))
    copy-sequence(tokens, end: tokens.size - 1)
  else
    tokens
  end if
end method;
            
define method trim-whitespace
    (tokens :: <stretchy-sequence>)
 => (result :: <stretchy-sequence>);
  trim-whitespace!(tokens);
end method;
            

The #define Directive

<Module cpr-internals>+=

define function preprocessor-add-macro-definition
    (preprocessing-translation-unit
       :: <C-preprocessing-translation-unit-representation>,
     macro-name :: <byte-string>,
     definition :: <C-preprocessing-macro>)
 => ();
  let previous-definition
    = element(preprocessing-translation-unit.preprocessing-macro-definitions,
              macro-name,
              default: #f);
  if (previous-definition)
    if (definition ~= previous-definition)
      source-warning(definition, "redefined macro %s", macro-name);
      source-warning(previous-definition, "the previous definition was here");
    end if;
  end if;

  preprocessing-translation-unit.preprocessing-macro-definitions[macro-name]
    := definition;
end function;
              

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "define");
              

<C90 preprocessor directive grammar productions>=

directive "define" [IDENTIFIER]
    (dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>)
  let definition
    = make(<C-preprocessing-macro>,
           source-location: srcloc,
           replacement: #[]);
  preprocessor-add-macro-definition
    (dispatcher.token-dispatcher-preprocessing-translation-unit,
     IDENTIFIER,
     definition);

directive "define" [IDENTIFIER WHITESPACE pp-tokens-opt]
    (dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>)
  let definition
    = make(<C-preprocessing-macro>,
           source-location: srcloc,
           replacement: trim-whitespace(pp-tokens-opt));
  preprocessor-add-macro-definition
    (dispatcher.token-dispatcher-preprocessing-translation-unit,
     IDENTIFIER,
     definition);

directive "define" [IDENTIFIER LPAREN identifier-list-opt RPAREN
                    pp-tokens-opt]
    (dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>)
  let definition
    = make(<C-preprocessing-macro>,
           source-location: srcloc,
           parameters: identifier-list-opt,
           replacement: trim-whitespace(pp-tokens-opt));
  preprocessor-add-macro-definition
    (dispatcher.token-dispatcher-preprocessing-translation-unit,
     IDENTIFIER,
     definition);
              

Function-like macros with variable arguments were added in C99.

<Module cpr-internals>+=

define constant $C99-varargs-parameter :: <string>
  = intern-string("__VA_ARGS__");
              

<C99 preprocessor directive grammar productions>=

directive "define" [IDENTIFIER LPAREN ELIPSIS RPAREN pp-tokens-opt]
    (dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>)
  let definition
    = make(<C-preprocessing-macro>,
           source-location: srcloc,
           parameters: #[],
           varargs-parameter: $C99-varargs-parameter,
           replacement: trim-whitespace(pp-tokens-opt));
  preprocessor-add-macro-definition
    (dispatcher.token-dispatcher-preprocessing-translation-unit,
     IDENTIFIER,
     definition);

directive "define" [IDENTIFIER LPAREN identifier-list COMMA ELIPSIS RPAREN
                    pp-tokens-opt]
    (dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>)
  let definition
    = make(<C-preprocessing-macro>,
           source-location: srcloc,
           parameters: identifier-list,
           varargs-parameter: $C99-varargs-parameter,
           replacement: trim-whitespace(pp-tokens-opt));
  preprocessor-add-macro-definition
    (dispatcher.token-dispatcher-preprocessing-translation-unit,
     IDENTIFIER,
     definition);
              

GNU C has its own version of varargs function-like macros.

<GNU C preprocessor directive grammar productions>=

directive "define" [IDENTIFIER LPAREN gnu-varargs RPAREN pp-tokens-opt]
    (dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>)
  let definition
    = make(<C-preprocessing-macro>,
           source-location: srcloc,
           parameters: #[],
           varargs-parameter: gnu-varargs,
           replacement: trim-whitespace(pp-tokens-opt));
  preprocessor-add-macro-definition
    (dispatcher.token-dispatcher-preprocessing-translation-unit,
     IDENTIFIER,
     definition);

directive "define" [IDENTIFIER LPAREN identifier-list COMMA gnu-varargs RPAREN
                    pp-tokens-opt]
    (dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>)
  let definition
    = make(<C-preprocessing-macro>,
           source-location: srcloc,
           parameters: identifier-list,
           varargs-parameter: gnu-varargs,
           replacement: trim-whitespace(pp-tokens-opt));
  preprocessor-add-macro-definition
    (dispatcher.token-dispatcher-preprocessing-translation-unit,
     IDENTIFIER,
     definition);
              

<C90 preprocessor directive auxiliary grammar productions>=

production identifier-list-opt :: <sequence> => [/* empty */] () #[];
production identifier-list-opt :: <sequence> => [identifier-list];

production identifier-list => [IDENTIFIER];
production identifier-list => [identifier-list COMMA IDENTIFIER];

production pp-tokens-opt :: <sequence> => [/* empty */] () #[];
production pp-tokens-opt :: <sequence> => [pp-tokens];

production pp-tokens => [preprocessing-token];
production pp-tokens => [pp-tokens preprocessing-token];
              

<GNU C preprocessor directive auxiliary grammar productions>=

production gnu-varargs => [IDENTIFIER ELIPSIS];
              

To define the preprocessing-token nonterminal, we need to have one production for each possible preprocessing token. The reduce action for each production constructs a corresponding <C-preprocessing-token> object.

<Productions for the preprocessing-token>=

map(method (token-name :: <symbol>)
      make(<simple-production>,
           nonterminal: #"preprocessing-token",
           nonterminal-type: <C-preprocessing-token>,
           derives: vector(token-name),
           action:
             method
                 (p :: <simple-parser>, data, s, e)
              => (token :: <C-preprocessing-token>);
               make(<C-preprocessing-token>,
                    name: token-name,
                    value: p[0])
             end)
    end,
    #[#"IDENTIFIER", #"PP-NUMBER", #"CHARACTER-CONSTANT", #"STRING-LITERAL",
      #"SHARP", #"SHARPSHARP", #"LBRACK", #"RBRACK", #"LBRACE", #"RBRACE",
      #"LPAREN", #"RPAREN", #"DOT", #"ARROW", #"INC", #"DEC", #"AMP", #"STAR",
      #"PLUS", #"MINUS", #"TILDE", #"BANG", #"SLASH", #"PERCENT",
      #"SHL", #"SHR", #"LT", #"GT", #"LE", #"GE", #"EQ", #"NE", #"HAT", #"OR",
      #"AMPAMP", #"OROR", #"QUEST", #"COLON", #"SEMI", #"ELIPSIS",
      #"EQUALS", #"STAR-EQUALS", #"SLASH-EQUALS", #"PERCENT-EQUALS",
      #"PLUS-EQUALS", #"MINUS-EQUALS", #"SHL-EQUALS", #"SHR-EQUALS",
      #"AMP-EQUALS", #"HAT-EQUALS", #"OR-EQUALS", #"COMMA"])
              

Whitespace is not s preprocessing token, but it can appear as a separator within replacement lists and macro arguments.. Multiple adjacent whitespace separators need to be collapsed into a single whitespace marker.

<C90 preprocessor directive auxiliary grammar productions>+=

production pp-tokens :: <sequence> => [pp-tokens WHITESPACE] ()
  unless (trailing-whitespace?(pp-tokens))
    add!(pp-tokens, make(<C-preprocessing-token>, name: #"WHITESPACE"));
  end;
  pp-tokens;
              

The #undef Directive

The #undef directive deletes the macro definition for the given identifier from the definitions table.

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "undef");
              

<C90 preprocessor directive grammar productions>+=

directive "undef" [IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>)
  let pp-translation-unit
    = dispatcher.token-dispatcher-preprocessing-translation-unit;
  remove-key!(pp-translation-unit.preprocessing-macro-definitions, IDENTIFIER);
              

External Interface

<Define identifier as an object-like preprocessing macro>=

let rangemap = make(<source-location-rangemap>);
let scanner
  = make(<simple-lexical-scanner>, definition: $C-tokens, rangemap: rangemap);
let tokens = make(<stretchy-object-vector>);
scan-tokens(scanner,
            method (tokens :: <stretchy-object-vector>,
                    token-number, token-name, token-value,
                    start-position, end-position)
              if (token-name == #"WHITESPACE" | token-name == #"NEW-LINE")
                unless (empty?(tokens) | trailing-whitespace?(tokens))
                  add!(tokens, make(<C-preprocessing-token>,
                                    name: #"WHITESPACE"));
                end;
              else
                add!(tokens, make(<C-preprocessing-token>,
                                  name: token-name, value: token-value))
              end if;
            end,
            tokens,
            replacement);
let definition
  = make(<C-preprocessing-macro>,
         replacement: trim-whitespace!(tokens));
preprocessor-add-macro-definition
  (preprocessing-translation-unit,
   intern-string(identifier),
   definition);
              

<Undefine identifier>=

remove-key!(preprocessing-translation-unit.preprocessing-macro-definitions,
            intern-string(identifier));
              

Macro Expansion

When we encounter an identifier during preprocessing, we determine whether or not it has a macro definition. If it does, and it's not currently being suppressed, then we begin the process of expanding the macro by calling C-preprocessing-macro-expand.

<Expand the identifier if it has a macro definition, and send it directly to the output otherwise>=

let pp-translation-unit
  = token-dispatcher.token-dispatcher-preprocessing-translation-unit;
let definition
  = element(pp-translation-unit.preprocessing-macro-definitions,
            token-value, default: #f);
if (definition & ~definition.preprocessing-macro-suppressed?)
  if (C-preprocessing-macro-expand(token-dispatcher,
                                   definition, token-value,
                                   start-position, end-position, #t))
    enter-state(token-dispatcher, linemid-macro-name)
  end if;
else
  C-preprocessor-output-token(token-dispatcher, token-number,
                              token-name, token-value,
                              start-position, end-position);
end if;
            

The C-preprocessing-macro-expand function begins the process of expanding the given identifier. It returns true if the macro is a function-like macro, indicating that the macro arguments need to be collected before the actual macro expansion begins.

<Module cpr-internals>+=

define function C-preprocessing-macro-expand
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     definition :: <C-preprocessing-macro>,
     token-value, start-position, end-position,
     newline-permitted? :: <boolean>)
 => (function-like? :: <boolean>);
  if (definition.preprocessing-macro-parameters)
    <Expand a function-like macro>
    #t
  elseif (definition.preprocessing-macro-expander)
    // Built-in expansion: expand directly
    definition.preprocessing-macro-expander(token-dispatcher, token-value,
                                            start-position, end-position);
    #f
  else
    // Object-like macro: expand directly
    C-expand-replacement-list
      (token-dispatcher, token-value, definition, #f,
       start-position, end-position);
    #f
  end if;
end function;
            

Expanding function-like macros is a much more involved process. We'll need to collect the arguments for substitution into the replacement list before performing the expansion. We'll do this in an instance of <C-preprocessing-macro-invocation>.

<Module cpr-internals>+=

define class <C-preprocessing-macro-invocation> (<object>)
  // Name of the invoked macro
  constant slot invocation-name :: <byte-string>,
    required-init-keyword: name:;
  // Starting position of the macro name in the source
  constant slot invocation-start-position :: <integer>,
    required-init-keyword: start-position:;
  // Ending position of the macro name in the source
  constant slot invocation-end-position :: <integer>,
    required-init-keyword: end-position:;
  // Macro definition
  constant slot invocation-macro-definition :: <C-preprocessing-macro>,
    required-init-keyword: macro-definition:;
  // State from which macro expansion state was entered
  constant slot invocation-state-function :: <function>,
    required-init-keyword: state-function:;
  // True of newline is permitted within the macro invocation
  constant slot invocation-newline-permitted? :: <boolean>,
    required-init-keyword: newline-permitted?:;
  // Tokens saved for the current argument
  constant slot invocation-saved-tokens :: <stretchy-vector>
    = make(<stretchy-vector>);
  // Table (indexed by parameter name) of argument values
  constant slot invocation-arguments :: <object-table>
    = make(<object-table>);
  // Count of nested parentheses (beyond the first opening parenthesis)
  slot invocation-parens :: <integer>,
    init-value: 0;
  // Number of arguments processed so far
  slot invocation-argument-count :: <integer>,
    init-value: 0;
end class;
            

<Additional slots in <C-preprocessor-token-dispatcher>>+=

slot token-dispatcher-macro-invocation
    :: false-or(<C-preprocessing-macro-invocation>),
  init-value: #f;
            

<Expand a function-like macro>=

token-dispatcher.token-dispatcher-macro-invocation
  := make(<C-preprocessing-macro-invocation>,
          name: token-value,
          start-position: start-position,
          end-position: end-position,
          macro-definition: definition,
          state-function: token-dispatcher.token-dispatcher-state-function,
          newline-permitted?: newline-permitted?);
            

If a macro name is followed by something other than an open parenthesis, we know it's not really a macro invocation, and we put the macro name back into the input stream.

<Module cpr-internals>+=

define function abandon-macro-invocation
    (token-dispatcher :: <C-preprocessor-token-dispatcher>)
 => (token-value, start-position, end-position);
  let invocation = token-dispatcher.token-dispatcher-macro-invocation;
  token-dispatcher.token-dispatcher-macro-invocation := #f;

  token-dispatcher.token-dispatcher-state-function
    := invocation.invocation-state-function;

  values(invocation.invocation-name,
         invocation.invocation-start-position,
         invocation.invocation-end-position)
end function;
            

After seeing the macro name, we wait until we see the opening parenthesis (or something else).

<Module cpr-internals>+=

define preprocessor-state-function linemid-macro-name
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  select (token-name)
    #"LPAREN" =>
      enter-state(token-dispatcher, macro-arguments);

    #"WHITESPACE" =>
      ; // remain in this state

    #"NEW-LINE" =>
      enter-state(token-dispatcher, linestart-macro-name);

    otherwise =>
      let (identifier-name, identifier-start-position, identifier-end-position)
        = abandon-macro-invocation(token-dispatcher);
      C-preprocessor-output-token(token-dispatcher,
                                  token-number, #"IDENTIFIER", identifier-name,
                                  identifier-start-position,
                                  identifier-end-position);
      C-preprocessor-dispatch-token
        (token-dispatcher, token-number, token-name, token-value,
         start-position, end-position);
  end select;
end preprocessor-state-function;
            

One “corner case” is when the macro name is followed by a newline before the opening parenthesis (if any).

<Module cpr-internals>+=

define preprocessor-state-function linestart-macro-name
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  select (token-name)
    #"LPAREN" =>
      enter-state(token-dispatcher, macro-arguments);

    #"WHITESPACE", #"NEW-LINE" =>
      ; // remain in this state

    otherwise =>
      let (identifier-name, identifier-start-position, identifier-end-position)
        = abandon-macro-invocation(token-dispatcher);
      C-preprocessor-output-token(token-dispatcher,
                                  token-number, #"IDENTIFIER", identifier-name,
                                  identifier-start-position,
                                  identifier-end-position);

      enter-state(token-dispatcher, linestart);
      token-dispatcher.token-dispatcher-state-function
        (token-dispatcher, token-number, token-name, token-value,
         start-position, end-position);
  end select;
end preprocessor-state-function;
            

When we see the macro arguments, we save them in the <C-preprocessing-macro-invocation> instance. Whitespace tokens are saved (because they are significant for stringization), but adjacent whitespace tokens are collapsed into a single token.

<Module cpr-internals>+=

define preprocessor-state-function macro-arguments
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let invocation = token-dispatcher.token-dispatcher-macro-invocation;
  local
    method save-token () => ();
      let token
        = make(<C-preprocessing-token>,
               name: token-name, value: token-value);
      add!(invocation.invocation-saved-tokens, token);
    end method;

  select (token-name)
    #"COMMA" =>
      if (zero?(invocation.invocation-parens))
        <Handle a top-level comma within the macro arguments>
      else
        save-token();
      end if;

    #"LPAREN" =>
      invocation.invocation-parens := invocation.invocation-parens + 1;
      save-token();

    #"RPAREN" =>
      if (zero?(invocation.invocation-parens))
        <Finish processing of the macro invocation>
      else
        invocation.invocation-parens := invocation.invocation-parens - 1;
        save-token();
      end if;
      
    #"WHITESPACE", #"NEW-LINE" =>
      if (token-name == #"NEW-LINE"
            & ~invocation.invocation-newline-permitted?)
        preprocessor-error(token-dispatcher, start-position, end-position,
                           "newline within invocation of %s macro",
                           invocation.invocation-name);
      end if;
      
      let saved-tokens = invocation.invocation-saved-tokens;
      unless (empty?(saved-tokens) | trailing-whitespace?(saved-tokens))
        add!(saved-tokens,
             make(<C-preprocessing-token>, name: #"WHITESPACE"));
      end unless;

    #"EOF" =>
      preprocessor-error(token-dispatcher, start-position, end-position,
                         "end of input within invocation of %s macro",
                         invocation.invocation-name);

    otherwise =>
      save-token();
  end select;
end preprocessor-state-function;
            

When we see a comma within the macro arguments, we store the argument under the corresponding parameter name.

<Handle a top-level comma within the macro arguments>=

let definition = invocation.invocation-macro-definition;
let index = invocation.invocation-argument-count;
if (index < definition.preprocessing-macro-parameters.size)
  let parameter = definition.preprocessing-macro-parameters[index];

  trim-whitespace!(invocation.invocation-saved-tokens);

  invocation.invocation-arguments[parameter]
    := as(<simple-object-vector>, invocation.invocation-saved-tokens);
  invocation.invocation-saved-tokens.size := 0;
  invocation.invocation-argument-count := index + 1;
elseif (definition.preprocessing-macro-varargs-parameter)
  save-token();
else
  preprocessor-error(token-dispatcher, start-position, end-position,
                     "too many arguments to %s macro",
                     invocation.invocation-name);
end if;
            

When we see the closing parenthesis, we store the last argument in the same way.

<Finish processing of the macro invocation>=

trim-whitespace!(invocation.invocation-saved-tokens);

let definition = invocation.invocation-macro-definition;
let index = invocation.invocation-argument-count;
if (index < definition.preprocessing-macro-parameters.size)
  let parameter = definition.preprocessing-macro-parameters[index];
  invocation.invocation-arguments[parameter]
    := as(<simple-object-vector>, invocation.invocation-saved-tokens);

  if (index + 1 ~= definition.preprocessing-macro-parameters.size)
    preprocessor-error(token-dispatcher, start-position, end-position,
                       "too few arguments to %s macro (expected %d)",
                       invocation.invocation-name,
                       definition.preprocessing-macro-parameters.size);
  end if;
elseif (definition.preprocessing-macro-varargs-parameter)
  let parameter = definition.preprocessing-macro-varargs-parameter;
  invocation.invocation-arguments[parameter]
    := as(<simple-object-vector>, invocation.invocation-saved-tokens);
elseif (~empty?(invocation.invocation-saved-tokens))
  preprocessor-error(token-dispatcher, start-position, end-position,
                     "too many arguments to %s macro",
                     invocation.invocation-name);
end if;
            

Then we expand the macro's replacement list, substituting the parameter values for the parameter names where they occur.

<Finish processing of the macro invocation>+=

token-dispatcher.token-dispatcher-macro-invocation := #f;
token-dispatcher.token-dispatcher-state-function
  := invocation.invocation-state-function;

C-expand-replacement-list
  (token-dispatcher,
   invocation.invocation-name,
   definition,
   invocation.invocation-arguments,
   invocation.invocation-start-position,
   end-position);
            

When expanding a replacement list, we delay the processing of each item in order to allow it to be pasted onto the following item using the ## operator.

<Module cpr-internals>+=

define function C-expand-replacement-list
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     macro-name :: <byte-string>,
     definition :: <C-preprocessing-macro>,
     parameters :: false-or(<object-table>),
     start-position, end-position)
 => ();
  let replacement-list = definition.preprocessing-macro-replacement;
  let limit = replacement-list.size;
  let result-list = make(<stretchy-object-vector>);
  <Establish expansion-token-dispatcher>

  local
    <Local methods of C-expand-replacement-list>
    method loop (prev-token :: <C-preprocessing-token>,
                 prev-whitespace-token :: false-or(<C-preprocessing-token>),
                 i :: <integer>)
      if (i < limit)
        let (token, next) = item(i, #f);
        if (token.preprocessing-token-name == #"SHARPSHARP" & i + 1 < limit)
          let (paste-token, next) = item(i + 1, #t);
          let placemarker
            = paste(prev-token.preprocessing-token-name,
                    prev-token.preprocessing-token-value,
                    paste-token.preprocessing-token-name,
                    paste-token.preprocessing-token-value);
          loop(make(<C-preprocessing-token>,
                    name: #"PLACEMARKER", value: placemarker),
               #f, next);
        elseif (token.preprocessing-token-name == #"WHITESPACE")
          loop(prev-token, token, next)
        else
          output(prev-token, prev-whitespace-token);
          loop(token, #f, next);
        end if;
      else
        output(prev-token, prev-whitespace-token);
      end if;
    end method;

  if (limit > 0)
    let (token, next) = item(0, #f);
    loop(token, #f, next);
  end if;

  <Dispatch the tokens in result-list for re-expansion>
end function;
            

A replacement list item consists of a single preprocessing token, or a # operator followed by a parameter name. References to parameter names are also replaced with #"PARAMETER" markers.

<Local methods of C-expand-replacement-list>=

    method item
        (i :: <integer>, skip-whitespace? :: <boolean>)
     => (token :: <C-preprocessing-token>, next :: <integer>);
      let token :: <C-preprocessing-token> = replacement-list[i];
      let token-name = token.preprocessing-token-name;
      select (token-name)
        #"IDENTIFIER", #"BLUE-IDENTIFIER" =>
          let token-value = token.preprocessing-token-value;
          let expansion
            = parameters & element(parameters, token-value, default: #f);
          if (expansion)
            values(make(<C-preprocessing-token>,
                        name: #"PARAMETER", value: expansion),
                   i + 1)
          else
            values(token, i + 1);
          end if;
        #"SHARP" =>
          if (i + 1 < limit)
            let (pname-token :: <C-preprocessing-token>, next :: <integer>)
              = item(i + 1, #t);
            if (pname-token.preprocessing-token-name == #"PARAMETER")
              <Stringify the parameter value>
            else
              source-error(definition,
                           "A macro parameter name must follow the # operator");
            end if;
          else
            source-error(definition,
                         "A macro parameter name must follow the # operator");
          end if;
        #"WHITESPACE" =>
          if (skip-whitespace?)
            item(i + 1, #t)
          else
            values(token, i + 1)
          end if;
        otherwise =>
          values(token, i + 1)
      end select
    end,
            

<Stringify the parameter value>=

let stringified
  = with-output-to-string (str)
      write-element(str, '\"');
      for (param-token :: <C-preprocessing-token>
             in pname-token.preprocessing-token-value)
        let param-token-name = param-token.preprocessing-token-name;
        let param-token-value = param-token.preprocessing-token-value;    
        if (param-token-name == #"STRING-LITERAL"
              | param-token-name == #"CHARACTER-CONSTANT")
          for (c in param-token-value)
            if (c == '\\' | c == '\"')
              write-element(str, '\\');
            end if;
            write-element(str, c);
          end for;
        else
          write(str, aux-token-string(param-token-name, param-token-value));
        end if;
      end for;
      write-element(str, '\"');
    end;
values(make(<C-preprocessing-token>,
            name: #"STRING-LITERAL", value: stringified),
       next);
            

Token pasting placemarkers are represented as <string-stream> instances to which we can write the string representations of the tokens to be pasted.

<Local methods of C-expand-replacement-list>+=

    method paste
        (token-name, token-value, paste-token-name, paste-token-value)
     => (placemarker);
      let placemarker
        = if (token-name == #"PLACEMARKER")
            token-value
          else
            paste-aux(make(<string-stream>, direction: #"output"),
                      token-name, token-value);
          end if;
      paste-aux(placemarker, paste-token-name, paste-token-value);
    end method,
            
    method paste-aux
        (placemarker, token-name, token-value)
     => (placemarker);
      if (token-name == #"PARAMETER")
        for (param-token :: <C-preprocessing-token> in token-value)
          write(placemarker,
                aux-token-string(param-token.preprocessing-token-name,
                                 param-token.preprocessing-token-value));
        end for;
      else
        write(placemarker, aux-token-string(token-name, token-value));
      end if;
      placemarker
    end method,
            

It is possible for “painted” identifiers to appear during stringization or token pasting, so we handle that case specially.

<Local methods of C-expand-replacement-list>+=

    method aux-token-string(token-name :: <symbol>, token-value)
      if (token-name == #"BLUE-IDENTIFIER")
        token-value
      else
         preprocessor-token-string(token-name, token-value)
      end if
    end method,
            

Most tokens, excepting parameter names, are copied directly into the result list. If the macro name itself appears, we need to paint it blue to prevent its further expansion.

<Local methods of C-expand-replacement-list>+=

    method output
        (token :: <C-preprocessing-token>,
         whitespace-token :: false-or(<C-preprocessing-token>))
     => ();
      if (token.preprocessing-token-name == #"PARAMETER")
        <Output the expanded argument for this parameter>
      elseif (token.preprocessing-token-name == #"IDENTIFIER"
                & token.preprocessing-token-value == macro-name)
        add!(result-list,
             make(<C-preprocessing-token>,
                  name: #"BLUE-IDENTIFIER",
                  value: macro-name));
      else
        add!(result-list, token);
      end if;

      if (whitespace-token)
        add!(result-list, whitespace-token);
      end if;
    end method,
            

Argument values are re-expanded as if they were the the entire remainder of the preprocessing file input. We do this by creating a separate dispatcher to perform the expansion and store the resulting tokens into result-list.

<Establish expansion-token-dispatcher>=

let pp-translation-unit
  = token-dispatcher.token-dispatcher-preprocessing-translation-unit;
let macro-definitions = pp-translation-unit.preprocessing-macro-definitions;

let preprocessor-state = token-dispatcher.token-dispatcher-state;
let expansion-preprocessor-state
  = make(<C-preprocessor-state>,
         dialect: preprocessor-state.preprocessor-dialect,
         consumer:
           method (result-list, token-name, token-value,
                   rangemap, start-position, end-position) => ();
             <Store the expanded tokens in result-list>
           end,
         consumer-data: result-list);
let expansion-token-dispatcher
  = make(<C-preprocessor-token-dispatcher>,
         preprocessing-translation-unit:
           token-dispatcher.token-dispatcher-preprocessing-translation-unit,
         scanner: token-dispatcher.token-dispatcher-scanner,
         directory: token-dispatcher.token-dispatcher-directory,
         rangemap: token-dispatcher.token-dispatcher-rangemap,
         consumer-state-function: C-preprocess-token-expand-argument,
         consumer-state: expansion-preprocessor-state);
            

<Module cpr-internals>+=

define preprocessor-state-function expand-argument
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  select (token-name)
    #"IDENTIFIER" =>
      <Expand the identifier if it has a macro definition, and send it directly to the output otherwise>
    #"EOF" =>
      ; // Filter out EOF markers at the end of parameter expansions
    otherwise =>
      C-preprocessor-output-token(token-dispatcher, 
                                  token-number, token-name, token-value,
                                  start-position, end-position);
  end select;
end preprocessor-state-function;
            

If we encounter an identifier and its expansion is being suppressed, then we “paint it blue” (changing the token name to #"BLUE-IDENTIFIER") to prevent expansion in the future.

<Store the expanded tokens in result-list>=

select (token-name)
  #"IDENTIFIER" =>
    let definition = element(macro-definitions, token-value, default: #f);
    add!(result-list,
         make(<C-preprocessing-token>,
              name:
                if (definition & definition.preprocessing-macro-suppressed?)
                  #"BLUE-IDENTIFIER"
                else
                  #"IDENTIFIER"
                end if,
              value: token-value));
  otherwise =>
    add!(result-list,
         make(<C-preprocessing-token>,
              name: token-name, value: token-value));
end select;
            

We feed the argument tokens into the this dispatcher so they get expanded, with the expansion added to result-list.

<Output the expanded argument for this parameter>=

for (param-token :: <C-preprocessing-token> in token.preprocessing-token-value)
  let param-token-name = param-token.preprocessing-token-name;
  let param-token-value = param-token.preprocessing-token-value;    
  C-preprocessor-dispatch-token
    (expansion-token-dispatcher, 0, param-token-name, param-token-value,
     start-position, end-position);
end for;

C-preprocessor-dispatch-token
  (expansion-token-dispatcher, 0, #"EOF", #f, end-position, end-position);
            

<Dispatch the tokens in result-list for re-expansion>=

with-macro-suppressed (definition)
  for (token :: <C-preprocessing-token> in result-list)
    let token-name = token.preprocessing-token-name;
    if (token-name == #"PLACEMARKER")
      <Output the token paste placemarker token-value>
    else
      C-preprocessor-dispatch-token
        (token-dispatcher, 0,
         token-name, token.preprocessing-token-value,
         start-position, end-position);
    end if;
  end for;
end with-macro-suppressed;
            

To output the result of a token paste, we re-scan the resulting string. The resulting source location information is only approximate, but it's the best we can do for now.

<Output the token paste placemarker token-value>=

let scanner
  = make(<simple-lexical-scanner>,
         definition: $C-tokens,
         rangemap: token-dispatcher.token-dispatcher-rangemap);
scanner.scanner-source-position := start-position;
scan-tokens(scanner,
            C-preprocessor-dispatch-token,
            token-dispatcher,
            stream-contents(token.preprocessing-token-value));
            

Built-in Preprocessor Macros

The C standard specifies several built-in object-like preprocessor macros. We add these definitions in the initializer function for the <C-preprocessing-translation-unit-representation> class.

<Modules imported by the cpr-internals module>+=

use date;
            

<Module cpr-internals>+=

define constant $month-abbreviations
  = #["Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
            
define method initialize
    (instance :: <C-preprocessing-translation-unit-representation>,
     #key, #all-keys)
 => ();
  let now = current-date();
  let date-string
    = format-to-string("\"%s %2d %4d\"",
                       $month-abbreviations[now.date-month - 1],
                       now.date-day,
                       now.date-year);
  preprocessor-define(instance, "__DATE__", date-string);
  let time-string
    = format-to-string("\"%02d:%02d:%02d\"",
                       now.date-hours, now.date-minutes, now.date-seconds);
  preprocessor-define(instance, "__TIME__", time-string);

  preprocessor-add-macro-definition
    (instance, intern-string("__FILE__"),
     make(<C-preprocessing-macro>,
          replacement: #[],
          expander:
            method (dispatcher, name, start-position, end-position)
              let srcloc
                = range-source-location(dispatcher.token-dispatcher-rangemap,
                                        start-position, end-position);
              let literal
                = concatenate("\"", as(<string>, srcloc.source-file), "\"");
              dispatcher.token-dispatcher-state-function
                (dispatcher, 0, #"STRING-LITERAL", literal,
                 start-position, end-position);              
            end));

  preprocessor-add-macro-definition
    (instance, intern-string("__LINE__"),
     make(<C-preprocessing-macro>,
          replacement: #[],
          expander:
            method (dispatcher, name, start-position, end-position)
              let srcloc
                = range-source-location(dispatcher.token-dispatcher-rangemap,
                                        start-position, end-position);
              dispatcher.token-dispatcher-state-function
                (dispatcher,
                 0, #"PP-NUMBER", integer-to-string(srcloc.source-start-line),
                 start-position, end-position);              
            end));

  preprocessor-define(instance, "__STDC__", "1");
  preprocessor-define(instance, "__STDC_HOSTED__", "1");
  preprocessor-define(instance, "__STDC_VERSION__", "199901L");
end method;
            

Macro-Expanded Preprocessor Directives

Within certain preprocessor directives, macro invocations are expanded when they occur. The defined operator also needs to be handled specially.

<Module cpr-internals>+=

define constant $defined-operator :: <string> = intern-string("defined");
            
define preprocessor-state-function sharp-directive-expanded
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  let parser = preprocessor-state.preprocessor-directive-parser;

  select (token-name)
    #"WHITESPACE" =>
      if (simple-parser-can-consume-token?(parser, token-number, token-name))
        with-preprocessing-directive-restart (token-dispatcher, token-name)
          simple-parser-consume-token(parser, token-number, token-name, #f,
                                      start-position, end-position);
        end;
      end if;

    #"NEW-LINE" =>
      with-preprocessing-directive-restart (token-dispatcher, token-name)
        simple-parser-consume-token(parser, token-number, token-name,
                                    token-value,
                                    start-position, end-position);
      end;

      token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition
        := $C-tokens;
      enter-state(token-dispatcher, linestart);

    #"IDENTIFIER" =>
      token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition
        := $C-tokens;

      let pp-translation-unit
        = token-dispatcher.token-dispatcher-preprocessing-translation-unit;
      let definition
        = element(pp-translation-unit.preprocessing-macro-definitions,
                  token-value, default: #f);
      if (definition & ~definition.preprocessing-macro-suppressed?)
        if (C-preprocessing-macro-expand(token-dispatcher, definition,
                                         token-value,
                                         start-position, end-position, #f))
          enter-state(token-dispatcher, sharp-directive-macro-name)
        end if;
      elseif (token-value == $defined-operator)
        <Handle the defined operator>
      else
        with-preprocessing-directive-restart (token-dispatcher, token-name)
          simple-parser-consume-token(parser,
                                      token-number, token-name, token-value,
                                      start-position, end-position);
        end;
      end if;

    #"BLUE-IDENTIFIER" =>
      with-preprocessing-directive-restart (token-dispatcher, token-name)
        simple-parser-consume-token(parser, 0, #"IDENTIFIER", token-value,
                                    start-position, end-position);
      end;
    #"EOF" =>
      signal("File does not end in a newline");
      simple-parser-consume-token(parser, 0, #"NEW-LINE", #f,
                                  start-position, end-position);

    otherwise =>
      with-preprocessing-directive-restart (token-dispatcher, token-name)
        simple-parser-consume-token(parser, token-number, token-name,
                                    token-value,
                                    start-position, end-position);
      end;
  end select;
end preprocessor-state-function;
            

After we've seen the name of a function-like macro, we wait to see if what follows is an actual macro invocation. If it is not, we abandon the invocation and place the macro name into the output.

<Module cpr-internals>+=

define preprocessor-state-function sharp-directive-macro-name
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  select (token-name)
    #"LPAREN" =>
      enter-state(token-dispatcher, macro-arguments);

    #"WHITESPACE" =>
      ; // remain in this state

    otherwise =>
      let preprocessor-state = token-dispatcher.token-dispatcher-state;
      let parser = preprocessor-state.preprocessor-directive-parser;
      let (identifier-name, identifier-start-position, identifier-end-position)
        = abandon-macro-invocation(token-dispatcher);
      simple-parser-consume-token(parser, 0, #"IDENTIFIER", identifier-name,
                                  identifier-start-position,
                                  identifier-end-position);
      token-dispatcher.token-dispatcher-state-function
        (token-dispatcher, token-number, token-name, token-value,
         start-position, end-position);
  end select;
end preprocessor-state-function;
            

The defined operator is treated as a token by the directive parser.

<Token definitions>+=

token DEFINED;
            

<Handle the defined operator>=

simple-parser-consume-token(parser, 0, #"DEFINED", #f,
                            start-position, end-position);
enter-state(token-dispatcher, sharp-directive-defined);
            

The defined operator suppresses the expansion of the next following identifier.

<Module cpr-internals>+=

define preprocessor-state-function sharp-directive-defined
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     token-number, token-name :: <symbol>, token-value,
     start-position, end-position)
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  let parser = preprocessor-state.preprocessor-directive-parser;

  select (token-name)
    #"WHITESPACE" =>
      #f;

    #"NEW-LINE" =>
      simple-parser-consume-token(parser, token-number, token-name, token-value,
                                  start-position, end-position);

      enter-state(token-dispatcher, linestart);

    #"IDENTIFIER", #"BLUE-IDENTIFIER" =>
      simple-parser-consume-token(parser, 0, #"IDENTIFIER", token-value,
                                  start-position, end-position);
      enter-state(token-dispatcher, sharp-directive-expanded);

    #"EOF" =>
      signal("File does not end in a newline");
      simple-parser-consume-token(parser, 0, #"NEW-LINE", #f,
                                  start-position, end-position);

    otherwise =>
      simple-parser-consume-token(parser, token-number, token-name, token-value,
                                  start-position, end-position);
  end select;
end preprocessor-state-function;
            

Conditional Directives

Conditional groups can be nested pretty much arbitrarily deep. To keep track of whether a group is being skipped or not, we need to maintain a stack of flags.

<Additional slots in <C-preprocessor-state>>+=

constant slot preprocessor-skipping?-stack :: <stretchy-object-vector>
  = make(<stretchy-object-vector>);
            

To implement the #else and #elif directives, we need to keep track, at each stack level, of whether or not a true value has been seen in the current series of conditional directives. We also need to note when #else has been seen to prevent #else or #elif from being used again in the current series.

<Additional slots in <C-preprocessor-state>>+=

constant slot preprocessor-true-seen?-stack :: <stretchy-object-vector>
  = make(<stretchy-object-vector>);
constant slot preprocessor-else-seen?-stack :: <stretchy-object-vector>
  = make(<stretchy-object-vector>);
            

The preprocessor-begin-conditional function starts a new conditional group level and sets the preprocessor-skipping? slot of the preprocessor state.

<Module cpr-internals>+=

define function preprocessor-begin-conditional
    (token-dispatcher :: <C-preprocessor-token-dispatcher>, flag)
 => ();
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  preprocessor-state.preprocessor-skipping? := ~flag;
  add!(preprocessor-state.preprocessor-skipping?-stack, ~flag);
  add!(preprocessor-state.preprocessor-true-seen?-stack, flag);
  add!(preprocessor-state.preprocessor-else-seen?-stack, #f);
end function;
            

At the end of a preprocessing translation unit all conditional groups must be closed.

<Finalize the preprocessor>=

if (preprocessor-state.preprocessor-skipping?-stack.size ~= 0)
  preprocessor-error(preprocessor-token-dispatcher, end-position, end-position,
                     "Conditional directive left unterminated at end-of-file");
end if;
            

The #ifdef Directive

<Module cpr-internals>+=

define function if-skipping-function
    (token-dispatcher :: <C-preprocessor-token-dispatcher>)
 => (parse-directive? :: <boolean>);
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  add!(preprocessor-state.preprocessor-skipping?-stack, #t);
  add!(preprocessor-state.preprocessor-true-seen?-stack, #f);
  add!(preprocessor-state.preprocessor-else-seen?-stack, #f);
  #f;
end function;
              
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "ifdef",
                                skipping-function: if-skipping-function);
              

<C90 preprocessor directive grammar productions>+=

directive "ifdef" [IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>)
  let preprocessing-translation-unit
    = dispatcher.token-dispatcher-preprocessing-translation-unit;
  let definition
    = element(preprocessing-translation-unit.preprocessing-macro-definitions,
              IDENTIFIER,
              default: #f);
  preprocessor-begin-conditional(dispatcher, definition ~== #f);
              

The #ifndef Directive

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "ifndef",
                                skipping-function: if-skipping-function);
              

<C90 preprocessor directive grammar productions>+=

directive "ifndef" [IDENTIFIER] (dispatcher :: <C-preprocessor-token-dispatcher>)
  let preprocessing-translation-unit
    = dispatcher.token-dispatcher-preprocessing-translation-unit;
  let definition
    = element(preprocessing-translation-unit.preprocessing-macro-definitions,
              IDENTIFIER,
              default: #f);
  preprocessor-begin-conditional(dispatcher, ~definition);
              

The #if Directive

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "if",
                                skipping-function: if-skipping-function,
                                macro-replaced?: #t);
              

<C90 preprocessor directive grammar productions>+=

directive "if" [constant-expression] (dispatcher :: <C-preprocessor-token-dispatcher>)
  preprocessor-begin-conditional(dispatcher, ~zero?(constant-expression()));
              

Preprocessor constant expressions are similar to ordinary C language constant expressions, except that there are no casts, and there are only two types: a signed type equivalent to intmax_t, and an unsigned type equivalent to uintmax_t. There is no need to construct a parse tree for preprocessor constant expressions, but we do use closures (thunks, in particular) to delay evaluation for the benefit of the short-circuited logical operators.

Since preprocessing macros are expanded before they are seen by the #if directive parser, if an identifier is seen at this point it means it had no macro definition, and should therefore be replaced with 0.

<C90 preprocessor directive auxiliary grammar productions>+=

production primary-expression :: <function>
  => [IDENTIFIER] ()
  method() 0 end;
              
production primary-expression :: <function>
  => [PP-NUMBER] (data, srcloc)
  block ()
    let (value, type) = C-preprocessing-number-value(PP-NUMBER);
    always(value);
  exception (e :: <simple-error>)
    source-error(srcloc, "%s", e);
  end block;
production primary-expression :: <function>
  => [CHARACTER-CONSTANT] (data, srcloc)
  block ()
    let (value, type) = C-character-constant-value(CHARACTER-CONSTANT);
    always(value);
  exception (e :: <simple-error>)
    source-error(srcloc, "%s", e);
  end block;

              
production primary-expression :: <function>
  => [LPAREN constant-expression RPAREN];
              
production unary-expression :: <function> => [primary-expression];
production unary-expression :: <function>
  => [PLUS unary-expression] ()
  unary-expression;
production unary-expression :: <function>
  => [MINUS unary-expression] ()
  method() negative(unary-expression()) end;
production unary-expression :: <function>
  => [TILDE unary-expression] ()
  method () lognot(unary-expression()) end;
production unary-expression :: <function>
  => [BANG unary-expression] ()
  method ()
    if (zero?(unary-expression())) 1 else 0 end
  end;
production unary-expression :: <function>
  => [DEFINED IDENTIFIER] (dispatcher  :: <C-preprocessor-token-dispatcher>)
  let preprocessing-translation-unit
    = dispatcher.token-dispatcher-preprocessing-translation-unit;
  let definition
    = element(preprocessing-translation-unit.preprocessing-macro-definitions,
              IDENTIFIER,
              default: #f);
  always(if (definition) 1 else 0 end);
production unary-expression :: <function>
  => [DEFINED LPAREN IDENTIFIER RPAREN] (dispatcher  :: <C-preprocessor-token-dispatcher>)
  let preprocessing-translation-unit
    = dispatcher.token-dispatcher-preprocessing-translation-unit;
  let definition
    = element(preprocessing-translation-unit.preprocessing-macro-definitions,
              IDENTIFIER,
              default: #f);
  always(if (definition) 1 else 0 end);
              
production multiplicative-expression :: <function> => [unary-expression];
production multiplicative-expression :: <function>
  => [multiplicative-expression STAR unary-expression] ()
  method ()
    multiplicative-expression() * unary-expression()
  end;
production multiplicative-expression :: <function>
  => [multiplicative-expression SLASH unary-expression] (dispatcher, srcloc :: <source-location>)
  method ()
    let divisor = unary-expression();
    if (zero?(divisor)) source-error(srcloc, "division by zero") end if;
    truncate/(multiplicative-expression(), divisor)
  end;
production multiplicative-expression :: <function>
  => [multiplicative-expression PERCENT unary-expression] (dispatcher, srcloc :: <source-location>)
  method ()
    let divisor = unary-expression();
    if (zero?(divisor)) source-error(srcloc, "division by zero") end if;
    remainder(multiplicative-expression(), divisor)
  end;
              
production additive-expression :: <function> => [multiplicative-expression];
production additive-expression :: <function>
  => [additive-expression PLUS multiplicative-expression] ()
  method ()
    additive-expression() + multiplicative-expression()
  end;
production additive-expression :: <function>
  => [additive-expression MINUS multiplicative-expression] ()
  method ()
    additive-expression() - multiplicative-expression()
  end;
              
production shift-expression :: <function> => [additive-expression];
production shift-expression :: <function>
  => [shift-expression SHL additive-expression] ()
  method()
    ash(shift-expression(), additive-expression())
  end;
production shift-expression :: <function>
  => [shift-expression SHR additive-expression] ()
  method ()
    ash(shift-expression(), -additive-expression())
  end;
              
production relational-expression :: <function> => [shift-expression];
production relational-expression :: <function>
  => [relational-expression LT shift-expression] ()
  method ()
    if (relational-expression() < shift-expression()) 1 else 0 end
  end;
production relational-expression :: <function>
  => [relational-expression GT shift-expression] ()
  method ()
    if (relational-expression() > shift-expression()) 1 else 0 end
  end;
production relational-expression :: <function>
  => [relational-expression LE shift-expression] ()
  method ()
    if (relational-expression() <= shift-expression()) 1 else 0 end
  end;
              
production relational-expression :: <function>
  => [relational-expression GE shift-expression] ()
  method ()
    if (relational-expression() <= shift-expression()) 1 else 0 end
  end;
              
production equality-expression :: <function> => [relational-expression];
production equality-expression :: <function>
  => [equality-expression EQ relational-expression] ()
  method ()
    if (equality-expression() = relational-expression()) 1 else 0 end
  end;
production equality-expression :: <function>
  => [equality-expression NE relational-expression] ()
  method ()
    if (equality-expression() ~= relational-expression()) 1 else 0 end
  end;
              
production AND-expression :: <function> => [equality-expression];
production AND-expression :: <function>
  => [AND-expression AMP equality-expression] ()
  method ()
    logand(AND-expression(), equality-expression())
  end;
              
production exclusive-OR-expression :: <function> => [AND-expression];
production exclusive-OR-expression :: <function>
  => [exclusive-OR-expression HAT AND-expression] ()
  method ()
    logxor(exclusive-OR-expression(), AND-expression())
  end;
              
production inclusive-OR-expression :: <function> => [exclusive-OR-expression];
production inclusive-OR-expression :: <function>
  => [inclusive-OR-expression OR exclusive-OR-expression] ()
  method ()
    logior(inclusive-OR-expression(), exclusive-OR-expression())
  end;
              
production logical-AND-expression :: <function> => [inclusive-OR-expression];
production logical-AND-expression :: <function>
  => [logical-AND-expression AMPAMP inclusive-OR-expression] ()
  method ()
    if (~zero?(logical-AND-expression()) & ~zero?(inclusive-OR-expression()))
      1
    else
      0
    end
  end;
              
production logical-OR-expression :: <function> => [logical-AND-expression];
production logical-OR-expression :: <function>
  => [logical-OR-expression OROR logical-AND-expression] ()
  method ()
    if (~zero?(logical-OR-expression()) | ~zero?(logical-AND-expression()))
      1
    else
      0
    end
  end;
              
production conditional-expression :: <function> => [logical-OR-expression];
production conditional-expression :: <function>
  => [logical-OR-expression QUEST constant-expression
                            COLON conditional-expression] ()
  method ()
    if (~zero?(logical-OR-expression()))
      constant-expression()
    else
      conditional-expression()
    end
  end;
              
production constant-expression :: <function> => [conditional-expression];
production constant-expression :: <function>
  => [constant-expression COMMA conditional-expression] ()
  conditional-expression;
              

The #elif Directive

<Module cpr-internals>+=

define function else-skipping-function
    (token-dispatcher :: <C-preprocessor-token-dispatcher>)
 => (parse-directive? :: <boolean>);
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  let index = preprocessor-state.preprocessor-skipping?-stack.size - 1;
  index = 0 | ~preprocessor-state.preprocessor-skipping?-stack[index - 1]
end function;
              
define-C-preprocessor-directive($C90-C-preprocessor-dialect, "elif",
                                skipping-function: else-skipping-function,
                                macro-replaced?: #t);
              

<C90 preprocessor directive grammar productions>+=

directive "elif" [constant-expression] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  let preprocessor-state = dispatcher.token-dispatcher-state;
  let index = preprocessor-state.preprocessor-skipping?-stack.size - 1;
  if (index >= 0)
    if (preprocessor-state.preprocessor-else-seen?-stack[index])
      source-error(srcloc, "#else has already appeared in this if-section");
    end if;
    let flag
      = ~zero?(constant-expression())
      & ~preprocessor-state.preprocessor-true-seen?-stack[index];
    preprocessor-state.preprocessor-skipping?-stack[index] := ~flag;
    preprocessor-state.preprocessor-skipping? := ~flag;
    if (flag)
      preprocessor-state.preprocessor-true-seen?-stack[index] := #t;
    end if;
  else
    source-error(srcloc, "#elif outside of #ifdef/#ifndef/#if");
  end if;
              

The #else Directive

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "else",
                                skipping-function: else-skipping-function);
              

<C90 preprocessor directive grammar productions>+=

directive "else" [] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  let preprocessor-state = dispatcher.token-dispatcher-state;
  let index = preprocessor-state.preprocessor-skipping?-stack.size - 1;
  if (index >= 0)
    if (preprocessor-state.preprocessor-else-seen?-stack[index])
      source-error(srcloc, "#else has already appeared in this if-section");
    end if;
    let skipping = preprocessor-state.preprocessor-true-seen?-stack[index];
    preprocessor-state.preprocessor-skipping?-stack[index] := skipping;
    preprocessor-state.preprocessor-skipping? := skipping;
    if (~skipping)
      preprocessor-state.preprocessor-true-seen?-stack[index] := #t;
    end if;
    preprocessor-state.preprocessor-else-seen?-stack[index] := #t;
  else
    source-error(srcloc, "#else outside of #ifdef/#ifndef/#if");
  end if;
              

The #endif Directive

The #endif directive terminates the current if-section, popping the stack and resetting preprocessor-skipping? for the new stack top.

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "endif",
                                skipping-function: method(dispatcher) #t end);
              

<C90 preprocessor directive grammar productions>+=

directive "endif" [] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  let preprocessor-state = dispatcher.token-dispatcher-state;
  let index = preprocessor-state.preprocessor-skipping?-stack.size - 1;
  if (index >= 0)
    preprocessor-state.preprocessor-skipping?-stack.size := index;
    preprocessor-state.preprocessor-true-seen?-stack.size := index;
    preprocessor-state.preprocessor-else-seen?-stack.size := index;
    if (index = 0)
      preprocessor-state.preprocessor-skipping? := #f;
    else
      preprocessor-state.preprocessor-skipping?
        := preprocessor-state.preprocessor-skipping?-stack[index - 1];
    end if;
  else
    source-error(srcloc, "#endif outside of #ifdef/#ifndef/#if");
  end if;
              

Other Standard Preprocessor Directives

The #include Directive

The #include directive is slightly special, because the lexical tokens for header names can only be recognized after the include.

<Module cpr-internals>+=

define constant $C-include-directive-tokens
  = simple-lexical-definition
      token EOF;

      // Whitespace and comments
      token WHITESPACE = "([ \t\f]|/\\*([^*]|\\*+[^*/])*\\*+/)+";

      // New-line characters and line-end comments
      token NEW-LINE = "(//[^\r\n]*)?(\n|\r|\r\n)";

      // Identifiers
      token IDENTIFIER :: <string> = "[a-zA-Z_][0-9a-zA-Z_]*",
        semantic-value-function:
          method (token-string :: <byte-string>,
                  token-start :: <integer>,
                  token-end :: <integer>);
            intern-string(token-string, start: token-start, end: token-end)
          end;
            
      token HEADER-NAME :: <string> = "<[^\n\r>]+>",
        semantic-value-function:
          method (token-string :: <byte-string>,
                  token-start :: <integer>,
                  token-end :: <integer>);
            as(<file-locator>,
               copy-sequence(token-string,
                             start: token-start + 1, end: token-end - 1))
          end;
        
      token SOURCE-NAME :: <string> = "\"[^\n\r\"]+\"",
        semantic-value-function:
          method (token-string :: <byte-string>,
                  token-start :: <integer>,
                  token-end :: <integer>);
            as(<file-locator>,
               copy-sequence(token-string,
                             start: token-start + 1, end: token-end - 1))
          end;
    end;
              

<Module cpr-internals>+=

define-C-preprocessor-directive
  ($C90-C-preprocessor-dialect, "include",
   macro-replaced?: #t, lexical-definition: $C-include-directive-tokens);
              

<C90 preprocessor directive grammar productions>+=

directive "include" [HEADER-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  do-include(dispatcher, srcloc, #"HEADER-NAME", HEADER-NAME);
              
directive "include" [SOURCE-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  do-include(dispatcher, srcloc, #"SOURCE-NAME", SOURCE-NAME);
              

As a result of macro expansion, string literal tokens can also appear.

<C90 preprocessor directive grammar productions>+=

directive "include" [STRING-LITERAL] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  let filename // FIXME
    = copy-sequence(STRING-LITERAL, start: 1, end: STRING-LITERAL.size - 1);
  do-include(dispatcher, srcloc, #"SOURCE-NAME", as(<file-locator>, filename));
              

<Module cpr-internals>+=

define function do-include
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     srcloc :: <source-location>,
     kind :: one-of(#"HEADER-NAME", #"SOURCE-NAME"),
     name :: <file-locator>)
 => ();
  token-dispatcher.token-dispatcher-scanner.scanner-lexical-definition
    := $C-tokens;
  
  let preprocessing-translation-unit
    = token-dispatcher.token-dispatcher-preprocessing-translation-unit;
  let preprocessor-state = token-dispatcher.token-dispatcher-state;
  let rangemap = token-dispatcher.token-dispatcher-rangemap;

  <Save the current source location as here>
  block ()
    <Preprocess the given include file and >
    <Update the scanner to use the new source-position, and reset the source location>
  exception (e :: <file-system-error>)
    source-error(srcloc, "%s", e);
  end block;
end function;
              

Our idea of the current source location will change once we start preprocessing the included file, so we save the source location immediately following the #include directive so we can restore it later.

<Save the current source location as here>=

let end-position
  = token-dispatcher.token-dispatcher-scanner.scanner-source-position;
let here
  = range-source-location(rangemap, end-position, end-position);
              

We then use do-with-C-header-file to locate the file to be included, reset the current source location to be at the beginning of line 1 of that file, and preprocess it.

<Preprocess the given include file and >=

let (directory, search-path)
  = select (kind)
      #"HEADER-NAME" =>
        values(#f,
               preprocessing-translation-unit
                 .preprocessing-system-header-search-path);
      #"SOURCE-NAME" =>
        values(token-dispatcher.token-dispatcher-directory,
               preprocessing-translation-unit
                 .preprocessing-header-search-path);
    end select;
let new-source-position
  = do-with-C-header-file
      (name, directory, search-path,
       method
           (found-locator :: <file-locator>, stream :: <stream>)
        => (new-end-position :: <integer>);
         rangemap-add-line-file(rangemap, end-position, 1, found-locator);
         preprocess-C-stream(preprocessing-translation-unit,
                             stream, found-locator.locator-directory,
                             rangemap,
                             preprocessor-state.preprocessor-consumer,
                             preprocessor-state.preprocessor-consumer-data,
                             start-position: end-position + 1);
       end);
              

When preprocessing of the file completes we re-adjust our current scanner's idea of the current source position, and map the position to correspond to the source location immediately following the #include directive.

<Update the scanner to use the new source-position, and reset the source location>=

token-dispatcher.token-dispatcher-scanner.scanner-source-position
  := new-source-position;
rangemap-add-line-file(rangemap, new-source-position,
                       here.source-start-line, here.source-file);
              

The #line Directive

The #line directive ammends the rangemap to change its idea of the line number (and possibly the filename) represented by the current place in the input.

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "line",
                                macro-replaced?: #t);
              

<C90 preprocessor directive grammar productions>+=

directive "line" [PP-NUMBER] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  let line = string-to-integer(PP-NUMBER);
  if (line <= 0)
    source-error(srcloc, "#line number out of range (must be 1 or greater)")
  end if;
  let here = dispatcher.token-dispatcher-scanner.scanner-source-position;
  rangemap-add-line(dispatcher.token-dispatcher-rangemap, here, line);
              
directive "line" [PP-NUMBER STRING-LITERAL] (dispatcher :: <C-preprocessor-token-dispatcher>, srcloc :: <source-location>)
  let line = string-to-integer(PP-NUMBER);
  let filename // FIXME
    = copy-sequence(STRING-LITERAL, start: 1, end: STRING-LITERAL.size - 1);
  if (line <= 0)
    source-error(srcloc, "#line number must be 1 or greater")
  end if;
  let here = dispatcher.token-dispatcher-scanner.scanner-source-position;
  rangemap-add-line-file(dispatcher.token-dispatcher-rangemap, here,
                         line, as(<file-locator>, filename));
              

The #error Directive

The #error signals an error with the specified preprocessing tokens in the error message.

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "error");
              

<C90 preprocessor directive grammar productions>+=

directive "error" [pp-tokens-opt] (dispatcher, srcloc :: <source-location>)
  let string
    = with-output-to-string (str)
        for (token :: <C-preprocessing-token> in pp-tokens-opt)
           write-element(str, ' ');
           write(str,
                 preprocessor-token-string(token.preprocessing-token-name,
                                           token.preprocessing-token-value));
        end for;
      end;
  source-error(srcloc, "#error%s", string);
              

The #pragma Directive

<Module cpr-internals>+=

define-C-preprocessor-directive($C90-C-preprocessor-dialect, "pragma");
              

<C90 preprocessor directive grammar productions>+=

directive "pragma" [pp-tokens-opt] ()
  #f;
              

GNU C Preprocessing Directives

In addition to the additional varargs syntax described below, the GNU C preprocessor supports a number of additional extensions.

The #ident Directive

The #ident directive, originally from System V:

<Module cpr-internals>+=

define-C-preprocessor-directive($gnu89-C-preprocessor-dialect, "ident");
              

<GNU C preprocessor directive grammar productions>+=

directive "ident" [STRING-LITERAL] ()
  #f;
              

The #warning Directive

The #warning signals an warning with the specified preprocessing tokens in the warningmessage.

<Module cpr-internals>+=

define-C-preprocessor-directive($gnu89-C-preprocessor-dialect, "warning");
              

<GNU C preprocessor directive grammar productions>+=

directive "warning" [pp-tokens-opt] (dispatcher, srcloc :: <source-location>)
  let string
    = with-output-to-string (str)
        for (token :: <C-preprocessing-token> in pp-tokens-opt)
           write-element(str, ' ');
           write(str,
                 preprocessor-token-string(token.preprocessing-token-name,
                                           token.preprocessing-token-value));
        end for;
      end;
  source-warning(srcloc, "#warning%s", string);
              

The #include_next Directive

<Module cpr-internals>+=

define-C-preprocessor-directive
  ($gnu89-C-preprocessor-dialect, "include_next",
   macro-replaced?: #t, lexical-definition: $C-include-directive-tokens);
              

<GNU C preprocessor directive grammar productions>+=

directive "include_next" [HEADER-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>)
  do-include-next(dispatcher, #"HEADER-NAME", HEADER-NAME);
              
directive "include_next" [SOURCE-NAME] (dispatcher :: <C-preprocessor-token-dispatcher>)
  do-include-next(dispatcher, #"SOURCE-NAME", SOURCE-NAME);
              

<Module cpr-internals>+=

define function do-include-next
    (token-dispatcher :: <C-preprocessor-token-dispatcher>,
     kind :: one-of(#"HEADER-NAME", #"SOURCE-NAME"),
     name :: <file-locator>)
 => ();
  error("#include_next is not yet supported");
end function;
              

Addendum: Trigraph Processing

We include support for trigraphs more in the interest of standards compliance than any actual usefulness in practice.

Trigraph processing requires a simple state machine that keeps track of how many consecutive ? questions we have encountered.

<Initialize the trigraph state>=

let trigraph-state = #f;
            

<Copy text and translate trigraphs from buf to text>=

trigraph-state
  :=  iterate trigraph-loop (buf-index :: <integer> = buf.buffer-next,
                             text-index :: <integer> = 0,
                             trigraph-state = trigraph-state)
        if (buf-index < buf.buffer-end)
          let c = as(<character>, buf[buf-index]);
          select (trigraph-state)
            <Trigraph handling state cases>
          end select;
        else
          trigraph-state
        end if;
      end iterate;
            

In the initial trigraph processing state, we copy the current character to the output buffer unless it is a ?. If it is a ? character, we transition to the next state without copying.

<Trigraph handling state cases>=

#f =>
  if (c == '?')
    trigraph-loop(buf-index + 1, text-index, #"?");
  else
    text[text-index] := c;
    trigraph-loop(buf-index + 1, text-index + 1, #f);
  end if;
              
            

The next (single-?) state is similar. If we see a non-? character in the next state, then we need to copy in the preceding ? character, since we skipped copying it in the initial state.

<Trigraph handling state cases>+=

#"?" =>
  if (c == '?')
    trigraph-loop(buf-index + 1, text-index, #"??");
  else
    text[text-index] := '?';
    text[text-index + 1] := c;
    trigraph-loop(buf-index + 1, text-index + 2, #f);
  end if;
            

Once we have seen ??, we look at the following character, and replace it if it corresponds to one of the nine trigraph sequences:

<Trigraph handling state cases>+=

#"??" =>
  select (c)
    '='  =>
      text[text-index] := '#';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    '('  =>
      text[text-index] := '[';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    '/'  =>
      text[text-index] := '\\';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    ')'  =>
      text[text-index] := ']';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    '\'' =>
      text[text-index] := '^';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    '<' =>
      text[text-index] := '{';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    '!'  =>
      text[text-index] := '|';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    '>' =>
      text[text-index] := '}';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    '-'  =>
      text[text-index] := '~';
      trigraph-loop(buf-index + 1, text-index + 1, #f);
    <Cases for other characters>
  end select;
            

If we see a third ?, we copy the first one into the output and remain in the ?? state.

<Cases for other characters>=

    '?' =>
      text[text-index] := '?';
      trigraph-loop(buf-index + 1, text-index + 1, #"??");
            

For any other character we copy it, preceded by the deferred ?? characters, and return to the initial state.

<Cases for other characters>+=

    otherwise =>
      text[text-index] := '?';
      text[text-index + 1] := '?';
      text[text-index + 2] := c;
      trigraph-loop(buf-index + 1, text-index + 3, #f);
            

In the unlikely event that trigraph-state is not #f at the end of the input stream, for completeness we feed the remaining ? characters into the scanner.

<Finalize the trigraph state>=

select (trigraph-state)
  #f =>
    ;
  #"?" =>
    scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
                                "?", 0, 1);
  #"??" =>
    scan-C-preprocessing-tokens(preprocessor-token-dispatcher, scanner,
                                "??", 0, 2);
end select;
            

C Language Dialects

The C Parser

We parse C files by instantiating a <simple-parser> object and feeding the preprocessor into it via the screener.

<Parse the source file locator into translation-unit>=

let rangemap = make(<source-location-rangemap>);
rangemap-add-line-file(rangemap, 0, 1, locator);
let parser = make(<simple-parser>,
                  automaton: $C99-parser-automaton,
                  start-symbol: #"translation-unit-opt",
                  rangemap: rangemap,
                  consumer-data: translation-unit);
let end-position = 
  with-open-file(stream = locator)
    preprocess-C-stream(translation-unit
                          .translation-unit-preprocessing-translation-unit,
                        stream, locator.locator-directory, rangemap,
                        C-screen-token, parser);
  end;
simple-parser-consume-token(parser, 0, #"EOF", #f, end-position, end-position)
          

Token Screening

The screener takes preprocessing tokens and separates the general categories IDENTIFIER and PP-NUMBER into more specific token types.

<Module cpr-internals>+=

define function C-screen-token
    (parser :: <simple-parser>, token-name, token-value,
     rangemap, start-position, end-position)
 => ();
  select (token-name)
    #"IDENTIFIER" =>
      <Screen an identifier token>
    #"PP-NUMBER" =>
      <Screen a preprocessing number token>
    otherwise =>
      simple-parser-consume-token(parser, 0, token-name, token-value,
                                  start-position, end-position)
  end select;
end;
            

C99 defines the following reserved words.

<Token definitions>+=

token AUTO;
token BREAK;
token _CASE;
token CHAR;
token CONST;
token CONTINUE;
token DEFAULT;
token DO;
token DOUBLE;
token _ELSE;
token ENUM;
token EXTERN;
token FLOAT;
token _FOR;
token GOTO;
token _IF;
token INLINE;
token INT;
token LONG;
token REGISTER;
token RESTRICT;
token RETURN;
token SHORT;
token SIGNED;
token SIZEOF;
token STATIC;
token STRUCT;
token SWITCH;
token TYPEDEF;
token UNION;
token UNSIGNED;
token VOID;
token VOLATILE;
token _WHILE;
token BOOL;
token COMPLEX;
token IMAGINARY;
            

<Module cpr-internals>+=

define constant $C99-reserved-words :: <object-table>
  = begin
      let spec =
        #[#("auto" . #"AUTO"),
          #("break" . #"BREAK"),
          #("case" . #"_CASE"),
          #("char" . #"CHAR"),
          #("const" . #"CONST"),
          #("continue" . #"CONTINUE"),
          #("default" . #"DEFAULT"),
          #("do" . #"DO"),
          #("double" . #"DOUBLE"),
          #("else" . #"_ELSE"),
          #("enum" . #"ENUM"),
          #("extern" . #"EXTERN"),
          #("float" . #"FLOAT"),
          #("for" . #"_FOR"),
          #("goto" . #"GOTO"),
          #("if" . #"_IF"),
          #("inline" . #"INLINE"),
          #("int" . #"INT"),
          #("long" . #"LONG"),
          #("register" . #"REGISTER"),
          #("restrict" . #"RESTRICT"),
          #("return" . #"RETURN"),
          #("short" . #"SHORT"),
          #("signed" . #"SIGNED"),
          #("sizeof" . #"SIZEOF"),
          #("static" . #"STATIC"),
          #("struct" . #"STRUCT"),
          #("switch" . #"SWITCH"),
          #("typedef" . #"TYPEDEF"),
          #("union" . #"UNION"),
          #("unsigned" . #"UNSIGNED"),
          #("void" . #"VOID"),
          #("volatile" . #"VOLATILE"),
          #("while" . #"_WHILE"),
          #("_Bool" . #"BOOL"),
          #("_Complex" . #"COMPLEX"),
          #("_Imaginary" . #"IMAGINARY")];
      let reserved-words = make(<object-table>, size: spec.size);
      for (word in spec)
        reserved-words[intern-string(word.head)] := word.tail;
      end for;
      reserved-words
    end;
            

<Screen an identifier token>=

let reserved-word-token-name
  = element($C99-reserved-words, token-value, default: #f);
if (reserved-word-token-name)
  simple-parser-consume-token(parser, 0, reserved-word-token-name, #f,
                              start-position, end-position)
else
  simple-parser-consume-token(parser, 0, token-name, token-value,
                              start-position, end-position)
end if;
            

<Token definitions>+=

token TYPEDEF-NAME :: <string>; // FIXME
token INTEGER-CONSTANT :: <integer>;
token FLOATING-CONSTANT :: <string>; // FIXME
            

FIXME This is bletcherous.

<Screen a preprocessing number token>=

simple-parser-consume-token(parser, 0, #"INTEGER-CONSTANT",
                            string-to-integer(token-value),
                            start-position, end-position);
            

C Grammar

<Module cpr-internals>+=

<Definitions used within $C99-grammar-productions>
define constant $C99-grammar-productions
 = simple-grammar-productions
     <Grammar productions for C99>
   end;
            

A C translation unit consists of zero or more external declarations.

<Grammar productions for C99>=

production translation-unit-opt => [/* empty */];
production translation-unit-opt => [translation-unit-opt external-declaration];
            
production external-declaration => [function-definition];
production external-declaration => [declaration];
            

<Module cpr-internals>+=

define constant $C99-parser-automaton
  = simple-parser-automaton($C-tokens, $C99-grammar-productions,
                            #[#"translation-unit-opt"]);
            

Declarations and Types

<Grammar productions for C99>+=

production declaration => [declaration-specifiers init-declarator-list-opt SEMI];
          
production declaration-specifiers-opt => [/* empty */];
production declaration-specifiers-opt => [declaration-specifiers];

production declaration-specifiers
  => [storage-class-specifier declaration-specifiers-opt];

production declaration-specifiers
  => [type-specifier declaration-specifiers-opt];

production declaration-specifiers
  => [type-qualifier declaration-specifiers-opt];

production declaration-specifiers
  => [function-specifier declaration-specifiers-opt];
          
production init-declarator-list-opt :: <sequence> => [/* empty */] () #[];
production init-declarator-list-opt :: <sequence> => [init-declarator-list];

production init-declarator-list => [init-declarator];
production init-declarator-list => [init-declarator-list COMMA init-declarator];
          
production init-declarator => [declarator];
production init-declarator => [declarator EQUALS initializer];
          

Storage Class Specifiers

<Definitions used within $C99-grammar-productions>=

define constant <C-storage-class-specifier>
  = one-of(#"TYPEDEF", #"EXTERN", #"STATIC", #"AUTO", #"REGISTER");
            

<Grammar productions for C99>+=

production storage-class-specifier :: <C-storage-class-specifier>
    => [TYPEDEF] (builder)
  #"TYPEDEF";

production storage-class-specifier :: <C-storage-class-specifier>
    => [EXTERN] (builder)
  #"EXTERN";

production storage-class-specifier :: <C-storage-class-specifier>
    => [STATIC] (builder)
  #"STATIC";

production storage-class-specifier :: <C-storage-class-specifier>
    => [AUTO] (builder)
  #"AUTO";

production storage-class-specifier :: <C-storage-class-specifier>
    => [REGISTER] (builder)
  #"REGISTER";
            

Type Specifiers

<Definitions used within $C99-grammar-productions>+=

define constant <C-simple-type-specifier>
  = one-of(#"VOID", #"CHAR", #"SHORT", #"INT", #"LONG", #"FLOAT", #"DOUBLE",
           #"SIGNED", #"UNSIGNED", #"BOOL", #"COMPLEX", #"IMAGINARY");
            

<Grammar productions for C99>+=

production type-specifier :: <C-simple-type-specifier>
    => [VOID] (builder)
  #"VOID";
production type-specifier :: <C-simple-type-specifier>
    => [CHAR] (builder)
  #"CHAR";
production type-specifier :: <C-simple-type-specifier>
    => [SHORT] (builder)
  #"SHORT";
production type-specifier :: <C-simple-type-specifier>
    => [INT] (builder)
  #"INT";
production type-specifier :: <C-simple-type-specifier>
    => [LONG] (builder)
  #"LONG";
production type-specifier :: <C-simple-type-specifier>
    => [FLOAT] (builder)
  #"FLOAT";
production type-specifier :: <C-simple-type-specifier>
    => [DOUBLE] (builder)
  #"DOUBLE";
production type-specifier :: <C-simple-type-specifier>
    => [SIGNED] (builder)
  #"SIGNED";
production type-specifier :: <C-simple-type-specifier>
    => [UNSIGNED] (builder)
  #"UNSIGNED";
production type-specifier :: <C-simple-type-specifier>
    => [BOOL] (builder)
  #"BOOL";
production type-specifier :: <C-simple-type-specifier>
    => [COMPLEX] (builder)
  #"COMPLEX";
production type-specifier :: <C-simple-type-specifier>
    => [IMAGINARY] (builder)
  #"IMAGINARY";
production type-specifier => [struct-or-union-specifier];
production type-specifier => [enum-specifier];
production type-specifier => [TYPEDEF-NAME];
            

Structure and Union Specifiers

<Grammar productions for C99>+=

production struct-or-union-specifier
  => [struct-or-union LBRACE struct-declaration-list RBRACE];
production struct-or-union-specifier
  => [struct-or-union IDENTIFIER LBRACE struct-declaration-list RBRACE];
              
production struct-or-union-specifier => [struct-or-union IDENTIFIER];

production struct-or-union => [STRUCT];
production struct-or-union => [UNION];
              
production struct-declaration-list => [struct-declaration];
production struct-declaration-list
  => [struct-declaration-list struct-declaration];
              
production struct-declaration
  => [specifier-qualifier-list struct-declarator-list SEMI];
              
production specifier-qualifier-list-opt 
  => [/* empty */];
production specifier-qualifier-list-opt
  => [specifier-qualifier-list];

production specifier-qualifier-list
  => [type-specifier specifier-qualifier-list-opt];
production specifier-qualifier-list
  => [type-qualifier specifier-qualifier-list-opt];
              
production struct-declarator-list
  => [struct-declarator];
production struct-declarator-list
  => [struct-declarator-list COMMA struct-declarator];
              
production struct-declarator => [declarator];
production struct-declarator => [declarator-opt COLON constant-expression];
              

Enumeration Specifiers

<Grammar productions for C99>+=

production enum-specifier => [ENUM LBRACE enumerator-list RBRACE];
production enum-specifier => [ENUM LBRACE enumerator-list COMMA RBRACE];
production enum-specifier => [ENUM IDENTIFIER LBRACE enumerator-list RBRACE];
production enum-specifier
  => [ENUM IDENTIFIER LBRACE enumerator-list COMMA RBRACE];
production enum-specifier => [ENUM IDENTIFIER];
              
production enumerator-list => [enumerator];
production enumerator-list => [enumerator-list COMMA enumerator];
              
production enumerator => [IDENTIFIER];
production enumerator => [IDENTIFIER EQUALS constant-expression];
              

Type Qualifiers

<Grammar productions for C99>+=

production type-qualifier :: singleton(#"CONST") => [CONST] ()
  #"CONST";
production type-qualifier :: singleton(#"RESTRICT") => [RESTRICT] ()
  #"RESTRICT";
production type-qualifier :: singleton(#"VOLATILE") => [VOLATILE] ()
  #"VOLATILE";
            

Function Specifiers

<Grammar productions for C99>+=

production function-specifier :: singleton(#"INLINE") => [INLINE] ()
  #"INLINE";
            

Declarators

<Grammar productions for C99>+=

production declarator-opt => [/* empty */];
production declarator-opt => [declarator];
            
production declarator => [pointer-opt direct-declarator];
            
production direct-declarator
  => [IDENTIFIER];
production direct-declarator
  => [LPAREN declarator RPAREN];
production direct-declarator
  => [direct-declarator LBRACK type-qualifier-list-opt assignment-expression-opt RBRACK];
production direct-declarator
  => [direct-declarator LBRACK STATIC type-qualifier-list-opt assignment-expression RBRACK];
production direct-declarator
  => [direct-declarator LBRACK type-qualifier-list STATIC assignment-expression RBRACK];
production direct-declarator
  => [direct-declarator LBRACK type-qualifier-list-opt STAR RBRACK];
production direct-declarator
  => [direct-declarator LPAREN parameter-type-list RPAREN];
production direct-declarator
  => [direct-declarator LPAREN identifier-list-opt RPAREN];
            
production pointer-opt => [/* empty */];
production pointer-opt => [pointer];
            
production pointer => [STAR type-qualifier-list-opt];
production pointer => [STAR type-qualifier-list-opt pointer];
            
production type-qualifier-list-opt => [/* empty */];
production type-qualifier-list-opt => [type-qualifier-list];

production type-qualifier-list => [type-qualifier];
production type-qualifier-list => [type-qualifier-list type-qualifier];
            
production parameter-type-list-opt => [/* empty */];
production parameter-type-list-opt => [parameter-type-list];

production parameter-type-list => [parameter-list];
production parameter-type-list => [parameter-list COMMA ELIPSIS];
	    
production parameter-list => [parameter-declaration];
production parameter-list => [parameter-list COMMA parameter-declaration];
            
production parameter-declaration => [declaration-specifiers declarator];

production parameter-declaration => [declaration-specifiers abstract-declarator-opt];
            
production identifier-list-opt => [/* empty */];
production identifier-list-opt => [identifier-list];

production identifier-list => [IDENTIFIER];
production identifier-list => [identifier-list COMMA IDENTIFIER];
            

Type Names

<Grammar productions for C99>+=

production type-name => [specifier-qualifier-list abstract-declarator-opt];
            
production abstract-declarator-opt => [/* empty */];
production abstract-declarator-opt => [abstract-declarator];

production abstract-declarator => [pointer];
production abstract-declarator => [pointer-opt direct-abstract-declarator];
            
production direct-abstract-declarator => [LPAREN abstract-declarator RPAREN];
production direct-abstract-declarator => [LBRACK assignment-expression-opt RBRACK];
production direct-abstract-declarator => [LBRACK STAR RBRACK];
production direct-abstract-declarator => [LPAREN parameter-type-list-opt RPAREN];
production direct-abstract-declarator => [direct-abstract-declarator LBRACK assignment-expression-opt RBRACK];
production direct-abstract-declarator => [direct-abstract-declarator LBRACK STAR RBRACK];
production direct-abstract-declarator => [direct-abstract-declarator LPAREN parameter-type-list-opt RPAREN];
	    

Initialization

<Grammar productions for C99>+=

production initializer => [assignment-expression];
production initializer => [LBRACE initializer-list RBRACE];
production initializer => [LBRACE initializer-list COMMA RBRACE];
            
production initializer-list => [designation-opt initializer];
production initializer-list => [initializer-list COMMA designation-opt initializer];
            
production designation-opt => [/* empty */];
production designation-opt => [designator-list EQUALS];

production designator-list => [designator];
production designator-list => [designator-list designator];
            
production designator => [LBRACK constant-expression RBRACK];
production designator => [DOT IDENTIFIER];
            

Variables and Scopes

<Grammar productions for C99>+=

          

Functions

<Grammar productions for C99>+=

production function-definition
  => [declaration-specifiers declarator declaration-list-opt compound-statement];
          
production declaration-list-opt => [/* empty */];
production declaration-list-opt => [declaration-list-opt declaration];
          

Expressions

Constants

<Grammar productions for C99>+=

make-production constant :: <C-constant-expression-representation>
    => [INTEGER-CONSTANT],
  value: INTEGER-CONSTANT;
make-production constant :: <C-constant-expression-representation>
    => [FLOATING-CONSTANT],
  value: FLOATING-CONSTANT;
make-production constant :: <C-constant-expression-representation>
    => [CHARACTER-CONSTANT],
  value: CHARACTER-CONSTANT;
              
production concatenated-string-literal :: <string>
    => [STRING-LITERAL] (builder)
  STRING-LITERAL; // FIXME
production concatenated-string-literal :: <string>
    => [concatenated-string-literal STRING-LITERAL] (builder)
  concatenate(concatenated-string-literal, STRING-LITERAL); // FIXME
              

Primary Expressions

<Grammar productions for C99>+=

make-production primary-expression
  :: <C-variable-reference-expression-representation>
    => [IDENTIFIER],
  variable: IDENTIFIER;
  
production primary-expression => [constant];

make-production primary-expression
  :: <C-string-literal-expression-representation>
    => [concatenated-string-literal],
  value: concatenated-string-literal;

production primary-expression => [LPAREN expression RPAREN];
            

Postfix Operator Expressions

<Grammar productions for C99>+=

production postfix-expression => [primary-expression];

production postfix-expression :: <C-unary-expression-representation>
    => [postfix-expression LBRACK expression RBRACK] (builder, srcloc)
  make(<C-unary-expression-representation>,
       source-location: srcloc,
       operator: #"DEREF",
       operand: make(<C-binary-expression-representation>,
                     source-location: srcloc,
                     operator: #"ADD",
                     left: postfix-expression,
                     right: expression));

make-production postfix-expression
  :: <C-function-call-expression-representation>
    => [postfix-expression LPAREN argument-expression-list-opt RPAREN],
  function: postfix-expression,
  arguments: argument-expression-list-opt;

make-production postfix-expression :: <C-member-expression-representation>
    => [postfix-expression DOT IDENTIFIER],
  operand: postfix-expression,
  name: IDENTIFIER;

production postfix-expression :: <C-member-expression-representation>
    => [postfix-expression ARROW IDENTIFIER] (builder, srcloc)
  make(<C-member-expression-representation>,
       source-location: srcloc,
       operand: make(<C-unary-expression-representation>,
                     source-location: srcloc,
                     operator: #"DEREF",
                     operand: postfix-expression),
       name: IDENTIFIER);

make-production postfix-expression :: <C-unary-expression-representation>
    => [postfix-expression INC],
  operator: #"POSTINC",
  operand: postfix-expression;

make-production postfix-expression :: <C-unary-expression-representation>
    => [postfix-expression DEC],
  operator: #"POSTDEC",
  operand: postfix-expression;

production postfix-expression
  => [LPAREN type-name RPAREN LBRACE initializer-list RBRACE];
production postfix-expression
  => [LPAREN type-name RPAREN LBRACE initializer-list COMMA RBRACE];
              
production argument-expression-list-opt :: <sequence> => [/* empty */] () #[];
production argument-expression-list-opt :: <sequence>
    => [argument-expression-list];

production argument-expression-list
  => [assignment-expression];
production argument-expression-list
  => [argument-expression-list COMMA assignment-expression];
            

Unary Operator Expressions

<Grammar productions for C99>+=

production unary-expression => [postfix-expression];

make-production unary-expression :: <C-unary-expression-representation>
    => [INC unary-expression],
  operator: #"PREINC",
  operand: unary-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [DEC unary-expression],
  operator: #"PREDEC",
  operand: unary-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [AMP cast-expression],
  operator: #"ADDROF",
  operand: cast-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [STAR cast-expression],
  operator: #"DEREF",
  operand: cast-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [PLUS cast-expression],
  operator: #"PLUS",
  operand: cast-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [MINUS cast-expression],
  operator: #"MINUS",
  operand: cast-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [TILDE cast-expression],
  operator: #"BITWISE-NOT",
  operand: cast-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [BANG cast-expression],
  operator: #"LOGICAL-NOT",
  operand: cast-expression;

make-production unary-expression :: <C-unary-expression-representation>
    => [SIZEOF unary-expression],
  operator: #"SIZEOF",
  operand: unary-expression;

make-production unary-expression :: <C-sizeof-type-expression-representation>
    => [SIZEOF LPAREN type-name RPAREN],
  sizeof-type: type-name;
            

Cast Operator Expressions

<Grammar productions for C99>+=

production cast-expression => [unary-expression];

make-production cast-expression :: <C-cast-expression-representation>
    => [LPAREN type-name RPAREN cast-expression],
  type: type-name,
  operand: cast-expression;
            

Multiplicative Operator Expressions

<Grammar productions for C99>+=

production multiplicative-expression => [cast-expression];

make-production multiplicative-expression :: <C-binary-expression-representation>
    => [multiplicative-expression STAR cast-expression],
  operator: #"MUL", left: multiplicative-expression, right: cast-expression;

make-production multiplicative-expression :: <C-binary-expression-representation>
  => [multiplicative-expression SLASH cast-expression],
  operator: #"DIV", left: multiplicative-expression, right: cast-expression;

make-production multiplicative-expression :: <C-binary-expression-representation>
  => [multiplicative-expression PERCENT cast-expression],
  operator: #"MOD", left: multiplicative-expression, right: cast-expression;
            

Additive Expressions

<Grammar productions for C99>+=

production additive-expression => [multiplicative-expression];

make-production additive-expression :: <C-binary-expression-representation>
    => [additive-expression PLUS multiplicative-expression],
  operator: #"ADD",
  left: additive-expression,
  right: multiplicative-expression;

make-production additive-expression :: <C-binary-expression-representation>
    => [additive-expression MINUS multiplicative-expression],
  operator: #"SUB",
  left: additive-expression,
  right: multiplicative-expression;
            

Bitwise Shift Operator Expressions

<Grammar productions for C99>+=

production shift-expression => [additive-expression];

make-production shift-expression :: <C-binary-expression-representation>
    => [shift-expression SHL additive-expression],
  operator: #"SHL",
  left: shift-expression,
  right: additive-expression;

make-production shift-expression :: <C-binary-expression-representation>
    => [shift-expression SHR additive-expression],
  operator: #"SHR",
  left: shift-expression,
  right: additive-expression;
            

Relational Operator Expressions

<Grammar productions for C99>+=

production relational-expression => [shift-expression];

make-production relational-expression :: <C-binary-expression-representation>
    => [relational-expression LT shift-expression],
  operator: #"LT",
  left: relational-expression,
  right: shift-expression;

make-production relational-expression :: <C-binary-expression-representation>
    => [relational-expression GT shift-expression],
  operator: #"GT",
  left: relational-expression,
  right: shift-expression;

make-production relational-expression :: <C-binary-expression-representation>
    => [relational-expression LE shift-expression],
  operator: #"LE",
  left: relational-expression,
  right: shift-expression;

make-production relational-expression :: <C-binary-expression-representation>
    => [relational-expression GE shift-expression],
  operator: #"GE",
  left: relational-expression,
  right: shift-expression;
            

Equality Operator Expressions

<Grammar productions for C99>+=

production equality-expression => [relational-expression];

make-production equality-expression :: <C-binary-expression-representation>
    => [equality-expression EQ relational-expression],
  operator: #"EQ",
  left: equality-expression,
  right: relational-expression;

make-production equality-expression :: <C-binary-expression-representation>
    => [equality-expression NE relational-expression],
  operator: #"NE",
  left: equality-expression,
  right: relational-expression;
            

Bitwise Operator Expressions

<Grammar productions for C99>+=

production AND-expression => [equality-expression];

make-production AND-expression :: <C-binary-expression-representation>
    => [AND-expression AMP equality-expression],
  operator: #"BITWISE-AND",
  left: AND-expression,
  right: equality-expression;
            
production exclusive-OR-expression => [AND-expression];

make-production exclusive-OR-expression :: <C-binary-expression-representation>
    => [exclusive-OR-expression HAT AND-expression],
  operator: #"BITWISE-XOR",
  left: exclusive-OR-expression,
  right: AND-expression;
            
production inclusive-OR-expression => [exclusive-OR-expression];

make-production inclusive-OR-expression :: <C-binary-expression-representation>
    => [inclusive-OR-expression OR exclusive-OR-expression],
  operator: #"BITWISE-XOR",
  left: inclusive-OR-expression,
  right: exclusive-OR-expression;
            

Logical Operator Expressions

<Grammar productions for C99>+=

production logical-AND-expression => [inclusive-OR-expression];

make-production logical-AND-expression :: <C-binary-expression-representation>
  => [logical-AND-expression AMPAMP inclusive-OR-expression],
  operator: #"LOGICAL-AND",
  left: logical-AND-expression,
  right: inclusive-OR-expression;
            
production logical-OR-expression => [logical-AND-expression];

make-production logical-OR-expression :: <C-binary-expression-representation>
    => [logical-OR-expression OROR logical-AND-expression],
  operator: #"LOGICAL-OR",
  left: logical-OR-expression,
  right: logical-AND-expression;
            

Conditional Operator Expressions

<Grammar productions for C99>+=

production conditional-expression => [logical-OR-expression];

make-production conditional-expression :: <C-conditional-expression-representation>
    => [logical-OR-expression QUEST expression COLON conditional-expression],
  condition: logical-OR-expression,
  true: expression,
  false: conditional-expression;
            

Assignment Operator Expressions

<Grammar productions for C99>+=

production assignment-expression-opt => [/* empty */];
production assignment-expression-opt => [assignment-expression];
            

Since all of the compound assignment operators are the composition of a binary arithmetic expression and an assignment expression, we can provide a function to generate the reduce actions for each of these operators.

<Definitions used within $C99-grammar-productions>+=

define function assignment-action
    (operator :: <C-binary-expression-operator>)
 => (function :: <function>);
  method (p :: <simple-parser>, data, s, e)
    let srcloc = simple-parser-source-location(p, s, e);
    make(<C-binary-expression-representation>,
         source-location: srcloc,
         operator: #"ASSIGN",
         left: p[0],
         right: make(<C-binary-expression-representation>,
                     source-location: srcloc,
                     operator: operator,
                     left: p[0],
                     right: p[2]));
  end;
end function;
            

<Grammar productions for C99>+=

production assignment-expression => [conditional-expression];

make-production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression EQUALS assignment-expression],
  operator: #"ASSIGN", 
  left: unary-expression,
  right: assignment-expression;

production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression STAR-EQUALS assignment-expression],
  action: assignment-action(#"MUL");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression SLASH-EQUALS assignment-expression],
  action: assignment-action(#"DIV");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression PERCENT-EQUALS assignment-expression],
  action: assignment-action(#"MOD");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression PLUS-EQUALS assignment-expression],
  action: assignment-action(#"ADD");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression MINUS-EQUALS assignment-expression],
  action: assignment-action(#"SUB");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression SHL-EQUALS assignment-expression],
  action: assignment-action(#"SHL");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression SHR-EQUALS assignment-expression],
  action: assignment-action(#"SHR");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression AMP-EQUALS assignment-expression],
  action: assignment-action(#"BITWISE-AND");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression HAT-EQUALS assignment-expression],
  action: assignment-action(#"BITWISE-XOR");
production assignment-expression :: <C-binary-expression-representation>
  => [unary-expression OR-EQUALS assignment-expression],
  action: assignment-action(#"BITWISE-OR");
            

Comma Operator Expressions

<Grammar productions for C99>+=

production expression-opt => [/* empty */];
production expression-opt => [expression];
            

<Grammar productions for C99>+=

production expression => [assignment-expression];

make-production expression :: <C-binary-expression-representation>
  => [expression COMMA assignment-expression],
  operator: #"COMMA",
  left: expression,
  right: assignment-expression;
            

Constant Expressions

<Grammar productions for C99>+=

production constant-expression => [conditional-expression];
            

Printing Expressions

<Modules imported by the cpr-internals module>+=

  use print;
  use pprint;
  use format;
            

<Module cpr-internals>+=

define method print-C-expression
    (expression :: <C-constant-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  print(expression.expression-value, stream);
end method;
            
define method print-C-expression
    (expression :: <C-string-literal-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  print(expression.expression-value, stream);
end method;
            
define method print-C-expression
    (expression :: <C-variable-reference-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  write(stream, expression.expression-variable);
end method;
            
define method print-C-expression
    (expression :: <C-function-reference-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  // FIXME
end method;
            
define method print-C-expression
    (expression :: <C-unary-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  local
    method do-operator (name :: <byte-string>,
                        operator-level :: <integer>,
                        operand-level :: <integer>,
                        postfix? :: <boolean>)
      let parens? = operator-level > level;
      printing-logical-block(stream, prefix: parens? & "(",
                                     suffix: parens? & ")")
        unless (postfix?) write(stream, name) end;
        print-C-expression(expression.expression-unary-operand, stream,
                           level: operand-level);
        if (postfix?) write(stream, name) end;
      end;
    end;
  select (expression.expression-operator)
    #"POSTINC" =>
      do-operator("++", 1, 1, #t);
    #"POSTDEC" =>
      do-operator("--", 1, 1, #t);
    #"PREINC" =>
      do-operator("++", 2, 2, #f);
    #"PREDEC" =>
      do-operator("--", 2, 2, #f);
    #"ADDROF" =>
      do-operator("&", 2, 2, #f);
    #"DEREF" =>
      do-operator("*", 2, 2, #f);
    #"PLUS" =>
      do-operator("+", 2, 2, #f);
    #"MINUS" =>
      do-operator("-", 2, 2, #f);
    #"BITWISE-NOT" =>
      do-operator("~", 2, 2, #f);
    #"LOGICAL-NOT" =>
      do-operator("!", 2, 2, #f);
    #"SIZEOF" =>
      do-operator("sizeof ", 2, 2, #f);
    #"ALIGNOF" =>
      do-operator("__alignof__ ", 2, 2, #f);
  end;
end method;
            
define method print-C-expression
    (expression :: <C-binary-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  local
    method do-operator (name :: <byte-string>, operator-level :: <integer>)
      let parens? = operator-level > level;
      printing-logical-block(stream, prefix: parens? & "(",
                                     suffix: parens? & ")")
        print-c-expression(expression.expression-binary-left, stream,
                           level: operator-level);
        unless (name = ",") write-element(stream, ' ') end;
        pprint-newline(#"fill", stream);
        format(stream, "%s ", name);
        print-C-expression(expression.expression-binary-right, stream,
                           level: operator-level - 1);
      end;
    end;
  select (expression.expression-operator)
    #"MUL" =>
      do-operator("*", 4);
    #"DIV" =>
      do-operator("/", 4);
    #"MOD" =>
      do-operator("%", 4);
    #"ADD" =>
      do-operator("+", 5);
    #"SUB" =>
      do-operator("-", 5);
    #"SHL" =>
      do-operator("<<;", 6);
    #"SHR" =>
      do-operator(">>", 6);
    #"LT" =>
      do-operator("<", 7);
    #"LE" =>
      do-operator("<=", 7);
    #"GT" =>
      do-operator(">", 7);
    #"GE" =>
      do-operator(">=", 7);
    #"EQ" =>
      do-operator("==", 8);
    #"NE" =>
      do-operator("!=", 8);
    #"BITWISE-AND" =>
      do-operator("&", 9);
    #"BITWISE-XOR" =>
      do-operator("^", 10);
    #"BITWISE-OR" =>
      do-operator("|", 11);
    #"LOGICAL-AND" =>
      do-operator("&&", 12);
    #"LOGICAL-OR" =>
      do-operator("||", 13);
    #"ASSIGN" =>
      do-operator("=", 15);
    #"COMMA" =>
      do-operator(",", 16);
  end;
end method;
            
define method print-C-expression
    (expression :: <C-conditional-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  let parens? = level < 14;
  printing-logical-block(stream, prefix: parens? & "(",
                                 suffix: parens? & ")")
    print-C-expression(expression.expression-conditional-condition, stream,
                       level: 13);
    write-element(stream, ' ');
    pprint-newline(#"fill", stream);
    write(stream, "? ");
    print-C-expression(expression.expression-conditional-true, stream,
                       level: 16);
    write-element(stream, ' ');
    pprint-newline(#"fill", stream);
    write(stream, ": ");
    print-C-expression(expression.expression-conditional-false, stream,
                       level: 14);
  end;
end method;
            
define method print-C-expression
    (expression :: <C-cast-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  let parens? = level < 3;
  printing-logical-block(stream, prefix: parens? & "(",
                                 suffix: parens? & ")")
    write-element(stream, '(');
    // FIXME
    write(stream, ") ");
    print-C-expression(expression.expression-cast-operand, stream, level: 3);
  end;
end method;
            
define method print-C-expression
    (expression :: <C-sizeof-type-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  write(stream, "sizeof( ");
  // FIXME
  write-element(stream, ')');
end method;
            
define method print-C-expression
    (expression :: <C-function-call-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  print-C-expression(expression.expression-call-function, stream, level: 1);
  printing-logical-block(stream, prefix: "(", suffix: ")")
    for (argument in expression.expression-call-arguments,
         first? = #t then #f)
      unless (first?)
        write(stream, ", ");
        pprint-newline(#"fill", stream);
      end;
      print-C-expression(argument, stream, level: 15);
    end for;
  end;
end method;
            
define method print-C-expression
    (expression :: <C-member-expression-representation>,
     stream :: <stream>,
     #key level :: <integer> = $precedence-level-assignment-expression)
 => ();
  print-C-expression(expression.expression-member-operand, stream, level: 1);
  format(stream, ".%s", expression.expression-member-name);
end method;
            

<Module cpr-internals>+=

define method print-message
    (expression :: <C-expression-representation>, stream :: <stream>)
 => ();
  print-C-expression(expression, stream,
                     level: $precedence-level-assignment-expression);
end method;

            

Statements

The C grammar as described in the ISO standard contains a single ambiguity that prevents it from being strictly LALR(1), namely the frequenly-encountered if/else ambiguity. We will resolve this using the standard techique of splitting statements into two categories, open (possibly containing an if with an unmatched else) and closed (containing no unpaired if statements.)

<Grammar productions for C99>+=

production statement => [open-statement];
production statement => [closed-statement];
          

<Grammar productions for C99>+=

production open-statement => [open-labeled-statement];
production open-statement => [open-selection-statement];
production open-statement => [open-iteration-statement];
          

<Grammar productions for C99>+=

production closed-statement => [closed-labeled-statement];
production closed-statement => [compound-statement];
production closed-statement => [expression-statement];
production closed-statement => [closed-selection-statement];
production closed-statement => [closed-iteration-statement];
production closed-statement => [jump-statement];
          

Labeled Statements

<Grammar productions for C99>+=

production open-labeled-statement
  => [IDENTIFIER COLON open-statement];
production open-labeled-statement
  => [_CASE constant-expression COLON open-statement];
production open-labeled-statement
  => [DEFAULT COLON open-statement];
            
production closed-labeled-statement
  => [IDENTIFIER COLON closed-statement];
production closed-labeled-statement
  => [_CASE constant-expression COLON closed-statement];
production closed-labeled-statement
  => [DEFAULT COLON closed-statement];
            

Compound Statements

<Grammar productions for C99>+=

production compound-statement => [LBRACE block-item-list-opt RBRACE];
            
production block-item-list-opt => [/* empty */];
production block-item-list-opt => [block-item-list-opt block-item];
            
production block-item => [declaration];
production block-item => [statement];
            

Expression and Null Statements

<Grammar productions for C99>+=

production expression-statement => [expression-opt SEMI];
            

Selection Statements

<Grammar productions for C99>+=

production open-selection-statement
    => [_IF LPAREN expression RPAREN statement];

production open-selection-statement
    => [_IF LPAREN expression RPAREN closed-statement _ELSE open-statement];

production open-selection-statement
    => [SWITCH LPAREN expression RPAREN open-statement];

            
production closed-selection-statement
    => [_IF LPAREN expression RPAREN closed-statement _ELSE closed-statement];

production closed-selection-statement
    => [SWITCH LPAREN expression RPAREN closed-statement];
            

Iteration Statements

<Grammar productions for C99>+=

production open-iteration-statement
    => [_WHILE LPAREN expression RPAREN open-statement];

production open-iteration-statement
    => [_FOR LPAREN expression-opt SEMI expression-opt SEMI expression-opt RPAREN open-statement];

production open-iteration-statement
    => [_FOR LPAREN declaration expression-opt SEMI expression-opt RPAREN open-statement];
            
production closed-iteration-statement
    => [_WHILE LPAREN expression RPAREN closed-statement];

production closed-iteration-statement
    => [DO statement _WHILE LPAREN expression RPAREN SEMI];

production closed-iteration-statement
    => [_FOR LPAREN expression-opt SEMI expression-opt SEMI expression-opt RPAREN closed-statement];

production closed-iteration-statement
    => [_FOR LPAREN declaration expression-opt SEMI expression-opt RPAREN closed-statement];
            

Jump Statements

<Grammar productions for C99>+=

production jump-statement => [GOTO IDENTIFIER SEMI];
production jump-statement => [CONTINUE SEMI];
production jump-statement => [BREAK SEMI];
production jump-statement => [RETURN expression-opt SEMI];