Simple Compiler

Peter S. Housel

March 2003


Copyright

This is the Monday Simple Compiler example program.

Copyright ©2003 Peter S. Housel.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


Introduction

This program is a compiler for a simple programming language called “p2k”. It is intended to illustrate several salient aspects of a modern optimizing compiler, in particular:

The project was implemented in three phases, each illustrating one of the above.

The compiler is written in the Dylan language. Every Dylan program has a Dylan-user module, containing library and module declarations:

<Module Dylan-user>=

define library simple-compiler
  use common-Dylan;
  <Libraries imported and modules exported by the p2kcomp library>
end library;
          
<Module definitions for the p2kcomp library>

Lexical Analyzer

The p2k-lexer module contains the lexical analyzer for the p2k language.

<Libraries imported and modules exported by the p2kcomp library>=

use io;
export p2k-lexer;
          

<Module definitions for the p2kcomp library>=

define module p2k-lexer
  use common-Dylan;
  use streams;
  <Names imported to and exported from the p2k-lexer module>
end module;
          

Instances of the <p2k-lexer> class contain the state information for the p2k lexical analyzer.

<Names imported to and exported from the p2k-lexer module>=

export <p2k-lexer>;
          

When a <p2k-lexer> object is instantiated, an input stream must be specified with the stream: keyword.

<Module p2k-lexer>=

define class <p2k-lexer> (<object>)
  constant slot lexer-stream :: <stream>,
    required-init-keyword: stream:;
  <Additional slots in the <p2k-lexer> class>
end class;
          

The lexer will be implemented as a simple state machine, with function calls representing state transitions. It was constructed with the help of a lexgen program (which I wrote in 1997 while waiting around at my in-laws' house), capable of converting a collection of regular expressions into a deterministic finite automaton. The rest was done manually because of the limitations of that tool.

The initial state looks for the beginning of a token, and returns many single-character tokens immediately. Tokens are represented as <symbol> objects.

<Module p2k-lexer>+=

define method lexer-state-0 (stream :: <stream>)
  if(stream-at-end?(stream))
    #"EOF";
  else
    let char = read-element(stream);
    select(char)
      ' ', '\t', '\n', '\r' =>
        lexer-state-0(stream);
      '[' =>
        #"[";
      '(' =>
        lexer-state-lparen(stream);
      ')' =>
        #")";
      ']' =>
        #"]";
      '*' =>
        #"*";
      '+' =>
        #"+";
      ',' =>
        #",";
      ':' =>
        lexer-state-colon(stream);
      '-' =>
        #"-";
      '.' =>
        lexer-state-dot(stream);
      ';' =>
        #";";
      '<' =>
        lexer-state-lt(stream);
      '=' =>
        #"=";
      '>' =>
        lexer-state-gt(stream);
      otherwise =>
        if(('a' <= char & char <= 'z')
           | ('A' <= char & char <= 'Z'))
  	  lexer-state-alpha(stream, make(<string>, size: 1, fill: char));
        elseif('0' <= char & char <= '9')
          let val = as(<integer>, char) - as(<integer>, '0');
	  lexer-state-digit(stream, val);
        else
          error("illegal input character: '%c'", char);
        end if;
    end select;
  end if;
end method;
          

Comments are delimited with (* and *), and may not be nested.

<Module p2k-lexer>+=

define method lexer-state-lparen (stream :: <stream>)
  select(peek(stream))
    '*' =>
      read-element(stream);
      lexer-state-comment(stream);
    otherwise =>
      #"(";
  end select;
end method;
          
define method lexer-state-comment (stream :: <stream>)
  select(read-element(stream))
      '*' =>
        lexer-state-comend(stream);
      otherwise =>
        lexer-state-comment(stream);
  end select;
end method;
          
define method lexer-state-comend (stream :: <stream>)
  select(read-element(stream))
    ')' =>
      lexer-state-0(stream);
    '*' =>
      lexer-state-comend(stream);
    otherwise =>
      lexer-state-comment(stream);
  end select;
end method;
          

Most of the remaining states are straightforward.

<Module p2k-lexer>+=

define method lexer-state-colon (stream :: <stream>)
  select(peek(stream))
    '=' =>
      read-element(stream);
      #":=";
    otherwise =>
      #":";
  end select;
end method;
          
define method lexer-state-dot (stream :: <stream>)
  select(peek(stream))
    '.' =>
      read-element(stream);
      #"..";
    otherwise =>
      #".";
  end select;
end method;
          
define method lexer-state-lt (stream :: <stream>)
  select(peek(stream))
    '=' =>
      read-element(stream);
      #"<=";
    '>' =>
      read-element(stream);
      #"<>";
    otherwise =>
      #"<";
  end select;
end method;
          
define method lexer-state-gt (stream :: <stream>)
  select(peek(stream))
    '=' =>
      read-element(stream);
      #">=";
    otherwise =>
      #">";
  end select;
end method;
          

When we recognize an identifier, we return both the token symbol #"IDENT" and the token string.

<Module p2k-lexer>+=

define method lexer-state-alpha (stream :: <stream>, string :: <string>)
  let char = peek(stream);
  if(('a' <= char & char <= 'z')
     | ('A' <= char & char <= 'Z')
     | ('0' <= char & char <= '9'))
    lexer-state-alpha(stream, add(string, read-element(stream)));
  else
    values(#"IDENT", string);
  end if;
end method;
          

Similarly for numeric tokens.

<Module p2k-lexer>+=

define method lexer-state-digit
    (stream :: <stream>, number :: <integer>)
  let char = peek(stream);
  if('0' <= char & char <= '9')
    let val = as(<integer>, read-element(stream))
              - as(<integer>, '0');
    lexer-state-digit(stream, number * 10 + val);
  else
    values(#"NUMBER", number);
  end if;
end method;
          

Since our parser will be a recursive-descent parser, the easiest way to use the lexer will be via the match method. If the current input token matches the specified token identifier, then the match method will return #t and consume the input token. After a successful match, token-value will return the token string or numeric value for the appropriate token types.

<Names imported to and exported from the p2k-lexer module>+=

export match;
export token-value;
          

<Additional slots in the <p2k-lexer> class>=

slot current-token :: false-or(<symbol>) = #f;
slot token-value :: <object> = #f;
          

<Module p2k-lexer>+=

define method match
    (lexer :: <p2k-lexer>, match-token :: <symbol>,
     #key value, consume = #t)
 => (result :: <boolean>);
  if(lexer.current-token == #f)
    let (token, #rest vals) = lexer-state-0(lexer.lexer-stream);
    lexer.current-token := token;
    unless(empty?(vals))
      lexer.token-value := first(vals);
    end;
  end if;
  if(lexer.current-token == match-token)
    if(value)
      if(value = lexer.token-value)
        if(consume) lexer.current-token := #f; end;
        #t;
      else
        #f;
      end if;
    else
      if(consume) lexer.current-token := #f; end;
      #t;
    end if;
  else
    #f;
  end if;
end method;
          

The expect method signals an error if a token is not matched.

<Names imported to and exported from the p2k-lexer module>+=

export expect;
          

<Module p2k-lexer>+=

define method expect
    (lexer :: <p2k-lexer>, match-token :: <symbol>,
     #key value, consume = #t)
 => (result :: <boolean>);
  unless(match(lexer, match-token, value: value, consume: consume))
    error("Syntax error: %s expected, got %s",
      if(value)
        value;
      else
        as(<string>, match-token);
      end if, as(<string>, lexer.current-token));
  end;
end method;
          

Program Representation

One of the main points of this exercise is to illustrate Single Static Assignment internal representations. Here we'll define the SSA representation that will provide the central structure for our compiler, and methods for printing out the IR in human-readable form.

<Libraries imported and modules exported by the p2kcomp library>+=

export p2k-rep;
          

<Module definitions for the p2kcomp library>+=

define module p2k-rep
  use common-dylan, exclude: { format-to-string };
  use streams;
  use format;
  <Names imported into and exported from the p2k-rep module>
end module;
          

Variables and Environments

An environment maps variable names to denotations. We'll implement variable environments as a linear <list> of <p2k-denotation> objects. This will make variable scoping easy.

<Names imported into and exported from the p2k-rep module>=

export <p2k-denotation>, denotation-name, denotation-scope-depth;
export <p2k-constant-denotation>, denotation-constant-value;
export <p2k-type-denotation>, denotation-type-kind, denotation-type-base;
export denotation-type-array-min, denotation-type-array-max;
export <p2k-var-denotation>, denotation-var-type;
export denotation-var-storage-base, denotation-var-storage-base-setter;
export <p2k-procedure-denotation>, denotation-procedure;
            

<Module p2k-rep>=

define abstract class <p2k-denotation> (<object>)
  constant slot denotation-name :: <string>,
    required-init-keyword: name:;
  constant slot denotation-scope-depth :: <integer>,
    required-init-keyword: scope-depth:;
end class;
            

Names in p2k can denote constants, types, variables, or procedures. We'll define <p2k-denotation> subclasses for each of these.

<Module p2k-rep>+=

define class <p2k-constant-denotation> (<p2k-denotation>)
  constant slot denotation-constant-value :: <integer>,
    required-init-keyword: constant-value:;
end class;
            
define class <p2k-type-denotation> (<p2k-denotation>)
  constant slot denotation-type-kind :: one-of(#"array", #"integer"),
    required-init-keyword: kind:;
  constant slot denotation-type-base :: false-or(<p2k-type-denotation>)
    = #f, init-keyword: type-base:;
  constant slot denotation-type-array-min :: false-or(<integer>) = #f,
    init-keyword: type-array-min:;
  constant slot denotation-type-array-max :: false-or(<integer>) = #f,
    init-keyword: type-array-max:;
end class;
            
define class <p2k-var-denotation> (<p2k-denotation>)
  constant slot denotation-var-type :: <p2k-type-denotation>,
    required-init-keyword: type:;
  slot denotation-var-storage-base :: <integer>,
    required-init-keyword: storage-base:;
end class;
            
define class <p2k-procedure-denotation> (<p2k-denotation>)
  constant slot denotation-procedure :: false-or(<p2k-procedure>),
    required-init-keyword: procedure:;
end class;
            

The sizeof-p2k-type method computes the storage size (in 8-bit bytes) of objects of a given type. INTEGER quantities are assumed to be 32 bits wide.

<Names imported into and exported from the p2k-rep module>+=

export sizeof-p2k-type;
            

<Module p2k-rep>+=

define method sizeof-p2k-type
    (type :: <p2k-type-denotation>)
 => (size :: <integer>);
  select(type.denotation-type-kind)
    #"integer" =>
     4;
    #"array" =>
     (type.denotation-type-array-max - type.denotation-type-array-min + 1)
       * sizeof-p2k-type(type.denotation-type-base);
  end select;
end method;
            

The extend-environment returns an environment extended with a denotation of a given class. The locate-denotation locates a denotation in an environment.

<Names imported into and exported from the p2k-rep module>+=

export extend-environment, locate-denotation;
            

<Module p2k-rep>+=

define method extend-environment
    (env :: <list>, class :: <class>, #rest keys)
 => (new-env :: <list>);
  pair(apply(make, class, keys), env);
end method;
            
define method locate-denotation
    (env :: <list>, name :: <string>)
 => (denotation :: false-or(<p2k-denotation>));
  if(empty?(env))
    #f;
  elseif(env.head.denotation-name = name)
    env.head;
  else
    locate-denotation(env.tail, name);
  end if;
end method;
            

Instructions

The program representation will be centered around a simple two-address instruction type. Each instruction will take up to two operands, and has an implicit result value of its own. Instruction operands may be integer constants, variables, or (the implicit result value of) other instructions.

<Names imported into and exported from the p2k-rep module>+=

export <p2k-operand>, p2k-constant?;
export <p2k-instruction>;
export instruction-number, instruction-containing-block, instruction-opcode;
export instruction-operand-x, instruction-operand-x-setter;
export instruction-operand-y, instruction-operand-y-setter;
            

<Module p2k-rep>+=

define constant <p2k-operand>
  = type-union(<integer>, <p2k-instruction>, singleton(#f));
            
define method p2k-constant?
    (operand :: <p2k-operand>)
 => (constant? :: <boolean>);
  instance?(operand, <integer>);
end method;
            

The instructions will also contain an automatically-initialized number field for identification purposes during debugging.

<Module p2k-rep>+=

define variable *instruction-count* :: <integer> = 0;
            
define class <p2k-instruction> (<object>)
  constant slot instruction-number :: <integer>
    = (*instruction-count* := *instruction-count* + 1);
  slot instruction-containing-block :: <p2k-basic-block>,
    required-init-keyword: block:;
  constant slot instruction-opcode :: <symbol>,
    required-init-keyword: opcode:;
  slot instruction-operand-x :: <p2k-operand> = #f,
    init-keyword: x:;
  slot instruction-operand-y :: <p2k-operand> = #f,
    init-keyword: y:;
  <Additional slots in <p2k-instruction>>
end class;
            

When we eliminate an instruction during optimization we need to provide a “forwarding pointer” to the value that should be used in place of a direct reference to the instruction.

<Names imported into and exported from the p2k-rep module>+=

export instruction-replaced-with, instruction-replaced-with-setter;
            

<Additional slots in <p2k-instruction>>=

slot instruction-replaced-with :: <p2k-operand> = #f;
            

In later phases of compilation we will have to assign registers to hold instruction values.

<Names imported into and exported from the p2k-rep module>+=

export instruction-register, instruction-register-setter;
            

<Additional slots in <p2k-instruction>>+=

slot instruction-register :: false-or(<integer>) = #f;
            

The instruction opcodes and some of their characteristics are shown below:

<Names imported into and exported from the p2k-rep module>+=

export instruction-side-effects?, instruction-needs-register?;
            

<Module p2k-rep>+=

define method instruction-side-effects?
    (inst :: <p2k-instruction>) => (effects? :: <boolean>);
  select(inst.instruction-opcode)
    #"STORE", #"MOVE", #"END", #"READ", #"WRITE", #"WLN" => #t;
    otherwise => #f;
  end select;
end method;
            
define method instruction-needs-register?
    (inst :: <p2k-instruction>) => (effects? :: <boolean>);
  select(inst.instruction-opcode)
    #"ADDA", #"STORE", #"END", #"WRITE", #"WLN" => #f;
    #"BRA", #"BNE", #"BEQ", #"BLE", #"BLT", #"BGE", #"BGT" => #f;
    otherwise => #t;
  end select;
end method;
            

The p2k-print-instruction method writes a human-readable representation of an instruction to the given stream. Note that by convention a y-operand of #f in an ADD instruction refers to the frame pointer.

<Names imported into and exported from the p2k-rep module>+=

export p2k-print-instruction, p2k-print-operand;
            

<Module p2k-rep>+=

define method p2k-print-instruction
    (inst :: <p2k-instruction>, stream :: <stream>)
 => ();
  format(stream, "%d: ", inst.instruction-number);
  if(inst.instruction-register)
    format(stream, "R%d := ", inst.instruction-register);
  end if;
  format(stream, "%s ", as(<string>, inst.instruction-opcode));

  if(inst.instruction-operand-x)
    p2k-print-operand(inst.instruction-operand-x, stream);
    format(stream, " ");
  end if;
  if(inst.instruction-operand-y)
    p2k-print-operand(inst.instruction-operand-y, stream);
  end if;
  if(inst.instruction-operand-y == #f & inst.instruction-opcode == #"ADD")
    format(stream, "FP");
  end if;
  if(inst.instruction-replaced-with)
    format(stream, " replaced with ");
    p2k-print-operand(inst.instruction-replaced-with, stream);
  end if;
end method;
            

By convention, instruction-value operands are printed as (n), and constant-value operands are printed as themselves.

<Module p2k-rep>+=

define method p2k-print-operand
    (op :: <p2k-instruction>, stream :: <stream>)
 => ();
  format(stream, "(%d)", op.instruction-number);
end;
            
define method p2k-print-operand
    (op :: <integer>, stream :: <stream>)
 => ();
  format(stream, "%d", op);
end;
            

Basic Blocks

A basic block is a sequence of instructions that may only be entered at the top and may only exit at the bottom. A basic block ending with a conditional branch may exit to either a “condition-true” block or a “fall-through” block.

<Names imported into and exported from the p2k-rep module>+=

export <p2k-basic-block>;
export block-number;
export block-phi-instructions, block-phi-instructions-setter;
export block-instructions, block-instructions-setter;
export block-branch-instruction, block-branch-instruction-setter;
export block-fail-block, block-fail-block-setter;
export block-branch-block, block-branch-block-setter;
export block-dominates;
            

<Module p2k-rep>+=

define variable *block-count* :: <integer> = 0;
            
define class <p2k-basic-block> (<object>)
  constant slot block-number :: <integer>
    = (*block-count* := *block-count* + 1);
  slot block-phi-instructions :: <stretchy-vector>
    = make(<stretchy-vector>);
  slot block-instructions :: <stretchy-vector>
    = make(<stretchy-vector>);
  slot block-branch-instruction :: false-or(<p2k-instruction>) = #f;
  slot block-fail-block :: false-or(<p2k-basic-block>) = #f;
  slot block-branch-block :: false-or(<p2k-basic-block>) = #f;
  <Additional slots in <p2k-basic-block>>
end class;
            

We will also need to keep track of the dominator tree of basic blocks. A block dominates a second block if all execution paths into the second block must pass through the first.

<Additional slots in <p2k-basic-block>>=

constant slot block-dominates :: <stretchy-vector>
  = make(<stretchy-vector>);
            

We will print references to blocks using the syntax [n].

<Names imported into and exported from the p2k-rep module>+=

export p2k-print-block-name, p2k-print-block;
            

<Module p2k-rep>+=

define method p2k-print-block-name
    (blk :: <p2k-basic-block>, stream :: <stream>)
 => ();
  format(stream, "[%d]", blk.block-number);
end method;
            

The p2k-print-block method prints a given block and all of the blocks it dominates.

<Module p2k-rep>+=

define method p2k-print-block
    (blk :: <p2k-basic-block>, stream :: <stream>)
 => ();
  format(stream, "Block [%d]", blk.block-number);
  unless(empty?(blk.block-dominates))
    format(stream, " (dominates");
    do(method(ablk)
         format(stream, " ");
         p2k-print-block-name(ablk, stream);
       end, blk.block-dominates);
    format(stream, ")");
  end;
  new-line(stream);
  do(method(inst)
       p2k-print-instruction(inst, stream);
       new-line(stream);
     end, blk.block-phi-instructions);
  do(method(inst)
       p2k-print-instruction(inst, stream);
       new-line(stream);
     end, blk.block-instructions);
  if(blk.block-branch-instruction)
    p2k-print-instruction(blk.block-branch-instruction, stream);
    p2k-print-block-name(blk.block-branch-block, stream);
    new-line(stream);
  end if;
  if(blk.block-fail-block)
    format(stream, "==> ");
    p2k-print-block-name(blk.block-fail-block, stream);
    new-line(stream);
  end if;
  new-line(stream);

  do(method(ablk)
       p2k-print-block(ablk, stream);
     end, blk.block-dominates);
end method;
            

Value Tables

SSA representation is centered around values instead of variables. At any given static point in the program, we represent a variable value by the instruction that was used to compute it. The <p2k-value-table> type maps a <p2k-var-denotation> onto the <p2k-operand> that it currently represents. Since we update the table whenever a variable is assigned, we get copy propogation for free. FIXME This should be a limited type.

<Names imported into and exported from the p2k-rep module>+=

export <p2k-value-table>;
export copy-value-table, join-value-tables!;
            

<Module p2k-rep>+=

define constant <p2k-value-table> = <object-table>;
            

If control flow splits, then each path will need its own <p2k-value-table>.

<Module p2k-rep>+=

define method copy-value-table
    (from :: <p2k-value-table>)
 => (to :: <p2k-value-table>)
  let to = make(<p2k-value-table>);
  for(value keyed-by key in from)
    to[key] := value;
  end for;
  to;
end method;
            

If control flow joins again, then the value tables need to be merged, and phi-instructions need to be inserted at the beginning of the join block to account for any discrepancies.

The left argument becomes the new map at the beginning of the join block (after the phi-instructions). Note that we don't need a phi-instruction for things that only appear on one of the paths; we remove them from the final map instead.

<Module p2k-rep>+=

define method join-value-tables!
    (left :: <p2k-value-table>, right :: <p2k-value-table>,
     join :: <p2k-basic-block>)
 => ();
  for(left-val keyed-by key in left)
    let right-val = element(right, key, default: #f);
    if(right-val)
      if(left-val ~= right-val)
        let phi = make(<p2k-instruction>, block: join, opcode: #"PHI",
                       x: left-val, y: right-val);
        add!(join.block-phi-instructions, phi);
        left[key] := phi;
      end if;
    else
      remove-key!(left, key);
    end if;
  end for;
end method;
            

Procedures (and the Main Program)

The highest-level structures in p2k programs are procedures and the top-level program itself. Our representation of a procedure contains the program's basic blocks, the procedure's variable scoping environment, and the total amount of local variable storage required.

<Names imported into and exported from the p2k-rep module>+=

export <p2k-procedure>;
export procedure-name, procedure-entry-block, procedure-entry-block-setter;
export procedure-environment, procedure-environment-setter;
export procedure-storage, procedure-storage-setter;
            

<Module p2k-rep>+=

define class <p2k-procedure> (<object>)
  constant slot procedure-name :: <string>,
    required-init-keyword: name:;
  slot procedure-entry-block :: <p2k-basic-block>;
  slot procedure-environment :: <list> = #();
  slot procedure-storage :: <integer> = 0;
end class;
            

The p2k-print-procedure method writes a human-readable representation of a <p2k-procedure> to a given output <stream>.

<Names imported into and exported from the p2k-rep module>+=

export p2k-print-procedure;
            

<Module p2k-rep>+=

define method p2k-print-procedure
    (procedure :: <p2k-procedure>, stream :: <stream>)
 => ();
  format(stream, "PROCEDURE %s: %d bytes of local storage, entry at ",
                  procedure.procedure-name,
                  procedure.procedure-storage);
  p2k-print-block-name(procedure.procedure-entry-block, stream);
  new-line(stream);

  p2k-print-block(procedure.procedure-entry-block, stream);
end method;
            

Parsing p2k, Building SSA

Since the p2k language is a) very simple, and b) structured, we can easily build the SSA program representation as we parse it using a handcrafted top-down parser.

<Libraries imported and modules exported by the p2kcomp library>+=

export p2k-parser;
          

<Module definitions for the p2kcomp library>+=

define module p2k-parser
  use common-dylan;
  use p2k-lexer;
  use p2k-rep;
  <Names imported into and exported from the p2k-parser module>
end module;
          

The only external interface to the parser is p2k-parse-program, which parses a p2k program using a <p2k-lexer> instance, returning an instance of <p2k-procedure>.

<Names imported into and exported from the p2k-parser module>=

export p2k-parse-program;
          

<Module p2k-parser>=

define method p2k-parse-program
    (lexer :: <p2k-lexer>)
 => (program :: <p2k-procedure>)
  unless(match(lexer, #"IDENT", value: "PROGRAM"))
    error("program must begin with PROGRAM");
  end;

  unless(match(lexer, #"IDENT") & instance?(lexer.token-value, <string>))
    error("program name must be an identifier");
  end;

  let program = make(<p2k-procedure>, name: lexer.token-value);
  expect(lexer, #";");

  <Construct an initial variable environment>

  p2k-parse-declarations(lexer, program, 0);
  if(match(lexer, #"IDENT", value: "BEGIN"))
    let entry-block :: <p2k-basic-block> = make(<p2k-basic-block>);
    let value-table :: <p2k-value-table> = make(<p2k-value-table>);
    program.procedure-entry-block := entry-block;
    let end-block
      = p2k-parse-statement-sequence(lexer, program, value-table, entry-block);
    let end-instruction
      = make(<p2k-instruction>, block: end-block, opcode: #"END");
    add!(end-block.block-instructions, end-instruction);
  end if;
  expect(lexer, #"IDENT", value: "END");
  expect(lexer, #".");
  program;
end method;
          

At the onset, there are four predefined names: the type name INTEGER and the three predefined procedures ReadInt, WriteInt and WriteLn.

<Construct an initial variable environment>=

program.procedure-environment :=
  extend-environment(#(), <p2k-type-denotation>,
                     name: "INTEGER", scope-depth: 0, kind: #"integer");
program.procedure-environment :=
  extend-environment(program.procedure-environment,
                     <p2k-procedure-denotation>,
                     name: "ReadInt", scope-depth: 0, procedure: #f);
program.procedure-environment :=
  extend-environment(program.procedure-environment,
                     <p2k-procedure-denotation>,
                     name: "WriteInt", scope-depth: 0, procedure: #f);
program.procedure-environment :=
  extend-environment(program.procedure-environment,
                     <p2k-procedure-denotation>,
                     name: "WriteLn", scope-depth: 0, procedure: #f);
          

Parsing Declarations

FIXME The implementation below doesn't quite match this grammar.

<Module p2k-parser>+=

define method p2k-parse-declarations
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     level :: <integer>)
 => ();
  if(match(lexer, #"IDENT", value: "CONST"))
    p2k-parse-const-section(lexer, procedure, level);
    p2k-parse-declarations(lexer, procedure, level);
  elseif(match(lexer, #"IDENT", value: "TYPE"))
    p2k-parse-type-section(lexer, procedure, level);
    p2k-parse-declarations(lexer, procedure, level);
  elseif(match(lexer, #"IDENT", value: "VAR"))
    p2k-parse-var-section(lexer, procedure, level);
    p2k-parse-declarations(lexer, procedure, level);
  elseif(match(lexer, #"IDENT", value: "PROCEDURE"))
    p2k-parse-procedure(lexer, procedure, level);
    p2k-parse-declarations(lexer, procedure, level);
  end if;
end method;
            

CONST Sections

For now CONST expressions are restricted to be single-token integers.

<Module p2k-parser>+=

define method p2k-parse-const-section
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     level :: <integer>)
 => ();
  while(match(lexer, #"IDENT", consume: #f)
        & lexer.token-value ~= "CONST"
        & lexer.token-value ~= "TYPE"
        & lexer.token-value ~= "VAR"
        & lexer.token-value ~= "PROCEDURE"
        & lexer.token-value ~= "BEGIN")
    match(lexer, #"IDENT", consume: #t);
    let name = lexer.token-value;
    expect(lexer, #"=");
    expect(lexer, #"NUMBER");
    procedure.procedure-environment
      := extend-environment(procedure.procedure-environment,
                            <p2k-constant-denotation>,
                            name: name, scope-depth: level,
                            constant-value: lexer.token-value);
    expect(lexer, #";");
  end while;
end method;
              

TYPE Sections

<Module p2k-parser>+=

define method p2k-parse-type-section
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     level :: <integer>)
 => ();
  while(match(lexer, #"IDENT", consume: #f)
        & lexer.token-value ~= "CONST"
        & lexer.token-value ~= "TYPE"
        & lexer.token-value ~= "VAR"
        & lexer.token-value ~= "PROCEDURE"
        & lexer.token-value ~= "BEGIN")
    match(lexer, #"IDENT", consume: #t);
    let name = lexer.token-value;
    expect(lexer, #"=");
    let new-type = p2k-parse-type(lexer, procedure.procedure-environment);    
    expect(lexer, #";");
    procedure.procedure-environment
      := extend-environment(procedure.procedure-environment,
                            <p2k-type-denotation>,
                            name: name, scope-depth: level,
                            kind: new-type.denotation-type-kind,
                            type-base: new-type.denotation-type-base,
                            type-array-min: new-type.denotation-type-array-min,
                            type-array-max: new-type.denotation-type-array-max);                          
  end while;
end method;
              

A type expression is either an identifier or an ARRAY OF something.

<Module p2k-parser>+=

define method p2k-parse-type
    (lexer :: <p2k-lexer>, environment :: <list>)
 => (type :: <p2k-type-denotation>);
  expect(lexer, #"IDENT");
  if(lexer.token-value = "ARRAY")
    expect(lexer, #"[");
    expect(lexer, #"NUMBER");
    let min = lexer.token-value;
    expect(lexer, #"..");
    expect(lexer, #"NUMBER");
    let max = lexer.token-value;
    if(min > max)
      error("lower array bound must be less than or equal to upper bound");
    end;
    expect(lexer, #"]");
    expect(lexer, #"IDENT", value: "OF");
    let base = p2k-parse-type(lexer, environment);
    make(<p2k-type-denotation>, name: "ARRAY", scope-depth: -1,
         kind: #"array", type-base: base,
         type-array-min: min, type-array-max: max);
  else
    let denotation = locate-denotation(environment, lexer.token-value);
    unless(denotation & instance?(denotation, <p2k-type-denotation>))
      error("%s: illegal type name", lexer.token-value);
    end;
    denotation;
  end if;
end method;
              

VAR Sections

We'll go ahead and allocate storage space for each variable, though for scalar variables we probably won't end up using it unless it has “global” scope relative to some procedure we are calling, or gets used as a variable parameter.

The procedure's stack frame looks like this: [ FIXME we need a nifty diagram here]

<Module p2k-parser>+=

define method p2k-parse-var-section
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     level :: <integer>)
 => ();
  while(match(lexer, #"IDENT", consume: #f)
        & lexer.token-value ~= "CONST"
        & lexer.token-value ~= "TYPE"
        & lexer.token-value ~= "VAR"
        & lexer.token-value ~= "PROCEDURE"
        & lexer.token-value ~= "BEGIN")
    match(lexer, #"IDENT", consume: #t);
    let names :: <list> = list(lexer.token-value);
    while(match(lexer, #","))
      expect(lexer, #"IDENT");
      names := add(names, lexer.token-value);
    end while;
    expect(lexer, #":");
    let type = p2k-parse-type(lexer, procedure.procedure-environment);
    expect(lexer, #";");
    for(name in names)
      procedure.procedure-storage
        := procedure.procedure-storage + sizeof-p2k-type(type);
      procedure.procedure-environment
        := extend-environment(procedure.procedure-environment,
                              <p2k-var-denotation>,
                              name: name, scope-depth: level,
                              type: type,
                              storage-base: - procedure.procedure-storage);
    end for;
  end while;
end method;
              

PROCEDURE Declarations

We don't support procedure declarations. Yet.

<Module p2k-parser>+=

define method p2k-parse-procedure
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     level :: <integer>)
 => ();
  error("This compiler doesn't support PROCEDURE declarations (yet)");
end method;
              

Parsing Statements

This is where all the action is.

FIXME The implementation doesn't quite match the grammar.

<Module p2k-parser>+=

define method p2k-parse-statement-sequence
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     value-table :: <p2k-value-table>, blk :: <p2k-basic-block>)
 => (blk :: <p2k-basic-block>);
  if(match(lexer, #"IDENT", value: "IF"))
    <Parse an IF statement>
  elseif(match(lexer, #"IDENT", value: "WHILE"))
    <Parse a WHILE statement>
  else
    expect(lexer, #"IDENT");
    let name = lexer.token-value;
    let denotation :: false-or(<p2k-denotation>)
      = locate-denotation(procedure.procedure-environment, name);
    select(denotation by instance?)
      <p2k-procedure-denotation> =>
        <Parse a procedure call>
      <p2k-var-denotation> =>
        <Parse an assignment statement>
      otherwise =>
        error("the identifier '%s' is unexpected here", name);
    end select;
  end if;
  if(match(lexer, #";")
     & ~match(lexer, #"IDENT", value: "END", consume: #f)
     & ~match(lexer, #"IDENT", value: "ELSE", consume: #f))
    p2k-parse-statement-sequence(lexer, procedure, value-table, blk);
  else
    blk;
  end if;
end method;
            

IF Statements

The conditional block dominates the THEN, ELSE, and join blocks.

FIXME We refuse to handle the ever-so-nasty ELSEIF construct here.

<Parse an IF statement>=

let then-blk = make(<p2k-basic-block>);
add!(blk.block-dominates, then-blk);
let else-blk = make(<p2k-basic-block>);
add!(blk.block-dominates, else-blk);
let join-blk = make(<p2k-basic-block>);
add!(blk.block-dominates, join-blk);

let (result, branchop)
  = p2k-parse-conditional-expression(lexer, procedure, value-table, blk);
blk.block-branch-instruction
  := make(<p2k-instruction>, block: blk, opcode: branchop, x: result);
blk.block-fail-block := then-blk;
blk.block-branch-block := else-blk;

let else-value-table = copy-value-table(value-table);
expect(lexer, #"IDENT", value: "THEN");
let then-end-blk
  = p2k-parse-statement-sequence(lexer, procedure, value-table, then-blk);
then-end-blk.block-branch-instruction
  := make(<p2k-instruction>, block: then-end-blk, opcode: #"BRA");
then-end-blk.block-branch-block := join-blk;

if(match(lexer, #"IDENT", value: "ELSE"))
  blk.block-branch-block := else-blk;
  let else-end-blk
    = p2k-parse-statement-sequence(lexer, procedure, else-value-table,
                                   else-blk);
  else-end-blk.block-fail-block := join-blk;
else
  else-blk.block-fail-block := join-blk;
end if;


expect(lexer, #"IDENT", value: "END");

join-value-tables!(value-table, else-value-table, join-blk);
blk := join-blk;
              

WHILE Statements

WHILE statements are somewhat similar to IF statements, with a few twists. If the current block is not empty, we need to split it and create a new block so that it can be used as the head of the loop.

<Parse a WHILE statement>=

let new-blk = make(<p2k-basic-block>);
blk.block-fail-block := new-blk;
add!(blk.block-dominates, new-blk);
blk := new-blk;

let body-blk = make(<p2k-basic-block>);
add!(blk.block-dominates, body-blk);

<Rebuild the value-table using phi-instructions>
let (result, branchop)
  = p2k-parse-conditional-expression(lexer, procedure, value-table, blk);
blk.block-branch-instruction
  := make(<p2k-instruction>, block: blk, opcode: branchop, x: result);
blk.block-fail-block := body-blk;

expect(lexer, #"IDENT", value: "DO");
let body-value-table = copy-value-table(value-table);
let body-end-blk
  = p2k-parse-statement-sequence(lexer, procedure, body-value-table, body-blk);
body-end-blk.block-branch-instruction
  := make(<p2k-instruction>, block: body-end-blk, opcode: #"BRA");
body-end-blk.block-branch-block := blk;

expect(lexer, #"IDENT", value: "END");

let exit-blk = make(<p2k-basic-block>);
add!(blk.block-dominates, exit-blk);
blk.block-branch-block := exit-blk;

<Insert necessary phi-instructions into blk, eliminate references to unnecessary ones>

blk := exit-blk;
              

Since we don't know which values will require phi-instructions until we've compiled the body, we proactively re-create the value table assuming every variable will have one. There's no point in adding them to the block until we're sure we need them, though.

<Rebuild the value-table using phi-instructions>=

for(val keyed-by key in value-table)
  value-table[key] := make(<p2k-instruction>, block: blk, opcode: #"PHI",
                           x: val, y: val);
end for;
              

Once we've compiled the body, we can figure out what variables were changed and therefore really need a phi-instruction. For things that don't, we restore the original value-table mapping and build a substitution table to be used for eliminating references to the phi-instruction values we generated earlier.

<Insert necessary phi-instructions into blk, eliminate references to unnecessary ones>=

let substitutions = make(<object-table>);
for(val keyed-by key in value-table)
  if(val == element(body-value-table, key))
    element(substitutions, val) := val.instruction-operand-x;
    element(value-table, key) := val.instruction-operand-x;
  else
    val.instruction-operand-x := element(body-value-table, key);
    add!(blk.block-phi-instructions, val);
  end if;
end for;
              

Now we perform a round of substitutions over the loop header and body.

<Insert necessary phi-instructions into blk, eliminate references to unnecessary ones>+=

local
  method substitute-instruction(inst :: <p2k-instruction>) => ();
    inst.instruction-operand-x
      := element(substitutions, inst.instruction-operand-x,
                                default: inst.instruction-operand-x);
    inst.instruction-operand-y
      := element(substitutions, inst.instruction-operand-y,
                                default: inst.instruction-operand-y);
  end method,
  method substitute-block(blk :: <p2k-basic-block>) => ();
    do(substitute-instruction, blk.block-phi-instructions);
    do(substitute-instruction, blk.block-instructions);
    if(blk.block-branch-instruction)
      substitute-instruction(blk.block-branch-instruction);
    end if;
    do(substitute-block, blk.block-dominates);
  end method;
substitute-block(blk);
              

Parsing Expressions

The p2k-parse-conditional-expression method returns the CMP instruction where the value is computed, and the operator that should be used to branch if the comparison does not hold.

<Module p2k-parser>+=

define method p2k-parse-conditional-expression
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     value-table :: <p2k-value-table>, blk :: <p2k-basic-block>)
 => (result :: <p2k-operand>, operator :: <symbol>);
  let left = p2k-parse-simple-expression(lexer, procedure, value-table, blk);

  let op = #f;
  if(match(lexer, #"="))
     op := #"BNE";
  elseif(match(lexer, #"<>"))
    op := #"BEQ";
  elseif(match(lexer, #"<"))
    op := #"BGE";
  elseif(match(lexer, #"<="))
    op := #"BGT";
  elseif(match(lexer, #">"))
    op := #"BLE";
  elseif(match(lexer, #">="))
    op := #"BLT";
  else
    error("conditional operator expected");
  end if;

  let right = p2k-parse-simple-expression(lexer, procedure, value-table, blk);
  let cmp = make(<p2k-instruction>, block: blk, opcode: #"CMP",
                 x: left, y: right);
  add!(blk.block-instructions, cmp);
  values(cmp, op);
end method;
            

The next inner level of expressions is simpleExpression.

<Module p2k-parser>+=

define method p2k-parse-simple-expression
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     value-table :: <p2k-value-table>, blk :: <p2k-basic-block>)
 => (result :: <p2k-operand>);
  let neg = match(lexer, #"-");
  unless(neg)
    match(lexer, #"+");
  end;
  let left = p2k-parse-term(lexer, procedure, value-table, blk);
  if(neg)
    left := make(<p2k-instruction>, block: blk, opcode: #"NEG", x: left);
    add!(blk.block-instructions, left);
  end;

  let op = #f;
  while(if(match(lexer, #"+")) op := #"ADD";
        elseif(match(lexer, #"-")) op := #"SUB"; else #f; end if)
    let right = p2k-parse-term(lexer, procedure, value-table, blk);
    left := make(<p2k-instruction>, block: blk, opcode: op,
                 x: left, y: right);
    add!(blk.block-instructions, left);
  end while;
  left;
end method;
            

The “next level in” is similar.

<Module p2k-parser>+=

define method p2k-parse-term
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     value-table :: <p2k-value-table>, blk :: <p2k-basic-block>)
 => (result :: <p2k-operand>);
  let left = p2k-parse-factor(lexer, procedure, value-table, blk);

  let op = #f;
  while(if(match(lexer, #"*")) op := #"MUL";
        elseif(match(lexer, #"IDENT", value: "DIV")) op := #"DIV";
        elseif(match(lexer, #"IDENT", value: "MOD")) op := #"MOD";
        else #f; end if)
    let right = p2k-parse-factor(lexer, procedure, value-table, blk);
    left := make(<p2k-instruction>, block: blk, opcode: op,
                 x: left, y: right);
    add!(blk.block-instructions, left);
  end while;
  left;
end method;
            

The bottom level is factor.

<Module p2k-parser>+=

define method p2k-parse-factor
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     value-table :: <p2k-value-table>, blk :: <p2k-basic-block>)
 => (result :: <p2k-operand>);
  if(match(lexer, #"("))
    let result
      = p2k-parse-simple-expression(lexer, procedure, value-table, blk);
    expect(lexer, #")");
    result;
  elseif(match(lexer, #"NUMBER"))
    lexer.token-value;
  elseif(match(lexer, #"IDENT"))
    let name = lexer.token-value;
    <Parse a reference to the identifier name>
  else
    error("expression syntax error (spare me, okay?)");
  end if;
end method;
            

Identifiers are a bit hairier than the rest. Constants are treated just like numbers. Array references generate code to read from the array. The current denotations of scalar variable references are looked up in the value-table.

<Parse a reference to the identifier name>=

let denotation :: false-or(<p2k-denotation>)
  = locate-denotation(procedure.procedure-environment, name);
select(denotation by instance?)
  <p2k-constant-denotation> =>
    denotation.denotation-constant-value;
  <p2k-var-denotation> =>
    let offset
      = p2k-parse-selector(lexer, procedure, value-table, blk, denotation);
    if(offset)
      let base = make(<p2k-instruction>, block: blk, opcode: #"ADD",
                      x: denotation.denotation-var-storage-base,
                      y: #f); // #f is FP
      add!(blk.block-instructions, base);
      let addr = make(<p2k-instruction>, block: blk, opcode: #"ADDA",
                      x: base, y: offset);
      add!(blk.block-instructions, addr);
      let load = make(<p2k-instruction>, block: blk, opcode: #"LOAD",
                      y: addr);
      add!(blk.block-instructions, load);
      load;
    else
      let val = element(value-table, denotation, default: #f);
      if(val)
        val;
      else
        error("the variable '%s' is uninitialized at this point", name);
      end if;
    end if;
  otherwise =>
    error("the identifier '%s' is unexpected here", name);
end select;
            

The p2k-parse-selector method computes offsets for array subscripting.

<Module p2k-parser>+=

define method p2k-parse-selector
    (lexer :: <p2k-lexer>, procedure :: <p2k-procedure>,
     value-table :: <p2k-value-table>, blk :: <p2k-basic-block>,
     denotation :: <p2k-var-denotation>)
 => (result :: <p2k-operand>);
  let type = denotation.denotation-var-type;
  let offset = #f;
  while(match(lexer, #"["))
    if(type.denotation-type-kind ~== #"array")
      error("attempt to subscript a non-array");
    end;
    let index
      = p2k-parse-simple-expression(lexer, procedure, value-table, blk);
    expect(lexer, #"]");

    <Compute rel-offset from index>    
    if(offset)
      offset := make(<p2k-instruction>, block: blk, opcode: #"ADD",
                     x: offset, y: rel-offset);
      add!(blk.block-instructions, offset);
    else
      offset := rel-offset;
    end if;

    type := type.denotation-type-base;
  end while;
  if(type.denotation-type-kind == #"array")
    error("can't treat arrays as values");
  end;
  offset;
end method;
            

To compute the byte offset for an array index, we need to subtract off the array lower bound (if it is nonzero) and multiply by the size of the array elements.

<Compute rel-offset from index>=

if(type.denotation-type-array-min ~= 0)
  index := make(<p2k-instruction>, block: blk, opcode: #"SUB",
                x: index, y: type.denotation-type-array-min);
  add!(blk.block-instructions, index);
end if;
let rel-offset = make(<p2k-instruction>, block: blk, opcode: #"MUL",
                      x: index,
                      y: sizeof-p2k-type(type.denotation-type-base));
add!(blk.block-instructions, rel-offset);
            

Assignments and Procedure Calls

Assignment Statements

<Parse an assignment statement>=

let offset
  = p2k-parse-selector(lexer, procedure, value-table, blk, denotation);

expect(lexer, #":=");
let val = p2k-parse-simple-expression(lexer, procedure, value-table, blk);
p2k-assign(value-table, blk, denotation, offset, val);
              

An assignment stores a value or changes the mapping in the value table as appropriate.

<Module p2k-parser>+=

define method p2k-assign
    (value-table :: <p2k-value-table>, blk :: <p2k-basic-block>,
     denotation :: <p2k-var-denotation>, offset :: <p2k-operand>,
     value :: <p2k-operand>)
 => ();
  if(offset)
    let base = make(<p2k-instruction>, block: blk, opcode: #"ADD",
                    x: denotation.denotation-var-storage-base,
                    y: #f); // #f is FP
    add!(blk.block-instructions, base);
    let addr = make(<p2k-instruction>, block: blk, opcode: #"ADDA",
                    x: base, y: offset);
    add!(blk.block-instructions, addr);
    let store = make(<p2k-instruction>, block: blk, opcode: #"STORE",
                     x: value, y: addr);
    add!(blk.block-instructions, store);
  else
    value-table[denotation] := value;
  end if;
end method;
              

Procedure Calls

We will handle the builtin functions specially, generating inline code for them.

<Parse a procedure call>=

select(name by \=)
  <Cases for parsing and compiling built-in functions>
  otherwise =>
    error("hey... how'd %s get in here?", name);
end select;
              

The ReadInt function reads an integer and assigns it to its argument.

<Cases for parsing and compiling built-in functions>=

"ReadInt" =>
  expect(lexer, #"(");
  expect(lexer, #"IDENT");
  let denotation :: false-or(<p2k-denotation>)
    = locate-denotation(procedure.procedure-environment, lexer.token-value);
  if(~instance?(denotation, <p2k-var-denotation>))
    error("expected a variable name, got identifier '%s'", lexer.token-value);
  end if;
  let offset
    = p2k-parse-selector(lexer, procedure, value-table, blk, denotation);
  expect(lexer, #")");

  let val = make(<p2k-instruction>, block: blk, opcode: #"READ");
  add!(blk.block-instructions, val);
  p2k-assign(value-table, blk, denotation, offset, val);
              

The WriteInt function writes out the value of its argument as an integer.

<Cases for parsing and compiling built-in functions>+=

"WriteInt" =>
  expect(lexer, #"(");
  let val = p2k-parse-simple-expression(lexer, procedure, value-table, blk);
  expect(lexer, #")");
  let wri = make(<p2k-instruction>, block: blk, opcode: #"WRITE",
                 x: val);
  add!(blk.block-instructions, wri);
              

The WriteLn function takes no arguments, and simply outputs a newline.

<Cases for parsing and compiling built-in functions>+=

"WriteLn" =>
  expect(lexer, #"(");
  expect(lexer, #")");
  let wri = make(<p2k-instruction>, block: blk, opcode: #"WLN");
  add!(blk.block-instructions, wri);
              

Constant Folding and Common Subexpression Elimination

Now it's time for a vigorous and exciting round of constant folding and common subexpression elimination!

<Libraries imported and modules exported by the p2kcomp library>+=

export p2k-cse;
          

<Module definitions for the p2kcomp library>+=

define module p2k-cse
  use common-dylan;
  use p2k-rep;
  <Names exported from the p2k-cse module>
end module;
          

The p2k-cse-optimize method performs these two optimizations on the given basic block, as well as the blocks it dominates.

<Names exported from the p2k-cse module>=

export p2k-cse-optimize;
          

During CSE we'll keep a table of potentially live instructions of each particular opcode to aid in finding other equivalent instructions.

<Module p2k-cse>=

define method p2k-cse-optimize
    (procedure :: <p2k-procedure>)
 => ();
  <Local methods of p2k-cse-optimize>
  let instruction-table = make(<object-table>);
  cse-optimize-block(procedure.procedure-entry-block, instruction-table);
end method;
          

<Local methods of p2k-cse-optimize>=

local
  method cse-optimize-block
      (blk :: <p2k-basic-block>, instruction-table :: <object-table>)
   => ();
    for(inst in blk.block-phi-instructions)
      replace-operands(inst);
      cse-optimize-instruction(inst, instruction-table);
    end;
    for(inst in blk.block-instructions)
      replace-operands(inst);
      constant-fold-instruction(inst);
      unless(inst.instruction-replaced-with)
        cse-optimize-instruction(inst, instruction-table);
      end;
    end for;
    <Recursively call cse-optimize-block for each child>
  end,
          

Each of the (dominated) child blocks needs to be given its own copy of the instruction-table. These can't include PHI instructions, since they are context-sensitive.

<Recursively call cse-optimize-block for each child>=

for(child-blk in blk.block-dominates)
  let child-instruction-table = make(<object-table>);
  for(instrs keyed-by opcode in instruction-table)
    unless(opcode == #"PHI") child-instruction-table[opcode] := instrs; end;
  end for;
  cse-optimize-block(child-blk, child-instruction-table);
end for;
          

Replacing References to Optimized-Away Values

Direct references to instruction values need to be replaced with references to the values that replace them.

<Local methods of p2k-cse-optimize>+=

  method replace-operands
      (inst :: <p2k-instruction>)
   => ();
    if(instance?(inst.instruction-operand-x, <p2k-instruction>)
       & inst.instruction-operand-x.instruction-replaced-with)
      inst.instruction-operand-x
        := inst.instruction-operand-x.instruction-replaced-with;
    end if;
    if(instance?(inst.instruction-operand-y, <p2k-instruction>)
       & inst.instruction-operand-y.instruction-replaced-with)
      inst.instruction-operand-y
        := inst.instruction-operand-y.instruction-replaced-with;
    end if;
  end,
            

Constant Folding

We can try to evaluate instructions that compute numerical values at compile time.

<Local methods of p2k-cse-optimize>+=

  method constant-fold-instruction
      (inst :: <p2k-instruction>)
   => ();
    let x = inst.instruction-operand-x;
    let y = inst.instruction-operand-y;
    let x-integer? = instance?(x, <integer>);
    let y-integer? = instance?(y, <integer>);
    select(inst.instruction-opcode)
      #"NEG" =>
        if(x-integer?)
          inst.instruction-replaced-with := - x;
        end if;
      #"ADD" =>
        if(x-integer? & y-integer?)
          inst.instruction-replaced-with := x + y;
        end if;
      #"SUB" =>
        if(x-integer? & y-integer?)
          inst.instruction-replaced-with := x - y;
        end if;
      #"MUL" =>
        if(x-integer? & y-integer?)
          inst.instruction-replaced-with := x * y;
        end if;
      #"DIV" =>
        if(x-integer? & y-integer?)
          inst.instruction-replaced-with := floor/(x, y);
        end if;
      #"MOD" =>
        if(x-integer? & y-integer?)
          inst.instruction-replaced-with := modulo(x, y);
        end if;
      otherwise =>
        #f;
    end select;
  end,
            

Common Subexpression Elimination

Basically the idea is that instruction-table contains lists of instructions, one list per opcode, that dominate the current instruction. We search the appropriate list for an earlier instruction with identical operands, and replace the current instruction with that. Of course instructions with side effects are off limits, as are memory instructions (due to aliasing effects). (We won't even see branch instructions here, so we don't have to do anything about them.)

<Local methods of p2k-cse-optimize>+=

  method cse-optimize-instruction
      (inst :: <p2k-instruction>, instruction-table :: <object-table>)
   => ();
    select(inst.instruction-opcode)
      #"READ", #"WRITE", #"WLN", #"ADDA", #"LOAD", #"STORE" =>
        #f;
      otherwise =>
        for(other in element(instruction-table, inst.instruction-opcode,
                             default: #()),
            until:
              inst.instruction-operand-x == other.instruction-operand-x
                & inst.instruction-operand-y == other.instruction-operand-y
                & (inst.instruction-replaced-with := other))
        end for;
        unless(inst.instruction-replaced-with)
          element(instruction-table, inst.instruction-opcode)
            := pair(inst, element(instruction-table, inst.instruction-opcode,
                                  default: #()));
        end;
    end select;
  end;
            

Register Allocation

Now we can perform register allocation on our program. We'll be using the Iterated Register Coalescing algorithm of Lal George and Andrew Appel. (This portion of the program previously had to be ripped out before the compiler could see the light of day, because it infringesd on US patent number 4,571,678 assigned to the IBM corporation. I hate patents. Fortunately this patent expired after February 18, 2003.)

(I did ask Lal George, who reassured me that it did not infringe on any other patents that he knew of.)

<Libraries imported and modules exported by the p2kcomp library>+=

export p2k-regalloc;
          

<Module definitions for the p2kcomp library>+=

define module p2k-regalloc
  use common-dylan, exclude: { format-to-string };
  use p2k-rep;     // FIXME
  use streams;     // FIXME
  use standard-io; // FIXME
  use format;
  export p2k-regalloc, p2k-generate-moves;
end module;
          

How it Works

The George and Appel register allocation algorithm is described in detail in and . The basic steps are:

Build
Construct an interference graph, telling which values have to be live at the same time as some other value. Identify values which are move-related (either phi-instructions, or arguments to a phi-instruction).
Simplify
Keep removing non-move-related values having fewer neighbors than the number of available registers from the graph, and push them onto a stack.
Coalesce
Try to assign the same register to a phi-instruction and the instructions it refers to, making it unnecessary to generate corresponding move instructions.
Freeze
If no simplification or coalescing can be done, we look for a move-related node with a low number of neighbors and freeze it, giving up hope of coalescing it (and making a move instruction necessary). Then, resume the simplify and coalesce phases.
Spill
Choose values to be spilled into memory, using some heuristic.
Select
Pop all of the registers off of the stack and assign registers to them.

In code, it looks like this:

<Module p2k-regalloc>=

define method p2k-regalloc
    (procedure :: <p2k-procedure>, K :: <integer>)
 => ();
  <Declare and initialize data structures for p2k-regalloc>
  local
    <Local methods within p2k-regalloc>
    method regalloc-main(entry-blk :: <p2k-basic-block>) => ();
      construct-liveness-adjacency(entry-blk);
      construct-worklists(entry-blk);
      while(~empty?(simplify-worklist) | ~empty?(worklist-moves)
            | ~empty?(freeze-worklist) | ~empty?(spill-worklist))
        if(~empty?(simplify-worklist))
           regalloc-simplify();
        elseif(~empty?(worklist-moves))
          regalloc-coalesce();
        elseif(~empty?(freeze-worklist))
          regalloc-freeze();
        elseif(~empty?(spill-worklist))
          regalloc-select-spill();
        end if;
      end;
      regalloc-assign-registers();
      if(~empty?(spilled))
        regalloc-perform-spill(procedure);
        p2k-regalloc(procedure, K);
      end if;
    end method;
  regalloc-main(procedure.procedure-entry-block);
end method;
            

Live Range Analysis and the Interference Graph

In our SSA-based representation, all values are valid after the point they are defined. Of course, not all of them are relevant at any given point. This is especially important because we only have a limited number of registers in which to store these values. For this reason, we will do live range analysis to determine what values are live at any given point in the program, and construct an interference graph with edges linking values that are live concurrently and therefore must be placed in different registers.

We'll have two separate representations of our interference graph: an adjacency list for each value (graph node), and a hash table of pairs of nodes where an (undirected) edge exists. Since the interference graph is generally fairly sparse, this should be more efficient than a triangular bit matrix. Such a hash table can be defined by subclassing the Dylan <table> class and providing appropriate equality and hash functions.

<Module p2k-regalloc>+=

define class <undirected-edge-table> (<table>)
  // No additional slots
end class;
            
define method table-protocol(table :: <undirected-edge-table>)
  => (test-function :: <function>, hash-function :: <function>);
  values(method  // equality method
             (edge1 :: <pair>, edge2 :: <pair>)
          => (equal? :: <boolean>);
           (edge1.head == edge2.head & edge1.tail == edge2.tail)
           | (edge1.head == edge2.tail & edge1.tail == edge2.head)
         end,
         method  // hash method
             (edge :: <pair>, initial-state :: <object>)
          => (hash-id :: <integer>, hash-state :: <object>);
           let (head-id, head-state) = object-hash(edge.head, initial-state);
           let (tail-id, tail-state) = object-hash(edge.tail, initial-state);
           let merged-id = merge-hash-ids(head-id, tail-id, ordered: #f);
           values(merged-id, tail-state);
         end);
end method;
            
define sealed domain table-protocol(<undirected-edge-table>);
            

Now we can declare our edge and adjacencly list data structures. We also keep track of the degree of each graph node.

<Declare and initialize data structures for p2k-regalloc>=

let edges :: <undirected-edge-table> = make(<undirected-edge-table>);
let adjacency-list :: <object-table> = make(<object-table>);
let degree :: <object-table> = make(<object-table>);
            

The add-edge method will add an edge between two nodes to edges, and add each node to the other's the adjacency list.

<Local methods within p2k-regalloc>=

  method add-edge(u :: <p2k-instruction>, v :: <p2k-instruction>) => ();
    let uv = pair(u, v);
    unless(u == v | element(edges, uv, default: #f))
      element(edges, uv) := #t;
      element(adjacency-list, u)
        := add(element(adjacency-list, u, default: #()), v);
      element(degree, u) := element(degree, u, default: 0) + 1;
      element(adjacency-list, v)
        := add(element(adjacency-list, v, default: #()), u);
      element(degree, v) := element(degree, v, default: 0) + 1;
    end;
  end method,
            

For the liveness analysis itself, we'll use a worklist algorithm for the iterative dataflow computation. We start by traversing the control flow graph to determine what nodes depend on each other, and prime the worklist with the nodes that don't depend on anything (i.e., the end of the program). Then we process nodes in the worklist until there aren't any left.

<Local methods within p2k-regalloc>+=

  method construct-liveness-adjacency(entry-blk :: <p2k-basic-block>) => ();
    let liveness-worklist :: <deque> = make(<deque>);
    let live-at-head-branch :: <object-table>
      = make(<object-table>);
    let branch-predecessor :: <object-table>
      = make(<object-table>);
    let live-at-head-fail :: <object-table>
      = make(<object-table>);
    let fail-predecessor :: <object-table>
      = make(<object-table>);
    local
      <Declare the compute-liveness-dependency method>
    compute-liveness-dependency(entry-blk);
    while(~empty?(liveness-worklist))
      let blk = pop(liveness-worklist);
      <Visit the blk>
    end;
  end method,
            

<Declare the compute-liveness-dependency method>=

  method compute-liveness-dependency(blk :: <p2k-basic-block>) => ();
    if(blk.block-branch-block)
      branch-predecessor[blk.block-branch-block] := blk;
    end if;
    if(blk.block-fail-block)
      fail-predecessor[blk.block-fail-block] := blk;      
    end if;
    if(~blk.block-branch-block & ~blk.block-fail-block)
      push-last(liveness-worklist, blk);
    end if;
    do(compute-liveness-dependency, blk.block-dominates);
  end;
            

First we compute the set of live values at the top of the branch and fall-through successor blocks, and take the union of the two to determine the set of live values at the end of this block.

<Visit the blk>=

let live = #();
if(blk.block-branch-block)
  live := element(live-at-head-branch, blk.block-branch-block, default: #());
end if;
if(blk.block-fail-block)
  live := union(live, element(live-at-head-fail, blk.block-fail-block,
                              default: #()));
end if;
if(blk.block-branch-instruction
   & blk.block-branch-instruction.instruction-operand-x
   & ~p2k-constant?(blk.block-branch-instruction.instruction-operand-x))
  live := add-new(live, blk.block-branch-instruction.instruction-operand-x);
end if;
            

Once we've recursively determined the set of values live at the end of the block, we work from the end up to determine the what other values each instruction value conflicts with.

<Visit the blk>+=

for(i from blk.block-instructions.size - 1 to 0 by -1)
  let inst = blk.block-instructions[i];
  unless(inst.instruction-side-effects? | inst.instruction-replaced-with)
    live := remove(live, inst);
  end;
  unless(inst.instruction-replaced-with)
    if(inst.instruction-needs-register?)
      for(val in live)
        add-edge(inst, val);
      end for;
      unless(element(degree, inst, default: #f))
        degree[inst] := 0;
        adjacency-list[inst] := #();
      end;
    end if;
    if(inst.instruction-operand-x
       & ~p2k-constant?(inst.instruction-operand-x))
      live := add-new(live, inst.instruction-operand-x);
    end if;
    if(inst.instruction-operand-y
       & ~p2k-constant?(inst.instruction-operand-y))
      live := add-new(live, inst.instruction-operand-y);
    end if;
  end;
end for;
            

Now we do the same for the phi-instructions, except that we maintain two separate branches for the separate branch and fail entry points.

<Visit the blk>+=

let branch-live = live;
let fail-live = live;
for(i from blk.block-phi-instructions.size - 1 to 0 by -1)
  let inst = blk.block-phi-instructions[i];
  unless(inst.instruction-replaced-with)
    branch-live := remove(branch-live, inst);
    fail-live := remove(fail-live, inst);
  end;
  unless(inst.instruction-replaced-with)
   for(val in live)
     add-edge(inst, val);
   end for;
   unless(element(degree, inst, default: #f))
     degree[inst] := 0;
     adjacency-list[inst] := #();
   end;
   unless(p2k-constant?(inst.instruction-operand-x))
     branch-live := add-new(branch-live, inst.instruction-operand-x);
   end;
   unless(p2k-constant?(inst.instruction-operand-y))
     fail-live := add-new(fail-live, inst.instruction-operand-y);
   end;
  end unless;
end for;
            

Now that we've reached the top, we determine if the sets for branch-live and fail-live are any different from the values previously computed for this block (if any). If so, we add the affected predecessor blocks to the worklist.

<Visit the blk>+=

if(element(fail-predecessor, blk, default: #f))
  if(element(live-at-head-fail, blk, default: #f))
    let fail-difference = difference(fail-live, live-at-head-fail[blk]);
    if(~empty?(fail-difference))
      unless(member?(fail-predecessor[blk], liveness-worklist))
        push(liveness-worklist, fail-predecessor[blk]);
      end;
    end if;
  else
    push(liveness-worklist, fail-predecessor[blk]);
  end if;
end if;
if(element(branch-predecessor, blk, default: #f))
  if(element(live-at-head-branch, blk, default: #f))
    let branch-difference = difference(branch-live, live-at-head-branch[blk]);
    if(~empty?(branch-difference))
      unless(member?(branch-predecessor[blk], liveness-worklist))
        push(liveness-worklist, branch-predecessor[blk]);
      end;
    end if;
  else
    push(liveness-worklist, branch-predecessor[blk]);
  end if;
end if;
live-at-head-branch[blk] := branch-live;
live-at-head-fail[blk] := fail-live;
            

Constructing the Initial Worklists

Now that we know what instructions conflict with each other, we can construct the initial worklists for the main loop.

First we determine what values are “move-related”, collecting lists of moves in which a value is referenced (or that are referenced) into move-list. We also add all moves to worklist-moves.

There's a wrinkle, though: each phi instruction might actually correspond to zero, one, or two moves. We'll separate things out using instances of <p2k-move>.

<Module p2k-regalloc>+=

define class <p2k-move> (<object>)
  constant slot move-phi :: <p2k-instruction>,
    required-init-keyword: phi:;
  constant slot move-operand-getter :: <function>,
    required-init-keyword: operand-getter:;
end class;
            

<Declare and initialize data structures for p2k-regalloc>+=

let move-list :: <object-table> = make(<object-table>);
let worklist-moves :: <deque> = make(<deque>);
let move-kind  :: <object-table> = make(<object-table>);
            

<Local methods within p2k-regalloc>+=

  method construct-worklists(entry-blk :: <p2k-basic-block>) => ();
    for(deg keyed-by inst in degree)
      let x = inst.instruction-operand-x;
      let y = inst.instruction-operand-y;
      if(inst.instruction-opcode == #"PHI")
         if(instance?(x, <p2k-instruction>))
           let move = make(<p2k-move>,
                           phi: inst, operand-getter: instruction-operand-x);
           move-list[inst]
             := add-new(element(move-list, inst, default: #()), move);
           move-list[x] := add-new(element(move-list, x, default: #()), move);
           push(worklist-moves, move);
           move-kind[move] := #"worklist";
         end if;
         if(instance?(y, <p2k-instruction>))
           let move = make(<p2k-move>,
                           phi: inst, operand-getter: instruction-operand-y);
           move-list[inst]
             := add-new(element(move-list, inst, default: #()), move);
           move-list[y] := add-new(element(move-list, y, default: #()), move);
           push(worklist-moves, move);
           move-kind[move] := #"worklist";
         end if;
      end if;
    end for;
    <Construct initial worklists for instructions>
  end,
            

Instructions are initially categorized by their degree and whether or not they are move-related.

<Declare and initialize data structures for p2k-regalloc>+=

let spill-worklist :: <deque> = make(<deque>);
let freeze-worklist :: <deque> = make(<deque>);
let simplify-worklist :: <deque> = make(<deque>);
let worklist-kind  :: <object-table> = make(<object-table>);
            

<Construct initial worklists for instructions>=

for(deg keyed-by inst in degree)
  if(deg >= K)
    push(spill-worklist, inst);
    worklist-kind[inst] := #"spill";
  elseif(element(move-list, inst, default: #f))
    push(freeze-worklist, inst);
    worklist-kind[inst] := #"freeze";
  else
    push(simplify-worklist, inst);
    worklist-kind[inst] := #"simplify";
  end if;
end for;    
            

Simplifing the Graph

Nodes that have fewer than K conflicts can be made ready for coloring by placing them in the select-stack.

<Declare and initialize data structures for p2k-regalloc>+=

let select-stack :: <deque> = make(<deque>);
            

<Local methods within p2k-regalloc>+=

  method regalloc-simplify() => ();
    let inst = pop(simplify-worklist);
    push(select-stack, inst);
    worklist-kind[inst] := #"select";
    for(val in element(adjacency-list, inst, default: #()))
      unless(worklist-kind[val] == #"select"
             | worklist-kind[val] == #"coalesced")
        decrement-degree(val);
      end;
    end for;
  end method,
            

When we reduce the degree of a neighbor, we might bring it below the threshold so where it can be assigned a register, or where phi instructions it is associated with can be re-considered for handling.

<Local methods within p2k-regalloc>+=

  method move-related?
      (inst :: <p2k-instruction>) 
   => (related? :: <boolean>);
    let moves = element(move-list, inst, default: #f);
    moves
      & any?(method(m)
                   move-kind[m] == #"active" | move-kind[m] == #"worklist";
                 end, moves);
  end method,
            
  method decrement-degree(inst :: <p2k-instruction>) => ();
    let d = degree[inst];
    degree[inst] := d - 1;
    if(d = K)
      enable-moves(inst);
      do(enable-moves, adjacency-list[inst]);
      remove!(spill-worklist, inst);
      if(move-related?(inst))
        push(freeze-worklist, inst);
        worklist-kind[inst] := #"freeze";
      else
        push(simplify-worklist, inst);
        worklist-kind[inst] := #"simplify";
      end if;
    end if;
  end method,
            

<Local methods within p2k-regalloc>+=

  method enable-moves(inst :: <p2k-instruction>) => ();
    for(m in element(move-list, inst, default: #()))
      if(move-kind[m] == #"active")
        push(worklist-moves, m);
        move-kind[m] := #"worklist";
      end if;
    end for;
  end method,
            

Coalescing Moves

We want to assign the same register to a phi-node and one or both of its arguments. When we do so, the source(s) get an entry in alias pointing to the destination (phi-node).

<Declare and initialize data structures for p2k-regalloc>+=

let alias :: <object-table> = make(<object-table>);
let coalesced :: <deque> = make(<deque>);
            

<Local methods within p2k-regalloc>+=

  method get-alias
      (inst :: <p2k-instruction>) => (a :: <p2k-instruction>);
    if(worklist-kind[inst] == #"coalesced")
      get-alias(alias[inst]);
    else
      inst;
    end if;
  end method,
            

Here we try to coalesce the register assignments. Failure will result in move instructions being generated to compensate.

<Local methods within p2k-regalloc>+=

  method regalloc-coalesce() => ();
    let m = pop(worklist-moves);
    let dest = get-alias(m.move-phi);
    let src = get-alias((m.move-operand-getter)(m.move-phi));

    if(dest == src)
      move-kind[m] := #"coalesced";
      coalesce-add-to-worklist(dest);
    elseif(element(edges, pair(dest, src), default: #f))
      move-kind[m] := #"constrained";
      coalesce-add-to-worklist(dest);
      coalesce-add-to-worklist(src);
    elseif(conservative?(union(adjacency-list[dest], adjacency-list[src])))
      move-kind[m] := #"coalesced";
      coalesce-combine(dest, src);
      coalesce-add-to-worklist(dest);
    else
      move-kind[m] := #"active";
    end if;      
  end method,
            

When we can, or when we have to, we move newly non-move-related instructions into the simplify-worklist.

<Local methods within p2k-regalloc>+=

  method coalesce-add-to-worklist(inst :: <p2k-instruction>) => ();
    if(worklist-kind[inst] ~== #"freeze")
      error("coalesce-to-worklist but %= is in %=", inst, worklist-kind[inst]);
    end if;
    if(~move-related?(inst) & degree[inst] < K)
      remove!(freeze-worklist, inst);
      push(simplify-worklist, inst);
      worklist-kind[inst] := #"simplify";
    end if;
  end method,
            

This implements the Briggs conservative coalescing test.

<Local methods within p2k-regalloc>+=

  method conservative?
      (insts :: <sequence>) => (conservative? :: <boolean>);
    let c = for(inst in insts,
                c = 0 then if(degree[inst] >= K) c + 1 else c end)
              finally c;
            end;
    c < K;
  end method,
            

When we coalesce two values then we add an alias entry for the source, and merge their interference graphs.

<Local methods within p2k-regalloc>+=

  method coalesce-combine
      (dest :: <p2k-instruction>, src :: <p2k-instruction>)
   => ();
    if(worklist-kind[src] == #"freeze")
      remove!(freeze-worklist, src);
    elseif(worklist-kind[src] == #"spill")
      remove!(spill-worklist, src);
    else
      error("combine: %= in the wrong worklist!", src);
    end if;

    push(coalesced, src);
    worklist-kind[src] := #"coalesced";
    alias[src] := dest;
    move-list[dest] := union(move-list[dest], move-list[src]);
    for(t in adjacency-list[src])
      add-edge(t, dest);
      decrement-degree(t);
    end for;
    if(worklist-kind[dest] == #"freeze" & degree[dest] >= K)
      remove!(freeze-worklist, dest);
      push(spill-worklist, dest);
    end if;
  end method,
            

Freezing

Freezing is when we give up on trying to coalesce some move-related nodes. George and Appel say it doesn't happen very often.

<Local methods within p2k-regalloc>+=

  method regalloc-freeze() => ();
    let inst = pop(freeze-worklist);
    push(simplify-worklist, inst);
    worklist-kind[inst] := #"simplify";
    freeze-moves(inst);
  end method,
            

We freeze any related moves, and release the other nodes for (independent) register selection.

<Local methods within p2k-regalloc>+=

  method freeze-moves(inst :: <p2k-instruction>) => ();
    for(m in element(move-list, inst, default: #()))
      if(move-kind[m] == #"active")
        let dest = get-alias(m.move-phi);
        let src = get-alias((m.move-operand-getter)(m.move-phi));
        move-kind[m] := #"frozen";
        let release = if(get-alias(src) == get-alias(inst))
                        get-alias(dest);
                      else
                        get-alias(src);
                      end;
        if(~move-related?(release) & degree[release] < K)
          remove!(freeze-worklist, release);
          push(simplify-worklist, release);
          worklist-kind[release] := #"simplify";
        end if;
      end if;
    end for;
  end method,
            

Selecting Spill Candiates

FIXME We'll be stupid for now and spill the first thing that comes along.

<Local methods within p2k-regalloc>+=

  method regalloc-select-spill() => ();
    let inst = pop(spill-worklist);
    push(simplify-worklist, inst);
    worklist-kind[inst] := #"simplify";
    freeze-moves(inst);
  end method,
            

Assigning Registers

For each value that made it into the select-stack, we determine what values with already-assigned registers conflict with it, and assign it a register that doesn't conflict. If none do, we consider it spilled.

<Declare and initialize data structures for p2k-regalloc>+=

let spilled :: <deque> = make(<deque>);
            

<Local methods within p2k-regalloc>+=

  method regalloc-assign-registers() => ();
    while(~empty?(select-stack))
      let registers = make(<vector>, size: K);
      for(i from 0 below K)
        registers[i] := i;
      end;
      
      let inst = pop(select-stack);
      for(interfering in adjacency-list[inst])
        let assigned = get-alias(interfering);
        if(worklist-kind[assigned] == #"colored")
          registers[assigned.instruction-register] := #f;
        end if;
      end for;
      let available = choose(method(x) x end, registers);
      if(empty?(available))
        push(spilled, inst);
        worklist-kind[inst] := #"spilled"
      else
        inst.instruction-register := first(available);
        worklist-kind[inst] := #"colored";
      end if;
    end;
    while(~empty?(coalesced))
      let inst = pop(coalesced);
      inst.instruction-register := get-alias(inst).instruction-register;
    end;
  end,
            

Rewriting the Procedure to Account for Spills

When there's nothing else we can do, we re-write the program so that spill candidates are stored in memory, with a store after they are defined and a load before they are referenced.

<Local methods within p2k-regalloc>+=

  method regalloc-perform-spill(procedure :: <p2k-procedure>) => ();
    <Assign storage to each spilled value into spill-map>
    local
      method rewrite-block(blk :: <p2k-basic-block>) => ();
        <Rewrite blk>
        do(rewrite-block, blk.block-dominates);
      end method;
    rewrite-block(procedure.procedure-entry-block);
  end method,
            

First we assign storage on the stack for each spilled value.

<Assign storage to each spilled value into spill-map>=

let spill-map :: <object-table> = make(<object-table>);
for(val in spilled)
  procedure.procedure-storage
   := procedure.procedure-storage + 4;
  spill-map[val] := - procedure.procedure-storage;
end for;
            

Now we rewrite the blocks.

<Rewrite blk>=

let new-instructions = make(<stretchy-vector>);
for(phi in blk.block-phi-instructions)
  if(element(spill-map, phi, default: #f))
    let compute-addr = make(<p2k-instruction>, opcode: #"ADD",
                            block: blk, x: spill-map[phi]);
    add!(new-instructions, compute-addr);
    let store = make(<p2k-instruction>, opcode: #"STORE", block: blk,
                     x: phi, y: compute-addr);
    add!(new-instructions, store);
  end if;
end for;
for(inst in blk.block-instructions)
  let x = inst.instruction-operand-x;
  let y = inst.instruction-operand-y;
  if(element(spill-map, x, default: #f))
    let compute-addr = make(<p2k-instruction>, opcode: #"ADD",
                            block: blk, x: spill-map[x]);
    add!(new-instructions, compute-addr);
    let load = make(<p2k-instruction>, opcode: #"LOAD", block: blk,
                    y: compute-addr);
    add!(new-instructions, load);
    inst.instruction-operand-x := load;
  end if;
  if(element(spill-map, y, default: #f))
    if(x == y)
      inst.instruction-operand-y := inst.instruction-operand-x;
    else
      let compute-addr = make(<p2k-instruction>, opcode: #"ADD",
                              block: blk, x: spill-map[y]);
      add!(new-instructions, compute-addr);
      let load = make(<p2k-instruction>, opcode: #"LOAD", block: blk,
                      y: compute-addr);
      add!(new-instructions, load);
      inst.instruction-operand-y := load;
    end if;
  end if;
  add!(new-instructions, inst);
  if(element(spill-map, inst, default: #f))
    let compute-addr = make(<p2k-instruction>, opcode: #"ADD",
                            block: blk, x: spill-map[inst]);
    add!(new-instructions, compute-addr);
    let store = make(<p2k-instruction>, opcode: #"STORE", block: blk,
                     x: inst, y: compute-addr);
    add!(new-instructions, store);
  end if;
end for;
            

The handling of spilled arguments to phi functions is similar to the MOVE generation that we'll see in the next section.

<Rewrite blk>+=

if(blk.block-branch-block)
  for(phi in blk.block-branch-block.block-phi-instructions)
    if(element(spill-map, phi.instruction-operand-x, default: #f))
      let compute-addr = make(<p2k-instruction>, opcode: #"ADD",
                              block: blk,
                              x: spill-map[phi.instruction-operand-x]);
      add!(new-instructions, compute-addr);
      let load = make(<p2k-instruction>, opcode: #"LOAD", block: blk,
                      y: compute-addr);
      add!(new-instructions, load);
      phi.instruction-operand-x := load;
    end if;
  end for;
end if;
if(blk.block-fail-block)
  for(phi in blk.block-fail-block.block-phi-instructions)
    if(element(spill-map, phi.instruction-operand-y, default: #f))
      let compute-addr = make(<p2k-instruction>, opcode: #"ADD",
                              block: blk,
                              x: spill-map[phi.instruction-operand-y]);
      add!(new-instructions, compute-addr);
      let load = make(<p2k-instruction>, opcode: #"LOAD", block: blk,
                      y: compute-addr);
      add!(new-instructions, load);
      phi.instruction-operand-y := load;
    end if;
  end for;
end if;    

Finaly we replace the instructions.

<Rewrite blk>+=

blk.block-instructions := new-instructions;
            

Generating Move Instructions

In the cases where we couldn't completely eliminate a phi instruction on one or both paths, we need to add move instructions so that the proper value is in the phi-instruction's register for all paths.

<Module p2k-regalloc>+=

define method p2k-generate-moves
    (procedure :: <p2k-procedure>)
 => ();
  local
    method generate-moves(blk :: <p2k-basic-block>) => ();
      if(blk.block-branch-block)
        for(phi in blk.block-branch-block.block-phi-instructions)
          if(p2k-constant?(phi.instruction-operand-x)
             | phi.instruction-register
                 ~= phi.instruction-operand-x.instruction-register)
            let move = make(<p2k-instruction>, opcode: #"MOVE",
                            block: blk, x: phi.instruction-operand-x);
            move.instruction-register := phi.instruction-register;
            add!(blk.block-instructions, move);
            phi.instruction-operand-x := move;
          end if;
        end for;
      end if;
      if(blk.block-fail-block)
        for(phi in blk.block-fail-block.block-phi-instructions)
          if(p2k-constant?(phi.instruction-operand-y)
             | phi.instruction-register
                 ~= phi.instruction-operand-y.instruction-register)
            let move = make(<p2k-instruction>, opcode: #"MOVE",
                            block: blk, x: phi.instruction-operand-y);
            move.instruction-register := phi.instruction-register;
            add!(blk.block-instructions, move);
            phi.instruction-operand-y := move;
          end if;
        end for;
      end if;
      do(generate-moves, blk.block-dominates);
    end method;
  generate-moves(procedure.procedure-entry-block);
end method;
            

Test Harness

<Libraries imported and modules exported by the p2kcomp library>+=

use io;
export p2k-test;
          

<Module definitions for the p2kcomp library>+=

define module p2k-test
  use common-dylan, exclude: { format-to-string };
  use streams;
  use standard-io;
  use format;
  use pprint;
  use print;
  use p2k-lexer;
  use p2k-rep;
  use p2k-parser;
  use p2k-cse;
  use p2k-regalloc;
end module;
          

<Module p2k-test>=

begin
  let lexer = make(<p2k-lexer>, stream: *standard-input*);
  let program = p2k-parse-program(lexer);
  p2k-cse-optimize(program);
  p2k-regalloc(program, 6);
  p2k-generate-moves(program);
  p2k-print-procedure(program, *standard-output*);
end;
          
define method print-object
    (object :: <p2k-instruction>, stream :: <stream>)
 => ();
  pprint-logical-block
    (stream,
     prefix: "{",
     body: method (stream)
             write(stream, "p2k-instruction ");
             print(object.instruction-number, stream);
	   end method,
     suffix: "}");
end method;
          

Bibliography