Inno-Setup-issrc/Projects/Src/ISPP.CTokenizer.pas

573 lines
13 KiB
ObjectPascal

{
Inno Setup Preprocessor
Copyright (C) 2001-2002 Alex Yackimoff
Inno Setup
Copyright (C) 1997-2020 Jordan Russell
Portions by Martijn Laan
For conditions of distribution and use, see LICENSE.TXT.
}
unit ISPP.CTokenizer;
interface
uses SysUtils;
type
EParsingError = class(Exception)
Position: Integer;
end;
TTokenKind = (tkError, tkEOF, tkIdent, tkNumber, tkString, opGreater,
opLess,
opGreaterEqual,
opLessEqual,
opEqual,
opNotEqual,
opOr,
opAnd,
opAdd,
opSubtract,
opBwOr,
opXor,
opMul,
opDiv,
opBwAnd,
opShl,
opShr,
opMod,
opNot,
opBwNot,
opAssign,
opAgnAdd,
opAgnSub,
opAgnOr,
opAgnXor,
opAgnMul,
opAgnDiv,
opAgnAnd,
opAgnShl,
opAgnShr,
opAgnMod,
opInc,
opDec,
tkOpenParen,
tkOpenBracket,
tkOpenBrace,
tkCloseParen,
tkCloseBracket,
tkCloseBrace,
tkPeriod,
tkComma,
tkColon,
tkSemicolon,
tkQuestion,
tkPtr);
TTokenKinds = set of TTokenKind;
TCTokenizer = class(TObject)
private
FEscapeSequences: Boolean;
FExprStart: PChar;
FIdent: string;
FToken: TTokenKind;
FNextTokenKnown: Boolean;
FNextToken: TTokenKind;
FNextTokenPos: PChar;
FNextIdent: string;
FStoredPos: PChar;
procedure IllegalChar(C: Char);
function InternalNextToken: TTokenKind;
protected
FExpr: PChar;
FExprOffset: Integer;
procedure EndOfExpr;
procedure Error(const Message: string);
procedure ErrorFmt(const Message: string; Args: array of const);
public
constructor Create(const Expression: string;
EscapeSequences: Boolean);
procedure SkipBlanks;
function NextToken: TTokenKind;
function NextTokenExpect(Expected: TTokenKinds): TTokenKind;
function TokenInt: Longint;
function PeekAtNextToken: TTokenKind;
function PeekAtNextTokenString: string;
procedure Store;
procedure Restore;
procedure SetPos(NewPos: PChar);
property Token: TTokenKind read FToken;
property TokenString: string read FIdent;
end;
const
ExpressionStartTokens = [tkOpenParen, tkIdent, tkNumber, tkString, opNot,
opBwNot, opAdd, opSubtract, opInc, opDec, tkPtr];
implementation
uses
ISPP.Consts, Shared.CommonFunc;
{ TCTokenizer }
constructor TCTokenizer.Create(const Expression: string;
EscapeSequences: Boolean);
begin
FExpr := PChar(Expression);
FExprStart := FExpr;
FEscapeSequences := EscapeSequences;
end;
procedure TCTokenizer.SkipBlanks;
begin
while CharInSet(FExpr^, [#1..#32]) do Inc(FExpr);
if (FExpr^ = '/') and (FExpr[1] = '*') then
begin
Inc(FExpr, 2);
while True do
begin
while not CharInSet(FExpr^, [#0, '*']) do Inc(FExpr);
if (FExpr^ = '*') then
if FExpr[1] = '/' then
begin
Inc(FExpr, 2);
SkipBlanks;
Exit;
end
else
Inc(FExpr)
else
Error('Unterminated comment');
end;
end
end;
function TCTokenizer.InternalNextToken: TTokenKind;
procedure Promote(T: TTokenKind);
begin
Result := T;
Inc(FExpr);
end;
function GetString(QuoteChar: Char): string;
var
P: PChar;
S: string;
I: Integer;
C: Byte;
procedure Unterminated;
begin
if FExpr^ = #0 then
Error('Unterminated string');
end;
begin
Inc(FExpr);
Result := '';
while True do
begin
P := FExpr;
while not CharInSet(FExpr^, [#0, '\', QuoteChar]) do Inc(FExpr);
SetString(S, P, FExpr - P);
Result := Result + S;
Unterminated;
if FExpr^ = QuoteChar then
begin
Inc(FExpr);
Break;
end;
Inc(FExpr);
Unterminated;
case FExpr^ of
#0: Unterminated;
'0'..'7':// octal 400 = $100
begin
C := 0;
I := 0;
while CharInSet(FExpr^, ['0'..'7']) and (I < 3) do
begin
Inc(I);
C := (C shl 3) + (Ord(FExpr^) - Ord('0'));
Inc(FExpr);
Unterminated;
end;
Result := Result + Char(C);
Continue;
end;
'a': Result := Result + #7;
'b': Result := Result + #8;
'f': Result := Result + #12;
'n': Result := Result + #10;
'r': Result := Result + #13;
't': Result := Result + #9;
'v': Result := Result + #11;
'x':
begin
Inc(FExpr);
C := 0;
I := 0;
while CharInSet(FExpr^, ['0'..'9', 'A'..'F', 'a'..'f']) and (I < 2) do
begin
Inc(I);
C := C shl 4;
case FExpr^ of
'0'..'9': C := C + (Ord(FExpr^) - Ord('0'));
'A'..'F': C := C + (Ord(FExpr^) - Ord('A')) + $0A;
else
C := C + (Ord(FExpr^) - Ord('a')) + $0A;
end;
Inc(FExpr);
Unterminated;
end;
Result := Result + Char(C);
Continue;
end;
else
Result := Result + FExpr^
end;
Inc(FExpr);
end;
SkipBlanks;
if FExpr^ = QuoteChar then
Result := Result + GetString(QuoteChar);
end;
var
P: PChar;
begin
SkipBlanks;
Result := tkError;
case FExpr^ of
#0:
begin
Result := tkEOF;
Exit;
end;
'!': if FExpr[1] = '=' then Promote(opNotEqual) else Result := opNot;
'&':
case FExpr[1] of
'&': Promote(opAnd);
'=': Promote(opAgnAnd)
else
Result := opBwAnd
end;
'|':
case FExpr[1] of
'|': Promote(opOr);
'=': Promote(opAgnOr)
else
Result := opBwOr
end;
'^': if FExpr[1] = '=' then Promote(opAgnXor) else Result := opXor;
'=': if FExpr[1] = '=' then Promote(opEqual) else Result := opAssign;
'>':
case FExpr[1] of
'>':
begin
Promote(opShr);
if FExpr[1] = '=' then Promote(opAgnShr);
end;
'=': Promote(opGreaterEqual)
else
Result := opGreater
end;
'<':
case FExpr[1] of
'<':
begin
Promote(opShl);
if FExpr[1] = '=' then Promote(opAgnShl);
end;
'=': Promote(opLessEqual)
else
Result := opLess
end;
'+':
case FExpr[1] of
'=': Promote(opAgnAdd);
'+': Promote(opInc)
else
Result := opAdd
end;
'-':
case FExpr[1] of
'=': Promote(opAgnSub);
'-': Promote(opDec)
else
Result := opSubtract
end;
'/': if FExpr[1] = '=' then Promote(opAgnDiv) else Result := opDiv;
'%': if FExpr[1] = '=' then Promote(opAgnMod) else Result := opMod;
'*': if FExpr[1] = '=' then Promote(opAgnMul) else Result := opMul;
'?': Result := tkQuestion;
':': Result := tkColon;
';': Result := tkSemicolon;
',': Result := tkComma;
'.': Result := tkPeriod;
'~': Result := opBwNot;
'(': Result := tkOpenParen;
'[': Result := tkOpenBracket;
'{': Result := tkOpenBrace;
')': Result := tkCloseParen;
']': Result := tkCloseBracket;
'}': Result := tkCloseBrace;
'@': Result := tkPtr;
'A'..'Z', '_', 'a'..'z':
begin
P := FExpr;
repeat
Inc(FExpr)
until not CharInSet(FExpr^, ['0'..'9', 'A'..'Z', '_', 'a'..'z']);
SetString(FIdent, P, FExpr - P);
Result := tkIdent;
Exit;
end;
'0'..'9':
begin
P := FExpr;
repeat
Inc(FExpr)
until not CharInSet(FExpr^, ['0'..'9', 'A'..'F', 'X', 'a'..'f', 'x']);
SetString(FIdent, P, FExpr - P);
while CharInSet(FExpr^, ['L', 'U', 'l', 'u']) do Inc(FExpr);
Result := tkNumber;
Exit;
end;
'"', '''':
begin
if FEscapeSequences then
FIdent := GetString(FExpr^)
else
FIdent := AnsiExtractQuotedStr(FExpr, FExpr^);
Result := tkString;
Exit;
end;
end;
if Result = tkError then IllegalChar(FExpr^);
Inc(FExpr)
end;
function TCTokenizer.PeekAtNextToken: TTokenKind;
var
P: PChar;
SaveIdent: string;
begin
if not FNextTokenKnown then
begin
P := FExpr;
SaveIdent := FIdent;
FNextToken := InternalNextToken;
FNextIdent := FIdent;
FIdent := SaveIdent;
FNextTokenPos := FExpr;
FExpr := P;
FNextTokenKnown := True;
end;
Result := FNextToken;
end;
function TCTokenizer.NextToken: TTokenKind;
begin
if FNextTokenKnown then
begin
FToken := FNextToken;
FIdent := FNextIdent;
FExpr := FNextTokenPos;
FNextTokenKnown := False;
end
else
FToken := InternalNextToken;
Result := FToken;
end;
function TCTokenizer.PeekAtNextTokenString: string;
begin
PeekAtNextToken;
Result := FNextIdent;
end;
function TCTokenizer.TokenInt: Longint;
var
E: Integer;
begin
Val(FIdent, Result, E);
if E <> 0 then
Error('Cannot convert to integer');
end;
procedure TCTokenizer.Restore;
begin
FExpr := FStoredPos;
FNextTokenKnown := False;
end;
procedure TCTokenizer.Store;
begin
FStoredPos := FExpr;
end;
function TCTokenizer.NextTokenExpect(Expected: TTokenKinds): TTokenKind;
function GetFriendlyTokenDesc(T: TTokenKind; Found: Boolean): string;
const
TokenNames: array[TTokenKind] of string =
('illegal character', 'end of expression', 'identifier', 'number', 'string literal',
'right angle bracket (">")',
'left angle bracket ("<")',
'greater-or-equal-to operator (">=")',
'less-or-equal-to operator ("<=")',
'equality operator ("==")',
'inequality operator ("!=")',
'logical OR operator ("||")',
'logical AND operator ("&&")',
'plus sign ("+")',
'minus sign ("-")',
'OR sign ("|")',
'XOR operator ("^")',
'star sign ("*")',
'slash ("/")',
'AND sign ("&")',
'SHL operator ("<<")',
'SHR operator (">>")',
'percent sign ("%")',
'exclamation sign ("!")',
'tilde ("~")',
'equal sign ("=")',
'compound assignment operator ("+=")',
'compound assignment operator ("-=")',
'compound assignment operator ("|=")',
'compound assignment operator ("^=")',
'compound assignment operator ("*=")',
'compound assignment operator ("/=")',
'compound assignment operator ("&=")',
'compound assignment operator ("<<=")',
'compound assignment operator (">>=")',
'compound assignment operator ("%=")',
'increment operator ("++")',
'decrement operator ("--")',
'opening parenthesis ("(")',
'opening bracket ("[")',
'opening brace ("{")',
'closing parenthesis (")")',
'closing bracket ("]")',
'closing brace ("}")',
'period (".")',
'comma (",")',
'colon (":")',
'semicolon (";")',
'question sign ("?")',
'call-context-of operator ("@")');
begin
case T of
tkIdent:
if Found then
Result := Format('identifier "%s"', [TokenString])
else
Result := 'identifier';
tkNumber:
if Found then
Result := Format('number %d (0x%0:.2x)', [TokenInt])
else
Result := 'number';
else
Result := TokenNames[T];
end;
end;
function Capitalize(const S: string): string;
begin
if (S <> '') and CharInSet(S[1], ['a'..'z']) then
Result := UpCase(S[1]) + Copy(S, 2, MaxInt)
else
Result := S;
end;
var
M1, M2: string;
I: TTokenKind;
C: Integer;
begin
Result := NextToken;
if not (Result in Expected) then
begin
C := 0;
if Expected * ExpressionStartTokens = ExpressionStartTokens then
begin
M2 := 'expression';
Expected := Expected - ExpressionStartTokens;
end;
for I := Low(TTokenKind) to High(TTokenKind) do
if I in Expected then
begin
Inc(C);
if M2 <> '' then
begin
if M1 <> '' then M1 := M1 + ', ';
M1 := M1 + M2;
M2 := '';
end;
M2 := GetFriendlyTokenDesc(I, False);
end;
if M2 <> '' then
if M1 <> '' then
begin
if C > 2 then M1 := M1 + ',';
M1 := M1 + ' or ' + M2
end
else
M1 := M2;
Error(Capitalize(Format('%s expected but %s found', [M1, GetFriendlyTokenDesc(Token, True)])));
end;
end;
procedure TCTokenizer.EndOfExpr;
begin
NextTokenExpect([tkEOF, tkSemicolon])
end;
procedure TCTokenizer.Error(const Message: string);
begin
var E := EParsingError.Create(Message);
if FExprOffset <> -1 then
E.Position := FExprOffset + (FExpr - FExprStart) + 1;
raise E;
end;
procedure TCTokenizer.ErrorFmt(const Message: string;
Args: array of const);
begin
Error(Format(Message, Args));
end;
procedure TCTokenizer.IllegalChar(C: Char);
begin
raise EParsingError.CreateFmt(SIllegalChar, [C, Ord(C)]);
end;
procedure TCTokenizer.SetPos(NewPos: PChar);
begin
FExpr := NewPos;
FNextTokenKnown := False;
end;
end.