Pyrogenesis  trunk
Preprocessor.h
Go to the documentation of this file.
1 /*
2  * This source file originally came from OGRE v1.7.2 - http://www.ogre3d.org/
3  * with some tweaks as part of 0 A.D.
4  * All changes are released under the original license, as follows:
5  */
6 
7 /*
8 -----------------------------------------------------------------------------
9 This source file is part of OGRE
10  (Object-oriented Graphics Rendering Engine)
11 For the latest info, see http://www.ogre3d.org/
12 
13 Copyright (c) 2000-2009 Torus Knot Software Ltd
14 
15 Permission is hereby granted, free of charge, to any person obtaining a copy
16 of this software and associated documentation files (the "Software"), to deal
17 in the Software without restriction, including without limitation the rights
18 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19 copies of the Software, and to permit persons to whom the Software is
20 furnished to do so, subject to the following conditions:
21 
22 The above copyright notice and this permission notice shall be included in
23 all copies or substantial portions of the Software.
24 
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31 THE SOFTWARE.
32 -----------------------------------------------------------------------------
33 */
34 
35 #ifndef INCLUDED_CPREPROCESSOR
36 #define INCLUDED_CPREPROCESSOR
37 
38 /**
39  * This is a simplistic C/C++-like preprocessor.
40  * It takes a non-zero-terminated string on input and outputs a
41  * non-zero-terminated string buffer.
42  *
43  * This preprocessor was designed specifically for GLSL shaders, so
44  * if you want to use it for other purposes you might want to check
45  * if the feature set it provides is enough for you.
46  *
47  * Here's a list of supported features:
48  * <ul>
49  * <li>Fast memory allocation-less operation (mostly).</li>
50  * <li>Line continuation (backslash-newline) is swallowed.</li>
51  * <li>Line numbering is fully preserved by inserting empty lines where
52  * required. This is crucial if, say, the GLSL compiler reports an error
53  * with a line number.</li>
54  * <li>\#define: Parametrized and non-parametrized macros. Invoking a macro with
55  * fewer arguments than it takes assigns empty values to missing arguments.</li>
56  * <li>\#undef: Forget defined macros</li>
57  * <li>\#ifdef/\#ifndef/\#else/\#endif: Conditional suppression of parts of code.</li>
58  * <li>\#if: Supports numeric expression of any complexity, also supports the
59  * defined() pseudo-function.</li>
60  * </ul>
61  */
63 {
64  /**
65  * An input token.
66  *
67  * For performance reasons most tokens will point to portions of the
68  * input stream, so no unneeded memory allocation is done. However,
69  * in some cases we must allocate different memory for token storage;
70  * in this case this is signalled by setting the Allocated member
71  * to non-zero, in which case the destructor will know that it must
72  * free the memory on object destruction.
73  *
74  * Again for performance reasons we use malloc/realloc/free here because
75  * C++-style new[] lacks the realloc() counterpart.
76  */
77  class Token
78  {
79  public:
80  enum Kind
81  {
82  TK_EOS, // End of input stream
83  TK_ERROR, // An error has been encountered
84  TK_WHITESPACE, // A whitespace span (but not newline)
85  TK_NEWLINE, // A single newline (CR & LF)
86  TK_LINECONT, // Line continuation ('\' followed by LF)
87  TK_NUMBER, // A number
88  TK_KEYWORD, // A keyword
89  TK_PUNCTUATION, // A punctuation character
90  TK_DIRECTIVE, // A preprocessor directive
91  TK_STRING, // A string
92  TK_COMMENT, // A block comment
93  TK_LINECOMMENT, // A line comment
94  TK_TEXT, // An unparsed text (cannot be returned from GetToken())
95  };
96 
97  /// Token type
99  /// Non-zero if the string was allocated (and must be freed); mutable so that copying/assigning from a const Token can clear it on the source
100  mutable size_t Allocated;
101  union
102  {
103  /// A pointer somewhere into the input buffer
104  const char *String;
105  /// A memory-allocated string
106  char *Buffer;
107  };
108  /// Token length in bytes
109  size_t Length;
110 
111  Token () : Type (TK_ERROR), Allocated (0), String (NULL), Length (0)
112  { }
113 
114  Token (Kind iType) : Type (iType), Allocated (0), String (NULL), Length (0)
115  { }
116 
117  Token (Kind iType, const char *iString, size_t iLength) :
118  Type (iType), Allocated (0), String (iString), Length (iLength)
119  { }
120 
121  Token (const Token &iOther)
122  {
123  Type = iOther.Type;
124  Allocated = iOther.Allocated;
125  iOther.Allocated = 0; // !!! not quite correct but effective: buffer ownership moves to the new token, the source keeps its pointer but will no longer free it
126  String = iOther.String;
127  Length = iOther.Length;
128  }
129 
131  { if (Allocated) free (Buffer); } // destructor body: releases the buffer only if this token owns it
132 
133  /// Assignment operator: frees any buffer owned by this token, then takes over iOther's string and (if any) its ownership. NOTE(review): self-assignment of an allocated token would free the buffer and leave String dangling.
134  Token &operator = (const Token &iOther)
135  {
136  if (Allocated) free (Buffer);
137  Type = iOther.Type;
138  Allocated = iOther.Allocated;
139  iOther.Allocated = 0; // !!! not quite correct but effective: the source gives up ownership so the buffer is freed only once
140  String = iOther.String;
141  Length = iOther.Length;
142  return *this;
143  }
144 
145  /// Append a string to this token
146  void Append (const char *iString, size_t iLength);
147 
148  /// Append a token to this token
149  void Append (const Token &iOther);
150 
151  /// Append given number of newlines to this token
152  void AppendNL (int iCount);
153 
154  /// Count number of newlines in this token
155  int CountNL ();
156 
157  /// Get the numeric value of the token
158  bool GetValue (long &oValue) const;
159 
160  /// Set the numeric value of the token
161  void SetValue (long iValue);
162 
163  /// Test two tokens for equality (compares length and bytes only; the token Type is not compared)
164  bool operator == (const Token &iOther)
165  {
166  if (iOther.Length != Length)
167  return false;
168  return (memcmp (String, iOther.String, Length) == 0);
169  }
170  };
171 
172  /// A macro definition (one node of a singly linked list chained through Next)
173  class Macro
174  {
175  public:
176  /// Macro name
178  /// Number of arguments
179  int NumArgs;
180  /// The names of the arguments
182  /// The macro value
184  /// Unparsed macro body (keeps the whole raw unparsed macro body)
186  /// Next macro in chained list
188  /// A pointer to the function implementation (if the macro is really a function); NULL by default
189  Token (*ExpandFunc) (CPreprocessor *iParent, int iNumArgs, Token *iArgs);
190  /// true if macro expansion is in progress
191  bool Expanding;
192 
193  Macro (const Token &iName) : // Name is copy-constructed from iName, so it takes over iName's buffer ownership (see Token copy constructor)
194  Name (iName), NumArgs (0), Args (NULL), Next (NULL),
195  ExpandFunc (NULL), Expanding (false)
196  { }
197 
199  { delete [] Args; delete Next; } // destructor body: frees the argument array and, by deleting Next, the rest of the chain
200 
201  /// Expand the macro value (will not work for functions)
202  Token Expand (int iNumArgs, Token *iArgs, Macro *iMacros);
203  };
204 
205  friend class CPreprocessor::Macro;
206 
207  /// The current source text input
208  const char *Source;
209  /// The end of the source text
210  const char *SourceEnd;
211  /// Current line number
212  int Line;
213  /// True if we are at beginning of line
214  bool BOL;
215  /// A stack of 32 booleans packed into one value :)
216  unsigned EnableOutput;
217  /// The list of macros defined so far
219 
220  /**
221  * Private constructor to re-parse a single token.
222  */
223  CPreprocessor (const Token &iToken, int iLine);
224 
225  /**
226  * Stateless tokenizer: Parse the input text and return the next token.
227  * @param iExpand
228  * If true, macros will be expanded to their values
229  * @return
230  * The next token from the input stream
231  */
232  Token GetToken (bool iExpand);
233 
234  /**
235  * Handle a preprocessor directive.
236  * @param iToken
237  * The whole preprocessor directive line (until EOL)
238  * @param iLine
239  * The line where the directive begins (for error reports)
240  * @return
241  * The last input token that was not processed.
242  */
243  Token HandleDirective (Token &iToken, int iLine);
244 
245  /**
246  * Handle a \#define directive.
247  * @param iBody
248  * The body of the directive (everything after the directive
249  * until end of line).
250  * @param iLine
251  * The line where the directive begins (for error reports)
252  * @return
253  * true if everything went ok, false if not
254  */
255  bool HandleDefine (Token &iBody, int iLine);
256 
257  /**
258  * Undefine a previously defined macro
259  * @param iBody
260  * The body of the directive (everything after the directive
261  * until end of line).
262  * @param iLine
263  * The line where the directive begins (for error reports)
264  * @return
265  * true if everything went ok, false if not
266  */
267  bool HandleUnDef (Token &iBody, int iLine);
268 
269  /**
270  * Handle an \#ifdef directive.
271  * @param iBody
272  * The body of the directive (everything after the directive
273  * until end of line).
274  * @param iLine
275  * The line where the directive begins (for error reports)
276  * @return
277  * true if everything went ok, false if not
278  */
279  bool HandleIfDef (Token &iBody, int iLine);
280 
281  /**
282  * Handle an \#if directive.
283  * @param iBody
284  * The body of the directive (everything after the directive
285  * until end of line).
286  * @param iLine
287  * The line where the directive begins (for error reports)
288  * @return
289  * true if everything went ok, false if not
290  */
291  bool HandleIf (Token &iBody, int iLine);
292 
293  /**
294  * Handle an \#else directive.
295  * @param iBody
296  * The body of the directive (everything after the directive
297  * until end of line).
298  * @param iLine
299  * The line where the directive begins (for error reports)
300  * @return
301  * true if everything went ok, false if not
302  */
303  bool HandleElse (Token &iBody, int iLine);
304 
305  /**
306  * Handle an \#endif directive.
307  * @param iBody
308  * The body of the directive (everything after the directive
309  * until end of line).
310  * @param iLine
311  * The line where the directive begins (for error reports)
312  * @return
313  * true if everything went ok, false if not
314  */
315  bool HandleEndIf (Token &iBody, int iLine);
316 
317  /**
318  * Get a single function argument until next ',' or ')'.
319  * @param oArg
320  * The argument is returned in this variable.
321  * @param iExpand
322  * If false, parameters are not expanded and no expressions are
323  * allowed; only a single keyword is expected per argument.
324  * @return
325  * The first unhandled token after argument.
326  */
327  Token GetArgument (Token &oArg, bool iExpand);
328 
329  /**
330  * Get all the arguments of a macro: '(' arg1 { ',' arg2 { ',' ... }} ')'
331  * @param oNumArgs
332  * Number of parsed arguments is stored into this variable.
333  * @param oArgs
334  * This is set to a pointer to an array of parsed arguments.
335  * @param iExpand
336  * If false, parameters are not expanded and no expressions are
337  * allowed; only a single keyword is expected per argument.
338  */
339  Token GetArguments (int &oNumArgs, Token *&oArgs, bool iExpand);
340 
341  /**
342  * Parse an expression, compute it and return the result.
343  * @param oResult
344  * A token containing the result of expression
345  * @param iLine
346  * The line at which the expression starts (for error reports)
347  * @param iOpPriority
348  * Operator priority (at which operator we will stop if
349  * proceeding recursively -- used internally. Parser stops
350  * when it encounters an operator with higher or equal priority).
351  * @return
352  * The last unhandled token after the expression
353  */
354  Token GetExpression (Token &oResult, int iLine, int iOpPriority = 0);
355 
356  /**
357  * Get the numeric value of a token.
358  * If the token was produced by expanding a macro, we will get
359  * a TEXT token which can contain a whole expression; in this
360  * case we will call GetExpression to parse it. Otherwise we
361  * just call the token's GetValue() method.
362  * @param iToken
363  * The token to get the numeric value of
364  * @param oValue
365  * The variable to put the value into
366  * @param iLine
367  * The line where the directive begins (for error reports)
368  * @return
369  * true if ok, false if not
370  */
371  bool GetValue (const Token &iToken, long &oValue, int iLine);
372 
373  /**
374  * Expand the given macro, if it exists.
375  * If macro has arguments, they are collected from source stream.
376  * @param iToken
377  * A KEYWORD token containing the (possible) macro name.
378  * @return
379  * The expanded token or iToken if it is not a macro
380  */
381  Token ExpandMacro (const Token &iToken);
382 
383  /**
384  * Check if a macro is defined, and if so, return it
385  * @param iToken
386  * Macro name
387  * @return
388  * The macro object or NULL if a macro with this name does not exist
389  */
390  Macro *IsDefined (const Token &iToken);
391 
392  /**
393  * The implementation of the defined() preprocessor function
394  * @param iParent
395  * The parent preprocessor object
396  * @param iNumArgs
397  * Number of arguments
398  * @param iArgs
399  * The arguments themselves
400  * @return
401  * The return value encapsulated in a token
402  */
403  static Token ExpandDefined (CPreprocessor *iParent, int iNumArgs, Token *iArgs);
404 
405  /**
406  * Parse the input string and return a token containing the whole output.
407  * @param iSource
408  * The source text enclosed in a token
409  * @return
410  * The output text enclosed in a token
411  */
412  Token Parse (const Token &iSource);
413 
414  /**
415  * Call the error handler
416  * @param iLine
417  * The line at which the error happened.
418  * @param iError
419  * The error string.
420  * @param iToken
421  * If not NULL contains the erroneous token
422  */
423  void Error (int iLine, const char *iError, const Token *iToken = NULL);
424 
425 public:
426  /// Create an empty preprocessor object (no macros defined yet: the macro list starts as NULL)
427  CPreprocessor () : MacroList (NULL)
428  { }
429 
430  /// Destroy the preprocessor object
431  virtual ~CPreprocessor ();
432 
433  /**
434  * Define a macro without parameters.
435  * @param iMacroName
436  * The name of the defined macro
437  * @param iMacroNameLen
438  * The length of the name of the defined macro
439  * @param iMacroValue
440  * The value of the defined macro
441  * @param iMacroValueLen
442  * The length of the value of the defined macro
443  */
444  void Define (const char *iMacroName, size_t iMacroNameLen,
445  const char *iMacroValue, size_t iMacroValueLen);
446 
447  /**
448  * Define a numerical macro.
449  * @param iMacroName
450  * The name of the defined macro
451  * @param iMacroNameLen
452  * The length of the name of the defined macro
453  * @param iMacroValue
454  * The value of the defined macro
455  */
456  void Define (const char *iMacroName, size_t iMacroNameLen, long iMacroValue);
457 
458  /**
459  * Define a macro without parameters.
460  * @param iMacroName
461  * The name of the defined macro
462  * @param iMacroValue
463  * The value of the defined macro
464  */
465  void Define (const char *iMacroName, const char *iMacroValue);
466 
467  /**
468  * Define a numerical macro.
469  * @param iMacroName
470  * The name of the defined macro
471  * @param iMacroValue
472  * The value of the defined macro
473  */
474  void Define (const char *iMacroName, long iMacroValue);
475 
476  /**
477  * Undefine a macro.
478  * @param iMacroName
479  * The name of the macro to undefine
480  * @param iMacroNameLen
481  * The length of the name of the macro to undefine
482  * @return
483  * true if the macro has been undefined, false if macro doesn't exist
484  */
485  bool Undef (const char *iMacroName, size_t iMacroNameLen);
486 
487  /**
488  * Parse the input string and return a newly-allocated output string.
489  * @note
490  * The returned preprocessed string is NOT zero-terminated
491  * (just like the input string).
492  * @param iSource
493  * The source text
494  * @param iLength
495  * The length of the source text in characters
496  * @param oLength
497  * The length of the output string.
498  * @return
499  * The output from preprocessor, allocated with malloc().
500  * The parser can actually allocate more than needed for performance
501  * reasons, but this should not be a problem unless you want
502  * to store the returned pointer for a long time, in which case you
503  * might want to realloc() it.
504  * If an error has been encountered, the function returns NULL.
505  * In some cases the function may return an unallocated address
506  * that's *inside* the source buffer. You must free() the result
507  * string only if the returned address is not inside the source text.
508  */
509  char *Parse (const char *iSource, size_t iLength, size_t &oLength);
510 
511  /**
512  * An error handler function type.
513  * The default implementation just drops a note to stderr and
514  * then the parser ends, returning NULL.
515  * @param iData
516  * User-specific pointer from the corresponding CPreprocessor object.
517  * @param iLine
518  * The line at which the error happened.
519  * @param iError
520  * The error string.
521  * @param iToken
522  * If not NULL contains the erroneous token
523  * @param iTokenLen
524  * The length of iToken. iToken is never zero-terminated!
525  */
526  typedef void (*ErrorHandlerFunc) (
527  void *iData, int iLine, const char *iError,
528  const char *iToken, size_t iTokenLen);
529 
530  /**
531  * A pointer to the preprocessor's error handler.
532  * You can assign the address of your own function to this variable
533  * and implement your own error handling (e.g. throwing an exception etc).
534  */
536 
537  /// User-specific storage, passed to Error()
538  void *ErrorData;
539 };
540 
541 #endif // INCLUDED_CPREPROCESSOR
char * Buffer
A memory-allocated string.
Definition: Preprocessor.h:106
const char * Source
The current source text input.
Definition: Preprocessor.h:208
void Define(const char *iMacroName, size_t iMacroNameLen, const char *iMacroValue, size_t iMacroValueLen)
Define a macro without parameters.
Definition: Preprocessor.cpp:1159
void Append(const char *iString, size_t iLength)
Append a string to this token.
Definition: Preprocessor.cpp:56
int NumArgs
Number of arguments.
Definition: Preprocessor.h:179
void(* ErrorHandlerFunc)(void *iData, int iLine, const char *iError, const char *iToken, size_t iTokenLen)
An error handler function type.
Definition: Preprocessor.h:526
void Error(int iLine, const char *iError, const Token *iToken=NULL)
Call the error handler.
Definition: Preprocessor.cpp:258
bool Undef(const char *iMacroName, size_t iMacroNameLen)
Undefine a macro.
Definition: Preprocessor.cpp:1187
CPreprocessor()
Create an empty preprocessor object.
Definition: Preprocessor.h:427
size_t Length
Token length in bytes.
Definition: Preprocessor.h:109
Definition: Preprocessor.h:92
Token()
Definition: Preprocessor.h:111
Definition: Preprocessor.h:87
bool HandleDefine(Token &iBody, int iLine)
Handle a #define directive.
Definition: Preprocessor.cpp:872
Token Parse(const Token &iSource)
Parse the input string and return a token containing the whole output.
Definition: Preprocessor.cpp:1208
const char * String
A pointer somewhere into the input buffer.
Definition: Preprocessor.h:104
bool HandleIf(Token &iBody, int iLine)
Handle an #if directive.
Definition: Preprocessor.cpp:994
bool HandleEndIf(Token &iBody, int iLine)
Handle an #endif directive.
Definition: Preprocessor.cpp:1038
Token * Args
The names of the arguments.
Definition: Preprocessor.h:181
Definition: Preprocessor.h:83
bool BOL
True if we are at beginning of line.
Definition: Preprocessor.h:214
~Macro()
Definition: Preprocessor.h:198
Definition: Preprocessor.h:91
Token GetToken(bool iExpand)
Stateless tokenizer: Parse the input text and return the next token.
Definition: Preprocessor.cpp:266
Token GetArguments(int &oNumArgs, Token *&oArgs, bool iExpand)
Get all the arguments of a macro: '(' arg1 { ',' arg2 { ',' ...
Definition: Preprocessor.cpp:808
virtual ~CPreprocessor()
Destroy the preprocessor object.
Definition: Preprocessor.cpp:253
Token & operator=(const Token &iOther)
Assignment operator.
Definition: Preprocessor.h:134
Token GetArgument(Token &oArg, bool iExpand)
Get a single function argument until next ',' or ')'.
Definition: Preprocessor.cpp:736
Definition: Preprocessor.h:89
Token HandleDirective(Token &iToken, int iLine)
Handle a preprocessor directive.
Definition: Preprocessor.cpp:1053
Macro(const Token &iName)
Definition: Preprocessor.h:193
Macro * Next
Next macro in chained list.
Definition: Preprocessor.h:187
size_t Allocated
True if string was allocated (and must be freed)
Definition: Preprocessor.h:100
Kind Type
Token type.
Definition: Preprocessor.h:98
An input token.
Definition: Preprocessor.h:77
Definition: Preprocessor.h:88
bool operator==(const Token &iOther)
Test two tokens for equality.
Definition: Preprocessor.h:164
int CountNL()
Count number of newlines in this token.
Definition: Preprocessor.cpp:174
bool HandleElse(Token &iBody, int iLine)
Handle an #else directive.
Definition: Preprocessor.cpp:1021
const char * SourceEnd
The end of the source text.
Definition: Preprocessor.h:210
static ErrorHandlerFunc ErrorHandler
A pointer to the preprocessor's error handler.
Definition: Preprocessor.h:535
Definition: Preprocessor.h:94
bool GetValue(long &oValue) const
Get the numeric value of the token.
Definition: Preprocessor.cpp:102
Macro * IsDefined(const Token &iToken)
Check if a macro is defined, and if so, return it.
Definition: Preprocessor.cpp:398
Token Value
The macro value.
Definition: Preprocessor.h:183
Definition: Preprocessor.h:93
Token Name
Macro name.
Definition: Preprocessor.h:177
Definition: Preprocessor.h:90
Token(Kind iType)
Definition: Preprocessor.h:114
A macro definition.
Definition: Preprocessor.h:173
Token ExpandMacro(const Token &iToken)
Expand the given macro, if it exists.
Definition: Preprocessor.cpp:407
~Token()
Definition: Preprocessor.h:130
bool HandleIfDef(Token &iBody, int iLine)
Handle an #ifdef directive.
Definition: Preprocessor.cpp:947
unsigned EnableOutput
A stack of 32 booleans packed into one value :)
Definition: Preprocessor.h:216
Definition: Preprocessor.h:85
Definition: Preprocessor.h:84
bool Expanding
true if macro expansion is in progress
Definition: Preprocessor.h:191
Definition: Preprocessor.h:86
Kind
Definition: Preprocessor.h:80
Token(const Token &iOther)
Definition: Preprocessor.h:121
This is a simplistic C/C++-like preprocessor.
Definition: Preprocessor.h:62
void AppendNL(int iCount)
Append given number of newlines to this token.
Definition: Preprocessor.cpp:160
Token GetExpression(Token &oResult, int iLine, int iOpPriority=0)
Parse an expression, compute it and return the result.
Definition: Preprocessor.cpp:474
Token(Kind iType, const char *iString, size_t iLength)
Definition: Preprocessor.h:117
Token Body
Unparsed macro body (keeps the whole raw unparsed macro body)
Definition: Preprocessor.h:185
Macro * MacroList
The list of macros defined so far.
Definition: Preprocessor.h:218
static Token ExpandDefined(CPreprocessor *iParent, int iNumArgs, Token *iArgs)
The implementation of the defined() preprocessor function.
Definition: Preprocessor.cpp:982
void * ErrorData
User-specific storage, passed to Error()
Definition: Preprocessor.h:538
Definition: Preprocessor.h:82
bool HandleUnDef(Token &iBody, int iLine)
Undefine a previously defined macro.
Definition: Preprocessor.cpp:919
int Line
Current line number.
Definition: Preprocessor.h:212
void SetValue(long iValue)
Set the numeric value of the token.
Definition: Preprocessor.cpp:151