source: git/ppcc/src/pplex.re.in @ 54b24c

spielwiese
Last change on this file since 54b24c was 54b24c, checked in by Reimer Behrends <behrends@…>, 5 years ago
Finalizing thread support.
  • Property mode set to 100644
File size: 4.6 KB
Line 
1// vim:set ft=cpp:
2
3#include "adlib/lib.h"
4#include "adlib/set.h"
5// "fixstr.h" must be included before "map.h"
6#include "fixstr.h"
7#include "adlib/map.h"
8
9#include "pplex.h"
10
11#script enumnames.tcl
12
13typedef Map<FixStr, Str *> InternMap;
14
15Str *Intern(const char *ptr, Int len) {
16  static InternMap *map = NULL;
17  if (!map) {
18    GCVar(map, new InternMap());
19  }
20  FixStr fs;
21  fs.str = ptr;
22  fs.len = len;
23  Str *result = map->get(fs, NULL);
24  if (!result) {
25    result = new Str(ptr, len);
26    // `ptr` above is an interior pointer whose contents may disappear
27    // due to GC once the `SourceFile` object containing it is no
28    // longer reachable.
29    //
30    // Therefore, we replace it with `result->c_str()`. This is not only
31    // not an interior pointer, but is kept alive by the value that the
32    // key is in use for.
33    fs.str = result->c_str();
34    map->add(fs, result);
35  }
36  return result;
37}
38
// Sets the symbol of the token currently being built to `s` and jumps to
// the `pushtoken` label. The expansion site must have a variable named
// `token` with a `sym` member and a `pushtoken:` label in scope.
//
// The body is wrapped in `do { ... } while (0)` so the macro expands to
// exactly one statement and stays correct under an unbraced `if`/`else`.
#define PUSH_TOKEN(s) \
  do { \
    token.sym = (s); \
    goto pushtoken; \
  } while (0)
42
43bool Tokenize(SourceFile *source) {
44  Str *input = source->filedata;
45  const char *cursor = input->c_str();
46  const char *marker = NULL;
47  const char *ctxmarker = NULL;
48  bool done = false;
49  bool error = false;
50  TokenList *result = new TokenList();
51  Token token;
52  while (!done) {
53    const char *last = cursor;
54    /*!re2c
55    re2c:define:YYCTYPE = "unsigned char";
56    re2c:yyfill:enable = 0;
57    re2c:define:YYCURSOR = cursor;
58    re2c:define:YYMARKER = marker;
59    re2c:define:YYCTXMARKER = ctxmarker;
60
61    alpha = [a-zA-Z_];
62    digit = [0-9];
63    oct = [0-7];
64    hex = [0-9a-fA-F];
65    floatsuffix = [fFlL]?;
66    intsuffix = [uUlL]*;
67    exp = 'e' [-+]? digit+;
68    squote = ['];
69    quote = ["];
70    any = [^\000\r\n];
71    anyunescaped = [^\000\r\n\\];
72    sp = [ \t\f];
73    eol = [\000\r\n];
74    nl = "\r" | "\n" | "\r\n";
75    postpparg = [^a-zA-Z0-9_\r\n\000];
76    ppany = anyunescaped | ("\\" sp* nl);
77    pparg = (postpparg ppany *)?;
78    anystr = any \ ["\\];
79    anych = any \ ['\\];
80    longops = "..." | ">>=" | "<<=" | "+=" | "-=" | "*=" | "/=" | "%="
81            | "&=" | "^=" | "|=" | ">>" | "<<" | "++" | "--" | "->"
82            | "&&" | "||" | "<=" | ">=" | "==" | "!=";
83    esc = "\\";
84
85#script rules.tcl
86    alpha (alpha | digit)* { PUSH_TOKEN(SymIdent); }
87    '0x' hex+ intsuffix { PUSH_TOKEN(SymLiteral); }
88    '0' oct+ intsuffix { PUSH_TOKEN(SymLiteral); }
89    digit+ intsuffix { PUSH_TOKEN(SymLiteral); }
90    "L"? squote (esc any anych* | anych) squote { PUSH_TOKEN(SymLiteral); }
91    "L"? quote (esc any | anystr)* quote { PUSH_TOKEN(SymLiteral); }
92    digit+ exp floatsuffix { PUSH_TOKEN(SymLiteral); }
93    digit* "." digit+ exp? floatsuffix { PUSH_TOKEN(SymLiteral); }
94    digit+ "." digit* exp? floatsuffix { PUSH_TOKEN(SymLiteral); }
95    "(" { PUSH_TOKEN(SymLPar); }
96    ")" { PUSH_TOKEN(SymRPar); }
97    "[" { PUSH_TOKEN(SymLBrkt); }
98    "]" { PUSH_TOKEN(SymRBrkt); }
99    "{" { PUSH_TOKEN(SymLBrace); }
100    "}" { PUSH_TOKEN(SymRBrace); }
101    "=" { PUSH_TOKEN(SymEqual); }
102    "," { PUSH_TOKEN(SymComma); }
103    ";" { PUSH_TOKEN(SymSemicolon); }
104    "&" { PUSH_TOKEN(SymAnd); }
105    "&&" { PUSH_TOKEN(SymAndAnd); }
106    "::" { PUSH_TOKEN(SymColonColon); }
107    "*" { PUSH_TOKEN(SymAst); }
108    [-.&!~+*%/<>^|?:=,] { PUSH_TOKEN(SymOp); }
109    longops { PUSH_TOKEN(SymOp); }
110    ";" { PUSH_TOKEN(SymSemicolon); }
111    "//" any+ { PUSH_TOKEN(SymComment); }
112    "/" "*" { goto comment; }
113    nl { PUSH_TOKEN(SymEOL); }
114    "\\" sp* / nl { PUSH_TOKEN(SymWS); }
115    sp+ { PUSH_TOKEN(SymWS); }
116    "#" sp* digit+ "\"" anystr* "\"" (sp | digit)* {
117      PUSH_TOKEN(SymLineDir);
118    }
119    "\000" { done = true; continue; }
120    any { error = true; PUSH_TOKEN(SymBAD); }
121    * { done = true; continue; }
122    */
123    comment:
124    /*!re2c
125    "*" "/" { PUSH_TOKEN(SymComment); }
126    [^\000] { goto comment; }
127    "\000" { done = true; PUSH_TOKEN(SymComment); }
128    */
129    pushtoken:
130      token.str = Intern(last, cursor - last);
131      result->add(token);
132  }
133  token.sym = SymEOF;
134  token.str = S("");
135  result->add(token);
136  source->tokens = result;
137  return !error;
138}
139
140SourceFile *ReadSource(Str *filename, Str *filedata) {
141  SourceFile *result = new SourceFile();
142  result->filename = filename;
143  Str *modulename = filename->clone();
144  for (Int i = 0; i < modulename->len(); i++) {
145    char ch = modulename->at(i);
146    if (ch >= 'a' && ch <= 'z') continue;
147    if (ch >= 'A' && ch <= 'Z') continue;
148    if (ch >= '0' && ch <= '9') continue;
149    if (ch == '_') continue;
150    modulename->at(i) = '_';
151  }
152  result->modulename = modulename;
153  if (!filedata)
154    filedata = ReadFile(filename);
155  result->filedata = filedata;
156  if (!result->filedata)
157    return NULL;
158  Tokenize(result);
159  return result;
160}
Note: See TracBrowser for help on using the repository browser.