1 | // vim:set ft=cpp: |
---|
2 | |
---|
3 | #include "adlib/lib.h" |
---|
4 | #include "adlib/set.h" |
---|
5 | // "fixstr.h" must be included before "map.h" |
---|
6 | #include "fixstr.h" |
---|
7 | #include "adlib/map.h" |
---|
8 | |
---|
9 | #include "pplex.h" |
---|
10 | |
---|
11 | #script enumnames.tcl |
---|
12 | |
---|
13 | typedef Map<FixStr, Str *> InternMap; |
---|
14 | |
---|
15 | Str *Intern(const char *ptr, Int len) { |
---|
16 | static InternMap *map = NULL; |
---|
17 | if (!map) { |
---|
18 | GCVar(map, new InternMap()); |
---|
19 | } |
---|
20 | FixStr fs; |
---|
21 | fs.str = ptr; |
---|
22 | fs.len = len; |
---|
23 | Str *result = map->get(fs, NULL); |
---|
24 | if (!result) { |
---|
25 | result = new Str(ptr, len); |
---|
26 | // `ptr` above is an interior pointer whose contents may disappear |
---|
27 | // due to GC once the `SourceFile` object containing it is no |
---|
28 | // longer reachable. |
---|
29 | // |
---|
30 | // Therefore, we replace it with `result->c_str()`. This is not only |
---|
31 | // not an interior pointer, but is kept alive by the value that the |
---|
32 | // key is in use for. |
---|
33 | fs.str = result->c_str(); |
---|
34 | map->add(fs, result); |
---|
35 | } |
---|
36 | return result; |
---|
37 | } |
---|
38 | |
---|
// Sets the symbol of the `token` variable in the enclosing scope to
// `s` and jumps to the enclosing function's `pushtoken` label.
//
// The body is wrapped in `do { ... } while (0)` so the macro expands
// to a single statement and stays correct under an unbraced
// `if`/`else`. Invoke it with a trailing semicolon.
#define PUSH_TOKEN(s) \
  do { \
    token.sym = (s); \
    goto pushtoken; \
  } while (0)
---|
42 | |
---|
43 | bool Tokenize(SourceFile *source) { |
---|
44 | Str *input = source->filedata; |
---|
45 | const char *cursor = input->c_str(); |
---|
46 | const char *marker = NULL; |
---|
47 | const char *ctxmarker = NULL; |
---|
48 | bool done = false; |
---|
49 | bool error = false; |
---|
50 | TokenList *result = new TokenList(); |
---|
51 | Token token; |
---|
52 | while (!done) { |
---|
53 | const char *last = cursor; |
---|
54 | /*!re2c |
---|
55 | re2c:define:YYCTYPE = "unsigned char"; |
---|
56 | re2c:yyfill:enable = 0; |
---|
57 | re2c:define:YYCURSOR = cursor; |
---|
58 | re2c:define:YYMARKER = marker; |
---|
59 | re2c:define:YYCTXMARKER = ctxmarker; |
---|
60 | |
---|
61 | alpha = [a-zA-Z_]; |
---|
62 | digit = [0-9]; |
---|
63 | oct = [0-7]; |
---|
64 | hex = [0-9a-fA-F]; |
---|
65 | floatsuffix = [fFlL]?; |
---|
66 | intsuffix = [uUlL]*; |
---|
67 | exp = 'e' [-+]? digit+; |
---|
68 | squote = [']; |
---|
69 | quote = ["]; |
---|
70 | any = [^\000\r\n]; |
---|
71 | anyunescaped = [^\000\r\n\\]; |
---|
72 | sp = [ \t\f]; |
---|
73 | eol = [\000\r\n]; |
---|
74 | nl = "\r" | "\n" | "\r\n"; |
---|
75 | postpparg = [^a-zA-Z0-9_\r\n\000]; |
---|
76 | ppany = anyunescaped | ("\\" sp* nl); |
---|
77 | pparg = (postpparg ppany *)?; |
---|
78 | anystr = any \ ["\\]; |
---|
79 | anych = any \ ['\\]; |
---|
80 | longops = "..." | ">>=" | "<<=" | "+=" | "-=" | "*=" | "/=" | "%=" |
---|
81 | | "&=" | "^=" | "|=" | ">>" | "<<" | "++" | "--" | "->" |
---|
82 | | "&&" | "||" | "<=" | ">=" | "==" | "!="; |
---|
83 | esc = "\\"; |
---|
84 | |
---|
85 | #script rules.tcl |
---|
86 | alpha (alpha | digit)* { PUSH_TOKEN(SymIdent); } |
---|
87 | '0x' hex+ intsuffix { PUSH_TOKEN(SymLiteral); } |
---|
88 | '0' oct+ intsuffix { PUSH_TOKEN(SymLiteral); } |
---|
89 | digit+ intsuffix { PUSH_TOKEN(SymLiteral); } |
---|
90 | "L"? squote (esc any anych* | anych) squote { PUSH_TOKEN(SymLiteral); } |
---|
91 | "L"? quote (esc any | anystr)* quote { PUSH_TOKEN(SymLiteral); } |
---|
92 | digit+ exp floatsuffix { PUSH_TOKEN(SymLiteral); } |
---|
93 | digit* "." digit+ exp? floatsuffix { PUSH_TOKEN(SymLiteral); } |
---|
94 | digit+ "." digit* exp? floatsuffix { PUSH_TOKEN(SymLiteral); } |
---|
95 | "(" { PUSH_TOKEN(SymLPar); } |
---|
96 | ")" { PUSH_TOKEN(SymRPar); } |
---|
97 | "[" { PUSH_TOKEN(SymLBrkt); } |
---|
98 | "]" { PUSH_TOKEN(SymRBrkt); } |
---|
99 | "{" { PUSH_TOKEN(SymLBrace); } |
---|
100 | "}" { PUSH_TOKEN(SymRBrace); } |
---|
101 | "=" { PUSH_TOKEN(SymEqual); } |
---|
102 | "," { PUSH_TOKEN(SymComma); } |
---|
103 | ";" { PUSH_TOKEN(SymSemicolon); } |
---|
104 | "&" { PUSH_TOKEN(SymAnd); } |
---|
105 | "&&" { PUSH_TOKEN(SymAndAnd); } |
---|
106 | "::" { PUSH_TOKEN(SymColonColon); } |
---|
107 | "*" { PUSH_TOKEN(SymAst); } |
---|
108 | [-.&!~+*%/<>^|?:=,] { PUSH_TOKEN(SymOp); } |
---|
109 | longops { PUSH_TOKEN(SymOp); } |
---|
110 | ";" { PUSH_TOKEN(SymSemicolon); } |
---|
111 | "//" any+ { PUSH_TOKEN(SymComment); } |
---|
112 | "/" "*" { goto comment; } |
---|
113 | nl { PUSH_TOKEN(SymEOL); } |
---|
114 | "\\" sp* / nl { PUSH_TOKEN(SymWS); } |
---|
115 | sp+ { PUSH_TOKEN(SymWS); } |
---|
116 | "#" sp* digit+ "\"" anystr* "\"" (sp | digit)* { |
---|
117 | PUSH_TOKEN(SymLineDir); |
---|
118 | } |
---|
119 | "\000" { done = true; continue; } |
---|
120 | any { error = true; PUSH_TOKEN(SymBAD); } |
---|
121 | * { done = true; continue; } |
---|
122 | */ |
---|
123 | comment: |
---|
124 | /*!re2c |
---|
125 | "*" "/" { PUSH_TOKEN(SymComment); } |
---|
126 | [^\000] { goto comment; } |
---|
127 | "\000" { done = true; PUSH_TOKEN(SymComment); } |
---|
128 | */ |
---|
129 | pushtoken: |
---|
130 | token.str = Intern(last, cursor - last); |
---|
131 | result->add(token); |
---|
132 | } |
---|
133 | token.sym = SymEOF; |
---|
134 | token.str = S(""); |
---|
135 | result->add(token); |
---|
136 | source->tokens = result; |
---|
137 | return !error; |
---|
138 | } |
---|
139 | |
---|
140 | SourceFile *ReadSource(Str *filename, Str *filedata) { |
---|
141 | SourceFile *result = new SourceFile(); |
---|
142 | result->filename = filename; |
---|
143 | Str *modulename = filename->clone(); |
---|
144 | for (Int i = 0; i < modulename->len(); i++) { |
---|
145 | char ch = modulename->at(i); |
---|
146 | if (ch >= 'a' && ch <= 'z') continue; |
---|
147 | if (ch >= 'A' && ch <= 'Z') continue; |
---|
148 | if (ch >= '0' && ch <= '9') continue; |
---|
149 | if (ch == '_') continue; |
---|
150 | modulename->at(i) = '_'; |
---|
151 | } |
---|
152 | result->modulename = modulename; |
---|
153 | if (!filedata) |
---|
154 | filedata = ReadFile(filename); |
---|
155 | result->filedata = filedata; |
---|
156 | if (!result->filedata) |
---|
157 | return NULL; |
---|
158 | Tokenize(result); |
---|
159 | return result; |
---|
160 | } |
---|