|
1 #include "stralloc.h" |
|
2 #include "alloc.h" |
|
3 #include "str.h" |
|
4 #include "token822.h" |
|
5 #include "gen_allocdefs.h" |
|
6 |
|
/* Constant comma token. The FLUSHCOMMA macro used by token822_addrlist
   appends a copy of it to the output list after each flushed address.
   Only the first member of the struct is initialized explicitly. */
7 static struct token822 comma = { TOKEN822_COMMA }; |
|
8 |
|
9 void token822_reverse(ta) |
|
10 token822_alloc *ta; |
|
11 { |
|
12 int i; |
|
13 int n; |
|
14 struct token822 temp; |
|
15 |
|
16 n = ta->len - 1; |
|
17 for (i = 0;i + i < n;++i) |
|
18 { |
|
19 temp = ta->t[i]; |
|
20 ta->t[i] = ta->t[n - i]; |
|
21 ta->t[n - i] = temp; |
|
22 } |
|
23 } |
|
24 |
|
/*
 * Instantiate token822_ready, token822_readyplus and token822_append for
 * the token822_alloc type (element type struct token822, fields t/len/a)
 * through the GEN_ALLOC_* macros.
 * Callers in this file treat a zero return from any of the three as failure.
 * NOTE(review): the macros are presumably defined in gen_allocdefs.h, and
 * the literal 30 is presumably extra slack added when the array grows --
 * confirm both against that header.
 */
25 GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_ready) |
|
26 GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus) |
|
27 GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus,token822_append) |
|
28 |
|
29 static int needspace(t1,t2) |
|
30 int t1; |
|
31 int t2; |
|
32 { |
|
33 if (!t1) return 0; |
|
34 if (t1 == TOKEN822_COLON) return 1; |
|
35 if (t1 == TOKEN822_COMMA) return 1; |
|
36 if (t2 == TOKEN822_LEFT) return 1; |
|
37 switch(t1) |
|
38 { |
|
39 case TOKEN822_ATOM: case TOKEN822_LITERAL: |
|
40 case TOKEN822_QUOTE: case TOKEN822_COMMENT: |
|
41 switch(t2) |
|
42 { |
|
43 case TOKEN822_ATOM: case TOKEN822_LITERAL: |
|
44 case TOKEN822_QUOTE: case TOKEN822_COMMENT: |
|
45 return 1; |
|
46 } |
|
47 } |
|
48 return 0; |
|
49 } |
|
50 |
|
/*
 * atomok: may ch continue an atom that has already started?
 *
 * Returns 0 for whitespace and for the characters that begin another
 * token or act as punctuation ( ( [ " < > ; : @ , . ), 1 for every other
 * character. Note that ')' , ']' and '\\' are accepted here; atomcheck()
 * deals with them afterwards.
 */
static int atomok(ch)
char ch;
{
  static const char stoppers[] = {
    ' ', '\t', '\r', '\n',
    '(', '[', '"',
    '<', '>', ';', ':',
    '@', ',', '.'
  };
  unsigned int k;

  for (k = 0; k < sizeof stoppers / sizeof stoppers[0]; ++k)
    if (ch == stoppers[k])
      return 0;
  return 1;
}
|
64 |
|
65 static void atomcheck(t) |
|
66 struct token822 *t; |
|
67 { |
|
68 int i; |
|
69 char ch; |
|
70 for (i = 0;i < t->slen;++i) |
|
71 { |
|
72 ch = t->s[i]; |
|
73 if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']') || (ch == '\\')) |
|
74 { |
|
75 t->type = TOKEN822_QUOTE; |
|
76 return; |
|
77 } |
|
78 } |
|
79 } |
|
80 |
|
81 int token822_unparse(sa,ta,linelen) |
|
82 stralloc *sa; |
|
83 token822_alloc *ta; |
|
84 unsigned int linelen; |
|
85 { |
|
86 struct token822 *t; |
|
87 int len; |
|
88 int ch; |
|
89 int i; |
|
90 int j; |
|
91 int lasttype; |
|
92 int newtype; |
|
93 char *s; |
|
94 char *lineb; |
|
95 char *linee; |
|
96 |
|
97 len = 0; |
|
98 lasttype = 0; |
|
99 for (i = 0;i < ta->len;++i) |
|
100 { |
|
101 t = ta->t + i; |
|
102 newtype = t->type; |
|
103 if (needspace(lasttype,newtype)) |
|
104 ++len; |
|
105 lasttype = newtype; |
|
106 switch(newtype) |
|
107 { |
|
108 case TOKEN822_COMMA: |
|
109 len += 3; break; |
|
110 case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT: |
|
111 case TOKEN822_SEMI: case TOKEN822_COLON: |
|
112 ++len; break; |
|
113 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: |
|
114 if (t->type != TOKEN822_ATOM) len += 2; |
|
115 for (j = 0;j < t->slen;++j) |
|
116 switch(ch = t->s[j]) |
|
117 { |
|
118 case '"': case '[': case ']': case '(': case ')': |
|
119 case '\\': case '\r': case '\n': ++len; |
|
120 default: ++len; |
|
121 } |
|
122 break; |
|
123 } |
|
124 } |
|
125 len += 2; |
|
126 |
|
127 if (!stralloc_ready(sa,len)) |
|
128 return -1; |
|
129 |
|
130 s = sa->s; |
|
131 lineb = s; |
|
132 linee = 0; |
|
133 |
|
134 lasttype = 0; |
|
135 for (i = 0;i < ta->len;++i) |
|
136 { |
|
137 t = ta->t + i; |
|
138 newtype = t->type; |
|
139 if (needspace(lasttype,newtype)) |
|
140 *s++ = ' '; |
|
141 lasttype = newtype; |
|
142 switch(newtype) |
|
143 { |
|
144 case TOKEN822_COMMA: |
|
145 *s++ = ','; |
|
146 #define NSUW \ |
|
147 s[0] = '\n'; s[1] = ' '; \ |
|
148 if (linee && (!linelen || (s - lineb <= linelen))) \ |
|
149 { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \ |
|
150 else { if (linee) lineb = linee + 1; linee = s; s += 2; } |
|
151 NSUW |
|
152 break; |
|
153 case TOKEN822_AT: *s++ = '@'; break; |
|
154 case TOKEN822_DOT: *s++ = '.'; break; |
|
155 case TOKEN822_LEFT: *s++ = '<'; break; |
|
156 case TOKEN822_RIGHT: *s++ = '>'; break; |
|
157 case TOKEN822_SEMI: *s++ = ';'; break; |
|
158 case TOKEN822_COLON: *s++ = ':'; break; |
|
159 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: |
|
160 if (t->type == TOKEN822_QUOTE) *s++ = '"'; |
|
161 if (t->type == TOKEN822_LITERAL) *s++ = '['; |
|
162 if (t->type == TOKEN822_COMMENT) *s++ = '('; |
|
163 for (j = 0;j < t->slen;++j) |
|
164 switch(ch = t->s[j]) |
|
165 { |
|
166 case '"': case '[': case ']': case '(': case ')': |
|
167 case '\\': case '\r': case '\n': *s++ = '\\'; |
|
168 default: *s++ = ch; |
|
169 } |
|
170 if (t->type == TOKEN822_QUOTE) *s++ = '"'; |
|
171 if (t->type == TOKEN822_LITERAL) *s++ = ']'; |
|
172 if (t->type == TOKEN822_COMMENT) *s++ = ')'; |
|
173 break; |
|
174 } |
|
175 } |
|
176 NSUW |
|
177 --s; |
|
178 sa->len = s - sa->s; |
|
179 return 1; |
|
180 } |
|
181 |
|
182 int token822_unquote(sa,ta) |
|
183 stralloc *sa; |
|
184 token822_alloc *ta; |
|
185 { |
|
186 struct token822 *t; |
|
187 int len; |
|
188 int i; |
|
189 int j; |
|
190 char *s; |
|
191 |
|
192 len = 0; |
|
193 for (i = 0;i < ta->len;++i) |
|
194 { |
|
195 t = ta->t + i; |
|
196 switch(t->type) |
|
197 { |
|
198 case TOKEN822_COMMA: case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: |
|
199 case TOKEN822_RIGHT: case TOKEN822_SEMI: case TOKEN822_COLON: |
|
200 ++len; break; |
|
201 case TOKEN822_LITERAL: |
|
202 len += 2; |
|
203 case TOKEN822_ATOM: case TOKEN822_QUOTE: |
|
204 len += t->slen; |
|
205 } |
|
206 } |
|
207 |
|
208 if (!stralloc_ready(sa,len)) |
|
209 return -1; |
|
210 |
|
211 s = sa->s; |
|
212 |
|
213 for (i = 0;i < ta->len;++i) |
|
214 { |
|
215 t = ta->t + i; |
|
216 switch(t->type) |
|
217 { |
|
218 case TOKEN822_COMMA: *s++ = ','; break; |
|
219 case TOKEN822_AT: *s++ = '@'; break; |
|
220 case TOKEN822_DOT: *s++ = '.'; break; |
|
221 case TOKEN822_LEFT: *s++ = '<'; break; |
|
222 case TOKEN822_RIGHT: *s++ = '>'; break; |
|
223 case TOKEN822_SEMI: *s++ = ';'; break; |
|
224 case TOKEN822_COLON: *s++ = ':'; break; |
|
225 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: |
|
226 if (t->type == TOKEN822_LITERAL) *s++ = '['; |
|
227 for (j = 0;j < t->slen;++j) |
|
228 *s++ = t->s[j]; |
|
229 if (t->type == TOKEN822_LITERAL) *s++ = ']'; |
|
230 break; |
|
231 case TOKEN822_COMMENT: break; |
|
232 } |
|
233 } |
|
234 sa->len = s - sa->s; |
|
235 return 1; |
|
236 } |
|
237 |
|
238 int token822_parse(ta,sa,buf) |
|
239 token822_alloc *ta; |
|
240 stralloc *sa; |
|
241 stralloc *buf; |
|
242 { |
|
243 int i; |
|
244 int salen; |
|
245 int level; |
|
246 struct token822 *t; |
|
247 int numtoks; |
|
248 int numchars; |
|
249 char *cbuf; |
|
250 |
|
251 salen = sa->len; |
|
252 |
|
253 numchars = 0; |
|
254 numtoks = 0; |
|
255 for (i = 0;i < salen;++i) |
|
256 switch(sa->s[i]) |
|
257 { |
|
258 case '.': case ',': case '@': case '<': case '>': case ':': case ';': |
|
259 ++numtoks; break; |
|
260 case ' ': case '\t': case '\r': case '\n': break; |
|
261 case ')': case ']': return 0; |
|
262 /* other control chars and non-ASCII chars are also bad, in theory */ |
|
263 case '(': |
|
264 level = 1; |
|
265 while (level) |
|
266 { |
|
267 if (++i >= salen) return 0; |
|
268 switch(sa->s[i]) |
|
269 { |
|
270 case '(': ++level; break; |
|
271 case ')': --level; break; |
|
272 case '\\': if (++i >= salen) return 0; |
|
273 default: ++numchars; |
|
274 } |
|
275 } |
|
276 ++numtoks; |
|
277 break; |
|
278 case '"': |
|
279 level = 1; |
|
280 while (level) |
|
281 { |
|
282 if (++i >= salen) return 0; |
|
283 switch(sa->s[i]) |
|
284 { |
|
285 case '"': --level; break; |
|
286 case '\\': if (++i >= salen) return 0; |
|
287 default: ++numchars; |
|
288 } |
|
289 } |
|
290 ++numtoks; |
|
291 break; |
|
292 case '[': |
|
293 level = 1; |
|
294 while (level) |
|
295 { |
|
296 if (++i >= salen) return 0; |
|
297 switch(sa->s[i]) |
|
298 { |
|
299 case ']': --level; break; |
|
300 case '\\': if (++i >= salen) return 0; |
|
301 default: ++numchars; |
|
302 } |
|
303 } |
|
304 ++numtoks; |
|
305 break; |
|
306 default: |
|
307 do |
|
308 { |
|
309 if (sa->s[i] == '\\') if (++i >= salen) break; |
|
310 ++numchars; |
|
311 if (++i >= salen) |
|
312 break; |
|
313 } |
|
314 while (atomok(sa->s[i])); |
|
315 --i; |
|
316 ++numtoks; |
|
317 } |
|
318 |
|
319 if (!token822_ready(ta,numtoks)) |
|
320 return -1; |
|
321 if (!stralloc_ready(buf,numchars)) |
|
322 return -1; |
|
323 cbuf = buf->s; |
|
324 ta->len = numtoks; |
|
325 |
|
326 t = ta->t; |
|
327 for (i = 0;i < salen;++i) |
|
328 switch(sa->s[i]) |
|
329 { |
|
330 case '.': t->type = TOKEN822_DOT; ++t; break; |
|
331 case ',': t->type = TOKEN822_COMMA; ++t; break; |
|
332 case '@': t->type = TOKEN822_AT; ++t; break; |
|
333 case '<': t->type = TOKEN822_LEFT; ++t; break; |
|
334 case '>': t->type = TOKEN822_RIGHT; ++t; break; |
|
335 case ':': t->type = TOKEN822_COLON; ++t; break; |
|
336 case ';': t->type = TOKEN822_SEMI; ++t; break; |
|
337 case ' ': case '\t': case '\r': case '\n': break; |
|
338 case '(': |
|
339 t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0; |
|
340 level = 1; |
|
341 while (level) |
|
342 { |
|
343 ++i; /* assert: < salen */ |
|
344 switch(sa->s[i]) |
|
345 { |
|
346 case '(': ++level; break; |
|
347 case ')': --level; break; |
|
348 case '\\': ++i; /* assert: < salen */ |
|
349 default: *cbuf++ = sa->s[i]; ++t->slen; |
|
350 } |
|
351 } |
|
352 ++t; |
|
353 break; |
|
354 case '"': |
|
355 t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0; |
|
356 level = 1; |
|
357 while (level) |
|
358 { |
|
359 ++i; /* assert: < salen */ |
|
360 switch(sa->s[i]) |
|
361 { |
|
362 case '"': --level; break; |
|
363 case '\\': ++i; /* assert: < salen */ |
|
364 default: *cbuf++ = sa->s[i]; ++t->slen; |
|
365 } |
|
366 } |
|
367 ++t; |
|
368 break; |
|
369 case '[': |
|
370 t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0; |
|
371 level = 1; |
|
372 while (level) |
|
373 { |
|
374 ++i; /* assert: < salen */ |
|
375 switch(sa->s[i]) |
|
376 { |
|
377 case ']': --level; break; |
|
378 case '\\': ++i; /* assert: < salen */ |
|
379 default: *cbuf++ = sa->s[i]; ++t->slen; |
|
380 } |
|
381 } |
|
382 ++t; |
|
383 break; |
|
384 default: |
|
385 t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0; |
|
386 do |
|
387 { |
|
388 if (sa->s[i] == '\\') if (++i >= salen) break; |
|
389 *cbuf++ = sa->s[i]; ++t->slen; |
|
390 if (++i >= salen) |
|
391 break; |
|
392 } |
|
393 while (atomok(sa->s[i])); |
|
394 atomcheck(t); |
|
395 --i; |
|
396 ++t; |
|
397 } |
|
398 return 1; |
|
399 } |
|
400 |
|
401 static int gotaddr(taout,taaddr,callback) |
|
402 token822_alloc *taout; |
|
403 token822_alloc *taaddr; |
|
404 int (*callback)(); |
|
405 { |
|
406 int i; |
|
407 |
|
408 if (callback(taaddr) != 1) |
|
409 return 0; |
|
410 |
|
411 if (!token822_readyplus(taout,taaddr->len)) |
|
412 return 0; |
|
413 |
|
414 for (i = 0;i < taaddr->len;++i) |
|
415 taout->t[taout->len++] = taaddr->t[i]; |
|
416 |
|
417 taaddr->len = 0; |
|
418 return 1; |
|
419 } |
|
420 |
|
421 int token822_addrlist(taout,taaddr,ta,callback) |
|
422 token822_alloc *taout; |
|
423 token822_alloc *taaddr; |
|
424 token822_alloc *ta; |
|
425 int (*callback)(); |
|
426 { |
|
427 struct token822 *t; |
|
428 struct token822 *beginning; |
|
429 int ingroup; |
|
430 int wordok; |
|
431 |
|
432 taout->len = 0; |
|
433 taaddr->len = 0; |
|
434 |
|
435 if (!token822_readyplus(taout,1)) return -1; |
|
436 if (!token822_readyplus(taaddr,1)) return -1; |
|
437 |
|
438 ingroup = 0; |
|
439 wordok = 1; |
|
440 |
|
441 beginning = ta->t + 2; |
|
442 t = ta->t + ta->len - 1; |
|
443 |
|
444 /* rfc 822 address lists are easy to parse from right to left */ |
|
445 |
|
446 #define FLUSH if (taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1; |
|
447 #define FLUSHCOMMA if (taaddr->len) { \ |
|
448 if (!gotaddr(taout,taaddr,callback)) return -1; \ |
|
449 if (!token822_append(taout,&comma)) return -1; } |
|
450 #define ADDRLEFT if (!token822_append(taaddr,t--)) return -1; |
|
451 #define OUTLEFT if (!token822_append(taout,t--)) return -1; |
|
452 |
|
453 while (t >= beginning) |
|
454 { |
|
455 switch(t->type) |
|
456 { |
|
457 case TOKEN822_SEMI: |
|
458 FLUSHCOMMA |
|
459 if (ingroup) return 0; |
|
460 ingroup = 1; |
|
461 wordok = 1; |
|
462 break; |
|
463 case TOKEN822_COLON: |
|
464 FLUSH |
|
465 if (!ingroup) return 0; |
|
466 ingroup = 0; |
|
467 while ((t >= beginning) && (t->type != TOKEN822_COMMA)) |
|
468 OUTLEFT |
|
469 if (t >= beginning) |
|
470 OUTLEFT |
|
471 wordok = 1; |
|
472 continue; |
|
473 case TOKEN822_RIGHT: |
|
474 FLUSHCOMMA |
|
475 OUTLEFT |
|
476 while ((t >= beginning) && (t->type != TOKEN822_LEFT)) |
|
477 ADDRLEFT |
|
478 /* important to use address here even if it's empty: <> */ |
|
479 if (!gotaddr(taout,taaddr,callback)) return -1; |
|
480 if (t < beginning) return 0; |
|
481 OUTLEFT |
|
482 while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) || (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) || (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT))) |
|
483 OUTLEFT |
|
484 wordok = 0; |
|
485 continue; |
|
486 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: |
|
487 if (!wordok) |
|
488 FLUSHCOMMA |
|
489 wordok = 0; |
|
490 ADDRLEFT |
|
491 continue; |
|
492 case TOKEN822_COMMENT: |
|
493 /* comment is lexically a space; shouldn't affect wordok */ |
|
494 break; |
|
495 case TOKEN822_COMMA: |
|
496 FLUSH |
|
497 wordok = 1; |
|
498 break; |
|
499 default: |
|
500 wordok = 1; |
|
501 ADDRLEFT |
|
502 continue; |
|
503 } |
|
504 OUTLEFT |
|
505 } |
|
506 FLUSH |
|
507 ++t; |
|
508 while (t > ta->t) |
|
509 if (!token822_append(taout,--t)) return -1; |
|
510 |
|
511 token822_reverse(taout); |
|
512 return 1; |
|
513 } |