token822.c
changeset 0 068428edee47
equal deleted inserted replaced
-1:000000000000 0:068428edee47
       
     1 #include "stralloc.h"
       
     2 #include "alloc.h"
       
     3 #include "str.h"
       
     4 #include "token822.h"
       
     5 #include "gen_allocdefs.h"
       
     6 
       
     7 static struct token822 comma = { TOKEN822_COMMA }; /* separator token; FLUSHCOMMA in token822_addrlist appends it to the output after each flushed address */
       
     8 
       
     9 void token822_reverse(ta)
       
    10 token822_alloc *ta;
       
    11 {
       
    12  int i;
       
    13  int n;
       
    14  struct token822 temp;
       
    15 
       
    16  n = ta->len - 1;
       
    17  for (i = 0;i + i < n;++i)
       
    18   {
       
    19    temp = ta->t[i];
       
    20    ta->t[i] = ta->t[n - i];
       
    21    ta->t[n - i] = temp;
       
    22   }
       
    23 }
       
    24 
       
    25 GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_ready) /* NOTE(review): presumably expands to the definition of token822_ready(); the macro comes from gen_allocdefs.h, not shown here -- confirm */
       
    26 GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus) /* NOTE(review): presumably expands to the definition of token822_readyplus(); callers in this file treat a zero return as failure -- confirm against gen_allocdefs.h */
       
    27 GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus,token822_append) /* NOTE(review): presumably expands to the definition of token822_append(), built on token822_readyplus(); callers in this file treat a zero return as failure -- confirm against gen_allocdefs.h */
       
    28 
       
    29 static int needspace(t1,t2)
       
    30 int t1;
       
    31 int t2;
       
    32 {
       
    33  if (!t1) return 0;
       
    34  if (t1 == TOKEN822_COLON) return 1;
       
    35  if (t1 == TOKEN822_COMMA) return 1;
       
    36  if (t2 == TOKEN822_LEFT) return 1;
       
    37  switch(t1)
       
    38   {
       
    39    case TOKEN822_ATOM: case TOKEN822_LITERAL:
       
    40    case TOKEN822_QUOTE: case TOKEN822_COMMENT:
       
    41      switch(t2)
       
    42       {
       
    43        case TOKEN822_ATOM: case TOKEN822_LITERAL:
       
    44        case TOKEN822_QUOTE: case TOKEN822_COMMENT:
       
    45          return 1;
       
    46       }
       
    47   }
       
    48  return 0;
       
    49 }
       
    50 
       
    51 static int atomok(ch)
       
    52 char ch;
       
    53 {
       
    54  switch(ch)
       
    55   {
       
    56    case ' ': case '\t': case '\r': case '\n':
       
    57    case '(': case '[': case '"':
       
    58    case '<': case '>': case ';': case ':':
       
    59    case '@': case ',': case '.':
       
    60      return 0;
       
    61   }
       
    62  return 1;
       
    63 }
       
    64 
       
    65 static void atomcheck(t)
       
    66 struct token822 *t;
       
    67 {
       
    68  int i;
       
    69  char ch;
       
    70  for (i = 0;i < t->slen;++i)
       
    71   {
       
    72    ch = t->s[i];
       
    73    if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']') || (ch == '\\'))
       
    74     {
       
    75      t->type = TOKEN822_QUOTE;
       
    76      return;
       
    77     }
       
    78   }
       
    79 }
       
    80 
       
    81 int token822_unparse(sa,ta,linelen)
       
    82 stralloc *sa;
       
    83 token822_alloc *ta;
       
    84 unsigned int linelen;
       
    85 {
       
    86  struct token822 *t;
       
    87  int len;
       
    88  int ch;
       
    89  int i;
       
    90  int j;
       
    91  int lasttype;
       
    92  int newtype;
       
    93  char *s;
       
    94  char *lineb;
       
    95  char *linee;
       
    96 
       
    97  len = 0;
       
    98  lasttype = 0;
       
    99  for (i = 0;i < ta->len;++i)
       
   100   {
       
   101    t = ta->t + i;
       
   102    newtype = t->type;
       
   103    if (needspace(lasttype,newtype))
       
   104      ++len;
       
   105    lasttype = newtype;
       
   106    switch(newtype)
       
   107     {
       
   108      case TOKEN822_COMMA:
       
   109        len += 3; break;
       
   110      case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT:
       
   111      case TOKEN822_SEMI: case TOKEN822_COLON:
       
   112        ++len; break;
       
   113      case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
       
   114        if (t->type != TOKEN822_ATOM) len += 2;
       
   115        for (j = 0;j < t->slen;++j)
       
   116 	 switch(ch = t->s[j])
       
   117 	  {
       
   118 	   case '"': case '[': case ']': case '(': case ')':
       
   119 	   case '\\': case '\r': case '\n': ++len;
       
   120 	   default: ++len;
       
   121 	  }
       
   122        break;
       
   123     }
       
   124   }
       
   125  len += 2;
       
   126 
       
   127  if (!stralloc_ready(sa,len))
       
   128    return -1;
       
   129 
       
   130  s = sa->s;
       
   131  lineb = s;
       
   132  linee = 0;
       
   133 
       
   134  lasttype = 0;
       
   135  for (i = 0;i < ta->len;++i)
       
   136   {
       
   137    t = ta->t + i;
       
   138    newtype = t->type;
       
   139    if (needspace(lasttype,newtype))
       
   140      *s++ = ' ';
       
   141    lasttype = newtype;
       
   142    switch(newtype)
       
   143     {
       
   144      case TOKEN822_COMMA:
       
   145        *s++ = ',';
       
   146 #define NSUW \
       
   147  s[0] = '\n'; s[1] = ' '; \
       
   148  if (linee && (!linelen || (s - lineb <= linelen))) \
       
   149   { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \
       
   150  else { if (linee) lineb = linee + 1; linee = s; s += 2; }
       
   151        NSUW
       
   152        break;
       
   153      case TOKEN822_AT: *s++ = '@'; break;
       
   154      case TOKEN822_DOT: *s++ = '.'; break;
       
   155      case TOKEN822_LEFT: *s++ = '<'; break;
       
   156      case TOKEN822_RIGHT: *s++ = '>'; break;
       
   157      case TOKEN822_SEMI: *s++ = ';'; break;
       
   158      case TOKEN822_COLON: *s++ = ':'; break;
       
   159      case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
       
   160        if (t->type == TOKEN822_QUOTE) *s++ = '"';
       
   161        if (t->type == TOKEN822_LITERAL) *s++ = '[';
       
   162        if (t->type == TOKEN822_COMMENT) *s++ = '(';
       
   163        for (j = 0;j < t->slen;++j)
       
   164 	 switch(ch = t->s[j])
       
   165 	  {
       
   166 	   case '"': case '[': case ']': case '(': case ')':
       
   167 	   case '\\': case '\r': case '\n': *s++ = '\\';
       
   168 	   default: *s++ = ch;
       
   169 	  }
       
   170        if (t->type == TOKEN822_QUOTE) *s++ = '"';
       
   171        if (t->type == TOKEN822_LITERAL) *s++ = ']';
       
   172        if (t->type == TOKEN822_COMMENT) *s++ = ')';
       
   173        break;
       
   174     }
       
   175   }
       
   176  NSUW
       
   177  --s;
       
   178  sa->len = s - sa->s;
       
   179  return 1;
       
   180 }
       
   181 
       
   182 int token822_unquote(sa,ta)
       
   183 stralloc *sa;
       
   184 token822_alloc *ta;
       
   185 {
       
   186  struct token822 *t;
       
   187  int len;
       
   188  int i;
       
   189  int j;
       
   190  char *s;
       
   191 
       
   192  len = 0;
       
   193  for (i = 0;i < ta->len;++i)
       
   194   {
       
   195    t = ta->t + i;
       
   196    switch(t->type)
       
   197     {
       
   198      case TOKEN822_COMMA: case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: 
       
   199      case TOKEN822_RIGHT: case TOKEN822_SEMI: case TOKEN822_COLON: 
       
   200        ++len; break;
       
   201      case TOKEN822_LITERAL:
       
   202        len += 2;
       
   203      case TOKEN822_ATOM: case TOKEN822_QUOTE:
       
   204        len += t->slen;
       
   205     }
       
   206   }
       
   207 
       
   208  if (!stralloc_ready(sa,len))
       
   209    return -1;
       
   210 
       
   211  s = sa->s;
       
   212 
       
   213  for (i = 0;i < ta->len;++i)
       
   214   {
       
   215    t = ta->t + i;
       
   216    switch(t->type)
       
   217     {
       
   218      case TOKEN822_COMMA: *s++ = ','; break;
       
   219      case TOKEN822_AT: *s++ = '@'; break;
       
   220      case TOKEN822_DOT: *s++ = '.'; break;
       
   221      case TOKEN822_LEFT: *s++ = '<'; break;
       
   222      case TOKEN822_RIGHT: *s++ = '>'; break;
       
   223      case TOKEN822_SEMI: *s++ = ';'; break;
       
   224      case TOKEN822_COLON: *s++ = ':'; break;
       
   225      case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL:
       
   226        if (t->type == TOKEN822_LITERAL) *s++ = '[';
       
   227        for (j = 0;j < t->slen;++j)
       
   228 	 *s++ = t->s[j];
       
   229        if (t->type == TOKEN822_LITERAL) *s++ = ']';
       
   230        break;
       
   231      case TOKEN822_COMMENT: break;
       
   232     }
       
   233   }
       
   234  sa->len = s - sa->s;
       
   235  return 1;
       
   236 }
       
   237 
       
   238 int token822_parse(ta,sa,buf)
       
   239 token822_alloc *ta;
       
   240 stralloc *sa;
       
   241 stralloc *buf;
       
   242 {
       
   243  int i;
       
   244  int salen;
       
   245  int level;
       
   246  struct token822 *t;
       
   247  int numtoks;
       
   248  int numchars;
       
   249  char *cbuf;
       
   250 
       
   251  salen = sa->len;
       
   252 
       
   253  numchars = 0;
       
   254  numtoks = 0;
       
   255  for (i = 0;i < salen;++i)
       
   256    switch(sa->s[i])
       
   257     {
       
   258      case '.': case ',': case '@': case '<': case '>': case ':': case ';':
       
   259        ++numtoks; break;
       
   260      case ' ': case '\t': case '\r': case '\n': break;
       
   261      case ')': case ']': return 0;
       
   262      /* other control chars and non-ASCII chars are also bad, in theory */
       
   263      case '(':
       
   264        level = 1;
       
   265        while (level)
       
   266 	{
       
   267 	 if (++i >= salen) return 0;
       
   268 	 switch(sa->s[i])
       
   269 	  {
       
   270 	   case '(': ++level; break;
       
   271 	   case ')': --level; break;
       
   272 	   case '\\': if (++i >= salen) return 0;
       
   273 	   default: ++numchars;
       
   274 	  }
       
   275 	}
       
   276        ++numtoks;
       
   277        break;
       
   278      case '"':
       
   279        level = 1;
       
   280        while (level)
       
   281 	{
       
   282 	 if (++i >= salen) return 0;
       
   283 	 switch(sa->s[i])
       
   284 	  {
       
   285 	   case '"': --level; break;
       
   286 	   case '\\': if (++i >= salen) return 0;
       
   287 	   default: ++numchars;
       
   288 	  }
       
   289 	}
       
   290        ++numtoks;
       
   291        break;
       
   292      case '[':
       
   293        level = 1;
       
   294        while (level)
       
   295 	{
       
   296 	 if (++i >= salen) return 0;
       
   297 	 switch(sa->s[i])
       
   298 	  {
       
   299 	   case ']': --level; break;
       
   300 	   case '\\': if (++i >= salen) return 0;
       
   301 	   default: ++numchars;
       
   302 	  }
       
   303 	}
       
   304        ++numtoks;
       
   305        break;
       
   306      default:
       
   307        do
       
   308 	{
       
   309 	 if (sa->s[i] == '\\') if (++i >= salen) break;
       
   310 	 ++numchars;
       
   311 	 if (++i >= salen)
       
   312 	   break;
       
   313 	}
       
   314        while (atomok(sa->s[i]));
       
   315        --i;
       
   316        ++numtoks;
       
   317     }
       
   318 
       
   319  if (!token822_ready(ta,numtoks))
       
   320    return -1;
       
   321  if (!stralloc_ready(buf,numchars))
       
   322    return -1;
       
   323  cbuf = buf->s;
       
   324  ta->len = numtoks;
       
   325 
       
   326  t = ta->t;
       
   327  for (i = 0;i < salen;++i)
       
   328    switch(sa->s[i])
       
   329     {
       
   330      case '.': t->type = TOKEN822_DOT; ++t; break;
       
   331      case ',': t->type = TOKEN822_COMMA; ++t; break;
       
   332      case '@': t->type = TOKEN822_AT; ++t; break;
       
   333      case '<': t->type = TOKEN822_LEFT; ++t; break;
       
   334      case '>': t->type = TOKEN822_RIGHT; ++t; break;
       
   335      case ':': t->type = TOKEN822_COLON; ++t; break;
       
   336      case ';': t->type = TOKEN822_SEMI; ++t; break;
       
   337      case ' ': case '\t': case '\r': case '\n': break;
       
   338      case '(':
       
   339        t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0;
       
   340        level = 1;
       
   341        while (level)
       
   342 	{
       
   343 	 ++i; /* assert: < salen */
       
   344 	 switch(sa->s[i])
       
   345 	  {
       
   346 	   case '(': ++level; break;
       
   347 	   case ')': --level; break;
       
   348 	   case '\\': ++i; /* assert: < salen */
       
   349 	   default: *cbuf++ = sa->s[i]; ++t->slen;
       
   350 	  }
       
   351 	}
       
   352        ++t;
       
   353        break;
       
   354      case '"':
       
   355        t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0;
       
   356        level = 1;
       
   357        while (level)
       
   358 	{
       
   359 	 ++i; /* assert: < salen */
       
   360 	 switch(sa->s[i])
       
   361 	  {
       
   362 	   case '"': --level; break;
       
   363 	   case '\\': ++i; /* assert: < salen */
       
   364 	   default: *cbuf++ = sa->s[i]; ++t->slen;
       
   365 	  }
       
   366 	}
       
   367        ++t;
       
   368        break;
       
   369      case '[':
       
   370        t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0;
       
   371        level = 1;
       
   372        while (level)
       
   373 	{
       
   374 	 ++i; /* assert: < salen */
       
   375 	 switch(sa->s[i])
       
   376 	  {
       
   377 	   case ']': --level; break;
       
   378 	   case '\\': ++i; /* assert: < salen */
       
   379 	   default: *cbuf++ = sa->s[i]; ++t->slen;
       
   380 	  }
       
   381 	}
       
   382        ++t;
       
   383        break;
       
   384      default:
       
   385        t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0;
       
   386        do
       
   387 	{
       
   388 	 if (sa->s[i] == '\\') if (++i >= salen) break;
       
   389 	 *cbuf++ = sa->s[i]; ++t->slen;
       
   390 	 if (++i >= salen)
       
   391 	   break;
       
   392 	}
       
   393        while (atomok(sa->s[i]));
       
   394        atomcheck(t);
       
   395        --i;
       
   396        ++t;
       
   397     }
       
   398  return 1;
       
   399 }
       
   400 
       
   401 static int gotaddr(taout,taaddr,callback)
       
   402 token822_alloc *taout;
       
   403 token822_alloc *taaddr;
       
   404 int (*callback)();
       
   405 {
       
   406  int i;
       
   407 
       
   408  if (callback(taaddr) != 1)
       
   409    return 0;
       
   410 
       
   411  if (!token822_readyplus(taout,taaddr->len))
       
   412    return 0;
       
   413  
       
   414  for (i = 0;i < taaddr->len;++i)
       
   415    taout->t[taout->len++] = taaddr->t[i];
       
   416 
       
   417  taaddr->len = 0;
       
   418  return 1;
       
   419 }
       
   420 
       
   421 int token822_addrlist(taout,taaddr,ta,callback)
       
   422 token822_alloc *taout;
       
   423 token822_alloc *taaddr;
       
   424 token822_alloc *ta;
       
   425 int (*callback)();
       
   426 {
       
   427  struct token822 *t;
       
   428  struct token822 *beginning;
       
   429  int ingroup;
       
   430  int wordok;
       
   431 
       
   432  taout->len = 0;
       
   433  taaddr->len = 0;
       
   434 
       
   435  if (!token822_readyplus(taout,1)) return -1;
       
   436  if (!token822_readyplus(taaddr,1)) return -1;
       
   437  
       
   438  ingroup = 0;
       
   439  wordok = 1;
       
   440 
       
   441  beginning = ta->t + 2;
       
   442  t = ta->t + ta->len - 1;
       
   443 
       
   444  /* rfc 822 address lists are easy to parse from right to left */
       
   445 
       
   446 #define FLUSH if (taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1;
       
   447 #define FLUSHCOMMA if (taaddr->len) { \
       
   448 if (!gotaddr(taout,taaddr,callback)) return -1; \
       
   449 if (!token822_append(taout,&comma)) return -1; }
       
   450 #define ADDRLEFT if (!token822_append(taaddr,t--)) return -1;
       
   451 #define OUTLEFT if (!token822_append(taout,t--)) return -1;
       
   452 
       
   453  while (t >= beginning)
       
   454   {
       
   455    switch(t->type)
       
   456     {
       
   457      case TOKEN822_SEMI:
       
   458        FLUSHCOMMA
       
   459        if (ingroup) return 0;
       
   460        ingroup = 1;
       
   461        wordok = 1;
       
   462        break;
       
   463      case TOKEN822_COLON:
       
   464        FLUSH
       
   465        if (!ingroup) return 0;
       
   466        ingroup = 0;
       
   467        while ((t >= beginning) && (t->type != TOKEN822_COMMA))
       
   468 	 OUTLEFT
       
   469        if (t >= beginning)
       
   470 	 OUTLEFT
       
   471        wordok = 1;
       
   472        continue;
       
   473      case TOKEN822_RIGHT:
       
   474        FLUSHCOMMA
       
   475        OUTLEFT
       
   476        while ((t >= beginning) && (t->type != TOKEN822_LEFT))
       
   477 	 ADDRLEFT
       
   478        /* important to use address here even if it's empty: <> */
       
   479        if (!gotaddr(taout,taaddr,callback)) return -1;
       
   480        if (t < beginning) return 0;
       
   481        OUTLEFT
       
   482        while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) || (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) || (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT)))
       
   483 	 OUTLEFT
       
   484        wordok = 0;
       
   485        continue;
       
   486      case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL:
       
   487        if (!wordok)
       
   488 	 FLUSHCOMMA
       
   489        wordok = 0;
       
   490        ADDRLEFT
       
   491        continue;
       
   492      case TOKEN822_COMMENT:
       
   493        /* comment is lexically a space; shouldn't affect wordok */
       
   494        break;
       
   495      case TOKEN822_COMMA:
       
   496        FLUSH
       
   497        wordok = 1;
       
   498        break;
       
   499      default:
       
   500        wordok = 1;
       
   501        ADDRLEFT
       
   502        continue;
       
   503     }
       
   504    OUTLEFT
       
   505   }
       
   506  FLUSH
       
   507  ++t;
       
   508  while (t > ta->t)
       
   509    if (!token822_append(taout,--t)) return -1;
       
   510 
       
   511  token822_reverse(taout);
       
   512  return 1;
       
   513 }