/* ,file-id archive://[lord]/436/rx/node.h/1998-05-18
 */
/* classes: h_files */

#ifndef NODEH
#define NODEH
/*	Copyright (C) 1997 Tom Lord
 * 
 * This program is provided to you under the terms of the Liberty Software
 * License.  You are NOT permitted to redistribute, modify, or use it
 * except in very specific ways described by that license.
 *
 * This software comes with NO WARRANTY.
 * 
 * You should have received a copy of the Liberty Software License
 * along with this software; see the file =LICENSE.  If not, write to
 * the Tom Lord, 1810 Francisco St. #2, Berkeley CA, 94703, USA.  
 */


/* One way to describe regexps is as expressions in
 * an abstract linear syntax, corresponding to such
 * concrete syntaxes as "Posix Basic Expressions" or
 * "Posix Extended Expressions".
 * 
 * "rexp.h" declares functions and data types for constructing
 * syntax trees for regexps.
 *
 * A regexp syntax tree can be built directly, or by translation 
 * from a standard concrete regexp syntax.  The interface for 
 * translating a string into a regexp syntax tree is described
 * in "comp.h"
 */

/* The kinds of node that can appear in a regexp syntax tree.
 *
 * Every enumerator is given an explicit value.  The values are not
 * contiguous: 3 and 5 are not assigned to any node type here.
 */
enum rx_exp_node_type
{
  /* Match from a character set.  `a' or `[a-z]' */
  r_cset = 0,

  /* Match two subexpressions in order.  `ab' */
  r_concat = 1,

  /* Choose one of two subexpressions.  `a\|b' */
  r_alternate = 2,

  /* Match the subexpression any number of times.  `a*' */
  r_star = 4,

  /* Shorthand for a concatenation of characters. */
  r_string = 6,

  /* Generates a tagged, final nfa state. */
  r_cut = 7,

  /* Counted subexpression.  `a{4, 1000}' */
  r_interval = 8,

  /* Parenthesized subexpression. */
  r_parens = 9,

  /* Context-sensitive operator such as "^". */
  r_context = 10
};

/* One node of a regexp syntax tree.
 *
 * `type' selects which of the remaining fields are meaningful; the
 * comment on each group of fields says which node types use it.
 *
 * This header includes nothing itself, so `bitset' and a complete
 * definition of `struct dstring' must already be visible wherever
 * it is included.
 */
struct rx_exp_node
{
  /* NOTE(review): presumably a reference count adjusted by
   * rx_save_rexp and rx_free_rexp -- confirm against the
   * implementation.
   */
  int refs;

  /* The kind of node; determines how the fields below are used. */
  enum rx_exp_node_type type;

  /* If the node is of type r_cset,
   * these describe the character set matched.
   */
  int cset_size;
  bitset cset;

  /* If the node is of type r_interval,
   * these describe the range of the interval.
   * (NOTE(review): presumably intval is the lower bound and
   * intval2 the upper bound -- confirm.)
   *
   * If the node is of type r_cut,
   * intval is the state tag generated by the cut.
   *
   * If the node is of type r_parens,
   * intval is the expression number (for backreferences).
   *
   * If the node is of type r_context,
   * intval is the context operator.
   * Valid operators are '$', '^', and '0' .. '9' (backreferences).
   *
   */
  long intval;
  long intval2;

  /* If the node is of type r_concat or r_alternate,
   * these are the left and right children of the node.
   *
   * If the node is of type r_star, r_interval or r_parens,
   * left is the child of the node.
   */
  struct rx_exp_node *left;
  struct rx_exp_node *right;

  /* If the node is of type r_string,
   * this is the contents of the string.
   * It is stored by value, not through a pointer.
   */
  struct dstring cstr;

  /* Intervals, parentheses and context operators are
   * special because they are not expressible as regular 
   * expressions.  Also, any composite expression with
   * a subexpression which is not a regular expression is
   * itself not a regular expression.
   *
   * rx_analyze_rexp fills in this field which is non-zero
   * for expression nodes which are "not a regular expression".
   */
  int observed;

  /* Some expressions match only strings of one particular 
   * length.  Knowing that length, if it is defined, leads to
   * some easy and rewarding optimizations.
   *
   * rx_analyze_rexp fills in this field with that length,
   * or -1 if no such length can be computed for this expression.
   */
  int len;

  /* These fields are used to cache results
   * computed by "rx_simplify_rexp" and "rx_unfa".
   *
   * `struct rx_cached_rexp' is not defined in this header; it is
   * only used through a pointer here.
   */
  struct rx_exp_node * simplified;
  struct rx_cached_rexp * cr;
};


/* automatically generated __STDC__ prototypes
 *
 * NOTE(review): the definitions of these functions are not in this
 * header.  The grouping comments below are inferred from names and
 * signatures unless they cite the struct comments above; confirm
 * them against the implementation before relying on them.
 */

/* Node constructors (presumably).  Each takes `type' as a plain
 * `int', not as `enum rx_exp_node_type'.  The `intval'/`intval2'
 * parameters are `int', which is narrower than the `long' fields
 * of the same names in `struct rx_exp_node' on some platforms.
 */
extern struct rx_exp_node * rx_exp_node (int type);
extern struct rx_exp_node * rx_mk_r_cset (int type, int size, bitset b);
extern struct rx_exp_node * rx_mk_r_binop (int type,
					   struct rx_exp_node * a,
					   struct rx_exp_node * b);
extern struct rx_exp_node * rx_mk_r_monop (int type, struct rx_exp_node * a);
extern struct rx_exp_node * rx_mk_r_str_c (int type, char c);
extern struct rx_exp_node * rx_mk_r_str (int type, char * s, int len);
extern struct rx_exp_node * rx_mk_r_int (int type, int intval);
extern struct rx_exp_node * rx_mk_r_int2 (int type, int intval, int intval2);

/* Lifetime management and copying (presumably; ownership rules are
 * not visible from this header -- TODO confirm).
 */
extern void rx_save_rexp (struct rx_exp_node * node);
extern void rx_free_rexp (struct rx_exp_node * node);
extern struct rx_exp_node * rx_copy_rexp (int cset_size, struct rx_exp_node *node);
extern struct rx_exp_node * rx_shallow_copy_rexp (int cset_size, struct rx_exp_node *node);

/* Comparison and hashing of expression trees (presumably). */
extern int rx_exp_equal (struct rx_exp_node * a, struct rx_exp_node * b);
extern unsigned long rx_exp_hash (struct rx_exp_node * node);

/* rx_analyze_rexp fills in the `observed' and `len' fields of
 * `struct rx_exp_node' (see the comments on those fields).
 */
extern void rx_analyze_rexp (struct rx_exp_node *** subexps,
			     int * re_nsub,
			     struct rx_exp_node * node);

/* Per the comment on the `simplified' and `cr' fields of
 * `struct rx_exp_node', results computed by rx_simplify_rexp are
 * cached in the node.
 */
extern void rx_simplify_rexp (struct rx_exp_node ** answer,
			      int cset_size,
			      struct rx_exp_node *node,
			      struct rx_exp_node ** subexps);

/* Queries on an expression tree (presumably; the meaning of the
 * return values is not visible here -- TODO confirm).
 */
extern int rx_is_anchored_p (struct rx_exp_node * exp);
extern int rx_fill_in_fastmap (int cset_size, unsigned char * map, struct rx_exp_node * exp);
#endif  /* NODEH */
