ExpandCollapse

+ 1 RE2 Bootstrap Builder

$PWD/buildsystem/re2.py

import fbuild
from fbuild.functools import call
from fbuild.path import Path
from fbuild.record import Record

import buildsystem

# ------------------------------------------------------------------------------

def build_runtime(phase):
    """Build the RE2 runtime library for the given build phase.

    Copies the public RE2 headers into the shared runtime include tree,
    then compiles the RE2 sources into both a static and a shared
    ``flx_re2`` library.

    Returns a ``Record`` whose ``static`` and ``shared`` fields hold the
    results of the two library builds.
    """
    buildroot = phase.ctx.buildroot
    path = Path(buildroot/'share'/'src/re2/re2')

    # Public headers made available to client code.
    header_names = (
        're2.h',
        'set.h',
        'stringpiece.h',
        'variadic_function.h',
    )
    buildsystem.copy_to(
        phase.ctx,
        buildroot / "share/lib/rtl/re2",
        [path / ('re2/' + name) for name in header_names])

    dst = 'host/lib/rtl/flx_re2'

    # Sources, relative to the RE2 checkout.  util/benchmark.cc,
    # util/pcre.cc, util/random.cc and util/thread.cc are not part of
    # the build.
    source_names = (
        're2/bitstate.cc',
        're2/compile.cc',
        're2/dfa.cc',
        're2/filtered_re2.cc',
        're2/mimics_pcre.cc',
        're2/nfa.cc',
        're2/onepass.cc',
        're2/parse.cc',
        're2/perl_groups.cc',
        're2/prefilter.cc',
        're2/prefilter_tree.cc',
        're2/prog.cc',
        're2/re2.cc',
        're2/regexp.cc',
        're2/set.cc',
        're2/simplify.cc',
        're2/tostring.cc',
        're2/unicode_casefold.cc',
        're2/unicode_groups.cc',
        'util/arena.cc',
        'util/hash.cc',
        'util/rune.cc',
        'util/stringpiece.cc',
        'util/stringprintf.cc',
        'util/strutil.cc',
        'util/valgrind.cc',
    )
    srcs = [path / name for name in source_names]

    # Windows builds get extra macros; every other platform gets the
    # sign-compare warning switched off instead.
    on_windows = 'win32' in phase.platform
    options = dict(
        includes=[
            buildroot / 'share/lib/rtl',
            buildroot / 'host/lib/rtl',
            path,
        ],
        macros=['BUILD_RE2'] + (['WIN32', 'NOMINMAX'] if on_windows else []),
        cflags=[] if on_windows else ['-Wno-sign-compare'],
        libs=[],
        external_libs=[],
        lflags=[],
    )

    static_lib = buildsystem.build_cxx_static_lib(phase, dst, srcs, **options)
    shared_lib = buildsystem.build_cxx_shared_lib(phase, dst, srcs, **options)
    return Record(static=static_lib, shared=shared_lib)

+ 2 String handling

share/lib/std/strings/__init__.flx

  include "std/strings/string";
  include "std/strings/re2";
  include "std/strings/tre";
  include "std/strings/regdef";
  include "std/strings/lexer";
  

+ 3 Strings

We have three string-like things. A cstring is just an alias for an NTBS (Null Terminated Byte String). The workhorse is the string type, which is based on the C++ string. A ustring is a Unicode representation using a 32-bit unsigned integer as the character base; this type is deprecated, to be replaced by the C++11 Unicode string type.

share/lib/std/strings/string.flx

  typedef cstring = +char;
  type string = "::std::basic_string<char>" 
    requires Cxx_headers::string,
    header '#include "flx_serialisers.hpp"',
    encoder "::flx::gc::generic::string_encoder",
    decoder "::flx::gc::generic::string_decoder"
  ;
  typedef strings = typesetof (string);
  
  // Type class of types T that can be converted to a string via str.
  class Str [T] {
    virtual fun str: T -> string;
  }
  
  // Type class providing repr for types that already have Str.
  // The default repr simply delegates to str; instances may override it.
  class Repr [T with Str[T]] {
    virtual fun repr (t:T) : string => str t;
  }
  
  // Convenience class that bundles Str and Repr for the same type.
  class Show [T] {
    inherit Str[T];
    inherit Repr[T];
  }
  
  

+ 3.1 Equality and total ordering

share/lib/std/strings/string.flx

  instance[t in strings] Eq[t] {
    fun == : t * t -> bool = "$1==$2";
  }
  instance[t in strings] Tord[t] {
    fun < : t * t -> bool = "$1<$2";
  }
  
  open class String
  {
    inherit Eq[string];
  
    inherit Tord[string];
  

+ 3.2 Equality of string and char

share/lib/std/strings/string.flx

    // A string equals a char only when it has length 1 and its single
    // character is that char. Both argument orders are provided, and
    // != is the exact negation of ==.
    fun == (s:string, c:char) => len s == 1uz and s.[0] == c;
    fun == (c:char, s:string) => len s == 1uz and s.[0] == c;
    fun != (s:string, c:char) => len s != 1uz or s.[0] != c;
    fun != (c:char, s:string) => len s != 1uz or s.[0] != c;
  

+ 3.3 Append to string object

share/lib/std/strings/string.flx

    proc  += : &string * string = "$1->append($2:assign);";
    proc  += : &string * +char = "$1->append($2:assign);";
    proc  += : &string * char = "*$1 += $2;";
  

+ 3.4 Length of string

share/lib/std/strings/string.flx

    // Length of the string as a size, taken directly from the C++ size() method.
    fun len: string -> size = "$1.size()";
  

+ 3.5 String concatenation.

share/lib/std/strings/string.flx

    fun + : string * string -> string = "$1+$2";
    fun + : string * carray[char] -> string = "$1+$2";
    fun + : string * char -> string = "$1+$2";
    fun + : char * string -> string = "$1+$2";
    //fun + : string * int -> string = "$1+::flx::rtl::i18n::utf8($2:assign)" is add requires package "flx_i18n";
    fun + ( x:string,  y: int) => x + str y;
  
    // may be a bit risky!
    // IT WAS: interferes with "hello" + list ("world","blah"): 
    // is this a string or a list of strings?
    //fun + [T with Str[T]] (x:string, y:T) => x + str y;
  

+ 3.6 Repetition of string or char

share/lib/std/strings/string.flx

    fun * : string * int -> string = "::flx::rtl::strutil::mul($1:assign,$2:assign)" requires package "flx_strutil";
    fun * : char * int -> string = "::std::string($2:assign,$1:assign)";
  

+ 3.7 Application of string to string or int is concatenation

share/lib/std/strings/string.flx

    fun apply (x:string, y:string):string => x + y;
    fun apply (x:string, y:int):string => x + y;
  

+ 3.8 Construct a char from first byte of a string.

Returns nul char (code 0) if the string is empty.

share/lib/std/strings/string.flx

    ctor char (x:string) => x.[0];

+ 3.9 Constructors for string

share/lib/std/strings/string.flx

    ctor string (c:char) => ""+c;
    ctor string: +char = "::std::string($1:assign)";
    ctor string: +char  * !ints = "::std::string($1:assign,$2:assign)";
    fun utf8: int -> string = "::flx::rtl::i18n::utf8($1)" requires package "flx_i18n";
  

+ 3.10 Substrings

share/lib/std/strings/string.flx

    fun subscript: string * !ints -> char =
      "::flx::rtl::strutil::subscript($1:assign,$2:assign)" requires package "flx_strutil";
    fun copyfrom: string * !ints -> string =
      "::flx::rtl::strutil::substr($1:assign,$2:assign,$1:postfix.size())" requires package "flx_strutil";
    fun copyto: string * !ints -> string =
      "::flx::rtl::strutil::substr($1:assign,0,$2:assign)" requires package "flx_strutil";
    fun substring: string * !ints * !ints -> string =
      "::flx::rtl::strutil::substr($1:assign,$2:assign,$3:assign)" requires package "flx_strutil";
  
    // Subscript a string with a slice, yielding the selected substring.
    // Each slice form is forwarded to the matching substring primitive;
    // a single-index slice yields a one character string.
    fun subscript (x:string, s:slice[int]):string =>
      match s with
      | #Slice_all => substring (x, 0, x.len.int)
      | Slice_from (start) => copyfrom (x, start)
      | Slice_to (end) => copyto (x, end)
      | Slice_range (start, end) => substring (x, start, end)
      | Slice_one (index) => string x.[index]
      endmatch
    ;
  
    proc store: &string * !ints * char = "(*$1)[$2] = $3;";
  

+ 3.11 Map a string char by char

share/lib/std/strings/string.flx

    // Apply f to every char of x and return the resulting string.
    // x is declared var so that store can overwrite its chars in place;
    // the modified x is what gets returned.
    // The length check avoids evaluating (len x) - 1uz for an empty string.
    fun map (f:char->char) (var x:string): string = {
      if len x > 0uz do
        for var i in 0uz upto (len x) - 1uz do
          store(&x, i, f x.[i]);
        done
      done
      return x;
    }
  

+ 3.12 STL string functions

These come in two flavours: the standard C++ operations which return stl_npos on failure, and a more Felix like variant which uses an option type.

share/lib/std/strings/string.flx

    const stl_npos: size = "::std::string::npos";
  
    fun stl_find: string * string -> size = "$1.find($2)" is cast;
    fun stl_find: string * string * size -> size = "$1.find($2,$3)" is cast;
    fun stl_find: string * +char -> size = "$1.find($2)" is cast;
    fun stl_find: string * +char * size -> size = "$1.find($2,$3)" is cast;
    fun stl_find: string * char -> size = "$1.find($2)" is cast;
    fun stl_find: string * char * size -> size = "$1.find($2,$3)" is cast;
  
    fun find (s:string, e:string) : opt[size] => match stl_find (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find (s:string, e:string, i:size) : opt[size] => match stl_find (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find (s:string, e:+char) : opt[size] => match stl_find (s, e) with | i when i== stl_npos => None[size] | i => Some i endmatch;
    fun find (s:string, e:+char, i:size) : opt[size] => match stl_find (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find (s:string, e:char) : opt[size] => match stl_find (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find (s:string, e:char, i:size) : opt[size] => match stl_find (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
  
    fun stl_rfind: string * string -> size = "$1.rfind($2)";
    fun stl_rfind: string * string * size -> size = "$1.rfind($2,$3)";
    fun stl_rfind: string * +char-> size = "$1.rfind($2)";
    fun stl_rfind: string * +char * size -> size = "$1.rfind($2,$3)";
    fun stl_rfind: string * char -> size = "$1.rfind($2)";
    fun stl_rfind: string * char * size -> size = "$1.rfind($2,$3)";
  
    fun rfind (s:string, e:string) : opt[size] => match stl_rfind (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun rfind (s:string, e:string, i:size) : opt[size] => match stl_rfind (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun rfind (s:string, e:+char) : opt[size] => match stl_rfind (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun rfind (s:string, e:+char, i:size) : opt[size] => match stl_rfind (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun rfind (s:string, e:char) : opt[size] => match stl_rfind (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun rfind (s:string, e:char, i:size) : opt[size] => match stl_rfind (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
  
    fun stl_find_first_of: string * string -> size = "$1.find_first_of($2)";
    fun stl_find_first_of: string * string * size -> size = "$1.find_first_of($2,$3)";
    fun stl_find_first_of: string * +char -> size = "$1.find_first_of($2)";
    fun stl_find_first_of: string * +char * size -> size = "$1.find_first_of($2,$3)";
    fun stl_find_first_of: string * char -> size = "$1.find_first_of($2)";
    fun stl_find_first_of: string * char * size -> size = "$1.find_first_of($2,$3)";
  
    fun find_first_of (s:string, e:string) : opt[size] => match stl_find_first_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_of (s:string, e:string, i:size) : opt[size] => match stl_find_first_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_of (s:string, e:+char) : opt[size] => match stl_find_first_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_of (s:string, e:+char, i:size) : opt[size] => match stl_find_first_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_of (s:string, e:char) : opt[size] => match stl_find_first_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_of (s:string, e:char, i:size) : opt[size] => match stl_find_first_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
  
    fun stl_find_first_not_of: string * string -> size = "$1.find_first_not_of($2)";
    fun stl_find_first_not_of: string * string * size -> size = "$1.find_first_not_of($2,$3)";
    fun stl_find_first_not_of: string * +char -> size = "$1.find_first_not_of($2)";
    fun stl_find_first_not_of: string * +char * size -> size = "$1.find_first_not_of($2,$3)";
    fun stl_find_first_not_of: string * char -> size = "$1.find_first_not_of($2)";
    fun stl_find_first_not_of: string * char * size -> size = "$1.find_first_not_of($2,$3)";
  
    fun find_first_not_of (s:string, e:string) : opt[size] => match stl_find_first_not_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_not_of (s:string, e:string, i:size) : opt[size] => match stl_find_first_not_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_not_of (s:string, e:+char) : opt[size] => match stl_find_first_not_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_not_of (s:string, e:+char, i:size) : opt[size] => match stl_find_first_not_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_not_of (s:string, e:char) : opt[size] => match stl_find_first_not_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_first_not_of (s:string, e:char, i:size) : opt[size] => match stl_find_first_not_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
  
    fun stl_find_last_of: string * string -> size = "$1.find_last_of($2)";
    fun stl_find_last_of: string * string * size -> size = "$1.find_last_of($2,$3)";
    fun stl_find_last_of: string * +char -> size = "$1.find_last_of($2)";
    fun stl_find_last_of: string * +char * size -> size = "$1.find_last_of($2,$3)";
    fun stl_find_last_of: string * char -> size = "$1.find_last_of($2)";
    fun stl_find_last_of: string * char * size -> size = "$1.find_last_of($2,$3)";
  
    fun find_last_of (s:string, e:string) : opt[size] => match stl_find_last_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_of (s:string, e:string, i:size) : opt[size] => match stl_find_last_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_of (s:string, e:+char) : opt[size] => match stl_find_last_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_of (s:string, e:+char, i:size) : opt[size] => match stl_find_last_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_of (s:string, e:char) : opt[size] => match stl_find_last_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_of (s:string, e:char, i:size) : opt[size] => match stl_find_last_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
  
    fun stl_find_last_not_of: string * string -> size = "$1.find_last_not_of($2)";
    fun stl_find_last_not_of: string * string * size -> size = "$1.find_last_not_of($2,$3)";
    fun stl_find_last_not_of: string * +char -> size = "$1.find_last_not_of($2)";
    fun stl_find_last_not_of: string * +char * size -> size = "$1.find_last_not_of($2,$3)";
    fun stl_find_last_not_of: string * char -> size = "$1.find_last_not_of($2)";
    fun stl_find_last_not_of: string * char * size -> size = "$1.find_last_not_of($2,$3)";
  
    fun find_last_not_of (s:string, e:string) : opt[size] => match stl_find_last_not_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_not_of (s:string, e:string, i:size) : opt[size] => match stl_find_last_not_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_not_of (s:string, e:+char) : opt[size] => match stl_find_last_not_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_not_of (s:string, e:+char, i:size) : opt[size] => match stl_find_last_not_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_not_of (s:string, e:char) : opt[size] => match stl_find_last_not_of (s, e) with | i when i == stl_npos => None[size] | i => Some i endmatch;
    fun find_last_not_of (s:string, e:char, i:size) : opt[size] => match stl_find_last_not_of (s, e, i) with | i when i == stl_npos => None[size] | i => Some i endmatch;
  
    

+ 3.13 Construe string as set of char

share/lib/std/strings/string.flx

    instance Set[string,char] {
      fun \(\in\) (c:char, s:string) => stl_find (s,c) != stl_npos;
    }
    

+ 3.14 Construe string as stream of char

share/lib/std/strings/string.flx

    // Iterate over the chars of a string: the generator yields Some c for
    // each char in index order and then returns None.
    instance Iterable[string, char] {
      gen iterator(var x:string) () = {
        for var i in 0 upto x.len.int - 1 do yield Some (x.[i]); done
        return None[char];
      }
    }
    inherit Streamable[string,char];
  

+ 3.15 Test if a string has given prefix or suffix

share/lib/std/strings/string.flx

    fun prefix(arg:string,key:string)=>
      arg.[to len key]==key
    ;
  
    fun suffix (arg:string,key:string)=>
      arg.[-key.len to]==key
    ;
  
  
    fun startswith (x:string) (e:string) : bool => prefix (x,e);
  
    // True if x ends with the string e; delegates to suffix.
    fun endswith (x:string) (e:string) : bool => suffix (x,e);
  
    fun startswith (x:string) (e:char) : bool => x.[0] == e;
    fun endswith (x:string) (e:char) : bool => x.[-1] == e;
  

+ 3.16 Trim off specified prefix or suffix or both

share/lib/std/strings/string.flx

    // Remove the prefix e from x if x starts with e;
    // otherwise return x unchanged.
    fun ltrim (x:string) (e:string) : string =>
      if startswith x e then
        x.[e.len.int to]
      else
        x
      endif
    ;
  
    // Remove the suffix e from x if x ends with e;
    // otherwise return x unchanged.
    fun rtrim (x:string) (e:string) : string =>
      if endswith x e then
        x.[to x.len.int - e.len.int]
      else
        x
      endif
    ;
  
    fun trim (x:string) (e:string) : string => ltrim (rtrim x e) e;
  

+ 3.17 Strip characters from left, right, or both end of a string.

share/lib/std/strings/string.flx

    // Strip from the left end of x every leading char that occurs in e.
    // Returns the rest of x starting at the first char not in e, or the
    // empty string if every char of x occurs in e.
    // If e is empty there is nothing to strip and x is returned unchanged.
    fun lstrip (x:string, e:string) : string =
    {
      // Guard: with an empty e, "len e - 1uz" below would wrap around
      // (the length is unsigned) and the inner loop would not terminate
      // in any reasonable time.
      if len e == 0uz do
        return x;
      done
      if len x > 0uz do
        for var i in 0uz upto len x - 1uz do
          // Is x.[i] one of the chars to strip?
          var found = false;
          for var j in 0uz upto len e - 1uz do
            if x.[i] == e.[j] do
              found = true;
            done
          done

          // First char that must be kept: return from here to the end.
          if not found do
            return x.[i to];
          done
        done;
      done
      return '';
    }
  
    // Strip from the right end of x every trailing char that occurs in e.
    // Returns x up to and including the last char not in e, or the empty
    // string if every char of x occurs in e.
    // If e is empty there is nothing to strip and x is returned unchanged.
    fun rstrip (x:string, e:string) : string =
    {
      // Guard: with an empty e, "len e - 1uz" below would wrap around
      // (the length is unsigned) and the inner loop would not terminate
      // in any reasonable time.
      if len e == 0uz do
        return x;
      done
      if len x > 0uz do
        for var i in len x - 1uz downto 0uz do
          // Is x.[i] one of the chars to strip?
          var found = false;
          for var j in 0uz upto len e - 1uz do
            if x.[i] == e.[j] do
              found = true;
            done
          done

          // Last char that must be kept: return everything up to it.
          if not found do
            return x.[to i.int + 1];
          done
        done
      done
      return '';
    }
  
    fun strip (x:string, e:string) : string => lstrip(rstrip(x, e), e);
  
    fun lstrip (x:string) : string => lstrip(x, " \t\n\r\f\v");
    fun rstrip (x:string) : string => rstrip(x, " \t\n\r\f\v");
    fun strip (x:string) : string => lstrip$ rstrip x;
  

+ 3.18 Justify string contents

share/lib/std/strings/string.flx

    // Left justify: pad x on the right with spaces up to width chars.
    // A string already at least width chars long is returned unchanged.
    fun ljust(x:string, width:int) : string =>
      if x.len.int >= width
        then x
        else x + (' ' * (width - x.len.int))
      endif
    ;
  
    // Right justify: pad x on the left with spaces up to width chars.
    // A string already at least width chars long is returned unchanged.
    fun rjust(x:string, width:int) : string =>
      if x.len.int >= width
        then x
        else (' ' * (width - x.len.int)) + x
      endif
    ;
  

+ 3.19 Split a string into a list on given separator

share/lib/std/strings/string.flx

    fun split (x:string, d:char): List::list[string] => List::rev (rev_split (x,d));
  
    // Split x on the separator char d, returning the pieces in reverse
    // order (last piece first). A string without d yields a single piece.
    fun rev_split (x:string, d:char): List::list[string] = {
      // aux peels off the text before the first d in x and pushes it onto
      // the accumulator y, then continues with the text after that d.
      fun aux (x:string,y:List::list[string]) =>
        match find (x, d) with
        // List::Cons is qualified here like every other use in the
        // split family.
        | #None => List::Cons (x, y)
        | Some n => aux$ x.[n+1uz to], List::Cons (x.[to n],y)
        endmatch
      ;
      return aux$ x, List::Empty[string];
    }
  
    fun split (x:string, d:string): List::list[string] => List::rev (rev_split (x,d));
  
    fun rev_split (x:string, d:string): List::list[string] = {
      fun aux (pos:size,y:List::list[string]) =>
        match stl_find_first_of (x, d, pos) with
        | $(stl_npos) => List::Cons (x.[pos to],y)
        | n => aux$ (n+1uz), List::Cons (x.[pos to n],y)
        endmatch
      ;
      return aux$ 0uz, List::Empty[string];
    }
  
    fun split (x:string, d:+char): List::list[string] => List::rev (rev_split (x,d));
  
    fun rev_split (x:string, d:+char): List::list[string] = {
      fun aux (x:string,y:List::list[string]) =>
        match find_first_of (x, d) with
        | #None => List::Cons (x, y)
        | Some n => aux$ x.[n+1uz to], List::Cons (x.[to n],y)
        endmatch
      ;
      return aux$ x, List::Empty[string];
    }
  
    // Split x at the first char that occurs anywhere in d. Because the
    // search uses find_first_of, d acts as a set of delimiter chars and
    // not as a substring to match.
    // Returns None if x contains none of those chars, otherwise Some of
    // the pair (text before the delimiter, text after the delimiter).
    fun split_first (x:string, d:string): opt[string*string] =>
      match find_first_of (x, d) with
      | #None => None[string*string]
      | Some n => Some (x.[to n],substring(x,n+1uz,(len x)))
      endmatch
    ;
  
  
    // Split a string on whitespace but respecting
    // double quotes, single quotes, and backslash (slosh) escapes.
    // leading and trailing space is removed. Embedded
    // multiple spaces cause a single split.
    class RespectfulParser {
      union quote_action_t = 
        | ignore-quote
        | keep-quote
        | drop-quote
      ; 
      union dquote_action_t = 
        | ignore-dquote
        | keep-dquote
        | drop-dquote
      ; 
      union escape_action_t = 
        | ignore-escape
        | keep-escape
        | drop-escape
      ; 
      typedef action_t = (quote:quote_action_t, dquote:dquote_action_t, escape:escape_action_t);
  
      union mode_t = | copying | skipping | quote | dquote | escape-copying | escape-quote | escape-dquote;
      typedef state_t = (mode:mode_t, current:string, parsed: list[string] );
  
      noinline fun respectful_parse (action:action_t) (var state:state_t) (var s:string) : state_t = 
      {
        var mode = state.mode;
        var current = state.current;
        var result = Empty[string];
  
        noinline proc handlecopying(ch:char) {
          if ch == char "'" do
            match action.quote with
            | #ignore-quote => 
              current += ch;
            | #keep-quote =>
              current += ch;
              mode = quote;
            | #drop-quote =>
              mode = quote;
            endmatch;
          elif ch == char '"' do
            match action.dquote with
            | #ignore-dquote => 
              current += ch;
            | #keep-dquote =>
              current += ch;
              mode = dquote;
            | #drop-dquote =>
              mode = dquote;
            endmatch;
          elif ch == char '\\' do
            match action.escape with
            | #ignore-escape => 
              current += ch;
            | #keep-escape =>
              current += ch;
              mode = escape-copying;
            | #drop-escape =>
              mode = escape-copying;
            endmatch;
          elif ord ch <= ' '.char.ord  do // can't happen if called from skipping
            result += current;
            current = "";
            mode = skipping;
          else
            current += ch;
            mode = copying;
          done
        }
  
        for ch in s do 
          match mode with
          | #copying => handlecopying ch;
          | #quote =>
            if ch == char "'" do
              match action.quote with
              | #ignore-quote => 
                assert false;
                //current += ch;
              | #keep-quote =>
                current += ch;
                mode = copying;
              | #drop-quote =>
                mode = copying;
              endmatch;
            elif ch == char "\\" do
              match action.escape with
              | #ignore-escape => 
                current += ch;
              | #keep-escape =>
                current += ch;
                mode = escape-quote;
              | #drop-escape =>
                mode = escape-quote;
              endmatch;
            else
              current += ch;
            done 
  
          | #dquote =>
            if ch == char '"' do
              match action.dquote with
              | #ignore-dquote => 
                assert false;
                //current += ch;
              | #keep-dquote =>
                current += ch;
                mode = copying;
              | #drop-dquote =>
                mode = copying;
              endmatch;
            elif ch == char "\\" do
              match action.escape with
              | #ignore-escape => 
                current += ch;
              | #keep-escape =>
                current += ch;
                mode = escape-dquote;
              | #drop-escape =>
                mode = escape-dquote;
              endmatch;
            else
              current += ch;
            done 
  
          | #escape-copying =>
             current += ch;
             mode = copying;
  
          | #escape-quote =>
             current += ch;
             mode = quote;
  
          | #escape-dquote =>
             current += ch;
             mode = dquote;
  
          | #skipping =>
            if ord ch > ' '.char.ord  do
              handlecopying ch;
            done
          endmatch;
        done
        return (mode=mode, current=current, parsed=state.parsed + result);
      }
    }
    
    // simplified one shot parser.
    // ignores mismatched quotes and backslashes.
    fun respectful_split (action:RespectfulParser::action_t) (s:string) : list[string] = 
    {
      var state = RespectfulParser::respectful_parse
        action 
        (
          mode=RespectfulParser::skipping, 
          current="", 
          parsed=Empty[string]
        ) 
        s
      ;
      // ignore mismatched quotes and backslashes.
      match state.mode with 
      | #skipping => ;
      | _ => state.parsed = state.parsed + state.current;
      endmatch;
      return state.parsed;
   
    }
  
    fun respectful_split (s:string) : list[string] =>
      respectful_split (
        quote=RespectfulParser::keep-quote, 
        dquote=RespectfulParser::keep-dquote, 
        escape=RespectfulParser::keep-escape
      ) 
      s
    ; 
  
    // OO version of the parser.
    object respectfulParser (action:RespectfulParser::action_t) = {
      var state = (mode=RespectfulParser::skipping, current="", parsed=Empty[string]);
      method proc parse (s:string) {
        state = RespectfulParser::respectful_parse action state s;
      }
      method fun get_parsed () => state.parsed;
    }
  

+ 3.20 erase, insert or replace substrings

share/lib/std/strings/string.flx

    // Note: pos, length!
    // mutators
    proc erase: &string * size * size = "$1->erase($2,$3);";
    proc insert: &string * size * string = "$1->insert($2,$3);";
    proc replace: &string * size * size * string = "$1->replace($2,$3,$4);";
  
    // functional
    fun erase: string * size * size -> string = "::std::string($1).erase($2,$3)";
    fun insert: string * size * string -> string = "::std::string($1).insert($2,$3)";
    fun replace: string * size * size * string -> string = "::std::string($1).replace($2,$3,$4)";
  
  

+ 3.21 search and replace

Search and replace by string.

share/lib/std/strings/string.flx

    // Replace every occurrence of the string s in x by r, searching from
    // position spos onwards. The part of x before spos is copied unchanged.
    // An empty search string matches nothing: x is returned unchanged.
    fun search_and_replace (x:string, var spos:size, s:string, r:string) : string =
    {
      val m = s.len;
      // Guard: an empty s is found at every position without advancing
      // spos, so the loop below would never terminate.
      if m == 0uz do
        return x;
      done
      var o = x.[to spos];
      var n = (x,s,spos).stl_find;
      while n != stl_npos do
        o+=x.[spos to n]+r;   // text before the match, then the replacement
        spos = n+m;           // resume searching just after the match
        n = (x,s,spos).stl_find;
      done
      o+=x.[spos to];         // remainder after the last match
      return o;
    }
    fun search_and_replace (x:string, s:string, r:string) : string => search_and_replace (x,0uz,s,r);
  
    fun search_and_replace (vs:list[string * string]) (var v:string) = {
      match k,b in vs do
        v = search_and_replace (v,k,b);
      done
      return v;
    }
  

+ 3.22 Regexp search and replace

Uses Google RE2 engine.

share/lib/std/strings/string.flx

    // Replace \0 \1 \2 etc in s with text from v
    fun subst(s:string, v:varray[StringPiece]): string =
    {
    //println$ "Subst " + s +" with " + str v;
       enum mode_t {cp, ins};
       var b = "";
       var mode=cp;
       var j = 0;
       var count = 0;
       for var i in 0 upto s.len.int - 1 do
         match mode with
         | #cp => 
           if s.[i] == char "\\" do 
             mode = ins; 
             j=0; count = 0; 
           else 
            b += s.[i]; 
           done
         | #ins =>
           if s.[i] in "0123456789" do
             j = j * 10 + ord(s.[i]) - ord (char "0");
             ++count;
           else
             if count == 0 do
               b += "\\";
             elif j < v.len.int do
               b+= str v.stl_begin.j;
             done
             // adjacent insertion?
             if s.[i] == char "\\" do
               j=0; count=0;
             else
               mode = cp;
               b += s.[i]; 
             done
           done
         endmatch;
       done
       // run off end
       match mode with
       | #cp => ;
       | #ins =>
         if count == 0 do
           b += "\\";
         elif j < v.len.int do
           b+= str v.j;
         done
       endmatch;
       return b;
    }
    // Search for regex, replace by r with \0 \1 \2 etc replace by match groups.
    // Search x for matches of the regex re, starting at position spos, and
    // replace each match by r, in which \0 \1 \2 etc are replaced by the
    // corresponding match groups (see subst). The part of x before spos is
    // copied unchanged.
    // A zero-length match inserts the replacement and then steps over one
    // char of x, so that the scan always makes progress.
    fun search_and_replace (x:string, var spos: size, re:Re2::RE2, r:string) : string =
    {
      var ngroups = re.NumberOfCapturingGroups + 1;
      var v = varray[StringPiece]$ (ngroups+1).size, StringPiece "";
      var o = x.[to spos];             // initial substring
      var sp = StringPiece(x);
      var base : +char = sp.data;      // base pointer of char array
      var going = true;                // cleared after an empty match at end of x
      while going and Re2::Match(re, sp, spos.int, UNANCHORED, v.stl_begin, v.len.int) do
        var mpos = size(v.0.data - base);  // start of match
        var mlen = v.0.len;                // length of match
        o+= x.[spos to mpos];          // copy upto start of match
        o+= subst(r,v);                // copy replacement
        if mlen > 0uz do
          spos = mpos + mlen;          // advance over match
        elif mpos < x.len do
          // Empty match inside x: without this step spos would not move
          // and the same empty match would be found forever.
          o += x.[mpos];
          spos = mpos + 1uz;
        else
          // Empty match at the very end of x: nothing left to scan.
          spos = mpos;
          going = false;
        done
      done
      o+=x.[spos to];                  // rest of string
      return o;
    }

+ 3.23 Parse string to numeric type

share/lib/std/strings/string.flx

    // Thin bindings to the C library conversion functions of the same
    // names, applied to the string's c_str().
    // NOTE(review): atoll is declared to return long although
    // ::std::atoll yields long long -- confirm whether a wider return
    // type was intended.
    fun atoi: string -> int = "::std::atoi($1:postfix.c_str())"  requires Cxx_headers::cstdlib;
    fun atol: string -> long = "::std::atol($1:postfix.c_str())"  requires Cxx_headers::cstdlib;
    fun atoll: string -> long = "::std::atoll($1:postfix.c_str())"  requires Cxx_headers::cstdlib;
    fun atof: string -> double = "::std::atof($1:postfix.c_str())"  requires Cxx_headers::cstdlib;
  

+ 3.24 Reserve store

share/lib/std/strings/string.flx

    proc reserve: &string * !ints = "$1->reserve($2);";
  

+ 3.25 Fetch underlying cstring.

share/lib/std/strings/string.flx

    // Entirely unsafe because string could be an rvalue.
    fun _unsafe_cstr: string -> +char = "((char*)$1.c_str())" is atom;
  
    // partially unsafe because the string could be modified.
    fun stl_begin: &string -> +char = "((char*)$1->c_str())" is atom;
    fun stl_end: &string -> +char = "((char*)($1->c_str()+$1->size()))" is atom;
  
    // this operation is entirely safe because the char array
    // provided is copied (and garbage collected)
    fun cstr (x:string) : +char => (varray[char] x).stl_begin;
  

+ 3.26 Polymorphic vsprintf hack

share/lib/std/strings/string.flx

    fun vsprintf[t]: +char  * t -> string =
      "::flx::rtl::strutil::flx_asprintf($1,$2)" requires package "flx_strutil"
    ;
  
    fun vsprintf[t]: string * t -> string =
      "::flx::rtl::strutil::flx_asprintf(const_cast<char*>($1.c_str()),$2)" requires package "flx_strutil"
    ;
  

+ 3.27 Case translation

share/lib/std/strings/string.flx

    // Convert all characters to upper case  
    fun toupper(s:string):string => map (toupper of char) s;
    // Convert all characters to lower case
    fun tolower(s:string):string => map (tolower of char) s;
  }
  
  

+ 3.28 Translation to string

share/lib/std/strings/string.flx

  
  // str on a string is the identity: the argument is returned unchanged.
  instance Str[string] {
    fun str (s:string) : string => s;
  }
  
  // str on a C char array delegates to ::flx::rtl::strutil::atostr
  // (package flx_strutil).
  instance Str[+char] {
    fun str: +char -> string = '::flx::rtl::strutil::atostr($1)' requires package "flx_strutil";
  }
  
  // repr of a string: the repr of each character, in order, wrapped in
  // single quotes. An empty string yields just the two quotes.
  instance Repr[string] {
    fun repr (x:string) : string = {
      var quoted = "'";
      var n = String::len x;
      // Guard the empty case: n - 1uz on an unsigned zero must not be
      // used as the loop's upper bound.
      if n > 0uz do
        for var k in 0uz upto n - 1uz do
          quoted += repr x.[k];
        done
      done
      return quoted + "'";
    }
  }
  
  open[T in strings] Show[T];
  open Set[string,char];
  

+ 4 String syntax

share/lib/std/strings/stringexpr.fsyn

//$ Grammar extension adding the postfix ".[ ]" forms for strings.
//$ Each production rewrites to an application of a named function
//$ (subscript, substring, copyfrom, copyto) to the parsed operands.
syntax stringexpr
{
  //$ String subscript.
  x[sfactor_pri] := x[sfactor_pri] "." "[" sexpr "]" =># "`(ast_apply ,_sr (,(noi 'subscript) (,_1 ,_4)))";

  //$ String substring.
  x[sfactor_pri] := x[sfactor_pri] "." "[" sexpr "to" sexpr "]" =># "`(ast_apply ,_sr (,(noi 'substring) (,_1 ,_4 ,_6)))";

  //$ String substring, to end of string.
  x[sfactor_pri] := x[sfactor_pri] "." "[" sexpr "to" "]" =># "`(ast_apply ,_sr (,(noi 'copyfrom) (,_1 ,_4)))";

  //$ String substring, from start of string.
  x[sfactor_pri] := x[sfactor_pri] "." "[" "to" sexpr "]" =># "`(ast_apply ,_sr (,(noi 'copyto) (,_1 ,_5)))";
}

+ 5 RE2 regexps

share/lib/std/strings/re2.flx

  
  include "stl/stl_map";
  
  //$ Binding of Google RE2 regexp library.
  open class Re2 {
    requires package "re2";
  
  // This is an almost full binding of Google's re2 package.
  // We do not support conversions of digits strings to integers
  //
  // TODO: we need to check the lvalue handling here
  // The RE2, Options classes aren't copyable, so we may have
  // to use pointers
  //
  // TODO: named group extractor
  
    // hackery because ::re2::RE2 isn't copyable, so we have to use a pointer
    // but we need the shape of RE2 to create on the heap
    //
    // Serialisation support emitted verbatim into the generated C++:
    //  - RE2_encoder treats p as a pointer to shared_ptr<RE2> and returns
    //    the pattern string of the regexp it holds.
    //  - RE2_decoder decodes a std::string into a raw local buffer using
    //    ::flx::gc::generic::string_decoder, placement-constructs at p a
    //    shared_ptr owning a new RE2 built from that string, destroys the
    //    temporary string, and returns the index string_decoder produced.
    private body RE2_serial = """
    static ::std::string RE2_encoder(void *p) { 
      return (*(::std::shared_ptr< ::re2::RE2>*)p)->pattern(); 
    }
  
    static size_t RE2_decoder (void *p, char *s, size_t i) { 
      char tmp[sizeof(::std::string)];
      i = ::flx::gc::generic::string_decoder (&tmp,s,i);
      new(p) ::std::shared_ptr< ::re2::RE2> (new ::re2::RE2 (*(::std::string*)(&tmp)));
      ::destroy((::std::string*)&tmp);
      return i;
    }
    """; 
    // RE2_ names the raw C++ class; it is private to this class.
    private type RE2_ = "::re2::RE2" 
    ;
    // RE2 is the public handle: a shared_ptr to the C++ object. It needs
    // <memory> and the RE2_serial body, and registers RE2_encoder /
    // RE2_decoder as its encoder and decoder.
    type RE2 = "::std::shared_ptr< ::re2::RE2>" 
      requires Cxx11_headers::memory,
      RE2_serial, encoder "RE2_encoder", decoder "RE2_decoder"
    ;
  
    // Compile a pattern: heap-allocates a ::re2::RE2 from the string and
    // wraps it in the shared_ptr that the RE2 type stands for.
    // The class name is fully qualified, like every other reference to it in
    // this binding, so the generated C++ does not rely on an unqualified
    // `RE2` being visible at the point of use.
    gen _ctor_RE2 : string -> RE2 = "::std::shared_ptr< ::re2::RE2>(new ::re2::RE2($1))";
  
  
    // Binding of ::re2::StringPiece with conversions to and from string.
    // NOTE(review): StringPiece is presumably a non-owning view, in which
    // case a piece built from a string is only valid while that string's
    // storage is -- confirm against the RE2 headers.
    type StringPiece = "::re2::StringPiece";
      ctor StringPiece: string = "::re2::StringPiece($1)";
      ctor StringPiece: unit = "::re2::StringPiece()";
      ctor string: StringPiece = "$1.as_string()";
      fun len: StringPiece -> size = "(size_t)$1.length()";
      fun data: StringPiece -> +char = "(char*)$1.data()"; // cast away const
   
   
      // Type class instances for StringPiece; each member forwards to the
      // corresponding C++ member function or operator.
      instance Container[StringPiece,char] {
        fun len: StringPiece -> size = "$1.size()";
      }
      instance Eq[StringPiece] {
        fun == : StringPiece * StringPiece -> bool = "$1==$2";
      }
      instance Tord[StringPiece] {
        fun < : StringPiece * StringPiece -> bool = "$1<$2";
      }
      instance Str[StringPiece] {
        fun str: StringPiece -> string ="$1.as_string()";
      }
  
    // Binding of ::re2::Arg. No operations on it are defined in this class.
    type Arg = "::re2::Arg";
  
    // Binding of the RE2::Encoding enumeration and its two values.
    type Encoding = "::re2::RE2::Encoding";
      const EncodingUTF8: Encoding = "::re2::RE2::EncodingUTF8";
      const EncodingLatin1: Encoding = "::re2::RE2::EncodingLatin1";
  
    // Binding of ::re2::RE2::Options. Each option has a getter (fun) and a
    // setter (proc) that forward to the C++ member of the same name.
    // NOTE(review): the setters take the options object by value in the
    // Felix signature; see the lvalue-handling TODO at the top of this class.
    type RE2Options = "::re2::RE2::Options";
  
      // Forwards to $1.Copy($2).
      proc Copy: RE2Options * RE2Options = "$1.Copy($2);";
  
      fun encoding: RE2Options -> Encoding = "$1.encoding()";
      proc set_encoding: RE2Options * Encoding = "$1.set_encoding($2);";
      
      fun posix_syntax: RE2Options -> bool = "$1.posix_syntax()";
      proc set_posix_syntax: RE2Options * bool = "$1.set_posix_syntax($2);";
  
      fun longest_match: RE2Options -> bool = "$1.longest_match()";
      proc set_longest_match: RE2Options * bool = "$1.set_longest_match($2);";
      
      fun log_errors: RE2Options -> bool = "$1.log_errors()";
      proc set_log_errors: RE2Options * bool = "$1.set_log_errors($2);";
      
      // NOTE(review): declared as int here; confirm the width of the C++
      // max_mem value so large budgets are not truncated.
      fun max_mem: RE2Options -> int = "$1.max_mem()";
      proc set_max_mem: RE2Options * int = "$1.set_max_mem($2);";
      
      fun literal: RE2Options -> bool = "$1.literal()";
      proc set_literal: RE2Options * bool = "$1.set_literal($2);";
  
      fun never_nl: RE2Options -> bool = "$1.never_nl()";
      proc set_never_nl: RE2Options * bool = "$1.set_never_nl($2);";
      
      fun case_sensitive: RE2Options -> bool = "$1.case_sensitive()";
      proc set_case_sensitive: RE2Options * bool = "$1.set_case_sensitive($2);";
      
      fun perl_classes: RE2Options -> bool = "$1.perl_classes()";
      proc set_perl_classes: RE2Options * bool = "$1.set_perl_classes($2);";
      
      fun word_boundary: RE2Options -> bool = "$1.word_boundary()";
      proc set_word_boundary: RE2Options * bool = "$1.set_word_boundary($2);";
      
      fun one_line: RE2Options -> bool = "$1.one_line()";
      proc set_one_line: RE2Options * bool = "$1.set_one_line($2);";
  
      fun ParseFlags: RE2Options -> int = "$1.ParseFlags()";
     
    // Binding of the RE2::ErrorCode enumeration. Each Felix constant maps to
    // the C++ enumerator of the same name; error_code returns one of these.
    type ErrorCode = "::re2::RE2::ErrorCode";
      const NoError : ErrorCode = "::re2::RE2::NoError";
      const ErrorInternal: ErrorCode = "::re2::RE2::ErrorInternal";
      const ErrorBadEscape : ErrorCode = "::re2::RE2::ErrorBadEscape";
      const ErrorBadCharClass : ErrorCode = "::re2::RE2::ErrorBadCharClass";
      const ErrorBadCharRange : ErrorCode = "::re2::RE2::ErrorBadCharRange";
      const ErrorMissingBracket : ErrorCode = "::re2::RE2::ErrorMissingBracket";
      const ErrorMissingParen : ErrorCode = "::re2::RE2::ErrorMissingParen";
      const ErrorTrailingBackslash : ErrorCode = "::re2::RE2::ErrorTrailingBackslash";
      const ErrorRepeatArgument : ErrorCode = "::re2::RE2::ErrorRepeatArgument";
      const ErrorRepeatSize : ErrorCode = "::re2::RE2::ErrorRepeatSize";
      const ErrorRepeatOp: ErrorCode = "::re2::RE2::ErrorRepeatOp";
      // The C++ name was previously misspelt ("ErrprBadPerlOp"), which names
      // no enumerator and would not compile once the constant was used.
      const ErrorBadPerlOp: ErrorCode = "::re2::RE2::ErrorBadPerlOp";
      const ErrorBadUTF8: ErrorCode = "::re2::RE2::ErrorBadUTF8";
      const ErrorBadNamedCapture: ErrorCode = "::re2::RE2::ErrorBadNamedCapture";
      const ErrorPatternTooLarge: ErrorCode = "::re2::RE2::ErrorPatternTooLarge";
  
    // Binding of the RE2::Anchor enumeration, passed to Match to select
    // the anchoring mode.
    type Anchor = "::re2::RE2::Anchor";
      const UNANCHORED: Anchor = "::re2::RE2::UNANCHORED";
      const ANCHOR_START: Anchor = "::re2::RE2::ANCHOR_START";
      const ANCHOR_BOTH: Anchor = "::re2::RE2::ANCHOR_BOTH";
  
    // Read-only queries on a compiled regexp. Each dereferences the shared
    // pointer and forwards to the C++ member of the same name.
    fun pattern: RE2 -> string = "$1->pattern()";
    fun error: RE2 -> string = "$1->error()";
    fun error_code: RE2 -> ErrorCode = "$1->error_code()";
    fun error_arg: RE2 -> string = "$1->error_arg()";
    fun ok: RE2 -> bool = "$1->ok()";
    fun ProgramSize: RE2 -> int = "$1->ProgramSize()";
  
    // Bindings of the static RE2 functions. The regexp handle is
    // dereferenced (*$2) because the C++ functions take the RE2 object
    // itself. GlobalReplace and Extract write through the &string
    // argument, which is why they are declared gen rather than fun.
    gen GlobalReplace: &string * RE2 * StringPiece -> int = "::re2::RE2::GlobalReplace($1, *$2, $3)";
    gen Extract: StringPiece * RE2 * StringPiece * &string -> bool = "::re2::RE2::Extract($1, *$2, $3, $4)";
  
    // Forwards to the static RE2::QuoteMeta.
    fun QuoteMeta: StringPiece -> string = "::re2::RE2::QuoteMeta($1)";
   
    // Forwards to RE2::PossibleMatchRange(min, max, maxlen) on the regexp:
    // $2 and $3 are the two output string pointers and $4 is the integer
    // limit. The C++ text previously passed $3 twice, producing a
    // four-argument call to a three-parameter member.
    fun PossibleMatchRange: RE2 * &string * &string * int -> bool = "$1->PossibleMatchRange($2,$3,$4)";
    // Capturing-group queries; both forward to the C++ member of the same
    // name. NamedCapturingGroups is surfaced as an stl_map from group name
    // to int.
    fun NumberOfCapturingGroups: RE2 -> int = "$1->NumberOfCapturingGroups()";
    fun NamedCapturingGroups: RE2 -> Stl_Map::stl_map[string, int] = "$1->NamedCapturingGroups()";
  
    // this function is fully general, just needs an anchor
    // Arguments: regexp, text, start position, anchor mode, array receiving
    // the submatches, and the number of slots in that array. The end
    // position handed to the C++ Match is always the full length of the
    // text ($2.length()).
    gen Match: RE2 * StringPiece * int * Anchor * +StringPiece * int -> bool = 
      "$1->Match($2, $3, $2.length(),$4, $5, $6)"
     ;
  
    // Whole-string match. Runs the primitive Match from position 0 with
    // ANCHOR_BOTH and, on success, returns Some array of strings holding one
    // entry per submatch slot (capturing-group count plus one); on failure
    // returns None.
    noinline gen Match(re:RE2, var s:string) : opt[varray[string]] = {
      // Initial value for every submatch slot, held in a variable rather
      // than written inline (presumably so the StringPiece built from it
      // refers to storage that stays alive).
      var blank = "";
      var ngroups = NumberOfCapturingGroups re;
      var nslots = ngroups + 1;
      var pieces = varray[StringPiece] (nslots.size, StringPiece blank);
      var matched = Match (re, StringPiece s, 0, ANCHOR_BOTH, pieces.stl_begin, nslots);
      if matched do
        return Some$ map string of (StringPiece) pieces;
      done
      return None[varray[string]];
    }
  
    // Applying a regexp to a string is the whole-string Match (re, s).
    gen apply (re:RE2, s:string) => Match (re,s);
  
    // Forwards to RE2::CheckRewriteString with the rewrite text and a
    // pointer to a string.
    // NOTE(review): the &string is presumably where the C++ side reports an
    // error message -- confirm against the RE2 headers.
    fun CheckRewriteString: RE2 * StringPiece * &string -> bool = "$1->CheckRewriteString($2, $3)";
  
    // Treats a regexp as a set of strings: membership is a Match over the
    // whole string with ANCHOR_BOTH, passing a null submatch array and a
    // count of 0 so no groups are extracted.
    instance Set[RE2, string] {
      fun \(\in\) : string * RE2 -> bool =
        "$2->Match(::re2::StringPiece($1),0, ::re2::StringPiece($1).length(),::re2::RE2::ANCHOR_BOTH, (::re2::StringPiece*)0, 0)"
      ;
    }
  
    // Convenience overload: compiles the pattern string with RE2 and
    // delegates to the iterator over (RE2, string) below.
    gen iterator (re2:string, var target:string) => iterator (RE2 re2, target);
  
    // Iterating over a (regexp, string) pair yields successive unanchored
    // matches as arrays of strings, then None when no further match exists.
    instance Iterable[RE2 * string, varray[string]] {
      gen iterator (r:RE2, var target:string) () : opt[varray[string]] = {
        var emptystring = "";
        // NOTE(review): l is not referenced again in this generator.
        var l = len target;
        var s = StringPiece target;
        var p1 = s.data;  // address of the start of the text
        var p = 0;        // offset at which the next search starts
        var n = NumberOfCapturingGroups(r)+1; // number of submatch slots
        var v1 = varray[StringPiece] (n.size,StringPiece emptystring);
        var v2 = varray[string] (n.size,"");
      again:>
        var result = Match(r, s, p, UNANCHORED,v1.stl_begin, n);
        if not result goto endoff;
        // Copy every submatch piece into the string array that is yielded.
        for var i in 0 upto n - 1 do set(v2, i.size, string(v1.i)); done
        var p2 = v1.0.data;
        assert(v1.0.len.int > 0); // prevent infinite loop
        // Resume just past slot 0 of this match: its offset from the start
        // of the text plus its length.
        p = (p2 - p1).int+v1.0.len.int;
        // NOTE(review): the same v2 array is refilled and yielded on every
        // iteration; confirm whether callers may retain earlier results.
        yield Some v2;
        goto again;
      endoff:>
        return None[varray[string]];
      }
    }
    inherit Streamable[RE2 * string, Varray::varray[string]];
  
    // Extract Some match array or None if not matching.
    // Both overloads create the match iterator for (regexp, target) and call
    // it exactly once, so the result is that iterator's first value. The
    // first overload compiles the pattern string with RE2 first.
    fun extract (re2:string, target:string) : opt[varray[string]] => iterator (RE2 re2, target) ();
    fun extract (re2:RE2, target:string) : opt[varray[string]] => iterator (re2, target) ();
  
  }
  
  open Set[RE2, string];
  

+ 6 Regular definitions

share/lib/std/strings/regdef.flx

  
  // Structured regular definitions: a regex is built as a tree of the
  // constructors below and turned into a pattern string by render.
  class Regdef {
    // Rpt carries (regex, min, max); render treats a negative max as
    // "no upper bound".
    union regex =
    | Alts of list[regex]
    | Seqs of list[regex]
    | Rpt of regex * int * int
    | Charset of string
    | String of string
    | Group of regex
    | Perl of string
    ;
  
    // Precedence level of a term. cngrp wraps a term in a non-capturing
    // group when its level is greater than the level of its context.
    private fun prec: regex -> int =
    | Perl _ => 3
    | Alts _ => 3
    | Seqs _ => 2
    | String _ => 2
    | Rpt _ => 1
    | Charset _ => 0
    | Group _ => 0
    ;
  
    // One hex digit as a string: '0'+i below 10, otherwise 'A'+(i-10).
    // NOTE(review): assumes 0 <= i < 16; nothing here checks it.
    private fun hex_digit (i:int)=>
      if i<10 then string (char (ord (char "0") + i)) 
      else string (char (ord (char "A") + i - 10))
      endif
    ;
    // Character as a backslash-x escape followed by two hex digits of its
    // ordinal.
    private fun hex2(c:char)=>
      let i = ord c in
      "\\x" + hex_digit ( i / 16 ) + hex_digit ( i % 16 )
    ;
    // ASCII letters and digits pass through unchanged; every other
    // character is emitted as a hex escape.
    private fun charset_quote(c:char)=>
      if c in "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstvuwxyz" then string c
      else hex2 c
      endif
    ;
  
    // Quote every character of s with charset_quote. The precondition
    // requires a non-empty string, which also keeps the unsigned loop
    // bound len s - 1uz from wrapping.
    private fun hex(s:string when len s > 0uz)= {
      var r = ""; 
      for var i in 0uz upto len s - 1uz do
        r += charset_quote s.[i];
      done
      return r; 
    }
  
    // Wrap s in a non-capturing group.
    fun ngrp (s:string)=> "(?:"+s+")";
    // Wrap s in a non-capturing group only when the inner level ip is
    // greater than the outer level op.
    private fun cngrp (s:string, op: int, ip: int) => if ip > op then ngrp (s) else s endif; 
  
    // Render a regex tree as a pattern string.
    fun render: regex -> string =
    // Alternatives: rendered elements joined with "|". The separator is
    // omitted while the accumulator is still the empty string.
    | Alts rs => fold_left 
     (fun (acc:string) (elt:regex)=> 
       (if acc == "" then "" else acc + "|" endif) + (render elt)) 
      "" rs
    // Sequence: rendered elements concatenated, each grouped if its level
    // is greater than 2.
    | Seqs rs => fold_left 
      (fun (acc:string) (elt:regex)=> acc + cngrp(render elt,2,prec elt))
      "" rs
    // Repetition: (0,-1), (1,-1) and (0,1) become *, + and ? applied to a
    // non-capturing group; anything else becomes {min,max} with max left
    // out when negative.
    | Rpt (r,i,x) =>
      if i == 0 and x == -1 then ngrp (render r) + "*"
      elif i == 1 and x == -1 then ngrp (render r) + "+"
      elif i == 0 and x == 1 then ngrp (render r) + "?"
      else
        cngrp(render r,1,prec r) + "{" + str i + "," + if x < 0 then "" else str x endif + "}"
      endif
  
    // Literal strings and charsets are fully quoted by hex; Group becomes a
    // capturing group; Perl text is passed through verbatim.
    | String s => hex(s)
    | Charset s => "[" + hex s + "]"
    | Group r => "(" + render r + ")"
    | Perl s => s
    ;
  }
  

+ 7 Syntax

share/lib/std/strings/regexps.fsyn

//$ Syntax for regular definitions.
//$ Binds to library class Regdef,
//$ which in turn binds to the binding of Google RE2.
//$ The Scheme helper (regdef x) builds an ast_lookup of the name x
//$ inside Regdef; the productions below use it to reference the
//$ regex type and its constructors.
SCHEME """(define (regdef x) `(ast_lookup (,(noi 'Regdef) ,x ())))""";

//$ Grammar for regular definitions. Every production rewrites to an
//$ application of a Regdef constructor, or passes a sub-term through.
syntax regexps {
  //$ Priority levels for regexp terms, listed from alternation
  //$ through sequencing and postfix operators to atoms.
  priority 
    ralt_pri <
    rseq_pri <
    rpostfix_pri <
    ratom_pri
  ;

 
  //$ Regular definition binder.
  //$ Statement to name a regular expression.
  //$ The expression may contain names of previously named regular expressions.
  //$ Defines the LHS symbol as a value of type Regdef::regex.
  stmt := "regdef" sdeclname "=" sregexp[ralt_pri] ";" =># 
    """
    `(ast_val_decl ,_sr ,(first _2) ,(second _2) (some ,(regdef "regex" )) (some ,_4))
    """;

  //$ Inline regular expression.
  //$ Can be used anywhere in Felix code.
  //$ Returns a value of type Regdef::regex.
  x[sapplication_pri] := "regexp" "(" sregexp[ralt_pri] ")" =># "_3";

  //$ Alternatives.
  private sregexp[ralt_pri] := sregexp[>ralt_pri] ("|" sregexp[>ralt_pri])+ =># 
    """`(ast_apply ,_sr (  
      ,(regdef "Alts")
      (ast_apply ,_sr (,(noi 'list) ,(cons _1 (map second _2))))))"""
  ;

  //$ Sequential concatenation.
  private sregexp[rseq_pri] := sregexp[>rseq_pri] (sregexp[>rseq_pri])+ =># 
    """`(ast_apply ,_sr ( 
      ,(regdef "Seqs")
      (ast_apply ,_sr (,(noi 'list) ,(cons _1 _2)))))"""
  ;


  //$ Postfix star (*).
  //$ Kleene closure: zero or more repetitions.
  private sregexp[rpostfix_pri] := sregexp[rpostfix_pri] "*" =># 
    """`(ast_apply ,_sr ( ,(regdef "Rpt") (,_1,0,-1)))"""
  ;

  //$ Postfix plus (+).
  //$ One or more repetitions.
  private sregexp[rpostfix_pri] := sregexp[rpostfix_pri] "+" =>#
    """`(ast_apply ,_sr ( ,(regdef "Rpt") (,_1,1,-1)))"""
  ;

  //$ Postfix question mark (?).
  //$ Optional. Zero or one repetitions.
  private sregexp[rpostfix_pri] := sregexp[rpostfix_pri] "?" =>#
    """`(ast_apply ,_sr (,(regdef "Rpt") (,_1,0,1)))"""
  ;

  //$ Parenthesis. Non-capturing group.
  private sregexp[ratom_pri] := "(" sregexp[ralt_pri] ")" =># "_2";

  //$ Group pseudo function.
  //$ Capturing group.
  private sregexp[ratom_pri] := "group" "(" sregexp[ralt_pri] ")" =># 
    """`(ast_apply ,_sr ( ,(regdef "Group") ,_3))"""
  ;

  //$ The charset prefix operator.
  //$ Treat the string as a set of characters,
  //$ that is, one of the contained characters.
  private sregexp[ratom_pri] := "charset" String =># 
    """`(ast_apply ,_sr ( ,(regdef "Charset") ,_2))"""
  ;

  //$ The string literal.
  //$ The given sequence of characters.
  //$ Any valid Felix string can be used here.
  private sregexp[ratom_pri] := String =># 
    """`(ast_apply ,_sr ( ,(regdef "String") ,_1)) """
  ;

  //$ The Perl pseudo function.
  //$ Treat the argument string expression as
  //$ a Perl regular expression, with constraints
  //$ as specified for Google RE2.
  private sregexp[ratom_pri] := "perl" "(" sexpr ")" =># 
    """`(ast_apply ,_sr ( ,(regdef "Perl") ,_3)) """
  ;

  //$ The regex pseudo function.
  //$ Treat the argument Felix expression of type Regdef::regex
  //$ as a regular expression.
  private sregexp[ratom_pri] := "regex" "(" sexpr ")" =># "_3";

  //$ Identifier.
  //$ Must name a previously defined variable of type Regdef::regex.
  //$ For example, the LHS of a regdef binder.
  private sregexp[ratom_pri] := sname=># "`(ast_name ,_sr ,_1 ())";
 
}

+ 8 Lexer

share/lib/std/strings/lexer.flx

  // Minimal character-pointer iterator over a string's contents.
  class Lexer
  {
    // A lex_iterator is a C++ `char const*`.
    pod type lex_iterator = "char const*";
    // Pointers to the first character and to one past the last character.
    // NOTE(review): both take the string by value and return a pointer into
    // c_str(); presumably only safe while the argument's storage outlives
    // the iterator -- confirm how callers use them.
    fun start_iterator : string -> lex_iterator = "$1.c_str()";
    fun end_iterator: string -> lex_iterator = "$1.c_str()+$1.size()";
    // The (start, end) iterator pair for x.
    fun bounds (x:string): lex_iterator * lex_iterator = {
      return
        start_iterator x,
        end_iterator x
      ;
    }
    // Build a string from the characters in the range [$1, $2).
    fun string_between: lex_iterator * lex_iterator -> string =
     "::std::string($1,$2)";
  
    // Pointer arithmetic and dereference, forwarded to the C++ operators.
    fun + : lex_iterator * int -> lex_iterator = "$1 + $2";
    fun - : lex_iterator * int -> lex_iterator = "$1 - $2";
    fun - : lex_iterator * lex_iterator -> int = "$1 - $2";
    fun deref: lex_iterator -> char = "*$1";
  }
  
  // Equality on lex_iterators is C++ pointer equality.
  instance Eq[Lexer::lex_iterator] {
    fun == :Lexer::lex_iterator * Lexer::lex_iterator -> bool = "$1==$2";
  }
  
  // Ordering on lex_iterators is the C++ pointer comparison $1<$2.
  instance Tord[Lexer::lex_iterator] {
    fun < :Lexer::lex_iterator * Lexer::lex_iterator -> bool = "$1<$2";
  }
  
  open Eq[Lexer::lex_iterator];
  

+ 9 Config

$PWD/src/config/unix/re2.fpc

Name: Re2
Description: Google Re2 regexp library
provides_dlib: -lflx_re2_dynamic
provides_slib: -lflx_re2_static
includes: '"re2/re2.h"'
library: flx_re2
macros: BUILD_RE2
srcdir: src/re2/re2
headers: re2/(re2|set|stringpiece|variadic_function)\.h  
src: re2/[^/]*\.cc|util/arena\.cc|util/hash\.cc|util/rune\.cc|util/stringpiece\.cc|util/strutil\.cc|util/stringprintf\.cc|util/valgrind\.cc
build_includes: src/re2/re2

$PWD/src/config/win32/re2.fpc

Name: Re2
Description: Google Re2 regexp library
provides_dlib: /DEFAULTLIB:flx_re2_dynamic
provides_slib: /DEFAULTLIB:flx_re2_static
includes: '"re2/re2.h"'
library: flx_re2
macros: BUILD_RE2 WIN32 NOMINMAX
srcdir: src\re2\re2
headers: re2\\(re2|set|stringpiece|variadic_function)\.h  
src: re2\\[^\\]*\.cc|util\\arena\.cc|util\\hash\.cc|util\\rune\.cc|util\\stringpiece\.cc|util\\strutil\.cc|util\\stringprintf\.cc|util\\valgrind\.cc
build_includes: src/re2/re2

share/lib/rtl/flx_re2_config.hpp

#if !defined(__FLX_RE2_CONFIG_H__)
#define __FLX_RE2_CONFIG_H__
#include "flx_rtl_config.hpp"
// RE2_EXTERN expands to FLX_EXPORT when BUILD_RE2 is defined and to
// FLX_IMPORT otherwise.
#if !defined(BUILD_RE2)
#define RE2_EXTERN FLX_IMPORT
#else
#define RE2_EXTERN FLX_EXPORT
#endif
#endif

+ 10 Regular Expressions Tutorial

Felix uses RE2 for regular expressions.

$PWD/src/web/tut/regexp_index.fdoc

<p><a href='tutorial.fdoc'>Up</a></p>

$PWD/src/web/tut/regexp_01.fdoc

Felix provides Google's RE2 engine for regular expressions. The basic syntax and capabilities are a subset of Perl's PCRE, only RE2 actually works correctly and performs well. RE2 does not support backreferences.

A regexp can be compiled with the RE2 function.

var r = RE2(" *([A-Za-z_][A-Za-z0-9]*).*");

Matching is done with the Match function:

var line = "Hello World"; var maybe_subgroups = Match (r, line);

Please note, Match only supports a complete match. There's no searching or partial matching. Instead, just use repeated wildcards as shown.

The best way to check the result of a Match is with a pattern match as follows:

match maybe_subgroups with | #None => println$ "No match"; | Some a => println$ "Matched " + a.1; endmatch;

Matched Hello

You may want to match more than one instance of a pattern in a string. For example, you may want to capture each word in a line of text. This can be done by iterating over a regex like the following

var r2 = RE2("\w+"); // try to match a word
var sentence = "Hello World";
for x in (r2, sentence) do println$ x.0; done

Hello World

If you use the simple method, you'll only match a single word, but with the for loop you get every match.

See RE2 syntax.

Regular expressions are quoting hell. Luckily Felix provides a solution: regular definitions:

begin regdef lower = charset "abcdefghijklmnopqrstuvwxyz"; regdef upper = charset "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; regdef digit = charset "0123456789"; regdef alpha = upper | lower; regdef cid0 = alpha | "_"; regdef cid1 = cid0 | digit; regdef cid = cid0 cid1 *; regdef space = " "; regdef white = space +; regdef integer = digit+;

These are some basic definitions. Note that regdef introduces a new syntax corresponding with the notation usually used for regular expressions.

This is called a DSSL or Domain Specific Sub-Language. It's not a DSL, because that's a complete new language; rather, the "sub" suggests it's an extension of normal Felix. The extension is written entirely in user space.

Now to use these definitions:

// match an assignment statement
regdef sassign = white? "var" white? group (cid) white? "=" white? (group (cid) | group (integer)) white? ";" white? ;

var rstr : string = sassign.Regdef::render; var ra = RE2 rstr; var result = Match (ra, " var a = b; "); match result with | #None => println$ "No match?";

| Some groups => if groups.2 != "" do println$ "Assigned " + groups.1 + " from variable " + groups.2; else println$ "Assigned " + groups.1 + " from integer" + groups.3; done; endmatch; end

Assigned a from variable b

Note that the regdef kind of variable must be converted to a Perl regexp in a string form using the render function.