Update to 7.6, for real

author Matthias Clasen <matthiasc@src.gnome.org>

Tue, 11 Mar 2008 01:51:07 +0000 (01:51 +0000)

committer Matthias Clasen <matthiasc@src.gnome.org>

Tue, 11 Mar 2008 01:51:07 +0000 (01:51 +0000)
author Matthias Clasen <matthiasc@src.gnome.org>
Tue, 11 Mar 2008 01:51:07 +0000 (01:51 +0000)
committer Matthias Clasen <matthiasc@src.gnome.org>
Tue, 11 Mar 2008 01:51:07 +0000 (01:51 +0000)
diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am

index a9971a8e474a9dffb46e3e2a8f400a7d81a04e09..c5aedb673a936282bb9bb8dce6100bf5b0581710 100644 (file)
--- a/glib/pcre/Makefile.am
+++ b/glib/pcre/Makefile.am
@@ -1,5 +1,3 @@
-include $(top_srcdir)/Makefile.decl
-
  INCLUDES = \
         -DG_LOG_DOMAIN=\"GLib-GRegex\" \
         -DSUPPORT_UCP \
@@ -60,7 +58,7 @@ libpcre_la_LIBADD = $(DEP_LIBS)
  
  libpcre_la_LDFLAGS = -no-undefined
  
-EXTRA_DIST +=                          \
+EXTRA_DIST =                           \
                 COPYING                 \
                 makefile.msc
  
diff --git a/glib/pcre/makefile.msc b/glib/pcre/makefile.msc

index 8939a7245b3a7686c6f99a701d9ad99b1a6445ae..0a3ffc5090c1ee2f689e501ff6ee29bf9675c2dc 100644 (file)
--- a/glib/pcre/makefile.msc
+++ b/glib/pcre/makefile.msc
@@ -1,49 +1,35 @@
  TOP = ..\..\..
  !INCLUDE ..\..\build\win32\make.msc
  
-INCLUDES = \
-       -I ..\.. \
-       -I ..
-       
-DEFINES = \
-       -DPCRE_STATIC \
-       -DHAVE_CONFIG_H \
-       -DHAVE_LONG_LONG_FORMAT \
-       -DSUPPORT_UCP \
-       -DSUPPORT_UTF8 \
-       -DNEWLINE=-1 \
-       -DMATCH_LIMIT=10000000 \
-       -DMATCH_LIMIT_RECURSION=10000000 \
-       -DMAX_NAME_SIZE=32 \
-       -DMAX_NAME_COUNT=10000 \
-       -DMAX_DUPLENGTH=30000 \
-       -DLINK_SIZE=2 \
-       -UEBCDIC \
-       -DPOSIX_MALLOC_THRESHOLD=10
+INCLUDES = \\
+        -I ..\.. \\
+        -I ..
+        
+DEFINES = \\
+        -DPCRE_STATIC \\
+        -DHAVE_CONFIG_H \\
+        -DHAVE_LONG_LONG_FORMAT \\
+        -DSUPPORT_UCP \\
+        -DSUPPORT_UTF8 \\
+        -DNEWLINE=-1 \\
+        -DMATCH_LIMIT=10000000 \\
+        -DMATCH_LIMIT_RECURSION=10000000 \\
+        -DMAX_NAME_SIZE=32 \\
+        -DMAX_NAME_COUNT=10000 \\
+        -DMAX_DUPLENGTH=30000 \\
+        -DLINK_SIZE=2 \\
+        -DEBCDIC=0 \\
+        -DPOSIX_MALLOC_THRESHOLD=10
  
-OBJECTS = \
-       pcre_chartables.obj \
-       pcre_compile.obj \
-       pcre_config.obj \
-       pcre_dfa_exec.obj \
-       pcre_exec.obj \
-       pcre_fullinfo.obj \
-       pcre_get.obj \
-       pcre_globals.obj \
-       pcre_info.obj \
-       pcre_maketables.obj \
-       pcre_newline.obj \
-       pcre_ord2utf8.obj \
-       pcre_refcount.obj \
-       pcre_study.obj \
-       pcre_tables.obj \
-       pcre_try_flipped.obj \
-       pcre_ucp_searchfuncs.obj \
-       pcre_valid_utf8.obj \
-       pcre_version.obj \
-       pcre_xclass.obj \
+OBJECTS = \\
+`
+for f in $all_files; do
+    echo "      $f.obj \\\\"
+done
+`
  
  all : pcre.lib
  
-pcre.lib : $(OBJECTS)
-       lib -out:pcre.lib $(OBJECTS)
+pcre.lib : \$(OBJECTS)
+        lib -out:pcre.lib \$(OBJECTS)
+
diff --git a/glib/pcre/pcre.h b/glib/pcre/pcre.h

index a95f1ca5ed12e68d373fe7bed3edc31c0d29400d..ec08f34cbbfcdd5471e304dd2088ac68fc3132e2 100644 (file)
--- a/glib/pcre/pcre.h
+++ b/glib/pcre/pcre.h
@@ -5,7 +5,7 @@
  /* This is the public header file for the PCRE library, to be #included by
  applications that call the PCRE functions.
  
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
  /* The current PCRE version information. */
  
  #define PCRE_MAJOR          7
-#define PCRE_MINOR          4
+
+#define PCRE_MINOR          6
  #define PCRE_PRERELEASE     
-#define PCRE_DATE           2007-09-21
+#define PCRE_DATE           2008-01-28
  
  /* When an application links to a PCRE DLL in Windows, the symbols that are
  imported have to be identified as such. When building PCRE, the appropriate
@@ -242,13 +243,6 @@ typedef struct pcre_callout_block {
    /* ------------------------------------------------------------------ */
  } pcre_callout_block;
  
-
-/* Indirection for store get and free functions. These can be set to
-alternative malloc/free functions if required. Special ones are used in the
-non-recursive case for "frames". There is also an optional callout function
-that is triggered by the (?) regex item. For Virtual Pascal, these definitions
-have to take another form. */
-
  #include "glib.h"
  #include "galias.h"
  
diff --git a/glib/pcre/pcre_chartables.c b/glib/pcre/pcre_chartables.c

index 75e46a08a50b29d9ee623bb4b01e18b5e4acc106..ae45db0ca35b595a8e119b89b2880724813283fd 100644 (file)
--- a/glib/pcre/pcre_chartables.c
+++ b/glib/pcre/pcre_chartables.c
@@ -1,6 +1,3 @@
-/* This file is autogenerated by ../update-pcre/update.sh during
- * the update of the local copy of PCRE.
- */
  /*************************************************
  *      Perl-Compatible Regular Expressions       *
  *************************************************/
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c

index 26f87c37e44c0f727b5a3a1c3169758b14cef329..967d811828bd55898dd08aaf61d62b4b7e929a4e 100644 (file)
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
@@ -241,7 +241,7 @@ static const char error_texts[] =
    /* 10 */
    "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
    "internal error: unexpected repeat\0"
-  "unrecognized character after (?\0"
+  "unrecognized character after (? or (?-\0"
    "POSIX named classes are supported only within a class\0"
    "missing )\0"
    /* 15 */
@@ -300,7 +300,9 @@ static const char error_texts[] =
    "(*VERB) with an argument is not supported\0"
    /* 60 */
    "(*VERB) not recognized\0"
-  "number is too big";
+  "number is too big\0"
+  "subpattern name expected\0"
+  "digit expected after (?+";
  
  
  /* Definition to allow mutual recursion */
@@ -372,19 +374,13 @@ ptr--;                            /* Set pointer back to the last byte */
  
  if (c == 0) *errorcodeptr = ERR1;
  
-/* Non-alphamerics are literals. For digits or letters, do an initial lookup in
-a table. A non-zero result is something that can be returned immediately.
+/* Non-alphanumerics are literals. For digits or letters, do an initial lookup
+in a table. A non-zero result is something that can be returned immediately.
  Otherwise further processing may be required. */
  
-#ifndef EBCDIC  /* ASCII coding */
-else if (c < '0' || c > 'z') {}                           /* Not alphameric */
+else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */
  else if ((i = escapes[c - '0']) != 0) c = i;
  
-#else           /* EBCDIC coding */
-else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
-else if ((i = escapes[c - 0x48]) != 0)  c = i;
-#endif
-
  /* Escapes that need further processing, or are illegal. */
  
  else
@@ -598,10 +594,10 @@ else
      break;
  
      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
-    other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
-    for Perl compatibility, it is a literal. This code looks a bit odd, but
-    there used to be some cases other than the default, and there may be again
-    in future, so I haven't "optimized" it. */
+    other alphanumeric following \ is an error if PCRE_EXTRA was set;
+    otherwise, for Perl compatibility, it is a literal. This code looks a bit
+    odd, but there used to be some cases other than the default, and there may
+    be again in future, so I haven't "optimized" it. */
  
      default:
      if ((options & PCRE_EXTRA) != 0) switch(c)
@@ -1382,8 +1378,9 @@ for (;;)
  can match the empty string or not. It is called from could_be_empty()
  below and from compile_branch() when checking for an unlimited repeat of a
  group that can match nothing. Note that first_significant_code() skips over
-assertions. If we hit an unclosed bracket, we return "empty" - this means we've
-struck an inner bracket whose current branch will already have been scanned.
+backward and negative forward assertions when its final argument is TRUE. If we
+hit an unclosed bracket, we return "empty" - this means we've struck an inner
+bracket whose current branch will already have been scanned.
  
  Arguments:
    code        points to start of search
@@ -1405,6 +1402,16 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
  
    c = *code;
  
+  /* Skip over forward assertions; the other assertions are skipped by
+  first_significant_code() with a TRUE final argument. */
+
+  if (c == OP_ASSERT)
+    {
+    do code += GET(code, 1); while (*code == OP_ALT);
+    c = *code;
+    continue;
+    }
+
    /* Groups with zero repeats can of course be empty; skip them. */
  
    if (c == OP_BRAZERO || c == OP_BRAMINZERO)
@@ -1600,29 +1607,48 @@ return TRUE;
  *************************************************/
  
  /* This function is called when the sequence "[:" or "[." or "[=" is
-encountered in a character class. It checks whether this is followed by an
-optional ^ and then a sequence of letters, terminated by a matching ":]" or
-".]" or "=]".
+encountered in a character class. It checks whether this is followed by a
+sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
+reach an unescaped ']' without the special preceding character, return FALSE.
+
+Originally, this function only recognized a sequence of letters between the
+terminators, but it seems that Perl recognizes any sequence of characters,
+though of course unknown POSIX names are subsequently rejected. Perl gives an
+"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
+didn't consider this to be a POSIX class. Likewise for [:1234:].
+
+The problem in trying to be exactly like Perl is in the handling of escapes. We
+have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
+class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
+below handles the special case of \], but does not try to do any other escape
+processing. This makes it different from Perl for cases such as [:l\ower:]
+where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
+"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
+I think.
  
-Argument:
+Arguments:
    ptr      pointer to the initial [
    endptr   where to return the end pointer
-  cd       pointer to compile data
  
  Returns:   TRUE or FALSE
  */
  
  static BOOL
-check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
+check_posix_syntax(const uschar *ptr, const uschar **endptr)
  {
  int terminator;          /* Don't combine these lines; the Solaris cc */
  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
-if (*(++ptr) == '^') ptr++;
-while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
-if (*ptr == terminator && ptr[1] == ']')
+for (++ptr; *ptr != 0; ptr++)
    {
-  *endptr = ptr;
-  return TRUE;
+  if (*ptr == '\\' && ptr[1] == ']') ptr++; else
+    {
+    if (*ptr == ']') return FALSE;
+    if (*ptr == terminator && ptr[1] == ']')
+      {
+      *endptr = ptr;
+      return TRUE;
+      }
+    }
    }
  return FALSE;
  }
@@ -2220,6 +2246,7 @@ uschar classbits[32];
  BOOL class_utf8;
  BOOL utf8 = (options & PCRE_UTF8) != 0;
  uschar *class_utf8data;
+uschar *class_utf8data_base;
  uschar utf8_char[6];
  #else
  BOOL utf8 = FALSE;
@@ -2259,6 +2286,7 @@ req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
  for (;; ptr++)
    {
    BOOL negate_class;
+  BOOL should_flip_negation;
    BOOL possessive_quantifier;
    BOOL is_quantifier;
    BOOL is_recurse;
@@ -2482,7 +2510,7 @@ for (;; ptr++)
      they are encountered at the top level, so we'll do that too. */
  
      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
-        check_posix_syntax(ptr, &tempptr, cd))
+        check_posix_syntax(ptr, &tempptr))
        {
        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
        goto FAILED;
@@ -2507,6 +2535,12 @@ for (;; ptr++)
        else break;
        }
  
+    /* If a class contains a negative special such as \S, we need to flip the
+    negation flag at the end, so that support for characters > 255 works
+    correctly (they are all included in the class). */
+
+    should_flip_negation = FALSE;
+
      /* Keep a count of chars with values < 256 so that we can optimize the case
      of just a single character (as long as it's < 256). However, For higher
      valued UTF-8 characters, we don't yet do any optimization. */
@@ -2524,6 +2558,7 @@ for (;; ptr++)
  #ifdef SUPPORT_UTF8
      class_utf8 = FALSE;                       /* No chars >= 256 */
      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
+    class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */
  #endif
  
      /* Process characters until ] is reached. By writing this as a "do" it
@@ -2539,6 +2574,18 @@ for (;; ptr++)
          {                           /* Braces are required because the */
          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
          }
+
+      /* In the pre-compile phase, accumulate the length of any UTF-8 extra
+      data and reset the pointer. This is so that very large classes that
+      contain a zillion UTF-8 characters no longer overwrite the work space
+      (which is on the stack). */
+
+      if (lengthptr != NULL)
+        {
+        *lengthptr += class_utf8data - class_utf8data_base;
+        class_utf8data = class_utf8data_base;
+        }
+
  #endif
  
        /* Inside \Q...\E everything is literal except \E */
@@ -2562,7 +2609,7 @@ for (;; ptr++)
  
        if (c == '[' &&
            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
-          check_posix_syntax(ptr, &tempptr, cd))
+          check_posix_syntax(ptr, &tempptr))
          {
          BOOL local_negate = FALSE;
          int posix_class, taboffset, tabopt;
@@ -2579,6 +2626,7 @@ for (;; ptr++)
          if (*ptr == '^')
            {
            local_negate = TRUE;
+          should_flip_negation = TRUE;  /* Note negative special */
            ptr++;
            }
  
@@ -2653,7 +2701,7 @@ for (;; ptr++)
          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
          if (*errorcodeptr != 0) goto FAILED;
  
-        if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */
+        if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
          else if (-c == ESC_Q)            /* Handle start of quoted string */
@@ -2681,6 +2729,7 @@ for (;; ptr++)
              continue;
  
              case ESC_D:
+            should_flip_negation = TRUE;
              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
              continue;
  
@@ -2689,6 +2738,7 @@ for (;; ptr++)
              continue;
  
              case ESC_W:
+            should_flip_negation = TRUE;
              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
              continue;
  
@@ -2698,13 +2748,11 @@ for (;; ptr++)
              continue;
  
              case ESC_S:
+            should_flip_negation = TRUE;
              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
              continue;
  
-            case ESC_E: /* Perl ignores an orphan \E */
-            continue;
-
              default:    /* Not recognized; fall through */
              break;      /* Need "default" setting to stop compiler warning. */
              }
@@ -2939,7 +2987,7 @@ for (;; ptr++)
            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
            if (*errorcodeptr != 0) goto FAILED;
  
-          /* \b is backslash; \X is literal X; \R is literal R; any other
+          /* \b is backspace; \X is literal X; \R is literal R; any other
            special means the '-' was literal */
  
            if (d < 0)
@@ -3203,11 +3251,14 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
      zeroreqbyte = reqbyte;
  
      /* If there are characters with values > 255, we have to compile an
-    extended class, with its own opcode. If there are no characters < 256,
-    we can omit the bitmap in the actual compiled code. */
+    extended class, with its own opcode, unless there was a negated special
+    such as \S in the class, because in that case all characters > 255 are in
+    the class, so any that were explicitly given as well can be ignored. If
+    (when there are explicit characters > 255 that must be listed) there are no
+    characters < 256, we can omit the bitmap in the actual compiled code. */
  
  #ifdef SUPPORT_UTF8
-    if (class_utf8)
+    if (class_utf8 && !should_flip_negation)
        {
        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
        *code++ = OP_XCLASS;
@@ -3233,20 +3284,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
        }
  #endif
  
-    /* If there are no characters > 255, negate the 32-byte map if necessary,
-    and copy it into the code vector. If this is the first thing in the branch,
-    there can be no first char setting, whatever the repeat count. Any reqbyte
-    setting must remain unchanged after any kind of repeat. */
+    /* If there are no characters > 255, set the opcode to OP_CLASS or
+    OP_NCLASS, depending on whether the whole class was negated and whether
+    there were negative specials such as \S in the class. Then copy the 32-byte
+    map into the code vector, negating it if necessary. */
  
+    *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
      if (negate_class)
        {
-      *code++ = OP_NCLASS;
        if (lengthptr == NULL)    /* Save time in the pre-compile phase */
          for (c = 0; c < 32; c++) code[c] = ~classbits[c];
        }
      else
        {
-      *code++ = OP_CLASS;
        memcpy(code, classbits, 32);
        }
      code += 32;
@@ -3882,7 +3932,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
        int len;
        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
            *tempcode == OP_NOTEXACT)
-        tempcode += _pcre_OP_lengths[*tempcode];
+        tempcode += _pcre_OP_lengths[*tempcode] +
+          ((*tempcode == OP_TYPEEXACT &&
+             (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
        len = code - tempcode;
        if (len > 0) switch (*tempcode)
          {
@@ -4109,16 +4161,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
              *errorcodeptr = ERR58;
              goto FAILED;
              }
-          if (refsign == '-')
+          recno = (refsign == '-')?
+            cd->bracount - recno + 1 : recno +cd->bracount;
+          if (recno <= 0 || recno > cd->final_bracount)
              {
-            recno = cd->bracount - recno + 1;
-            if (recno <= 0)
-              {
-              *errorcodeptr = ERR15;
-              goto FAILED;
-              }
+            *errorcodeptr = ERR15;
+            goto FAILED;
              }
-          else recno += cd->bracount;
            PUT2(code, 2+LINK_SIZE, recno);
            break;
            }
@@ -4190,9 +4239,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
            skipbytes = 1;
            }
  
-        /* Check for the "name" actually being a subpattern number. */
+        /* Check for the "name" actually being a subpattern number. We are
+        in the second pass here, so final_bracount is set. */
  
-        else if (recno > 0)
+        else if (recno > 0 && recno <= cd->final_bracount)
            {
            PUT2(code, 2+LINK_SIZE, recno);
            }
@@ -4386,7 +4436,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
  
          /* We come here from the Python syntax above that handles both
          references (?P=name) and recursion (?P>name), as well as falling
-        through from the Perl recursion syntax (?&name). */
+        through from the Perl recursion syntax (?&name). We also come here from
+        the Perl \k<name> or \k'name' back reference syntax and the \k{name}
+        .NET syntax. */
  
          NAMED_REF_OR_RECURSE:
          name = ++ptr;
@@ -4398,6 +4450,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
  
          if (lengthptr != NULL)
            {
+          if (namelen == 0)
+            {
+            *errorcodeptr = ERR62;
+            goto FAILED;
+            }
            if (*ptr != terminator)
              {
              *errorcodeptr = ERR42;
@@ -4411,14 +4468,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
            recno = 0;
            }
  
-        /* In the real compile, seek the name in the table */
+        /* In the real compile, seek the name in the table. We check the name
+        first, and then check that we have reached the end of the name in the
+        table. That way, if the name is longer than any in the table,
+        the comparison will fail without reading beyond the table entry. */
  
          else
            {
            slot = cd->name_table;
            for (i = 0; i < cd->names_found; i++)
              {
-            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
+            if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
+                slot[2+namelen] == 0)
+              break;
              slot += cd->name_entry_size;
              }
  
@@ -4455,7 +4517,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
            {
            const uschar *called;
  
-          if ((refsign = *ptr) == '+') ptr++;
+          if ((refsign = *ptr) == '+')
+            {
+            ptr++;
+            if (g_ascii_isdigit(*ptr) == 0)
+              {
+              *errorcodeptr = ERR63;
+              goto FAILED;
+              }
+            }
            else if (refsign == '-')
              {
              if (g_ascii_isdigit(ptr[1]) == 0)
@@ -5621,7 +5691,6 @@ to fill in forward references to subpatterns. */
  
  uschar cworkspace[COMPILE_WORK_SIZE];
  
-
  /* Set this early so that early errors get offset 0. */
  
  ptr = (const uschar *)pattern;
@@ -5782,7 +5851,7 @@ to compile parts of the pattern into; the compiled code is discarded when it is
  no longer needed, so hopefully this workspace will never overflow, though there
  is a test for its doing so. */
  
-cd->bracount = 0;
+cd->bracount = cd->final_bracount = 0;
  cd->names_found = 0;
  cd->name_entry_size = 0;
  cd->name_table = NULL;
@@ -5859,6 +5928,7 @@ field. Reset the bracket count and the names_found field. Also reset the hwm
  field; this time it's used for remembering forward references to subpatterns.
  */
  
+cd->final_bracount = cd->bracount;  /* Save for checking forward references */
  cd->bracount = 0;
  cd->names_found = 0;
  cd->name_table = (uschar *)re + re->name_table_offset;
diff --git a/glib/pcre/pcre_config.c b/glib/pcre/pcre_config.c

index 220ef93cb1020ef9beff55f61615aafcde21af61..454fed98dfb76cce95ab4baa9c39eb32e55769cb 100644 (file)
--- a/glib/pcre/pcre_config.c
+++ b/glib/pcre/pcre_config.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c

index e590fbb145faad87b94a92a6a6cd1a22f08383fb..6283ff7816477428890fd60793364f1d6029cec0 100644 (file)
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c

index 6db7c354c8fbdf4a142e8b363b418563dbd03274..56739915e293dca2687be3c6e1d469e2492031d9 100644 (file)
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
@@ -4670,10 +4670,10 @@ for(;;)
      if (first_byte_caseless)
        while (start_match < end_subject &&
               md->lcc[*start_match] != first_byte)
-        start_match++;
+        { NEXTCHAR(start_match); }
      else
        while (start_match < end_subject && *start_match != first_byte)
-        start_match++;
+        { NEXTCHAR(start_match); }
      }
  
    /* Or to just after a linebreak for a multiline match if possible */
@@ -4683,7 +4683,7 @@ for(;;)
      if (start_match > md->start_subject + start_offset)
        {
        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
-        start_match++;
+        { NEXTCHAR(start_match); }
  
        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
        and we are now at a LF, advance the match position by one more character.
@@ -4704,7 +4704,9 @@ for(;;)
      while (start_match < end_subject)
        {
        register unsigned int c = *start_match;
-      if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
+      if ((start_bits[c/8] & (1 << (c&7))) == 0)
+        { NEXTCHAR(start_match); }
+      else break;
        }
      }
  
diff --git a/glib/pcre/pcre_fullinfo.c b/glib/pcre/pcre_fullinfo.c

index 04e31f69adfee90cb4f6de37c75df03bb7c450b7..7b001c60b2c56fd900226e74b089f132144b31a5 100644 (file)
--- a/glib/pcre/pcre_fullinfo.c
+++ b/glib/pcre/pcre_fullinfo.c
@@ -2,11 +2,11 @@
  *      Perl-Compatible Regular Expressions       *
  *************************************************/
  
-/*PCRE is a library of functions to support regular expressions whose syntax
+/* PCRE is a library of functions to support regular expressions whose syntax
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_get.c b/glib/pcre/pcre_get.c

index fc283c88e1a080b513816657180369113e1ef3cd..68b8de47b6ffcd21874e5ac59be6957b4c14f6d7 100644 (file)
--- a/glib/pcre/pcre_get.c
+++ b/glib/pcre/pcre_get.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_globals.c b/glib/pcre/pcre_globals.c

index 56b4ce44a75ca1eac720a3e9ad01f2f5de72db19..e759ed5ce05e33638fcdbdc457a65256cb3fa4e3 100644 (file)
--- a/glib/pcre/pcre_globals.c
+++ b/glib/pcre/pcre_globals.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_info.c b/glib/pcre/pcre_info.c

index 9bcccbcab37db092f6389bbe9a1153c8f02a5b46..638a475316458b13e15e81bc741ea7504e8e81c9 100644 (file)
--- a/glib/pcre/pcre_info.c
+++ b/glib/pcre/pcre_info.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h

index 6923f713ae7e3ab5bea7775a215669c1ee3886ce..4d41e79cee641ea4cf9d6bb24589fbf0d215194b 100644 (file)
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@@ -7,7 +7,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
@@ -363,6 +363,7 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
  support is omitted, we don't even define it. */
  
  #ifndef SUPPORT_UTF8
+#define NEXTCHAR(p) p++;
  #define GETCHAR(c, eptr) c = *eptr;
  #define GETCHARTEST(c, eptr) c = *eptr;
  #define GETCHARINC(c, eptr) c = *eptr++;
@@ -372,6 +373,13 @@ support is omitted, we don't even define it. */
  
  #else   /* SUPPORT_UTF8 */
  
+/* Advance a character pointer one byte in non-UTF-8 mode and by one character
+in UTF-8 mode. */
+
+#define NEXTCHAR(p) \
+  p++; \
+  if (utf8) { while((*p & 0xc0) == 0x80) p++; }
+
  /* Get the next UTF-8 character, not advancing the pointer. This is called when
  we know we are in UTF-8 mode. */
  
@@ -535,7 +543,7 @@ req_byte match. */
  #define REQ_BYTE_MAX 1000
  
  /* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
-variable-length repeat, or anything other than literal characters. */
+variable-length repeat, or a anything other than literal characters. */
  
  #define REQ_CASELESS 0x0100    /* indicates caselessness */
  #define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
@@ -868,7 +876,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
-       ERR60, ERR61 };
+       ERR60, ERR61, ERR62, ERR63 };
  
  /* The real format of the start of the pcre block; the index of names and the
  code vector run on as long as necessary after the end. We store an explicit
@@ -931,7 +939,8 @@ typedef struct compile_data {
    uschar *name_table;           /* The name/number table */
    int  names_found;             /* Number of entries so far */
    int  name_entry_size;         /* Size of each entry */
-  int  bracount;                /* Count of capturing parens */
+  int  bracount;                /* Count of capturing parens as we compile */
+  int  final_bracount;          /* Saved value after first pass */
    int  top_backref;             /* Maximum back reference */
    unsigned int backref_map;     /* Bitmap of low back refs */
    int  external_options;        /* External (initial) options */
@@ -1033,7 +1042,7 @@ typedef struct dfa_match_data {
  #define ctype_letter  0x02
  #define ctype_digit   0x04
  #define ctype_xdigit  0x08
-#define ctype_word    0x10   /* alphameric or '_' */
+#define ctype_word    0x10   /* alphanumeric or '_' */
  #define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
  
  /* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
diff --git a/glib/pcre/pcre_maketables.c b/glib/pcre/pcre_maketables.c

index 352bea98e7a31d9286ef87e9f349c35be38d0d48..219973e3786149c29d338abdec1fe660a735b576 100644 (file)
--- a/glib/pcre/pcre_maketables.c
+++ b/glib/pcre/pcre_maketables.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_newline.c b/glib/pcre/pcre_newline.c

index 1708d939589f499fe52eb7a83ff460262994fdd3..58885760effd1e3aa7ab356ecd516abe8d336f5a 100644 (file)
--- a/glib/pcre/pcre_newline.c
+++ b/glib/pcre/pcre_newline.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_ord2utf8.c b/glib/pcre/pcre_ord2utf8.c

index d3904c655d3227b4e24d7a6e196cbb9ee9b95c09..0fdc512a0071a0b9ab4d34aa3e9b5331092972aa 100644 (file)
--- a/glib/pcre/pcre_ord2utf8.c
+++ b/glib/pcre/pcre_ord2utf8.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_refcount.c b/glib/pcre/pcre_refcount.c

index b14103c7b8ef4873635f933a94fcb025eebad563..eeb2897c82fca8138980256a2a840921e6cc7184 100644 (file)
--- a/glib/pcre/pcre_refcount.c
+++ b/glib/pcre/pcre_refcount.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_study.c b/glib/pcre/pcre_study.c

index 1c283848b13168457eb7a3b8a8c9e85d4c055abb..ff1f260ec101de1feac57d22b69eb68b68ad4ec1 100644 (file)
--- a/glib/pcre/pcre_study.c
+++ b/glib/pcre/pcre_study.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c

index 4b14fd1befacaab55d919e35f84a8dad8c552304..e37dcb629b9d77d4c134a846a051538924c3991e 100644 (file)
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_try_flipped.c b/glib/pcre/pcre_try_flipped.c

index 412902bbbeb0e1677dc53b1ae671a08fb29091cb..0d2f3a2d385da6e661bdf5061f6192914658d97b 100644 (file)
--- a/glib/pcre/pcre_try_flipped.c
+++ b/glib/pcre/pcre_try_flipped.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_version.c b/glib/pcre/pcre_version.c

index 425ab214cea87ccb9e8aff227d70fe6575585377..697b44b3910c5a41a13ff3d62be71e19de872493 100644 (file)
--- a/glib/pcre/pcre_version.c
+++ b/glib/pcre/pcre_version.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c

index cdf1af12fde6eb5930a5290f520e7f36c8a57833..cfb5a77b503a545c333c2fb26f86c88a01a52d8f 100644 (file)
--- a/glib/pcre/pcre_xclass.c
+++ b/glib/pcre/pcre_xclass.c
@@ -6,7 +6,7 @@
  and semantics are as close as possible to those of the Perl 5 language.
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2008 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
author	Matthias Clasen <matthiasc@src.gnome.org>
	Tue, 11 Mar 2008 01:51:07 +0000 (01:51 +0000)
committer	Matthias Clasen <matthiasc@src.gnome.org>
	Tue, 11 Mar 2008 01:51:07 +0000 (01:51 +0000)
glib/pcre/Makefile.am		patch \| blob \| history
glib/pcre/makefile.msc		patch \| blob \| history
glib/pcre/pcre.h		patch \| blob \| history
glib/pcre/pcre_chartables.c		patch \| blob \| history
glib/pcre/pcre_compile.c		patch \| blob \| history
glib/pcre/pcre_config.c		patch \| blob \| history
glib/pcre/pcre_dfa_exec.c		patch \| blob \| history
glib/pcre/pcre_exec.c		patch \| blob \| history
glib/pcre/pcre_fullinfo.c		patch \| blob \| history
glib/pcre/pcre_get.c		patch \| blob \| history
glib/pcre/pcre_globals.c		patch \| blob \| history
glib/pcre/pcre_info.c		patch \| blob \| history
glib/pcre/pcre_internal.h		patch \| blob \| history
glib/pcre/pcre_maketables.c		patch \| blob \| history
glib/pcre/pcre_newline.c		patch \| blob \| history
glib/pcre/pcre_ord2utf8.c		patch \| blob \| history
glib/pcre/pcre_refcount.c		patch \| blob \| history
glib/pcre/pcre_study.c		patch \| blob \| history
glib/pcre/pcre_tables.c		patch \| blob \| history
glib/pcre/pcre_try_flipped.c		patch \| blob \| history
glib/pcre/pcre_version.c		patch \| blob \| history
glib/pcre/pcre_xclass.c		patch \| blob \| history