Update PCRE to 7.8

author Matthias Clasen <matthiasc@src.gnome.org>
Sun, 18 Jan 2009 06:32:03 +0000 (06:32 +0000)

committer Matthias Clasen <matthiasc@src.gnome.org>
Sun, 18 Jan 2009 06:32:03 +0000 (06:32 +0000)
diff --git a/ChangeLog b/ChangeLog

index 53774b690627cc3d9a076d1f708075688b0f5bc6..751f30627bccd8f6af0f3ef7d3139dc02ac1f9e4 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2009-01-18  Matthias Clasen  <mclasen@redhat.com>
+
+       * glib/pcre: Update to PCRE 7.8
+
  2009-01-17  Matthias Clasen  <mclasen@redhat.com>
  
         Bug 567977 – textdomain() macro should not return NULL when 
diff --git a/glib/pcre/pcre.h b/glib/pcre/pcre.h

index 4d3ac779a4c20801f7a73846a672e97a52863f20..8fc80a70e82e0340de75c4629b02ad0479b5dcd4 100644 (file)
--- a/glib/pcre/pcre.h
+++ b/glib/pcre/pcre.h
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
  /* The current PCRE version information. */
  
  #define PCRE_MAJOR          7
-#define PCRE_MINOR          7
+#define PCRE_MINOR          8
  #define PCRE_PRERELEASE     
-#define PCRE_DATE           2008-05-07
+#define PCRE_DATE           2008-09-05
  
  /* When an application links to a PCRE DLL in Windows, the symbols that are
  imported have to be identified as such. When building PCRE, the appropriate
diff --git a/glib/pcre/pcre_chartables.c b/glib/pcre/pcre_chartables.c

index 75e46a08a50b29d9ee623bb4b01e18b5e4acc106..ae45db0ca35b595a8e119b89b2880724813283fd 100644 (file)
--- a/glib/pcre/pcre_chartables.c
+++ b/glib/pcre/pcre_chartables.c
@@ -1,6 +1,3 @@
-/* This file is autogenerated by ../update-pcre/update.sh during
- * the update of the local copy of PCRE.
- */
  /*************************************************
  *      Perl-Compatible Regular Expressions       *
  *************************************************/
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c

index 54756bc7403a3fb1a442030d2d3d73130a6c5498..ae68fb56644938b2dafc129493de2f3cfc682bc0 100644 (file)
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -331,7 +331,7 @@ static const char *
  find_error_text(int n)
  {
  const char *s = error_texts;
-for (; n > 0; n--) while (*s++ != 0);
+for (; n > 0; n--) while (*s++ != 0) {};
  return s;
  }
  
@@ -437,7 +437,7 @@ else
        {
        const uschar *p;
        for (p = ptr+2; *p != 0 && *p != '}'; p++)
-        if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
+        if (*p != '-' && g_ascii_isdigit (*p) == 0) break;
        if (*p != 0 && *p != '}')
          {
          c = -ESC_k;
@@ -456,7 +456,7 @@ else
      else negated = FALSE;
  
      c = 0;
-    while (g_ascii_isdigit(ptr[1]) != 0)
+    while (g_ascii_isdigit (ptr[1]) != 0)
        c = c * 10 + *(++ptr) - '0';
  
      if (c < 0)   /* Integer overflow */
@@ -509,7 +509,7 @@ else
        {
        oldptr = ptr;
        c -= '0';
-      while (g_ascii_isdigit(ptr[1]) != 0)
+      while (g_ascii_isdigit (ptr[1]))
          c = c * 10 + *(++ptr) - '0';
        if (c < 0)    /* Integer overflow */
          {
@@ -559,7 +559,7 @@ else
        int count = 0;
  
        c = 0;
-      while (g_ascii_isxdigit(*pt) != 0)
+      while (g_ascii_isxdigit (*pt) != 0)
          {
          register int cc = *pt++;
          if (c == 0 && cc == '0') continue;     /* Leading zeroes */
@@ -588,7 +588,7 @@ else
      /* Read just a single-byte hex-defined char */
  
      c = 0;
-    while (i++ < 2 && g_ascii_isxdigit(ptr[1]) != 0)
+    while (i++ < 2 && g_ascii_isxdigit (ptr[1]) != 0)
        {
        int cc;                               /* Some compilers don't like ++ */
        cc = *(++ptr);                        /* in initializers */
@@ -757,15 +757,15 @@ Returns:    TRUE or FALSE
  static BOOL
  is_counted_repeat(const uschar *p)
  {
-if (g_ascii_isdigit(*p++) == 0) return FALSE;
-while (g_ascii_isdigit(*p) != 0) p++;
+if (g_ascii_isdigit (*p++) == 0) return FALSE;
+while (g_ascii_isdigit (*p) != 0) p++;
  if (*p == '}') return TRUE;
  
  if (*p++ != ',') return FALSE;
  if (*p == '}') return TRUE;
  
-if (g_ascii_isdigit(*p++) == 0) return FALSE;
-while (g_ascii_isdigit(*p) != 0) p++;
+if (g_ascii_isdigit (*p++) == 0) return FALSE;
+while (g_ascii_isdigit (*p) != 0) p++;
  
  return (*p == '}');
  }
@@ -800,7 +800,7 @@ int max = -1;
  /* Read the minimum value and do a paranoid check: a negative value indicates
  an integer overflow. */
  
-while (g_ascii_isdigit(*p) != 0) min = min * 10 + *p++ - '0';
+while (g_ascii_isdigit (*p) != 0) min = min * 10 + *p++ - '0';
  if (min < 0 || min > 65535)
    {
    *errorcodeptr = ERR5;
@@ -815,7 +815,7 @@ if (*p == '}') max = min; else
    if (*(++p) != '}')
      {
      max = 0;
-    while(g_ascii_isdigit(*p) != 0) max = max * 10 + *p++ - '0';
+    while(g_ascii_isdigit (*p) != 0) max = max * 10 + *p++ - '0';
      if (max < 0 || max > 65535)
        {
        *errorcodeptr = ERR5;
@@ -878,7 +878,7 @@ for (; *ptr != 0; ptr++)
      if (*(++ptr) == 0) return -1;
      if (*ptr == 'Q') for (;;)
        {
-      while (*(++ptr) != 0 && *ptr != '\\');
+      while (*(++ptr) != 0 && *ptr != '\\') {};
        if (*ptr == 0) return -1;
        if (*(++ptr) == 'E') break;
        }
@@ -921,7 +921,7 @@ for (; *ptr != 0; ptr++)
          if (*(++ptr) == 0) return -1;
          if (*ptr == 'Q') for (;;)
            {
-          while (*(++ptr) != 0 && *ptr != '\\');
+          while (*(++ptr) != 0 && *ptr != '\\') {};
            if (*ptr == 0) return -1;
            if (*(++ptr) == 'E') break;
            }
@@ -935,7 +935,7 @@ for (; *ptr != 0; ptr++)
  
    if (xmode && *ptr == '#')
      {
-    while (*(++ptr) != 0 && *ptr != '\n');
+    while (*(++ptr) != 0 && *ptr != '\n') {};
      if (*ptr == 0) return -1;
      continue;
      }
@@ -1326,6 +1326,8 @@ for (;;)
        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
        break;
        }
+#else
+    (void)(utf8);  /* Keep compiler happy by referencing function argument */
  #endif
      }
    }
@@ -1419,6 +1421,8 @@ for (;;)
        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
        break;
        }
+#else
+    (void)(utf8);  /* Keep compiler happy by referencing function argument */
  #endif
      }
    }
@@ -1891,7 +1895,7 @@ get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
  unsigned int c, othercase, next;
  
  for (c = *cptr; c <= d; c++)
-  { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
+  { if ((othercase = UCD_OTHERCASE(c)) != c) break; }
  
  if (c > d) return FALSE;
  
@@ -1900,7 +1904,7 @@ next = othercase + 1;
  
  for (++c; c <= d; c++)
    {
-  if (_pcre_ucp_othercase(c) != next) break;
+  if (UCD_OTHERCASE(c) != next) break;
    next++;
    }
  
@@ -2010,6 +2014,8 @@ if (next >= 0) switch(op_code)
    case OP_CHAR:
  #ifdef SUPPORT_UTF8
    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
+#else
+  (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
  #endif
    return item != next;
  
@@ -2028,7 +2034,7 @@ if (next >= 0) switch(op_code)
      unsigned int othercase;
      if (next < 128) othercase = cd->fcc[next]; else
  #ifdef SUPPORT_UCP
-    othercase = _pcre_ucp_othercase((unsigned int)next);
+    othercase = UCD_OTHERCASE((unsigned int)next);
  #else
      othercase = NOTACHAR;
  #endif
@@ -2049,7 +2055,7 @@ if (next >= 0) switch(op_code)
      unsigned int othercase;
      if (next < 128) othercase = cd->fcc[next]; else
  #ifdef SUPPORT_UCP
-    othercase = _pcre_ucp_othercase(next);
+    othercase = UCD_OTHERCASE(next);
  #else
      othercase = NOTACHAR;
  #endif
@@ -3215,7 +3221,7 @@ for (;; ptr++)
          if ((options & PCRE_CASELESS) != 0)
            {
            unsigned int othercase;
-          if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
+          if ((othercase = UCD_OTHERCASE(c)) != c)
              {
              *class_utf8data++ = XCL_SINGLE;
              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
@@ -4092,7 +4098,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
        const char *vn = verbnames;
        const uschar *name = ++ptr;
        previous = NULL;
-      while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
+      while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
        if (*ptr == ':')
          {
          *errorcodeptr = ERR59;   /* Not supported */
@@ -4230,7 +4236,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
          while ((cd->ctypes[*ptr] & ctype_word) != 0)
            {
            if (recno >= 0)
-            recno = (g_ascii_isdigit(*ptr) != 0)?
+            recno = (g_ascii_isdigit (*ptr) != 0)?
                recno * 10 + *ptr - '0' : -1;
            ptr++;
            }
@@ -4315,7 +4321,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
            recno = 0;
            for (i = 1; i < namelen; i++)
              {
-            if (g_ascii_isdigit(name[i]) == 0)
+            if (g_ascii_isdigit (name[i]) == 0)
                {
                *errorcodeptr = ERR15;
                goto FAILED;
@@ -4411,7 +4417,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
          *code++ = OP_CALLOUT;
            {
            int n = 0;
-          while (g_ascii_isdigit(*(++ptr)) != 0)
+          while (g_ascii_isdigit (*(++ptr)) != 0)
              n = n * 10 + *ptr - '0';
            if (*ptr != ')')
              {
@@ -4626,7 +4632,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
            if ((refsign = *ptr) == '+')
              {
              ptr++;
-            if (g_ascii_isdigit(*ptr) == 0)
+            if (g_ascii_isdigit (*ptr) == 0)
                {
                *errorcodeptr = ERR63;
                goto FAILED;
@@ -4634,13 +4640,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
              }
            else if (refsign == '-')
              {
-            if (g_ascii_isdigit(ptr[1]) == 0)
+            if (g_ascii_isdigit (ptr[1]) == 0)
                goto OTHER_CHAR_AFTER_QUERY;
              ptr++;
              }
  
            recno = 0;
-          while(g_ascii_isdigit(*ptr) != 0)
+          while(g_ascii_isdigit (*ptr) != 0)
              recno = recno * 10 + *ptr++ - '0';
  
            if (*ptr != terminator)
@@ -4796,10 +4802,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
          both phases.
  
          If we are not at the pattern start, compile code to change the ims
-        options if this setting actually changes any of them. We also pass the
-        new setting back so that it can be put at the start of any following
-        branches, and when this group ends (if we are in a group), a resetting
-        item can be compiled. */
+        options if this setting actually changes any of them, and reset the
+        greedy defaults and the case value for firstbyte and reqbyte. */
  
          if (*ptr == ')')
            {
@@ -4807,7 +4811,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
              {
              cd->external_options = newoptions;
-            options = *optionsptr = newoptions;
              }
           else
              {
@@ -4816,17 +4819,17 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
                *code++ = OP_OPT;
                *code++ = newoptions & PCRE_IMS;
                }
-
-            /* Change options at this level, and pass them back for use
-            in subsequent branches. Reset the greedy defaults and the case
-            value for firstbyte and reqbyte. */
-
-            *optionsptr = options = newoptions;
              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
              greedy_non_default = greedy_default ^ 1;
-            req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
              }
  
+          /* Change options at this level, and pass them back for use
+          in subsequent branches. When not at the start of the pattern, this
+          information is also necessary so that a resetting item can be
+          compiled at the end of a group (if we are in a group). */
+
+          *optionsptr = options = newoptions;
            previous = NULL;       /* This item can't be repeated */
            continue;              /* It is complete */
            }
@@ -5115,7 +5118,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
          /* Test a signed number in angle brackets or quotes. */
  
          p = ptr + 2;
-        while (g_ascii_isdigit(*p) != 0) p++;
+        while (g_ascii_isdigit (*p) != 0) p++;
          if (*p != terminator)
            {
            *errorcodeptr = ERR57;
@@ -5820,7 +5823,7 @@ Returns:        pointer to compiled data block, or NULL on error,
                  with errorptr and erroroffset set
  */
  
-PCRE_EXP_DEFN pcre *
+PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
  pcre_compile(const char *pattern, int options, const char **errorptr,
    int *erroroffset, const unsigned char *tables)
  {
@@ -5828,7 +5831,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
  }
  
  
-PCRE_EXP_DEFN pcre *
+PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
    const char **errorptr, int *erroroffset, const unsigned char *tables)
  {
diff --git a/glib/pcre/pcre_config.c b/glib/pcre/pcre_config.c

index 454fed98dfb76cce95ab4baa9c39eb32e55769cb..114f0fb7aa3ce1a47a10a937d9ac73e46eab4b62 100644 (file)
--- a/glib/pcre/pcre_config.c
+++ b/glib/pcre/pcre_config.c
@@ -62,7 +62,7 @@ Arguments:
  Returns:           0 if data returned, negative on error
  */
  
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_config(int what, void *where)
  {
  switch (what)
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c

index 01aad5df44892a93683d394572124b4fb0ac66e1..9a73a5209a7f4c432e35bf63fb32143d32630fe9 100644 (file)
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -512,9 +512,6 @@ for (;;)
      const uschar *code;
      int state_offset = current_state->offset;
      int count, codevalue;
-#ifdef SUPPORT_UCP
-    int chartype, script;
-#endif
  
  #ifdef DEBUG
      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -825,7 +822,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
          switch(code[1])
            {
            case PT_ANY:
@@ -837,7 +834,7 @@ for (;;)
            break;
  
            case PT_GC:
-          OK = category == code[2];
+          OK = _pcre_ucp_gentype[chartype] == code[2];
            break;
  
            case PT_PC:
@@ -845,7 +842,7 @@ for (;;)
            break;
  
            case PT_SC:
-          OK = script == code[2];
+          OK = UCD_SCRIPT(c) == code[2];
            break;
  
            /* Should never occur, but keep compilers from grumbling. */
@@ -994,7 +991,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
          switch(code[2])
            {
            case PT_ANY:
@@ -1006,7 +1003,7 @@ for (;;)
            break;
  
            case PT_GC:
-          OK = category == code[3];
+          OK = _pcre_ucp_gentype[chartype] == code[3];
            break;
  
            case PT_PC:
@@ -1014,7 +1011,7 @@ for (;;)
            break;
  
            case PT_SC:
-          OK = script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
            break;
  
            /* Should never occur, but keep compilers from grumbling. */
@@ -1043,7 +1040,7 @@ for (;;)
        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
        count = current_state->count;  /* Already matched */
        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
          const uschar *nptr = ptr + clen;
          int ncount = 0;
@@ -1057,7 +1054,7 @@ for (;;)
            int nd;
            int ndlen = 1;
            GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
            ncount++;
            nptr += ndlen;
            }
@@ -1216,7 +1213,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
          switch(code[2])
            {
            case PT_ANY:
@@ -1228,7 +1225,7 @@ for (;;)
            break;
  
            case PT_GC:
-          OK = category == code[3];
+          OK = _pcre_ucp_gentype[chartype] == code[3];
            break;
  
            case PT_PC:
@@ -1236,7 +1233,7 @@ for (;;)
            break;
  
            case PT_SC:
-          OK = script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
            break;
  
            /* Should never occur, but keep compilers from grumbling. */
@@ -1274,7 +1271,7 @@ for (;;)
        QS2:
  
        ADD_ACTIVE(state_offset + 2, 0);
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
          const uschar *nptr = ptr + clen;
          int ncount = 0;
@@ -1289,7 +1286,7 @@ for (;;)
            int nd;
            int ndlen = 1;
            GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
            ncount++;
            nptr += ndlen;
            }
@@ -1463,7 +1460,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
-        int category = _pcre_ucp_findprop(c, &chartype, &script);
+        int chartype = UCD_CHARTYPE(c);
          switch(code[4])
            {
            case PT_ANY:
@@ -1475,7 +1472,7 @@ for (;;)
            break;
  
            case PT_GC:
-          OK = category == code[5];
+          OK = _pcre_ucp_gentype[chartype] == code[5];
            break;
  
            case PT_PC:
@@ -1483,7 +1480,7 @@ for (;;)
            break;
  
            case PT_SC:
-          OK = script == code[5];
+          OK = UCD_SCRIPT(c) == code[5];
            break;
  
            /* Should never occur, but keep compilers from grumbling. */
@@ -1516,7 +1513,7 @@ for (;;)
        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
          { ADD_ACTIVE(state_offset + 4, 0); }
        count = current_state->count;  /* Number already matched */
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
          const uschar *nptr = ptr + clen;
          int ncount = 0;
@@ -1530,7 +1527,7 @@ for (;;)
            int nd;
            int ndlen = 1;
            GETCHARLEN(nd, nptr, ndlen);
-          if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(nd) != ucp_M) break;
            ncount++;
            nptr += ndlen;
            }
@@ -1710,7 +1707,7 @@ for (;;)
            other case of the character. */
  
  #ifdef SUPPORT_UCP
-          othercase = _pcre_ucp_othercase(c);
+          othercase = UCD_OTHERCASE(c);
  #else
            othercase = NOTACHAR;
  #endif
@@ -1735,7 +1732,7 @@ for (;;)
        to wait for them to pass before continuing. */
  
        case OP_EXTUNI:
-      if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
          const uschar *nptr = ptr + clen;
          int ncount = 0;
@@ -1743,7 +1740,7 @@ for (;;)
            {
            int nclen = 1;
            GETCHARLEN(c, nptr, nclen);
-          if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
+          if (UCD_CATEGORY(c) != ucp_M) break;
            ncount++;
            nptr += nclen;
            }
@@ -1911,7 +1908,7 @@ for (;;)
            if (utf8 && d >= 128)
              {
  #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
@@ -1949,7 +1946,7 @@ for (;;)
            if (utf8 && d >= 128)
              {
  #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
@@ -1985,7 +1982,7 @@ for (;;)
            if (utf8 && d >= 128)
              {
  #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
@@ -2017,7 +2014,7 @@ for (;;)
            if (utf8 && d >= 128)
              {
  #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
@@ -2052,7 +2049,7 @@ for (;;)
            if (utf8 && d >= 128)
              {
  #ifdef SUPPORT_UCP
-            otherd = _pcre_ucp_othercase(d);
+            otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
@@ -2508,7 +2505,7 @@ Returns:          > 0 => number of match offset pairs placed in offsets
                   < -1 => some kind of unexpected problem
  */
  
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
    const char *subject, int length, int start_offset, int options, int *offsets,
    int offsetcount, int *workspace, int wscount)
@@ -2736,7 +2733,18 @@ for (;;)
  
      if (firstline)
        {
-      const uschar *t = current_subject;
+      USPTR t = current_subject;
+#ifdef SUPPORT_UTF8
+      if (utf8)
+        {
+        while (t < md->end_subject && !IS_NEWLINE(t))
+          {
+          t++;
+          while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+          }
+        }
+      else
+#endif
        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
        end_subject = t;
        }
@@ -2758,7 +2766,20 @@ for (;;)
        {
        if (current_subject > md->start_subject + start_offset)
          {
-        while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
+#ifdef SUPPORT_UTF8
+        if (utf8)
+          {
+          while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+            {
+            current_subject++;
+            while(current_subject < end_subject &&
+                  (*current_subject & 0xc0) == 0x80)
+              current_subject++;
+            }
+          }
+        else
+#endif
+        while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
            current_subject++;
  
          /* If we have just passed a CR and the newline option is ANY or
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c

index ed28ae7c97c9f7c0d9ccfc02af130b3c91f7a844..06ce8f70e9ce17d59ca20cb5f967960f9555a749 100644 (file)
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@@ -158,13 +158,39 @@ printf("\n");
  
  if (length > md->end_subject - eptr) return FALSE;
  
-/* Separate the caselesss case for speed */
+/* Separate the caseless case for speed. In UTF-8 mode we can only do this
+properly if Unicode properties are supported. Otherwise, we can check only
+ASCII characters. */
  
  if ((ims & PCRE_CASELESS) != 0)
    {
+#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UCP
+  if (md->utf8)
+    {
+    USPTR endptr = eptr + length;
+    while (eptr < endptr)
+      {
+      int c, d;
+      GETCHARINC(c, eptr);
+      GETCHARINC(d, p);
+      if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
+      }
+    }
+  else
+#endif
+#endif
+
+  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
+  is no UCP support. */
+
    while (length-- > 0)
-    if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
+    { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
    }
+
+/* In the caseful case, we can just compare the bytes, whether or not we
+are in UTF-8 mode. */
+
  else
    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
  
@@ -1653,9 +1679,7 @@ for (;;)
      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
      GETCHARINCTEST(c, eptr);
        {
-      int chartype, script;
-      int category = _pcre_ucp_findprop(c, &chartype, &script);
-
+      int chartype = UCD_CHARTYPE(c);
        switch(ecode[1])
          {
          case PT_ANY:
@@ -1670,7 +1694,7 @@ for (;;)
           break;
  
          case PT_GC:
-        if ((ecode[2] != category) == (op == OP_PROP))
+        if ((ecode[2] != _pcre_ucp_gentype[chartype]) == (op == OP_PROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
@@ -1680,7 +1704,7 @@ for (;;)
          break;
  
          case PT_SC:
-        if ((ecode[2] != script) == (op == OP_PROP))
+        if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
@@ -1699,8 +1723,7 @@ for (;;)
      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
      GETCHARINCTEST(c, eptr);
        {
-      int chartype, script;
-      int category = _pcre_ucp_findprop(c, &chartype, &script);
+      int category = UCD_CATEGORY(c);
        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
        while (eptr < md->end_subject)
          {
@@ -1709,7 +1732,7 @@ for (;;)
            {
            GETCHARLEN(c, eptr, len);
            }
-        category = _pcre_ucp_findprop(c, &chartype, &script);
+        category = UCD_CATEGORY(c);
          if (category != ucp_M) break;
          eptr += len;
          }
@@ -2174,7 +2197,7 @@ for (;;)
          if (fc != dc)
            {
  #ifdef SUPPORT_UCP
-          if (dc != _pcre_ucp_othercase(fc))
+          if (dc != UCD_OTHERCASE(fc))
  #endif
              RRETURN(MATCH_NOMATCH);
            }
@@ -2265,7 +2288,7 @@ for (;;)
  #ifdef SUPPORT_UCP
          unsigned int othercase;
          if ((ims & PCRE_CASELESS) != 0 &&
-            (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
+            (othercase = UCD_OTHERCASE(fc)) != fc)
            oclength = _pcre_ord2utf8(othercase, occhars);
          else oclength = 0;
  #endif  /* SUPPORT_UCP */
@@ -2585,10 +2608,11 @@ for (;;)
              {
              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINC(d, eptr);
              if (d < 256) d = md->lcc[d];
-            if (fi >= max || eptr >= md->end_subject || fc == d)
-              RRETURN(MATCH_NOMATCH);
+            if (fc == d) RRETURN(MATCH_NOMATCH);
+
              }
            }
          else
@@ -2694,9 +2718,9 @@ for (;;)
              {
              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINC(d, eptr);
-            if (fi >= max || eptr >= md->end_subject || fc == d)
-              RRETURN(MATCH_NOMATCH);
+            if (fc == d) RRETURN(MATCH_NOMATCH);
              }
            }
          else
@@ -2870,7 +2894,7 @@ for (;;)
              {
              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
              if ((prop_chartype == ucp_Lu ||
                   prop_chartype == ucp_Ll ||
                   prop_chartype == ucp_Lt) == prop_fail_result)
@@ -2883,7 +2907,7 @@ for (;;)
              {
              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
              if ((prop_category == prop_value) == prop_fail_result)
                RRETURN(MATCH_NOMATCH);
              }
@@ -2894,7 +2918,7 @@ for (;;)
              {
              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
              if ((prop_chartype == prop_value) == prop_fail_result)
                RRETURN(MATCH_NOMATCH);
              }
@@ -2905,7 +2929,7 @@ for (;;)
              {
              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINCTEST(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_script = UCD_SCRIPT(c);
              if ((prop_script == prop_value) == prop_fail_result)
                RRETURN(MATCH_NOMATCH);
              }
@@ -2924,7 +2948,7 @@ for (;;)
          for (i = 1; i <= min; i++)
            {
            GETCHARINCTEST(c, eptr);
-          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+          prop_category = UCD_CATEGORY(c);
            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
            while (eptr < md->end_subject)
              {
@@ -2933,7 +2957,7 @@ for (;;)
                {
                GETCHARLEN(c, eptr, len);
                }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
              if (prop_category != ucp_M) break;
              eptr += len;
              }
@@ -3349,7 +3373,7 @@ for (;;)
              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
              if ((prop_chartype == ucp_Lu ||
                   prop_chartype == ucp_Ll ||
                   prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3364,7 +3388,7 @@ for (;;)
              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
              if ((prop_category == prop_value) == prop_fail_result)
                RRETURN(MATCH_NOMATCH);
              }
@@ -3377,7 +3401,7 @@ for (;;)
              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
              if ((prop_chartype == prop_value) == prop_fail_result)
                RRETURN(MATCH_NOMATCH);
              }
@@ -3390,7 +3414,7 @@ for (;;)
              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
              GETCHARINC(c, eptr);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_script = UCD_SCRIPT(c);
              if ((prop_script == prop_value) == prop_fail_result)
                RRETURN(MATCH_NOMATCH);
              }
@@ -3412,7 +3436,7 @@ for (;;)
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
            GETCHARINCTEST(c, eptr);
-          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+          prop_category = UCD_CATEGORY(c);
            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
            while (eptr < md->end_subject)
              {
@@ -3421,7 +3445,7 @@ for (;;)
                {
                GETCHARLEN(c, eptr, len);
                }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
              if (prop_category != ucp_M) break;
              eptr += len;
              }
@@ -3739,7 +3763,7 @@ for (;;)
              int len = 1;
              if (eptr >= md->end_subject) break;
              GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
              if ((prop_chartype == ucp_Lu ||
                   prop_chartype == ucp_Ll ||
                   prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3754,7 +3778,7 @@ for (;;)
              int len = 1;
              if (eptr >= md->end_subject) break;
              GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
              if ((prop_category == prop_value) == prop_fail_result)
                break;
              eptr+= len;
@@ -3767,7 +3791,7 @@ for (;;)
              int len = 1;
              if (eptr >= md->end_subject) break;
              GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_chartype = UCD_CHARTYPE(c);
              if ((prop_chartype == prop_value) == prop_fail_result)
                break;
              eptr+= len;
@@ -3780,7 +3804,7 @@ for (;;)
              int len = 1;
              if (eptr >= md->end_subject) break;
              GETCHARLEN(c, eptr, len);
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_script = UCD_SCRIPT(c);
              if ((prop_script == prop_value) == prop_fail_result)
                break;
              eptr+= len;
@@ -3809,7 +3833,7 @@ for (;;)
            {
            if (eptr >= md->end_subject) break;
            GETCHARINCTEST(c, eptr);
-          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+          prop_category = UCD_CATEGORY(c);
            if (prop_category == ucp_M) break;
            while (eptr < md->end_subject)
              {
@@ -3818,7 +3842,7 @@ for (;;)
                {
                GETCHARLEN(c, eptr, len);
                }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
              if (prop_category != ucp_M) break;
              eptr += len;
              }
@@ -3840,7 +3864,7 @@ for (;;)
                BACKCHAR(eptr);
                GETCHARLEN(c, eptr, len);
                }
-            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+            prop_category = UCD_CATEGORY(c);
              if (prop_category != ucp_M) break;
              eptr--;
              }
@@ -4360,7 +4384,7 @@ Returns:          > 0 => success; value is the number of elements filled in
                   < -1 => some kind of unexpected problem
  */
  
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
    int offsetcount)
@@ -4672,31 +4696,53 @@ for(;;)
    if (firstline)
      {
      USPTR t = start_match;
+#ifdef SUPPORT_UTF8
+    if (utf8)
+      {
+      while (t < md->end_subject && !IS_NEWLINE(t))
+        {
+        t++;
+        while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+        }
+      }
+    else
+#endif
      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
      end_subject = t;
      }
  
-  /* Now test for a unique first byte */
+  /* Now advance to a unique first byte if there is one. */
  
    if (first_byte >= 0)
      {
      if (first_byte_caseless)
-      while (start_match < end_subject &&
-             md->lcc[*start_match] != first_byte)
-        { NEXTCHAR(start_match); }
+      while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+        start_match++;
      else
        while (start_match < end_subject && *start_match != first_byte)
-        { NEXTCHAR(start_match); }
+        start_match++;
      }
  
-  /* Or to just after a linebreak for a multiline match if possible */
+  /* Or to just after a linebreak for a multiline match */
  
    else if (startline)
      {
      if (start_match > md->start_subject + start_offset)
        {
-      while (start_match <= end_subject && !WAS_NEWLINE(start_match))
-        { NEXTCHAR(start_match); }
+#ifdef SUPPORT_UTF8
+      if (utf8)
+        {
+        while (start_match < end_subject && !WAS_NEWLINE(start_match))
+          {
+          start_match++;
+          while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+            start_match++;
+          }
+        }
+      else
+#endif
+      while (start_match < end_subject && !WAS_NEWLINE(start_match))
+        start_match++;
  
        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
        and we are now at a LF, advance the match position by one more character.
@@ -4710,16 +4756,15 @@ for(;;)
        }
      }
  
-  /* Or to a non-unique first char after study */
+  /* Or to a non-unique first byte after study */
  
    else if (start_bits != NULL)
      {
      while (start_match < end_subject)
        {
        register unsigned int c = *start_match;
-      if ((start_bits[c/8] & (1 << (c&7))) == 0)
-        { NEXTCHAR(start_match); }
-      else break;
+      if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
+        else break;
        }
      }
  
diff --git a/glib/pcre/pcre_fullinfo.c b/glib/pcre/pcre_fullinfo.c

index 7b001c60b2c56fd900226e74b089f132144b31a5..30566bbc01f8f4c258e1c645f8149f93eea0f15f 100644 (file)
--- a/glib/pcre/pcre_fullinfo.c
+++ b/glib/pcre/pcre_fullinfo.c
@@ -65,7 +65,7 @@ Arguments:
  Returns:           0 if data returned, negative on error
  */
  
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
    void *where)
  {
diff --git a/glib/pcre/pcre_get.c b/glib/pcre/pcre_get.c

index 68b8de47b6ffcd21874e5ac59be6957b4c14f6d7..61177864093e5efc920cc1458da059a6974e767d 100644 (file)
--- a/glib/pcre/pcre_get.c
+++ b/glib/pcre/pcre_get.c
@@ -65,7 +65,7 @@ Returns:      the number of the named parentheses, or a negative number
                  (PCRE_ERROR_NOSUBSTRING) if not found
  */
  
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_get_stringnumber(const pcre *code, const char *stringname)
  {
  int rc;
@@ -114,7 +114,7 @@ Returns:      the length of each entry, or a negative number
                  (PCRE_ERROR_NOSUBSTRING) if not found
  */
  
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_get_stringtable_entries(const pcre *code, const char *stringname,
    char **firstptr, char **lastptr)
  {
@@ -231,7 +231,7 @@ Returns:         if successful:
                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  */
  
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_copy_substring(const char *subject, int *ovector, int stringcount,
    int stringnumber, char *buffer, int size)
  {
@@ -276,7 +276,7 @@ Returns:         if successful:
                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  */
  
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
    int stringcount, const char *stringname, char *buffer, int size)
  {
@@ -308,7 +308,7 @@ Returns:         if successful: 0
                     PCRE_ERROR_NOMEMORY (-6) failed to get store
  */
  
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
    const char ***listptr)
  {
@@ -353,7 +353,7 @@ Argument:   the result of a previous pcre_get_substring_list()
  Returns:    nothing
  */
  
-void
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  pcre_free_substring_list(const char **pointer)
  {
  (pcre_free)((void *)pointer);
@@ -386,7 +386,7 @@ Returns:         if successful:
                     PCRE_ERROR_NOSUBSTRING (-7) substring not present
  */
  
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_get_substring(const char *subject, int *ovector, int stringcount,
    int stringnumber, const char **stringptr)
  {
@@ -433,7 +433,7 @@ Returns:         if successful:
                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  */
  
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
    int stringcount, const char *stringname, const char **stringptr)
  {
@@ -456,7 +456,7 @@ Argument:   the result of a previous pcre_get_substring()
  Returns:    nothing
  */
  
-void
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  pcre_free_substring(const char *pointer)
  {
  (pcre_free)((void *)pointer);
diff --git a/glib/pcre/pcre_globals.c b/glib/pcre/pcre_globals.c

index fe119dcd88957a2ebbcf1bfbb66ed7ae7e09537d..e759ed5ce05e33638fcdbdc457a65256cb3fa4e3 100644 (file)
--- a/glib/pcre/pcre_globals.c
+++ b/glib/pcre/pcre_globals.c
@@ -52,8 +52,6 @@ differently, and global variables are not used (see pcre.in). */
  
  #include "pcre_internal.h"
  
-#ifndef VPCOMPAT
  PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
-#endif
  
  /* End of pcre_globals.c */
diff --git a/glib/pcre/pcre_info.c b/glib/pcre/pcre_info.c

index 638a475316458b13e15e81bc741ea7504e8e81c9..02cf1c91df04a763f360ab130104051cd40c6674 100644 (file)
--- a/glib/pcre/pcre_info.c
+++ b/glib/pcre/pcre_info.c
@@ -72,7 +72,7 @@ Returns:        number of capturing subpatterns
                  or negative values on error
  */
  
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
  {
  real_pcre internal_re;
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h

index a6d33509703f34de080c620505203d06d4b2d03a..519b8712a5ec2337bc3d3c0b853fb6a5d73d7e05 100644 (file)
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@@ -132,6 +132,20 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
  #  endif
  #endif
  
+/* When compiling with the MSVC compiler, it is sometimes necessary to include
+a "calling convention" before exported function names. (This is secondhand
+information; I know nothing about MSVC myself). For example, something like
+
+  void __cdecl function(....)
+
+might be needed. In order to make this easy, all the exported functions have
+PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
+set, we ensure here that it has no effect. */
+
+#ifndef PCRE_CALL_CONVENTION
+#define PCRE_CALL_CONVENTION
+#endif
+
  /* We need to have types that specify unsigned 16-bit and 32-bit integers. We
  cannot determine these outside the compilation (e.g. by running a program as
  part of "configure") because PCRE is often cross-compiled for use on other
@@ -140,16 +154,20 @@ preprocessor time in standard C environments. */
  
  #if USHRT_MAX == 65535
    typedef unsigned short pcre_uint16;
+  typedef short pcre_int16;
  #elif UINT_MAX == 65535
    typedef unsigned int pcre_uint16;
+  typedef int pcre_int16;
  #else
    #error Cannot determine a type for 16-bit unsigned integers
  #endif
  
  #if UINT_MAX == 4294967295
    typedef unsigned int pcre_uint32;
+  typedef int pcre_int32;
  #elif ULONG_MAX == 4294967295
    typedef unsigned long int pcre_uint32;
+  typedef long int pcre_int32;
  #else
    #error Cannot determine a type for 32-bit unsigned integers
  #endif
@@ -241,7 +259,6 @@ option on the command line. */
  #define strncmp(s1,s2,m) _strncmp(s1,s2,m)
  #define memcmp(s,c,n)    _memcmp(s,c,n)
  #define memcpy(d,s,n)    _memcpy(d,s,n)
-#define memmove(d,s,n)   _memmove(d,s,n)
  #define memset(s,c,n)    _memset(s,c,n)
  #else  /* VPCOMPAT */
  
@@ -363,7 +380,6 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
  support is omitted, we don't even define it. */
  
  #ifndef SUPPORT_UTF8
-#define NEXTCHAR(p) p++;
  #define GETCHAR(c, eptr) c = *eptr;
  #define GETCHARTEST(c, eptr) c = *eptr;
  #define GETCHARINC(c, eptr) c = *eptr++;
@@ -373,13 +389,6 @@ support is omitted, we don't even define it. */
  
  #else   /* SUPPORT_UTF8 */
  
-/* Advance a character pointer one byte in non-UTF-8 mode and by one character
-in UTF-8 mode. */
-
-#define NEXTCHAR(p) \
-  p++; \
-  if (utf8) { while((*p & 0xc0) == 0x80) p++; }
-
  /* Get the next UTF-8 character, not advancing the pointer. This is called when
  we know we are in UTF-8 mode. */
  
@@ -549,7 +558,8 @@ variable-length repeat, or a anything other than literal characters. */
  #define REQ_CASELESS 0x0100    /* indicates caselessness */
  #define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
  
-/* Miscellaneous definitions */
+/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
+environments where these macros are defined elsewhere. */
  
  typedef gboolean BOOL;
  
@@ -1123,12 +1133,24 @@ extern BOOL         _pcre_is_newline(const uschar *, int, const uschar *,
  extern int          _pcre_ord2utf8(int, uschar *);
  extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
                        const pcre_study_data *, pcre_study_data *);
-extern int          _pcre_ucp_findprop(const unsigned int, int *, int *);
-extern unsigned int _pcre_ucp_othercase(const unsigned int);
  extern int          _pcre_valid_utf8(const uschar *, int);
  extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *,
                        int *, BOOL);
  extern BOOL         _pcre_xclass(int, const uschar *);
+extern unsigned int _pcre_ucp_othercase(unsigned int);
+
+
+extern const int         _pcre_ucp_gentype[];
+
+
+/* UCD access macros */
+
+#include "../glib.h"
+
+#define UCD_CHARTYPE(ch)  g_unichar_type(ch)
+#define UCD_SCRIPT(ch)    g_unichar_get_script(ch)
+#define UCD_CATEGORY(ch)  _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
+#define UCD_OTHERCASE(ch) _pcre_ucp_othercase(ch)
  
  #endif
  
diff --git a/glib/pcre/pcre_ord2utf8.c b/glib/pcre/pcre_ord2utf8.c

index 0fdc512a0071a0b9ab4d34aa3e9b5331092972aa..6f4eb9ebe95813a59bbd10ab44cc2647e18026a7 100644 (file)
--- a/glib/pcre/pcre_ord2utf8.c
+++ b/glib/pcre/pcre_ord2utf8.c
@@ -78,8 +78,10 @@ for (j = i; j > 0; j--)
  *buffer = _pcre_utf8_table2[i] | cvalue;
  return i + 1;
  #else
-return 0;   /* Keep compiler happy; this function won't ever be */
-#endif      /* called when SUPPORT_UTF8 is not defined. */
+(void)(cvalue);  /* Keep compiler happy; this function won't ever be */
+(void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
+return 0;
+#endif
  }
  
  /* End of pcre_ord2utf8.c */
diff --git a/glib/pcre/pcre_refcount.c b/glib/pcre/pcre_refcount.c

index eeb2897c82fca8138980256a2a840921e6cc7184..92e4b8505c6aeaf3fa04c8b49e1bfbe6d89f1878 100644 (file)
--- a/glib/pcre/pcre_refcount.c
+++ b/glib/pcre/pcre_refcount.c
@@ -68,7 +68,7 @@ Returns:        the (possibly updated) count value (a non-negative number), or
                  a negative error number
  */
  
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_refcount(pcre *argument_re, int adjust)
  {
  real_pcre *re = (real_pcre *)argument_re;
diff --git a/glib/pcre/pcre_study.c b/glib/pcre/pcre_study.c

index e4e24b61e7eb6252edb5ef569d6cce027df53108..778851d24cf2ce4c8d8bb9d313e15beb7b197324 100644 (file)
--- a/glib/pcre/pcre_study.c
+++ b/glib/pcre/pcre_study.c
@@ -220,6 +220,7 @@ do
        /* SKIPZERO skips the bracket. */
  
        case OP_SKIPZERO:
+      tcode++;
        do tcode += GET(tcode,1); while (*tcode == OP_ALT);
        tcode += 1 + LINK_SIZE;
        break;
@@ -503,7 +504,7 @@ Returns:    pointer to a pcre_extra block, with study_data filled in and the
              NULL on error or if no optimization possible
  */
  
-PCRE_EXP_DEFN pcre_extra *
+PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
  pcre_study(const pcre *external_re, int options, const char **errorptr)
  {
  uschar start_bits[32];
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c

index e37dcb629b9d77d4c134a846a051538924c3991e..160bc5d538de7eaa9344615ff50901a70c141556 100644 (file)
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -87,6 +87,19 @@ const uschar _pcre_utf8_table4[] = {
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
  
+/* Table to translate from particular type value to the general value. */
+
+const int _pcre_ucp_gentype[] = {
+  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
+  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
+  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
+  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
+  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
+  ucp_P, ucp_P,                       /* Ps, Po */
+  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
+  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
+};
+
  /* The pcre_utt[] table below translates Unicode property names into type and
  code values. It is searched by binary chop, so must be in collating sequence of
  name. Originally, the table contained pointers to the name strings in the first
@@ -94,7 +107,10 @@ field of each entry. However, that leads to a large number of relocations when
  a shared library is dynamically loaded. A significant reduction is made by
  putting all the names into a single, large string and then using offsets in the
  table itself. Maintenance is more error-prone, but frequent changes to this
-data is unlikely. */
+data are unlikely.
+
+July 2008: There is now a script called maint/GenerateUtt.py which can be used
+to generate this data instead of maintaining it entirely by hand. */
  
  const char _pcre_utt_names[] =
    "Any\0"
@@ -108,8 +124,10 @@ const char _pcre_utt_names[] =
    "Buhid\0"
    "C\0"
    "Canadian_Aboriginal\0"
+  "Carian\0"
    "Cc\0"
    "Cf\0"
+  "Cham\0"
    "Cherokee\0"
    "Cn\0"
    "Co\0"
@@ -136,12 +154,14 @@ const char _pcre_utt_names[] =
    "Inherited\0"
    "Kannada\0"
    "Katakana\0"
+  "Kayah_Li\0"
    "Kharoshthi\0"
    "Khmer\0"
    "L\0"
    "L&\0"
    "Lao\0"
    "Latin\0"
+  "Lepcha\0"
    "Limbu\0"
    "Linear_B\0"
    "Ll\0"
@@ -149,6 +169,8 @@ const char _pcre_utt_names[] =
    "Lo\0"
    "Lt\0"
    "Lu\0"
+  "Lycian\0"
+  "Lydian\0"
    "M\0"
    "Malayalam\0"
    "Mc\0"
@@ -163,6 +185,7 @@ const char _pcre_utt_names[] =
    "Nl\0"
    "No\0"
    "Ogham\0"
+  "Ol_Chiki\0"
    "Old_Italic\0"
    "Old_Persian\0"
    "Oriya\0"
@@ -177,14 +200,17 @@ const char _pcre_utt_names[] =
    "Pi\0"
    "Po\0"
    "Ps\0"
+  "Rejang\0"
    "Runic\0"
    "S\0"
+  "Saurashtra\0"
    "Sc\0"
    "Shavian\0"
    "Sinhala\0"
    "Sk\0"
    "Sm\0"
    "So\0"
+  "Sundanese\0"
    "Syloti_Nagri\0"
    "Syriac\0"
    "Tagalog\0"
@@ -197,6 +223,7 @@ const char _pcre_utt_names[] =
    "Tibetan\0"
    "Tifinagh\0"
    "Ugaritic\0"
+  "Vai\0"
    "Yi\0"
    "Z\0"
    "Zl\0"
@@ -204,111 +231,122 @@ const char _pcre_utt_names[] =
    "Zs\0";
  
  const ucp_type_table _pcre_utt[] = {
-  { 0,   PT_ANY, 0 },
-  { 4,   PT_SC, ucp_Arabic },
-  { 11,  PT_SC, ucp_Armenian },
-  { 20,  PT_SC, ucp_Balinese },
-  { 29,  PT_SC, ucp_Bengali },
-  { 37,  PT_SC, ucp_Bopomofo },
-  { 46,  PT_SC, ucp_Braille },
-  { 54,  PT_SC, ucp_Buginese },
-  { 63,  PT_SC, ucp_Buhid },
-  { 69,  PT_GC, ucp_C },
-  { 71,  PT_SC, ucp_Canadian_Aboriginal },
-  { 91,  PT_PC, ucp_Cc },
-  { 94,  PT_PC, ucp_Cf },
-  { 97,  PT_SC, ucp_Cherokee },
-  { 106, PT_PC, ucp_Cn },
-  { 109, PT_PC, ucp_Co },
-  { 112, PT_SC, ucp_Common },
-  { 119, PT_SC, ucp_Coptic },
-  { 126, PT_PC, ucp_Cs },
-  { 129, PT_SC, ucp_Cuneiform },
-  { 139, PT_SC, ucp_Cypriot },
-  { 147, PT_SC, ucp_Cyrillic },
-  { 156, PT_SC, ucp_Deseret },
-  { 164, PT_SC, ucp_Devanagari },
-  { 175, PT_SC, ucp_Ethiopic },
-  { 184, PT_SC, ucp_Georgian },
-  { 193, PT_SC, ucp_Glagolitic },
-  { 204, PT_SC, ucp_Gothic },
-  { 211, PT_SC, ucp_Greek },
-  { 217, PT_SC, ucp_Gujarati },
-  { 226, PT_SC, ucp_Gurmukhi },
-  { 235, PT_SC, ucp_Han },
-  { 239, PT_SC, ucp_Hangul },
-  { 246, PT_SC, ucp_Hanunoo },
-  { 254, PT_SC, ucp_Hebrew },
-  { 261, PT_SC, ucp_Hiragana },
-  { 270, PT_SC, ucp_Inherited },
-  { 280, PT_SC, ucp_Kannada },
-  { 288, PT_SC, ucp_Katakana },
-  { 297, PT_SC, ucp_Kharoshthi },
-  { 308, PT_SC, ucp_Khmer },
-  { 314, PT_GC, ucp_L },
-  { 316, PT_LAMP, 0 },
-  { 319, PT_SC, ucp_Lao },
-  { 323, PT_SC, ucp_Latin },
-  { 329, PT_SC, ucp_Limbu },
-  { 335, PT_SC, ucp_Linear_B },
-  { 344, PT_PC, ucp_Ll },
-  { 347, PT_PC, ucp_Lm },
-  { 350, PT_PC, ucp_Lo },
-  { 353, PT_PC, ucp_Lt },
-  { 356, PT_PC, ucp_Lu },
-  { 359, PT_GC, ucp_M },
-  { 361, PT_SC, ucp_Malayalam },
-  { 371, PT_PC, ucp_Mc },
-  { 374, PT_PC, ucp_Me },
-  { 377, PT_PC, ucp_Mn },
-  { 380, PT_SC, ucp_Mongolian },
-  { 390, PT_SC, ucp_Myanmar },
-  { 398, PT_GC, ucp_N },
-  { 400, PT_PC, ucp_Nd },
-  { 403, PT_SC, ucp_New_Tai_Lue },
-  { 415, PT_SC, ucp_Nko },
-  { 419, PT_PC, ucp_Nl },
-  { 422, PT_PC, ucp_No },
-  { 425, PT_SC, ucp_Ogham },
-  { 431, PT_SC, ucp_Old_Italic },
-  { 442, PT_SC, ucp_Old_Persian },
-  { 454, PT_SC, ucp_Oriya },
-  { 460, PT_SC, ucp_Osmanya },
-  { 468, PT_GC, ucp_P },
-  { 470, PT_PC, ucp_Pc },
-  { 473, PT_PC, ucp_Pd },
-  { 476, PT_PC, ucp_Pe },
-  { 479, PT_PC, ucp_Pf },
-  { 482, PT_SC, ucp_Phags_Pa },
-  { 491, PT_SC, ucp_Phoenician },
-  { 502, PT_PC, ucp_Pi },
-  { 505, PT_PC, ucp_Po },
-  { 508, PT_PC, ucp_Ps },
-  { 511, PT_SC, ucp_Runic },
-  { 517, PT_GC, ucp_S },
-  { 519, PT_PC, ucp_Sc },
-  { 522, PT_SC, ucp_Shavian },
-  { 530, PT_SC, ucp_Sinhala },
-  { 538, PT_PC, ucp_Sk },
-  { 541, PT_PC, ucp_Sm },
-  { 544, PT_PC, ucp_So },
-  { 547, PT_SC, ucp_Syloti_Nagri },
-  { 560, PT_SC, ucp_Syriac },
-  { 567, PT_SC, ucp_Tagalog },
-  { 575, PT_SC, ucp_Tagbanwa },
-  { 584, PT_SC, ucp_Tai_Le },
-  { 591, PT_SC, ucp_Tamil },
-  { 597, PT_SC, ucp_Telugu },
-  { 604, PT_SC, ucp_Thaana },
-  { 611, PT_SC, ucp_Thai },
-  { 616, PT_SC, ucp_Tibetan },
-  { 624, PT_SC, ucp_Tifinagh },
-  { 633, PT_SC, ucp_Ugaritic },
-  { 642, PT_SC, ucp_Yi },
-  { 645, PT_GC, ucp_Z },
-  { 647, PT_PC, ucp_Zl },
-  { 650, PT_PC, ucp_Zp },
-  { 653, PT_PC, ucp_Zs }
+  {   0, PT_ANY, 0 },
+  {   4, PT_SC, ucp_Arabic },
+  {  11, PT_SC, ucp_Armenian },
+  {  20, PT_SC, ucp_Balinese },
+  {  29, PT_SC, ucp_Bengali },
+  {  37, PT_SC, ucp_Bopomofo },
+  {  46, PT_SC, ucp_Braille },
+  {  54, PT_SC, ucp_Buginese },
+  {  63, PT_SC, ucp_Buhid },
+  {  69, PT_GC, ucp_C },
+  {  71, PT_SC, ucp_Canadian_Aboriginal },
+  {  91, PT_SC, ucp_Carian },
+  {  98, PT_PC, ucp_Cc },
+  { 101, PT_PC, ucp_Cf },
+  { 104, PT_SC, ucp_Cham },
+  { 109, PT_SC, ucp_Cherokee },
+  { 118, PT_PC, ucp_Cn },
+  { 121, PT_PC, ucp_Co },
+  { 124, PT_SC, ucp_Common },
+  { 131, PT_SC, ucp_Coptic },
+  { 138, PT_PC, ucp_Cs },
+  { 141, PT_SC, ucp_Cuneiform },
+  { 151, PT_SC, ucp_Cypriot },
+  { 159, PT_SC, ucp_Cyrillic },
+  { 168, PT_SC, ucp_Deseret },
+  { 176, PT_SC, ucp_Devanagari },
+  { 187, PT_SC, ucp_Ethiopic },
+  { 196, PT_SC, ucp_Georgian },
+  { 205, PT_SC, ucp_Glagolitic },
+  { 216, PT_SC, ucp_Gothic },
+  { 223, PT_SC, ucp_Greek },
+  { 229, PT_SC, ucp_Gujarati },
+  { 238, PT_SC, ucp_Gurmukhi },
+  { 247, PT_SC, ucp_Han },
+  { 251, PT_SC, ucp_Hangul },
+  { 258, PT_SC, ucp_Hanunoo },
+  { 266, PT_SC, ucp_Hebrew },
+  { 273, PT_SC, ucp_Hiragana },
+  { 282, PT_SC, ucp_Inherited },
+  { 292, PT_SC, ucp_Kannada },
+  { 300, PT_SC, ucp_Katakana },
+  { 309, PT_SC, ucp_Kayah_Li },
+  { 318, PT_SC, ucp_Kharoshthi },
+  { 329, PT_SC, ucp_Khmer },
+  { 335, PT_GC, ucp_L },
+  { 337, PT_LAMP, 0 },
+  { 340, PT_SC, ucp_Lao },
+  { 344, PT_SC, ucp_Latin },
+  { 350, PT_SC, ucp_Lepcha },
+  { 357, PT_SC, ucp_Limbu },
+  { 363, PT_SC, ucp_Linear_B },
+  { 372, PT_PC, ucp_Ll },
+  { 375, PT_PC, ucp_Lm },
+  { 378, PT_PC, ucp_Lo },
+  { 381, PT_PC, ucp_Lt },
+  { 384, PT_PC, ucp_Lu },
+  { 387, PT_SC, ucp_Lycian },
+  { 394, PT_SC, ucp_Lydian },
+  { 401, PT_GC, ucp_M },
+  { 403, PT_SC, ucp_Malayalam },
+  { 413, PT_PC, ucp_Mc },
+  { 416, PT_PC, ucp_Me },
+  { 419, PT_PC, ucp_Mn },
+  { 422, PT_SC, ucp_Mongolian },
+  { 432, PT_SC, ucp_Myanmar },
+  { 440, PT_GC, ucp_N },
+  { 442, PT_PC, ucp_Nd },
+  { 445, PT_SC, ucp_New_Tai_Lue },
+  { 457, PT_SC, ucp_Nko },
+  { 461, PT_PC, ucp_Nl },
+  { 464, PT_PC, ucp_No },
+  { 467, PT_SC, ucp_Ogham },
+  { 473, PT_SC, ucp_Ol_Chiki },
+  { 482, PT_SC, ucp_Old_Italic },
+  { 493, PT_SC, ucp_Old_Persian },
+  { 505, PT_SC, ucp_Oriya },
+  { 511, PT_SC, ucp_Osmanya },
+  { 519, PT_GC, ucp_P },
+  { 521, PT_PC, ucp_Pc },
+  { 524, PT_PC, ucp_Pd },
+  { 527, PT_PC, ucp_Pe },
+  { 530, PT_PC, ucp_Pf },
+  { 533, PT_SC, ucp_Phags_Pa },
+  { 542, PT_SC, ucp_Phoenician },
+  { 553, PT_PC, ucp_Pi },
+  { 556, PT_PC, ucp_Po },
+  { 559, PT_PC, ucp_Ps },
+  { 562, PT_SC, ucp_Rejang },
+  { 569, PT_SC, ucp_Runic },
+  { 575, PT_GC, ucp_S },
+  { 577, PT_SC, ucp_Saurashtra },
+  { 588, PT_PC, ucp_Sc },
+  { 591, PT_SC, ucp_Shavian },
+  { 599, PT_SC, ucp_Sinhala },
+  { 607, PT_PC, ucp_Sk },
+  { 610, PT_PC, ucp_Sm },
+  { 613, PT_PC, ucp_So },
+  { 616, PT_SC, ucp_Sundanese },
+  { 626, PT_SC, ucp_Syloti_Nagri },
+  { 639, PT_SC, ucp_Syriac },
+  { 646, PT_SC, ucp_Tagalog },
+  { 654, PT_SC, ucp_Tagbanwa },
+  { 663, PT_SC, ucp_Tai_Le },
+  { 670, PT_SC, ucp_Tamil },
+  { 676, PT_SC, ucp_Telugu },
+  { 683, PT_SC, ucp_Thaana },
+  { 690, PT_SC, ucp_Thai },
+  { 695, PT_SC, ucp_Tibetan },
+  { 703, PT_SC, ucp_Tifinagh },
+  { 712, PT_SC, ucp_Ugaritic },
+  { 721, PT_SC, ucp_Vai },
+  { 725, PT_SC, ucp_Yi },
+  { 728, PT_GC, ucp_Z },
+  { 730, PT_PC, ucp_Zl },
+  { 733, PT_PC, ucp_Zp },
+  { 736, PT_PC, ucp_Zs }
  };
  
  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
diff --git a/glib/pcre/pcre_ucp_searchfuncs.c b/glib/pcre/pcre_ucp_searchfuncs.c

index 77ec8d14b50e6927991666f79f711d28910d2da9..9060fbccd12df24a0390cc6858b99f84b62f7b38 100644 (file)
--- a/glib/pcre/pcre_ucp_searchfuncs.c
+++ b/glib/pcre/pcre_ucp_searchfuncs.c
@@ -43,58 +43,9 @@ POSSIBILITY OF SUCH DAMAGE.
  /* This module contains code for searching the table of Unicode character
  properties. */
  
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
  #include "pcre_internal.h"
  
  #include "ucp.h"               /* Category definitions */
-#include "ucpinternal.h"       /* Internal table details */
-
-
-/* Table to translate from particular type value to the general value. */
-
-static int ucp_gentype[] = {
-  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
-  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
-  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
-  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
-  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
-  ucp_P, ucp_P,                       /* Ps, Po */
-  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
-  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
-};
-
-
-
-/*************************************************
-*         Search table and return type           *
-*************************************************/
-
-/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
-character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
-
-Arguments:
-  c           the character value
-  type_ptr    the detailed character type is returned here
-  script_ptr  the script is returned here
-
-Returns:      the character type category
-*/
-
-int
-_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
-{
-/* Note that the Unicode types have the same values in glib and in
- * PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
- * ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
-*type_ptr = g_unichar_type(c);
-*script_ptr = g_unichar_get_script(c);
-return ucp_gentype[*type_ptr];
-}
-
-
  
  
  /*************************************************
@@ -113,7 +64,7 @@ Returns:      the other case or NOTACHAR if none
  unsigned int
  _pcre_ucp_othercase(const unsigned int c)
  {
-int other_case = NOTACHAR;
+unsigned int other_case = NOTACHAR;
  
  if (g_unichar_islower(c))
    other_case = g_unichar_toupper(c);
diff --git a/glib/pcre/pcre_valid_utf8.c b/glib/pcre/pcre_valid_utf8.c

index b7671a96ea0fc757db97ad20ce505b959461e05c..a5766b454d88cab8d15fff871c74f33143ac60fe 100644 (file)
--- a/glib/pcre/pcre_valid_utf8.c
+++ b/glib/pcre/pcre_valid_utf8.c
@@ -1,4 +1,3 @@
-#include "config.h"
  #include "pcre_internal.h"
  
  /*
diff --git a/glib/pcre/pcre_version.c b/glib/pcre/pcre_version.c

index 697b44b3910c5a41a13ff3d62be71e19de872493..7067cd4aebac39e4356a1d4b812fbd1937f9ce6b 100644 (file)
--- a/glib/pcre/pcre_version.c
+++ b/glib/pcre/pcre_version.c
@@ -79,7 +79,7 @@ I could find no way of detecting that a macro is defined as an empty string at
  pre-processor time. This hack uses a standard trick for avoiding calling
  the STRING macro with an empty argument when doing the test. */
  
-PCRE_EXP_DEFN const char *
+PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
  pcre_version(void)
  {
  return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c

index cfb5a77b503a545c333c2fb26f86c88a01a52d8f..e8e7a50494031fe84cfe7da0a49bf91402cb7caa 100644 (file)
--- a/glib/pcre/pcre_xclass.c
+++ b/glib/pcre/pcre_xclass.c
@@ -104,9 +104,7 @@ while ((t = *data++) != XCL_END)
  #ifdef SUPPORT_UCP
    else  /* XCL_PROP & XCL_NOTPROP */
      {
-    int chartype, script;
-    int category = _pcre_ucp_findprop(c, &chartype, &script);
-
+    int chartype = UCD_CHARTYPE(c);
      switch(*data)
        {
        case PT_ANY:
@@ -119,7 +117,7 @@ while ((t = *data++) != XCL_END)
        break;
  
        case PT_GC:
-      if ((data[1] == category) == (t == XCL_PROP)) return !negated;
+      if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP)) return !negated;
        break;
  
        case PT_PC:
@@ -127,7 +125,7 @@ while ((t = *data++) != XCL_END)
        break;
  
        case PT_SC:
-      if ((data[1] == script) == (t == XCL_PROP)) return !negated;
+      if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
        break;
  
        /* This should never occur, but compilers may mutter if there is no
diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h

index b2616b273209e938355e79faf6176aeaf237be0a..fe910ce0031fc325e7dcb5ecc14fb0cea75dd51d 100644 (file)
--- a/glib/pcre/ucp.h
+++ b/glib/pcre/ucp.h
@@ -125,7 +125,18 @@ enum {
    ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,   /* New for Unicode 5.0.0 */
    ucp_Nko = G_UNICODE_SCRIPT_NKO,              /* New for Unicode 5.0.0 */
    ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,     /* New for Unicode 5.0.0 */
-  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN  /* New for Unicode 5.0.0 */
+  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, /* New for Unicode 5.0.0 */
+  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,         /* New for Unicode 5.1 */
+  ucp_Cham = G_UNICODE_SCRIPT_CHAM,             /* New for Unicode 5.1 */
+  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,     /* New for Unicode 5.1 */
+  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,         /* New for Unicode 5.1 */
+  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,         /* New for Unicode 5.1 */
+  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,         /* New for Unicode 5.1 */
+  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,     /* New for Unicode 5.1 */
+  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,         /* New for Unicode 5.1 */
+  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, /* New for Unicode 5.1 */
+  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,   /* New for Unicode 5.1 */
+  ucp_Vai = G_UNICODE_SCRIPT_VAI                /* New for Unicode 5.1 */
  };
  
  #endif
author	Matthias Clasen <matthiasc@src.gnome.org>
	Sun, 18 Jan 2009 06:32:03 +0000 (06:32 +0000)
committer	Matthias Clasen <matthiasc@src.gnome.org>
	Sun, 18 Jan 2009 06:32:03 +0000 (06:32 +0000)
ChangeLog		patch \| blob \| history
glib/pcre/pcre.h		patch \| blob \| history
glib/pcre/pcre_chartables.c		patch \| blob \| history
glib/pcre/pcre_compile.c		patch \| blob \| history
glib/pcre/pcre_config.c		patch \| blob \| history
glib/pcre/pcre_dfa_exec.c		patch \| blob \| history
glib/pcre/pcre_exec.c		patch \| blob \| history
glib/pcre/pcre_fullinfo.c		patch \| blob \| history
glib/pcre/pcre_get.c		patch \| blob \| history
glib/pcre/pcre_globals.c		patch \| blob \| history
glib/pcre/pcre_info.c		patch \| blob \| history
glib/pcre/pcre_internal.h		patch \| blob \| history
glib/pcre/pcre_ord2utf8.c		patch \| blob \| history
glib/pcre/pcre_refcount.c		patch \| blob \| history
glib/pcre/pcre_study.c		patch \| blob \| history
glib/pcre/pcre_tables.c		patch \| blob \| history
glib/pcre/pcre_ucp_searchfuncs.c		patch \| blob \| history
glib/pcre/pcre_valid_utf8.c		patch \| blob \| history
glib/pcre/pcre_version.c		patch \| blob \| history
glib/pcre/pcre_xclass.c		patch \| blob \| history
glib/pcre/ucp.h		patch \| blob \| history