*** empty log message ***
authorpcg <pcg>
Wed, 3 Mar 2004 17:15:49 +0000 (17:15 +0000)
committerpcg <pcg>
Wed, 3 Mar 2004 17:15:49 +0000 (17:15 +0000)
src/encoding.C

index 41fe41f33344847b5b672e6ca23caf922c943c08..b44ff3a525917a7754ebd1b974ba2822e0b46351 100644 (file)
@@ -10,50 +10,64 @@ const struct n2cs {
   codeset cs;
 } n2cs[] = {
   /* first one found is the normalized one */
-  { "ISO88591",                CS_ISO8859_1    },
-  { "ISO8859PRIMARY",  CS_ISO8859_1    }, // some stupid fonts use this (hi tigert)
-  { "ISO88592",                CS_ISO8859_2    },
-  { "ISO88593",                CS_ISO8859_3    },
-  { "ISO88594",                CS_ISO8859_4    },
-  { "ISO88595",                CS_ISO8859_5    },
-  { "ISO88596",                CS_ISO8859_6    },
-  { "ISO88597",                CS_ISO8859_7    },
-  { "ISO88598",                CS_ISO8859_8    },
-  { "ISO88599",                CS_ISO8859_9    },
-  { "ISO885910",       CS_ISO8859_10   },
-  { "ISO885911",       CS_ISO8859_11   },
-  { "ISO885913",       CS_ISO8859_13   },
-  { "ISO885914",       CS_ISO8859_14   },
-  { "ISO885915",       CS_ISO8859_15   },
-  { "FCD885915",       CS_ISO8859_15   },
-  { "ISO885916",       CS_ISO8859_16   },
-
-  { "ISO106461",       CS_UNICODE      },
-  { "UNICODE",         CS_UNICODE      },
-  { "UTF8",            CS_UNICODE      },
-
-  { "ASCII",           CS_US_ASCII     },
-  { "USASCII",         CS_US_ASCII     },
-  { "ANSIX341968",     CS_US_ASCII     },
-
-  { "KOI8R",           CS_KOI8_R       },
-  { "GOST19768741",     CS_KOI8_R       },
-  { "KOI8U",           CS_KOI8_U       },
-
-  { "KSC560119870",    CS_KSC5601_1987_0 },
-  { "KSX100119970",    CS_KSC5601_1987_0 },
-  { "KSX100119980",    CS_KSC5601_1987_0 }, // adds johab
-
-  { "GB231219800",     CS_GB2312_1980_0 },
-
-  { "VISCII",          CS_VISCII       },
-  { "VISCII111",       CS_VISCII       },
-  { "TIS62025291",     CS_VISCII       }, /* close enough */
-
-  { "JISX020119760",   CS_JIS0201_1976_0 },
-  { "JISX020819830",   CS_JIS0208_1983_0 },
-  { "JISX020819900",   CS_JIS0208_1983_0 }, /* ehrm. */
-  { "JISX021219900",   CS_JIS0212_1990_0 },
+  { "ISO88591",                CS_ISO8859_1        },
+  { "ISO8859PRIMARY",  CS_ISO8859_1        }, // some stupid fonts use this (hi tigert)
+  { "ISO88592",                CS_ISO8859_2        },
+  { "ISO88593",                CS_ISO8859_3        },
+  { "ISO88594",                CS_ISO8859_4        },
+  { "ISO88595",                CS_ISO8859_5        },
+  { "ISO88596",                CS_ISO8859_6        },
+  { "ISO88597",                CS_ISO8859_7        },
+  { "ISO88598",                CS_ISO8859_8        },
+  { "ISO88599",                CS_ISO8859_9        },
+  { "ISO885910",       CS_ISO8859_10       },
+  { "ISO885911",       CS_ISO8859_11       },
+  { "ISO885913",       CS_ISO8859_13       },
+  { "ISO885914",       CS_ISO8859_14       },
+  { "ISO885915",       CS_ISO8859_15       },
+  { "FCD885915",       CS_ISO8859_15       },
+  { "ISO885916",       CS_ISO8859_16       },
+                                            
+  { "ISO10646*",       CS_UNICODE          },
+  { "UNICODE",         CS_UNICODE          },
+  { "UTF8",            CS_UNICODE          },
+                                            
+  { "ASCII",           CS_US_ASCII         },
+  { "USASCII",         CS_US_ASCII         },
+  { "ANSIX341968",     CS_US_ASCII         },
+                                            
+  { "KOI8R",           CS_KOI8_R           },
+  { "GOST1976874*",     CS_KOI8_R           },
+  { "KOI8RU",          CS_KOI8_U           },
+  { "KOI8U",           CS_KOI8_U           },
+
+  { "VISCII*",         CS_VISCII           },
+  { "TIS62025291",     CS_VISCII           }, // close enough
+                                            
+  { "JISX0201*",       CS_JIS0201_1976_0   },
+  { "JISX0208*",       CS_JIS0208_1983_0   }, // also wrongly matches -1990-0 (check Encode::JP)
+  { "JISX0212*",       CS_JIS0212_1990_0   },
+  { "JISX0221*",       CS_UNICODE          },
+                                            
+  { "KSC5601*",                CS_KSC5601_1987_0   },
+  { "KSX1001*",                CS_KSC5601_1987_0   },
+  { "KSC5700*",                CS_UNICODE          }, // unicode plus extensions
+                                            
+  { "BIG5P*",          CS_BIG5_PLUS        },
+  { "BIG5ETEN*",       CS_BIG5_EXT         },
+  { "BIG5*",           CS_BIG5             },
+  { "GB2312*",         CS_GB2312_1980_0    },
+  { "GB6345*",         CS_GB2312_1980_0    }, // slightly different to gb2312??
+  { "GB8565*",         CS_GB2312_1980_0    }, // a superset of gb2312??
+  { "GB13000*",                CS_UNICODE          },
+  { "CNS1164319921",   CS_CNS11643_1992_1  },
+  { "CNS1164319922",   CS_CNS11643_1992_2  },
+  { "CNS1164319923",   CS_CNS11643_1992_3  },
+  { "CNS1164319924",   CS_CNS11643_1992_4  },
+  { "CNS1164319925",   CS_CNS11643_1992_5  },
+  { "CNS1164319926",   CS_CNS11643_1992_6  },
+  { "CNS1164319927",   CS_CNS11643_1992_7  },
+  { "CNS116431992F",   CS_CNS11643_1992_F  },
 
   { 0,                 CS_UNKNOWN      }
 };
@@ -87,8 +101,13 @@ codeset_from_name (const char *name)
   const struct n2cs *i = n2cs;
 
   do {
-    if (!strcmp (name, i->name))
-      return i->cs;
+    int len = strlen (i->name);
+
+    if ((i->name[len - 1] == '*'
+         && !strncmp (name, i->name, len - 1))
+        || !strcmp (name, i->name))
+        return i->cs;
+
   } while ((++i)->name);
 
   return CS_UNKNOWN;
@@ -154,6 +173,7 @@ struct rxvt_codeset_conv_special : rxvt_codeset_conv {
 //#define ENCODING_CN
 
 #include "table/gb2312_1980_0.h"
+#include "table/big5.h"
 
 //#define ENCODING_CN_EXT
 
@@ -183,6 +203,7 @@ struct rxvt_codeset_conv_special : rxvt_codeset_conv {
 #include "table/jis0213_1.h"
 #include "table/jis0213_2.h"
 
+// order must match table in encoding.h(!)
 const rxvt_codeset_conv *rxvt_codeset[NUM_CODESETS] = {
   &rxvt_codeset_conv_unknown,
   &rxvt_codeset_conv_special,
@@ -227,6 +248,7 @@ const rxvt_codeset_conv *rxvt_codeset[NUM_CODESETS] = {
   &rxvt_codeset_conv_cns11643_1992_6,
   &rxvt_codeset_conv_cns11643_1992_7,
   &rxvt_codeset_conv_cns11643_1992_f,
+  &rxvt_codeset_conv_big5,
   &rxvt_codeset_conv_big5_ext,
   &rxvt_codeset_conv_big5_plus,