Exposed utf8_isvalid as a public function to reduce code duplication. Allow a UTF-8 string of length 0 to be treated as valid.

2012-05-11 15:03:35 +00:00
parent 35e953ba8b
commit 54c4ce342c
3 changed files with 21 additions and 117 deletions
@@ -145,6 +145,9 @@ extern "C" {
        BACNET_CHARACTER_STRING * char_string);
    bool characterstring_valid(
        BACNET_CHARACTER_STRING * char_string);
    bool utf8_isvalid(
        const char *str,
        size_t length);
    /* returns false if the string exceeds capacity
       initialize by using length=0 */
@@ -35,107 +35,6 @@
 #include "device.h"
 #include "bname.h"
 /* Basic UTF-8 manipulation routines
  by Jeff Bezanson
  placed in the public domain Fall 2005 */
 /* Number of continuation bytes that follow a given lead byte, indexed by
    the lead byte value: 0 for 0x00-0xBF, 1 for 0xC0-0xDF, 2 for 0xE0-0xEF,
    3 for 0xF0-0xF7, 4 for 0xF8-0xFB and 5 for 0xFC-0xFF. */
 static const char trailingBytesForUTF8[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4,
        4, 4, 4, 5, 5, 5, 5
 };
 /* Checks whether a byte buffer holds well-formed UTF-8.
    Based on the valid_utf8 routine from the PCRE library by Philip Hazel.
    str    - bytes to check; need not be null terminated
    length - number of bytes to check (bytes, not characters, since
             without knowing whether the string is valid it's hard to
             know how many characters there are!)
    Returns 1 when every sequence inside the first 'length' bytes is
    complete, starts with a legal lead byte, and is not an overlong
    encoding; returns 0 otherwise. An embedded null byte is rejected.
    A length of zero or less is reported as valid without touching str.
    A null str with a positive length is reported as invalid.
    No byte at or beyond str + length is ever read. */
 static int utf8_isvalid(
    const char *str,
    int length)
 {
    const unsigned char *p, *pend;
    unsigned char c;
    int ab;
    /* nothing to inspect; also avoids forming a pointer before str */
    if (length <= 0) {
        return 1;
    }
    if (!str) {
        return 0;
    }
    p = (const unsigned char *) str;
    pend = p + length;
    for (; p < pend; p++) {
        c = *p;
        /* null in middle of string */
        if (c == 0) {
            return 0;
        }
        /* ASCII character */
        if (c < 128) {
            continue;
        }
        /* a continuation byte (10xx xxxx) cannot start a sequence */
        if ((c & 0xc0) != 0xc0) {
            return 0;
        }
        ab = trailingBytesForUTF8[c];
        /* all trailing bytes must lie inside the buffer; a sequence that
           is cut off by the end of the buffer is invalid, and checking
           here keeps the reads below in bounds */
        if ((pend - p - 1) < ab) {
            return 0;
        }
        p++;
        /* Check top bits in the second byte */
        if ((*p & 0xc0) != 0x80) {
            return 0;
        }
        /* Check for overlong sequences for each different length */
        switch (ab) {
                /* Check for xx00 000x */
            case 1:
                if ((c & 0x3e) == 0) {
                    return 0;
                }
                continue;       /* We know there aren't any more bytes to check */
                /* Check for 1110 0000, xx0x xxxx */
            case 2:
                if (c == 0xe0 && (*p & 0x20) == 0) {
                    return 0;
                }
                break;
                /* Check for 1111 0000, xx00 xxxx */
            case 3:
                if (c == 0xf0 && (*p & 0x30) == 0) {
                    return 0;
                }
                break;
                /* Check for 1111 1000, xx00 0xxx */
            case 4:
                if (c == 0xf8 && (*p & 0x38) == 0) {
                    return 0;
                }
                break;
                /* Check for leading 0xfe or 0xff,
                   and then for 1111 1100, xx00 00xx */
            case 5:
                if (c == 0xfe || c == 0xff || (c == 0xfc &&
                        (*p & 0x3c) == 0)) {
                    return 0;
                }
                break;
            default:
                break;
        }
        /* Check for valid bytes after the 2nd, if any; all must start 10 */
        while (--ab > 0) {
            if ((*(++p) & 0xc0) != 0x80) {
                return 0;
            }
        }
    }
    return 1;
 }
 static bool bacnet_name_isvalid(
    uint8_t encoding,
    uint8_t length,
@@ -145,10 +44,8 @@ static bool bacnet_name_isvalid(
    if ((encoding < MAX_CHARACTER_STRING_ENCODING) &&
        (length <= NV_EEPROM_NAME_SIZE)) {
-        if (encoding == CHARACTER_ANSI_X34) {
+        if (encoding == CHARACTER_UTF8) {
-            if (utf8_isvalid(str, length)) {
+            valid = utf8_isvalid(str, length);
                valid = true;
            }
        } else {
            valid = true;
        }
@@ -521,7 +521,7 @@ static const char trailingBytesForUTF8[256] = {
 /* based on the valid_utf8 routine from the PCRE library by Philip Hazel
   length is in bytes, since without knowing whether the string is valid
   it's hard to know how many characters there are! */
-static int utf8_isvalid(
+bool utf8_isvalid(
    const char *str,
    size_t length)
 {
@@ -529,72 +529,76 @@ static int utf8_isvalid(
    unsigned char c;
    size_t ab;
    /* empty string is valid */
    if (length == 0) {
        return true;
    }
    for (p = (unsigned char *) str; p < pend; p++) {
        c = *p;
        /* null in middle of string */
        if (c == 0) {
-            return 0;
+            return false;
        }
        /* ASCII character */
        if (c < 128) {
            continue;
        }
        if ((c & 0xc0) != 0xc0) {
-            return 0;
+            return false;
        }
        ab = (size_t)trailingBytesForUTF8[c];
        if (length < ab) {
-            return 0;
+            return false;
        }
        length -= ab;
        p++;
        /* Check top bits in the second byte */
        if ((*p & 0xc0) != 0x80) {
-            return 0;
+            return false;
        }
        /* Check for overlong sequences for each different length */
        switch (ab) {
                /* Check for xx00 000x */
            case 1:
                if ((c & 0x3e) == 0)
-                    return 0;
+                    return false;
                continue;       /* We know there aren't any more bytes to check */
                /* Check for 1110 0000, xx0x xxxx */
            case 2:
                if (c == 0xe0 && (*p & 0x20) == 0)
-                    return 0;
+                    return false;
                break;
                /* Check for 1111 0000, xx00 xxxx */
            case 3:
                if (c == 0xf0 && (*p & 0x30) == 0)
-                    return 0;
+                    return false;
                break;
                /* Check for 1111 1000, xx00 0xxx */
            case 4:
                if (c == 0xf8 && (*p & 0x38) == 0)
-                    return 0;
+                    return false;
                break;
                /* Check for leading 0xfe or 0xff,
                   and then for 1111 1100, xx00 00xx */
            case 5:
                if (c == 0xfe || c == 0xff || (c == 0xfc && (*p & 0x3c) == 0))
-                    return 0;
+                    return false;
                break;
        }
        /* Check for valid bytes after the 2nd, if any; all must start 10 */
        while (--ab > 0) {
            if ((*(++p) & 0xc0) != 0x80)
-                return 0;
+                return false;
        }
    }
-    return 1;
+    return true;
 }
 bool characterstring_valid(