Exposed utf8_isvalid as a public function to reduce duplication. Allow a UTF-8 string of length 0 to be valid.
This commit is contained in:
@@ -145,6 +145,9 @@ extern "C" {
|
||||
BACNET_CHARACTER_STRING * char_string);
|
||||
bool characterstring_valid(
|
||||
BACNET_CHARACTER_STRING * char_string);
|
||||
bool utf8_isvalid(
|
||||
const char *str,
|
||||
size_t length);
|
||||
|
||||
/* returns false if the string exceeds capacity
|
||||
initialize by using length=0 */
|
||||
|
||||
@@ -35,107 +35,6 @@
|
||||
#include "device.h"
|
||||
#include "bname.h"
|
||||
|
||||
/* Basic UTF-8 manipulation routines
   by Jeff Bezanson
   placed in the public domain Fall 2005 */

/* Number of trailing (continuation) bytes announced by each possible
   lead byte.  Entries 0x00..0xBF are 0; 0xC0..0xDF are 1; 0xE0..0xEF
   are 2; 0xF0..0xF7 are 3; 0xF8..0xFB are 4; 0xFC..0xFF are 5. */
static const char trailingBytesForUTF8[256] = {
    /* 0x00 - 0xBF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 - 0xDF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0 - 0xEF */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xF0 - 0xFF */
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};

/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
   length is in bytes, since without knowing whether the string is valid
   it's hard to know how many characters there are!

   str    - bytes to check; need not be NUL terminated
   length - number of bytes of str to examine

   Returns 1 when the first length bytes form well-formed sequences,
   0 otherwise.  An empty string (length 0) is valid.  A negative
   length, or a NULL str with a non-zero length, is rejected.
   An embedded NUL byte, a stray continuation byte, an overlong
   encoding, or a sequence whose trailing bytes would extend past
   length all make the string invalid.  As in the original routine,
   the legacy 5 and 6 byte forms are still accepted. */
static int utf8_isvalid(
    const char *str,
    int length)
{
    const unsigned char *p;
    const unsigned char *pend;
    unsigned char c;
    unsigned char second;
    int ab;
    int i;

    if (length < 0) {
        return 0;
    }
    /* empty string is valid */
    if (length == 0) {
        return 1;
    }
    if (!str) {
        return 0;
    }

    p = (const unsigned char *) str;
    pend = p + length;
    while (p < pend) {
        c = *p++;
        /* null in middle of string */
        if (c == 0) {
            return 0;
        }
        /* ASCII character */
        if (c < 128) {
            continue;
        }
        /* a continuation byte cannot start a sequence */
        if ((c & 0xc0) != 0xc0) {
            return 0;
        }
        ab = trailingBytesForUTF8[c];
        /* Every trailing byte must lie inside the caller's buffer.
           Compare against the bytes actually remaining after the lead
           byte, so that nothing at or beyond pend is ever read. */
        if ((pend - p) < ab) {
            return 0;
        }
        /* Check top bits in the second byte */
        second = *p;
        if ((second & 0xc0) != 0x80) {
            return 0;
        }
        /* Check for overlong sequences for each different length */
        switch (ab) {
            case 1:
                /* Check for xx00 000x */
                if ((c & 0x3e) == 0) {
                    return 0;
                }
                break;
            case 2:
                /* Check for 1110 0000, xx0x xxxx */
                if ((c == 0xe0) && ((second & 0x20) == 0)) {
                    return 0;
                }
                break;
            case 3:
                /* Check for 1111 0000, xx00 xxxx */
                if ((c == 0xf0) && ((second & 0x30) == 0)) {
                    return 0;
                }
                break;
            case 4:
                /* Check for 1111 1000, xx00 0xxx */
                if ((c == 0xf8) && ((second & 0x38) == 0)) {
                    return 0;
                }
                break;
            case 5:
                /* Check for leading 0xfe or 0xff,
                   and then for 1111 1100, xx00 00xx */
                if ((c == 0xfe) || (c == 0xff) ||
                    ((c == 0xfc) && ((second & 0x3c) == 0))) {
                    return 0;
                }
                break;
            default:
                /* not reachable: the table yields 1..5 for c >= 0xc0 */
                return 0;
        }
        /* Check for valid bytes after the 2nd, if any; all must start 10 */
        for (i = 1; i < ab; i++) {
            if ((p[i] & 0xc0) != 0x80) {
                return 0;
            }
        }
        /* step over the trailing bytes just verified */
        p += ab;
    }

    return 1;
}
|
||||
|
||||
static bool bacnet_name_isvalid(
|
||||
uint8_t encoding,
|
||||
uint8_t length,
|
||||
@@ -145,10 +44,8 @@ static bool bacnet_name_isvalid(
|
||||
|
||||
if ((encoding < MAX_CHARACTER_STRING_ENCODING) &&
|
||||
(length <= NV_EEPROM_NAME_SIZE)) {
|
||||
if (encoding == CHARACTER_ANSI_X34) {
|
||||
if (utf8_isvalid(str, length)) {
|
||||
valid = true;
|
||||
}
|
||||
if (encoding == CHARACTER_UTF8) {
|
||||
valid = utf8_isvalid(str, length);
|
||||
} else {
|
||||
valid = true;
|
||||
}
|
||||
|
||||
+16
-12
@@ -521,7 +521,7 @@ static const char trailingBytesForUTF8[256] = {
|
||||
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
|
||||
length is in bytes, since without knowing whether the string is valid
|
||||
it's hard to know how many characters there are! */
|
||||
static int utf8_isvalid(
|
||||
bool utf8_isvalid(
|
||||
const char *str,
|
||||
size_t length)
|
||||
{
|
||||
@@ -529,72 +529,76 @@ static int utf8_isvalid(
|
||||
unsigned char c;
|
||||
size_t ab;
|
||||
|
||||
/* empty string is valid */
|
||||
if (length == 0) {
|
||||
return true;
|
||||
}
|
||||
for (p = (unsigned char *) str; p < pend; p++) {
|
||||
c = *p;
|
||||
/* null in middle of string */
|
||||
if (c == 0) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
/* ASCII character */
|
||||
if (c < 128) {
|
||||
continue;
|
||||
}
|
||||
if ((c & 0xc0) != 0xc0) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
ab = (size_t)trailingBytesForUTF8[c];
|
||||
if (length < ab) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
length -= ab;
|
||||
|
||||
p++;
|
||||
/* Check top bits in the second byte */
|
||||
if ((*p & 0xc0) != 0x80) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
/* Check for overlong sequences for each different length */
|
||||
switch (ab) {
|
||||
/* Check for xx00 000x */
|
||||
case 1:
|
||||
if ((c & 0x3e) == 0)
|
||||
return 0;
|
||||
return false;
|
||||
continue; /* We know there aren't any more bytes to check */
|
||||
|
||||
/* Check for 1110 0000, xx0x xxxx */
|
||||
case 2:
|
||||
if (c == 0xe0 && (*p & 0x20) == 0)
|
||||
return 0;
|
||||
return false;
|
||||
break;
|
||||
|
||||
/* Check for 1111 0000, xx00 xxxx */
|
||||
case 3:
|
||||
if (c == 0xf0 && (*p & 0x30) == 0)
|
||||
return 0;
|
||||
return false;
|
||||
break;
|
||||
|
||||
/* Check for 1111 1000, xx00 0xxx */
|
||||
case 4:
|
||||
if (c == 0xf8 && (*p & 0x38) == 0)
|
||||
return 0;
|
||||
return false;
|
||||
break;
|
||||
|
||||
/* Check for leading 0xfe or 0xff,
|
||||
and then for 1111 1100, xx00 00xx */
|
||||
case 5:
|
||||
if (c == 0xfe || c == 0xff || (c == 0xfc && (*p & 0x3c) == 0))
|
||||
return 0;
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
||||
while (--ab > 0) {
|
||||
if ((*(++p) & 0xc0) != 0x80)
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool characterstring_valid(
|
||||
|
||||
Reference in New Issue
Block a user