Exposed utf8_isvalid as a public function to reduce duplication. Allow a UTF-8 string of length 0 to be valid.
This commit is contained in:
@@ -145,6 +145,9 @@ extern "C" {
|
|||||||
BACNET_CHARACTER_STRING * char_string);
|
BACNET_CHARACTER_STRING * char_string);
|
||||||
bool characterstring_valid(
|
bool characterstring_valid(
|
||||||
BACNET_CHARACTER_STRING * char_string);
|
BACNET_CHARACTER_STRING * char_string);
|
||||||
|
bool utf8_isvalid(
|
||||||
|
const char *str,
|
||||||
|
size_t length);
|
||||||
|
|
||||||
/* returns false if the string exceeds capacity
|
/* returns false if the string exceeds capacity
|
||||||
initialize by using length=0 */
|
initialize by using length=0 */
|
||||||
|
|||||||
@@ -35,107 +35,6 @@
|
|||||||
#include "device.h"
|
#include "device.h"
|
||||||
#include "bname.h"
|
#include "bname.h"
|
||||||
|
|
||||||
/* Basic UTF-8 manipulation routines
|
|
||||||
by Jeff Bezanson
|
|
||||||
placed in the public domain Fall 2005 */
|
|
||||||
static const char trailingBytesForUTF8[256] = {
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0,
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
1, 1, 1, 1, 1, 1, 1,
|
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4,
|
|
||||||
4, 4, 4, 5, 5, 5, 5
|
|
||||||
};
|
|
||||||
|
|
||||||
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
|
|
||||||
length is in bytes, since without knowing whether the string is valid
|
|
||||||
it's hard to know how many characters there are! */
|
|
||||||
static int utf8_isvalid(
|
|
||||||
const char *str,
|
|
||||||
int length)
|
|
||||||
{
|
|
||||||
const unsigned char *p, *pend = (unsigned char *) str + length;
|
|
||||||
unsigned char c;
|
|
||||||
int ab;
|
|
||||||
|
|
||||||
for (p = (unsigned char *) str; p < pend; p++) {
|
|
||||||
c = *p;
|
|
||||||
/* null in middle of string */
|
|
||||||
if (c == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
/* ASCII character */
|
|
||||||
if (c < 128) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if ((c & 0xc0) != 0xc0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
ab = trailingBytesForUTF8[c];
|
|
||||||
if (length < ab) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
length -= ab;
|
|
||||||
|
|
||||||
p++;
|
|
||||||
/* Check top bits in the second byte */
|
|
||||||
if ((*p & 0xc0) != 0x80) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
/* Check for overlong sequences for each different length */
|
|
||||||
switch (ab) {
|
|
||||||
/* Check for xx00 000x */
|
|
||||||
case 1:
|
|
||||||
if ((c & 0x3e) == 0)
|
|
||||||
return 0;
|
|
||||||
continue; /* We know there aren't any more bytes to check */
|
|
||||||
|
|
||||||
/* Check for 1110 0000, xx0x xxxx */
|
|
||||||
case 2:
|
|
||||||
if (c == 0xe0 && (*p & 0x20) == 0)
|
|
||||||
return 0;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Check for 1111 0000, xx00 xxxx */
|
|
||||||
case 3:
|
|
||||||
if (c == 0xf0 && (*p & 0x30) == 0)
|
|
||||||
return 0;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Check for 1111 1000, xx00 0xxx */
|
|
||||||
case 4:
|
|
||||||
if (c == 0xf8 && (*p & 0x38) == 0)
|
|
||||||
return 0;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Check for leading 0xfe or 0xff,
|
|
||||||
and then for 1111 1100, xx00 00xx */
|
|
||||||
case 5:
|
|
||||||
if (c == 0xfe || c == 0xff || (c == 0xfc && (*p & 0x3c) == 0))
|
|
||||||
return 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
|
||||||
while (--ab > 0) {
|
|
||||||
if ((*(++p) & 0xc0) != 0x80)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool bacnet_name_isvalid(
|
static bool bacnet_name_isvalid(
|
||||||
uint8_t encoding,
|
uint8_t encoding,
|
||||||
uint8_t length,
|
uint8_t length,
|
||||||
@@ -145,10 +44,8 @@ static bool bacnet_name_isvalid(
|
|||||||
|
|
||||||
if ((encoding < MAX_CHARACTER_STRING_ENCODING) &&
|
if ((encoding < MAX_CHARACTER_STRING_ENCODING) &&
|
||||||
(length <= NV_EEPROM_NAME_SIZE)) {
|
(length <= NV_EEPROM_NAME_SIZE)) {
|
||||||
if (encoding == CHARACTER_ANSI_X34) {
|
if (encoding == CHARACTER_UTF8) {
|
||||||
if (utf8_isvalid(str, length)) {
|
valid = utf8_isvalid(str, length);
|
||||||
valid = true;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
valid = true;
|
valid = true;
|
||||||
}
|
}
|
||||||
|
|||||||
+16
-12
@@ -521,7 +521,7 @@ static const char trailingBytesForUTF8[256] = {
|
|||||||
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
|
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
|
||||||
length is in bytes, since without knowing whether the string is valid
|
length is in bytes, since without knowing whether the string is valid
|
||||||
it's hard to know how many characters there are! */
|
it's hard to know how many characters there are! */
|
||||||
static int utf8_isvalid(
|
bool utf8_isvalid(
|
||||||
const char *str,
|
const char *str,
|
||||||
size_t length)
|
size_t length)
|
||||||
{
|
{
|
||||||
@@ -529,72 +529,76 @@ static int utf8_isvalid(
|
|||||||
unsigned char c;
|
unsigned char c;
|
||||||
size_t ab;
|
size_t ab;
|
||||||
|
|
||||||
|
/* empty string is valid */
|
||||||
|
if (length == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
for (p = (unsigned char *) str; p < pend; p++) {
|
for (p = (unsigned char *) str; p < pend; p++) {
|
||||||
c = *p;
|
c = *p;
|
||||||
/* null in middle of string */
|
/* null in middle of string */
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
/* ASCII character */
|
/* ASCII character */
|
||||||
if (c < 128) {
|
if (c < 128) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ((c & 0xc0) != 0xc0) {
|
if ((c & 0xc0) != 0xc0) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
ab = (size_t)trailingBytesForUTF8[c];
|
ab = (size_t)trailingBytesForUTF8[c];
|
||||||
if (length < ab) {
|
if (length < ab) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
length -= ab;
|
length -= ab;
|
||||||
|
|
||||||
p++;
|
p++;
|
||||||
/* Check top bits in the second byte */
|
/* Check top bits in the second byte */
|
||||||
if ((*p & 0xc0) != 0x80) {
|
if ((*p & 0xc0) != 0x80) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
/* Check for overlong sequences for each different length */
|
/* Check for overlong sequences for each different length */
|
||||||
switch (ab) {
|
switch (ab) {
|
||||||
/* Check for xx00 000x */
|
/* Check for xx00 000x */
|
||||||
case 1:
|
case 1:
|
||||||
if ((c & 0x3e) == 0)
|
if ((c & 0x3e) == 0)
|
||||||
return 0;
|
return false;
|
||||||
continue; /* We know there aren't any more bytes to check */
|
continue; /* We know there aren't any more bytes to check */
|
||||||
|
|
||||||
/* Check for 1110 0000, xx0x xxxx */
|
/* Check for 1110 0000, xx0x xxxx */
|
||||||
case 2:
|
case 2:
|
||||||
if (c == 0xe0 && (*p & 0x20) == 0)
|
if (c == 0xe0 && (*p & 0x20) == 0)
|
||||||
return 0;
|
return false;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Check for 1111 0000, xx00 xxxx */
|
/* Check for 1111 0000, xx00 xxxx */
|
||||||
case 3:
|
case 3:
|
||||||
if (c == 0xf0 && (*p & 0x30) == 0)
|
if (c == 0xf0 && (*p & 0x30) == 0)
|
||||||
return 0;
|
return false;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Check for 1111 1000, xx00 0xxx */
|
/* Check for 1111 1000, xx00 0xxx */
|
||||||
case 4:
|
case 4:
|
||||||
if (c == 0xf8 && (*p & 0x38) == 0)
|
if (c == 0xf8 && (*p & 0x38) == 0)
|
||||||
return 0;
|
return false;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Check for leading 0xfe or 0xff,
|
/* Check for leading 0xfe or 0xff,
|
||||||
and then for 1111 1100, xx00 00xx */
|
and then for 1111 1100, xx00 00xx */
|
||||||
case 5:
|
case 5:
|
||||||
if (c == 0xfe || c == 0xff || (c == 0xfc && (*p & 0x3c) == 0))
|
if (c == 0xfe || c == 0xff || (c == 0xfc && (*p & 0x3c) == 0))
|
||||||
return 0;
|
return false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
||||||
while (--ab > 0) {
|
while (--ab > 0) {
|
||||||
if ((*(++p) & 0xc0) != 0x80)
|
if ((*(++p) & 0xc0) != 0x80)
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool characterstring_valid(
|
bool characterstring_valid(
|
||||||
|
|||||||
Reference in New Issue
Block a user