85 lines
2.2 KiB
C
85 lines
2.2 KiB
C
/*-----------------------------------------------------------------------
 * ascii.h
 *
 *	 Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
 *
 * src/include/utils/ascii.h
 *
 *-----------------------------------------------------------------------
 */
|
|
|
|
#ifndef _ASCII_H_
|
|
#define _ASCII_H_
|
|
|
|
#include "port/simd.h"
|
|
|
|
/*
 * Declaration only; the definition lives outside this header.
 *
 * NOTE(review): judging by the name and signature, this presumably copies
 * the string at src into dest, where destsiz is the capacity of dest in
 * bytes, applying some ASCII-related sanitising along the way -- confirm the
 * exact truncation, termination and replacement rules against the definition.
 */
extern void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz);
|
|
|
|
/*
|
|
* Verify a chunk of bytes for valid ASCII.
|
|
*
|
|
* Returns false if the input contains any zero bytes or bytes with the
|
|
* high-bit set. Input len must be a multiple of the chunk size (8 or 16).
|
|
*/
|
|
static inline bool
|
|
is_valid_ascii(const unsigned char *s, int len)
|
|
{
|
|
const unsigned char *const s_end = s + len;
|
|
Vector8 chunk;
|
|
Vector8 highbit_cum = vector8_broadcast(0);
|
|
#ifdef USE_NO_SIMD
|
|
Vector8 zero_cum = vector8_broadcast(0x80);
|
|
#endif
|
|
|
|
Assert(len % sizeof(chunk) == 0);
|
|
|
|
while (s < s_end)
|
|
{
|
|
vector8_load(&chunk, s);
|
|
|
|
/* Capture any zero bytes in this chunk. */
|
|
#ifdef USE_NO_SIMD
|
|
|
|
/*
|
|
* First, add 0x7f to each byte. This sets the high bit in each byte,
|
|
* unless it was a zero. If any resulting high bits are zero, the
|
|
* corresponding high bits in the zero accumulator will be cleared.
|
|
*
|
|
* If none of the bytes in the chunk had the high bit set, the max
|
|
* value each byte can have after the addition is 0x7f + 0x7f = 0xfe,
|
|
* and we don't need to worry about carrying over to the next byte. If
|
|
* any input bytes did have the high bit set, it doesn't matter
|
|
* because we check for those separately.
|
|
*/
|
|
zero_cum &= (chunk + vector8_broadcast(0x7F));
|
|
#else
|
|
|
|
/*
|
|
* Set all bits in each lane of the highbit accumulator where input
|
|
* bytes are zero.
|
|
*/
|
|
highbit_cum = vector8_or(highbit_cum,
|
|
vector8_eq(chunk, vector8_broadcast(0)));
|
|
#endif
|
|
|
|
/* Capture all set bits in this chunk. */
|
|
highbit_cum = vector8_or(highbit_cum, chunk);
|
|
|
|
s += sizeof(chunk);
|
|
}
|
|
|
|
/* Check if any high bits in the high bit accumulator got set. */
|
|
if (vector8_is_highbit_set(highbit_cum))
|
|
return false;
|
|
|
|
#ifdef USE_NO_SIMD
|
|
/* Check if any high bits in the zero accumulator got cleared. */
|
|
if (zero_cum != vector8_broadcast(0x80))
|
|
return false;
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
#endif /* _ASCII_H_ */
|