* Copyright 2001-2014, Axel Dörfler, axeld@pinc-software.de.
* Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de>
* Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de.
* This file may be used under the terms of the MIT License.
*/
#include <algorithm>
#include <file_systems/QueryParserUtils.h>
#ifndef FS_SHELL
# include <string.h>
# include <TypeConstants.h>
#endif
namespace QueryParser {
template<typename Key>
static inline int
compare_integral(const Key& a, const Key& b)
{
if (a < b)
return -1;
if (a > b)
return 1;
return 0;
}
void
skipWhitespace(const char** expr, int32 skip)
{
const char* string = (*expr) + skip;
while (*string == ' ' || *string == '\t') string++;
*expr = string;
}
void
skipWhitespaceReverse(const char** expr, const char* stop)
{
const char* string = *expr;
while (string > stop && (*string == ' ' || *string == '\t'))
string--;
*expr = string;
}
int
compareKeys(uint32 type, const void* key1, size_t length1, const void* key2,
size_t length2)
{
switch (type) {
case B_INT32_TYPE:
return compare_integral(*(int32*)key1, *(int32*)key2);
case B_UINT32_TYPE:
return compare_integral(*(uint32*)key1, *(uint32*)key2);
case B_INT64_TYPE:
return compare_integral(*(int64*)key1, *(int64*)key2);
case B_UINT64_TYPE:
return compare_integral(*(uint64*)key1, *(uint64*)key2);
case B_FLOAT_TYPE:
return compare_integral(*(float*)key1, *(float*)key2);
case B_DOUBLE_TYPE:
return compare_integral(*(double*)key1, *(double*)key2);
case B_STRING_TYPE:
case B_MIME_STRING_TYPE:
{
int result = strncmp((const char*)key1, (const char*)key2,
std::min(length1, length2));
if (result == 0) {
result = compare_integral(strnlen((const char*)key1, length1),
strnlen((const char*)key2, length2));
}
return result;
}
}
return -1;
}
uint32
utf8ToUnicode(const char** string)
{
uint8* bytes = (uint8*)*string;
int32 length;
uint8 mask = 0x1f;
switch (bytes[0] & 0xf0) {
case 0xc0:
case 0xd0:
length = 2;
break;
case 0xe0:
length = 3;
break;
case 0xf0:
mask = 0x0f;
length = 4;
break;
default:
(*string)++;
return bytes[0];
}
uint32 c = bytes[0] & mask;
int32 i = 1;
for (; i < length && (bytes[i] & 0x80) > 0; i++)
c = (c << 6) | (bytes[i] & 0x3f);
if (i < length) {
(*string)++;
return (uint32)bytes[0];
}
*string += length;
return c;
}
int32
getFirstPatternSymbol(const char* string)
{
char c;
for (int32 index = 0; (c = *string++); index++) {
if (c == '*' || c == '?' || c == '[')
return index;
}
return -1;
}
status_t
isValidPattern(const char* pattern)
{
while (*pattern) {
switch (*pattern++) {
case '\\':
if (!*pattern++)
return PATTERN_INVALID_ESCAPE;
break;
case '[':
if (pattern[0] == ']' || !pattern[0])
return PATTERN_INVALID_SET;
while (*pattern != ']') {
if (*pattern == '\\' && !*++pattern)
return PATTERN_INVALID_ESCAPE;
if (!*pattern)
return PATTERN_INVALID_SET;
if (pattern[0] == '-' && pattern[1] == '-')
return PATTERN_INVALID_RANGE;
pattern++;
}
break;
}
}
return B_OK;
}
Returns either MATCH_OK, or NO_MATCH when everything went fine, or
values < 0 (see enum at the top of Query.cpp) if an error occurs.
The pattern matching is roughly based on code originally written
by J. Kercheval, and on code written by Kenneth Almquist, though
it shares no code.
*/
status_t
matchString(const char* pattern, const char* string)
{
while (*pattern) {
if (!string[0]) {
while (pattern[0] == '*')
pattern++;
return !pattern[0] ? MATCH_OK : NO_MATCH;
}
switch (*pattern++) {
case '?':
{
utf8ToUnicode(&string);
break;
}
case '*':
{
while (true) {
if (pattern[0] == '?') {
if (!*++string)
return NO_MATCH;
} else if (pattern[0] != '*')
break;
pattern++;
}
if (!pattern[0])
return MATCH_OK;
while(true) {
if (pattern[0] == string[0]
|| pattern[0] == '['
|| pattern[0] == '\\') {
status_t status = matchString(pattern, string);
if (status < B_OK || status == MATCH_OK)
return status;
}
if (!*++string)
return NO_MATCH;
}
break;
}
case '[':
{
bool invert = false;
if (pattern[0] == '^' || pattern[0] == '!') {
invert = true;
pattern++;
}
if (!pattern[0] || pattern[0] == ']')
return MATCH_BAD_PATTERN;
uint32 c = utf8ToUnicode(&string);
bool matched = false;
while (pattern[0] != ']') {
if (!pattern[0])
return MATCH_BAD_PATTERN;
if (pattern[0] == '\\')
pattern++;
uint32 first = utf8ToUnicode(&pattern);
if (first == c) {
matched = true;
break;
} else if (pattern[0] == '-' && pattern[1] != ']'
&& pattern[1]) {
pattern++;
if (pattern[0] == '\\') {
pattern++;
if (!pattern[0])
return MATCH_BAD_PATTERN;
}
uint32 last = utf8ToUnicode(&pattern);
if (c >= first && c <= last) {
matched = true;
break;
}
}
}
if (invert)
matched = !matched;
if (matched) {
while (pattern[0] != ']') {
if (!pattern[0])
return MATCH_BAD_PATTERN;
pattern++;
}
pattern++;
break;
}
return NO_MATCH;
}
case '\\':
if (!pattern[0])
return MATCH_BAD_PATTERN;
default:
if (pattern[-1] != string[0])
return NO_MATCH;
string++;
break;
}
}
if (string[0])
return NO_MATCH;
return MATCH_OK;
}
}