/*
 * Copyright (C) 2009-2014 Max Kellermann <max@duempel.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

30
#include "config.h"
31
#include "Tokenizer.hxx"
32
#include "CharUtil.hxx"
33
#include "StringUtil.hxx"
34 35
#include "Error.hxx"
#include "Domain.hxx"
36

37
/* the error domain passed to every Error::Set() call made by the
   Tokenizer methods in this file */
static constexpr Domain tokenizer_domain("tokenizer");
38 39 40 41

static inline bool
valid_word_first_char(char ch)
{
42
	return IsAlphaASCII(ch);
43 44 45 46 47
}

static inline bool
valid_word_char(char ch)
{
48
	return IsAlphaNumericASCII(ch) || ch == '_';
49 50 51
}

char *
52
Tokenizer::NextWord(Error &error)
53
{
54
	char *const word = input;
55 56

	if (*input == 0)
57
		return nullptr;
58 59 60 61

	/* check the first character */

	if (!valid_word_first_char(*input)) {
62
		error.Set(tokenizer_domain, "Letter expected");
63
		return nullptr;
64 65 66 67 68 69
	}

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
70
		if (IsWhitespaceFast(*input)) {
71 72 73
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
74
			input = StripLeft(input + 1);
75 76 77 78
			break;
		}

		if (!valid_word_char(*input)) {
79
			error.Set(tokenizer_domain, "Invalid word character");
80
			return nullptr;
81 82 83 84 85 86 87 88 89
		}
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

90 91 92 93 94 95 96
/**
 * May this character appear in an unquoted parameter?  Both quote
 * characters are rejected, as is every byte up to and including
 * 0x20 (which covers the space and the null byte).  The comparison
 * is done on the unsigned value, so bytes >= 0x80 are accepted even
 * where plain "char" is signed.
 */
static inline bool
valid_unquoted_char(char ch)
{
	if (ch == '"' || ch == '\'')
		return false;

	return static_cast<unsigned char>(ch) > 0x20;
}

char *
97
Tokenizer::NextUnquoted(Error &error)
98
{
99
	char *const word = input;
100 101

	if (*input == 0)
102
		return nullptr;
103 104 105 106

	/* check the first character */

	if (!valid_unquoted_char(*input)) {
107
		error.Set(tokenizer_domain, "Invalid unquoted character");
108
		return nullptr;
109 110 111 112 113 114
	}

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
115
		if (IsWhitespaceFast(*input)) {
116 117 118
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
119
			input = StripLeft(input + 1);
120 121 122 123
			break;
		}

		if (!valid_unquoted_char(*input)) {
124 125
			error.Set(tokenizer_domain,
				  "Invalid unquoted character");
126
			return nullptr;
127 128 129 130 131 132 133 134 135
		}
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

136
char *
137
Tokenizer::NextString(Error &error)
138
{
139
	char *const word = input, *dest = input;
140 141 142

	if (*input == 0)
		/* end of line */
143
		return nullptr;
144 145 146 147

	/* check for the opening " */

	if (*input != '"') {
148
		error.Set(tokenizer_domain, "'\"' expected");
149
		return nullptr;
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
	}

	++input;

	/* copy all characters */

	while (*input != '"') {
		if (*input == '\\')
			/* the backslash escapes the following
			   character */
			++input;

		if (*input == 0) {
			/* return input-1 so the caller can see the
			   difference between "end of line" and
			   "error" */
166
			--input;
167
			error.Set(tokenizer_domain, "Missing closing '\"'");
168
			return nullptr;
169 170 171 172 173 174 175 176 177 178
		}

		/* copy one character */
		*dest++ = *input++;
	}

	/* the following character must be a whitespace (or end of
	   line) */

	++input;
179
	if (!IsWhitespaceFast(*input)) {
180 181
		error.Set(tokenizer_domain,
			  "Space expected after closing '\"'");
182
		return nullptr;
183 184 185 186 187
	}

	/* finish the string and return it */

	*dest = 0;
188
	input = StripLeft(input);
189 190 191 192
	return word;
}

char *
193
Tokenizer::NextParam(Error &error)
194
{
195
	if (*input == '"')
196
		return NextString(error);
197
	else
198
		return NextUnquoted(error);
199
}