/*
 * Copyright (C) 2009-2014 Max Kellermann <max.kellermann@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

30
#include "Tokenizer.hxx"
31
#include "CharUtil.hxx"
32
#include "StringStrip.hxx"
33

34
#include <stdexcept>
35 36 37 38

static inline bool
valid_word_first_char(char ch)
{
39
	return IsAlphaASCII(ch);
40 41 42 43 44
}

static inline bool
valid_word_char(char ch)
{
45
	return IsAlphaNumericASCII(ch) || ch == '_';
46 47 48
}

char *
49
Tokenizer::NextWord()
50
{
51
	char *const word = input;
52 53

	if (*input == 0)
54
		return nullptr;
55 56 57

	/* check the first character */

58 59
	if (!valid_word_first_char(*input))
		throw std::runtime_error("Letter expected");
60 61 62 63 64

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
65
		if (IsWhitespaceFast(*input)) {
66 67 68
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
69
			input = StripLeft(input + 1);
70 71 72
			break;
		}

73 74
		if (!valid_word_char(*input))
			throw std::runtime_error("Invalid word character");
75 76 77 78 79 80 81 82
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

83 84 85 86 87 88 89
/**
 * May this character appear in an unquoted parameter?  Quote
 * characters (double and single) are rejected, as is every
 * character whose unsigned value is 0x20 or below (which includes
 * the space and the null byte).
 *
 * @param ch the character to be checked
 * @return true if the character is acceptable in an unquoted value
 */
static inline bool
valid_unquoted_char(char ch)
{
	/* quotes are never allowed outside of a quoted string */
	if (ch == '"' || ch == '\'')
		return false;

	/* compare as unsigned so that bytes with the high bit set are
	   not mistaken for values below 0x20 */
	return static_cast<unsigned char>(ch) > 0x20;
}

char *
90
Tokenizer::NextUnquoted()
91
{
92
	char *const word = input;
93 94

	if (*input == 0)
95
		return nullptr;
96 97 98

	/* check the first character */

99 100
	if (!valid_unquoted_char(*input))
		throw std::runtime_error("Invalid unquoted character");
101 102 103 104 105

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
106
		if (IsWhitespaceFast(*input)) {
107 108 109
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
110
			input = StripLeft(input + 1);
111 112 113
			break;
		}

114 115
		if (!valid_unquoted_char(*input))
			throw std::runtime_error("Invalid unquoted character");
116 117 118 119 120 121 122 123
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

124
char *
125
Tokenizer::NextString()
126
{
127
	char *const word = input, *dest = input;
128 129 130

	if (*input == 0)
		/* end of line */
131
		return nullptr;
132 133 134

	/* check for the opening " */

135 136
	if (*input != '"')
		throw std::runtime_error("'\"' expected");
137 138 139 140 141 142 143 144 145 146 147

	++input;

	/* copy all characters */

	while (*input != '"') {
		if (*input == '\\')
			/* the backslash escapes the following
			   character */
			++input;

148 149
		if (*input == 0)
			throw std::runtime_error("Missing closing '\"'");
150 151 152 153 154 155 156 157 158

		/* copy one character */
		*dest++ = *input++;
	}

	/* the following character must be a whitespace (or end of
	   line) */

	++input;
159 160
	if (!IsWhitespaceFast(*input))
		throw std::runtime_error("Space expected after closing '\"'");
161 162 163 164

	/* finish the string and return it */

	*dest = 0;
165
	input = StripLeft(input);
166 167 168 169
	return word;
}

char *
170
Tokenizer::NextParam()
171
{
172
	if (*input == '"')
173
		return NextString();
174
	else
175
		return NextUnquoted();
176
}