Tokenizer.cxx 4 KB
Newer Older
/*
 * Copyright (C) 2009-2014 Max Kellermann <max.kellermann@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

30
#include "config.h"
31
#include "Tokenizer.hxx"
32
#include "CharUtil.hxx"
33
#include "StringStrip.hxx"
34

35
#include <stdexcept>
36 37 38 39

static inline bool
valid_word_first_char(char ch)
{
40
	return IsAlphaASCII(ch);
41 42 43 44 45
}

static inline bool
valid_word_char(char ch)
{
46
	return IsAlphaNumericASCII(ch) || ch == '_';
47 48 49
}

char *
50
Tokenizer::NextWord()
51
{
52
	char *const word = input;
53 54

	if (*input == 0)
55
		return nullptr;
56 57 58

	/* check the first character */

59 60
	if (!valid_word_first_char(*input))
		throw std::runtime_error("Letter expected");
61 62 63 64 65

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
66
		if (IsWhitespaceFast(*input)) {
67 68 69
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
70
			input = StripLeft(input + 1);
71 72 73
			break;
		}

74 75
		if (!valid_word_char(*input))
			throw std::runtime_error("Invalid word character");
76 77 78 79 80 81 82 83
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

84 85 86 87 88 89 90
/**
 * Decide whether a character may appear in an unquoted parameter.
 * Everything whose unsigned value is at most 0x20 (the null byte,
 * control characters and the space) is rejected, and so are both
 * quote characters.
 *
 * @param ch the character to examine
 * @return true if the character is allowed in an unquoted value
 */
static inline bool
valid_unquoted_char(char ch)
{
	/* compare as unsigned so bytes >= 0x80 are not mistaken for
	   negative values */
	const auto code = static_cast<unsigned char>(ch);
	if (code <= 0x20)
		return false;

	return ch != '"' && ch != '\'';
}

char *
91
Tokenizer::NextUnquoted()
92
{
93
	char *const word = input;
94 95

	if (*input == 0)
96
		return nullptr;
97 98 99

	/* check the first character */

100 101
	if (!valid_unquoted_char(*input))
		throw std::runtime_error("Invalid unquoted character");
102 103 104 105 106

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
107
		if (IsWhitespaceFast(*input)) {
108 109 110
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
111
			input = StripLeft(input + 1);
112 113 114
			break;
		}

115 116
		if (!valid_unquoted_char(*input))
			throw std::runtime_error("Invalid unquoted character");
117 118 119 120 121 122 123 124
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

125
char *
126
Tokenizer::NextString()
127
{
128
	char *const word = input, *dest = input;
129 130 131

	if (*input == 0)
		/* end of line */
132
		return nullptr;
133 134 135

	/* check for the opening " */

136 137
	if (*input != '"')
		throw std::runtime_error("'\"' expected");
138 139 140 141 142 143 144 145 146 147 148

	++input;

	/* copy all characters */

	while (*input != '"') {
		if (*input == '\\')
			/* the backslash escapes the following
			   character */
			++input;

149 150
		if (*input == 0)
			throw std::runtime_error("Missing closing '\"'");
151 152 153 154 155 156 157 158 159

		/* copy one character */
		*dest++ = *input++;
	}

	/* the following character must be a whitespace (or end of
	   line) */

	++input;
160 161
	if (!IsWhitespaceFast(*input))
		throw std::runtime_error("Space expected after closing '\"'");
162 163 164 165

	/* finish the string and return it */

	*dest = 0;
166
	input = StripLeft(input);
167 168 169 170
	return word;
}

char *
171
Tokenizer::NextParam()
172
{
173
	if (*input == '"')
174
		return NextString();
175
	else
176
		return NextUnquoted();
177
}