/*
 * Copyright (C) 2003-2013 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "config.h"
#include "Tokenizer.hxx"
#include "CharUtil.hxx"
#include "StringUtil.hxx"
#include "Error.hxx"
#include "Domain.hxx"

#include <glib.h>

#include <assert.h>
#include <string.h>

32
static constexpr Domain tokenizer_domain("tokenizer");
33 34 35 36

static inline bool
valid_word_first_char(char ch)
{
37
	return IsAlphaASCII(ch);
38 39 40 41 42
}

static inline bool
valid_word_char(char ch)
{
43
	return IsAlphaNumericASCII(ch) || ch == '_';
44 45 46
}

char *
47
Tokenizer::NextWord(Error &error)
48
{
49
	char *const word = input;
50 51

	if (*input == 0)
52
		return nullptr;
53 54 55 56

	/* check the first character */

	if (!valid_word_first_char(*input)) {
57
		error.Set(tokenizer_domain, "Letter expected");
58
		return nullptr;
59 60 61 62 63 64
	}

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
65
		if (IsWhitespaceOrNull(*input)) {
66 67 68
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
69
			input = strchug_fast(input + 1);
70 71 72 73
			break;
		}

		if (!valid_word_char(*input)) {
74
			error.Set(tokenizer_domain, "Invalid word character");
75
			return nullptr;
76 77 78 79 80 81 82 83 84
		}
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

/**
 * Is this character allowed in an unquoted parameter?  Rejects the
 * two quote characters and every byte up to and including the space
 * (0x20), which covers the null terminator and the ASCII control
 * characters.
 */
static inline bool
valid_unquoted_char(char ch)
{
	/* compare as unsigned char so that bytes >= 0x80 are accepted
	   even where plain "char" is signed */
	return static_cast<unsigned char>(ch) > 0x20 &&
		ch != '"' && ch != '\'';
}

char *
92
Tokenizer::NextUnquoted(Error &error)
93
{
94
	char *const word = input;
95 96

	if (*input == 0)
97
		return nullptr;
98 99 100 101

	/* check the first character */

	if (!valid_unquoted_char(*input)) {
102
		error.Set(tokenizer_domain, "Invalid unquoted character");
103
		return nullptr;
104 105 106 107 108 109
	}

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
110
		if (IsWhitespaceOrNull(*input)) {
111 112 113
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
114
			input = strchug_fast(input + 1);
115 116 117 118
			break;
		}

		if (!valid_unquoted_char(*input)) {
119 120
			error.Set(tokenizer_domain,
				  "Invalid unquoted character");
121
			return nullptr;
122 123 124 125 126 127 128 129 130
		}
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

131
char *
132
Tokenizer::NextString(Error &error)
133
{
134
	char *const word = input, *dest = input;
135 136 137

	if (*input == 0)
		/* end of line */
138
		return nullptr;
139 140 141 142

	/* check for the opening " */

	if (*input != '"') {
143
		error.Set(tokenizer_domain, "'\"' expected");
144
		return nullptr;
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
	}

	++input;

	/* copy all characters */

	while (*input != '"') {
		if (*input == '\\')
			/* the backslash escapes the following
			   character */
			++input;

		if (*input == 0) {
			/* return input-1 so the caller can see the
			   difference between "end of line" and
			   "error" */
161
			--input;
162
			error.Set(tokenizer_domain, "Missing closing '\"'");
163
			return nullptr;
164 165 166 167 168 169 170 171 172 173
		}

		/* copy one character */
		*dest++ = *input++;
	}

	/* the following character must be a whitespace (or end of
	   line) */

	++input;
174
	if (!IsWhitespaceOrNull(*input)) {
175 176
		error.Set(tokenizer_domain,
			  "Space expected after closing '\"'");
177
		return nullptr;
178 179 180 181 182
	}

	/* finish the string and return it */

	*dest = 0;
183
	input = strchug_fast(input);
184 185 186 187
	return word;
}

char *
188
Tokenizer::NextParam(Error &error)
189
{
190
	if (*input == '"')
191
		return NextString(error);
192
	else
193
		return NextUnquoted(error);
194
}