/*
 * Copyright (C) 2003-2013 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

20
#include "config.h"
21
#include "Tokenizer.hxx"
22
#include "StringUtil.hxx"
23

24 25
#include <glib.h>

26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
#include <stdbool.h>
#include <assert.h>
#include <string.h>

/**
 * Returns the GQuark for the static string "tokenizer".  The
 * Tokenizer methods in this file pass it to g_set_error() as the
 * error domain.
 */
G_GNUC_CONST
static GQuark
tokenizer_quark(void)
{
	const GQuark domain = g_quark_from_static_string("tokenizer");

	return domain;
}

/**
 * Is this character allowed as the first character of a word?
 * Only characters accepted by g_ascii_isalpha() qualify.
 */
static inline bool
valid_word_first_char(char ch)
{
	const bool is_letter = g_ascii_isalpha(ch);

	return is_letter;
}

/**
 * Is this character allowed inside a word (after the first
 * character)?  An underscore or anything accepted by
 * g_ascii_isalnum() qualifies.
 */
static inline bool
valid_word_char(char ch)
{
	if (g_ascii_isalnum(ch))
		return true;

	return ch == '_';
}

char *
50
Tokenizer::NextWord(GError **error_r)
51
{
52
	char *const word = input;
53 54

	if (*input == 0)
55
		return nullptr;
56 57 58 59 60 61

	/* check the first character */

	if (!valid_word_first_char(*input)) {
		g_set_error(error_r, tokenizer_quark(), 0,
			    "Letter expected");
62
		return nullptr;
63 64 65 66 67 68 69 70 71 72
	}

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
		if (g_ascii_isspace(*input)) {
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
73
			input = strchug_fast(input + 1);
74 75 76 77 78 79
			break;
		}

		if (!valid_word_char(*input)) {
			g_set_error(error_r, tokenizer_quark(), 0,
				    "Invalid word character");
80
			return nullptr;
81 82 83 84 85 86 87 88 89
		}
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

90 91 92 93 94 95 96
/**
 * Is this character allowed in an unquoted parameter?  Both quote
 * characters (double and single) are rejected, as is every byte
 * whose unsigned value is 0x20 (space) or below.
 */
static inline bool
valid_unquoted_char(char ch)
{
	if (ch == '"' || ch == '\'')
		return false;

	/* compare as unsigned so bytes >= 0x80 are accepted regardless
	   of whether plain char is signed */
	const unsigned char byte = (unsigned char)ch;
	return byte > 0x20;
}

char *
97
Tokenizer::NextUnquoted(GError **error_r)
98
{
99
	char *const word = input;
100 101

	if (*input == 0)
102
		return nullptr;
103 104 105 106 107 108

	/* check the first character */

	if (!valid_unquoted_char(*input)) {
		g_set_error(error_r, tokenizer_quark(), 0,
			    "Invalid unquoted character");
109
		return nullptr;
110 111 112 113 114 115 116 117 118 119
	}

	/* now iterate over the other characters until we find a
	   whitespace or end-of-string */

	while (*++input != 0) {
		if (g_ascii_isspace(*input)) {
			/* a whitespace: the word ends here */
			*input = 0;
			/* skip all following spaces, too */
120
			input = strchug_fast(input + 1);
121 122 123 124 125 126
			break;
		}

		if (!valid_unquoted_char(*input)) {
			g_set_error(error_r, tokenizer_quark(), 0,
				    "Invalid unquoted character");
127
			return nullptr;
128 129 130 131 132 133 134 135 136
		}
	}

	/* end of string: the string is already null-terminated
	   here */

	return word;
}

137
char *
138
Tokenizer::NextString(GError **error_r)
139
{
140
	char *const word = input, *dest = input;
141 142 143

	if (*input == 0)
		/* end of line */
144
		return nullptr;
145 146 147 148 149 150

	/* check for the opening " */

	if (*input != '"') {
		g_set_error(error_r, tokenizer_quark(), 0,
			    "'\"' expected");
151
		return nullptr;
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
	}

	++input;

	/* copy all characters */

	while (*input != '"') {
		if (*input == '\\')
			/* the backslash escapes the following
			   character */
			++input;

		if (*input == 0) {
			/* return input-1 so the caller can see the
			   difference between "end of line" and
			   "error" */
168
			--input;
169 170
			g_set_error(error_r, tokenizer_quark(), 0,
				    "Missing closing '\"'");
171
			return nullptr;
172 173 174 175 176 177 178 179 180 181 182 183 184
		}

		/* copy one character */
		*dest++ = *input++;
	}

	/* the following character must be a whitespace (or end of
	   line) */

	++input;
	if (*input != 0 && !g_ascii_isspace(*input)) {
		g_set_error(error_r, tokenizer_quark(), 0,
			    "Space expected after closing '\"'");
185
		return nullptr;
186 187 188 189 190
	}

	/* finish the string and return it */

	*dest = 0;
191
	input = strchug_fast(input);
192 193 194 195
	return word;
}

char *
196
Tokenizer::NextParam(GError **error_r)
197
{
198 199
	if (*input == '"')
		return NextString(error_r);
200
	else
201
		return NextUnquoted(error_r);
202
}