Commit 77df5a8f authored by Max Kellermann's avatar Max Kellermann Committed by Max Kellermann

lib/pcre: migrate to PCRE2

parent d6bebd25
ver 0.23.5 (not yet released) ver 0.23.5 (not yet released)
* migrate to PCRE2
* GCC 12 build fixes * GCC 12 build fixes
ver 0.23.4 (2021/11/11) ver 0.23.4 (2021/11/11)
......
...@@ -35,6 +35,7 @@ db_plugins = static_library( ...@@ -35,6 +35,7 @@ db_plugins = static_library(
include_directories: inc, include_directories: inc,
dependencies: [ dependencies: [
upnp_dep, upnp_dep,
pcre_dep,
libmpdclient_dep, libmpdclient_dep,
log_dep, log_dep,
], ],
......
/*
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "Error.hxx"
#include <pcre2.h>
#include <iterator>
#include <string>
namespace Pcre {
ErrorCategory error_category;
std::string
ErrorCategory::message(int condition) const
{
PCRE2_UCHAR8 buffer[256];
pcre2_get_error_message_8(condition, buffer, std::size(buffer));
return std::string{(const char *)buffer};
}
} // namespace Pcre
/*
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <system_error>
namespace Pcre {
/**
 * A std::error_category implementation whose name is "pcre2"; it is
 * the category attached to errors created by MakeError().
 */
class ErrorCategory final : public std::error_category {
public:
	/** Returns the constant category name "pcre2". */
	auto name() const noexcept -> const char * override {
		return "pcre2";
	}

	/**
	 * Returns a textual description of the given error code.
	 * Declared here, defined out of line.
	 */
	auto message(int condition) const -> std::string override;
};
extern ErrorCategory error_category;
inline std::system_error
MakeError(int error, const char *msg) noexcept
{
return std::system_error(error, error_category, msg);
}
} // namespace Pcre
/*
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <pcre2.h>
#include <cassert>
#include <cstddef>
#include <string_view>
#include <utility>
class MatchData {
friend class RegexPointer;
pcre2_match_data_8 *match_data = nullptr;
const char *s;
PCRE2_SIZE *ovector;
std::size_t n;
explicit MatchData(pcre2_match_data_8 *_md, const char *_s) noexcept
:match_data(_md), s(_s),
ovector(pcre2_get_ovector_pointer_8(match_data))
{
}
public:
MatchData() = default;
MatchData(MatchData &&src) noexcept
:match_data(std::exchange(src.match_data, nullptr)),
s(src.s), ovector(src.ovector), n(src.n) {}
~MatchData() noexcept {
if (match_data != nullptr)
pcre2_match_data_free_8(match_data);
}
MatchData &operator=(MatchData &&src) noexcept {
using std::swap;
swap(match_data, src.match_data);
swap(s, src.s);
swap(ovector, src.ovector);
swap(n, src.n);
return *this;
}
constexpr operator bool() const noexcept {
return match_data != nullptr;
}
constexpr std::size_t size() const noexcept {
assert(*this);
return static_cast<std::size_t>(n);
}
[[gnu::pure]]
constexpr std::string_view operator[](std::size_t i) const noexcept {
assert(*this);
assert(i < size());
int start = ovector[2 * i];
if (start < 0)
return {};
int end = ovector[2 * i + 1];
assert(end >= start);
return { s + start, std::size_t(end - start) };
}
static constexpr std::size_t npos = ~std::size_t{};
[[gnu::pure]]
constexpr std::size_t GetCaptureStart(std::size_t i) const noexcept {
assert(*this);
assert(i < size());
int start = ovector[2 * i];
if (start < 0)
return npos;
return std::size_t(start);
}
[[gnu::pure]]
constexpr std::size_t GetCaptureEnd(std::size_t i) const noexcept {
assert(*this);
assert(i < size());
int end = ovector[2 * i + 1];
if (end < 0)
return npos;
return std::size_t(end);
}
};
/* /*
* Copyright 2007-2018 Content Management AG * Copyright 2007-2021 CM4all GmbH
* All rights reserved. * All rights reserved.
* *
* author: Max Kellermann <mk@cm4all.com> * author: Max Kellermann <mk@cm4all.com>
...@@ -30,27 +30,17 @@ ...@@ -30,27 +30,17 @@
* OF THE POSSIBILITY OF SUCH DAMAGE. * OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef REGEX_POINTER_HXX #pragma once
#define REGEX_POINTER_HXX
#include "util/StringView.hxx" #include "MatchData.hxx"
#include "util/Compiler.h"
#include <pcre.h> #include <pcre2.h>
#include <array> #include <string_view>
#if GCC_CHECK_VERSION(11,0)
#pragma GCC diagnostic push
/* bogus GCC 11 warning "ovector may be used uninitialized" in the
ovector.size() call */
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
class RegexPointer { class RegexPointer {
protected: protected:
pcre *re = nullptr; pcre2_code_8 *re = nullptr;
pcre_extra *extra = nullptr;
unsigned n_capture = 0; unsigned n_capture = 0;
...@@ -60,18 +50,28 @@ public: ...@@ -60,18 +50,28 @@ public:
} }
[[gnu::pure]] [[gnu::pure]]
bool Match(StringView s) const noexcept { MatchData Match(std::string_view s) const noexcept {
/* we don't need the data written to ovector, but PCRE can MatchData match_data{
omit internal allocations if we pass a buffer to pcre2_match_data_create_from_pattern_8(re, nullptr),
pcre_exec() */ s.data(),
std::array<int, 16> ovector; };
return pcre_exec(re, extra, s.data, s.size,
0, 0, &ovector.front(), ovector.size()) >= 0; int n = pcre2_match_8(re, (PCRE2_SPTR8)s.data(), s.size(),
} 0, 0,
}; match_data.match_data, nullptr);
if (n < 0)
/* no match (or error) */
return {};
#if GCC_CHECK_VERSION(11,0) match_data.n = n;
#pragma GCC diagnostic pop
#endif
#endif if (n_capture >= match_data.n)
/* in its return value, PCRE omits mismatching
optional captures if (and only if) they are
the last capture; this kludge works around
this */
match_data.n = n_capture + 1;
return match_data;
}
};
/* /*
* Copyright 2007-2018 Content Management AG * Copyright 2007-2021 CM4all GmbH
* All rights reserved. * All rights reserved.
* *
* author: Max Kellermann <mk@cm4all.com> * author: Max Kellermann <mk@cm4all.com>
...@@ -31,41 +31,40 @@ ...@@ -31,41 +31,40 @@
*/ */
#include "UniqueRegex.hxx" #include "UniqueRegex.hxx"
#include "util/RuntimeError.hxx" #include "Error.hxx"
#include <stdio.h>
void void
UniqueRegex::Compile(const char *pattern, bool anchored, bool capture, UniqueRegex::Compile(const char *pattern, bool anchored, bool capture,
bool caseless) bool caseless)
{ {
constexpr int default_options = PCRE_DOTALL|PCRE_NO_AUTO_CAPTURE|PCRE_UTF8; constexpr int default_options = PCRE2_DOTALL|PCRE2_NO_AUTO_CAPTURE;
int options = default_options; uint32_t options = default_options;
if (anchored) if (anchored)
options |= PCRE_ANCHORED; options |= PCRE2_ANCHORED;
if (capture) if (capture)
options &= ~PCRE_NO_AUTO_CAPTURE; options &= ~PCRE2_NO_AUTO_CAPTURE;
if (caseless) if (caseless)
options |= PCRE_CASELESS; options |= PCRE2_CASELESS;
const char *error_string;
int error_offset;
re = pcre_compile(pattern, options, &error_string, &error_offset, nullptr);
if (re == nullptr)
throw FormatRuntimeError("Error in regex at offset %d: %s",
error_offset, error_string);
int study_options = 0; int error_number;
#ifdef PCRE_CONFIG_JIT PCRE2_SIZE error_offset;
study_options |= PCRE_STUDY_JIT_COMPILE; re = pcre2_compile_8(PCRE2_SPTR8(pattern),
#endif PCRE2_ZERO_TERMINATED, options,
extra = pcre_study(re, study_options, &error_string); &error_number, &error_offset,
if (extra == nullptr && error_string != nullptr) { nullptr);
pcre_free(re); if (re == nullptr) {
re = nullptr; char msg[256];
throw FormatRuntimeError("Regex study error: %s", error_string); snprintf(msg, sizeof(msg), "Error in regex at offset %zu",
error_offset);
throw Pcre::MakeError(error_number, msg);
} }
int n; pcre2_jit_compile_8(re, PCRE2_JIT_COMPLETE);
if (capture && pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &n) == 0)
if (int n; capture &&
pcre2_pattern_info_8(re, PCRE2_INFO_CAPTURECOUNT, &n) == 0)
n_capture = n; n_capture = n;
} }
/* /*
* Copyright 2007-2018 Content Management AG * Copyright 2007-2021 CM4all GmbH
* All rights reserved. * All rights reserved.
* *
* author: Max Kellermann <mk@cm4all.com> * author: Max Kellermann <mk@cm4all.com>
...@@ -30,15 +30,12 @@ ...@@ -30,15 +30,12 @@
* OF THE POSSIBILITY OF SUCH DAMAGE. * OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef UNIQUE_REGEX_HXX #pragma once
#define UNIQUE_REGEX_HXX
#include "RegexPointer.hxx" #include "RegexPointer.hxx"
#include <utility> #include <utility>
#include <pcre.h>
class UniqueRegex : public RegexPointer { class UniqueRegex : public RegexPointer {
public: public:
UniqueRegex() = default; UniqueRegex() = default;
...@@ -50,29 +47,22 @@ public: ...@@ -50,29 +47,22 @@ public:
UniqueRegex(UniqueRegex &&src) noexcept:RegexPointer(src) { UniqueRegex(UniqueRegex &&src) noexcept:RegexPointer(src) {
src.re = nullptr; src.re = nullptr;
src.extra = nullptr;
} }
~UniqueRegex() noexcept { ~UniqueRegex() noexcept {
pcre_free(re); if (re != nullptr)
#ifdef PCRE_CONFIG_JIT pcre2_code_free_8(re);
pcre_free_study(extra);
#else
pcre_free(extra);
#endif
} }
UniqueRegex &operator=(UniqueRegex &&src) { UniqueRegex &operator=(UniqueRegex &&src) noexcept {
using std::swap; using std::swap;
swap<RegexPointer>(*this, src); swap<RegexPointer>(*this, src);
return *this; return *this;
} }
/** /**
* Throws std::runtime_error on error. * Throws Pcre::Error on error.
*/ */
void Compile(const char *pattern, bool anchored, bool capture, void Compile(const char *pattern, bool anchored, bool capture,
bool caseless); bool caseless);
}; };
#endif
pcre_dep = dependency('libpcre', required: get_option('pcre')) pcre_dep = dependency('libpcre2-8', required: get_option('pcre'))
conf.set('HAVE_PCRE', pcre_dep.found()) conf.set('HAVE_PCRE', pcre_dep.found())
if not pcre_dep.found() if not pcre_dep.found()
subdir_done() subdir_done()
endif endif
pcre_dep = declare_dependency(
compile_args: '-DPCRE2_CODE_UNIT_WIDTH=0',
dependencies: pcre_dep,
)
pcre = static_library( pcre = static_library(
'pcre', 'pcre',
'Error.cxx',
'UniqueRegex.cxx', 'UniqueRegex.cxx',
include_directories: inc, include_directories: inc,
dependencies: [ dependencies: [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment