Commit 77df5a8f, authored and committed by Max Kellermann

lib/pcre: migrate to PCRE2

parent d6bebd25
ver 0.23.5 (not yet released)
* migrate to PCRE2
* GCC 12 build fixes
ver 0.23.4 (2021/11/11)
......
......@@ -35,6 +35,7 @@ db_plugins = static_library(
include_directories: inc,
dependencies: [
upnp_dep,
pcre_dep,
libmpdclient_dep,
log_dep,
],
......
/*
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "Error.hxx"
#include <pcre2.h>
namespace Pcre {
ErrorCategory error_category;
std::string
ErrorCategory::message(int condition) const
{
PCRE2_UCHAR8 buffer[256];
pcre2_get_error_message_8(condition, buffer, std::size(buffer));
return std::string{(const char *)buffer};
}
} // namespace Pcre
/*
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <system_error>
namespace Pcre {
/**
 * A std::error_category implementation identifying itself as "pcre2";
 * the error codes are passed to message() for translation.
 */
class ErrorCategory final : public std::error_category {
public:
	/**
	 * The name of this error category.
	 */
	const char *name() const noexcept override { return "pcre2"; }

	/**
	 * Describe the given error code.  Only declared here; the
	 * definition lives outside of this class body.
	 */
	std::string message(int condition) const override;
};
extern ErrorCategory error_category;
inline std::system_error
MakeError(int error, const char *msg) noexcept
{
return std::system_error(error, error_category, msg);
}
} // namespace Pcre
/*
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <pcre2.h>
#include <cassert>
#include <cstddef>
#include <string_view>
#include <utility>
class MatchData {
friend class RegexPointer;
pcre2_match_data_8 *match_data = nullptr;
const char *s;
PCRE2_SIZE *ovector;
std::size_t n;
explicit MatchData(pcre2_match_data_8 *_md, const char *_s) noexcept
:match_data(_md), s(_s),
ovector(pcre2_get_ovector_pointer_8(match_data))
{
}
public:
MatchData() = default;
MatchData(MatchData &&src) noexcept
:match_data(std::exchange(src.match_data, nullptr)),
s(src.s), ovector(src.ovector), n(src.n) {}
~MatchData() noexcept {
if (match_data != nullptr)
pcre2_match_data_free_8(match_data);
}
MatchData &operator=(MatchData &&src) noexcept {
using std::swap;
swap(match_data, src.match_data);
swap(s, src.s);
swap(ovector, src.ovector);
swap(n, src.n);
return *this;
}
constexpr operator bool() const noexcept {
return match_data != nullptr;
}
constexpr std::size_t size() const noexcept {
assert(*this);
return static_cast<std::size_t>(n);
}
[[gnu::pure]]
constexpr std::string_view operator[](std::size_t i) const noexcept {
assert(*this);
assert(i < size());
int start = ovector[2 * i];
if (start < 0)
return {};
int end = ovector[2 * i + 1];
assert(end >= start);
return { s + start, std::size_t(end - start) };
}
static constexpr std::size_t npos = ~std::size_t{};
[[gnu::pure]]
constexpr std::size_t GetCaptureStart(std::size_t i) const noexcept {
assert(*this);
assert(i < size());
int start = ovector[2 * i];
if (start < 0)
return npos;
return std::size_t(start);
}
[[gnu::pure]]
constexpr std::size_t GetCaptureEnd(std::size_t i) const noexcept {
assert(*this);
assert(i < size());
int end = ovector[2 * i + 1];
if (end < 0)
return npos;
return std::size_t(end);
}
};
/*
* Copyright 2007-2018 Content Management AG
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
......@@ -30,27 +30,17 @@
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef REGEX_POINTER_HXX
#define REGEX_POINTER_HXX
#pragma once
#include "util/StringView.hxx"
#include "util/Compiler.h"
#include "MatchData.hxx"
#include <pcre.h>
#include <pcre2.h>
#include <array>
#if GCC_CHECK_VERSION(11,0)
#pragma GCC diagnostic push
/* bogus GCC 11 warning "ovector may be used uninitialized" in the
ovector.size() call */
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
#include <string_view>
class RegexPointer {
protected:
pcre *re = nullptr;
pcre_extra *extra = nullptr;
pcre2_code_8 *re = nullptr;
unsigned n_capture = 0;
......@@ -60,18 +50,28 @@ public:
}
[[gnu::pure]]
bool Match(StringView s) const noexcept {
/* we don't need the data written to ovector, but PCRE can
omit internal allocations if we pass a buffer to
pcre_exec() */
std::array<int, 16> ovector;
return pcre_exec(re, extra, s.data, s.size,
0, 0, &ovector.front(), ovector.size()) >= 0;
}
};
MatchData Match(std::string_view s) const noexcept {
MatchData match_data{
pcre2_match_data_create_from_pattern_8(re, nullptr),
s.data(),
};
int n = pcre2_match_8(re, (PCRE2_SPTR8)s.data(), s.size(),
0, 0,
match_data.match_data, nullptr);
if (n < 0)
/* no match (or error) */
return {};
#if GCC_CHECK_VERSION(11,0)
#pragma GCC diagnostic pop
#endif
match_data.n = n;
#endif
if (n_capture >= match_data.n)
/* in its return value, PCRE omits mismatching
optional captures if (and only if) they are
the last capture; this kludge works around
this */
match_data.n = n_capture + 1;
return match_data;
}
};
/*
* Copyright 2007-2018 Content Management AG
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
......@@ -31,41 +31,40 @@
*/
#include "UniqueRegex.hxx"
#include "util/RuntimeError.hxx"
#include "Error.hxx"
#include <stdio.h>
void
UniqueRegex::Compile(const char *pattern, bool anchored, bool capture,
bool caseless)
{
constexpr int default_options = PCRE_DOTALL|PCRE_NO_AUTO_CAPTURE|PCRE_UTF8;
constexpr int default_options = PCRE2_DOTALL|PCRE2_NO_AUTO_CAPTURE;
int options = default_options;
uint32_t options = default_options;
if (anchored)
options |= PCRE_ANCHORED;
options |= PCRE2_ANCHORED;
if (capture)
options &= ~PCRE_NO_AUTO_CAPTURE;
options &= ~PCRE2_NO_AUTO_CAPTURE;
if (caseless)
options |= PCRE_CASELESS;
const char *error_string;
int error_offset;
re = pcre_compile(pattern, options, &error_string, &error_offset, nullptr);
if (re == nullptr)
throw FormatRuntimeError("Error in regex at offset %d: %s",
error_offset, error_string);
options |= PCRE2_CASELESS;
int study_options = 0;
#ifdef PCRE_CONFIG_JIT
study_options |= PCRE_STUDY_JIT_COMPILE;
#endif
extra = pcre_study(re, study_options, &error_string);
if (extra == nullptr && error_string != nullptr) {
pcre_free(re);
re = nullptr;
throw FormatRuntimeError("Regex study error: %s", error_string);
int error_number;
PCRE2_SIZE error_offset;
re = pcre2_compile_8(PCRE2_SPTR8(pattern),
PCRE2_ZERO_TERMINATED, options,
&error_number, &error_offset,
nullptr);
if (re == nullptr) {
char msg[256];
snprintf(msg, sizeof(msg), "Error in regex at offset %zu",
error_offset);
throw Pcre::MakeError(error_number, msg);
}
int n;
if (capture && pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &n) == 0)
pcre2_jit_compile_8(re, PCRE2_JIT_COMPLETE);
if (int n; capture &&
pcre2_pattern_info_8(re, PCRE2_INFO_CAPTURECOUNT, &n) == 0)
n_capture = n;
}
/*
* Copyright 2007-2018 Content Management AG
* Copyright 2007-2021 CM4all GmbH
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
......@@ -30,15 +30,12 @@
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UNIQUE_REGEX_HXX
#define UNIQUE_REGEX_HXX
#pragma once
#include "RegexPointer.hxx"
#include <utility>
#include <pcre.h>
class UniqueRegex : public RegexPointer {
public:
UniqueRegex() = default;
......@@ -50,29 +47,22 @@ public:
UniqueRegex(UniqueRegex &&src) noexcept:RegexPointer(src) {
src.re = nullptr;
src.extra = nullptr;
}
~UniqueRegex() noexcept {
pcre_free(re);
#ifdef PCRE_CONFIG_JIT
pcre_free_study(extra);
#else
pcre_free(extra);
#endif
if (re != nullptr)
pcre2_code_free_8(re);
}
UniqueRegex &operator=(UniqueRegex &&src) {
UniqueRegex &operator=(UniqueRegex &&src) noexcept {
using std::swap;
swap<RegexPointer>(*this, src);
return *this;
}
/**
* Throws std::runtime_error on error.
* Throws Pcre::Error on error.
*/
void Compile(const char *pattern, bool anchored, bool capture,
bool caseless);
};
#endif
pcre_dep = dependency('libpcre', required: get_option('pcre'))
pcre_dep = dependency('libpcre2-8', required: get_option('pcre'))
conf.set('HAVE_PCRE', pcre_dep.found())
if not pcre_dep.found()
subdir_done()
endif
pcre_dep = declare_dependency(
compile_args: '-DPCRE2_CODE_UNIT_WIDTH=0',
dependencies: pcre_dep,
)
pcre = static_library(
'pcre',
'Error.cxx',
'UniqueRegex.cxx',
include_directories: inc,
dependencies: [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment