added vendor

parent b2e2a7b2

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"88c12a803c6c06c47cd9dabc8bcdba81f35d3bab637221d2106a86a543532731","DESIGN.md":"59c960e1b73b1d7fb41e4df6c0c1b1fcf44dd2ebc8a349597a7d0595f8cb5130","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"afc4d559a98cf190029af0bf320fc0022725e349cd2a303aac860254e28f3c53","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"c699c07df70be45c666e128509ad571a7649d2073e4ae16ac1efd6793c9c6890","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"bfef1a94c5e7410584b1beb4e857b40d1ae2031b881cbc06fb1300409bbd555f","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"aeb6ee5fd80eea04decbc4b46aa27d1ab270b78d416a644da25b7934f009ee66","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"de94f02b04efd8744fb096759a8897c22012b0e0ca3ace161fd87c71befefe04","src/packed/api.rs":"160d3b10823316f7b0924e13c3afd222c8a7db5c0a00432401f311ef27d6a1b7","src/packed/ext.rs":"66be06fde8558429da23a290584d4b9fae665bf64c2578db4fe5f5f3ee864869","src/packed/mod.rs":"0020cd6f07ba5c8955923a9516d7f758864260eda53a6b6f629131c45ddeec62","src/packed/pattern.rs":"1e3a289a730c141fc30b295811e372d046c6619c7fd670308299b889a06c7673","src/packed/rabinkarp.rs":"403146eb1d838a84601d171393542340513cd1ee7ff750f2372161dd47746586","src/packed/teddy/README.md":"3a43194b64e221543d885176aba3beb1224a927385a20eca842daf6b0ea2f342","src/packed/teddy/builder.rs":"08ec116a4a842a2bb1221d296a2515ef3672c54906bed588fb733364c07855d3","src/packed/teddy/generic.rs":"ea252ab05b32cea7dd9d71e332071d243db7dd0362e049252a27e5881ba2bf39","src/packed/
teddy/mod.rs":"17d741f7e2fb9dbac5ba7d1bd4542cf1e35e9f146ace728e23fe6bbed20028b2","src/packed/tests.rs":"8e2f56eb3890ed3876ecb47d3121996e416563127b6430110d7b516df3f83b4b","src/packed/vector.rs":"70c325cfa6f7c5c4c9a6af7b133b75a29e65990a7fe0b9a4c4ce3c3d5a0fe587","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"e264e6abebf5622b59f6500210773db36048371c4e509c930263334095959a52","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"9fa4498f18bf70478b1996c1a013698b626d15f119aa81dbc536673c9f045718","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"}
\ No newline at end of file
This project is dual-licensed under the Unlicense and MIT licenses.
You may use this code under the terms of either license.
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.60.0"
name = "aho-corasick"
version = "1.1.3"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = [
"/aho-corasick-debug",
"/benchmarks",
"/tmp",
]
autotests = false
description = "Fast multiple substring searching."
homepage = "https://github.com/BurntSushi/aho-corasick"
readme = "README.md"
keywords = [
"string",
"search",
"text",
"pattern",
"multi",
]
categories = ["text-processing"]
license = "Unlicense OR MIT"
repository = "https://github.com/BurntSushi/aho-corasick"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
"--generate-link-to-definition",
]
[profile.bench]
debug = 2
[profile.release]
debug = 2
[lib]
name = "aho_corasick"
[dependencies.log]
version = "0.4.17"
optional = true
[dependencies.memchr]
version = "2.4.0"
optional = true
default-features = false
[dev-dependencies.doc-comment]
version = "0.3.3"
[features]
default = [
"std",
"perf-literal",
]
logging = ["dep:log"]
perf-literal = ["dep:memchr"]
std = ["memchr?/std"]
The MIT License (MIT)
Copyright (c) 2015 Andrew Gallant
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
aho-corasick
============
A library for finding occurrences of many patterns at once with SIMD
acceleration in some cases. This library provides multiple pattern
search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a finite state machine for executing searches in linear time.
Features include case insensitive matching, overlapping matches, fast searching
via SIMD and optional full DFA construction and search & replace in streams.
[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions)
[![crates.io](https://img.shields.io/crates/v/aho-corasick.svg)](https://crates.io/crates/aho-corasick)
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/).
### Documentation
https://docs.rs/aho-corasick
### Usage
Run `cargo add aho-corasick` to automatically add this crate as a dependency
in your `Cargo.toml` file.
### Example: basic searching
This example shows how to search for occurrences of multiple patterns
simultaneously. Each match includes the pattern that matched along with the
byte offsets of the match.
```rust
use aho_corasick::{AhoCorasick, PatternID};
let patterns = &["apple", "maple", "Snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::new(patterns).unwrap();
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(PatternID::must(1), 13, 18),
(PatternID::must(0), 28, 33),
(PatternID::must(2), 43, 50),
]);
```
### Example: ASCII case insensitivity
This is like the previous example, but matches `Snapple` case insensitively
using `AhoCorasickBuilder`:
```rust
use aho_corasick::{AhoCorasick, PatternID};
let patterns = &["apple", "maple", "snapple"];
let haystack = "Nobody likes maple in their apple flavored Snapple.";
let ac = AhoCorasick::builder()
.ascii_case_insensitive(true)
.build(patterns)
.unwrap();
let mut matches = vec![];
for mat in ac.find_iter(haystack) {
matches.push((mat.pattern(), mat.start(), mat.end()));
}
assert_eq!(matches, vec![
(PatternID::must(1), 13, 18),
(PatternID::must(0), 28, 33),
(PatternID::must(2), 43, 50),
]);
```
### Example: replacing matches in a stream
This example shows how to execute a search and replace on a stream without
loading the entire stream into memory first.
```rust,ignore
use aho_corasick::AhoCorasick;
let patterns = &["fox", "brown", "quick"];
let replace_with = &["sloth", "grey", "slow"];
// In a real example, these might be `std::fs::File`s instead. All you need to
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns).unwrap();
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)
.expect("stream_replace_all failed");
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
```
### Example: finding the leftmost first match
In the textbook description of Aho-Corasick, its formulation is typically
structured such that it reports all possible matches, even when they overlap
with another. In many cases, overlapping matches may not be desired, such as
the case of finding all successive non-overlapping matches like you might with
a standard regular expression.
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
this doesn't always work in the expected way, since it will report matches as
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
against the text `Samwise`. Most regex engines (that are Perl-like, or
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
algorithm modified for reporting non-overlapping matches will report `Sam`.
A novel contribution of this library is the ability to change the match
semantics of Aho-Corasick (without additional search time overhead) such that
`Samwise` is reported instead. For example, here's the standard approach:
```rust
use aho_corasick::AhoCorasick;
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::new(patterns).unwrap();
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
```
And now here's the leftmost-first version, which matches how a Perl-like
regex will work:
```rust
use aho_corasick::{AhoCorasick, MatchKind};
let patterns = &["Samwise", "Sam"];
let haystack = "Samwise";
let ac = AhoCorasick::builder()
.match_kind(MatchKind::LeftmostFirst)
.build(patterns)
.unwrap();
let mat = ac.find(haystack).expect("should have a match");
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
```
In addition to leftmost-first semantics, this library also supports
leftmost-longest semantics, which match the POSIX behavior of a regular
expression alternation. See `MatchKind` in the docs for more details.
### Minimum Rust version policy
This crate's minimum supported `rustc` version is `1.60.0`.
The current policy is that the minimum Rust version required to use this crate
can be increased in minor version updates. For example, if `crate 1.0` requires
Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
version of Rust.
In general, this crate will be conservative with respect to the minimum
supported version of Rust.
### FFI bindings
* [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/)
is a Python wrapper for this library.
* [tmikus/ahocorasick_rs](https://github.com/tmikus/ahocorasick_rs) is a Go
wrapper for this library.
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>
max_width = 79
use_small_heuristics = "max"
This source diff could not be displayed because it is too large. You can view the blob instead.
#![allow(unused_macros)]
/// Expands to the given tokens wrapped in a block that is only compiled when
/// the `logging` crate feature is enabled.
///
/// The `#[cfg(feature = "logging")]` attribute sits on the generated block, so
/// with the feature disabled the block (and everything inside it) is removed
/// during compilation.
macro_rules! log {
($($tt:tt)*) => {
#[cfg(feature = "logging")]
{
$($tt)*
}
}
}
/// Forwards its arguments to `log::debug!`, routed through `log!` so that the
/// call is only compiled when the `logging` feature is enabled.
macro_rules! debug {
($($tt:tt)*) => { log!(log::debug!($($tt)*)) }
}
/// Forwards its arguments to `log::trace!`, routed through `log!` so that the
/// call is only compiled when the `logging` feature is enabled.
macro_rules! trace {
($($tt:tt)*) => { log!(log::trace!($($tt)*)) }
}
/*!
Provides direct access to NFA implementations of Aho-Corasick.
The principal characteristic of an NFA in this crate is that it may
transition through multiple states per byte of haystack. In Aho-Corasick
parlance, NFAs follow failure transitions during a search. In contrast,
a [`DFA`](crate::dfa::DFA) pre-computes all failure transitions during
compilation at the expense of a much bigger memory footprint.
Currently, there are two NFA implementations provided: noncontiguous and
contiguous. The names reflect their internal representation, and consequently,
the trade offs associated with them:
* A [`noncontiguous::NFA`] uses a separate allocation for every NFA state to
represent its transitions in a sparse format. This is ideal for building an
NFA, since it cheaply permits different states to have a different number of
transitions. A noncontiguous NFA is where the main Aho-Corasick construction
algorithm is implemented. All other Aho-Corasick implementations are built by
first constructing a noncontiguous NFA.
* A [`contiguous::NFA`] uses a single allocation to represent all states,
while still encoding most states as sparse states but permitting states near
the starting state to have a dense representation. The dense representation
uses more memory, but permits computing transitions during a search more
quickly. By only making the most active states dense (the states near the
starting state), a contiguous NFA better balances memory usage with search
speed. The single contiguous allocation also uses less overhead per state and
enables compression tricks where most states only use 8 bytes of heap memory.
When given the choice between these two, you almost always want to pick a
contiguous NFA. It takes only a little longer to build, but both its memory
usage and search speed are typically much better than a noncontiguous NFA. A
noncontiguous NFA is useful when prioritizing build times, or when there are
so many patterns that a contiguous NFA could not be built. (Currently, because
of both memory and search speed improvements, a contiguous NFA has a smaller
internal limit on the total number of NFA states it can represent. But you
would likely need to have hundreds of thousands or even millions of patterns
before you hit this limit.)
*/
pub mod contiguous;
pub mod noncontiguous;
/// Helper routines layered on top of raw pointers.
pub(crate) trait Pointer {
    /// Computes how far `self` is from `origin`, measured in units of `T`.
    ///
    /// # Safety
    ///
    /// Callers must uphold everything `ptr::offset_from` requires and, in
    /// addition, guarantee that `self >= origin`.
    unsafe fn distance(self, origin: Self) -> usize;

    /// Converts this pointer into a `usize`.
    ///
    /// The result is meant for arithmetic only (computing offsets or
    /// alignments). Avoid turning the `usize` back into a pointer if at all
    /// possible, since doing so has the potential to violate pointer
    /// provenance. (If you believe it's necessary, open an issue to discuss
    /// why.)
    fn as_usize(self) -> usize;
}

impl<T> Pointer for *const T {
    unsafe fn distance(self, origin: *const T) -> usize {
        // TODO: Replace with `ptr::sub_ptr` once stabilized.
        let delta: isize = self.offset_from(origin);
        // The caller promises `self >= origin`, so `delta` is never negative
        // and the conversion cannot fail.
        usize::try_from(delta).unwrap_unchecked()
    }

    fn as_usize(self) -> usize {
        self as usize
    }
}

impl<T> Pointer for *mut T {
    unsafe fn distance(self, origin: *mut T) -> usize {
        // Defer to the `*const T` implementation.
        let end: *const T = self;
        let start: *const T = origin;
        end.distance(start)
    }

    fn as_usize(self) -> usize {
        let this: *const T = self;
        this.as_usize()
    }
}
/*!
Provides packed multiple substring search, principally for a small number of
patterns.
This sub-module provides vectorized routines for quickly finding
matches of a small number of patterns. In general, users of this crate
shouldn't need to interface with this module directly, as the primary
[`AhoCorasick`](crate::AhoCorasick) searcher will use these routines
automatically as a prefilter when applicable. However, in some cases, callers
may want to bypass the Aho-Corasick machinery entirely and use this vectorized
searcher directly.
# Overview
The primary types in this sub-module are:
* [`Searcher`] executes the actual search algorithm to report matches in a
haystack.
* [`Builder`] accumulates patterns incrementally and can construct a
`Searcher`.
* [`Config`] permits tuning the searcher, and itself will produce a `Builder`
(which can then be used to build a `Searcher`). Currently, the only tunable
knob is the match semantics, but this may be expanded in the future.
# Examples
This example shows how to create a searcher from an iterator of patterns.
By default, leftmost-first match semantics are used. (See the top-level
[`MatchKind`] type for more details about match semantics, which apply
similarly to packed substring search.)
```
use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
# fn example() -> Option<()> {
let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
let matches: Vec<PatternID> = searcher
.find_iter("foobar")
.map(|mat| mat.pattern())
.collect();
assert_eq!(vec![PatternID::ZERO], matches);
# Some(()) }
# if cfg!(all(feature = "std", any(
# target_arch = "x86_64", target_arch = "aarch64",
# ))) {
# example().unwrap()
# } else {
# assert!(example().is_none());
# }
```
This example shows how to use [`Config`] to change the match semantics to
leftmost-longest:
```
use aho_corasick::{packed::{Config, MatchKind}, PatternID};
# fn example() -> Option<()> {
let searcher = Config::new()
.match_kind(MatchKind::LeftmostLongest)
.builder()
.add("foo")
.add("foobar")
.build()?;
let matches: Vec<PatternID> = searcher
.find_iter("foobar")
.map(|mat| mat.pattern())
.collect();
assert_eq!(vec![PatternID::must(1)], matches);
# Some(()) }
# if cfg!(all(feature = "std", any(
# target_arch = "x86_64", target_arch = "aarch64",
# ))) {
# example().unwrap()
# } else {
# assert!(example().is_none());
# }
```
# Packed substring searching
Packed substring searching refers to the use of SIMD (Single Instruction,
Multiple Data) to accelerate the detection of matches in a haystack. Unlike
conventional algorithms, such as Aho-Corasick, SIMD algorithms for substring
search tend to do better with a small number of patterns, whereas Aho-Corasick
generally maintains reasonably consistent performance regardless of the number
of patterns you give it. Because of this, the vectorized searcher in this
sub-module cannot be used as a general purpose searcher, since building the
searcher may fail even when given a small number of patterns. However, in
exchange, when searching for a small number of patterns, searching can be quite
a bit faster than Aho-Corasick (sometimes by an order of magnitude).
The key takeaway here is that constructing a searcher from a list of patterns
is a fallible operation with no clear rules for when it will fail. While the
precise conditions under which building a searcher can fail are specifically an
implementation detail, here are some common reasons:
* Too many patterns were given. Typically, the limit is on the order of 100 or
so, but this limit may fluctuate based on available CPU features.
* The available packed algorithms require CPU features that aren't available.
For example, currently, this crate only provides packed algorithms for
`x86_64` and `aarch64`. Therefore, constructing a packed searcher on any
other target will always fail.
* Zero patterns were given, or one of the patterns given was empty. Packed
searchers require at least one pattern and that all patterns are non-empty.
* Something else about the nature of the patterns (typically based on
heuristics) suggests that a packed searcher would perform very poorly, so
no searcher is built.
*/
pub use crate::packed::api::{Builder, Config, FindIter, MatchKind, Searcher};
mod api;
mod ext;
mod pattern;
mod rabinkarp;
mod teddy;
#[cfg(all(feature = "std", test))]
mod tests;
mod vector;
use alloc::{sync::Arc, vec, vec::Vec};
use crate::{packed::pattern::Patterns, util::search::Match, PatternID};
/// The type of the rolling hash used in the Rabin-Karp algorithm.
///
/// Hash values are combined with wrapping arithmetic and reduced modulo
/// `NUM_BUCKETS` to select a bucket.
type Hash = usize;
/// The number of buckets to store our patterns in. We don't want this to be
/// too big in order to avoid wasting memory, but we don't want it to be too
/// small either, to avoid spending too much time confirming literals.
///
/// The number of buckets MUST be a power of two. Otherwise, determining the
/// bucket from a hash will slow down the code considerably. Using a power
/// of two means `hash % NUM_BUCKETS` can compile down to a simple `and`
/// instruction.
const NUM_BUCKETS: usize = 64;
/// An implementation of the Rabin-Karp algorithm. The main idea of this
/// algorithm is to maintain a rolling hash as it moves through the input, and
/// then check whether that hash corresponds to the same hash for any of the
/// patterns we're looking for.
///
/// A drawback of naively scaling Rabin-Karp to multiple patterns is that
/// it requires all of the patterns to be the same length, which in turn
/// corresponds to the number of bytes to hash. We adapt this to work for
/// multiple patterns of varying size by fixing the number of bytes to hash
/// to be the length of the smallest pattern. We also split the patterns into
/// several buckets to hopefully make the confirmation step faster.
///
/// Wikipedia has a decent explanation, if a bit heavy on the theory:
/// <https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm>
///
/// But ESMAJ provides something a bit more concrete:
/// <https://www-igm.univ-mlv.fr/~lecroq/string/node5.html>
#[derive(Clone, Debug)]
pub(crate) struct RabinKarp {
/// The patterns we're searching for.
patterns: Arc<Patterns>,
/// One list of `(hash, pattern ID)` entries per bucket. A pattern is stored
/// in bucket `hash % NUM_BUCKETS`, where `hash` is the hash of its first
/// `hash_len` bytes.
///
/// The order of patterns in each bucket is significant. Namely, they are
/// arranged such that the first one to match is the correct match. This
/// may not necessarily correspond to the order provided by the caller.
/// For example, if leftmost-longest semantics are used, then the patterns
/// are sorted by their length in descending order. If leftmost-first
/// semantics are used, then the patterns are sorted by their pattern ID
/// in ascending order (which corresponds to the caller's order).
buckets: Vec<Vec<(Hash, PatternID)>>,
/// The length of the hashing window. Generally, this corresponds to the
/// length of the smallest pattern.
hash_len: usize,
/// The factor to subtract out of a hash before updating it with a new
/// byte. It is built by shifting `1` left `hash_len - 1` times, i.e., it is
/// the weight carried by the oldest byte in the hashing window.
hash_2pow: usize,
}
impl RabinKarp {
    /// Builds a Rabin-Karp searcher for the given collection of patterns.
    ///
    /// The hashing window is set to the length of the shortest pattern, and
    /// every pattern is filed into a bucket chosen by the hash of its first
    /// `hash_len` bytes.
    ///
    /// # Panics
    ///
    /// This panics if the collection is empty, or if any pattern in the
    /// collection is empty.
    pub(crate) fn new(patterns: &Arc<Patterns>) -> RabinKarp {
        assert!(patterns.len() >= 1);
        let hash_len = patterns.minimum_len();
        assert!(hash_len >= 1);

        // Shift `1` left once for every byte in the window past the first.
        let hash_2pow =
            (1..hash_len).fold(1usize, |pow, _| pow.wrapping_shl(1));

        let mut searcher = RabinKarp {
            patterns: Arc::clone(patterns),
            buckets: vec![vec![]; NUM_BUCKETS],
            hash_len,
            hash_2pow,
        };
        for (id, pat) in patterns.iter() {
            let hash = searcher.hash(&pat.bytes()[..searcher.hash_len]);
            searcher.buckets[hash % NUM_BUCKETS].push((hash, id));
        }
        searcher
    }

    /// Return the first matching pattern in the given haystack, beginning
    /// the search at `at`.
    ///
    /// Returns `None` when fewer than `hash_len` bytes remain at `at`, or
    /// when no pattern is confirmed at any remaining position.
    pub(crate) fn find_at(
        &self,
        haystack: &[u8],
        mut at: usize,
    ) -> Option<Match> {
        assert_eq!(NUM_BUCKETS, self.buckets.len());

        if at + self.hash_len > haystack.len() {
            return None;
        }
        let mut hash = self.hash(&haystack[at..at + self.hash_len]);
        loop {
            // Only candidates whose stored hash equals the window hash are
            // worth confirming. Bucket order is preserved, so the first
            // confirmed candidate wins.
            let candidates = &self.buckets[hash % NUM_BUCKETS];
            let confirmed = candidates
                .iter()
                .filter(|&&(phash, _)| phash == hash)
                .find_map(|&(_, pid)| self.verify(pid, haystack, at));
            if confirmed.is_some() {
                return confirmed;
            }

            // Slide the window forward by one byte, if there is one left.
            let incoming = at + self.hash_len;
            if incoming >= haystack.len() {
                return None;
            }
            hash = self.update_hash(hash, haystack[at], haystack[incoming]);
            at += 1;
        }
    }

    /// Returns the approximate total amount of heap used by this searcher,
    /// in units of bytes.
    pub(crate) fn memory_usage(&self) -> usize {
        let bucket_headers = self.buckets.len()
            * core::mem::size_of::<Vec<(Hash, PatternID)>>();
        let bucket_entries = self.patterns.len()
            * core::mem::size_of::<(Hash, PatternID)>();
        bucket_headers + bucket_entries
    }

    /// Verify whether the pattern with the given id matches at
    /// `haystack[at..]`.
    ///
    /// We tag this function as `cold` because it helps improve codegen.
    /// Intuitively, it would seem like inlining it would be better. However,
    /// the only time this is called and a match is not found is when there
    /// is a hash collision, or when a prefix of a pattern matches but the
    /// entire pattern doesn't match. This is hopefully fairly rare, and if
    /// it does occur a lot, it's going to be slow no matter what we do.
    #[cold]
    fn verify(
        &self,
        id: PatternID,
        haystack: &[u8],
        at: usize,
    ) -> Option<Match> {
        let pat = self.patterns.get(id);
        if !pat.is_prefix(&haystack[at..]) {
            return None;
        }
        Some(Match::new(id, at..at + pat.len()))
    }

    /// Hash the given bytes, which must be exactly `hash_len` bytes long.
    fn hash(&self, bytes: &[u8]) -> Hash {
        assert_eq!(self.hash_len, bytes.len());
        bytes.iter().fold(0usize, |acc, &b| {
            acc.wrapping_shl(1).wrapping_add(b as usize)
        })
    }

    /// Update the hash given based on removing `old_byte` at the beginning
    /// of some byte string, and appending `new_byte` to the end of that same
    /// byte string.
    fn update_hash(&self, prev: Hash, old_byte: u8, new_byte: u8) -> Hash {
        // Contribution of the byte that is leaving the window.
        let outgoing = (old_byte as usize).wrapping_mul(self.hash_2pow);
        prev.wrapping_sub(outgoing)
            .wrapping_shl(1)
            .wrapping_add(new_byte as usize)
    }
}
// Regrettable, but Teddy stuff just isn't used on all targets. And for some
// targets, like aarch64, only "slim" Teddy is used and so "fat" Teddy gets a
// bunch of dead-code warnings. Just not worth trying to squash them. Blech.
#![allow(dead_code)]
pub(crate) use self::builder::{Builder, Searcher};
mod builder;
mod generic;
/*!
Provides implementations of `fst::Automaton` for Aho-Corasick automata.
This works by providing two wrapper types, [`Anchored`] and [`Unanchored`].
The former executes an anchored search on an FST while the latter executes
an unanchored search. Building these wrappers is fallible and will fail if
the underlying Aho-Corasick automaton does not support the type of search it
represents.
*/
use crate::{
automaton::{Automaton, StateID},
Anchored as AcAnchored, Input, MatchError,
};
/// Represents an unanchored Aho-Corasick search of a finite state transducer.
///
/// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the
/// underlying automaton does not support unanchored searches.
///
/// Once a match state is reached, the `fst::Automaton` implementation remains
/// in that state for all subsequent bytes.
///
/// # Example
///
/// This shows how to build an FST of keys and then run an unanchored search on
/// those keys using an Aho-Corasick automaton.
///
/// ```
/// use aho_corasick::{nfa::contiguous::NFA, transducer::Unanchored};
/// use fst::{Automaton, IntoStreamer, Set, Streamer};
///
/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
/// // NFAs always support both unanchored and anchored searches.
/// let searcher = Unanchored::new(&nfa).unwrap();
///
/// let mut stream = set.search(searcher).into_stream();
/// let mut results = vec![];
/// while let Some(key) = stream.next() {
///     results.push(std::str::from_utf8(key).unwrap().to_string());
/// }
/// assert_eq!(vec!["abcd", "bcd", "xyz"], results);
/// ```
#[derive(Clone, Debug)]
pub struct Unanchored<A>(A);
impl<A: Automaton> Unanchored<A> {
/// Create a new `Unanchored` implementation of the `fst::Automaton` trait.
///
/// If the given Aho-Corasick automaton does not support unanchored
/// searches, then this returns an error.
pub fn new(aut: A) -> Result<Unanchored<A>, MatchError> {
let input = Input::new("").anchored(AcAnchored::No);
let _ = aut.start_state(&input)?;
Ok(Unanchored(aut))
}
/// Returns a borrow to the underlying automaton.
pub fn as_ref(&self) -> &A {
&self.0
}
/// Unwrap this value and return the inner automaton.
pub fn into_inner(self) -> A {
self.0
}
}
impl<A: Automaton> fst::Automaton for Unanchored<A> {
type State = StateID;
#[inline]
fn start(&self) -> StateID {
let input = Input::new("").anchored(AcAnchored::No);
self.0.start_state(&input).expect("support for unanchored searches")
}
#[inline]
fn is_match(&self, state: &StateID) -> bool {
self.0.is_match(*state)
}
#[inline]
fn accept(&self, state: &StateID, byte: u8) -> StateID {
if fst::Automaton::is_match(self, state) {
return *state;
}
self.0.next_state(AcAnchored::No, *state, byte)
}
#[inline]
fn can_match(&self, state: &StateID) -> bool {
!self.0.is_dead(*state)
}
}
/// Represents an anchored Aho-Corasick search of a finite state transducer.
///
/// Wrapping an Aho-Corasick automaton in `Anchored` will fail if the
/// underlying automaton does not support anchored searches.
///
/// # Example
///
/// This shows how to build an FST of keys and then run an anchored search on
/// those keys using an Aho-Corasick automaton.
///
/// ```
/// use aho_corasick::{nfa::contiguous::NFA, transducer::Anchored};
/// use fst::{Automaton, IntoStreamer, Set, Streamer};
///
/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
/// // NFAs always support both unanchored and anchored searches.
/// let searcher = Anchored::new(&nfa).unwrap();
///
/// let mut stream = set.search(searcher).into_stream();
/// let mut results = vec![];
/// while let Some(key) = stream.next() {
///     results.push(std::str::from_utf8(key).unwrap().to_string());
/// }
/// assert_eq!(vec!["bcd", "xyz"], results);
/// ```
///
/// This is like the example above, except we use an Aho-Corasick DFA, which
/// requires explicitly configuring it to support anchored searches. (NFAs
/// unconditionally support both unanchored and anchored searches.)
///
/// ```
/// use aho_corasick::{dfa::DFA, transducer::Anchored, StartKind};
/// use fst::{Automaton, IntoStreamer, Set, Streamer};
///
/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
/// let dfa = DFA::builder()
///     .start_kind(StartKind::Anchored)
///     .build(&["bcd", "x"])
///     .unwrap();
/// // We've explicitly configured our DFA to support anchored searches.
/// let searcher = Anchored::new(&dfa).unwrap();
///
/// let mut stream = set.search(searcher).into_stream();
/// let mut results = vec![];
/// while let Some(key) = stream.next() {
///     results.push(std::str::from_utf8(key).unwrap().to_string());
/// }
/// assert_eq!(vec!["bcd", "xyz"], results);
/// ```
#[derive(Clone, Debug)]
pub struct Anchored<A>(A);
impl<A: Automaton> Anchored<A> {
/// Create a new `Anchored` implementation of the `fst::Automaton` trait.
///
/// If the given Aho-Corasick automaton does not support anchored searches,
/// then this returns an error.
pub fn new(aut: A) -> Result<Anchored<A>, MatchError> {
let input = Input::new("").anchored(AcAnchored::Yes);
let _ = aut.start_state(&input)?;
Ok(Anchored(aut))
}
/// Returns a borrow to the underlying automaton.
pub fn as_ref(&self) -> &A {
&self.0
}
/// Unwrap this value and return the inner automaton.
pub fn into_inner(self) -> A {
self.0
}
}
impl<A: Automaton> fst::Automaton for Anchored<A> {
    type State = StateID;

    /// Returns the anchored start state of the wrapped automaton.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped automaton refuses to produce an anchored start
    /// state. `Anchored::new` performs the same request up front and returns
    /// an error instead, so this can only trigger for a wrapper that was not
    /// built through `new`.
    #[inline]
    fn start(&self) -> StateID {
        let input = Input::new("").anchored(AcAnchored::Yes);
        // The message names the capability that was expected to be present;
        // this wrapper needs *anchored* support.
        self.0.start_state(&input).expect("support for anchored searches")
    }

    /// Reports whether `state` is a match state of the wrapped automaton.
    #[inline]
    fn is_match(&self, state: &StateID) -> bool {
        self.0.is_match(*state)
    }

    /// Advances `state` by one input byte using anchored transitions.
    ///
    /// A match state is sticky: once reached, it is returned unchanged for
    /// every further byte.
    #[inline]
    fn accept(&self, state: &StateID, byte: u8) -> StateID {
        if fst::Automaton::is_match(self, state) {
            return *state;
        }
        self.0.next_state(AcAnchored::Yes, *state, byte)
    }

    /// Reports whether a match is still reachable from `state`, i.e. whether
    /// `state` is anything other than the wrapped automaton's dead state.
    #[inline]
    fn can_match(&self, state: &StateID) -> bool {
        !self.0.is_dead(*state)
    }
}
#[cfg(test)]
mod tests {
    use alloc::{string::String, vec, vec::Vec};

    use fst::{Automaton, IntoStreamer, Set, Streamer};

    use crate::{
        dfa::DFA,
        nfa::{contiguous, noncontiguous},
        StartKind,
    };

    use super::*;

    /// Runs `aut` over every key in `set` and returns the accepted keys, in
    /// stream order, as UTF-8 strings.
    fn search<A: Automaton, D: AsRef<[u8]>>(
        set: &Set<D>,
        aut: A,
    ) -> Vec<String> {
        let mut accepted = vec![];
        let mut keys = set.search(aut).into_stream();
        while let Some(raw) = keys.next() {
            let text = core::str::from_utf8(raw).unwrap();
            accepted.push(String::from(text));
        }
        accepted
    }

    /// Unanchored searches accept any key containing one of the patterns.
    #[test]
    fn unanchored() {
        let set =
            Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
                .unwrap();
        let patterns = vec!["baz", "bax"];
        let expected = vec!["baz", "xbax"];

        let aut = Unanchored(noncontiguous::NFA::new(&patterns).unwrap());
        assert_eq!(search(&set, &aut), expected);

        let aut = Unanchored(contiguous::NFA::new(&patterns).unwrap());
        assert_eq!(search(&set, &aut), expected);

        let aut = Unanchored(DFA::new(&patterns).unwrap());
        assert_eq!(search(&set, &aut), expected);
    }

    /// Anchored searches only accept keys that start with one of the
    /// patterns.
    #[test]
    fn anchored() {
        let set =
            Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
                .unwrap();
        let patterns = vec!["baz", "bax"];
        let expected = vec!["baz"];

        let aut = Anchored(noncontiguous::NFA::new(&patterns).unwrap());
        assert_eq!(search(&set, &aut), expected);

        let aut = Anchored(contiguous::NFA::new(&patterns).unwrap());
        assert_eq!(search(&set, &aut), expected);

        // A DFA only supports anchored searches when asked to at build time.
        let dfa = DFA::builder()
            .start_kind(StartKind::Anchored)
            .build(&patterns)
            .unwrap();
        let aut = Anchored(dfa);
        assert_eq!(search(&set, &aut), expected);
    }
}
use alloc::{vec, vec::Vec};
/// Capacity, in bytes, used for the stream buffer unless the maximum match
/// length calls for something larger.
const DEFAULT_BUFFER_CAPACITY: usize = 64 << 10; // 64 KB

/// A small "roll" buffer used to implement searches over streams.
///
/// The buffer holds a fixed amount of data read from a stream. What makes it
/// special is that a suffix of its contents can be "rolled" to the front
/// before more data is read in behind it. Consider searching for "foobar" in
/// a file containing `test test foobar test test`, and suppose the file
/// happens to be read in two chunks: `test test foo` and `bar test test`.
/// Without help, the match could not be reported as one contiguous `foobar`.
/// With rolling, the buffer holds `st foobar test test` after the second
/// read, and the search resumes right after `foo`. (The leading `st ` is
/// there because the N trailing bytes of the previous contents are kept,
/// where N is the maximum possible length of a match.)
///
/// Unfortunately, the logic for all of this is divided between this type and
/// `StreamChunkIter`.
///
/// Merely reporting matches does not need this buffer, since a `Match` is
/// only a pair of offsets. It is needed for operations such as
/// `try_stream_replace_all`, which must know which stream bytes belong to a
/// match and which do not. When a match straddles two `read` calls,
/// something has to hold on to the bytes of the first call, because it is
/// not known until the second call whether a match exists.
#[derive(Debug)]
pub(crate) struct Buffer {
    /// Backing storage. Its length is fixed at construction and never grows.
    buf: Vec<u8>,
    /// The minimum buffer size, equal to the maximum possible match length.
    /// This is also the number of bytes kept by `roll`.
    min: usize,
    /// Offset one past the last valid byte in `buf`.
    end: usize,
}

impl Buffer {
    /// Creates an empty buffer for stream searching. `min_buffer_len` should
    /// be the maximum possible match length; a value of `0` is treated as
    /// `1`.
    pub(crate) fn new(min_buffer_len: usize) -> Buffer {
        let min = min_buffer_len.max(1);
        // `min` is also how many bytes get rolled to support incremental
        // searching, so the capacity has to exceed it by at least one byte or
        // there would be no overlap at all. For performance we want quite a
        // bit more room than that, hence the `8 * min` lower bound.
        //
        // TODO: It would be good to find a way to test the streaming
        // implementation with the minimal buffer size. For now, this is done
        // by hand: temporarily replace the computation below with `1 + min`.
        let capacity = DEFAULT_BUFFER_CAPACITY.max(min * 8);
        Buffer { buf: vec![0; capacity], min, end: 0 }
    }

    /// Returns the bytes currently held by this buffer.
    #[inline]
    pub(crate) fn buffer(&self) -> &[u8] {
        &self.buf[..self.end]
    }

    /// Returns the minimum buffer size. The contents can only be shorter
    /// than this when the stream itself has fewer bytes than the minimum.
    #[inline]
    pub(crate) fn min_buffer_len(&self) -> usize {
        self.min
    }

    /// Returns the unused tail of the backing storage.
    fn free_buffer(&mut self) -> &mut [u8] {
        let filled = self.end;
        &mut self.buf[filled..]
    }

    /// Reads from `rdr` into the free capacity until the buffer holds at
    /// least the minimum amount or the reader reports no more data.
    ///
    /// Returns `Ok(false)` only when not a single byte could be read, and
    /// `Ok(true)` otherwise. I/O errors from the reader are passed through.
    pub(crate) fn fill<R: std::io::Read>(
        &mut self,
        mut rdr: R,
    ) -> std::io::Result<bool> {
        let mut progressed = false;
        loop {
            match rdr.read(self.free_buffer())? {
                // Nothing more to read: report whether earlier iterations
                // made any progress.
                0 => return Ok(progressed),
                count => {
                    progressed = true;
                    self.end += count;
                    if self.end >= self.min {
                        return Ok(true);
                    }
                }
            }
        }
    }

    /// Moves the last `min` bytes of the contents to the front of the buffer
    /// and discards everything before them.
    ///
    /// Call this only once the whole buffer has been searched.
    ///
    /// # Panics
    ///
    /// Panics if the buffer currently holds fewer than `min` bytes.
    pub(crate) fn roll(&mut self) {
        let roll_start = self
            .end
            .checked_sub(self.min)
            .expect("buffer capacity should be bigger than minimum amount");
        // The rolled region is exactly `min` bytes long and ends at `end`.
        self.buf.copy_within(roll_start..self.end, 0);
        self.end = self.min;
    }
}
/// A table with one `u8` entry per possible byte value; the entry for byte
/// `b` is at index `b` (the trailing comment on each line names that byte).
///
/// NOTE(review): the values look like relative frequency ranks, where a
/// larger number means the byte is expected to occur more often (ASCII space
/// is 255 while most control characters are low). Confirm against the code
/// that consumes this table before relying on that interpretation.
pub const BYTE_FREQUENCIES: [u8; 256] = [
55, // '\x00'
52, // '\x01'
51, // '\x02'
50, // '\x03'
49, // '\x04'
48, // '\x05'
47, // '\x06'
46, // '\x07'
45, // '\x08'
103, // '\t'
242, // '\n'
66, // '\x0b'
67, // '\x0c'
229, // '\r'
44, // '\x0e'
43, // '\x0f'
42, // '\x10'
41, // '\x11'
40, // '\x12'
39, // '\x13'
38, // '\x14'
37, // '\x15'
36, // '\x16'
35, // '\x17'
34, // '\x18'
33, // '\x19'
56, // '\x1a'
32, // '\x1b'
31, // '\x1c'
30, // '\x1d'
29, // '\x1e'
28, // '\x1f'
255, // ' '
148, // '!'
164, // '"'
149, // '#'
136, // '$'
160, // '%'
155, // '&'
173, // "'"
221, // '('
222, // ')'
134, // '*'
122, // '+'
232, // ','
202, // '-'
215, // '.'
224, // '/'
208, // '0'
220, // '1'
204, // '2'
187, // '3'
183, // '4'
179, // '5'
177, // '6'
168, // '7'
178, // '8'
200, // '9'
226, // ':'
195, // ';'
154, // '<'
184, // '='
174, // '>'
126, // '?'
120, // '@'
191, // 'A'
157, // 'B'
194, // 'C'
170, // 'D'
189, // 'E'
162, // 'F'
161, // 'G'
150, // 'H'
193, // 'I'
142, // 'J'
137, // 'K'
171, // 'L'
176, // 'M'
185, // 'N'
167, // 'O'
186, // 'P'
112, // 'Q'
175, // 'R'
192, // 'S'
188, // 'T'
156, // 'U'
140, // 'V'
143, // 'W'
123, // 'X'
133, // 'Y'
128, // 'Z'
147, // '['
138, // '\\'
146, // ']'
114, // '^'
223, // '_'
151, // '`'
249, // 'a'
216, // 'b'
238, // 'c'
236, // 'd'
253, // 'e'
227, // 'f'
218, // 'g'
230, // 'h'
247, // 'i'
135, // 'j'
180, // 'k'
241, // 'l'
233, // 'm'
246, // 'n'
244, // 'o'
231, // 'p'
139, // 'q'
245, // 'r'
243, // 's'
251, // 't'
235, // 'u'
201, // 'v'
196, // 'w'
240, // 'x'
214, // 'y'
152, // 'z'
182, // '{'
205, // '|'
181, // '}'
127, // '~'
27, // '\x7f'
212, // '\x80'
211, // '\x81'
210, // '\x82'
213, // '\x83'
228, // '\x84'
197, // '\x85'
169, // '\x86'
159, // '\x87'
131, // '\x88'
172, // '\x89'
105, // '\x8a'
80, // '\x8b'
98, // '\x8c'
96, // '\x8d'
97, // '\x8e'
81, // '\x8f'
207, // '\x90'
145, // '\x91'
116, // '\x92'
115, // '\x93'
144, // '\x94'
130, // '\x95'
153, // '\x96'
121, // '\x97'
107, // '\x98'
132, // '\x99'
109, // '\x9a'
110, // '\x9b'
124, // '\x9c'
111, // '\x9d'
82, // '\x9e'
108, // '\x9f'
118, // '\xa0'
141, // '¡'
113, // '¢'
129, // '£'
119, // '¤'
125, // '¥'
165, // '¦'
117, // '§'
92, // '¨'
106, // '©'
83, // 'ª'
72, // '«'
99, // '¬'
93, // '\xad'
65, // '®'
79, // '¯'
166, // '°'
237, // '±'
163, // '²'
199, // '³'
190, // '´'
225, // 'µ'
209, // '¶'
203, // '·'
198, // '¸'
217, // '¹'
219, // 'º'
206, // '»'
234, // '¼'
248, // '½'
158, // '¾'
239, // '¿'
255, // 'À'
255, // 'Á'
255, // 'Â'
255, // 'Ã'
255, // 'Ä'
255, // 'Å'
255, // 'Æ'
255, // 'Ç'
255, // 'È'
255, // 'É'
255, // 'Ê'
255, // 'Ë'
255, // 'Ì'
255, // 'Í'
255, // 'Î'
255, // 'Ï'
255, // 'Ð'
255, // 'Ñ'
255, // 'Ò'
255, // 'Ó'
255, // 'Ô'
255, // 'Õ'
255, // 'Ö'
255, // '×'
255, // 'Ø'
255, // 'Ù'
255, // 'Ú'
255, // 'Û'
255, // 'Ü'
255, // 'Ý'
255, // 'Þ'
255, // 'ß'
255, // 'à'
255, // 'á'
255, // 'â'
255, // 'ã'
255, // 'ä'
255, // 'å'
255, // 'æ'
255, // 'ç'
255, // 'è'
255, // 'é'
255, // 'ê'
255, // 'ë'
255, // 'ì'
255, // 'í'
255, // 'î'
255, // 'ï'
255, // 'ð'
255, // 'ñ'
255, // 'ò'
255, // 'ó'
255, // 'ô'
255, // 'õ'
255, // 'ö'
255, // '÷'
255, // 'ø'
255, // 'ù'
255, // 'ú'
255, // 'û'
255, // 'ü'
255, // 'ý'
255, // 'þ'
255, // 'ÿ'
];
/// A single byte whose `fmt::Debug` output is the escaped form of that byte.
pub(crate) struct DebugByte(pub(crate) u8);

impl core::fmt::Debug for DebugByte {
    /// Writes the byte using `core::ascii::escape_default`, with two tweaks:
    /// an ASCII space is written as `' '`, and the hex digits of a `\xNN`
    /// escape are upper-cased.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is hard to spot in output, so it gets quotes. (Maybe
        // plain '\x20' would be the better choice...)
        if self.0 == b' ' {
            return f.write_str("' '");
        }
        // No escape produced by ascii::escape_default exceeds 10 bytes.
        let mut escaped = [0u8; 10];
        let mut used = 0;
        for (pos, byte) in core::ascii::escape_default(self.0).enumerate() {
            // Positions 0 and 1 hold the `\x` prefix (or a short escape);
            // only the digits after it are candidates for upper-casing.
            let is_hex_letter = pos >= 2 && (b'a'..=b'f').contains(&byte);
            escaped[used] =
                if is_hex_letter { byte.to_ascii_uppercase() } else { byte };
            used += 1;
        }
        f.write_str(core::str::from_utf8(&escaped[..used]).unwrap())
    }
}
use crate::util::{
primitives::{PatternID, SmallIndex},
search::MatchKind,
};
/// An error that occurred during the construction of an Aho-Corasick
/// automaton.
///
/// Build errors occur when some kind of limit has been exceeded, either in the
/// number of states, the number of patterns or the length of a pattern. These
/// limits aren't part of the public API, but they should generally be large
/// enough to handle most use cases.
///
/// When the `std` feature is enabled, this implements the `std::error::Error`
/// trait.
#[derive(Clone, Debug)]
pub struct BuildError {
/// The specific limit that was exceeded. Kept private so that the set of
/// error kinds is not part of the public API.
kind: ErrorKind,
}
/// The kind of build error that occurred. Each variant carries the values
/// that are interpolated into the `Display` message of `BuildError`.
#[derive(Clone, Debug)]
enum ErrorKind {
/// An error that occurs when allocating a new state would result in an
/// identifier that exceeds the capacity of a `StateID`.
StateIDOverflow {
/// The maximum possible id.
max: u64,
/// The maximum ID requested.
requested_max: u64,
},
/// An error that occurs when adding a pattern to an Aho-Corasick
/// automaton would result in an identifier that exceeds the capacity of a
/// `PatternID`.
PatternIDOverflow {
/// The maximum possible id.
max: u64,
/// The maximum ID requested.
requested_max: u64,
},
/// Occurs when a pattern string is given to the Aho-Corasick constructor
/// that is too long.
PatternTooLong {
/// The ID of the pattern that was too long.
pattern: PatternID,
/// The length that was too long.
len: usize,
},
}
impl BuildError {
    /// Builds the error for a state ID request of `requested_max` that
    /// exceeds the largest representable state ID, `max`.
    pub(crate) fn state_id_overflow(
        max: u64,
        requested_max: u64,
    ) -> BuildError {
        let kind = ErrorKind::StateIDOverflow { max, requested_max };
        BuildError { kind }
    }

    /// Builds the error for a pattern ID request of `requested_max` that
    /// exceeds the largest representable pattern ID, `max`.
    pub(crate) fn pattern_id_overflow(
        max: u64,
        requested_max: u64,
    ) -> BuildError {
        let kind = ErrorKind::PatternIDOverflow { max, requested_max };
        BuildError { kind }
    }

    /// Builds the error for the pattern identified by `pattern`, whose
    /// length `len` is too long.
    pub(crate) fn pattern_too_long(
        pattern: PatternID,
        len: usize,
    ) -> BuildError {
        let kind = ErrorKind::PatternTooLong { pattern, len };
        BuildError { kind }
    }
}
// The impl body is empty, so every `std::error::Error` method uses the
// trait's default implementation. Only available with the `std` feature.
#[cfg(feature = "std")]
impl std::error::Error for BuildError {}
impl core::fmt::Display for BuildError {
    /// Writes a one-line, human readable description of the limit that was
    /// exceeded, including the offending and maximum values.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match &self.kind {
            ErrorKind::StateIDOverflow { max, requested_max } => write!(
                f,
                "state identifier overflow: failed to create state ID \
                 from {}, which exceeds the max of {}",
                requested_max, max,
            ),
            ErrorKind::PatternIDOverflow { max, requested_max } => write!(
                f,
                "pattern identifier overflow: failed to create pattern ID \
                 from {}, which exceeds the max of {}",
                requested_max, max,
            ),
            ErrorKind::PatternTooLong { pattern, len } => {
                // The limit itself is not stored in the error; it is always
                // the largest `SmallIndex`.
                let limit = SmallIndex::MAX.as_usize();
                write!(
                    f,
                    "pattern {} with length {} exceeds \
                     the maximum pattern length of {}",
                    pattern.as_usize(),
                    len,
                    limit,
                )
            }
        }
    }
}
/// An error that occurred during an Aho-Corasick search.
///
/// An error that occurs during a search is limited to some kind of
/// misconfiguration that resulted in an illegal call. Stated differently,
/// whether an error occurs is not dependent on the specific bytes in the
/// haystack.
///
/// Examples of misconfiguration:
///
/// * Executing a stream or overlapping search on a searcher that was built
/// with something other than
/// [`MatchKind::Standard`](crate::MatchKind::Standard) semantics.
/// * Requesting an anchored or an unanchored search on a searcher that doesn't
/// support anchored or unanchored searches, respectively.
///
/// When the `std` feature is enabled, this implements the `std::error::Error`
/// trait.
// The kind is stored behind a `Box`, so the error itself is one pointer wide.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MatchError(alloc::boxed::Box<MatchErrorKind>);
impl MatchError {
    /// Creates an error from an explicit kind.
    ///
    /// The kind-specific constructors, such as
    /// `MatchError::unsupported_stream`, are shorter ways of doing the same
    /// thing.
    pub fn new(kind: MatchErrorKind) -> MatchError {
        let boxed = alloc::boxed::Box::new(kind);
        MatchError(boxed)
    }

    /// Borrows the kind of this error.
    pub fn kind(&self) -> &MatchErrorKind {
        &*self.0
    }

    /// Creates an "invalid anchored search" error, for when the caller asks
    /// for an anchored search that the searcher does not support.
    ///
    /// Equivalent to `MatchError::new` with
    /// [`MatchErrorKind::InvalidInputAnchored`].
    pub fn invalid_input_anchored() -> MatchError {
        Self::new(MatchErrorKind::InvalidInputAnchored)
    }

    /// Creates an "invalid unanchored search" error, for when the caller
    /// asks for an unanchored search that the searcher does not support.
    ///
    /// Equivalent to `MatchError::new` with
    /// [`MatchErrorKind::InvalidInputUnanchored`].
    pub fn invalid_input_unanchored() -> MatchError {
        Self::new(MatchErrorKind::InvalidInputUnanchored)
    }

    /// Creates an "unsupported stream search" error, for when a stream
    /// search is requested on an automaton whose match kind is not
    /// [`MatchKind::Standard`].
    ///
    /// `got` should be the automaton's match kind, and should therefore
    /// never be `MatchKind::Standard`.
    pub fn unsupported_stream(got: MatchKind) -> MatchError {
        Self::new(MatchErrorKind::UnsupportedStream { got })
    }

    /// Creates an "unsupported overlapping search" error, for when an
    /// overlapping search is requested on an automaton whose match kind is
    /// not [`MatchKind::Standard`].
    ///
    /// `got` should be the automaton's match kind, and should therefore
    /// never be `MatchKind::Standard`.
    pub fn unsupported_overlapping(got: MatchKind) -> MatchError {
        Self::new(MatchErrorKind::UnsupportedOverlapping { got })
    }

    /// Creates an "unsupported empty pattern" error, for when the requested
    /// operation cannot be used with an automaton that contains an empty
    /// pattern string.
    pub fn unsupported_empty() -> MatchError {
        Self::new(MatchErrorKind::UnsupportedEmpty)
    }
}
/// The underlying kind of a [`MatchError`].
///
/// This is a **non-exhaustive** enum. That means new variants may be added in
/// a semver-compatible release, so matches on it outside this crate need a
/// wildcard arm.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchErrorKind {
/// An error indicating that an anchored search was requested, but from a
/// searcher that was built without anchored support.
InvalidInputAnchored,
/// An error indicating that an unanchored search was requested, but from a
/// searcher that was built without unanchored support.
InvalidInputUnanchored,
/// An error indicating that a stream search was attempted on an
/// Aho-Corasick automaton with an unsupported `MatchKind`.
UnsupportedStream {
/// The match semantics for the automaton that was used.
got: MatchKind,
},
/// An error indicating that an overlapping search was attempted on an
/// Aho-Corasick automaton with an unsupported `MatchKind`.
UnsupportedOverlapping {
/// The match semantics for the automaton that was used.
got: MatchKind,
},
/// An error indicating that the operation requested doesn't support
/// automatons that contain an empty pattern string.
UnsupportedEmpty,
}
// The impl body is empty, so every `std::error::Error` method uses the
// trait's default implementation. Only available with the `std` feature.
#[cfg(feature = "std")]
impl std::error::Error for MatchError {}
impl core::fmt::Display for MatchError {
    /// Writes a one-line, human readable description of the
    /// misconfiguration that caused the search to be rejected.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        match self.kind() {
            MatchErrorKind::InvalidInputAnchored => f.write_str(
                "anchored searches are not supported or enabled",
            ),
            MatchErrorKind::InvalidInputUnanchored => f.write_str(
                "unanchored searches are not supported or enabled",
            ),
            MatchErrorKind::UnsupportedStream { got } => write!(
                f,
                "match kind {:?} does not support stream searching",
                got,
            ),
            MatchErrorKind::UnsupportedOverlapping { got } => write!(
                f,
                "match kind {:?} does not support overlapping searches",
                got,
            ),
            MatchErrorKind::UnsupportedEmpty => f.write_str(
                "matching with an empty pattern string is not \
                 supported for this operation",
            ),
        }
    }
}
/*!
This module provides several integer oriented traits for converting between
both fixed size integers and integers whose size varies based on the target
(like `usize`).
The main design principle for this module is to centralize all uses of `as`.
The thinking here is that `as` makes it very easy to perform accidental lossy
conversions, and if we centralize all its uses here under more descriptive
higher level operations, its use and correctness becomes easier to audit.
This was copied mostly wholesale from `regex-automata`.
NOTE: for simplicity, we don't take target pointer width into account here for
`usize` conversions. Since we currently only panic in debug mode, skipping the
check when it can be proven it isn't needed at compile time doesn't really
matter. Now, if we wind up wanting to do as many checks as possible in release
mode, then we would want to skip those when we know the conversions are always
non-lossy.
*/
// We define a little more than what we need, but I'd rather just have
// everything via a consistent and uniform API than have holes.
#![allow(dead_code)]
/// Conversions for `u8`.
pub(crate) trait U8 {
    /// Widens the value to `usize`. This never loses information.
    fn as_usize(self) -> usize;
}

impl U8 for u8 {
    fn as_usize(self) -> usize {
        self.into()
    }
}
/// Conversions and byte extraction for `u16`.
pub(crate) trait U16 {
    /// Widens the value to `usize`. This never loses information.
    fn as_usize(self) -> usize;
    /// Returns the least significant byte.
    fn low_u8(self) -> u8;
    /// Returns the most significant byte.
    fn high_u8(self) -> u8;
}

impl U16 for u16 {
    fn as_usize(self) -> usize {
        self.into()
    }

    fn low_u8(self) -> u8 {
        // Little-endian order puts the least significant byte first.
        let [low, _] = self.to_le_bytes();
        low
    }

    fn high_u8(self) -> u8 {
        // Big-endian order puts the most significant byte first.
        let [high, _] = self.to_be_bytes();
        high
    }
}
/// Conversions and bit extraction for `u32`.
pub(crate) trait U32 {
    /// Converts to `usize`. With debug assertions enabled this panics if the
    /// value does not fit; otherwise a plain `as` cast is used.
    fn as_usize(self) -> usize;
    /// Returns the least significant 8 bits.
    fn low_u8(self) -> u8;
    /// Returns the least significant 16 bits.
    fn low_u16(self) -> u16;
    /// Returns the most significant 16 bits.
    fn high_u16(self) -> u16;
}

impl U32 for u32 {
    #[inline]
    fn as_usize(self) -> usize {
        if cfg!(debug_assertions) {
            usize::try_from(self).expect("u32 overflowed usize")
        } else {
            self as usize
        }
    }

    fn low_u8(self) -> u8 {
        // Truncation is the point here.
        self as u8
    }

    fn low_u16(self) -> u16 {
        // Truncation is the point here.
        self as u16
    }

    fn high_u16(self) -> u16 {
        let upper = self >> 16;
        upper as u16
    }
}
/// Conversions and bit extraction for `u64`.
pub(crate) trait U64 {
    /// Converts to `usize`. With debug assertions enabled this panics if the
    /// value does not fit; otherwise a plain `as` cast is used.
    fn as_usize(self) -> usize;
    /// Returns the least significant 8 bits.
    fn low_u8(self) -> u8;
    /// Returns the least significant 16 bits.
    fn low_u16(self) -> u16;
    /// Returns the least significant 32 bits.
    fn low_u32(self) -> u32;
    /// Returns the most significant 32 bits.
    fn high_u32(self) -> u32;
}

impl U64 for u64 {
    fn as_usize(self) -> usize {
        if cfg!(debug_assertions) {
            usize::try_from(self).expect("u64 overflowed usize")
        } else {
            self as usize
        }
    }

    fn low_u8(self) -> u8 {
        // Truncation is the point here.
        self as u8
    }

    fn low_u16(self) -> u16 {
        // Truncation is the point here.
        self as u16
    }

    fn low_u32(self) -> u32 {
        // Truncation is the point here.
        self as u32
    }

    fn high_u32(self) -> u32 {
        let upper = self >> 32;
        upper as u32
    }
}
/// Conversions for `i8`.
pub(crate) trait I8 {
    /// Converts to `usize`. With debug assertions enabled this panics if the
    /// value is negative; otherwise a plain `as` cast is used.
    fn as_usize(self) -> usize;
    /// Reinterprets the bits of this value as a `u8`.
    fn to_bits(self) -> u8;
    /// Reinterprets the bits of `n` as an `i8`.
    fn from_bits(n: u8) -> i8;
}

impl I8 for i8 {
    fn as_usize(self) -> usize {
        if cfg!(debug_assertions) {
            usize::try_from(self).expect("i8 overflowed usize")
        } else {
            self as usize
        }
    }

    fn to_bits(self) -> u8 {
        // Same-width cast: the bit pattern is kept as-is.
        self as u8
    }

    fn from_bits(n: u8) -> i8 {
        // Same-width cast: the bit pattern is kept as-is.
        n as i8
    }
}
/// Conversions for `i32`.
pub(crate) trait I32 {
    /// Converts to `usize`. With debug assertions enabled this panics if the
    /// value does not fit (e.g. it is negative); otherwise a plain `as` cast
    /// is used.
    fn as_usize(self) -> usize;
    /// Reinterprets the bits of this value as a `u32`.
    fn to_bits(self) -> u32;
    /// Reinterprets the bits of `n` as an `i32`.
    fn from_bits(n: u32) -> i32;
}

impl I32 for i32 {
    fn as_usize(self) -> usize {
        if cfg!(debug_assertions) {
            usize::try_from(self).expect("i32 overflowed usize")
        } else {
            self as usize
        }
    }

    fn to_bits(self) -> u32 {
        // Same-width cast: the bit pattern is kept as-is.
        self as u32
    }

    fn from_bits(n: u32) -> i32 {
        // Same-width cast: the bit pattern is kept as-is.
        n as i32
    }
}
/// Conversions for `i64`.
pub(crate) trait I64 {
    /// Converts to `usize`. With debug assertions enabled this panics if the
    /// value does not fit (e.g. it is negative); otherwise a plain `as` cast
    /// is used.
    fn as_usize(self) -> usize;
    /// Reinterprets the bits of this value as a `u64`.
    fn to_bits(self) -> u64;
    /// Reinterprets the bits of `n` as an `i64`.
    fn from_bits(n: u64) -> i64;
}

impl I64 for i64 {
    fn as_usize(self) -> usize {
        if cfg!(debug_assertions) {
            usize::try_from(self).expect("i64 overflowed usize")
        } else {
            self as usize
        }
    }

    fn to_bits(self) -> u64 {
        // Same-width cast: the bit pattern is kept as-is.
        self as u64
    }

    fn from_bits(n: u64) -> i64 {
        // Same-width cast: the bit pattern is kept as-is.
        n as i64
    }
}
/// Narrowing (or same-width) conversions for `usize`.
///
/// With debug assertions enabled every method panics if the value does not
/// fit in the target type; otherwise a plain `as` cast is used.
pub(crate) trait Usize {
    /// Converts to `u8`.
    fn as_u8(self) -> u8;
    /// Converts to `u16`.
    fn as_u16(self) -> u16;
    /// Converts to `u32`.
    fn as_u32(self) -> u32;
    /// Converts to `u64`.
    fn as_u64(self) -> u64;
}

impl Usize for usize {
    fn as_u8(self) -> u8 {
        if cfg!(debug_assertions) {
            u8::try_from(self).expect("usize overflowed u8")
        } else {
            self as u8
        }
    }

    fn as_u16(self) -> u16 {
        if cfg!(debug_assertions) {
            u16::try_from(self).expect("usize overflowed u16")
        } else {
            self as u16
        }
    }

    fn as_u32(self) -> u32 {
        if cfg!(debug_assertions) {
            u32::try_from(self).expect("usize overflowed u32")
        } else {
            self as u32
        }
    }

    fn as_u64(self) -> u64 {
        if cfg!(debug_assertions) {
            u64::try_from(self).expect("usize overflowed u64")
        } else {
            self as u64
        }
    }
}
// Pointers aren't integers, but we convert pointers to integers to perform
// offset arithmetic in some places. (And no, we don't convert the integers
// back to pointers.) So add 'as_usize' conversions here too for completeness.
//
// These 'as' casts are actually okay because they're always non-lossy. But the
// idea here is to just try and remove as much 'as' as possible, particularly
// in this crate where we are being really paranoid about offsets and making
// sure we don't panic on inputs that might be untrusted. This way, the 'as'
// casts become easier to audit if they're all in one place, even when some of
// them are actually okay 100% of the time.
/// Conversion of a raw pointer to its address.
pub(crate) trait Pointer {
/// Returns the address of this pointer as a `usize`.
fn as_usize(self) -> usize;
}
// `T` has the implicit `Sized` bound, so this only covers pointers to sized
// types.
impl<T> Pointer for *const T {
fn as_usize(self) -> usize {
self as usize
}
}
pub(crate) mod alphabet;
#[cfg(feature = "std")]
pub(crate) mod buffer;
pub(crate) mod byte_frequencies;
pub(crate) mod debug;
pub(crate) mod error;
pub(crate) mod int;
pub(crate) mod prefilter;
pub(crate) mod primitives;
pub(crate) mod remapper;
pub(crate) mod search;
pub(crate) mod special;
use alloc::vec::Vec;
use crate::{nfa::noncontiguous, util::primitives::StateID};
/// Remappable is a tightly coupled abstraction that facilitates remapping
/// state identifiers in DFAs.
///
/// The main idea behind remapping state IDs is that DFAs often need to check
/// if a certain state is a "special" state of some kind (like a match state)
/// during a search. Since this is extremely perf critical code, we want this
/// check to be as fast as possible. Partitioning state IDs into, for example,
/// "non-match" and "match" states means one can tell if a state is a
/// match state via a simple comparison of the state ID.
///
/// The issue is that during the DFA construction process, it's not
/// particularly easy to partition the states. Instead, the simplest thing is
/// to often just do a pass over all of the states and shuffle them into their
/// desired partitionings. To do that, we need a mechanism for swapping states.
/// Hence, this abstraction.
///
/// Normally, for such little code, I would just duplicate it. But this is a
/// key optimization and the implementation is a bit subtle. So the abstraction
/// is basically a ham-fisted attempt at DRY. The only place we use this is in
/// the dense and one-pass DFAs.
///
/// See also src/dfa/special.rs for a more detailed explanation of how dense
/// DFAs are partitioned.
///
/// NOTE(review): the last two paragraphs mention dense/one-pass DFAs and
/// `src/dfa/special.rs`, but the only implementation visible in this module is
/// for `noncontiguous::NFA`. The text may have been inherited from another
/// crate; confirm that the references are still accurate.
pub(crate) trait Remappable: core::fmt::Debug {
/// Return the total number of states.
fn state_len(&self) -> usize;
/// Swap the states pointed to by the given IDs. The underlying finite
/// state machine should be mutated such that all of the transitions in
/// `id1` are now in the memory region where the transitions for `id2`
/// were, and all of the transitions in `id2` are now in the memory region
/// where the transitions for `id1` were.
///
/// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
///
/// It is expected that, after calling this, the underlying state machine
/// will be left in an inconsistent state, since any other transitions
/// pointing to, e.g., `id1` need to be updated to point to `id2`, since
/// that's where `id1` moved to.
///
/// In order to "fix" the underlying inconsistent state, a `Remapper`
/// should be used to guarantee that `remap` is called at the appropriate
/// time.
fn swap_states(&mut self, id1: StateID, id2: StateID);
/// This must remap every single state ID in the underlying value according
/// to the function given. For example, in a DFA, this should remap every
/// transition and every starting state ID.
fn remap(&mut self, map: impl Fn(StateID) -> StateID);
}
/// Remapper is an abstraction that manages the remapping of state IDs in a
/// finite state machine. This is useful when one wants to shuffle states into
/// different positions in the machine.
///
/// One of the key complexities this manages is the ability to correctly move
/// one state multiple times.
///
/// Once shuffling is complete, `remap` must be called, which will rewrite
/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
/// will almost certainly result in a corrupt machine.
#[derive(Debug)]
pub(crate) struct Remapper {
/// A map from the index of a state to its pre-multiplied identifier.
///
/// When a state is swapped with another, then their corresponding
/// locations in this map are also swapped. Thus, its new position will
/// still point to its old pre-multiplied StateID.
///
/// While there is a bit more to it, this then allows us to rewrite the
/// state IDs in a DFA's transition table in a single pass. This is done
/// by iterating over every ID in this map, then iterating over each
/// transition for the state at that ID and re-mapping the transition from
/// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
/// in this map where `old_id` *started*, and set it to where it ended up
/// after all swaps have been completed.
map: Vec<StateID>,
/// A way to map indices to state IDs (and back).
idx: IndexMapper,
}
impl Remapper {
    /// Creates a remapper for the given remappable value. The very same
    /// value must later be passed to `swap` and `remap`.
    ///
    /// `stride2` is the stride of the transition table expressed as a power
    /// of 2, and is what translates between state IDs and state indices.
    /// When IDs and indices are the same thing, pass `0`, which makes the
    /// translation an identity.
    pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper {
        let idx = IndexMapper { stride2 };
        // Before any swap, every state lives at its own position.
        let len = r.state_len();
        let map: Vec<StateID> =
            (0..len).map(|index| idx.to_state_id(index)).collect();
        Remapper { map, idx }
    }

    /// Swaps two states, both in `r` and in this remapper's bookkeeping.
    /// Swapping a state with itself is a no-op.
    ///
    /// After calling this, `remap` has to be called eventually, or `r` may
    /// be left in a corrupt state.
    pub(crate) fn swap(
        &mut self,
        r: &mut impl Remappable,
        id1: StateID,
        id2: StateID,
    ) {
        if id1 != id2 {
            r.swap_states(id1, id2);
            let first = self.idx.to_index(id1);
            let second = self.idx.to_index(id2);
            self.map.swap(first, second);
        }
    }

    /// Finishes the remapping by rewriting every state ID in `r` according
    /// to the swaps that were performed.
    pub(crate) fn remap(mut self, r: &mut impl Remappable) {
        // A state may have been swapped more than once. If (A, C) and then
        // (C, G) were swapped, transitions that used to point to A must end
        // up pointing to G, yet the raw map would send them to C. To fix
        // that, the chain of swaps is followed for each state until it leads
        // back to the state it started from.
        //
        // This always terminates because the map is only ever changed by
        // pairwise swaps: from any state, following the map eventually
        // cycles back to that state (possibly after zero steps).
        //
        // The chains are followed on a frozen copy of the map. The live map
        // is updated along the way, and reading from it would break the
        // cycles and give wrong answers.
        let frozen = self.map.clone();
        for index in 0..r.state_len() {
            let origin = self.idx.to_state_id(index);
            let mut candidate = frozen[index];
            if candidate == origin {
                continue;
            }
            // Walk the cycle; the entry just before we arrive back at
            // `origin` is where this state finally ended up.
            let mut next = frozen[self.idx.to_index(candidate)];
            while next != origin {
                candidate = next;
                next = frozen[self.idx.to_index(candidate)];
            }
            self.map[index] = candidate;
        }
        r.remap(|sid| self.map[self.idx.to_index(sid)]);
    }
}
/// A simple type for mapping between state indices and state IDs.
///
/// The reason why this exists is because state IDs are "premultiplied" in a
/// DFA. That is, in order to get to the transitions for a particular state,
/// one need only use the state ID as-is, instead of having to multiply it by
/// transition table's stride.
///
/// The downside of this is that it's inconvenient to map between state IDs
/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
/// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`,
/// etc.
///
/// Since our state IDs are premultiplied, we can convert back-and-forth
/// between IDs and indices by simply unmultiplying the IDs and multiplying the
/// indices.
///
/// Note that for a sparse NFA, state IDs and indices are equivalent. In this
/// case, we set the stride of the index mapper to be `0`, which acts as an
/// identity.
#[derive(Debug)]
struct IndexMapper {
/// The power of 2 corresponding to the stride of the corresponding
/// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
/// stride2' pre-multiplies an index to an ID.
stride2: usize,
}
impl IndexMapper {
/// Convert a state ID to a state index.
fn to_index(&self, id: StateID) -> usize {
id.as_usize() >> self.stride2
}
/// Convert a state index to a state ID.
fn to_state_id(&self, index: usize) -> StateID {
// CORRECTNESS: If the given index is not valid, then it is not
// required for this to panic or return a valid state ID. We'll "just"
// wind up with panics or silent logic errors at some other point. But
// this is OK because if Remappable::state_len is correct and so is
// 'to_index', then all inputs to 'to_state_id' should be valid indices
// and thus transform into valid state IDs.
StateID::new_unchecked(index << self.stride2)
}
}
/// Lets a noncontiguous NFA take part in state remapping.
///
/// Every method forwards to the item of the same name reached through the
/// fully-qualified `noncontiguous::NFA::...` path.
// NOTE(review): the qualified paths presumably exist to select the NFA's own
// inherent methods rather than these same-named trait methods — confirm
// against the NFA definition.
impl Remappable for noncontiguous::NFA {
/// Number of states, taken as the length of `noncontiguous::NFA::states`.
fn state_len(&self) -> usize {
noncontiguous::NFA::states(self).len()
}
/// Swap the two states identified by `id1` and `id2`.
fn swap_states(&mut self, id1: StateID, id2: StateID) {
noncontiguous::NFA::swap_states(self, id1, id2)
}
/// Rewrite state IDs using `map`, which takes an old ID and returns the new
/// one.
fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
noncontiguous::NFA::remap(self, map)
}
}
use crate::util::primitives::StateID;
/// The sentinel state IDs of an Aho-Corasick automaton.
///
/// States are arranged in a particular order so that whether a state is a
/// dead, match or start state can be decided purely by comparing its ID
/// against the values stored here.
#[derive(Clone, Debug)]
pub(crate) struct Special {
    /// The largest ID among all "special" states. This is either
    /// `start_anchored_id` when a prefilter is active, or `max_match_id` when
    /// no prefilter is active: without a prefilter there is no point in
    /// treating start states as special.
    pub(crate) max_special_id: StateID,
    /// The largest ID among all match states. Any state ID bigger than this
    /// is guaranteed to be a non-match ID.
    ///
    /// `max_match_id` may legally equal `start_anchored_id`; that happens
    /// precisely when the empty string is one of the patterns added to the
    /// underlying automaton.
    pub(crate) max_match_id: StateID,
    /// The state ID of the start state used for unanchored searches.
    pub(crate) start_unanchored_id: StateID,
    /// The state ID of the start state used for anchored searches. This is
    /// always `start_unanchored_id + 1`.
    pub(crate) start_anchored_id: StateID,
}

impl Special {
    /// Build a `Special` whose IDs are all zero.
    ///
    /// The zeros are placeholders; the fields are expected to be overwritten
    /// with their real values later.
    pub(crate) fn zero() -> Special {
        const UNSET: StateID = StateID::ZERO;
        Special {
            max_special_id: UNSET,
            max_match_id: UNSET,
            start_unanchored_id: UNSET,
            start_anchored_id: UNSET,
        }
    }
}
{"files":{"Cargo.lock":"54add33b71837dc37caabe694c30b631ebd7830b4c1c218da994bb7dd4d80881","Cargo.toml":"da1897bfc5dec7fe375da24eb8401e208cad60ddb6adb1c5f6c8158a659fc2fc","LICENSE-APACHE":"c6596eb7be8581c18be736c846fb9173b69eccf6ef94c5135893ec56bd92ba08","LICENSE-MIT":"6efb0476a1cc085077ed49357026d8c173bf33017278ef440f222fb9cbcb66e6","README.md":"b230c2257d0c7a49b9bd97f2fa73abedcdc055757b5cedd2b0eb1a7a448ff461","benches/stream.rs":"72eaf3be51ee55deec3c8d086d3c92a8ee93be7d4acba88cd85ada237f17e0de","benches/strip.rs":"4246f8496f938ad01fa24c110c32db92ddefb96022bff60b08a4aab7782421f9","benches/wincon.rs":"6990ef2e87d983124eaf10151716567faa55a0b84439542e454ce8262ee6862c","examples/dump-stream.rs":"6dbd8fca846ac8addd43ae941fa1220a7c31862896ac2ae4971afe792480523d","examples/query-stream.rs":"3796dec1ffe79cc49d088086eb27214764e8e0cff0370d60150aeecdadd47d30","src/_macros.rs":"690de2f4535d931b27e864ea23fcdd3ae53e4171d68830736911f062739e9d24","src/adapter/mod.rs":"baf4237ea0b18df63609e49d93572ca27c2202a4cbec0220adb5a7e815c7d8ed","src/adapter/strip.rs":"3430fd619b4e65a3b3fc0e61793d814fdc9de7caa6336bf1c5506f3a1b7e49ce","src/adapter/wincon.rs":"73eb62071a15908eb9d09aeccb0a4b50ad6259bfb367e496c42d48932b8fef10","src/auto.rs":"e513fd903153df148fe1718471db3363fb3ec6a3d7bc167426033ed37dd307ba","src/buffer.rs":"597cc2a109a49f0a48888c7d882058d0ec4b771da34348d4da384ba056c6ea1c","src/fmt.rs":"cc11b005c4559843bd908a57958a13c8d0922fae6aff5261f3583c90e60da73c","src/lib.rs":"d03cd364bf57ac8d02fde032f70959fd3d1eadd36a69f69c588170fa11f156e6","src/stream.rs":"b49a3c855b0e26263a39416df4463a08ae3b724a22a8def86abfe9d496cf08d2","src/strip.rs":"155fa7bb0bc333b7648236df44d14ff742139c28659332bf03ed00b2943b8fac","src/wincon.rs":"e85c03ccfeca352a32572db8bb6c903f78c2003f5b375254edc5a69d6843728f"},"package":"8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"}
\ No newline at end of file
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.66.0"
name = "anstream"
version = "0.6.18"
build = false
include = [
"build.rs",
"src/**/*",
"Cargo.toml",
"Cargo.lock",
"LICENSE*",
"README.md",
"benches/**/*",
"examples/**/*",
]
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "A simple cross platform library for writing colored text to a terminal."
homepage = "https://github.com/rust-cli/anstyle"
readme = "README.md"
keywords = [
"ansi",
"terminal",
"color",
"strip",
"wincon",
]
categories = ["command-line-interface"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-cli/anstyle.git"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
]
[[package.metadata.release.pre-release-replacements]]
file = "CHANGELOG.md"
min = 1
replace = "{{version}}"
search = "Unreleased"
[[package.metadata.release.pre-release-replacements]]
exactly = 1
file = "CHANGELOG.md"
replace = "...{{tag_name}}"
search = '\.\.\.HEAD'
[[package.metadata.release.pre-release-replacements]]
file = "CHANGELOG.md"
min = 1
replace = "{{date}}"
search = "ReleaseDate"
[[package.metadata.release.pre-release-replacements]]
exactly = 1
file = "CHANGELOG.md"
replace = """
<!-- next-header -->
## [Unreleased] - ReleaseDate
"""
search = "<!-- next-header -->"
[[package.metadata.release.pre-release-replacements]]
exactly = 1
file = "CHANGELOG.md"
replace = """
<!-- next-url -->
[Unreleased]: https://github.com/rust-cli/anstyle/compare/{{tag_name}}...HEAD"""
search = "<!-- next-url -->"
[lib]
name = "anstream"
path = "src/lib.rs"
[[example]]
name = "dump-stream"
path = "examples/dump-stream.rs"
required-features = ["auto"]
[[example]]
name = "query-stream"
path = "examples/query-stream.rs"
required-features = ["auto"]
[[bench]]
name = "stream"
path = "benches/stream.rs"
harness = false
[[bench]]
name = "strip"
path = "benches/strip.rs"
harness = false
[[bench]]
name = "wincon"
path = "benches/wincon.rs"
harness = false
[dependencies.anstyle]
version = "1.0.0"
[dependencies.anstyle-parse]
version = "0.2.0"
[dependencies.anstyle-query]
version = "1.0.0"
optional = true
[dependencies.colorchoice]
version = "1.0.0"
[dependencies.is_terminal_polyfill]
version = "1.48"
[dependencies.utf8parse]
version = "0.2.1"
[dev-dependencies.divan]
version = "0.1.11"
[dev-dependencies.lexopt]
version = "0.3.0"
[dev-dependencies.owo-colors]
version = "4.0.0"
[dev-dependencies.proptest]
version = "1.4.0"
[dev-dependencies.strip-ansi-escapes]
version = "0.2.0"
[features]
auto = ["dep:anstyle-query"]
default = [
"auto",
"wincon",
]
test = []
wincon = ["dep:anstyle-wincon"]
[target."cfg(windows)".dependencies.anstyle-wincon]
version = "3.0.5"
optional = true
[lints.clippy]
bool_assert_comparison = "allow"
branches_sharing_code = "allow"
checked_conversions = "warn"
collapsible_else_if = "allow"
create_dir = "warn"
dbg_macro = "warn"
debug_assert_with_mut_call = "warn"
doc_markdown = "warn"
empty_enum = "warn"
enum_glob_use = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
fallible_impl_from = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
float_cmp_const = "warn"
fn_params_excessive_bools = "warn"
from_iter_instead_of_collect = "warn"
if_same_then_else = "allow"
implicit_clone = "warn"
imprecise_flops = "warn"
inconsistent_struct_constructor = "warn"
inefficient_to_string = "warn"
infinite_loop = "warn"
invalid_upcast_comparisons = "warn"
large_digit_groups = "warn"
large_stack_arrays = "warn"
large_types_passed_by_value = "warn"
let_and_return = "allow"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
mem_forget = "warn"
mutex_integer = "warn"
needless_continue = "warn"
needless_for_each = "warn"
negative_feature_names = "warn"
path_buf_push_overwrite = "warn"
ptr_as_ptr = "warn"
rc_mutex = "warn"
redundant_feature_names = "warn"
ref_option_ref = "warn"
rest_pat_in_fully_bound_structs = "warn"
same_functions_in_if_condition = "warn"
self_named_module_files = "warn"
semicolon_if_nothing_returned = "warn"
str_to_string = "warn"
string_add = "warn"
string_add_assign = "warn"
string_lit_as_bytes = "warn"
string_to_string = "warn"
todo = "warn"
trait_duplication_in_bounds = "warn"
uninlined_format_args = "warn"
verbose_file_reads = "warn"
wildcard_imports = "warn"
zero_sized_map_values = "warn"
[lints.rust]
unreachable_pub = "warn"
unsafe_op_in_unsafe_fn = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
unused_qualifications = "warn"
[lints.rust.rust_2018_idioms]
level = "warn"
priority = -1
Copyright (c) Individual contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# anstream
> A simple cross platform library for writing colored text to a terminal.
*A portmanteau of "ansi stream"*
[![Documentation](https://img.shields.io/badge/docs-master-blue.svg)][Documentation]
![License](https://img.shields.io/crates/l/anstream.svg)
[![Crates Status](https://img.shields.io/crates/v/anstream.svg)](https://crates.io/crates/anstream)
Specialized `stdout` and `stderr` that accept ANSI escape codes and adapt them
based on the terminal's capabilities.
`anstream::adapter::strip_str` may also be of interest on its own for low
overhead stripping of ANSI escape codes.
## License
Licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally
submitted for inclusion in the work by you, as defined in the Apache-2.0
license, shall be dual licensed as above, without any additional terms or
conditions.
[Crates.io]: https://crates.io/crates/anstream
[Documentation]: https://docs.rs/anstream
#![allow(clippy::unwrap_used)]
use std::io::Write as _;
/// Writes the fixture bytes straight into a pre-sized `Vec<u8>`, with no
/// anstream adapter in between.
#[divan::bench(args = DATA)]
fn nop(data: &Data) -> Vec<u8> {
    let payload = data.content();
    let mut sink: Vec<u8> = Vec::with_capacity(payload.len());
    sink.write_all(payload).unwrap();
    sink
}
/// Writes the fixture bytes through an `anstream::StripStream` wrapped around
/// a pre-sized `Vec<u8>` and returns the inner buffer.
#[divan::bench(args = DATA)]
fn strip_stream(data: &Data) -> Vec<u8> {
    let payload = data.content();
    let mut sink = anstream::StripStream::new(Vec::with_capacity(payload.len()));
    sink.write_all(payload).unwrap();
    sink.into_inner()
}
/// Writes the fixture bytes through an `anstream::WinconStream` wrapped around
/// a pre-sized `Vec<u8>` and returns the inner buffer.
///
/// Only compiled on Windows with the `wincon` feature enabled.
#[divan::bench(args = DATA)]
#[cfg(all(windows, feature = "wincon"))]
fn wincon_stream(data: &Data) -> Vec<u8> {
    let payload = data.content();
    let mut sink = anstream::WinconStream::new(Vec::with_capacity(payload.len()));
    sink.write_all(payload).unwrap();
    sink.into_inner()
}
/// Writes the fixture bytes through `anstream::AutoStream::always_ansi`
/// wrapped around a pre-sized `Vec<u8>` and returns the inner buffer.
#[divan::bench(args = DATA)]
fn auto_stream_always_ansi(data: &Data) -> Vec<u8> {
    let payload = data.content();
    let mut sink = anstream::AutoStream::always_ansi(Vec::with_capacity(payload.len()));
    sink.write_all(payload).unwrap();
    sink.into_inner()
}
/// Writes the fixture bytes through `anstream::AutoStream::always` wrapped
/// around a pre-sized `Vec<u8>` and returns the inner buffer.
#[divan::bench(args = DATA)]
fn auto_stream_always(data: &Data) -> Vec<u8> {
    let payload = data.content();
    let mut sink = anstream::AutoStream::always(Vec::with_capacity(payload.len()));
    sink.write_all(payload).unwrap();
    sink.into_inner()
}
/// Writes the fixture bytes through `anstream::AutoStream::never` wrapped
/// around a pre-sized `Vec<u8>` and returns the inner buffer.
#[divan::bench(args = DATA)]
fn auto_stream_never(data: &Data) -> Vec<u8> {
    let payload = data.content();
    let mut sink = anstream::AutoStream::never(Vec::with_capacity(payload.len()));
    sink.write_all(payload).unwrap();
    sink.into_inner()
}
/// Fixtures passed to each benchmark through `#[divan::bench(args = DATA)]`.
///
/// Each entry pairs a display name with the raw bytes to write. The numeric
/// prefix in the name keeps the entries in a fixed, readable order.
const DATA: &[Data] = &[
// A short inline byte string made of escape sequences (introduced by
// ESC `]`, ESC `[` and ESC `P`) separated by single spaces.
Data(
"0-state_changes",
b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\".as_slice(),
),
// The remaining fixtures are embedded at compile time from `../tests`.
Data("1-demo.vte", include_bytes!("../tests/demo.vte").as_slice()),
Data(
"2-rg_help.vte",
include_bytes!("../tests/rg_help.vte").as_slice(),
),
Data(
"3-rg_linus.vte",
include_bytes!("../tests/rg_linus.vte").as_slice(),
),
];
/// A benchmark fixture: a display name paired with the bytes to process.
#[derive(Debug)]
struct Data(&'static str, &'static [u8]);

impl Data {
    /// The fixture's display name (first tuple field).
    const fn name(&self) -> &'static str {
        let Self(label, _) = self;
        *label
    }

    /// The fixture's raw bytes (second tuple field).
    const fn content(&self) -> &'static [u8] {
        let Self(_, bytes) = self;
        *bytes
    }
}

/// Displays a fixture as its name, honouring any formatter flags such as
/// width and alignment.
impl std::fmt::Display for Data {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.name(), f)
    }
}
/// Benchmark entry point: hands control to `divan::main`.
// NOTE(review): presumably divan discovers and runs the functions marked with
// `#[divan::bench]` — confirm against the divan documentation.
fn main() {
divan::main();
}
#![allow(clippy::unwrap_used)]
/// Collects text into an owned `String` (the single tuple field).
#[derive(Default)]
struct Strip(String);

impl Strip {
    /// Create an empty `Strip` whose buffer can hold at least `capacity`
    /// bytes without reallocating.
    fn with_capacity(capacity: usize) -> Self {
        let text = String::with_capacity(capacity);
        Strip(text)
    }
}
impl anstyle_parse::Perform for Strip {
fn print(&mut self, c: char) {
self.0.push(c);
}
fn execute(&mut self, byte: u8) {
if byte.is_ascii_whitespace() {
self.0.push(byte as char);
}
}
}
/// Strips the fixture by feeding it byte-by-byte to an `anstyle_parse::Parser`
/// that reports into a `Strip` collector, then returns the collected text.
#[divan::bench(args = DATA)]
fn advance_strip(data: &Data) -> String {
    let input = data.content();
    let mut sink = Strip::with_capacity(input.len());
    let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
    for &byte in input {
        parser.advance(&mut sink, byte);
    }
    sink.0
}
/// Strips the fixture with the `strip_ansi_escapes` crate's `strip` function.
#[divan::bench(args = DATA)]
fn strip_ansi_escapes(data: &Data) -> Vec<u8> {
    strip_ansi_escapes::strip(data.content())
}
/// Strips the fixture with `anstream::adapter::strip_str`.
///
/// Fixtures that are not valid UTF-8 yield an empty string.
#[divan::bench(args = DATA)]
fn strip_str(data: &Data) -> String {
    match std::str::from_utf8(data.content()) {
        Ok(content) => anstream::adapter::strip_str(content).to_string(),
        Err(_) => String::new(),
    }
}
/// Strips the fixture with the incremental `anstream::adapter::StripStr`
/// state, concatenating every piece yielded by `strip_next`.
///
/// Fixtures that are not valid UTF-8 yield an empty string.
#[divan::bench(args = DATA)]
fn strip_str_strip_next(data: &Data) -> String {
    let content = match std::str::from_utf8(data.content()) {
        Ok(content) => content,
        Err(_) => return String::new(),
    };
    let mut output = String::with_capacity(data.content().len());
    let mut stripper = anstream::adapter::StripStr::new();
    for piece in stripper.strip_next(content) {
        output.push_str(piece);
    }
    output
}
/// Strips the fixture with `anstream::adapter::strip_bytes` and returns the
/// result as an owned byte vector.
#[divan::bench(args = DATA)]
fn strip_bytes(data: &Data) -> Vec<u8> {
    anstream::adapter::strip_bytes(data.content()).into_vec()
}
/// Fixtures passed to each benchmark through `#[divan::bench(args = DATA)]`
/// and iterated by `verify_data`.
///
/// Each entry pairs a display name with the raw bytes to process.
const DATA: &[Data] = &[
// A short inline byte string made of escape sequences (introduced by
// ESC `]`, ESC `[` and ESC `P`) separated by single spaces.
Data(
"0-state_changes",
b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\".as_slice(),
),
// The remaining fixtures are embedded at compile time from `../tests`.
Data("1-demo.vte", include_bytes!("../tests/demo.vte").as_slice()),
Data(
"2-rg_help.vte",
include_bytes!("../tests/rg_help.vte").as_slice(),
),
Data(
"3-rg_linus.vte",
include_bytes!("../tests/rg_linus.vte").as_slice(),
),
];
/// A benchmark fixture: a display name paired with the bytes to process.
#[derive(Debug)]
struct Data(&'static str, &'static [u8]);

impl Data {
    /// The fixture's display name (first tuple field).
    const fn name(&self) -> &'static str {
        let Self(label, _) = self;
        *label
    }

    /// The fixture's raw bytes (second tuple field).
    const fn content(&self) -> &'static [u8] {
        let Self(_, bytes) = self;
        *bytes
    }
}

/// Displays a fixture as its name, honouring any formatter flags such as
/// width and alignment.
impl std::fmt::Display for Data {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.name(), f)
    }
}
/// Checks that, for every UTF-8 fixture, the parser-based `Strip` collector,
/// `strip_str` and `strip_bytes` all produce the same text.
///
/// Fixtures that are not valid UTF-8 are skipped.
#[test]
fn verify_data() {
    // Make sure the comparison is fair
    let utf8_fixtures = DATA
        .iter()
        .filter_map(|data| std::str::from_utf8(data.content()).ok());
    for content in utf8_fixtures {
        let mut reference = Strip::with_capacity(content.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for &byte in content.as_bytes() {
            parser.advance(&mut reference, byte);
        }
        let expected = reference.0;

        let via_str = anstream::adapter::strip_str(content).to_string();
        assert_eq!(expected, via_str);

        let raw = anstream::adapter::strip_bytes(content.as_bytes()).into_vec();
        let via_bytes = String::from_utf8(raw).unwrap();
        assert_eq!(expected, via_bytes);
    }
}
/// Benchmark entry point: hands control to `divan::main`.
// NOTE(review): presumably divan discovers and runs the functions marked with
// `#[divan::bench]` — confirm against the divan documentation.
fn main() {
divan::main();
}
/// Runs the fixture through a fresh `anstream::adapter::WinconBytes` state and
/// collects every `(style, text)` pair produced by `extract_next`.
#[divan::bench(args = DATA)]
fn nop(data: &Data) -> Vec<(anstyle::Style, String)> {
    let mut extractor = anstream::adapter::WinconBytes::new();
    extractor.extract_next(data.content()).collect()
}
/// Fixtures passed to the benchmark through `#[divan::bench(args = DATA)]`.
///
/// Each entry pairs a display name with the raw bytes to process.
const DATA: &[Data] = &[
// A short inline byte string made of escape sequences (introduced by
// ESC `]`, ESC `[` and ESC `P`) separated by single spaces.
Data(
"0-state_changes",
b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\".as_slice(),
),
// The remaining fixtures are embedded at compile time from `../tests`.
Data("1-demo.vte", include_bytes!("../tests/demo.vte").as_slice()),
Data(
"2-rg_help.vte",
include_bytes!("../tests/rg_help.vte").as_slice(),
),
Data(
"3-rg_linus.vte",
include_bytes!("../tests/rg_linus.vte").as_slice(),
),
];
/// A benchmark fixture: a display name paired with the bytes to process.
#[derive(Debug)]
struct Data(&'static str, &'static [u8]);

impl Data {
    /// The fixture's display name (first tuple field).
    const fn name(&self) -> &'static str {
        let Self(label, _) = self;
        *label
    }

    /// The fixture's raw bytes (second tuple field).
    const fn content(&self) -> &'static [u8] {
        let Self(_, bytes) = self;
        *bytes
    }
}

/// Displays a fixture as its name, honouring any formatter flags such as
/// width and alignment.
impl std::fmt::Display for Data {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.name(), f)
    }
}
/// Benchmark entry point: hands control to `divan::main`.
// NOTE(review): presumably divan discovers and runs the functions marked with
// `#[divan::bench]` — confirm against the divan documentation.
fn main() {
divan::main();
}
//! Write colored text, adapting to the terminal's capabilities
use std::io::Write;
fn main() -> Result<(), lexopt::Error> {
let args = Args::parse()?;
let stdout = anstream::stdout();
let mut stdout = stdout.lock();
for fixed in 0..16 {
let color = anstyle::Ansi256Color(fixed)
.into_ansi()
.expect("within 4-bit color range");
let style = style(color, args.layer, args.effects);
let _ = print_number(&mut stdout, fixed, style);
if fixed == 7 || fixed == 15 {
let _ = writeln!(&mut stdout);
}
}
for fixed in 16..232 {
let col = (fixed - 16) % 36;
if col == 0 {
let _ = writeln!(stdout);
}
let color = anstyle::Ansi256Color(fixed);
let style = style(color, args.layer, args.effects);
let _ = print_number(&mut stdout, fixed, style);
}
let _ = writeln!(stdout);
let _ = writeln!(stdout);
for fixed in 232..=255 {
let color = anstyle::Ansi256Color(fixed);
let style = style(color, args.layer, args.effects);
let _ = print_number(&mut stdout, fixed, style);
}
let _ = writeln!(stdout);
Ok(())
}
/// Build a style that applies `color` to the requested `layer` (foreground,
/// background or underline) and then ORs in `effects`.
fn style(
    color: impl Into<anstyle::Color>,
    layer: Layer,
    effects: anstyle::Effects,
) -> anstyle::Style {
    let color = Some(color.into());
    let base = anstyle::Style::new();
    let colored = match layer {
        Layer::Fg => base.fg_color(color),
        Layer::Bg => base.bg_color(color),
        Layer::Underline => base.underline_color(color),
    };
    colored | effects
}
/// Write `fixed` as upper-case hex, right-aligned to width 3, preceded by
/// `style` and followed by the style's alternate (`{:#}`) rendering.
fn print_number(stdout: &mut impl Write, fixed: u8, style: anstyle::Style) -> std::io::Result<()> {
    stdout.write_fmt(format_args!("{style}{fixed:>3X}{style:#}"))
}
/// Options collected from the command line.
#[derive(Default)]
struct Args {
    /// Effects accumulated from every `--effect` option.
    effects: anstyle::Effects,
    /// Which part of the style receives the color; set by `--layer`.
    layer: Layer,
}

/// The part of a style that a color is applied to. Defaults to `Fg`.
#[derive(Copy, Clone, Default)]
enum Layer {
    #[default]
    Fg,
    Bg,
    Underline,
}

impl Args {
    /// Parse the process arguments.
    ///
    /// Recognized options:
    /// - `--layer <fg|bg|underline>` selects the [`Layer`].
    /// - `--effect <name>` adds one effect and may be repeated.
    ///
    /// Any other argument, or an unrecognized option value, produces an
    /// error.
    fn parse() -> Result<Self, lexopt::Error> {
        use lexopt::prelude::*;

        /// Accepted `--effect` names and the effect each one selects.
        const EFFECTS: [(&str, anstyle::Effects); 12] = [
            ("bold", anstyle::Effects::BOLD),
            ("dimmed", anstyle::Effects::DIMMED),
            ("italic", anstyle::Effects::ITALIC),
            ("underline", anstyle::Effects::UNDERLINE),
            ("double_underline", anstyle::Effects::DOUBLE_UNDERLINE),
            ("curly_underline", anstyle::Effects::CURLY_UNDERLINE),
            ("dotted_underline", anstyle::Effects::DOTTED_UNDERLINE),
            ("dashed_underline", anstyle::Effects::DASHED_UNDERLINE),
            ("blink", anstyle::Effects::BLINK),
            ("invert", anstyle::Effects::INVERT),
            ("hidden", anstyle::Effects::HIDDEN),
            ("strikethrough", anstyle::Effects::STRIKETHROUGH),
        ];

        let mut parsed = Args::default();
        let mut raw = lexopt::Parser::from_env();
        while let Some(arg) = raw.next()? {
            match arg {
                Long("layer") => {
                    parsed.layer = raw.value()?.parse_with(|s| match s {
                        "fg" => Ok(Layer::Fg),
                        "bg" => Ok(Layer::Bg),
                        "underline" => Ok(Layer::Underline),
                        _ => Err("expected values fg, bg, underline"),
                    })?;
                }
                Long("effect") => {
                    let effect = raw.value()?.parse_with(|s| {
                        match EFFECTS.into_iter().find(|(name, _)| *name == s) {
                            Some((_, effect)) => Ok(effect),
                            None => {
                                // List every accepted name in the error.
                                let known: Vec<_> =
                                    EFFECTS.into_iter().map(|(n, _)| n).collect();
                                Err(format!("expected one of {}", known.join(", ")))
                            }
                        }
                    })?;
                    parsed.effects = parsed.effects.insert(effect);
                }
                _ => return Err(arg.unexpected()),
            }
        }
        Ok(parsed)
    }
}
//! Report a terminal's capabilities
/// Prints two color-choice values each for stdout and stderr.
///
/// For each stream the first line is the result of `AutoStream::choice` on
/// the raw handle, and the second is `current_choice()` of an
/// `AutoStream::auto` wrapper around the same handle.
fn main() {
    println!("stdout:");
    let detected = anstream::AutoStream::choice(&std::io::stdout());
    println!(" choice: {:?}", detected);
    let effective = anstream::AutoStream::auto(std::io::stdout()).current_choice();
    println!(" choice: {:?}", effective);

    println!("stderr:");
    let detected = anstream::AutoStream::choice(&std::io::stderr());
    println!(" choice: {:?}", detected);
    let effective = anstream::AutoStream::auto(std::io::stderr()).current_choice();
    println!(" choice: {:?}", effective);
}
//! Gracefully degrade styled output
mod strip;
mod wincon;
pub use strip::strip_bytes;
pub use strip::strip_str;
pub use strip::StripBytes;
pub use strip::StripBytesIter;
pub use strip::StripStr;
pub use strip::StripStrIter;
pub use strip::StrippedBytes;
pub use strip::StrippedStr;
pub use wincon::WinconBytes;
pub use wincon::WinconBytesIter;
#![allow(deprecated)]
/// In-memory [`RawStream`][crate::stream::RawStream]
///
/// A thin wrapper around a `Vec<u8>`; writes append to the vector.
#[derive(Clone, Default, Debug, PartialEq, Eq)]
#[deprecated(since = "0.6.2", note = "Use Vec")]
#[doc(hidden)]
pub struct Buffer(Vec<u8>);

impl Buffer {
    /// Create an empty buffer.
    #[inline]
    pub fn new() -> Self {
        Self(Vec::new())
    }

    /// Create an empty buffer with room for at least `capacity` bytes.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let bytes = Vec::with_capacity(capacity);
        Self(bytes)
    }

    /// View everything written so far.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }
}

impl AsRef<[u8]> for Buffer {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

impl std::io::Write for Buffer {
    /// Append all of `buf`; never fails and always reports `buf.len()` bytes
    /// written.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let written = buf.len();
        self.0.extend_from_slice(buf);
        Ok(written)
    }

    /// Nothing is buffered beyond the vector itself, so this is a no-op.
    #[inline]
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
/// Windows-console stream support for `Buffer`; only compiled on Windows with
/// the `wincon` feature enabled.
#[cfg(all(windows, feature = "wincon"))]
impl anstyle_wincon::WinconStream for Buffer {
/// Forwards `fg`, `bg` and `data` unchanged to `write_colored` on the wrapped
/// `Vec<u8>`.
fn write_colored(
&mut self,
fg: Option<anstyle::AnsiColor>,
bg: Option<anstyle::AnsiColor>,
data: &[u8],
) -> std::io::Result<usize> {
self.0.write_colored(fg, bg, data)
}
}
/// A shim which allows a [`std::io::Write`] to be implemented in terms of a [`std::fmt::Write`]
///
/// `writer` receives the bytes of every formatted fragment. If it fails, the
/// I/O error is saved instead of being discarded, so that `write_fmt` can
/// return it.
pub(crate) struct Adapter<W>
where
    W: FnMut(&[u8]) -> std::io::Result<()>,
{
    /// Sink invoked with each fragment's bytes.
    writer: W,
    /// The most recent error returned by `writer`, or `Ok(())` if none.
    error: std::io::Result<()>,
}

impl<W> Adapter<W>
where
    W: FnMut(&[u8]) -> std::io::Result<()>,
{
    /// Wrap `writer`, starting with no recorded error.
    pub(crate) fn new(writer: W) -> Self {
        let error = Ok(());
        Adapter { writer, error }
    }

    /// Format `fmt` into the wrapped writer.
    ///
    /// On failure, returns the saved I/O error if the writer produced one;
    /// otherwise the failure came from a formatting implementation and a
    /// generic "formatter error" is returned.
    pub(crate) fn write_fmt(mut self, fmt: std::fmt::Arguments<'_>) -> std::io::Result<()> {
        let formatted = std::fmt::write(&mut self, fmt);
        match (formatted, self.error) {
            (Ok(()), _) => Ok(()),
            // The failure originated in the underlying writer.
            (Err(_), Err(io_err)) => Err(io_err),
            // The failure originated in a `Display`/`Debug` implementation.
            (Err(_), Ok(())) => Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                "formatter error",
            )),
        }
    }
}

impl<W> std::fmt::Write for Adapter<W>
where
    W: FnMut(&[u8]) -> std::io::Result<()>,
{
    /// Pass `s` to the writer; on failure, stash the I/O error and report a
    /// bare `fmt::Error`, which cannot carry any detail itself.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        (self.writer)(s.as_bytes()).map_err(|io_err| {
            self.error = Err(io_err);
            std::fmt::Error
        })
    }
}
//! **Auto-adapting [`stdout`] / [`stderr`] streams**
//!
//! *A portmanteau of "ansi stream"*
//!
//! [`AutoStream`] always accepts [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code),
//! [adapting to the user's terminal's capabilities][AutoStream].
//!
//! Benefits
//! - Allows the caller to not be concerned with the terminal's capabilities
//! - Semver safe way of passing styled text between crates as ANSI escape codes offer more
//! compatibility than most crate APIs.
//!
//! Available styling crates:
//! - [anstyle](https://docs.rs/anstyle) for minimal runtime styling, designed to go in public APIs
//! - [owo-colors](https://docs.rs/owo-colors) for feature-rich runtime styling
//! - [color-print](https://docs.rs/color-print) for feature-rich compile-time styling
//!
//! # Example
//!
//! ```
//! # #[cfg(feature = "auto")] {
//! use anstream::println;
//! use owo_colors::OwoColorize as _;
//!
//! // Foreground colors
//! println!("My number is {:#x}!", 10.green());
//! // Background colors
//! println!("My number is not {}!", 4.on_red());
//! # }
//! ```
//!
//! And this will correctly handle piping to a file, etc
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![warn(missing_docs)]
#![warn(clippy::print_stderr)]
#![warn(clippy::print_stdout)]
pub mod adapter;
pub mod stream;
#[doc(hidden)]
#[macro_use]
pub mod _macros;
mod auto;
mod buffer;
mod fmt;
mod strip;
#[cfg(all(windows, feature = "wincon"))]
mod wincon;
pub use auto::AutoStream;
pub use strip::StripStream;
#[cfg(all(windows, feature = "wincon"))]
pub use wincon::WinconStream;
#[allow(deprecated)]
pub use buffer::Buffer;
/// An adaptive wrapper around the global standard output stream of the current process
///
/// This is an alias for an `AutoStream` over `std::io::Stdout`.
pub type Stdout = AutoStream<std::io::Stdout>;
/// An adaptive wrapper around the global standard error stream of the current process
///
/// This is an alias for an `AutoStream` over `std::io::Stderr`.
pub type Stderr = AutoStream<std::io::Stderr>;
/// Create an ANSI escape code compatible stdout
///
/// The process's standard output handle is wrapped with [`AutoStream::auto`].
///
/// **Note:** Call [`AutoStream::lock`] in loops to avoid the performance hit of acquiring/releasing
/// from the implicit locking in each [`std::io::Write`] call
#[cfg(feature = "auto")]
pub fn stdout() -> Stdout {
    AutoStream::auto(std::io::stdout())
}
/// Create an ANSI escape code compatible stderr
///
/// The process's standard error handle is wrapped with [`AutoStream::auto`].
///
/// **Note:** Call [`AutoStream::lock`] in loops to avoid the performance hit of acquiring/releasing
/// from the implicit locking in each [`std::io::Write`] call
#[cfg(feature = "auto")]
pub fn stderr() -> Stderr {
    AutoStream::auto(std::io::stderr())
}
/// Selection for overriding color output
pub use colorchoice::ColorChoice;
//! Higher-level traits to describe writeable streams
/// Required functionality for underlying [`std::io::Write`] for adaptation
///
/// This definition is used everywhere except on Windows with the `wincon`
/// feature enabled. The trait has no methods of its own; it only bundles its
/// supertraits, and the `private::Sealed` bound ties it to that private
/// module's implementors.
#[cfg(not(all(windows, feature = "wincon")))]
pub trait RawStream: std::io::Write + IsTerminal + private::Sealed {}
/// Required functionality for underlying [`std::io::Write`] for adaptation
///
/// This definition is used on Windows with the `wincon` feature enabled and
/// additionally requires `anstyle_wincon::WinconStream`.
#[cfg(all(windows, feature = "wincon"))]
pub trait RawStream:
std::io::Write + IsTerminal + anstyle_wincon::WinconStream + private::Sealed
{
}
// Mutable references to, and boxes of, any `RawStream` are `RawStream`s too.
impl<T: RawStream + ?Sized> RawStream for &mut T {}
impl<T: RawStream + ?Sized> RawStream for Box<T> {}
// Standard process streams and their locked handles.
impl RawStream for std::io::Stdout {}
impl RawStream for std::io::StdoutLock<'_> {}
impl RawStream for std::io::Stderr {}
impl RawStream for std::io::StderrLock<'_> {}
// Type-erased writers, with and without `Send` / `Sync`.
impl RawStream for dyn std::io::Write {}
impl RawStream for dyn std::io::Write + Send {}
impl RawStream for dyn std::io::Write + Send + Sync {}
// In-memory buffers and files.
impl RawStream for Vec<u8> {}
impl RawStream for std::fs::File {}
#[allow(deprecated)]
impl RawStream for crate::Buffer {}
/// Trait to determine if a descriptor/handle refers to a terminal/tty.
///
/// The `private::Sealed` supertrait ties this trait to that private module's
/// implementors.
pub trait IsTerminal: private::Sealed {
/// Returns `true` if the descriptor/handle refers to a terminal/tty.
fn is_terminal(&self) -> bool;
}
/// A shared reference answers exactly as the value it points to.
impl<T: IsTerminal + ?Sized> IsTerminal for &T {
    #[inline]
    fn is_terminal(&self) -> bool {
        <T as IsTerminal>::is_terminal(&**self)
    }
}

/// A mutable reference answers exactly as the value it points to.
impl<T: IsTerminal + ?Sized> IsTerminal for &mut T {
    #[inline]
    fn is_terminal(&self) -> bool {
        <T as IsTerminal>::is_terminal(&**self)
    }
}

/// A box answers exactly as the value it owns.
impl<T: IsTerminal + ?Sized> IsTerminal for Box<T> {
    #[inline]
    fn is_terminal(&self) -> bool {
        <T as IsTerminal>::is_terminal(&**self)
    }
}
// Standard process streams and their locked handles: the answer comes from
// `is_terminal_polyfill::IsTerminal`.
impl IsTerminal for std::io::Stdout {
#[inline]
fn is_terminal(&self) -> bool {
is_terminal_polyfill::IsTerminal::is_terminal(self)
}
}
impl IsTerminal for std::io::StdoutLock<'_> {
#[inline]
fn is_terminal(&self) -> bool {
is_terminal_polyfill::IsTerminal::is_terminal(self)
}
}
impl IsTerminal for std::io::Stderr {
#[inline]
fn is_terminal(&self) -> bool {
is_terminal_polyfill::IsTerminal::is_terminal(self)
}
}
impl IsTerminal for std::io::StderrLock<'_> {
#[inline]
fn is_terminal(&self) -> bool {
is_terminal_polyfill::IsTerminal::is_terminal(self)
}
}
// Type-erased writers always report `false`.
impl IsTerminal for dyn std::io::Write {
#[inline]
fn is_terminal(&self) -> bool {
false
}
}
impl IsTerminal for dyn std::io::Write + Send {
#[inline]
fn is_terminal(&self) -> bool {
false
}
}
impl IsTerminal for dyn std::io::Write + Send + Sync {
#[inline]
fn is_terminal(&self) -> bool {
false
}
}
// An in-memory byte vector always reports `false`.
impl IsTerminal for Vec<u8> {
#[inline]
fn is_terminal(&self) -> bool {
false
}
}
// A file handle is checked through `is_terminal_polyfill::IsTerminal`.
impl IsTerminal for std::fs::File {
#[inline]
fn is_terminal(&self) -> bool {
is_terminal_polyfill::IsTerminal::is_terminal(self)
}
}
// The deprecated in-memory `Buffer` always reports `false`.
#[allow(deprecated)]
impl IsTerminal for crate::Buffer {
#[inline]
fn is_terminal(&self) -> bool {
false
}
}
/// Lock a stream
///
/// Implementors hand out a [`RawStream`] writer that borrows from `self`.
/// The `private::Sealed` supertrait ties this trait to that private module's
/// implementors.
pub trait AsLockedWrite: private::Sealed {
/// Locked writer type
///
/// It may borrow from `self` for the lifetime `'w`.
type Write<'w>: RawStream + 'w
where
Self: 'w;
/// Lock a stream
///
/// The returned writer borrows `self` mutably for as long as it lives.
fn as_locked_write(&mut self) -> Self::Write<'_>;
}
/// A mutable reference locks the value it points to and yields that value's
/// writer type.
impl<T: AsLockedWrite + ?Sized> AsLockedWrite for &mut T {
    type Write<'w>
        = T::Write<'w>
    where
        Self: 'w;

    #[inline]
    fn as_locked_write(&mut self) -> Self::Write<'_> {
        <T as AsLockedWrite>::as_locked_write(&mut **self)
    }
}

/// A box locks the value it owns and yields that value's writer type.
impl<T: AsLockedWrite + ?Sized> AsLockedWrite for Box<T> {
    type Write<'w>
        = T::Write<'w>
    where
        Self: 'w;

    #[inline]
    fn as_locked_write(&mut self) -> Self::Write<'_> {
        <T as AsLockedWrite>::as_locked_write(&mut **self)
    }
}
// `Stdout` is locked through its own `lock()` method, yielding a `StdoutLock`.
impl AsLockedWrite for std::io::Stdout {
type Write<'w> = std::io::StdoutLock<'w>;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self.lock()
}
}
// A `'static` lock handle is handed out as a mutable reference to itself; no
// further locking call is made here.
impl AsLockedWrite for std::io::StdoutLock<'static> {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
// `Stderr` is locked through its own `lock()` method, yielding a `StderrLock`.
impl AsLockedWrite for std::io::Stderr {
type Write<'w> = std::io::StderrLock<'w>;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self.lock()
}
}
// A `'static` lock handle is handed out as a mutable reference to itself; no
// further locking call is made here.
impl AsLockedWrite for std::io::StderrLock<'static> {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
// Every remaining type makes no locking call at all: the "locked" writer is
// simply a mutable reference to the value itself.
impl AsLockedWrite for dyn std::io::Write {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
impl AsLockedWrite for dyn std::io::Write + Send {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
impl AsLockedWrite for dyn std::io::Write + Send + Sync {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
impl AsLockedWrite for Vec<u8> {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
impl AsLockedWrite for std::fs::File {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
#[allow(deprecated)]
impl AsLockedWrite for crate::Buffer {
type Write<'w> = &'w mut Self;
#[inline]
fn as_locked_write(&mut self) -> Self::Write<'_> {
self
}
}
// Private module holding the `Sealed` marker trait used as a supertrait of
// the public stream traits in this file.
mod private {
/// Marker trait with no methods; it is `pub` inside a private module.
pub trait Sealed {}
// References and boxes are sealed whenever their target is.
impl<T: Sealed + ?Sized> Sealed for &T {}
impl<T: Sealed + ?Sized> Sealed for &mut T {}
impl<T: Sealed + ?Sized> Sealed for Box<T> {}
// The concrete stream types that implement the public traits.
impl Sealed for std::io::Stdout {}
impl Sealed for std::io::StdoutLock<'_> {}
impl Sealed for std::io::Stderr {}
impl Sealed for std::io::StderrLock<'_> {}
impl Sealed for dyn std::io::Write {}
impl Sealed for dyn std::io::Write + Send {}
impl Sealed for dyn std::io::Write + Send + Sync {}
impl Sealed for Vec<u8> {}
impl Sealed for std::fs::File {}
#[allow(deprecated)]
impl Sealed for crate::Buffer {}
}
#[cfg(test)]
mod tests {
use super::*;
// Compile-time check only: instantiating this function proves that `T` is a `RawStream` and that
// `AutoStream<T>` implements `std::io::Write`. The body is intentionally empty.
fn assert_raw_stream<T: RawStream>()
where
crate::AutoStream<T>: std::io::Write,
{
}
// Instantiates the check for boxed and borrowed trait objects (with and without `Send`/`Sync`),
// and for owned and borrowed `Vec<u8>` and `File`.
#[test]
fn test() {
assert_raw_stream::<Box<dyn std::io::Write>>();
assert_raw_stream::<Box<dyn std::io::Write + 'static>>();
assert_raw_stream::<Box<dyn std::io::Write + Send>>();
assert_raw_stream::<Box<dyn std::io::Write + Send + Sync>>();
assert_raw_stream::<&mut (dyn std::io::Write)>();
assert_raw_stream::<&mut (dyn std::io::Write + 'static)>();
assert_raw_stream::<&mut (dyn std::io::Write + Send)>();
assert_raw_stream::<&mut (dyn std::io::Write + Send + Sync)>();
assert_raw_stream::<Vec<u8>>();
assert_raw_stream::<&mut Vec<u8>>();
assert_raw_stream::<std::fs::File>();
assert_raw_stream::<&mut std::fs::File>();
}
}
use crate::adapter::StripBytes;
use crate::stream::AsLockedWrite;
use crate::stream::IsTerminal;
/// Only pass printable data to the inner `Write`
#[derive(Debug)]
pub struct StripStream<S>
where
S: std::io::Write,
{
// The wrapped writer that receives the stripped output.
raw: S,
// Stripper state; it is passed by `&mut` to every write so it persists across calls.
state: StripBytes,
}
impl<S> StripStream<S>
where
S: std::io::Write,
{
/// Only pass printable data to the inner `Write`
#[inline]
pub fn new(raw: S) -> Self {
Self {
raw,
// Start from the default `StripBytes` state.
state: Default::default(),
}
}
/// Get the wrapped [`std::io::Write`]
///
/// The stripper state is discarded.
#[inline]
pub fn into_inner(self) -> S {
self.raw
}
}
impl<S> StripStream<S>
where
S: std::io::Write,
S: IsTerminal,
{
/// Returns `true` if the descriptor/handle refers to a terminal/tty.
///
/// The answer comes straight from the wrapped stream.
#[inline]
pub fn is_terminal(&self) -> bool {
self.raw.is_terminal()
}
}
impl StripStream<std::io::Stdout> {
/// Get exclusive access to the `StripStream`
///
/// Why?
/// - Faster performance when writing in a loop
/// - Avoid other threads interleaving output with the current thread
#[inline]
pub fn lock(self) -> StripStream<std::io::StdoutLock<'static>> {
StripStream {
raw: self.raw.lock(),
// Move the existing stripper state across so stripping continues where it left off.
state: self.state,
}
}
}
impl StripStream<std::io::Stderr> {
/// Get exclusive access to the `StripStream`
///
/// Why?
/// - Faster performance when writing in a loop
/// - Avoid other threads interleaving output with the current thread
#[inline]
pub fn lock(self) -> StripStream<std::io::StderrLock<'static>> {
StripStream {
raw: self.raw.lock(),
// Move the existing stripper state across so stripping continues where it left off.
state: self.state,
}
}
}
impl<S> std::io::Write for StripStream<S>
where
    S: std::io::Write,
    S: AsLockedWrite,
{
    // Every method is forwarded explicitly (rather than relying on the trait's defaults) so that
    // the lock is taken once per call.

    /// Strip `buf` and write the printable parts to the locked inner stream.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let mut locked = self.raw.as_locked_write();
        write(&mut locked, &mut self.state, buf)
    }

    /// Write the first non-empty slice of `bufs` (or an empty slice when there is none).
    #[inline]
    fn write_vectored(&mut self, bufs: &[std::io::IoSlice<'_>]) -> std::io::Result<usize> {
        let first_non_empty = bufs.iter().find(|slice| !slice.is_empty());
        let buf: &[u8] = match first_non_empty {
            Some(slice) => &**slice,
            None => &[],
        };
        self.write(buf)
    }

    // is_write_vectored: nightly only

    /// Flush the locked inner stream.
    #[inline]
    fn flush(&mut self) -> std::io::Result<()> {
        let mut locked = self.raw.as_locked_write();
        locked.flush()
    }

    /// Strip `buf` and write all printable parts to the locked inner stream.
    #[inline]
    fn write_all(&mut self, buf: &[u8]) -> std::io::Result<()> {
        let mut locked = self.raw.as_locked_write();
        write_all(&mut locked, &mut self.state, buf)
    }

    // write_all_vectored: nightly only

    /// Format `args`, stripping the output on its way to the locked inner stream.
    #[inline]
    fn write_fmt(&mut self, args: std::fmt::Arguments<'_>) -> std::io::Result<()> {
        let mut locked = self.raw.as_locked_write();
        write_fmt(&mut locked, &mut self.state, args)
    }
}
/// Strip `buf` and write its printable pieces to `raw`, honouring short writes.
///
/// Returns the number of bytes of `buf` that were consumed. When `raw` accepts only part of a
/// printable piece, the count stops at the first unwritten byte and `state` is left exactly as
/// if only that consumed prefix had been seen, so the caller can retry with `&buf[n..]`.
///
/// # Errors
///
/// Propagates any error returned by `raw.write`.
fn write(
    raw: &mut dyn std::io::Write,
    state: &mut StripBytes,
    buf: &[u8],
) -> std::io::Result<usize> {
    // Snapshot taken so the state can be rewound after a short write.
    let initial_state = state.clone();

    for printable in state.strip_next(buf) {
        let possible = printable.len();
        let written = raw.write(printable)?;
        if possible != written {
            // First byte of `buf` that did not reach `raw`.
            let divergence = &printable[written..];
            let offset = offset_to(buf, divergence);
            // Only the bytes *before* `offset` were consumed. The iterator has already advanced
            // `state` past this whole printable piece, so rewind and replay just that prefix;
            // the caller will hand the remainder back on its next call.
            let consumed = &buf[..offset];
            *state = initial_state;
            state.strip_next(consumed).last();
            return Ok(offset);
        }
    }

    Ok(buf.len())
}
/// Strip `buf` and write every printable piece to `raw` in full.
///
/// Stops at, and returns, the first error reported by `raw.write_all`.
fn write_all(
    raw: &mut dyn std::io::Write,
    state: &mut StripBytes,
    buf: &[u8],
) -> std::io::Result<()> {
    state
        .strip_next(buf)
        .try_for_each(|chunk| raw.write_all(chunk))
}
/// Format `args` and send the resulting bytes through the stripping `write_all`.
///
/// The work is delegated to `crate::fmt::Adapter`, which is given a closure that forwards each
/// byte buffer to `write_all` with this stream's `raw` and `state`.
fn write_fmt(
raw: &mut dyn std::io::Write,
state: &mut StripBytes,
args: std::fmt::Arguments<'_>,
) -> std::io::Result<()> {
// Shadows the free function `write_all` within this scope; the call inside the closure body
// still resolves to the free function.
let write_all = |buf: &[u8]| write_all(raw, state, buf);
crate::fmt::Adapter::new(write_all).write_fmt(args)
}
/// Byte distance from the start of `total` to the start of `subslice`.
///
/// `subslice` is expected to lie within `total`; this is only checked in debug builds.
#[inline]
fn offset_to(total: &[u8], subslice: &[u8]) -> usize {
    let base = total.as_ptr() as usize;
    let inner = subslice.as_ptr() as usize;
    debug_assert!(
        base <= inner,
        "`Offset::offset_to` only accepts slices of `self`"
    );
    inner - base
}
#[cfg(test)]
mod test {
use super::*;
use proptest::prelude::*;
use std::io::Write as _;
proptest! {
// Strings generated from `\PC*` must pass through unchanged when written in one call.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_all_no_escapes(s in "\\PC*") {
let buffer = Vec::new();
let mut stream = StripStream::new(buffer);
stream.write_all(s.as_bytes()).unwrap();
let buffer = stream.into_inner();
let actual = std::str::from_utf8(buffer.as_ref()).unwrap();
assert_eq!(s, actual);
}
// Same as above, but fed one byte per call to exercise state carried between writes.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_byte_no_escapes(s in "\\PC*") {
let buffer = Vec::new();
let mut stream = StripStream::new(buffer);
for byte in s.as_bytes() {
stream.write_all(&[*byte]).unwrap();
}
let buffer = stream.into_inner();
let actual = std::str::from_utf8(buffer.as_ref()).unwrap();
assert_eq!(s, actual);
}
// Arbitrary bytes: when the output is valid UTF-8, it must contain no ASCII control characters
// other than ASCII whitespace.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_all_random(s in any::<Vec<u8>>()) {
let buffer = Vec::new();
let mut stream = StripStream::new(buffer);
stream.write_all(s.as_slice()).unwrap();
let buffer = stream.into_inner();
if let Ok(actual) = std::str::from_utf8(buffer.as_ref()) {
for char in actual.chars() {
assert!(!char.is_ascii() || !char.is_control() || char.is_ascii_whitespace(), "{:?} -> {:?}: {:?}", String::from_utf8_lossy(&s), actual, char);
}
}
}
// Same property as `write_all_random`, but fed one byte per call.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_byte_random(s in any::<Vec<u8>>()) {
let buffer = Vec::new();
let mut stream = StripStream::new(buffer);
for byte in s.as_slice() {
stream.write_all(&[*byte]).unwrap();
}
let buffer = stream.into_inner();
if let Ok(actual) = std::str::from_utf8(buffer.as_ref()) {
for char in actual.chars() {
assert!(!char.is_ascii() || !char.is_control() || char.is_ascii_whitespace(), "{:?} -> {:?}: {:?}", String::from_utf8_lossy(&s), actual, char);
}
}
}
}
}
use crate::adapter::WinconBytes;
use crate::stream::AsLockedWrite;
use crate::stream::IsTerminal;
/// Only pass printable data to the inner `Write`
#[cfg(feature = "wincon")] // here mostly for documentation purposes
#[derive(Debug)]
pub struct WinconStream<S>
where
S: anstyle_wincon::WinconStream,
{
// The wrapped stream that receives the colored output.
raw: S,
// `WinconBytes` is especially large compared to other variants of `AutoStream`, so boxing it
// here so `AutoStream` doesn't have to discard one allocation and create another one when
// calling `AutoStream::lock`
state: Box<WinconBytes>,
}
impl<S> WinconStream<S>
where
S: anstyle_wincon::WinconStream,
{
/// Only pass printable data to the inner `Write`
#[inline]
pub fn new(raw: S) -> Self {
Self {
raw,
// Start from a freshly boxed default `WinconBytes` state.
state: Default::default(),
}
}
/// Get the wrapped [`anstyle_wincon::WinconStream`]
///
/// The parser state is discarded.
#[inline]
pub fn into_inner(self) -> S {
self.raw
}
}
impl<S> WinconStream<S>
where
S: anstyle_wincon::WinconStream,
S: IsTerminal,
{
/// Returns `true` if the descriptor/handle refers to a terminal/tty.
///
/// The answer comes straight from the wrapped stream.
#[inline]
pub fn is_terminal(&self) -> bool {
self.raw.is_terminal()
}
}
impl WinconStream<std::io::Stdout> {
/// Get exclusive access to the `WinconStream`
///
/// Why?
/// - Faster performance when writing in a loop
/// - Avoid other threads interleaving output with the current thread
#[inline]
pub fn lock(self) -> WinconStream<std::io::StdoutLock<'static>> {
WinconStream {
raw: self.raw.lock(),
// The boxed state is moved across as-is, so no new allocation is made here.
state: self.state,
}
}
}
impl WinconStream<std::io::Stderr> {
/// Get exclusive access to the `WinconStream`
///
/// Why?
/// - Faster performance when writing in a loop
/// - Avoid other threads interleaving output with the current thread
#[inline]
pub fn lock(self) -> WinconStream<std::io::StderrLock<'static>> {
WinconStream {
raw: self.raw.lock(),
// The boxed state is moved across as-is, so no new allocation is made here.
state: self.state,
}
}
}
impl<S> std::io::Write for WinconStream<S>
where
    S: anstyle_wincon::WinconStream,
    S: AsLockedWrite,
{
    // Every method is forwarded explicitly (rather than relying on the trait's defaults) so that
    // the lock is taken once per call.

    /// Extract styled text from `buf` and write it, colored, to the locked inner stream.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let mut locked = self.raw.as_locked_write();
        write(&mut locked, &mut self.state, buf)
    }

    /// Write the first non-empty slice of `bufs` (or an empty slice when there is none).
    #[inline]
    fn write_vectored(&mut self, bufs: &[std::io::IoSlice<'_>]) -> std::io::Result<usize> {
        let first_non_empty = bufs.iter().find(|slice| !slice.is_empty());
        let buf: &[u8] = match first_non_empty {
            Some(slice) => &**slice,
            None => &[],
        };
        self.write(buf)
    }

    // is_write_vectored: nightly only

    /// Flush the locked inner stream.
    #[inline]
    fn flush(&mut self) -> std::io::Result<()> {
        let mut locked = self.raw.as_locked_write();
        locked.flush()
    }

    /// Extract styled text from `buf` and write all of it to the locked inner stream.
    #[inline]
    fn write_all(&mut self, buf: &[u8]) -> std::io::Result<()> {
        let mut locked = self.raw.as_locked_write();
        write_all(&mut locked, &mut self.state, buf)
    }

    // write_all_vectored: nightly only

    /// Format `args`, sending the output through the styled writer.
    #[inline]
    fn write_fmt(&mut self, args: std::fmt::Arguments<'_>) -> std::io::Result<()> {
        let mut locked = self.raw.as_locked_write();
        write_fmt(&mut locked, &mut self.state, args)
    }
}
/// Extract styled text from `buf` and write each piece to `raw` with its colors.
///
/// NOTE: short writes are not reported to the caller. If `raw` accepts only part of a piece, the
/// loop stops, yet the function still returns `buf.len()`, so the unwritten remainder of `buf` is
/// dropped (see the HACK below).
///
/// Errors from `raw.write_colored` are propagated.
fn write(
raw: &mut dyn anstyle_wincon::WinconStream,
state: &mut WinconBytes,
buf: &[u8],
) -> std::io::Result<usize> {
for (style, printable) in state.extract_next(buf) {
// Colors are reduced by `cap_wincon_color` before being passed to `write_colored`.
let fg = style.get_fg_color().and_then(cap_wincon_color);
let bg = style.get_bg_color().and_then(cap_wincon_color);
let written = raw.write_colored(fg, bg, printable.as_bytes())?;
let possible = printable.len();
if possible != written {
// HACK: Unsupported atm
break;
}
}
Ok(buf.len())
}
/// Extract styled text from `buf` and write every piece to `raw` in full, with its colors.
///
/// Each piece is retried until completely written. Interrupted writes are retried; a write that
/// makes no progress is reported as `WriteZero`; any other error is returned as-is.
fn write_all(
    raw: &mut dyn anstyle_wincon::WinconStream,
    state: &mut WinconBytes,
    buf: &[u8],
) -> std::io::Result<()> {
    for (style, printable) in state.extract_next(buf) {
        let mut remaining = printable.as_bytes();
        let fg = style.get_fg_color().and_then(cap_wincon_color);
        let bg = style.get_bg_color().and_then(cap_wincon_color);
        while !remaining.is_empty() {
            let written = match raw.write_colored(fg, bg, remaining) {
                Ok(n) => n,
                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            if written == 0 {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::WriteZero,
                    "failed to write whole buffer",
                ));
            }
            remaining = &remaining[written..];
        }
    }
    Ok(())
}
/// Format `args` and send the resulting bytes through the styled `write_all`.
///
/// The work is delegated to `crate::fmt::Adapter`, which is given a closure that forwards each
/// byte buffer to `write_all` with this stream's `raw` and `state`.
fn write_fmt(
raw: &mut dyn anstyle_wincon::WinconStream,
state: &mut WinconBytes,
args: std::fmt::Arguments<'_>,
) -> std::io::Result<()> {
// Shadows the free function `write_all` within this scope; the call inside the closure body
// still resolves to the free function.
let write_all = |buf: &[u8]| write_all(raw, state, buf);
crate::fmt::Adapter::new(write_all).write_fmt(args)
}
/// Reduce `color` to an [`anstyle::AnsiColor`], or `None` when there is no such mapping.
///
/// - `Ansi` colors pass through unchanged.
/// - `Ansi256` colors yield whatever `into_ansi` returns.
/// - `Rgb` colors are always dropped.
fn cap_wincon_color(color: anstyle::Color) -> Option<anstyle::AnsiColor> {
match color {
anstyle::Color::Ansi(c) => Some(c),
anstyle::Color::Ansi256(c) => c.into_ansi(),
anstyle::Color::Rgb(_) => None,
}
}
#[cfg(test)]
mod test {
use super::*;
use proptest::prelude::*;
use std::io::Write as _;
proptest! {
// Strings generated from `\PC*` must pass through unchanged when written in one call.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_all_no_escapes(s in "\\PC*") {
let buffer = Vec::new();
let mut stream = WinconStream::new(buffer);
stream.write_all(s.as_bytes()).unwrap();
let buffer = stream.into_inner();
let actual = std::str::from_utf8(buffer.as_ref()).unwrap();
assert_eq!(s, actual);
}
// Same as above, but fed one byte per call to exercise state carried between writes.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_byte_no_escapes(s in "\\PC*") {
let buffer = Vec::new();
let mut stream = WinconStream::new(buffer);
for byte in s.as_bytes() {
stream.write_all(&[*byte]).unwrap();
}
let buffer = stream.into_inner();
let actual = std::str::from_utf8(buffer.as_ref()).unwrap();
assert_eq!(s, actual);
}
// Arbitrary bytes: only checks that writing succeeds; the output is not inspected.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_all_random(s in any::<Vec<u8>>()) {
let buffer = Vec::new();
let mut stream = WinconStream::new(buffer);
stream.write_all(s.as_slice()).unwrap();
}
// Same as `write_all_random`, but fed one byte per call.
#[test]
#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
fn write_byte_random(s in any::<Vec<u8>>()) {
let buffer = Vec::new();
let mut stream = WinconStream::new(buffer);
for byte in s.as_slice() {
stream.write_all(&[*byte]).unwrap();
}
}
}
}
{"files":{"Cargo.lock":"57086bfa666a6bfc294dfbfc33466ae893201a1f35385aa1c2b7f060fe4dd90a","Cargo.toml":"9c6fbf548a59370ce126a38243de1fc84449b78b83097bfda64723ed6a523afb","LICENSE-APACHE":"b40930bbcf80744c86c46a12bc9da056641d722716c378f5659b9e555ef833e1","LICENSE-MIT":"c1d4bc00896473e0109ccb4c3c7d21addb55a4ff1a644be204dcfce26612af2a","README.md":"abc82171d436ee0eb221838e8d21a21a2e392504e87f0c130b5eca6a35671e1e","benches/parse.rs":"00747c503290b2e9171055861676e9037a555a8c968f58bade6c9411cee71e5f","examples/parselog.rs":"83bb99aec7704e6aa5524dd6090f1f4837c37935e53d6ff5d17b723b0c55d029","src/lib.rs":"53d3cc122726aa2521141592a11cf56c08dfcb577ad89c7b9151fb75999a846a","src/params.rs":"8cfef4e2ab1961ca2d9f210da553fc6ac64bb6dbd03321f0ee7d6089ab45389c","src/state/codegen.rs":"0820bb4e54d9b3f1e0e80c1935ca57466cfe60e98f556c31f19e5e1b6ed31787","src/state/definitions.rs":"86433ae4901b2b647486c78fd74230d2963668d45e1d2df28b852d05610a2eff","src/state/mod.rs":"5c07420f7245b823bc1bcff797ae70c21cfdfaee8f326d37a6e95d529b08f38d","src/state/table.rs":"673b7e9242c5248efc076086cc6923578ec2f059c0c26da21363528e20e4285c"},"package":"3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"}
\ No newline at end of file
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.66.0"
name = "anstyle-parse"
version = "0.2.6"
build = false
include = [
"build.rs",
"src/**/*",
"Cargo.toml",
"Cargo.lock",
"LICENSE*",
"README.md",
"benches/**/*",
"examples/**/*",
]
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Parse ANSI Style Escapes"
homepage = "https://github.com/rust-cli/anstyle"
readme = "README.md"
keywords = [
"ansi",
"terminal",
"color",
"vte",
]
categories = ["command-line-interface"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-cli/anstyle.git"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
]
[[package.metadata.release.pre-release-replacements]]
file = "CHANGELOG.md"
min = 1
replace = "{{version}}"
search = "Unreleased"
[[package.metadata.release.pre-release-replacements]]
exactly = 1
file = "CHANGELOG.md"
replace = "...{{tag_name}}"
search = '\.\.\.HEAD'
[[package.metadata.release.pre-release-replacements]]
file = "CHANGELOG.md"
min = 1
replace = "{{date}}"
search = "ReleaseDate"
[[package.metadata.release.pre-release-replacements]]
exactly = 1
file = "CHANGELOG.md"
replace = """
<!-- next-header -->
## [Unreleased] - ReleaseDate
"""
search = "<!-- next-header -->"
[[package.metadata.release.pre-release-replacements]]
exactly = 1
file = "CHANGELOG.md"
replace = """
<!-- next-url -->
[Unreleased]: https://github.com/rust-cli/anstyle/compare/{{tag_name}}...HEAD"""
search = "<!-- next-url -->"
[lib]
name = "anstyle_parse"
path = "src/lib.rs"
[[example]]
name = "parselog"
path = "examples/parselog.rs"
[[bench]]
name = "parse"
path = "benches/parse.rs"
harness = false
required-features = ["utf8"]
[dependencies.arrayvec]
version = "0.7.2"
optional = true
default-features = false
[dependencies.utf8parse]
version = "0.2.1"
optional = true
[dev-dependencies.codegenrs]
version = "3.0.1"
default-features = false
[dev-dependencies.divan]
version = "0.1.14"
[dev-dependencies.proptest]
version = "1.4.0"
[dev-dependencies.snapbox]
version = "0.6.5"
[dev-dependencies.vte_generate_state_changes]
version = "0.1.1"
[features]
core = ["dep:arrayvec"]
default = ["utf8"]
utf8 = ["dep:utf8parse"]
[lints.clippy]
bool_assert_comparison = "allow"
branches_sharing_code = "allow"
checked_conversions = "warn"
collapsible_else_if = "allow"
create_dir = "warn"
dbg_macro = "warn"
debug_assert_with_mut_call = "warn"
doc_markdown = "warn"
empty_enum = "warn"
enum_glob_use = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
fallible_impl_from = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
float_cmp_const = "warn"
fn_params_excessive_bools = "warn"
from_iter_instead_of_collect = "warn"
if_same_then_else = "allow"
implicit_clone = "warn"
imprecise_flops = "warn"
inconsistent_struct_constructor = "warn"
inefficient_to_string = "warn"
infinite_loop = "warn"
invalid_upcast_comparisons = "warn"
large_digit_groups = "warn"
large_stack_arrays = "warn"
large_types_passed_by_value = "warn"
let_and_return = "allow"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
mem_forget = "warn"
mutex_integer = "warn"
needless_continue = "warn"
needless_for_each = "warn"
negative_feature_names = "warn"
path_buf_push_overwrite = "warn"
ptr_as_ptr = "warn"
rc_mutex = "warn"
redundant_feature_names = "warn"
ref_option_ref = "warn"
rest_pat_in_fully_bound_structs = "warn"
same_functions_in_if_condition = "warn"
self_named_module_files = "warn"
semicolon_if_nothing_returned = "warn"
str_to_string = "warn"
string_add = "warn"
string_add_assign = "warn"
string_lit_as_bytes = "warn"
string_to_string = "warn"
todo = "warn"
trait_duplication_in_bounds = "warn"
uninlined_format_args = "warn"
verbose_file_reads = "warn"
wildcard_imports = "warn"
zero_sized_map_values = "warn"
[lints.rust]
unreachable_pub = "warn"
unsafe_op_in_unsafe_fn = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
unused_qualifications = "warn"
[lints.rust.rust_2018_idioms]
level = "warn"
priority = -1
Copyright (c) 2016 Joe Wilm and individual contributors
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
# anstyle-parse
> Parse [ANSI Style Escapes](https://vt100.net/emu/dec_ansi_parser)
[![Documentation](https://img.shields.io/badge/docs-master-blue.svg)][Documentation]
![License](https://img.shields.io/crates/l/anstyle-parse.svg)
[![Crates Status](https://img.shields.io/crates/v/anstyle-parse.svg)](https://crates.io/crates/anstyle-parse)
## License
Licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally
submitted for inclusion in the work by you, as defined in the Apache-2.0
license, shall be dual licensed as above, without any additional terms or
conditions.
### Special Thanks
[chrisduerr](https://github.com/alacritty/vte/commits?author=chrisduerr) and the
[alacritty project](https://github.com/alacritty/alacritty) for
[vte](https://crates.io/crates/vte) which
[this was forked from](https://github.com/alacritty/vte/issues/82)
[Crates.io]: https://crates.io/crates/anstyle-parse
[Documentation]: https://docs.rs/anstyle-parse
#![allow(clippy::incompatible_msrv)] // not verifying benches atm
use std::hint::black_box;
use anstyle_parse::DefaultCharAccumulator;
use anstyle_parse::Params;
use anstyle_parse::Parser;
use anstyle_parse::Perform;
/// Benchmark: feed every byte of `data` through a `Parser` into a dispatcher whose callbacks
/// only `black_box` their arguments.
#[divan::bench(args = DATA)]
fn advance(data: &Data) {
    let mut parser = Parser::<DefaultCharAccumulator>::new();
    let mut sink = BenchDispatcher;
    for &byte in data.content() {
        parser.advance(&mut sink, byte);
    }
}
#[divan::bench(args = DATA)]
fn advance_strip(data: &Data) -> String {
let mut stripped = Strip::with_capacity(data.content().len());
let mut parser = Parser::<DefaultCharAccumulator>::new();
for byte in data.content() {
parser.advance(&mut stripped, *byte);
}
black_box(stripped.0)
}
/// Benchmark: step the raw state table over every byte of `data`, without dispatching anything.
#[divan::bench(args = DATA)]
fn state_change(data: &Data) {
    let mut current = anstyle_parse::state::State::default();
    for &byte in data.content() {
        let (next, action) = anstyle_parse::state::state_change(current, byte);
        black_box(action);
        current = next;
    }
}
/// Benchmark: strip `data` with the hand-rolled `strip_str`.
///
/// Datasets that are not valid UTF-8 are skipped: nothing is benchmarked for them.
#[divan::bench(args = DATA)]
fn state_change_strip_str(bencher: divan::Bencher<'_, '_>, data: &Data) {
if let Ok(content) = std::str::from_utf8(data.content()) {
bencher
.with_inputs(|| content)
.bench_local_values(|content| {
let stripped = strip_str(content);
black_box(stripped)
});
}
}
/// `Perform` implementation for benchmarking: each callback defined here passes its arguments to
/// `black_box` and does nothing else.
struct BenchDispatcher;
impl Perform for BenchDispatcher {
fn print(&mut self, c: char) {
black_box(c);
}
fn execute(&mut self, byte: u8) {
black_box(byte);
}
fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: u8) {
black_box((params, intermediates, ignore, c));
}
fn put(&mut self, byte: u8) {
black_box(byte);
}
fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) {
black_box((params, bell_terminated));
}
fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: u8) {
black_box((params, intermediates, ignore, c));
}
fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8) {
black_box((intermediates, ignore, byte));
}
}
/// `Perform` implementation that collects the printable text of a stream into a `String`.
#[derive(Default)]
struct Strip(String);
impl Strip {
/// Create an empty collector whose `String` is pre-allocated with `capacity` bytes.
fn with_capacity(capacity: usize) -> Self {
Self(String::with_capacity(capacity))
}
}
impl Perform for Strip {
// Printed characters are kept as-is.
fn print(&mut self, c: char) {
self.0.push(c);
}
// Of the executed bytes, only ASCII whitespace is kept.
fn execute(&mut self, byte: u8) {
if byte.is_ascii_whitespace() {
self.0.push(byte as char);
}
}
}
/// Strip non-printable bytes from `content` using the raw state table, without a `Parser`.
///
/// The loop alternates between two scans over the remaining bytes:
/// 1. find the first byte that is not printable from `State::Ground` and copy everything before
///    it to the output;
/// 2. step the state machine from `State::Ground` until a printable byte is found, discarding
///    everything before it.
///
/// # Panics
///
/// Panics if the collected bytes are not valid UTF-8.
fn strip_str(content: &str) -> String {
use anstyle_parse::state::state_change;
use anstyle_parse::state::Action;
use anstyle_parse::state::State;
// Matches bytes in `0x80..=0xbf`.
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
matches!(b, 0x80..=0xbf)
}
// A byte is kept when it is printed, starts a UTF-8 sequence, continues one, or is executed
// ASCII whitespace.
#[inline]
fn is_printable(action: Action, byte: u8) -> bool {
action == Action::Print
|| action == Action::BeginUtf8
// since we know the input is valid UTF-8, the only thing we can do with
// continuations is to print them
|| is_utf8_continuation(byte)
|| (action == Action::Execute && byte.is_ascii_whitespace())
}
let mut stripped = Vec::with_capacity(content.len());
let mut bytes = content.as_bytes();
while !bytes.is_empty() {
// Scan 1: length of the printable prefix, always judged from the ground state.
let offset = bytes.iter().copied().position(|b| {
let (_next_state, action) = state_change(State::Ground, b);
!is_printable(action, b)
});
let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
stripped.extend(printable);
bytes = next;
// Scan 2: skip ahead to the next printable byte, tracking the state as we go.
let mut state = State::Ground;
let offset = bytes.iter().copied().position(|b| {
let (next_state, action) = state_change(state, b);
// NOTE(review): `State::Anywhere` presumably means "stay in the current state" — confirm
// against the state table.
if next_state != State::Anywhere {
state = next_state;
}
is_printable(action, b)
});
let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
bytes = next;
}
// NOTE(review): presumably safe because `content` was valid UTF-8 and whole characters are kept —
// confirm.
#[allow(clippy::unwrap_used)]
String::from_utf8(stripped).unwrap()
}
/// Benchmark inputs, passed to each `#[divan::bench(args = DATA)]` function.
///
/// Each entry pairs a display name with the raw bytes to parse.
const DATA: &[Data] = &[
// Small inline sample.
Data(
"0-state_changes",
b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\".as_slice(),
),
// Only included when the `utf8` feature is enabled.
#[cfg(feature = "utf8")]
Data("1-demo.vte", include_bytes!("../tests/demo.vte").as_slice()),
Data(
"2-rg_help.vte",
include_bytes!("../tests/rg_help.vte").as_slice(),
),
Data(
"3-rg_linus.vte",
include_bytes!("../tests/rg_linus.vte").as_slice(),
),
];
/// A named benchmark input: `(name, content)`.
#[derive(Debug)]
struct Data(&'static str, &'static [u8]);

impl Data {
    /// The name under which this input is displayed.
    const fn name(&self) -> &'static str {
        let Self(label, _) = *self;
        label
    }

    /// The raw bytes of this input.
    const fn content(&self) -> &'static [u8] {
        let Self(_, bytes) = *self;
        bytes
    }
}

/// Displays as the input's name, honouring the formatter's width and alignment.
impl std::fmt::Display for Data {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.name(), f)
    }
}
/// Check that `strip_str` and the `Parser`-driven `Strip` produce the same text for every UTF-8
/// dataset, so the two stripping benchmarks compare equivalent work.
///
/// Datasets that are not valid UTF-8 are skipped.
#[test]
fn verify_data() {
    for data in DATA {
        let Data(name, content) = data;
        // Make sure the comparison is fair
        if let Ok(content) = std::str::from_utf8(content) {
            let mut stripped = Strip::with_capacity(content.len());
            let mut parser = Parser::<DefaultCharAccumulator>::new();
            for byte in content.as_bytes() {
                parser.advance(&mut stripped, *byte);
            }
            // Name the dataset so a failure points at the offending input.
            assert_eq!(
                stripped.0,
                strip_str(content),
                "dataset `{name}` was stripped differently by `Strip` and `strip_str`"
            );
        }
    }
}
// Entry point of the benchmark binary: hands control to `divan::main`.
fn main() {
divan::main();
}
//! Parse input from stdin and log actions on stdout
use std::io::{self, Read};
use anstyle_parse::{DefaultCharAccumulator, Params, Parser, Perform};
/// A type implementing Perform that just logs actions
///
/// Every callback prints one line to stdout, tagged with the callback's name in brackets.
struct Log;
impl Perform for Log {
fn print(&mut self, c: char) {
println!("[print] {c:?}");
}
// Bytes are logged as two-digit lowercase hex.
fn execute(&mut self, byte: u8) {
println!("[execute] {byte:02x}");
}
fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: u8) {
println!(
"[hook] params={params:?}, intermediates={intermediates:?}, ignore={ignore:?}, char={c:?}"
);
}
fn put(&mut self, byte: u8) {
println!("[put] {byte:02x}");
}
fn unhook(&mut self) {
println!("[unhook]");
}
fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) {
println!("[osc_dispatch] params={params:?} bell_terminated={bell_terminated}");
}
// Unlike `hook`, the params here are pretty-printed (`{:#?}`).
fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: u8) {
println!(
"[csi_dispatch] params={params:#?}, intermediates={intermediates:?}, ignore={ignore:?}, char={c:?}"
);
}
fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8) {
println!(
"[esc_dispatch] intermediates={intermediates:?}, ignore={ignore:?}, byte={byte:02x}"
);
}
}
/// Read stdin in 2048-byte chunks and feed every byte to the parser, logging each action.
///
/// Stops at end of input, or after printing the first read error.
fn main() {
    let stdin = io::stdin();
    let mut reader = stdin.lock();

    let mut parser = Parser::<DefaultCharAccumulator>::new();
    let mut logger = Log;

    let mut chunk = [0; 2048];

    loop {
        let filled = match reader.read(&mut chunk) {
            Ok(0) => break,
            Ok(n) => n,
            Err(err) => {
                println!("err: {err}");
                break;
            }
        };
        for &byte in &chunk[..filled] {
            parser.advance(&mut logger, byte);
        }
    }
}
//! Fixed size parameters list with optional subparameters.
use core::fmt::{self, Debug, Formatter};
/// Maximum number of parameters and subparameters (combined) that [`Params`] can hold.
pub(crate) const MAX_PARAMS: usize = 32;

/// Fixed-size list of parameters, each of which may carry subparameters.
#[derive(Default, Clone, PartialEq, Eq)]
pub struct Params {
    /// Number of subparameters for each parameter.
    ///
    /// For each entry in the `params` slice, this stores the length of the param as number of
    /// subparams at the same index as the param in the `params` slice.
    ///
    /// At the subparam positions the length will always be `0`.
    subparams: [u8; MAX_PARAMS],

    /// All parameters and subparameters.
    params: [u16; MAX_PARAMS],

    /// Number of subparameters in the current parameter.
    current_subparams: u8,

    /// Total number of parameters and subparameters.
    len: usize,
}

impl Params {
    /// Returns the total number of stored values, counting parameters and subparameters alike.
    ///
    /// This can be larger than the number of items yielded by [`Params::iter`], which groups
    /// each parameter with its subparameters.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if there are no parameters present.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns an iterator over all parameters and subparameters.
    ///
    /// Each item is one parameter together with its subparameters, as a slice.
    #[inline]
    pub fn iter(&self) -> ParamsIter<'_> {
        ParamsIter::new(self)
    }

    /// Returns `true` if there is no more space for additional parameters.
    #[inline]
    pub(crate) fn is_full(&self) -> bool {
        self.len == MAX_PARAMS
    }

    /// Clear all parameters.
    ///
    /// Only the counters are reset; the backing arrays are overwritten as new values arrive.
    #[inline]
    pub(crate) fn clear(&mut self) {
        self.current_subparams = 0;
        self.len = 0;
    }

    /// Add an additional parameter.
    ///
    /// `item` closes the parameter currently being built: it becomes that parameter's last
    /// value, and the next value starts a new parameter.
    ///
    /// # Panics
    ///
    /// Panics if the list is already full (see `is_full`).
    #[inline]
    pub(crate) fn push(&mut self, item: u16) {
        // The group length is stored at the index of the group's first value.
        self.subparams[self.len - self.current_subparams as usize] = self.current_subparams + 1;
        self.params[self.len] = item;
        self.current_subparams = 0;
        self.len += 1;
    }

    /// Add an additional subparameter to the current parameter.
    ///
    /// Unlike `push`, the current parameter stays open for further values.
    ///
    /// # Panics
    ///
    /// Panics if the list is already full (see `is_full`).
    #[inline]
    pub(crate) fn extend(&mut self, item: u16) {
        // The group length is stored at the index of the group's first value.
        self.subparams[self.len - self.current_subparams as usize] = self.current_subparams + 1;
        self.params[self.len] = item;
        self.current_subparams += 1;
        self.len += 1;
    }
}

impl<'a> IntoIterator for &'a Params {
    type IntoIter = ParamsIter<'a>;
    type Item = &'a [u16];

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

/// Immutable subparameter iterator.
///
/// Yields one slice per parameter, containing the parameter followed by its subparameters.
pub struct ParamsIter<'a> {
    params: &'a Params,
    /// Index into `params.params` of the next group to yield.
    index: usize,
}

impl<'a> ParamsIter<'a> {
    fn new(params: &'a Params) -> Self {
        Self { params, index: 0 }
    }
}

impl<'a> Iterator for ParamsIter<'a> {
    type Item = &'a [u16];

    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.params.len() {
            return None;
        }

        // Get all subparameters for the current parameter.
        let num_subparams = self.params.subparams[self.index];
        let param = &self.params.params[self.index..self.index + num_subparams as usize];

        // Jump to the next parameter.
        self.index += num_subparams as usize;

        Some(param)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // `remaining` counts raw values (parameters and subparameters), but each item yielded
        // covers a whole group. The raw count is therefore only an upper bound; the lower bound
        // is one item whenever anything is left.
        let remaining = self.params.len().saturating_sub(self.index);
        (usize::from(remaining > 0), Some(remaining))
    }
}
/// Formats as `[p;p:s:s;p]`: parameters separated by `;`, subparameters by `:`.
impl Debug for Params {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("[")?;

        let mut first_param = true;
        for param in self {
            if !first_param {
                f.write_str(";")?;
            }
            first_param = false;

            let mut first_subparam = true;
            for subparam in param {
                if !first_subparam {
                    f.write_str(":")?;
                }
                first_subparam = false;

                Debug::fmt(subparam, f)?;
            }
        }

        f.write_str("]")
    }
}
use super::{pack, unpack, Action, State};
use snapbox::file;
use vte_generate_state_changes::generate_state_changes;
// Snapshot test: regenerates the state table source, runs it through rustfmt, and compares it
// with the checked-in `table.rs`.
#[test]
fn table() {
let mut content = vec![];
generate_table(&mut content).unwrap();
let content = String::from_utf8(content).unwrap();
let content = codegenrs::rustfmt(&content, None).unwrap();
snapbox::assert_data_eq!(content, file!["table.rs"].raw());
}
/// Write Rust source for the `STATE_CHANGES` table to `file`.
///
/// The output is a `@generated` header naming this file, followed by a `#[rustfmt::skip]` const
/// array with one row per state. Within a row, a comment naming the unpacked `(state, action)` is
/// emitted each time the packed value changes from the previous entry.
///
/// # Errors
///
/// Returns any error produced by writing to `file`.
///
/// # Panics
///
/// Panics if a row index does not convert to a `State`.
#[allow(clippy::write_literal)]
fn generate_table(file: &mut impl std::io::Write) -> std::io::Result<()> {
writeln!(
file,
"// This file is @generated by {}",
// Normalize Windows path separators in the header.
std::file!().replace('\\', "/")
)?;
writeln!(file)?;
writeln!(
file,
"{}",
r#"#[rustfmt::skip]
pub(crate) const STATE_CHANGES: [[u8; 256]; 16] = ["#
)?;
for (state, entries) in STATE_CHANGES.iter().enumerate() {
writeln!(file, " // {:?}", State::try_from(state as u8).unwrap())?;
write!(file, " [")?;
// Tracks the previous packed value so runs of identical entries share one comment.
let mut last_entry = None;
for packed in entries {
let (next_state, action) = unpack(*packed);
if last_entry != Some(packed) {
writeln!(file)?;
writeln!(file, " // {next_state:?} {action:?}")?;
write!(file, " ")?;
}
write!(file, "0x{packed:0>2x}, ")?;
last_entry = Some(packed);
}
writeln!(file)?;
writeln!(file, " ],")?;
}
writeln!(file, "{}", r#"];"#)?;
Ok(())
}
/// This is the state change table. It's indexed first by current state and then by the next
/// character in the pty stream.
///
/// There are 16 rows of 256 entries each. Every entry is a packed byte that `unpack` splits into
/// a `(State, Action)` pair.
pub(crate) static STATE_CHANGES: [[u8; 256]; 16] = state_changes();
// Transition table definition, expanded by the `generate_state_changes!` macro into the
// `state_changes` function used to initialise `STATE_CHANGES`.
//
// Layout: one group per current state; inside a group, each line maps an input byte (or an
// inclusive byte range) to a `(next state, action)` pair.
//
// NOTE(review): a next state of `Anywhere` presumably means "stay in the current state" —
// confirm against the code that consumes this table.
generate_state_changes!(state_changes, {
Anywhere {
// 0x18 = CAN, 0x1a = SUB, 0x1b = ESC
0x18 => (Ground, Execute),
0x1a => (Ground, Execute),
0x1b => (Escape, Nop),
},
Ground {
0x00..=0x17 => (Anywhere, Execute),
0x19 => (Anywhere, Execute),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x7f => (Anywhere, Print),
0x80..=0x8f => (Anywhere, Execute),
0x91..=0x9a => (Anywhere, Execute),
0x9c => (Anywhere, Execute),
// Beginning of UTF-8 2 byte sequence
0xc2..=0xdf => (Utf8, BeginUtf8),
// Beginning of UTF-8 3 byte sequence
0xe0..=0xef => (Utf8, BeginUtf8),
// Beginning of UTF-8 4 byte sequence
0xf0..=0xf4 => (Utf8, BeginUtf8),
},
Escape {
0x00..=0x17 => (Anywhere, Execute),
0x19 => (Anywhere, Execute),
0x1c..=0x1f => (Anywhere, Execute),
0x7f => (Anywhere, Ignore),
0x20..=0x2f => (EscapeIntermediate, Collect),
0x30..=0x4f => (Ground, EscDispatch),
0x51..=0x57 => (Ground, EscDispatch),
0x59 => (Ground, EscDispatch),
0x5a => (Ground, EscDispatch),
0x5c => (Ground, EscDispatch),
0x60..=0x7e => (Ground, EscDispatch),
// 0x5b = '[', 0x5d = ']', 0x50 = 'P', 0x58 = 'X', 0x5e = '^', 0x5f = '_'
0x5b => (CsiEntry, Nop),
0x5d => (OscString, Nop),
0x50 => (DcsEntry, Nop),
0x58 => (SosPmApcString, Nop),
0x5e => (SosPmApcString, Nop),
0x5f => (SosPmApcString, Nop),
},
EscapeIntermediate {
0x00..=0x17 => (Anywhere, Execute),
0x19 => (Anywhere, Execute),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x2f => (Anywhere, Collect),
0x7f => (Anywhere, Ignore),
0x30..=0x7e => (Ground, EscDispatch),
},
CsiEntry {
0x00..=0x17 => (Anywhere, Execute),
0x19 => (Anywhere, Execute),
0x1c..=0x1f => (Anywhere, Execute),
0x7f => (Anywhere, Ignore),
0x20..=0x2f => (CsiIntermediate, Collect),
// 0x30..=0x39 are the ASCII digits; 0x3a = ':' and 0x3b = ';'
0x30..=0x39 => (CsiParam, Param),
0x3a..=0x3b => (CsiParam, Param),
0x3c..=0x3f => (CsiParam, Collect),
0x40..=0x7e => (Ground, CsiDispatch),
},
CsiIgnore {
0x00..=0x17 => (Anywhere, Execute),
0x19 => (Anywhere, Execute),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x3f => (Anywhere, Ignore),
0x7f => (Anywhere, Ignore),
0x40..=0x7e => (Ground, Nop),
},
CsiParam {
0x00..=0x17 => (Anywhere, Execute),
0x19 => (Anywhere, Execute),
0x1c..=0x1f => (Anywhere, Execute),
0x30..=0x39 => (Anywhere, Param),
0x3a..=0x3b => (Anywhere, Param),
0x7f => (Anywhere, Ignore),
0x3c..=0x3f => (CsiIgnore, Nop),
0x20..=0x2f => (CsiIntermediate, Collect),
0x40..=0x7e => (Ground, CsiDispatch),
},
CsiIntermediate {
0x00..=0x17 => (Anywhere, Execute),
0x19 => (Anywhere, Execute),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x2f => (Anywhere, Collect),
0x7f => (Anywhere, Ignore),
0x30..=0x3f => (CsiIgnore, Nop),
0x40..=0x7e => (Ground, CsiDispatch),
},
// Unlike the Escape/Csi groups above, the Dcs groups map the control bytes 0x00..=0x1f to
// `Ignore` (or `Put` in `DcsPassthrough`) rather than `Execute`.
DcsEntry {
0x00..=0x17 => (Anywhere, Ignore),
0x19 => (Anywhere, Ignore),
0x1c..=0x1f => (Anywhere, Ignore),
0x7f => (Anywhere, Ignore),
0x20..=0x2f => (DcsIntermediate, Collect),
0x30..=0x39 => (DcsParam, Param),
0x3a..=0x3b => (DcsParam, Param),
0x3c..=0x3f => (DcsParam, Collect),
0x40..=0x7e => (DcsPassthrough, Nop),
},
DcsIntermediate {
0x00..=0x17 => (Anywhere, Ignore),
0x19 => (Anywhere, Ignore),
0x1c..=0x1f => (Anywhere, Ignore),
0x20..=0x2f => (Anywhere, Collect),
0x7f => (Anywhere, Ignore),
0x30..=0x3f => (DcsIgnore, Nop),
0x40..=0x7e => (DcsPassthrough, Nop),
},
DcsIgnore {
0x00..=0x17 => (Anywhere, Ignore),
0x19 => (Anywhere, Ignore),
0x1c..=0x1f => (Anywhere, Ignore),
0x20..=0x7f => (Anywhere, Ignore),
// 0x9c is the only byte listed here that leaves this state
0x9c => (Ground, Nop),
},
DcsParam {
0x00..=0x17 => (Anywhere, Ignore),
0x19 => (Anywhere, Ignore),
0x1c..=0x1f => (Anywhere, Ignore),
0x30..=0x39 => (Anywhere, Param),
0x3a..=0x3b => (Anywhere, Param),
0x7f => (Anywhere, Ignore),
0x3c..=0x3f => (DcsIgnore, Nop),
0x20..=0x2f => (DcsIntermediate, Collect),
0x40..=0x7e => (DcsPassthrough, Nop),
},
DcsPassthrough {
0x00..=0x17 => (Anywhere, Put),
0x19 => (Anywhere, Put),
0x1c..=0x1f => (Anywhere, Put),
0x20..=0x7e => (Anywhere, Put),
0x7f => (Anywhere, Ignore),
0x9c => (Ground, Nop),
},
SosPmApcString {
0x00..=0x17 => (Anywhere, Ignore),
0x19 => (Anywhere, Ignore),
0x1c..=0x1f => (Anywhere, Ignore),
0x20..=0x7f => (Anywhere, Ignore),
0x9c => (Ground, Nop),
},
OscString {
0x00..=0x06 => (Anywhere, Ignore),
// 0x07 = BEL; the only byte listed here that returns to Ground
0x07 => (Ground, Nop),
0x08..=0x17 => (Anywhere, Ignore),
0x19 => (Anywhere, Ignore),
0x1c..=0x1f => (Anywhere, Ignore),
// Every byte from 0x20 up, including all bytes >= 0x80, is passed to OscPut
0x20..=0xff => (Anywhere, OscPut),
}
});
#![allow(clippy::exhaustive_enums)]
use core::mem;
/// Parser state.
///
/// The enum is `#[repr(u8)]` with exactly sixteen field-less variants numbered
/// `0..=15`, so every 4-bit value names a variant. `unpack` relies on this when
/// it transmutes the low nibble of a packed byte into a `State`; do not add,
/// remove or renumber variants without updating it.
///
/// The default state is [`State::Ground`].
#[repr(u8)]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum State {
    Anywhere = 0,
    CsiEntry = 1,
    CsiIgnore = 2,
    CsiIntermediate = 3,
    CsiParam = 4,
    DcsEntry = 5,
    DcsIgnore = 6,
    DcsIntermediate = 7,
    DcsParam = 8,
    DcsPassthrough = 9,
    Escape = 10,
    EscapeIntermediate = 11,
    /// Initial state (the `Default`).
    #[default]
    Ground = 12,
    OscString = 13,
    SosPmApcString = 14,
    Utf8 = 15,
}
impl TryFrom<u8> for State {
    /// The rejected raw value is handed back unchanged.
    type Error = u8;

    /// Look up the `State` at index `raw` in `STATES`.
    ///
    /// # Errors
    ///
    /// Returns `Err(raw)` when `raw` is not a valid index into `STATES`.
    #[inline(always)]
    fn try_from(raw: u8) -> Result<Self, Self::Error> {
        match STATES.get(usize::from(raw)) {
            Some(&state) => Ok(state),
            None => Err(raw),
        }
    }
}
/// Lookup table used by `State::try_from(u8)`.
///
/// The entry at index `i` is the `State` variant whose discriminant is `i`, so the order here
/// must stay in sync with the discriminants declared on `State`.
const STATES: [State; 16] = [
State::Anywhere,
State::CsiEntry,
State::CsiIgnore,
State::CsiIntermediate,
State::CsiParam,
State::DcsEntry,
State::DcsIgnore,
State::DcsIntermediate,
State::DcsParam,
State::DcsPassthrough,
State::Escape,
State::EscapeIntermediate,
State::Ground,
State::OscString,
State::SosPmApcString,
State::Utf8,
];
/// Action attached to a state transition.
///
/// The enum is `#[repr(u8)]` with exactly sixteen field-less variants numbered
/// `0..=15`, so every 4-bit value names a variant. `unpack` relies on this when
/// it transmutes the high nibble of a packed byte into an `Action`; do not add,
/// remove or renumber variants without updating it.
///
/// The default action is [`Action::Nop`].
#[repr(u8)]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum Action {
    /// The `Default` action.
    #[default]
    Nop = 0,
    Clear = 1,
    Collect = 2,
    CsiDispatch = 3,
    EscDispatch = 4,
    Execute = 5,
    Hook = 6,
    Ignore = 7,
    OscEnd = 8,
    OscPut = 9,
    OscStart = 10,
    Param = 11,
    Print = 12,
    Put = 13,
    Unhook = 14,
    BeginUtf8 = 15,
}
impl TryFrom<u8> for Action {
    /// The rejected raw value is handed back unchanged.
    type Error = u8;

    /// Look up the `Action` at index `raw` in `ACTIONS`.
    ///
    /// # Errors
    ///
    /// Returns `Err(raw)` when `raw` is not a valid index into `ACTIONS`.
    #[inline(always)]
    fn try_from(raw: u8) -> Result<Self, Self::Error> {
        match ACTIONS.get(usize::from(raw)) {
            Some(&action) => Ok(action),
            None => Err(raw),
        }
    }
}
/// Lookup table used by `Action::try_from(u8)`.
///
/// The entry at index `i` is the `Action` variant whose discriminant is `i`, so the order here
/// must stay in sync with the discriminants declared on `Action`.
const ACTIONS: [Action; 16] = [
Action::Nop,
Action::Clear,
Action::Collect,
Action::CsiDispatch,
Action::EscDispatch,
Action::Execute,
Action::Hook,
Action::Ignore,
Action::OscEnd,
Action::OscPut,
Action::OscStart,
Action::Param,
Action::Print,
Action::Put,
Action::Unhook,
Action::BeginUtf8,
];
/// Split a packed byte into its `State` and `Action`.
///
/// The low nibble of `delta` holds the state and the high nibble holds the
/// action.
///
/// This depends on `State` and `Action` each having *exactly* 16 tag-only
/// variants (no payload data) covering the discriminants `0..=15`. If either
/// enum stops satisfying that, the transmutes below become undefined
/// behaviour.
#[inline(always)]
pub(crate) const fn unpack(delta: u8) -> (State, Action) {
    // State is stored in bottom 4 bits
    let state_bits = delta & 0x0f;
    // Action is stored in top 4 bits
    let action_bits = delta >> 4;

    // SAFETY: masking with 0x0f and shifting a `u8` right by 4 both yield a
    // value in 0..=15, and `State` / `Action` are `#[repr(u8)]` enums with a
    // variant for every discriminant in 0..=15.
    unsafe {
        (
            mem::transmute::<u8, State>(state_bits),
            mem::transmute::<u8, Action>(action_bits),
        )
    }
}
/// Combine a `State` and an `Action` into one byte: the action's discriminant
/// goes in the high nibble and the state's discriminant in the low nibble.
///
/// Only compiled for tests.
#[inline(always)]
#[cfg(test)]
pub(crate) const fn pack(state: State, action: Action) -> u8 {
    let low_nibble = state as u8;
    let high_nibble = (action as u8) << 4;
    high_nibble | low_nibble
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `unpack` decodes the low nibble as the state and the high nibble as the action.
    #[test]
    fn unpack_state_action() {
        assert_eq!(unpack(0xee), (State::SosPmApcString, Action::Unhook));
        assert_eq!(unpack(0x0f), (State::Utf8, Action::Nop));
        assert_eq!(unpack(0xff), (State::Utf8, Action::BeginUtf8));
    }

    /// `pack` is the inverse of `unpack`.
    ///
    /// This test used to be a verbatim copy of `unpack_state_action` and never
    /// called `pack`, leaving `pack` without any coverage.
    #[test]
    fn pack_state_action() {
        assert_eq!(pack(State::SosPmApcString, Action::Unhook), 0xee);
        assert_eq!(pack(State::Utf8, Action::Nop), 0x0f);
        assert_eq!(pack(State::Utf8, Action::BeginUtf8), 0xff);

        // Every byte must survive an unpack/pack round trip.
        for raw in 0..=u8::MAX {
            let (state, action) = unpack(raw);
            assert_eq!(pack(state, action), raw, "round trip failed for {raw:#04x}");
        }
    }
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment