Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mpd
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Иван Мажукин
mpd
Commits
5696f91a
Commit
5696f91a
authored
Mar 14, 2014
by
Max Kellermann
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
pcm/PcmFormat: ARM NEON optimizations for float->s16
This is nearly 4 times faster than the "portable" algorithm.
parent
3932e62f
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
137 additions
and
1 deletion
+137
-1
Makefile.am
Makefile.am
+1
-0
NEWS
NEWS
+1
-0
Neon.hxx
src/pcm/Neon.hxx
+97
-0
PcmFormat.cxx
src/pcm/PcmFormat.cxx
+38
-1
No files found.
Makefile.am
View file @
5696f91a
...
...
@@ -452,6 +452,7 @@ libpcm_a_SOURCES = \
src/pcm/PcmFormat.cxx src/pcm/PcmFormat.hxx
\
src/pcm/FloatConvert.hxx
\
src/pcm/ShiftConvert.hxx
\
src/pcm/Neon.hxx
\
src/pcm/FormatConverter.cxx src/pcm/FormatConverter.hxx
\
src/pcm/ChannelsConverter.cxx src/pcm/ChannelsConverter.hxx
\
src/pcm/Resampler.hxx
\
...
...
NEWS
View file @
5696f91a
...
...
@@ -36,6 +36,7 @@ ver 0.19 (not yet released)
- allow playlist directory without music directory
- use XDG to auto-detect "music_directory" and "db_file"
* new resampler option using libsoxr
* ARM NEON optimizations
* install systemd unit for socket activation
* Android port
...
...
src/pcm/Neon.hxx
0 → 100644
View file @
5696f91a
/*
* Copyright (C) 2003-2014 The Music Player Daemon Project
* http://www.musicpd.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef MPD_PCM_NEON_HXX
#define MPD_PCM_NEON_HXX
#include "Traits.hxx"
#include <arm_neon.h>
/**
* Call a NEON intrinsic for each element in the vector.
*
* @param func the NEON intrinsic
* @param result the vector variable that gets assigned the result
* @param vector the input vector
*/
#define neon_x4_u(func, result, vector) do { \
result.val[0] = func(vector.val[0]); \
result.val[1] = func(vector.val[1]); \
result.val[2] = func(vector.val[2]); \
result.val[3] = func(vector.val[3]); \
} while (0)
/**
* Call a NEON intrinsic for each element in the vector.
*
* @param func the NEON intrinsic
* @param result the vector variable that gets assigned the result
* @param vector the input vector
* @param arg an additional argument that gets passed to each call
*/
#define neon_x4_b(func, result, vector, arg) do { \
result.val[0] = func(vector.val[0], arg); \
result.val[1] = func(vector.val[1], arg); \
result.val[2] = func(vector.val[2], arg); \
result.val[3] = func(vector.val[3], arg); \
} while (0)
/**
* Convert floating point samples to 16 bit signed integer using ARM NEON.
*/
struct
NeonFloatTo16
{
static
constexpr
SampleFormat
src_format
=
SampleFormat
::
FLOAT
;
static
constexpr
SampleFormat
dst_format
=
SampleFormat
::
S16
;
typedef
SampleTraits
<
src_format
>
SrcTraits
;
typedef
SampleTraits
<
dst_format
>
DstTraits
;
typedef
typename
SrcTraits
::
value_type
SV
;
typedef
typename
DstTraits
::
value_type
DV
;
static
constexpr
size_t
BLOCK_SIZE
=
16
;
void
Convert
(
int16_t
*
dst
,
const
float
*
src
,
const
size_t
n
)
const
{
const
float32x4_t
factor
=
vdupq_n_f32
(
1
<<
(
DstTraits
::
BITS
-
1
));
for
(
unsigned
i
=
0
;
i
<
n
/
BLOCK_SIZE
;
++
i
,
src
+=
BLOCK_SIZE
,
dst
+=
BLOCK_SIZE
)
{
/* load 16 float samples into 4 quad
registers */
float32x4x4_t
value
=
vld4q_f32
(
src
);
/* apply factor */
neon_x4_b
(
vmulq_f32
,
value
,
value
,
factor
);
/* convert to 32 bit integer */
int32x4x4_t
ivalue
;
neon_x4_u
(
vcvtq_s32_f32
,
ivalue
,
value
);
/* convert to 16 bit integer with saturation */
int16x4x4_t
nvalue
;
neon_x4_u
(
vqmovn_s32
,
nvalue
,
ivalue
);
/* store result */
vst4_s16
(
dst
,
nvalue
);
}
}
};
#endif
src/pcm/PcmFormat.cxx
View file @
5696f91a
...
...
@@ -76,9 +76,46 @@ struct Convert32To16 {
};
template
<
SampleFormat
F
,
class
Traits
=
SampleTraits
<
F
>>
struct
FloatToInteger
struct
Portable
FloatToInteger
:
PerSampleConvert
<
FloatToIntegerSampleConvert
<
F
,
Traits
>>
{};
template
<
SampleFormat
F
,
class
Traits
=
SampleTraits
<
F
>>
struct
FloatToInteger
:
PortableFloatToInteger
<
F
,
Traits
>
{};
/**
* A template class that attempts to use the "optimized" algorithm for
* large portions of the buffer, and calls the "portable" algorithm"
* for the rest when the last block is not full.
*/
template
<
typename
Optimized
,
typename
Portable
>
class
GlueOptimizedConvert
:
Optimized
,
Portable
{
public
:
typedef
typename
Portable
::
SrcTraits
SrcTraits
;
typedef
typename
Portable
::
DstTraits
DstTraits
;
void
Convert
(
typename
DstTraits
::
pointer_type
out
,
typename
SrcTraits
::
const_pointer_type
in
,
size_t
n
)
const
{
Optimized
::
Convert
(
out
,
in
,
n
);
/* use the "portable" algorithm for the trailing
samples */
size_t
remaining
=
n
%
Optimized
::
BLOCK_SIZE
;
size_t
done
=
n
-
remaining
;
Portable
::
Convert
(
out
+
done
,
in
+
done
,
remaining
);
}
};
#ifdef __ARM_NEON__
#include "Neon.hxx"
template
<>
struct
FloatToInteger
<
SampleFormat
::
S16
,
SampleTraits
<
SampleFormat
::
S16
>>
:
GlueOptimizedConvert
<
NeonFloatTo16
,
PortableFloatToInteger
<
SampleFormat
::
S16
>>
{};
#endif
template
<
class
C
>
static
ConstBuffer
<
typename
C
::
DstTraits
::
value_type
>
AllocateConvert
(
PcmBuffer
&
buffer
,
C
convert
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment