mirror of
https://github.com/LizardByte/Sunshine.git
synced 2025-10-29 11:23:23 +00:00
Provide SSSE3, AVX2, and AVX512 optimized Reed-Solomon functions (#2828)
* Provide SSSE3, AVX2, and AVX512 optimized Reed-Solomon functions * Update nanors to fix AVX-512 memory corruption
This commit is contained in:
parent
037c61dc99
commit
e7c420dd6e
@ -49,8 +49,6 @@ configure_file("${CMAKE_SOURCE_DIR}/src/version.h.in" version.h @ONLY)
|
||||
include_directories("${CMAKE_CURRENT_BINARY_DIR}") # required for importing version.h
|
||||
|
||||
set(SUNSHINE_TARGET_FILES
|
||||
"${CMAKE_SOURCE_DIR}/third-party/nanors/rs.c"
|
||||
"${CMAKE_SOURCE_DIR}/third-party/nanors/rs.h"
|
||||
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/Input.h"
|
||||
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/Rtsp.h"
|
||||
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/RtspParser.c"
|
||||
@ -108,6 +106,8 @@ set(SUNSHINE_TARGET_FILES
|
||||
"${CMAKE_SOURCE_DIR}/src/round_robin.h"
|
||||
"${CMAKE_SOURCE_DIR}/src/stat_trackers.h"
|
||||
"${CMAKE_SOURCE_DIR}/src/stat_trackers.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/rswrapper.h"
|
||||
"${CMAKE_SOURCE_DIR}/src/rswrapper.c"
|
||||
${PLATFORM_TARGET_FILES})
|
||||
|
||||
if(NOT SUNSHINE_ASSETS_DIR_DEF)
|
||||
|
||||
@ -85,9 +85,9 @@ set_source_files_properties("${CMAKE_SOURCE_DIR}/src/upnp.cpp"
|
||||
PROPERTIES COMPILE_FLAGS -Wno-pedantic)
|
||||
|
||||
# third-party/nanors
|
||||
set_source_files_properties("${CMAKE_SOURCE_DIR}/third-party/nanors/rs.c"
|
||||
set_source_files_properties("${CMAKE_SOURCE_DIR}/src/rswrapper.c"
|
||||
DIRECTORY "${CMAKE_SOURCE_DIR}" "${TEST_DIR}"
|
||||
PROPERTIES COMPILE_FLAGS "-include deps/obl/autoshim.h -ftree-vectorize")
|
||||
PROPERTIES COMPILE_FLAGS "-ftree-vectorize -funroll-loops")
|
||||
|
||||
# third-party/ViGEmClient
|
||||
set(VIGEM_COMPILE_FLAGS "")
|
||||
|
||||
@ -23,7 +23,7 @@
|
||||
#include "video.h"
|
||||
|
||||
extern "C" {
|
||||
#include <rs.h>
|
||||
#include "rswrapper.h"
|
||||
}
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
157
src/rswrapper.c
Normal file
157
src/rswrapper.c
Normal file
@ -0,0 +1,157 @@
|
||||
/**
|
||||
* @file src/rswrapper.c
|
||||
* @brief Wrappers for nanors vectorization with different ISA options
|
||||
*/
|
||||
|
||||
// _FORTIFY_SOURCE can cause some versions of GCC to try to inline
|
||||
// memset() with incompatible target options when compiling rs.c
|
||||
#ifdef _FORTIFY_SOURCE
|
||||
#undef _FORTIFY_SOURCE
|
||||
#endif
|
||||
|
||||
// The assert() function is decorated with __cold on macOS which
|
||||
// is incompatible with Clang's target multiversioning feature
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG
|
||||
#endif
|
||||
|
||||
#define DECORATE_FUNC_I(a, b) a##b
|
||||
#define DECORATE_FUNC(a, b) DECORATE_FUNC_I(a, b)
|
||||
|
||||
// Append an ISA suffix to the public RS API
|
||||
#define reed_solomon_init DECORATE_FUNC(reed_solomon_init, ISA_SUFFIX)
|
||||
#define reed_solomon_new DECORATE_FUNC(reed_solomon_new, ISA_SUFFIX)
|
||||
#define reed_solomon_new_static DECORATE_FUNC(reed_solomon_new_static, ISA_SUFFIX)
|
||||
#define reed_solomon_release DECORATE_FUNC(reed_solomon_release, ISA_SUFFIX)
|
||||
#define reed_solomon_decode DECORATE_FUNC(reed_solomon_decode, ISA_SUFFIX)
|
||||
#define reed_solomon_encode DECORATE_FUNC(reed_solomon_encode, ISA_SUFFIX)
|
||||
|
||||
// Append an ISA suffix to internal functions to prevent multiple definition errors
|
||||
#define obl_axpy_ref DECORATE_FUNC(obl_axpy_ref, ISA_SUFFIX)
|
||||
#define obl_scal_ref DECORATE_FUNC(obl_scal_ref, ISA_SUFFIX)
|
||||
#define obl_axpyb32_ref DECORATE_FUNC(obl_axpyb32_ref, ISA_SUFFIX)
|
||||
#define obl_axpy DECORATE_FUNC(obl_axpy, ISA_SUFFIX)
|
||||
#define obl_scal DECORATE_FUNC(obl_scal, ISA_SUFFIX)
|
||||
#define obl_swap DECORATE_FUNC(obl_swap, ISA_SUFFIX)
|
||||
#define obl_axpyb32 DECORATE_FUNC(obl_axpyb32, ISA_SUFFIX)
|
||||
#define axpy DECORATE_FUNC(axpy, ISA_SUFFIX)
|
||||
#define scal DECORATE_FUNC(scal, ISA_SUFFIX)
|
||||
#define gemm DECORATE_FUNC(gemm, ISA_SUFFIX)
|
||||
#define invert_mat DECORATE_FUNC(invert_mat, ISA_SUFFIX)
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
// Compile a variant for SSSE3
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push(__attribute__((target("ssse3"))), apply_to = function)
|
||||
#else
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("ssse3")
|
||||
#endif
|
||||
#define ISA_SUFFIX _ssse3
|
||||
#define OBLAS_SSE3
|
||||
#include "../third-party/nanors/rs.c"
|
||||
#undef OBLAS_SSE3
|
||||
#undef ISA_SUFFIX
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute pop
|
||||
#else
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
// Compile a variant for AVX2
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function)
|
||||
#else
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx2")
|
||||
#endif
|
||||
#define ISA_SUFFIX _avx2
|
||||
#define OBLAS_AVX2
|
||||
#include "../third-party/nanors/rs.c"
|
||||
#undef OBLAS_AVX2
|
||||
#undef ISA_SUFFIX
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute pop
|
||||
#else
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
// Compile a variant for AVX512BW
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push(__attribute__((target("avx512f,avx512bw"))), apply_to = function)
|
||||
#else
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512f,avx512bw")
|
||||
#endif
|
||||
#define ISA_SUFFIX _avx512
|
||||
#define OBLAS_AVX512
|
||||
#include "../third-party/nanors/rs.c"
|
||||
#undef OBLAS_AVX512
|
||||
#undef ISA_SUFFIX
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute pop
|
||||
#else
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// Compile a default variant
|
||||
#define ISA_SUFFIX _def
|
||||
#include "../third-party/nanors/deps/obl/autoshim.h"
|
||||
#include "../third-party/nanors/rs.c"
|
||||
#undef ISA_SUFFIX
|
||||
|
||||
#undef reed_solomon_init
|
||||
#undef reed_solomon_new
|
||||
#undef reed_solomon_new_static
|
||||
#undef reed_solomon_release
|
||||
#undef reed_solomon_decode
|
||||
#undef reed_solomon_encode
|
||||
|
||||
#include "rswrapper.h"
|
||||
|
||||
reed_solomon_new_t reed_solomon_new_fn;
|
||||
reed_solomon_release_t reed_solomon_release_fn;
|
||||
reed_solomon_encode_t reed_solomon_encode_fn;
|
||||
reed_solomon_decode_t reed_solomon_decode_fn;
|
||||
|
||||
/**
|
||||
* @brief This initializes the RS function pointers to the best vectorized version available.
|
||||
* @details The streaming code will directly invoke these function pointers during encoding.
|
||||
*/
|
||||
void
|
||||
reed_solomon_init(void) {
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) {
|
||||
reed_solomon_new_fn = reed_solomon_new_avx512;
|
||||
reed_solomon_release_fn = reed_solomon_release_avx512;
|
||||
reed_solomon_encode_fn = reed_solomon_encode_avx512;
|
||||
reed_solomon_decode_fn = reed_solomon_decode_avx512;
|
||||
reed_solomon_init_avx512();
|
||||
}
|
||||
else if (__builtin_cpu_supports("avx2")) {
|
||||
reed_solomon_new_fn = reed_solomon_new_avx2;
|
||||
reed_solomon_release_fn = reed_solomon_release_avx2;
|
||||
reed_solomon_encode_fn = reed_solomon_encode_avx2;
|
||||
reed_solomon_decode_fn = reed_solomon_decode_avx2;
|
||||
reed_solomon_init_avx2();
|
||||
}
|
||||
else if (__builtin_cpu_supports("ssse3")) {
|
||||
reed_solomon_new_fn = reed_solomon_new_ssse3;
|
||||
reed_solomon_release_fn = reed_solomon_release_ssse3;
|
||||
reed_solomon_encode_fn = reed_solomon_encode_ssse3;
|
||||
reed_solomon_decode_fn = reed_solomon_decode_ssse3;
|
||||
reed_solomon_init_ssse3();
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
reed_solomon_new_fn = reed_solomon_new_def;
|
||||
reed_solomon_release_fn = reed_solomon_release_def;
|
||||
reed_solomon_encode_fn = reed_solomon_encode_def;
|
||||
reed_solomon_decode_fn = reed_solomon_decode_def;
|
||||
reed_solomon_init_def();
|
||||
}
|
||||
}
|
||||
32
src/rswrapper.h
Normal file
32
src/rswrapper.h
Normal file
@ -0,0 +1,32 @@
|
||||
/**
|
||||
* @file src/rswrapper.h
|
||||
* @brief Wrappers for nanors vectorization
|
||||
* @details This is a drop-in replacement for nanors rs.h
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct _reed_solomon reed_solomon;
|
||||
|
||||
typedef reed_solomon *(*reed_solomon_new_t)(int data_shards, int parity_shards);
|
||||
typedef void (*reed_solomon_release_t)(reed_solomon *rs);
|
||||
typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs);
|
||||
typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs);
|
||||
|
||||
extern reed_solomon_new_t reed_solomon_new_fn;
|
||||
extern reed_solomon_release_t reed_solomon_release_fn;
|
||||
extern reed_solomon_encode_t reed_solomon_encode_fn;
|
||||
extern reed_solomon_decode_t reed_solomon_decode_fn;
|
||||
|
||||
#define reed_solomon_new reed_solomon_new_fn
|
||||
#define reed_solomon_release reed_solomon_release_fn
|
||||
#define reed_solomon_encode reed_solomon_encode_fn
|
||||
#define reed_solomon_decode reed_solomon_decode_fn
|
||||
|
||||
/**
|
||||
* @brief This initializes the RS function pointers to the best vectorized version available.
|
||||
* @details The streaming code will directly invoke these function pointers during encoding.
|
||||
*/
|
||||
void
|
||||
reed_solomon_init(void);
|
||||
@ -13,8 +13,10 @@
|
||||
#include <boost/endian/arithmetic.hpp>
|
||||
|
||||
extern "C" {
|
||||
// clang-format off
|
||||
#include <moonlight-common-c/src/Limelight-internal.h>
|
||||
#include <rs.h>
|
||||
#include "rswrapper.h"
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
#include "config.h"
|
||||
@ -236,7 +238,6 @@ namespace stream {
|
||||
}
|
||||
constexpr std::size_t MAX_AUDIO_PACKET_SIZE = 1400;
|
||||
|
||||
using rh_t = util::safe_ptr<reed_solomon, reed_solomon_release>;
|
||||
using video_packet_t = util::c_ptr<video_packet_raw_t>;
|
||||
using audio_packet_t = util::c_ptr<audio_packet_raw_t>;
|
||||
using audio_fec_packet_t = util::c_ptr<audio_fec_packet_raw_t>;
|
||||
@ -621,7 +622,7 @@ namespace stream {
|
||||
}
|
||||
|
||||
namespace fec {
|
||||
using rs_t = util::safe_ptr<reed_solomon, reed_solomon_release>;
|
||||
using rs_t = util::safe_ptr<reed_solomon, [](reed_solomon *rs) { reed_solomon_release(rs); }>;
|
||||
|
||||
struct fec_t {
|
||||
size_t data_shards;
|
||||
|
||||
37
tests/unit/test_rswrapper.cpp
Normal file
37
tests/unit/test_rswrapper.cpp
Normal file
@ -0,0 +1,37 @@
|
||||
/**
|
||||
* @file tests/unit/test_rswrapper.cpp
|
||||
* @brief Test src/rswrapper.*
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
#include <src/rswrapper.h>
|
||||
}
|
||||
|
||||
#include <tests/conftest.cpp>
|
||||
|
||||
// Verifies that reed_solomon_init() leaves none of the dispatch pointers null.
TEST(ReedSolomonWrapperTests, InitTest) {
  reed_solomon_init();

  // rswrapper.h maps each of these names onto its reed_solomon_*_fn pointer,
  // so every check below inspects the pointer that init was meant to fill in.
  ASSERT_NE(reed_solomon_new, nullptr);
  ASSERT_NE(reed_solomon_release, nullptr);
  ASSERT_NE(reed_solomon_encode, nullptr);
  ASSERT_NE(reed_solomon_decode, nullptr);
}
|
||||
|
||||
// Runs one encode (1 data shard + 1 parity shard, 16 bytes each) through the
// variant chosen by reed_solomon_init() and expects it to return 0.
TEST(ReedSolomonWrapperTests, EncodeTest) {
  reed_solomon_init();

  auto rs = reed_solomon_new(1, 1);
  ASSERT_NE(rs, nullptr);

  uint8_t dataShard[16] = {};
  uint8_t fecShard[16] = {};

  // If we picked the incorrect ISA in our wrapper, we should crash here
  uint8_t *shardPtrs[2] = { dataShard, fecShard };
  auto ret = reed_solomon_encode(rs, shardPtrs, 2, sizeof(dataShard));

  // Release the codec before checking the result: a failing ASSERT_* returns
  // from the test body immediately, which would otherwise skip the release.
  reed_solomon_release(rs);

  ASSERT_EQ(ret, 0);
}
|
||||
2
third-party/nanors
vendored
2
third-party/nanors
vendored
@ -1 +1 @@
|
||||
Subproject commit e9e242e98e27037830490b2a752895ca68f75f8b
|
||||
Subproject commit 19f07b513e924e471cadd141943c1ec4adc8d0e0
|
||||
Loading…
x
Reference in New Issue
Block a user