New upstream version 0-1299+ds
This commit is contained in:
parent
d3b74ab2d5
commit
9209efb76e
|
@ -10,7 +10,7 @@ if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dis
|
|||
fi
|
||||
|
||||
# Default clang-format points to default 3.5 version one
|
||||
CLANG_FORMAT=clang-format-12
|
||||
CLANG_FORMAT=${CLANG_FORMAT:-clang-format-12}
|
||||
$CLANG_FORMAT --version
|
||||
|
||||
if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then
|
||||
|
|
|
@ -3,13 +3,8 @@
|
|||
|
||||
cmake_minimum_required(VERSION 3.22)
|
||||
|
||||
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
|
||||
# some of its variables, which is only possible in 3.13+
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
include(CMakeDependentOption)
|
||||
|
||||
|
@ -22,6 +17,8 @@ CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_SDL2 "Download bundled SDL2 binaries" ON
|
|||
# On Linux system SDL2 is likely to be lacking HIDAPI support which have drawbacks but is needed for SDL motion
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_USE_EXTERNAL_SDL2 "Compile external SDL2" ON "ENABLE_SDL2;NOT MSVC" OFF)
|
||||
|
||||
option(ENABLE_LIBUSB "Enable the use of LibUSB" ON)
|
||||
|
||||
option(ENABLE_OPENGL "Enable OpenGL" ON)
|
||||
mark_as_advanced(FORCE ENABLE_OPENGL)
|
||||
option(ENABLE_QT "Enable the Qt frontend" ON)
|
||||
|
@ -35,6 +32,8 @@ option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
|
|||
|
||||
option(YUZU_USE_BUNDLED_FFMPEG "Download/Build bundled FFmpeg" "${WIN32}")
|
||||
|
||||
option(YUZU_USE_EXTERNAL_VULKAN_HEADERS "Use Vulkan-Headers from externals" ON)
|
||||
|
||||
option(YUZU_USE_QT_MULTIMEDIA "Use QtMultimedia for Camera" OFF)
|
||||
|
||||
option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
|
||||
|
@ -47,6 +46,8 @@ option(YUZU_TESTS "Compile tests" ON)
|
|||
|
||||
option(YUZU_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON)
|
||||
|
||||
option(YUZU_ROOM "Compile LDN room server" ON)
|
||||
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_CRASH_DUMPS "Compile Windows crash dump (Minidump) support" OFF "WIN32" OFF)
|
||||
|
||||
option(YUZU_USE_BUNDLED_VCPKG "Use vcpkg for yuzu dependencies" "${MSVC}")
|
||||
|
@ -201,36 +202,43 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
|
|||
# System imported libraries
|
||||
# =======================================================================
|
||||
|
||||
find_package(enet 1.3)
|
||||
# Enforce the search mode of non-required packages for better and shorter failure messages
|
||||
find_package(enet 1.3 MODULE)
|
||||
find_package(fmt 9 REQUIRED)
|
||||
find_package(inih)
|
||||
find_package(libusb 1.0.24)
|
||||
find_package(inih MODULE)
|
||||
find_package(lz4 REQUIRED)
|
||||
find_package(nlohmann_json 3.8 REQUIRED)
|
||||
find_package(Opus 1.3)
|
||||
find_package(Vulkan 1.3.238)
|
||||
find_package(Opus 1.3 MODULE)
|
||||
find_package(ZLIB 1.2 REQUIRED)
|
||||
find_package(zstd 1.5 REQUIRED)
|
||||
|
||||
if (NOT YUZU_USE_EXTERNAL_VULKAN_HEADERS)
|
||||
find_package(Vulkan 1.3.238 REQUIRED)
|
||||
endif()
|
||||
|
||||
if (ENABLE_LIBUSB)
|
||||
find_package(libusb 1.0.24 MODULE)
|
||||
endif()
|
||||
|
||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||
find_package(xbyak 6)
|
||||
find_package(xbyak 6 CONFIG)
|
||||
endif()
|
||||
|
||||
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
|
||||
find_package(dynarmic 6.4.0)
|
||||
find_package(dynarmic 6.4.0 CONFIG)
|
||||
endif()
|
||||
|
||||
if (ENABLE_CUBEB)
|
||||
find_package(cubeb)
|
||||
find_package(cubeb CONFIG)
|
||||
endif()
|
||||
|
||||
if (USE_DISCORD_PRESENCE)
|
||||
find_package(DiscordRPC)
|
||||
find_package(DiscordRPC MODULE)
|
||||
endif()
|
||||
|
||||
if (ENABLE_WEB_SERVICE)
|
||||
find_package(cpp-jwt 1.4)
|
||||
find_package(httplib 0.11)
|
||||
find_package(cpp-jwt 1.4 CONFIG)
|
||||
find_package(httplib 0.11 MODULE)
|
||||
endif()
|
||||
|
||||
if (YUZU_TESTS)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package(httplib QUIET CONFIG)
|
||||
if (httplib_FOUND)
|
||||
if (httplib_CONSIDERED_CONFIGS)
|
||||
find_package_handle_standard_args(httplib CONFIG_MODE)
|
||||
else()
|
||||
find_package(PkgConfig QUIET)
|
|
@ -4,7 +4,7 @@
|
|||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package(lz4 QUIET CONFIG)
|
||||
if (lz4_FOUND)
|
||||
if (lz4_CONSIDERED_CONFIGS)
|
||||
find_package_handle_standard_args(lz4 CONFIG_MODE)
|
||||
else()
|
||||
find_package(PkgConfig QUIET)
|
|
@ -4,7 +4,7 @@
|
|||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package(zstd QUIET CONFIG)
|
||||
if (zstd_FOUND)
|
||||
if (zstd_CONSIDERED_CONFIGS)
|
||||
find_package_handle_standard_args(zstd CONFIG_MODE)
|
||||
else()
|
||||
find_package(PkgConfig QUIET)
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,9 @@
|
|||
# SPDX-FileCopyrightText: 2016 Citra Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
|
||||
# some of its variables, which is only possible in 3.13+
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
|
||||
# xbyak
|
||||
if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak)
|
||||
|
@ -12,8 +12,7 @@ endif()
|
|||
|
||||
# Dynarmic
|
||||
if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic)
|
||||
set(DYNARMIC_NO_BUNDLED_FMT ON)
|
||||
set(DYNARMIC_IGNORE_ASSERTS ON CACHE BOOL "" FORCE)
|
||||
set(DYNARMIC_IGNORE_ASSERTS ON)
|
||||
add_subdirectory(dynarmic EXCLUDE_FROM_ALL)
|
||||
add_library(dynarmic::dynarmic ALIAS dynarmic)
|
||||
endif()
|
||||
|
@ -45,7 +44,7 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12" AND CMAKE_CXX_COMPILER
|
|||
endif()
|
||||
|
||||
# libusb
|
||||
if (NOT TARGET libusb::usb)
|
||||
if (ENABLE_LIBUSB AND NOT TARGET libusb::usb)
|
||||
add_subdirectory(libusb EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
|
@ -60,10 +59,10 @@ if (YUZU_USE_EXTERNAL_SDL2)
|
|||
Locale Power Render)
|
||||
foreach(_SUB ${SDL_UNUSED_SUBSYSTEMS})
|
||||
string(TOUPPER ${_SUB} _OPT)
|
||||
option(SDL_${_OPT} "" OFF)
|
||||
set(SDL_${_OPT} OFF)
|
||||
endforeach()
|
||||
|
||||
option(HIDAPI "" ON)
|
||||
set(HIDAPI ON)
|
||||
endif()
|
||||
set(SDL_STATIC ON)
|
||||
set(SDL_SHARED OFF)
|
||||
|
@ -83,7 +82,7 @@ endif()
|
|||
|
||||
# Cubeb
|
||||
if (ENABLE_CUBEB AND NOT TARGET cubeb::cubeb)
|
||||
set(BUILD_TESTS OFF CACHE BOOL "")
|
||||
set(BUILD_TESTS OFF)
|
||||
add_subdirectory(cubeb EXCLUDE_FROM_ALL)
|
||||
add_library(cubeb::cubeb ALIAS cubeb)
|
||||
endif()
|
||||
|
@ -98,6 +97,7 @@ endif()
|
|||
# Sirit
|
||||
add_subdirectory(sirit EXCLUDE_FROM_ALL)
|
||||
|
||||
# httplib
|
||||
if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
||||
if (NOT WIN32)
|
||||
find_package(OpenSSL 1.1)
|
||||
|
@ -108,7 +108,7 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
|||
|
||||
if (WIN32 OR NOT OPENSSL_FOUND)
|
||||
# LibreSSL
|
||||
set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
|
||||
set(LIBRESSL_SKIP_INSTALL ON)
|
||||
set(OPENSSLDIR "/etc/ssl/")
|
||||
add_subdirectory(libressl EXCLUDE_FROM_ALL)
|
||||
target_include_directories(ssl INTERFACE ./libressl/include)
|
||||
|
@ -118,7 +118,6 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
|||
DEFINITION OPENSSL_LIBS)
|
||||
endif()
|
||||
|
||||
# httplib
|
||||
add_library(httplib INTERFACE)
|
||||
target_include_directories(httplib INTERFACE ./cpp-httplib)
|
||||
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
|
@ -152,6 +151,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
|
|||
endif()
|
||||
|
||||
# Vulkan-Headers
|
||||
if (NOT TARGET Vulkan::Headers)
|
||||
if (YUZU_USE_EXTERNAL_VULKAN_HEADERS)
|
||||
add_subdirectory(Vulkan-Headers EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
|
|
@ -1160,6 +1160,14 @@ public:
|
|||
/// TBD
|
||||
Id OpSubgroupAllEqualKHR(Id result_type, Id predicate);
|
||||
|
||||
// Result is true only in the active invocation with the lowest id in the group, otherwise
|
||||
// result is false.
|
||||
Id OpGroupNonUniformElect(Id result_type, Id scope);
|
||||
|
||||
// Result is the Value of the invocation from the active invocation with the lowest id in the
|
||||
// group to all active invocations in the group.
|
||||
Id OpGroupNonUniformBroadcastFirst(Id result_type, Id scope, Id value);
|
||||
|
||||
// Result is the Value of the invocation identified by the id Id to all active invocations in
|
||||
// the group.
|
||||
Id OpGroupNonUniformBroadcast(Id result_type, Id scope, Id value, Id id);
|
||||
|
|
|
@ -36,6 +36,17 @@ Id Module::OpSubgroupAllEqualKHR(Id result_type, Id predicate) {
|
|||
return *code << OpId{spv::Op::OpSubgroupAllEqualKHR, result_type} << predicate << EndOp{};
|
||||
}
|
||||
|
||||
Id Module::OpGroupNonUniformElect(Id result_type, Id scope) {
|
||||
code->Reserve(4);
|
||||
return *code << OpId{spv::Op::OpGroupNonUniformElect, result_type} << scope << EndOp{};
|
||||
}
|
||||
|
||||
Id Module::OpGroupNonUniformBroadcastFirst(Id result_type, Id scope, Id value) {
|
||||
code->Reserve(5);
|
||||
return *code << OpId{spv::Op::OpGroupNonUniformBroadcastFirst, result_type} << scope << value
|
||||
<< EndOp{};
|
||||
}
|
||||
|
||||
Id Module::OpGroupNonUniformBroadcast(Id result_type, Id scope, Id value, Id id) {
|
||||
code->Reserve(6);
|
||||
return *code << OpId{spv::Op::OpGroupNonUniformBroadcast, result_type} << scope << value
|
||||
|
|
|
@ -161,7 +161,10 @@ add_subdirectory(video_core)
|
|||
add_subdirectory(network)
|
||||
add_subdirectory(input_common)
|
||||
add_subdirectory(shader_recompiler)
|
||||
add_subdirectory(dedicated_room)
|
||||
|
||||
if (YUZU_ROOM)
|
||||
add_subdirectory(dedicated_room)
|
||||
endif()
|
||||
|
||||
if (YUZU_TESTS)
|
||||
add_subdirectory(tests)
|
||||
|
|
|
@ -187,11 +187,7 @@ add_library(audio_core STATIC
|
|||
renderer/voice/voice_info.cpp
|
||||
renderer/voice/voice_info.h
|
||||
renderer/voice/voice_state.h
|
||||
sink/cubeb_sink.cpp
|
||||
sink/cubeb_sink.h
|
||||
sink/null_sink.h
|
||||
sink/sdl2_sink.cpp
|
||||
sink/sdl2_sink.h
|
||||
sink/sink.h
|
||||
sink/sink_details.cpp
|
||||
sink/sink_details.h
|
||||
|
@ -222,11 +218,22 @@ if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
|
|||
target_link_libraries(audio_core PRIVATE dynarmic::dynarmic)
|
||||
endif()
|
||||
|
||||
if(ENABLE_CUBEB)
|
||||
if (ENABLE_CUBEB)
|
||||
target_sources(audio_core PRIVATE
|
||||
sink/cubeb_sink.cpp
|
||||
sink/cubeb_sink.h
|
||||
)
|
||||
|
||||
target_link_libraries(audio_core PRIVATE cubeb::cubeb)
|
||||
target_compile_definitions(audio_core PRIVATE -DHAVE_CUBEB=1)
|
||||
endif()
|
||||
if(ENABLE_SDL2)
|
||||
|
||||
if (ENABLE_SDL2)
|
||||
target_sources(audio_core PRIVATE
|
||||
sink/sdl2_sink.cpp
|
||||
sink/sdl2_sink.h
|
||||
)
|
||||
|
||||
target_link_libraries(audio_core PRIVATE SDL2::SDL2)
|
||||
target_compile_definitions(audio_core PRIVATE HAVE_SDL2)
|
||||
endif()
|
||||
|
|
|
@ -97,6 +97,7 @@ add_library(common STATIC
|
|||
point.h
|
||||
precompiled_headers.h
|
||||
quaternion.h
|
||||
range_map.h
|
||||
reader_writer_queue.h
|
||||
ring_buffer.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
|
||||
|
|
|
@ -393,12 +393,27 @@ public:
|
|||
}
|
||||
|
||||
// Virtual memory initialization
|
||||
virtual_base = static_cast<u8*>(
|
||||
mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
|
||||
#if defined(__FreeBSD__)
|
||||
virtual_base =
|
||||
static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0));
|
||||
if (virtual_base == MAP_FAILED) {
|
||||
virtual_base = static_cast<u8*>(
|
||||
mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
|
||||
if (virtual_base == MAP_FAILED) {
|
||||
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
}
|
||||
#else
|
||||
virtual_base = static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
|
||||
if (virtual_base == MAP_FAILED) {
|
||||
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
|
||||
#endif
|
||||
|
||||
good = true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
template <typename KeyTBase, typename ValueT>
|
||||
class RangeMap {
|
||||
private:
|
||||
using KeyT =
|
||||
std::conditional_t<std::is_signed_v<KeyTBase>, KeyTBase, std::make_signed_t<KeyTBase>>;
|
||||
|
||||
public:
|
||||
explicit RangeMap(ValueT null_value_) : null_value{null_value_} {
|
||||
container.emplace(std::numeric_limits<KeyT>::min(), null_value);
|
||||
};
|
||||
~RangeMap() = default;
|
||||
|
||||
void Map(KeyTBase address, KeyTBase address_end, ValueT value) {
|
||||
KeyT new_address = static_cast<KeyT>(address);
|
||||
KeyT new_address_end = static_cast<KeyT>(address_end);
|
||||
if (new_address < 0) {
|
||||
new_address = 0;
|
||||
}
|
||||
if (new_address_end < 0) {
|
||||
new_address_end = 0;
|
||||
}
|
||||
InternalMap(new_address, new_address_end, value);
|
||||
}
|
||||
|
||||
void Unmap(KeyTBase address, KeyTBase address_end) {
|
||||
Map(address, address_end, null_value);
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t GetContinousSizeFrom(KeyTBase address) const {
|
||||
const KeyT new_address = static_cast<KeyT>(address);
|
||||
if (new_address < 0) {
|
||||
return 0;
|
||||
}
|
||||
return ContinousSizeInternal(new_address);
|
||||
}
|
||||
|
||||
[[nodiscard]] ValueT GetValueAt(KeyT address) const {
|
||||
const KeyT new_address = static_cast<KeyT>(address);
|
||||
if (new_address < 0) {
|
||||
return null_value;
|
||||
}
|
||||
return GetValueInternal(new_address);
|
||||
}
|
||||
|
||||
private:
|
||||
using MapType = std::map<KeyT, ValueT>;
|
||||
using IteratorType = typename MapType::iterator;
|
||||
using ConstIteratorType = typename MapType::const_iterator;
|
||||
|
||||
size_t ContinousSizeInternal(KeyT address) const {
|
||||
const auto it = GetFirstElementBeforeOrOn(address);
|
||||
if (it == container.end() || it->second == null_value) {
|
||||
return 0;
|
||||
}
|
||||
const auto it_end = std::next(it);
|
||||
if (it_end == container.end()) {
|
||||
return std::numeric_limits<KeyT>::max() - address;
|
||||
}
|
||||
return it_end->first - address;
|
||||
}
|
||||
|
||||
ValueT GetValueInternal(KeyT address) const {
|
||||
const auto it = GetFirstElementBeforeOrOn(address);
|
||||
if (it == container.end()) {
|
||||
return null_value;
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
ConstIteratorType GetFirstElementBeforeOrOn(KeyT address) const {
|
||||
auto it = container.lower_bound(address);
|
||||
if (it == container.begin()) {
|
||||
return it;
|
||||
}
|
||||
if (it != container.end() && (it->first == address)) {
|
||||
return it;
|
||||
}
|
||||
--it;
|
||||
return it;
|
||||
}
|
||||
|
||||
ValueT GetFirstValueWithin(KeyT address) {
|
||||
auto it = container.lower_bound(address);
|
||||
if (it == container.begin()) {
|
||||
return it->second;
|
||||
}
|
||||
if (it == container.end()) [[unlikely]] { // this would be a bug
|
||||
return null_value;
|
||||
}
|
||||
--it;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
ValueT GetLastValueWithin(KeyT address) {
|
||||
auto it = container.upper_bound(address);
|
||||
if (it == container.end()) {
|
||||
return null_value;
|
||||
}
|
||||
if (it == container.begin()) [[unlikely]] { // this would be a bug
|
||||
return it->second;
|
||||
}
|
||||
--it;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void InternalMap(KeyT address, KeyT address_end, ValueT value) {
|
||||
const bool must_add_start = GetFirstValueWithin(address) != value;
|
||||
const ValueT last_value = GetLastValueWithin(address_end);
|
||||
const bool must_add_end = last_value != value;
|
||||
auto it = container.lower_bound(address);
|
||||
const auto it_end = container.upper_bound(address_end);
|
||||
while (it != it_end) {
|
||||
it = container.erase(it);
|
||||
}
|
||||
if (must_add_start) {
|
||||
container.emplace(address, value);
|
||||
}
|
||||
if (must_add_end) {
|
||||
container.emplace(address_end, last_value);
|
||||
}
|
||||
}
|
||||
|
||||
ValueT null_value;
|
||||
MapType container;
|
||||
};
|
||||
|
||||
} // namespace Common
|
|
@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
|||
// Renderer
|
||||
values.fsr_sharpening_slider.SetGlobal(true);
|
||||
values.renderer_backend.SetGlobal(true);
|
||||
values.renderer_force_max_clock.SetGlobal(true);
|
||||
values.vulkan_device.SetGlobal(true);
|
||||
values.aspect_ratio.SetGlobal(true);
|
||||
values.max_anisotropy.SetGlobal(true);
|
||||
|
@ -200,6 +201,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
|||
values.use_asynchronous_shaders.SetGlobal(true);
|
||||
values.use_fast_gpu_time.SetGlobal(true);
|
||||
values.use_pessimistic_flushes.SetGlobal(true);
|
||||
values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
|
||||
values.bg_red.SetGlobal(true);
|
||||
values.bg_green.SetGlobal(true);
|
||||
values.bg_blue.SetGlobal(true);
|
||||
|
|
|
@ -415,6 +415,7 @@ struct Values {
|
|||
// Renderer
|
||||
SwitchableSetting<RendererBackend, true> renderer_backend{
|
||||
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
|
||||
SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"};
|
||||
Setting<bool> renderer_debug{false, "debug"};
|
||||
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
|
||||
Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
|
||||
|
@ -451,6 +452,8 @@ struct Values {
|
|||
SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
|
||||
SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
|
||||
SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
|
||||
SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
|
||||
"use_vulkan_driver_pipeline_cache"};
|
||||
|
||||
SwitchableSetting<u8> bg_red{0, "bg_red"};
|
||||
SwitchableSetting<u8> bg_green{0, "bg_green"};
|
||||
|
@ -531,6 +534,7 @@ struct Values {
|
|||
Setting<bool> reporting_services{false, "reporting_services"};
|
||||
Setting<bool> quest_flag{false, "quest_flag"};
|
||||
Setting<bool> disable_macro_jit{false, "disable_macro_jit"};
|
||||
Setting<bool> disable_macro_hle{false, "disable_macro_hle"};
|
||||
Setting<bool> extended_logging{false, "extended_logging"};
|
||||
Setting<bool> use_debug_asserts{false, "use_debug_asserts"};
|
||||
Setting<bool> use_auto_stub{false, "use_auto_stub"};
|
||||
|
|
|
@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
|||
config.enable_cycle_counting = true;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
config.code_cache_size = 128_MiB;
|
||||
#else
|
||||
config.code_cache_size = 512_MiB;
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
|
|
|
@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
|||
config.enable_cycle_counting = true;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
config.code_cache_size = 128_MiB;
|
||||
#else
|
||||
config.code_cache_size = 512_MiB;
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
|
|
|
@ -194,9 +194,9 @@ std::size_t VfsFile::WriteBytes(const std::vector<u8>& data, std::size_t offset)
|
|||
|
||||
std::string VfsFile::GetFullPath() const {
|
||||
if (GetContainingDirectory() == nullptr)
|
||||
return "/" + GetName();
|
||||
return '/' + GetName();
|
||||
|
||||
return GetContainingDirectory()->GetFullPath() + "/" + GetName();
|
||||
return GetContainingDirectory()->GetFullPath() + '/' + GetName();
|
||||
}
|
||||
|
||||
VirtualFile VfsDirectory::GetFileRelative(std::string_view path) const {
|
||||
|
@ -435,7 +435,7 @@ std::string VfsDirectory::GetFullPath() const {
|
|||
if (IsRoot())
|
||||
return GetName();
|
||||
|
||||
return GetParentDirectory()->GetFullPath() + "/" + GetName();
|
||||
return GetParentDirectory()->GetFullPath() + '/' + GetName();
|
||||
}
|
||||
|
||||
bool ReadOnlyVfsDirectory::IsWritable() const {
|
||||
|
|
|
@ -40,6 +40,11 @@ void EmulatedConsole::SetTouchParams() {
|
|||
touch_params[index++] = std::move(touchscreen_param);
|
||||
}
|
||||
|
||||
if (Settings::values.touch_from_button_maps.empty()) {
|
||||
LOG_WARNING(Input, "touch_from_button_maps is unset by frontend config");
|
||||
return;
|
||||
}
|
||||
|
||||
const auto button_index =
|
||||
static_cast<u64>(Settings::values.touch_from_button_map_index.GetValue());
|
||||
const auto& touch_buttons = Settings::values.touch_from_button_maps[button_index].buttons;
|
||||
|
|
|
@ -11,6 +11,11 @@
|
|||
namespace Core::HID {
|
||||
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
|
||||
constexpr s32 HID_TRIGGER_MAX = 0x7fff;
|
||||
// Use a common UUID for TAS and Virtual Gamepad
|
||||
constexpr Common::UUID TAS_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
constexpr Common::UUID VIRTUAL_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {}
|
||||
|
||||
|
@ -348,10 +353,6 @@ void EmulatedController::ReloadInput() {
|
|||
}
|
||||
}
|
||||
|
||||
// Use a common UUID for TAS
|
||||
static constexpr Common::UUID TAS_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register TAS devices. No need to force update
|
||||
for (std::size_t index = 0; index < tas_button_devices.size(); ++index) {
|
||||
if (!tas_button_devices[index]) {
|
||||
|
@ -377,10 +378,6 @@ void EmulatedController::ReloadInput() {
|
|||
});
|
||||
}
|
||||
|
||||
// Use a common UUID for Virtual Gamepad
|
||||
static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register virtual devices. No need to force update
|
||||
for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) {
|
||||
if (!virtual_button_devices[index]) {
|
||||
|
@ -780,7 +777,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback,
|
|||
|
||||
// Only read stick values that have the same uuid or are over the threshold to avoid flapping
|
||||
if (controller.stick_values[index].uuid != uuid) {
|
||||
if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) {
|
||||
const bool is_tas = uuid == TAS_UUID;
|
||||
if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) {
|
||||
return;
|
||||
}
|
||||
if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left &&
|
||||
!stick_value.right) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,15 +22,19 @@ namespace {
|
|||
|
||||
namespace Service::NIFM {
|
||||
|
||||
// This is nn::nifm::RequestState
|
||||
enum class RequestState : u32 {
|
||||
NotSubmitted = 1,
|
||||
Error = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
|
||||
Pending = 2,
|
||||
Connected = 3,
|
||||
Invalid = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
|
||||
OnHold = 2,
|
||||
Accepted = 3,
|
||||
Blocking = 4,
|
||||
};
|
||||
|
||||
enum class InternetConnectionType : u8 {
|
||||
WiFi = 1,
|
||||
// This is nn::nifm::NetworkInterfaceType
|
||||
enum class NetworkInterfaceType : u32 {
|
||||
Invalid = 0,
|
||||
WiFi_Ieee80211 = 1,
|
||||
Ethernet = 2,
|
||||
};
|
||||
|
||||
|
@ -42,14 +46,23 @@ enum class InternetConnectionStatus : u8 {
|
|||
Connected,
|
||||
};
|
||||
|
||||
// This is nn::nifm::NetworkProfileType
|
||||
enum class NetworkProfileType : u32 {
|
||||
User,
|
||||
SsidList,
|
||||
Temporary,
|
||||
};
|
||||
|
||||
// This is nn::nifm::IpAddressSetting
|
||||
struct IpAddressSetting {
|
||||
bool is_automatic{};
|
||||
Network::IPv4Address current_address{};
|
||||
Network::IPv4Address ip_address{};
|
||||
Network::IPv4Address subnet_mask{};
|
||||
Network::IPv4Address gateway{};
|
||||
Network::IPv4Address default_gateway{};
|
||||
};
|
||||
static_assert(sizeof(IpAddressSetting) == 0xD, "IpAddressSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::DnsSetting
|
||||
struct DnsSetting {
|
||||
bool is_automatic{};
|
||||
Network::IPv4Address primary_dns{};
|
||||
|
@ -57,18 +70,26 @@ struct DnsSetting {
|
|||
};
|
||||
static_assert(sizeof(DnsSetting) == 0x9, "DnsSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::AuthenticationSetting
|
||||
struct AuthenticationSetting {
|
||||
bool is_enabled{};
|
||||
std::array<char, 0x20> user{};
|
||||
std::array<char, 0x20> password{};
|
||||
};
|
||||
static_assert(sizeof(AuthenticationSetting) == 0x41, "AuthenticationSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::ProxySetting
|
||||
struct ProxySetting {
|
||||
bool enabled{};
|
||||
bool is_enabled{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
u16 port{};
|
||||
std::array<char, 0x64> proxy_server{};
|
||||
bool automatic_auth_enabled{};
|
||||
std::array<char, 0x20> user{};
|
||||
std::array<char, 0x20> password{};
|
||||
AuthenticationSetting authentication{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
};
|
||||
static_assert(sizeof(ProxySetting) == 0xAA, "ProxySetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::IpSettingData
|
||||
struct IpSettingData {
|
||||
IpAddressSetting ip_address_setting{};
|
||||
DnsSetting dns_setting{};
|
||||
|
@ -101,6 +122,7 @@ static_assert(sizeof(NifmWirelessSettingData) == 0x70,
|
|||
"NifmWirelessSettingData has incorrect size.");
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// This is nn::nifm::detail::sf::NetworkProfileData
|
||||
struct SfNetworkProfileData {
|
||||
IpSettingData ip_setting_data{};
|
||||
u128 uuid{};
|
||||
|
@ -114,13 +136,14 @@ struct SfNetworkProfileData {
|
|||
};
|
||||
static_assert(sizeof(SfNetworkProfileData) == 0x17C, "SfNetworkProfileData has incorrect size.");
|
||||
|
||||
// This is nn::nifm::NetworkProfileData
|
||||
struct NifmNetworkProfileData {
|
||||
u128 uuid{};
|
||||
std::array<char, 0x40> network_name{};
|
||||
u32 unknown_1{};
|
||||
u32 unknown_2{};
|
||||
u8 unknown_3{};
|
||||
u8 unknown_4{};
|
||||
NetworkProfileType network_profile_type{};
|
||||
NetworkInterfaceType network_interface_type{};
|
||||
bool is_auto_connect{};
|
||||
bool is_large_capacity{};
|
||||
INSERT_PADDING_BYTES(2);
|
||||
NifmWirelessSettingData wireless_setting_data{};
|
||||
IpSettingData ip_setting_data{};
|
||||
|
@ -184,6 +207,7 @@ public:
|
|||
|
||||
event1 = CreateKEvent(service_context, "IRequest:Event1");
|
||||
event2 = CreateKEvent(service_context, "IRequest:Event2");
|
||||
state = RequestState::NotSubmitted;
|
||||
}
|
||||
|
||||
~IRequest() override {
|
||||
|
@ -196,7 +220,7 @@ private:
|
|||
LOG_WARNING(Service_NIFM, "(STUBBED) called");
|
||||
|
||||
if (state == RequestState::NotSubmitted) {
|
||||
UpdateState(RequestState::Pending);
|
||||
UpdateState(RequestState::OnHold);
|
||||
}
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
|
@ -219,14 +243,14 @@ private:
|
|||
switch (state) {
|
||||
case RequestState::NotSubmitted:
|
||||
return has_connection ? ResultSuccess : ResultNetworkCommunicationDisabled;
|
||||
case RequestState::Pending:
|
||||
case RequestState::OnHold:
|
||||
if (has_connection) {
|
||||
UpdateState(RequestState::Connected);
|
||||
UpdateState(RequestState::Accepted);
|
||||
} else {
|
||||
UpdateState(RequestState::Error);
|
||||
UpdateState(RequestState::Invalid);
|
||||
}
|
||||
return ResultPendingConnection;
|
||||
case RequestState::Connected:
|
||||
case RequestState::Accepted:
|
||||
default:
|
||||
return ResultSuccess;
|
||||
}
|
||||
|
@ -338,9 +362,9 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
|||
.ip_setting_data{
|
||||
.ip_address_setting{
|
||||
.is_automatic{true},
|
||||
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
|
||||
.gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
},
|
||||
.dns_setting{
|
||||
.is_automatic{true},
|
||||
|
@ -348,12 +372,14 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
|||
.secondary_dns{1, 0, 0, 1},
|
||||
},
|
||||
.proxy_setting{
|
||||
.enabled{false},
|
||||
.is_enabled{false},
|
||||
.port{},
|
||||
.proxy_server{},
|
||||
.automatic_auth_enabled{},
|
||||
.user{},
|
||||
.password{},
|
||||
.authentication{
|
||||
.is_enabled{},
|
||||
.user{},
|
||||
.password{},
|
||||
},
|
||||
},
|
||||
.mtu{1500},
|
||||
},
|
||||
|
@ -370,7 +396,7 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
|||
// When we're connected to a room, spoof the hosts IP address
|
||||
if (auto room_member = network.GetRoomMember().lock()) {
|
||||
if (room_member->IsConnected()) {
|
||||
network_profile_data.ip_setting_data.ip_address_setting.current_address =
|
||||
network_profile_data.ip_setting_data.ip_address_setting.ip_address =
|
||||
room_member->GetFakeIpAddress();
|
||||
}
|
||||
}
|
||||
|
@ -444,9 +470,9 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
|
|||
return IpConfigInfo{
|
||||
.ip_address_setting{
|
||||
.is_automatic{true},
|
||||
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
|
||||
.gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
},
|
||||
.dns_setting{
|
||||
.is_automatic{true},
|
||||
|
@ -459,7 +485,7 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
|
|||
// When we're connected to a room, spoof the hosts IP address
|
||||
if (auto room_member = network.GetRoomMember().lock()) {
|
||||
if (room_member->IsConnected()) {
|
||||
ip_config_info.ip_address_setting.current_address = room_member->GetFakeIpAddress();
|
||||
ip_config_info.ip_address_setting.ip_address = room_member->GetFakeIpAddress();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -480,7 +506,7 @@ void IGeneralService::GetInternetConnectionStatus(Kernel::HLERequestContext& ctx
|
|||
LOG_WARNING(Service_NIFM, "(STUBBED) called");
|
||||
|
||||
struct Output {
|
||||
InternetConnectionType type{InternetConnectionType::WiFi};
|
||||
u8 type{static_cast<u8>(NetworkInterfaceType::WiFi_Ieee80211)};
|
||||
u8 wifi_strength{3};
|
||||
InternetConnectionStatus state{InternetConnectionStatus::Connected};
|
||||
};
|
||||
|
|
|
@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) {
|
|||
return Errno::NETUNREACH;
|
||||
case WSAEMSGSIZE:
|
||||
return Errno::MSGSIZE;
|
||||
case WSAETIMEDOUT:
|
||||
return Errno::TIMEDOUT;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
|
||||
return Errno::OTHER;
|
||||
|
@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) {
|
|||
return Errno::NETUNREACH;
|
||||
case EMSGSIZE:
|
||||
return Errno::MSGSIZE;
|
||||
case ETIMEDOUT:
|
||||
return Errno::TIMEDOUT;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
|
||||
return Errno::OTHER;
|
||||
|
@ -226,7 +230,7 @@ Errno GetAndLogLastError() {
|
|||
int e = errno;
|
||||
#endif
|
||||
const Errno err = TranslateNativeError(e);
|
||||
if (err == Errno::AGAIN) {
|
||||
if (err == Errno::AGAIN || err == Errno::TIMEDOUT) {
|
||||
return err;
|
||||
}
|
||||
LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
|
||||
|
|
|
@ -436,7 +436,7 @@ struct Memory::Impl {
|
|||
}
|
||||
|
||||
if (Settings::IsFastmemEnabled()) {
|
||||
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
|
||||
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
|
||||
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
# SPDX-FileCopyrightText: 2017 Citra Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
|
||||
|
||||
add_executable(yuzu-room
|
||||
precompiled_headers.h
|
||||
yuzu_room.cpp
|
||||
|
|
|
@ -4,14 +4,10 @@
|
|||
add_library(input_common STATIC
|
||||
drivers/camera.cpp
|
||||
drivers/camera.h
|
||||
drivers/gc_adapter.cpp
|
||||
drivers/gc_adapter.h
|
||||
drivers/keyboard.cpp
|
||||
drivers/keyboard.h
|
||||
drivers/mouse.cpp
|
||||
drivers/mouse.h
|
||||
drivers/sdl_driver.cpp
|
||||
drivers/sdl_driver.h
|
||||
drivers/tas_input.cpp
|
||||
drivers/tas_input.h
|
||||
drivers/touch_screen.cpp
|
||||
|
@ -62,8 +58,17 @@ if (ENABLE_SDL2)
|
|||
target_compile_definitions(input_common PRIVATE HAVE_SDL2)
|
||||
endif()
|
||||
|
||||
if (ENABLE_LIBUSB)
|
||||
target_sources(input_common PRIVATE
|
||||
drivers/gc_adapter.cpp
|
||||
drivers/gc_adapter.h
|
||||
)
|
||||
target_link_libraries(input_common PRIVATE libusb::usb)
|
||||
target_compile_definitions(input_common PRIVATE HAVE_LIBUSB)
|
||||
endif()
|
||||
|
||||
create_target_directory_groups(input_common)
|
||||
target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost libusb::usb)
|
||||
target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost)
|
||||
|
||||
if (YUZU_USE_PRECOMPILED_HEADERS)
|
||||
target_precompile_headers(input_common PRIVATE precompiled_headers.h)
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include "common/input.h"
|
||||
#include "common/param_package.h"
|
||||
#include "input_common/drivers/camera.h"
|
||||
#include "input_common/drivers/gc_adapter.h"
|
||||
#include "input_common/drivers/keyboard.h"
|
||||
#include "input_common/drivers/mouse.h"
|
||||
#include "input_common/drivers/tas_input.h"
|
||||
|
@ -19,6 +18,10 @@
|
|||
#include "input_common/input_mapping.h"
|
||||
#include "input_common/input_poller.h"
|
||||
#include "input_common/main.h"
|
||||
|
||||
#ifdef HAVE_LIBUSB
|
||||
#include "input_common/drivers/gc_adapter.h"
|
||||
#endif
|
||||
#ifdef HAVE_SDL2
|
||||
#include "input_common/drivers/sdl_driver.h"
|
||||
#endif
|
||||
|
@ -45,7 +48,9 @@ struct InputSubsystem::Impl {
|
|||
RegisterEngine("keyboard", keyboard);
|
||||
RegisterEngine("mouse", mouse);
|
||||
RegisterEngine("touch", touch_screen);
|
||||
#ifdef HAVE_LIBUSB
|
||||
RegisterEngine("gcpad", gcadapter);
|
||||
#endif
|
||||
RegisterEngine("cemuhookudp", udp_client);
|
||||
RegisterEngine("tas", tas_input);
|
||||
RegisterEngine("camera", camera);
|
||||
|
@ -72,7 +77,9 @@ struct InputSubsystem::Impl {
|
|||
UnregisterEngine(keyboard);
|
||||
UnregisterEngine(mouse);
|
||||
UnregisterEngine(touch_screen);
|
||||
#ifdef HAVE_LIBUSB
|
||||
UnregisterEngine(gcadapter);
|
||||
#endif
|
||||
UnregisterEngine(udp_client);
|
||||
UnregisterEngine(tas_input);
|
||||
UnregisterEngine(camera);
|
||||
|
@ -95,8 +102,10 @@ struct InputSubsystem::Impl {
|
|||
devices.insert(devices.end(), keyboard_devices.begin(), keyboard_devices.end());
|
||||
auto mouse_devices = mouse->GetInputDevices();
|
||||
devices.insert(devices.end(), mouse_devices.begin(), mouse_devices.end());
|
||||
#ifdef HAVE_LIBUSB
|
||||
auto gcadapter_devices = gcadapter->GetInputDevices();
|
||||
devices.insert(devices.end(), gcadapter_devices.begin(), gcadapter_devices.end());
|
||||
#endif
|
||||
auto udp_devices = udp_client->GetInputDevices();
|
||||
devices.insert(devices.end(), udp_devices.begin(), udp_devices.end());
|
||||
#ifdef HAVE_SDL2
|
||||
|
@ -119,9 +128,11 @@ struct InputSubsystem::Impl {
|
|||
if (engine == mouse->GetEngineName()) {
|
||||
return mouse;
|
||||
}
|
||||
#ifdef HAVE_LIBUSB
|
||||
if (engine == gcadapter->GetEngineName()) {
|
||||
return gcadapter;
|
||||
}
|
||||
#endif
|
||||
if (engine == udp_client->GetEngineName()) {
|
||||
return udp_client;
|
||||
}
|
||||
|
@ -194,9 +205,11 @@ struct InputSubsystem::Impl {
|
|||
if (engine == mouse->GetEngineName()) {
|
||||
return true;
|
||||
}
|
||||
#ifdef HAVE_LIBUSB
|
||||
if (engine == gcadapter->GetEngineName()) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
if (engine == udp_client->GetEngineName()) {
|
||||
return true;
|
||||
}
|
||||
|
@ -217,7 +230,9 @@ struct InputSubsystem::Impl {
|
|||
void BeginConfiguration() {
|
||||
keyboard->BeginConfiguration();
|
||||
mouse->BeginConfiguration();
|
||||
#ifdef HAVE_LIBUSB
|
||||
gcadapter->BeginConfiguration();
|
||||
#endif
|
||||
udp_client->BeginConfiguration();
|
||||
#ifdef HAVE_SDL2
|
||||
sdl->BeginConfiguration();
|
||||
|
@ -227,7 +242,9 @@ struct InputSubsystem::Impl {
|
|||
void EndConfiguration() {
|
||||
keyboard->EndConfiguration();
|
||||
mouse->EndConfiguration();
|
||||
#ifdef HAVE_LIBUSB
|
||||
gcadapter->EndConfiguration();
|
||||
#endif
|
||||
udp_client->EndConfiguration();
|
||||
#ifdef HAVE_SDL2
|
||||
sdl->EndConfiguration();
|
||||
|
@ -248,7 +265,6 @@ struct InputSubsystem::Impl {
|
|||
|
||||
std::shared_ptr<Keyboard> keyboard;
|
||||
std::shared_ptr<Mouse> mouse;
|
||||
std::shared_ptr<GCAdapter> gcadapter;
|
||||
std::shared_ptr<TouchScreen> touch_screen;
|
||||
std::shared_ptr<TasInput::Tas> tas_input;
|
||||
std::shared_ptr<CemuhookUDP::UDPClient> udp_client;
|
||||
|
@ -256,6 +272,10 @@ struct InputSubsystem::Impl {
|
|||
std::shared_ptr<VirtualAmiibo> virtual_amiibo;
|
||||
std::shared_ptr<VirtualGamepad> virtual_gamepad;
|
||||
|
||||
#ifdef HAVE_LIBUSB
|
||||
std::shared_ptr<GCAdapter> gcadapter;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SDL2
|
||||
std::shared_ptr<SDLDriver> sdl;
|
||||
#endif
|
||||
|
|
|
@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
|
|||
case IR::Attribute::VertexId:
|
||||
ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::FrontFace:
|
||||
ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
|
||||
break;
|
||||
|
@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S
|
|||
case IR::Attribute::VertexId:
|
||||
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get U32 attribute {}", attr);
|
||||
}
|
||||
|
|
|
@ -219,7 +219,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR
|
|||
EmitContext ctx{program, bindings, profile, runtime_info};
|
||||
Precolor(program);
|
||||
EmitCode(ctx, program);
|
||||
const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
|
||||
const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))};
|
||||
ctx.header.insert(0, version);
|
||||
if (program.shared_memory_size > 0) {
|
||||
const auto requested_size{program.shared_memory_size};
|
||||
|
|
|
@ -234,6 +234,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
|||
case IR::Attribute::FrontFace:
|
||||
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.AddF32("{}=itof(gl_BaseInstance);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.AddF32("{}=itof(gl_DrawID);", inst);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get attribute {}", attr);
|
||||
}
|
||||
|
@ -250,6 +259,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s
|
|||
case IR::Attribute::VertexId:
|
||||
ctx.AddU32("{}=uint(gl_VertexID);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.AddU32("{}=uint(gl_BaseInstance);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.AddU32("{}=uint(gl_BaseVertex);", inst);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.AddU32("{}=uint(gl_DrawID);", inst);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get U32 attribute {}", attr);
|
||||
}
|
||||
|
|
|
@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
|||
case IR::Attribute::PositionY:
|
||||
case IR::Attribute::PositionZ:
|
||||
case IR::Attribute::PositionW:
|
||||
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
|
||||
ctx.Const(element)));
|
||||
return ctx.OpLoad(
|
||||
ctx.F32[1],
|
||||
ctx.need_input_position_indirect
|
||||
? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
|
||||
ctx.Const(element))
|
||||
: AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
|
||||
case IR::Attribute::InstanceId:
|
||||
if (ctx.profile.support_vertex_instance_id) {
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
|
||||
|
@ -339,6 +343,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
|||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
|
||||
}
|
||||
case IR::Attribute::BaseInstance:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
|
||||
case IR::Attribute::BaseVertex:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
|
||||
case IR::Attribute::DrawID:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index));
|
||||
case IR::Attribute::FrontFace:
|
||||
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
|
||||
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
|
||||
|
@ -380,6 +390,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
|
|||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||
return ctx.OpISub(ctx.U32[1], index, base);
|
||||
}
|
||||
case IR::Attribute::BaseInstance:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
|
||||
case IR::Attribute::BaseVertex:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
|
||||
case IR::Attribute::DrawID:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.draw_index);
|
||||
default:
|
||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||
}
|
||||
|
|
|
@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
|
|||
ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
|
||||
}
|
||||
|
||||
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
|
||||
const Id thirty_two{ctx.Const(32u)};
|
||||
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
|
||||
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
|
||||
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
|
||||
Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
||||
const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
|
||||
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
|
||||
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
|
|||
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
const Id thirty_two{ctx.Const(32u)};
|
||||
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
|
||||
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
|
||||
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
|
||||
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
|
||||
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
|
||||
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
|
||||
|
||||
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
|
||||
const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
||||
Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
|
|
@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
|||
U16 = Name(TypeInt(16, false), "u16");
|
||||
S16 = Name(TypeInt(16, true), "s16");
|
||||
}
|
||||
if (info.uses_int64) {
|
||||
if (info.uses_int64 && profile.support_int64) {
|
||||
AddCapability(spv::Capability::Int64);
|
||||
U64 = Name(TypeInt(64, false), "u64");
|
||||
}
|
||||
|
@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
|
|||
size_t label_index{0};
|
||||
if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
|
||||
AddLabel(labels[label_index]);
|
||||
const Id pointer{is_array
|
||||
? OpAccessChain(input_f32, input_position, vertex, masked_index)
|
||||
: OpAccessChain(input_f32, input_position, masked_index)};
|
||||
const Id pointer{[&]() {
|
||||
if (need_input_position_indirect) {
|
||||
if (is_array)
|
||||
return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
|
||||
masked_index);
|
||||
else
|
||||
return OpAccessChain(input_f32, input_position, u32_zero_value,
|
||||
masked_index);
|
||||
} else {
|
||||
if (is_array)
|
||||
return OpAccessChain(input_f32, input_position, vertex, masked_index);
|
||||
else
|
||||
return OpAccessChain(input_f32, input_position, masked_index);
|
||||
}
|
||||
}()};
|
||||
const Id result{OpLoad(F32[1], pointer)};
|
||||
OpReturnValue(result);
|
||||
++label_index;
|
||||
|
@ -1367,30 +1379,56 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
|||
Decorate(layer, spv::Decoration::Flat);
|
||||
}
|
||||
if (loads.AnyComponent(IR::Attribute::PositionX)) {
|
||||
const bool is_fragment{stage != Stage::Fragment};
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
|
||||
input_position = DefineInput(*this, F32[4], true, built_in);
|
||||
if (profile.support_geometry_shader_passthrough) {
|
||||
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
|
||||
Decorate(input_position, spv::Decoration::PassthroughNV);
|
||||
const bool is_fragment{stage == Stage::Fragment};
|
||||
if (!is_fragment && profile.has_broken_spirv_position_input) {
|
||||
need_input_position_indirect = true;
|
||||
|
||||
const Id input_position_struct = TypeStruct(F32[4]);
|
||||
input_position = DefineInput(*this, input_position_struct, true);
|
||||
|
||||
MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
|
||||
static_cast<unsigned>(spv::BuiltIn::Position));
|
||||
Decorate(input_position_struct, spv::Decoration::Block);
|
||||
} else {
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
|
||||
: spv::BuiltIn::Position};
|
||||
input_position = DefineInput(*this, F32[4], true, built_in);
|
||||
|
||||
if (profile.support_geometry_shader_passthrough) {
|
||||
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
|
||||
Decorate(input_position, spv::Decoration::PassthroughNV);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (loads[IR::Attribute::InstanceId]) {
|
||||
if (profile.support_vertex_instance_id) {
|
||||
instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
|
||||
if (loads[IR::Attribute::BaseInstance]) {
|
||||
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else {
|
||||
instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
|
||||
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
|
||||
}
|
||||
} else if (loads[IR::Attribute::BaseInstance]) {
|
||||
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
|
||||
}
|
||||
if (loads[IR::Attribute::VertexId]) {
|
||||
if (profile.support_vertex_instance_id) {
|
||||
vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
|
||||
if (loads[IR::Attribute::BaseVertex]) {
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else {
|
||||
vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else if (loads[IR::Attribute::BaseVertex]) {
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
if (loads[IR::Attribute::DrawID]) {
|
||||
draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex);
|
||||
}
|
||||
if (loads[IR::Attribute::FrontFace]) {
|
||||
front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
|
||||
|
|
|
@ -218,6 +218,7 @@ public:
|
|||
Id base_instance{};
|
||||
Id vertex_id{};
|
||||
Id vertex_index{};
|
||||
Id draw_index{};
|
||||
Id base_vertex{};
|
||||
Id front_face{};
|
||||
Id point_coord{};
|
||||
|
@ -279,6 +280,7 @@ public:
|
|||
Id write_global_func_u32x2{};
|
||||
Id write_global_func_u32x4{};
|
||||
|
||||
bool need_input_position_indirect{};
|
||||
Id input_position{};
|
||||
std::array<Id, 32> input_generics{};
|
||||
|
||||
|
|
|
@ -34,6 +34,11 @@ public:
|
|||
|
||||
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
|
||||
|
||||
[[nodiscard]] virtual bool HasHLEMacroState() const = 0;
|
||||
|
||||
[[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank,
|
||||
u32 offset) = 0;
|
||||
|
||||
virtual void Dump(u64 hash) = 0;
|
||||
|
||||
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
||||
|
@ -52,11 +57,16 @@ public:
|
|||
return start_address;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsPropietaryDriver() const noexcept {
|
||||
return is_propietary_driver;
|
||||
}
|
||||
|
||||
protected:
|
||||
ProgramHeader sph{};
|
||||
std::array<u32, 8> gp_passthrough_mask{};
|
||||
Stage stage{};
|
||||
u32 start_address{};
|
||||
bool is_propietary_driver{};
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -446,6 +446,12 @@ std::string NameOf(Attribute attribute) {
|
|||
return "ViewportMask";
|
||||
case Attribute::FrontFace:
|
||||
return "FrontFace";
|
||||
case Attribute::BaseInstance:
|
||||
return "BaseInstance";
|
||||
case Attribute::BaseVertex:
|
||||
return "BaseVertex";
|
||||
case Attribute::DrawID:
|
||||
return "DrawID";
|
||||
}
|
||||
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
|
||||
}
|
||||
|
|
|
@ -219,6 +219,11 @@ enum class Attribute : u64 {
|
|||
FixedFncTexture9Q = 231,
|
||||
ViewportMask = 232,
|
||||
FrontFace = 255,
|
||||
|
||||
// Implementation attributes
|
||||
BaseInstance = 256,
|
||||
BaseVertex = 257,
|
||||
DrawID = 258,
|
||||
};
|
||||
|
||||
constexpr size_t NUM_GENERICS = 32;
|
||||
|
|
|
@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
|
|||
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
|
||||
return GetAttributeU32(attribute, Imm32(0));
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) {
|
||||
return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex);
|
||||
}
|
||||
|
||||
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
|
||||
Inst(Opcode::SetAttribute, attribute, value, vertex);
|
||||
}
|
||||
|
|
|
@ -74,6 +74,8 @@ public:
|
|||
|
||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
|
||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
|
||||
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
|
||||
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);
|
||||
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
|
||||
|
||||
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
|
||||
|
|
|
@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
|
|||
}
|
||||
return mapping;
|
||||
}
|
||||
|
||||
void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
|
||||
const Shader::VaryingState& passthrough_mask,
|
||||
bool passthrough_position,
|
||||
std::optional<IR::Attribute> passthrough_layer_attr) {
|
||||
for (u32 i = 0; i < program.output_vertices; i++) {
|
||||
// Assign generics from input
|
||||
for (u32 j = 0; j < 32; j++) {
|
||||
if (!passthrough_mask.Generic(j)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
}
|
||||
|
||||
if (passthrough_position) {
|
||||
// Assign position from input
|
||||
const IR::Attribute attr = IR::Attribute::PositionX;
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
}
|
||||
|
||||
if (passthrough_layer_attr) {
|
||||
// Assign layer
|
||||
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
|
||||
ir.Imm32(0));
|
||||
}
|
||||
|
||||
// Emit vertex
|
||||
ir.EmitVertex(ir.Imm32(0));
|
||||
}
|
||||
ir.EndPrimitive(ir.Imm32(0));
|
||||
}
|
||||
|
||||
u32 GetOutputTopologyVertices(OutputTopology output_topology) {
|
||||
switch (output_topology) {
|
||||
case OutputTopology::PointList:
|
||||
return 1;
|
||||
case OutputTopology::LineStrip:
|
||||
return 2;
|
||||
default:
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() == IR::Opcode::Epilogue) {
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
EmitGeometryPassthrough(
|
||||
ir, program, program.info.passthrough,
|
||||
program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
|
@ -198,6 +262,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
|
||||
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
|
||||
}
|
||||
|
||||
if (!host_info.support_geometry_shader_passthrough) {
|
||||
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
|
||||
LowerGeometryPassthrough(program, host_info);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -219,11 +288,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
}
|
||||
Optimization::SsaRewritePass(program);
|
||||
|
||||
Optimization::ConstantPropagationPass(program);
|
||||
Optimization::ConstantPropagationPass(env, program);
|
||||
|
||||
Optimization::PositionPass(env, program);
|
||||
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program);
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
|
||||
Optimization::TexturePass(env, program, host_info);
|
||||
|
||||
if (Settings::values.resolution_info.active) {
|
||||
|
@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
|
|||
IR::Program program;
|
||||
program.stage = Stage::Geometry;
|
||||
program.output_topology = output_topology;
|
||||
switch (output_topology) {
|
||||
case OutputTopology::PointList:
|
||||
program.output_vertices = 1;
|
||||
break;
|
||||
case OutputTopology::LineStrip:
|
||||
program.output_vertices = 2;
|
||||
break;
|
||||
default:
|
||||
program.output_vertices = 3;
|
||||
break;
|
||||
}
|
||||
program.output_vertices = GetOutputTopologyVertices(output_topology);
|
||||
|
||||
program.is_geometry_passthrough = false;
|
||||
program.info.loads.mask = source_program.info.stores.mask;
|
||||
|
@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
|
|||
node.data.block = current_block;
|
||||
|
||||
IR::IREmitter ir{*current_block};
|
||||
for (u32 i = 0; i < program.output_vertices; i++) {
|
||||
// Assign generics from input
|
||||
for (u32 j = 0; j < 32; j++) {
|
||||
if (!program.info.stores.Generic(j)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
}
|
||||
|
||||
// Assign position from input
|
||||
const IR::Attribute attr = IR::Attribute::PositionX;
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
|
||||
// Assign layer
|
||||
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
|
||||
ir.Imm32(0));
|
||||
|
||||
// Emit vertex
|
||||
ir.EmitVertex(ir.Imm32(0));
|
||||
}
|
||||
ir.EndPrimitive(ir.Imm32(0));
|
||||
EmitGeometryPassthrough(ir, program, program.info.stores, true,
|
||||
source_program.info.emulated_layer);
|
||||
|
||||
IR::Block* return_block{block_pool.Create(inst_pool)};
|
||||
IR::IREmitter{*return_block}.Epilogue();
|
||||
|
|
|
@ -15,6 +15,9 @@ struct HostTranslateInfo {
|
|||
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
||||
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
||||
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
||||
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
|
||||
///< passthrough shaders
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <type_traits>
|
||||
|
||||
#include "common/bit_cast.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
|
@ -515,6 +516,9 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
|||
case IR::Attribute::PrimitiveId:
|
||||
case IR::Attribute::InstanceId:
|
||||
case IR::Attribute::VertexId:
|
||||
case IR::Attribute::BaseVertex:
|
||||
case IR::Attribute::BaseInstance:
|
||||
case IR::Attribute::DrawID:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
|
@ -644,7 +648,63 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
const IR::Value bank{inst.Arg(0)};
|
||||
const IR::Value offset{inst.Arg(1)};
|
||||
if (!bank.IsImmediate() || !offset.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
const auto bank_value = bank.U32();
|
||||
const auto offset_value = offset.U32();
|
||||
auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value);
|
||||
if (!replacement) {
|
||||
return;
|
||||
}
|
||||
const auto new_attribute = [replacement]() {
|
||||
switch (*replacement) {
|
||||
case ReplaceConstant::BaseInstance:
|
||||
return IR::Attribute::BaseInstance;
|
||||
case ReplaceConstant::BaseVertex:
|
||||
return IR::Attribute::BaseVertex;
|
||||
case ReplaceConstant::DrawID:
|
||||
return IR::Attribute::DrawID;
|
||||
default:
|
||||
throw NotImplementedException("Not implemented replacement variable {}", *replacement);
|
||||
}
|
||||
}();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
|
||||
inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute));
|
||||
} else {
|
||||
inst.ReplaceUsesWith(ir.GetAttribute(new_attribute));
|
||||
}
|
||||
}
|
||||
|
||||
void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
|
||||
u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
|
||||
const IR::Value bank{inst.Arg(0)};
|
||||
const IR::Value offset{inst.Arg(1)};
|
||||
if (!bank.IsImmediate() || !offset.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
const auto bank_value = bank.U32();
|
||||
if (bank_value != which_bank) {
|
||||
return;
|
||||
}
|
||||
const auto offset_value = offset.U32();
|
||||
if (offset_value < offset_start || offset_value >= offset_end) {
|
||||
return;
|
||||
}
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
|
||||
inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
|
||||
} else {
|
||||
inst.ReplaceUsesWith(
|
||||
IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::GetRegister:
|
||||
return FoldGetRegister(inst);
|
||||
|
@ -789,18 +849,28 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
IR::Opcode::CompositeInsertF16x4);
|
||||
case IR::Opcode::FSwizzleAdd:
|
||||
return FoldFSwizzleAdd(block, inst);
|
||||
case IR::Opcode::GetCbufF32:
|
||||
case IR::Opcode::GetCbufU32:
|
||||
if (env.HasHLEMacroState()) {
|
||||
FoldConstBuffer(env, block, inst);
|
||||
}
|
||||
if (env.IsPropietaryDriver()) {
|
||||
FoldDriverConstBuffer(env, block, inst, 1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
void ConstantPropagationPass(IR::Program& program) {
|
||||
void ConstantPropagationPass(Environment& env, IR::Program& program) {
|
||||
const auto end{program.post_order_blocks.rend()};
|
||||
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
|
||||
IR::Block* const block{*it};
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
ConstantPropagation(*block, inst);
|
||||
ConstantPropagation(env, *block, inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/host_translate_info.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
|
|||
}
|
||||
|
||||
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
|
||||
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
|
||||
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
IR::U32 offset;
|
||||
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
|
||||
|
@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
|
|||
}
|
||||
// Subtract the least significant 32 bits from the guest offset. The result is the storage
|
||||
// buffer offset in bytes.
|
||||
const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
|
||||
IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
|
||||
|
||||
// Align the offset base to match the host alignment requirements
|
||||
low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
|
||||
return ir.ISub(offset, low_cbuf);
|
||||
}
|
||||
|
||||
|
@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
|
||||
StorageInfo info;
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
|
@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
|||
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
|
||||
IR::Block* const block{storage_inst.block};
|
||||
IR::Inst* const inst{storage_inst.inst};
|
||||
const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
|
||||
const IR::U32 offset{
|
||||
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
|
||||
Replace(*block, *inst, index, offset);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,9 +13,9 @@ struct HostTranslateInfo;
|
|||
namespace Shader::Optimization {
|
||||
|
||||
void CollectShaderInfoPass(Environment& env, IR::Program& program);
|
||||
void ConstantPropagationPass(IR::Program& program);
|
||||
void ConstantPropagationPass(Environment& env, IR::Program& program);
|
||||
void DeadCodeEliminationPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
|
||||
void IdentityRemovalPass(IR::Program& program);
|
||||
void LowerFp16ToFp32(IR::Program& program);
|
||||
void LowerInt64ToInt32(IR::Program& program);
|
||||
|
|
|
@ -55,6 +55,8 @@ struct Profile {
|
|||
|
||||
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
|
||||
bool has_broken_spirv_clamp{};
|
||||
/// The Position builtin needs to be wrapped in a struct when used as an input
|
||||
bool has_broken_spirv_position_input{};
|
||||
/// Offset image operands with an unsigned type do not work
|
||||
bool has_broken_unsigned_image_offsets{};
|
||||
/// Signed instructions with unsigned data types are misinterpreted
|
||||
|
|
|
@ -16,6 +16,12 @@
|
|||
|
||||
namespace Shader {
|
||||
|
||||
enum class ReplaceConstant : u32 {
|
||||
BaseInstance,
|
||||
BaseVertex,
|
||||
DrawID,
|
||||
};
|
||||
|
||||
enum class TextureType : u32 {
|
||||
Color1D,
|
||||
ColorArray1D,
|
||||
|
@ -59,6 +65,8 @@ enum class Interpolation {
|
|||
struct ConstantBufferDescriptor {
|
||||
u32 index;
|
||||
u32 count;
|
||||
|
||||
auto operator<=>(const ConstantBufferDescriptor&) const = default;
|
||||
};
|
||||
|
||||
struct StorageBufferDescriptor {
|
||||
|
@ -66,6 +74,8 @@ struct StorageBufferDescriptor {
|
|||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
bool is_written;
|
||||
|
||||
auto operator<=>(const StorageBufferDescriptor&) const = default;
|
||||
};
|
||||
|
||||
struct TextureBufferDescriptor {
|
||||
|
@ -78,6 +88,8 @@ struct TextureBufferDescriptor {
|
|||
u32 secondary_shift_left;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const TextureBufferDescriptor&) const = default;
|
||||
};
|
||||
using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
|
||||
|
||||
|
@ -89,6 +101,8 @@ struct ImageBufferDescriptor {
|
|||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const ImageBufferDescriptor&) const = default;
|
||||
};
|
||||
using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
|
||||
|
||||
|
@ -104,6 +118,8 @@ struct TextureDescriptor {
|
|||
u32 secondary_shift_left;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const TextureDescriptor&) const = default;
|
||||
};
|
||||
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
|
||||
|
||||
|
@ -116,6 +132,8 @@ struct ImageDescriptor {
|
|||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const ImageDescriptor&) const = default;
|
||||
};
|
||||
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
namespace Shader {
|
||||
|
||||
struct VaryingState {
|
||||
std::bitset<256> mask{};
|
||||
std::bitset<512> mask{};
|
||||
|
||||
void Set(IR::Attribute attribute, bool state = true) {
|
||||
mask[static_cast<size_t>(attribute)] = state;
|
||||
|
|
|
@ -7,6 +7,7 @@ add_executable(tests
|
|||
common/fibers.cpp
|
||||
common/host_memory.cpp
|
||||
common/param_package.cpp
|
||||
common/range_map.cpp
|
||||
common/ring_buffer.cpp
|
||||
common/scratch_buffer.cpp
|
||||
common/unique_function.cpp
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
|
||||
#include "common/range_map.h"
|
||||
|
||||
enum class MappedEnum : u32 {
|
||||
Invalid = 0,
|
||||
Valid_1 = 1,
|
||||
Valid_2 = 2,
|
||||
Valid_3 = 3,
|
||||
};
|
||||
|
||||
TEST_CASE("Range Map: Setup", "[video_core]") {
|
||||
Common::RangeMap<u64, MappedEnum> my_map(MappedEnum::Invalid);
|
||||
my_map.Map(3000, 3500, MappedEnum::Valid_1);
|
||||
my_map.Unmap(3200, 3600);
|
||||
my_map.Map(4000, 4500, MappedEnum::Valid_2);
|
||||
my_map.Map(4200, 4400, MappedEnum::Valid_2);
|
||||
my_map.Map(4200, 4400, MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(4200) == 200);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(3000) == 200);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(2900) == 0);
|
||||
|
||||
REQUIRE(my_map.GetValueAt(2900) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(3100) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(3000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(3200) == MappedEnum::Invalid);
|
||||
|
||||
REQUIRE(my_map.GetValueAt(4199) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(4200) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(4400) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(4500) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(4600) == MappedEnum::Invalid);
|
||||
|
||||
my_map.Unmap(0, 6000);
|
||||
for (u64 address = 0; address < 10000; address += 1000) {
|
||||
REQUIRE(my_map.GetContinousSizeFrom(address) == 0);
|
||||
}
|
||||
|
||||
my_map.Map(1000, 3000, MappedEnum::Valid_1);
|
||||
my_map.Map(4000, 5000, MappedEnum::Valid_1);
|
||||
my_map.Map(2500, 4100, MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 4000);
|
||||
|
||||
my_map.Map(1000, 3000, MappedEnum::Valid_1);
|
||||
my_map.Map(4000, 5000, MappedEnum::Valid_2);
|
||||
my_map.Map(2500, 4100, MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1500);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(2500) == 1600);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(4100) == 900);
|
||||
REQUIRE(my_map.GetValueAt(900) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(2500) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(4100) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(5000) == MappedEnum::Invalid);
|
||||
|
||||
my_map.Map(2000, 6000, MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1000);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(3000) == 3000);
|
||||
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(1999) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(1500) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(2001) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(5999) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(6000) == MappedEnum::Invalid);
|
||||
}
|
|
@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
|
|||
int num = 0;
|
||||
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
REQUIRE(num == 0);
|
||||
REQUIRE(num == 1);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
|
|
|
@ -13,6 +13,7 @@ add_library(video_core STATIC
|
|||
buffer_cache/buffer_base.h
|
||||
buffer_cache/buffer_cache.cpp
|
||||
buffer_cache/buffer_cache.h
|
||||
cache_types.h
|
||||
cdma_pusher.cpp
|
||||
cdma_pusher.h
|
||||
compatible_formats.cpp
|
||||
|
@ -84,6 +85,7 @@ add_library(video_core STATIC
|
|||
gpu.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
invalidation_accumulator.h
|
||||
memory_manager.cpp
|
||||
memory_manager.h
|
||||
precompiled_headers.h
|
||||
|
@ -189,6 +191,8 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_texture_cache.cpp
|
||||
renderer_vulkan/vk_texture_cache.h
|
||||
renderer_vulkan/vk_texture_cache_base.cpp
|
||||
renderer_vulkan/vk_turbo_mode.cpp
|
||||
renderer_vulkan/vk_turbo_mode.h
|
||||
renderer_vulkan/vk_update_descriptor.cpp
|
||||
renderer_vulkan/vk_update_descriptor.h
|
||||
shader_cache.cpp
|
||||
|
|
|
@ -430,7 +430,7 @@ private:
|
|||
if (query_begin >= SizeBytes() || size < 0) {
|
||||
return;
|
||||
}
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const state_words = Array<type>();
|
||||
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
|
||||
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
|
||||
|
@ -483,7 +483,7 @@ private:
|
|||
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
|
||||
}
|
||||
// Exclude CPU modified pages when visiting GPU pages
|
||||
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
|
||||
const u64 word = current_word;
|
||||
u64 page = page_begin;
|
||||
page_begin = 0;
|
||||
|
||||
|
@ -531,7 +531,7 @@ private:
|
|||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
|
@ -539,8 +539,7 @@ private:
|
|||
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
|
||||
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@ -564,7 +563,7 @@ private:
|
|||
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
|
@ -574,8 +573,7 @@ private:
|
|||
u64 begin = std::numeric_limits<u64>::max();
|
||||
u64 end = 0;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -200,7 +200,16 @@ public:
|
|||
/// Return true when a CPU region is modified from the CPU
|
||||
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
||||
|
||||
std::mutex mutex;
|
||||
void SetDrawIndirect(
|
||||
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
|
||||
current_draw_indirect = current_draw_indirect_;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
Runtime& runtime;
|
||||
|
||||
private:
|
||||
|
@ -272,6 +281,8 @@ private:
|
|||
|
||||
void BindHostVertexBuffers();
|
||||
|
||||
void BindHostDrawIndirectBuffers();
|
||||
|
||||
void BindHostGraphicsUniformBuffers(size_t stage);
|
||||
|
||||
void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
|
||||
|
@ -298,6 +309,8 @@ private:
|
|||
|
||||
void UpdateVertexBuffer(u32 index);
|
||||
|
||||
void UpdateDrawIndirect();
|
||||
|
||||
void UpdateUniformBuffers(size_t stage);
|
||||
|
||||
void UpdateStorageBuffers(size_t stage);
|
||||
|
@ -372,6 +385,8 @@ private:
|
|||
SlotVector<Buffer> slot_buffers;
|
||||
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
|
||||
|
||||
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
|
||||
|
||||
u32 last_index_count = 0;
|
||||
|
||||
Binding index_buffer;
|
||||
|
@ -380,6 +395,8 @@ private:
|
|||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||
Binding count_buffer_binding;
|
||||
Binding indirect_buffer_binding;
|
||||
|
||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||
|
@ -674,6 +691,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
|
|||
}
|
||||
BindHostVertexBuffers();
|
||||
BindHostTransformFeedbackBuffers();
|
||||
if (current_draw_indirect) {
|
||||
BindHostDrawIndirectBuffers();
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -823,6 +843,7 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
|
|||
template <class P>
|
||||
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||
AccumulateFlushes();
|
||||
|
||||
if (committed_ranges.empty()) {
|
||||
return;
|
||||
}
|
||||
|
@ -869,7 +890,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
|||
buffer_id,
|
||||
});
|
||||
// Align up to avoid cache conflicts
|
||||
constexpr u64 align = 256ULL;
|
||||
constexpr u64 align = 8ULL;
|
||||
constexpr u64 mask = ~(align - 1ULL);
|
||||
total_size_bytes += (new_size + align - 1) & mask;
|
||||
largest_copy = std::max(largest_copy, new_size);
|
||||
|
@ -1041,6 +1062,19 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
|||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostDrawIndirectBuffers() {
|
||||
const auto bind_buffer = [this](const Binding& binding) {
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
bind_buffer(count_buffer_binding);
|
||||
}
|
||||
bind_buffer(indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
||||
u32 dirty = ~0U;
|
||||
|
@ -1272,6 +1306,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
|||
UpdateStorageBuffers(stage);
|
||||
UpdateTextureBuffers(stage);
|
||||
}
|
||||
if (current_draw_indirect) {
|
||||
UpdateDrawIndirect();
|
||||
}
|
||||
} while (has_deleted_buffers);
|
||||
}
|
||||
|
||||
|
@ -1289,7 +1326,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
|||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const auto& index_array = draw_state.index_buffer;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
|
||||
if (!flags[Dirty::IndexBuffer]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::IndexBuffer] = false;
|
||||
|
@ -1361,6 +1398,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
|||
};
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateDrawIndirect() {
|
||||
const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
binding = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
binding = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = static_cast<u32>(size),
|
||||
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
|
||||
};
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
|
||||
}
|
||||
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
|
||||
indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
|
@ -1880,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
|
|||
bool is_written) const {
|
||||
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
const u32 alignment = runtime.GetStorageBufferAlignment();
|
||||
|
||||
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
|
||||
const u32 aligned_size =
|
||||
Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
|
||||
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
|
||||
if (!cpu_addr || size == 0) {
|
||||
return NULL_BINDING;
|
||||
}
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
|
||||
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
|
||||
const Binding binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.buffer_id = BufferId{},
|
||||
};
|
||||
return binding;
|
||||
|
@ -1941,4 +2006,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
|
|||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
|
||||
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
|
||||
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
enum class CacheType : u32 {
|
||||
None = 0,
|
||||
TextureCache = 1 << 0,
|
||||
QueryCache = 1 << 1,
|
||||
BufferCache = 1 << 2,
|
||||
ShaderCache = 1 << 3,
|
||||
NoTextureCache = QueryCache | BufferCache | ShaderCache,
|
||||
NoBufferCache = TextureCache | QueryCache | ShaderCache,
|
||||
NoQueryCache = TextureCache | BufferCache | ShaderCache,
|
||||
All = TextureCache | QueryCache | BufferCache | ShaderCache,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(CacheType)
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -61,7 +61,7 @@ bool DmaPusher::Step() {
|
|||
} else {
|
||||
const CommandListHeader command_list_header{
|
||||
command_list.command_lists[dma_pushbuffer_subindex++]};
|
||||
const GPUVAddr dma_get = command_list_header.addr;
|
||||
dma_state.dma_get = command_list_header.addr;
|
||||
|
||||
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
|
||||
// We've gone through the current list, remove it from the queue
|
||||
|
@ -75,12 +75,22 @@ bool DmaPusher::Step() {
|
|||
|
||||
// Push buffer non-empty, read a word
|
||||
command_headers.resize_destructive(command_list_header.size);
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
memory_manager.ReadBlock(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
constexpr u32 MacroRegistersStart = 0xE00;
|
||||
if (dma_state.method < MacroRegistersStart) {
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
}
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
const size_t copy_size = command_list_header.size * sizeof(u32);
|
||||
if (subchannels[dma_state.subchannel]) {
|
||||
subchannels[dma_state.subchannel]->current_dirty =
|
||||
memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
|
||||
}
|
||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
|
||||
}
|
||||
ProcessCommands(command_headers);
|
||||
}
|
||||
|
@ -94,6 +104,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
|||
|
||||
if (dma_state.method_count) {
|
||||
// Data word of methods command
|
||||
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
|
||||
if (dma_state.non_incrementing) {
|
||||
const u32 max_write = static_cast<u32>(
|
||||
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
||||
|
@ -132,6 +143,8 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
|||
case SubmissionMode::Inline:
|
||||
dma_state.method = command_header.method;
|
||||
dma_state.subchannel = command_header.subchannel;
|
||||
dma_state.dma_word_offset = static_cast<u64>(
|
||||
-static_cast<s64>(dma_state.dma_get)); // negate to set address as 0
|
||||
CallMethod(command_header.arg_count);
|
||||
dma_state.non_incrementing = true;
|
||||
dma_increment_once = false;
|
||||
|
@ -164,8 +177,14 @@ void DmaPusher::CallMethod(u32 argument) const {
|
|||
dma_state.method_count,
|
||||
});
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
|
||||
dma_state.is_last_call);
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
if (!subchannel->execution_mask[dma_state.method]) [[likely]] {
|
||||
subchannel->method_sink.emplace_back(dma_state.method, argument);
|
||||
return;
|
||||
}
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,8 +193,11 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
|
|||
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
|
||||
num_methods, dma_state.method_count);
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -156,6 +156,8 @@ private:
|
|||
u32 subchannel; ///< Current subchannel
|
||||
u32 method_count; ///< Current method count
|
||||
u32 length_pending; ///< Large NI command length pending
|
||||
GPUVAddr dma_get; ///< Currently read segment
|
||||
u64 dma_word_offset; ///< Current word ofset from address
|
||||
bool non_incrementing; ///< Current command's NI flag
|
||||
bool is_last_call;
|
||||
};
|
||||
|
|
|
@ -91,6 +91,23 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
|
|||
ProcessDraw(true, num_instances);
|
||||
}
|
||||
|
||||
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
|
||||
draw_state.topology = topology;
|
||||
|
||||
ProcessDrawIndirect();
|
||||
}
|
||||
|
||||
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
|
||||
u32 index_count) {
|
||||
const auto& regs{maxwell3d->regs};
|
||||
draw_state.topology = topology;
|
||||
draw_state.index_buffer = regs.index_buffer;
|
||||
draw_state.index_buffer.first = index_first;
|
||||
draw_state.index_buffer.count = index_count;
|
||||
|
||||
ProcessDrawIndirect();
|
||||
}
|
||||
|
||||
void DrawManager::SetInlineIndexBuffer(u32 index) {
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
|
||||
|
@ -198,4 +215,18 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
|
|||
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
|
||||
}
|
||||
}
|
||||
|
||||
void DrawManager::ProcessDrawIndirect() {
|
||||
LOG_TRACE(
|
||||
HW_GPU,
|
||||
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
|
||||
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
|
||||
indirect_state.buffer_size, indirect_state.max_draw_counts);
|
||||
|
||||
UpdateTopology();
|
||||
|
||||
if (maxwell3d->ShouldExecute()) {
|
||||
maxwell3d->rasterizer->DrawIndirect();
|
||||
}
|
||||
}
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -32,6 +32,16 @@ public:
|
|||
std::vector<u8> inline_index_draw_indexes;
|
||||
};
|
||||
|
||||
struct IndirectParams {
|
||||
bool is_indexed;
|
||||
bool include_count;
|
||||
GPUVAddr count_start_address;
|
||||
GPUVAddr indirect_start_address;
|
||||
size_t buffer_size;
|
||||
size_t max_draw_counts;
|
||||
size_t stride;
|
||||
};
|
||||
|
||||
explicit DrawManager(Maxwell3D* maxwell_3d);
|
||||
|
||||
void ProcessMethodCall(u32 method, u32 argument);
|
||||
|
@ -46,10 +56,22 @@ public:
|
|||
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
|
||||
u32 base_instance, u32 num_instances);
|
||||
|
||||
void DrawArrayIndirect(PrimitiveTopology topology);
|
||||
|
||||
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
|
||||
|
||||
const State& GetDrawState() const {
|
||||
return draw_state;
|
||||
}
|
||||
|
||||
IndirectParams& GetIndirectParams() {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
const IndirectParams& GetIndirectParams() const {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
private:
|
||||
void SetInlineIndexBuffer(u32 index);
|
||||
|
||||
|
@ -63,7 +85,10 @@ private:
|
|||
|
||||
void ProcessDraw(bool draw_indexed, u32 instance_count);
|
||||
|
||||
void ProcessDrawIndirect();
|
||||
|
||||
Maxwell3D* maxwell3d{};
|
||||
State draw_state{};
|
||||
IndirectParams indirect_state{};
|
||||
};
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
@ -17,6 +21,26 @@ public:
|
|||
/// Write multiple values to the register identified by method.
|
||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) = 0;
|
||||
|
||||
void ConsumeSink() {
|
||||
if (method_sink.empty()) {
|
||||
return;
|
||||
}
|
||||
ConsumeSinkImpl();
|
||||
}
|
||||
|
||||
std::bitset<std::numeric_limits<u16>::max()> execution_mask{};
|
||||
std::vector<std::pair<u32, u32>> method_sink{};
|
||||
bool current_dirty{};
|
||||
GPUVAddr current_dma_segment;
|
||||
|
||||
protected:
|
||||
virtual void ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
CallMethod(method, value, true);
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
|
|||
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
||||
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
||||
regs.dest.BlockDepth(), regs.line_length_in);
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "common/microprofile.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/sw_blitter/blitter.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
@ -20,11 +21,14 @@ namespace Tegra::Engines {
|
|||
|
||||
using namespace Texture;
|
||||
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
|
||||
execution_mask.reset();
|
||||
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
@ -49,6 +53,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
|
|||
}
|
||||
}
|
||||
|
||||
void Fermi2D::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void Fermi2D::Blit() {
|
||||
MICROPROFILE_SCOPE(GPU_BlitEngine);
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
|
@ -94,6 +105,7 @@ void Fermi2D::Blit() {
|
|||
config.src_x0 = 0;
|
||||
}
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||
sw_blitter->Blit(src, regs.dst, config);
|
||||
}
|
||||
|
|
|
@ -305,10 +305,13 @@ public:
|
|||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
void Blit();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
|
|
@ -14,7 +14,12 @@
|
|||
namespace Tegra::Engines {
|
||||
|
||||
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
|
||||
execution_mask.reset();
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
|
||||
}
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
|
@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
|
|||
upload_state.BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
void KeplerCompute::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerCompute register, increase the size of the Regs structure");
|
||||
|
|
|
@ -204,6 +204,8 @@ public:
|
|||
private:
|
||||
void ProcessLaunch();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||
|
||||
|
|
|
@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;
|
|||
|
||||
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
upload_state.BindRasterizer(rasterizer_);
|
||||
|
||||
execution_mask.reset();
|
||||
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
|
||||
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
|
||||
}
|
||||
|
||||
void KeplerMemory::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
|
|
|
@ -73,6 +73,8 @@ public:
|
|||
} regs{};
|
||||
|
||||
private:
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
Core::System& system;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
#include <cstring>
|
||||
#include <optional>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
|
@ -28,6 +30,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
|||
regs.upload} {
|
||||
dirty.flags.flip();
|
||||
InitializeRegisterDefaults();
|
||||
execution_mask.reset();
|
||||
for (size_t i = 0; i < execution_mask.size(); i++) {
|
||||
execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
|
||||
}
|
||||
}
|
||||
|
||||
Maxwell3D::~Maxwell3D() = default;
|
||||
|
@ -121,6 +127,71 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
|||
shadow_state = regs;
|
||||
}
|
||||
|
||||
bool Maxwell3D::IsMethodExecutable(u32 method) {
|
||||
if (method >= MacroRegistersStart) {
|
||||
return true;
|
||||
}
|
||||
switch (method) {
|
||||
case MAXWELL3D_REG_INDEX(draw.end):
|
||||
case MAXWELL3D_REG_INDEX(draw.begin):
|
||||
case MAXWELL3D_REG_INDEX(vertex_buffer.first):
|
||||
case MAXWELL3D_REG_INDEX(vertex_buffer.count):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer.first):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer.count):
|
||||
case MAXWELL3D_REG_INDEX(draw_inline_index):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer32_first):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer16_first):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer8_first):
|
||||
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
|
||||
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
|
||||
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
|
||||
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(wait_for_idle):
|
||||
case MAXWELL3D_REG_INDEX(shadow_ram_control):
|
||||
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
|
||||
case MAXWELL3D_REG_INDEX(load_mme.instruction):
|
||||
case MAXWELL3D_REG_INDEX(load_mme.start_address):
|
||||
case MAXWELL3D_REG_INDEX(falcon[4]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(topology_override):
|
||||
case MAXWELL3D_REG_INDEX(clear_surface):
|
||||
case MAXWELL3D_REG_INDEX(report_semaphore.query):
|
||||
case MAXWELL3D_REG_INDEX(render_enable.mode):
|
||||
case MAXWELL3D_REG_INDEX(clear_report_value):
|
||||
case MAXWELL3D_REG_INDEX(sync_info):
|
||||
case MAXWELL3D_REG_INDEX(launch_dma):
|
||||
case MAXWELL3D_REG_INDEX(inline_data):
|
||||
case MAXWELL3D_REG_INDEX(fragment_barrier):
|
||||
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
|
||||
if (executing_macro == 0) {
|
||||
// A macro call must begin by writing the macro method's register, not its argument.
|
||||
|
@ -130,14 +201,72 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
|||
}
|
||||
|
||||
macro_params.insert(macro_params.end(), base_start, base_start + amount);
|
||||
for (size_t i = 0; i < amount; i++) {
|
||||
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
||||
}
|
||||
macro_segments.emplace_back(current_dma_segment, amount);
|
||||
current_macro_dirty |= current_dirty;
|
||||
current_dirty = false;
|
||||
|
||||
// Call the macro when there are no more parameters in the command buffer
|
||||
if (is_last_call) {
|
||||
ConsumeSink();
|
||||
CallMacroMethod(executing_macro, macro_params);
|
||||
macro_params.clear();
|
||||
macro_addresses.clear();
|
||||
macro_segments.clear();
|
||||
current_macro_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::RefreshParametersImpl() {
|
||||
size_t current_index = 0;
|
||||
for (auto& segment : macro_segments) {
|
||||
if (segment.first == 0) {
|
||||
current_index += segment.second;
|
||||
continue;
|
||||
}
|
||||
memory_manager.ReadBlock(segment.first, ¯o_params[current_index],
|
||||
sizeof(u32) * segment.second);
|
||||
current_index += segment.second;
|
||||
}
|
||||
}
|
||||
|
||||
u32 Maxwell3D::GetMaxCurrentVertices() {
|
||||
u32 num_vertices = 0;
|
||||
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
|
||||
const auto& array = regs.vertex_streams[index];
|
||||
if (array.enable == 0) {
|
||||
continue;
|
||||
}
|
||||
const auto& attribute = regs.vertex_attrib_format[index];
|
||||
if (attribute.constant) {
|
||||
num_vertices = std::max(num_vertices, 1U);
|
||||
continue;
|
||||
}
|
||||
const auto& limit = regs.vertex_stream_limits[index];
|
||||
const GPUVAddr gpu_addr_begin = array.Address();
|
||||
const GPUVAddr gpu_addr_end = limit.Address() + 1;
|
||||
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
|
||||
num_vertices = std::max(
|
||||
num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value()));
|
||||
}
|
||||
return num_vertices;
|
||||
}
|
||||
|
||||
size_t Maxwell3D::EstimateIndexBufferSize() {
|
||||
GPUVAddr start_address = regs.index_buffer.StartAddress();
|
||||
GPUVAddr end_address = regs.index_buffer.EndAddress();
|
||||
constexpr std::array<size_t, 4> max_sizes = {
|
||||
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
|
||||
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
|
||||
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
|
||||
return std::min<size_t>(
|
||||
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
|
||||
byte_size,
|
||||
static_cast<size_t>(end_address - start_address));
|
||||
}
|
||||
|
||||
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
||||
// Keep track of the register value in shadow_state when requested.
|
||||
const auto control = shadow_state.shadow_ram_control;
|
||||
|
@ -152,6 +281,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
|||
return argument;
|
||||
}
|
||||
|
||||
void Maxwell3D::ConsumeSinkImpl() {
|
||||
SCOPE_EXIT({ method_sink.clear(); });
|
||||
const auto control = shadow_state.shadow_ram_control;
|
||||
if (control == Regs::ShadowRamControl::Track ||
|
||||
control == Regs::ShadowRamControl::TrackWithFilter) {
|
||||
|
||||
for (auto [method, value] : method_sink) {
|
||||
shadow_state.reg_array[method] = value;
|
||||
ProcessDirtyRegisters(method, value);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (control == Regs::ShadowRamControl::Replay) {
|
||||
for (auto [method, value] : method_sink) {
|
||||
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (auto [method, value] : method_sink) {
|
||||
ProcessDirtyRegisters(method, value);
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
|
||||
if (regs.reg_array[method] == argument) {
|
||||
return;
|
||||
|
@ -263,7 +415,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
|||
|
||||
const u32 argument = ProcessShadowRam(method, method_argument);
|
||||
ProcessDirtyRegisters(method, argument);
|
||||
|
||||
ProcessMethodCall(method, argument, method_argument, is_last_call);
|
||||
}
|
||||
|
||||
|
@ -294,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
|||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
|
||||
ProcessCBMultiData(base_start, amount);
|
||||
break;
|
||||
case MAXWELL3D_REG_INDEX(inline_data):
|
||||
case MAXWELL3D_REG_INDEX(inline_data): {
|
||||
ASSERT(methods_pending == amount);
|
||||
upload_state.ProcessData(base_start, amount);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
for (u32 i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
|
@ -332,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
|||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
// TODO(Subv): Support the other query units.
|
||||
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
|
||||
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
|
||||
}
|
||||
|
||||
switch (regs.report_semaphore.query.operation) {
|
||||
case Regs::ReportSemaphore::Operation::Release:
|
||||
if (regs.report_semaphore.query.short_query != 0) {
|
||||
|
@ -389,7 +537,11 @@ void Maxwell3D::ProcessQueryCondition() {
|
|||
case Regs::RenderEnable::Override::NeverRender:
|
||||
execute_on = false;
|
||||
break;
|
||||
case Regs::RenderEnable::Override::UseRenderEnable:
|
||||
case Regs::RenderEnable::Override::UseRenderEnable: {
|
||||
if (rasterizer->AccelerateConditionalRendering()) {
|
||||
execute_on = true;
|
||||
return;
|
||||
}
|
||||
switch (regs.render_enable.mode) {
|
||||
case Regs::RenderEnable::Mode::True: {
|
||||
execute_on = true;
|
||||
|
@ -427,6 +579,7 @@ void Maxwell3D::ProcessQueryCondition() {
|
|||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCounterReset() {
|
||||
|
@ -463,7 +616,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
|
|||
}
|
||||
|
||||
void Maxwell3D::ProcessCBBind(size_t stage_index) {
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
|
||||
// stage.
|
||||
const auto& bind_data = regs.bind_groups[stage_index];
|
||||
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot];
|
||||
buffer.enabled = bind_data.valid.Value() != 0;
|
||||
|
@ -490,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
|
|||
|
||||
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
|
||||
const size_t copy_size = amount * sizeof(u32);
|
||||
memory_manager.WriteBlock(address, start_base, copy_size);
|
||||
memory_manager.WriteBlockCached(address, start_base, copy_size);
|
||||
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.offset += static_cast<u32>(copy_size);
|
||||
|
@ -524,4 +678,10 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
|||
return regs.reg_array[method];
|
||||
}
|
||||
|
||||
void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
|
||||
HLEReplacementAttributeType name) {
|
||||
const u64 key = (static_cast<u64>(bank) << 32) | offset;
|
||||
replace_table.emplace(key, name);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -272,6 +272,7 @@ public:
|
|||
};
|
||||
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, Mode> mode;
|
||||
BitField<4, 8, u32> pad;
|
||||
};
|
||||
|
@ -1217,10 +1218,12 @@ public:
|
|||
|
||||
struct Window {
|
||||
union {
|
||||
u32 raw_x;
|
||||
BitField<0, 16, u32> x_min;
|
||||
BitField<16, 16, u32> x_max;
|
||||
};
|
||||
union {
|
||||
u32 raw_y;
|
||||
BitField<0, 16, u32> y_min;
|
||||
BitField<16, 16, u32> y_max;
|
||||
};
|
||||
|
@ -2708,7 +2711,7 @@ public:
|
|||
u32 post_z_pixel_imask; ///< 0x0F1C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x20);
|
||||
ConstantColorRendering const_color_rendering; ///< 0x0F40
|
||||
s32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_mask; ///< 0x0F58
|
||||
u32 stencil_back_func_mask; ///< 0x0F5C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x14);
|
||||
|
@ -2832,9 +2835,9 @@ public:
|
|||
Blend blend; ///< 0x133C
|
||||
u32 stencil_enable; ///< 0x1380
|
||||
StencilOp stencil_front_op; ///< 0x1384
|
||||
s32 stencil_front_ref; ///< 0x1394
|
||||
s32 stencil_front_func_mask; ///< 0x1398
|
||||
s32 stencil_front_mask; ///< 0x139C
|
||||
u32 stencil_front_ref; ///< 0x1394
|
||||
u32 stencil_front_func_mask; ///< 0x1398
|
||||
u32 stencil_front_mask; ///< 0x139C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x4);
|
||||
u32 draw_auto_start_byte_count; ///< 0x13A4
|
||||
PsSaturate frag_color_clamp; ///< 0x13A8
|
||||
|
@ -3020,6 +3023,24 @@ public:
|
|||
/// Store temporary hw register values, used by some calls to restore state after a operation
|
||||
Regs shadow_state;
|
||||
|
||||
// None Engine
|
||||
enum class EngineHint : u32 {
|
||||
None = 0x0,
|
||||
OnHLEMacro = 0x1,
|
||||
};
|
||||
|
||||
EngineHint engine_state{EngineHint::None};
|
||||
|
||||
enum class HLEReplacementAttributeType : u32 {
|
||||
BaseVertex = 0x0,
|
||||
BaseInstance = 0x1,
|
||||
DrawID = 0x2,
|
||||
};
|
||||
|
||||
void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
|
||||
|
||||
std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
||||
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
||||
|
||||
|
@ -3067,6 +3088,35 @@ public:
|
|||
std::unique_ptr<DrawManager> draw_manager;
|
||||
friend class DrawManager;
|
||||
|
||||
GPUVAddr GetMacroAddress(size_t index) const {
|
||||
return macro_addresses[index];
|
||||
}
|
||||
|
||||
void RefreshParameters() {
|
||||
if (!current_macro_dirty) {
|
||||
return;
|
||||
}
|
||||
RefreshParametersImpl();
|
||||
}
|
||||
|
||||
bool AnyParametersDirty() const {
|
||||
return current_macro_dirty;
|
||||
}
|
||||
|
||||
u32 GetMaxCurrentVertices();
|
||||
|
||||
size_t EstimateIndexBufferSize();
|
||||
|
||||
/// Handles a write to the CLEAR_BUFFERS register.
|
||||
void ProcessClearBuffers(u32 layer_count);
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(size_t stage_index);
|
||||
|
||||
/// Handles a write to the CB_DATA[i] register.
|
||||
void ProcessCBData(u32 value);
|
||||
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
||||
|
||||
private:
|
||||
void InitializeRegisterDefaults();
|
||||
|
||||
|
@ -3076,6 +3126,8 @@ private:
|
|||
|
||||
void ProcessDirtyRegisters(u32 method, u32 argument);
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
|
@ -3116,16 +3168,13 @@ private:
|
|||
/// Handles writes to syncing register.
|
||||
void ProcessSyncPoint();
|
||||
|
||||
/// Handles a write to the CB_DATA[i] register.
|
||||
void ProcessCBData(u32 value);
|
||||
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(size_t stage_index);
|
||||
|
||||
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||
std::optional<u64> GetQueryResult();
|
||||
|
||||
void RefreshParametersImpl();
|
||||
|
||||
bool IsMethodExecutable(u32 method);
|
||||
|
||||
Core::System& system;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
|
@ -3145,6 +3194,10 @@ private:
|
|||
Upload::State upload_state;
|
||||
|
||||
bool execute_on{true};
|
||||
|
||||
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
|
||||
std::vector<GPUVAddr> macro_addresses;
|
||||
bool current_macro_dirty{};
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
|
|
@ -21,7 +21,10 @@ namespace Tegra::Engines {
|
|||
using namespace Texture;
|
||||
|
||||
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_} {}
|
||||
: system{system_}, memory_manager{memory_manager_} {
|
||||
execution_mask.reset();
|
||||
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
|
||||
}
|
||||
|
||||
MaxwellDMA::~MaxwellDMA() = default;
|
||||
|
||||
|
@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
|||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void MaxwellDMA::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
|
||||
|
||||
|
@ -59,7 +69,7 @@ void MaxwellDMA::Launch() {
|
|||
if (launch.multi_line_enable) {
|
||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!is_src_pitch && !is_dst_pitch) {
|
||||
// If both the source and the destination are in block layout, assert.
|
||||
CopyBlockLinearToBlockLinear();
|
||||
|
@ -94,6 +104,7 @@ void MaxwellDMA::Launch() {
|
|||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||
regs.line_length_in * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.FlushCaching();
|
||||
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||
|
@ -111,8 +122,8 @@ void MaxwellDMA::Launch() {
|
|||
memory_manager.ReadBlockUnsafe(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
}
|
||||
} else if (is_src_pitch && !is_dst_pitch) {
|
||||
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
||||
|
@ -122,7 +133,7 @@ void MaxwellDMA::Launch() {
|
|||
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlock(
|
||||
memory_manager.WriteBlockCached(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
}
|
||||
|
@ -131,8 +142,8 @@ void MaxwellDMA::Launch() {
|
|||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -194,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
|||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
|
@ -246,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
|
|||
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_in);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
|
@ -277,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
|||
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||
|
@ -337,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
|||
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
|
||||
dst.block_size.height, dst.block_size.depth, pitch);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::ReleaseSemaphore() {
|
||||
|
|
|
@ -231,6 +231,8 @@ private:
|
|||
|
||||
void ReleaseSemaphore();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
Core::System& system;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue