New upstream version 0-1299+ds

This commit is contained in:
Andrea Pappacoda 2023-01-07 17:06:21 +01:00
parent d3b74ab2d5
commit 9209efb76e
170 changed files with 22277 additions and 15633 deletions

2
.ci/scripts/format/script.sh Normal file → Executable file
View File

@ -10,7 +10,7 @@ if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dis
fi
# Default clang-format points to default 3.5 version one
CLANG_FORMAT=clang-format-12
CLANG_FORMAT=${CLANG_FORMAT:-clang-format-12}
$CLANG_FORMAT --version
if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then

View File

@ -3,13 +3,8 @@
cmake_minimum_required(VERSION 3.22)
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
# some of its variables, which is only possible in 3.13+
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/find-modules")
include(DownloadExternals)
include(CMakeDependentOption)
@ -22,6 +17,8 @@ CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_SDL2 "Download bundled SDL2 binaries" ON
# On Linux system SDL2 is likely to be lacking HIDAPI support which have drawbacks but is needed for SDL motion
CMAKE_DEPENDENT_OPTION(YUZU_USE_EXTERNAL_SDL2 "Compile external SDL2" ON "ENABLE_SDL2;NOT MSVC" OFF)
option(ENABLE_LIBUSB "Enable the use of LibUSB" ON)
option(ENABLE_OPENGL "Enable OpenGL" ON)
mark_as_advanced(FORCE ENABLE_OPENGL)
option(ENABLE_QT "Enable the Qt frontend" ON)
@ -35,6 +32,8 @@ option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
option(YUZU_USE_BUNDLED_FFMPEG "Download/Build bundled FFmpeg" "${WIN32}")
option(YUZU_USE_EXTERNAL_VULKAN_HEADERS "Use Vulkan-Headers from externals" ON)
option(YUZU_USE_QT_MULTIMEDIA "Use QtMultimedia for Camera" OFF)
option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
@ -47,6 +46,8 @@ option(YUZU_TESTS "Compile tests" ON)
option(YUZU_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON)
option(YUZU_ROOM "Compile LDN room server" ON)
CMAKE_DEPENDENT_OPTION(YUZU_CRASH_DUMPS "Compile Windows crash dump (Minidump) support" OFF "WIN32" OFF)
option(YUZU_USE_BUNDLED_VCPKG "Use vcpkg for yuzu dependencies" "${MSVC}")
@ -201,36 +202,43 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
# System imported libraries
# =======================================================================
find_package(enet 1.3)
# Enforce the search mode of non-required packages for better and shorter failure messages
find_package(enet 1.3 MODULE)
find_package(fmt 9 REQUIRED)
find_package(inih)
find_package(libusb 1.0.24)
find_package(inih MODULE)
find_package(lz4 REQUIRED)
find_package(nlohmann_json 3.8 REQUIRED)
find_package(Opus 1.3)
find_package(Vulkan 1.3.238)
find_package(Opus 1.3 MODULE)
find_package(ZLIB 1.2 REQUIRED)
find_package(zstd 1.5 REQUIRED)
if (NOT YUZU_USE_EXTERNAL_VULKAN_HEADERS)
find_package(Vulkan 1.3.238 REQUIRED)
endif()
if (ENABLE_LIBUSB)
find_package(libusb 1.0.24 MODULE)
endif()
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
find_package(xbyak 6)
find_package(xbyak 6 CONFIG)
endif()
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
find_package(dynarmic 6.4.0)
find_package(dynarmic 6.4.0 CONFIG)
endif()
if (ENABLE_CUBEB)
find_package(cubeb)
find_package(cubeb CONFIG)
endif()
if (USE_DISCORD_PRESENCE)
find_package(DiscordRPC)
find_package(DiscordRPC MODULE)
endif()
if (ENABLE_WEB_SERVICE)
find_package(cpp-jwt 1.4)
find_package(httplib 0.11)
find_package(cpp-jwt 1.4 CONFIG)
find_package(httplib 0.11 MODULE)
endif()
if (YUZU_TESTS)

View File

@ -5,7 +5,7 @@
include(FindPackageHandleStandardArgs)
find_package(httplib QUIET CONFIG)
if (httplib_FOUND)
if (httplib_CONSIDERED_CONFIGS)
find_package_handle_standard_args(httplib CONFIG_MODE)
else()
find_package(PkgConfig QUIET)

View File

@ -4,7 +4,7 @@
include(FindPackageHandleStandardArgs)
find_package(lz4 QUIET CONFIG)
if (lz4_FOUND)
if (lz4_CONSIDERED_CONFIGS)
find_package_handle_standard_args(lz4 CONFIG_MODE)
else()
find_package(PkgConfig QUIET)

View File

@ -4,7 +4,7 @@
include(FindPackageHandleStandardArgs)
find_package(zstd QUIET CONFIG)
if (zstd_FOUND)
if (zstd_CONSIDERED_CONFIGS)
find_package_handle_standard_args(zstd CONFIG_MODE)
else()
find_package(PkgConfig QUIET)

1334
dist/languages/ca.ts vendored

File diff suppressed because it is too large Load Diff

1334
dist/languages/cs.ts vendored

File diff suppressed because it is too large Load Diff

1332
dist/languages/da.ts vendored

File diff suppressed because it is too large Load Diff

1380
dist/languages/de.ts vendored

File diff suppressed because it is too large Load Diff

1332
dist/languages/el.ts vendored

File diff suppressed because it is too large Load Diff

1338
dist/languages/es.ts vendored

File diff suppressed because it is too large Load Diff

1343
dist/languages/fr.ts vendored

File diff suppressed because it is too large Load Diff

1332
dist/languages/id.ts vendored

File diff suppressed because it is too large Load Diff

1431
dist/languages/it.ts vendored

File diff suppressed because it is too large Load Diff

1334
dist/languages/ja_JP.ts vendored

File diff suppressed because it is too large Load Diff

1334
dist/languages/ko_KR.ts vendored

File diff suppressed because it is too large Load Diff

1334
dist/languages/nb.ts vendored

File diff suppressed because it is too large Load Diff

1334
dist/languages/nl.ts vendored

File diff suppressed because it is too large Load Diff

1335
dist/languages/pl.ts vendored

File diff suppressed because it is too large Load Diff

1384
dist/languages/pt_BR.ts vendored

File diff suppressed because it is too large Load Diff

1348
dist/languages/pt_PT.ts vendored

File diff suppressed because it is too large Load Diff

1501
dist/languages/ru_RU.ts vendored

File diff suppressed because it is too large Load Diff

1334
dist/languages/sv.ts vendored

File diff suppressed because it is too large Load Diff

1346
dist/languages/tr_TR.ts vendored

File diff suppressed because it is too large Load Diff

1475
dist/languages/uk.ts vendored

File diff suppressed because it is too large Load Diff

1332
dist/languages/vi.ts vendored

File diff suppressed because it is too large Load Diff

1332
dist/languages/vi_VN.ts vendored

File diff suppressed because it is too large Load Diff

1337
dist/languages/zh_CN.ts vendored

File diff suppressed because it is too large Load Diff

1341
dist/languages/zh_TW.ts vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,9 @@
# SPDX-FileCopyrightText: 2016 Citra Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
include(DownloadExternals)
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
# some of its variables, which is only possible in 3.13+
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
# xbyak
if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak)
@ -12,8 +12,7 @@ endif()
# Dynarmic
if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic)
set(DYNARMIC_NO_BUNDLED_FMT ON)
set(DYNARMIC_IGNORE_ASSERTS ON CACHE BOOL "" FORCE)
set(DYNARMIC_IGNORE_ASSERTS ON)
add_subdirectory(dynarmic EXCLUDE_FROM_ALL)
add_library(dynarmic::dynarmic ALIAS dynarmic)
endif()
@ -45,7 +44,7 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12" AND CMAKE_CXX_COMPILER
endif()
# libusb
if (NOT TARGET libusb::usb)
if (ENABLE_LIBUSB AND NOT TARGET libusb::usb)
add_subdirectory(libusb EXCLUDE_FROM_ALL)
endif()
@ -60,10 +59,10 @@ if (YUZU_USE_EXTERNAL_SDL2)
Locale Power Render)
foreach(_SUB ${SDL_UNUSED_SUBSYSTEMS})
string(TOUPPER ${_SUB} _OPT)
option(SDL_${_OPT} "" OFF)
set(SDL_${_OPT} OFF)
endforeach()
option(HIDAPI "" ON)
set(HIDAPI ON)
endif()
set(SDL_STATIC ON)
set(SDL_SHARED OFF)
@ -83,7 +82,7 @@ endif()
# Cubeb
if (ENABLE_CUBEB AND NOT TARGET cubeb::cubeb)
set(BUILD_TESTS OFF CACHE BOOL "")
set(BUILD_TESTS OFF)
add_subdirectory(cubeb EXCLUDE_FROM_ALL)
add_library(cubeb::cubeb ALIAS cubeb)
endif()
@ -98,6 +97,7 @@ endif()
# Sirit
add_subdirectory(sirit EXCLUDE_FROM_ALL)
# httplib
if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
if (NOT WIN32)
find_package(OpenSSL 1.1)
@ -108,7 +108,7 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
if (WIN32 OR NOT OPENSSL_FOUND)
# LibreSSL
set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
set(LIBRESSL_SKIP_INSTALL ON)
set(OPENSSLDIR "/etc/ssl/")
add_subdirectory(libressl EXCLUDE_FROM_ALL)
target_include_directories(ssl INTERFACE ./libressl/include)
@ -118,7 +118,6 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
DEFINITION OPENSSL_LIBS)
endif()
# httplib
add_library(httplib INTERFACE)
target_include_directories(httplib INTERFACE ./cpp-httplib)
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
@ -152,6 +151,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
endif()
# Vulkan-Headers
if (NOT TARGET Vulkan::Headers)
if (YUZU_USE_EXTERNAL_VULKAN_HEADERS)
add_subdirectory(Vulkan-Headers EXCLUDE_FROM_ALL)
endif()

View File

@ -1160,6 +1160,14 @@ public:
/// TBD
Id OpSubgroupAllEqualKHR(Id result_type, Id predicate);
// Result is true only in the active invocation with the lowest id in the group, otherwise
// result is false.
Id OpGroupNonUniformElect(Id result_type, Id scope);
// Result is the Value of the invocation from the active invocation with the lowest id in the
// group to all active invocations in the group.
Id OpGroupNonUniformBroadcastFirst(Id result_type, Id scope, Id value);
// Result is the Value of the invocation identified by the id Id to all active invocations in
// the group.
Id OpGroupNonUniformBroadcast(Id result_type, Id scope, Id value, Id id);

View File

@ -36,6 +36,17 @@ Id Module::OpSubgroupAllEqualKHR(Id result_type, Id predicate) {
return *code << OpId{spv::Op::OpSubgroupAllEqualKHR, result_type} << predicate << EndOp{};
}
Id Module::OpGroupNonUniformElect(Id result_type, Id scope) {
code->Reserve(4);
return *code << OpId{spv::Op::OpGroupNonUniformElect, result_type} << scope << EndOp{};
}
Id Module::OpGroupNonUniformBroadcastFirst(Id result_type, Id scope, Id value) {
code->Reserve(5);
return *code << OpId{spv::Op::OpGroupNonUniformBroadcastFirst, result_type} << scope << value
<< EndOp{};
}
Id Module::OpGroupNonUniformBroadcast(Id result_type, Id scope, Id value, Id id) {
code->Reserve(6);
return *code << OpId{spv::Op::OpGroupNonUniformBroadcast, result_type} << scope << value

View File

@ -161,7 +161,10 @@ add_subdirectory(video_core)
add_subdirectory(network)
add_subdirectory(input_common)
add_subdirectory(shader_recompiler)
add_subdirectory(dedicated_room)
if (YUZU_ROOM)
add_subdirectory(dedicated_room)
endif()
if (YUZU_TESTS)
add_subdirectory(tests)

View File

@ -187,11 +187,7 @@ add_library(audio_core STATIC
renderer/voice/voice_info.cpp
renderer/voice/voice_info.h
renderer/voice/voice_state.h
sink/cubeb_sink.cpp
sink/cubeb_sink.h
sink/null_sink.h
sink/sdl2_sink.cpp
sink/sdl2_sink.h
sink/sink.h
sink/sink_details.cpp
sink/sink_details.h
@ -222,11 +218,22 @@ if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
target_link_libraries(audio_core PRIVATE dynarmic::dynarmic)
endif()
if(ENABLE_CUBEB)
if (ENABLE_CUBEB)
target_sources(audio_core PRIVATE
sink/cubeb_sink.cpp
sink/cubeb_sink.h
)
target_link_libraries(audio_core PRIVATE cubeb::cubeb)
target_compile_definitions(audio_core PRIVATE -DHAVE_CUBEB=1)
endif()
if(ENABLE_SDL2)
if (ENABLE_SDL2)
target_sources(audio_core PRIVATE
sink/sdl2_sink.cpp
sink/sdl2_sink.h
)
target_link_libraries(audio_core PRIVATE SDL2::SDL2)
target_compile_definitions(audio_core PRIVATE HAVE_SDL2)
endif()

View File

@ -97,6 +97,7 @@ add_library(common STATIC
point.h
precompiled_headers.h
quaternion.h
range_map.h
reader_writer_queue.h
ring_buffer.h
${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp

View File

@ -393,12 +393,27 @@ public:
}
// Virtual memory initialization
virtual_base = static_cast<u8*>(
mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
#if defined(__FreeBSD__)
virtual_base =
static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0));
if (virtual_base == MAP_FAILED) {
virtual_base = static_cast<u8*>(
mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
if (virtual_base == MAP_FAILED) {
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
throw std::bad_alloc{};
}
}
#else
virtual_base = static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
if (virtual_base == MAP_FAILED) {
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
throw std::bad_alloc{};
}
madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
#endif
good = true;
}

139
src/common/range_map.h Normal file
View File

@ -0,0 +1,139 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <map>
#include <type_traits>
#include "common/common_types.h"
namespace Common {
template <typename KeyTBase, typename ValueT>
class RangeMap {
private:
using KeyT =
std::conditional_t<std::is_signed_v<KeyTBase>, KeyTBase, std::make_signed_t<KeyTBase>>;
public:
explicit RangeMap(ValueT null_value_) : null_value{null_value_} {
container.emplace(std::numeric_limits<KeyT>::min(), null_value);
};
~RangeMap() = default;
void Map(KeyTBase address, KeyTBase address_end, ValueT value) {
KeyT new_address = static_cast<KeyT>(address);
KeyT new_address_end = static_cast<KeyT>(address_end);
if (new_address < 0) {
new_address = 0;
}
if (new_address_end < 0) {
new_address_end = 0;
}
InternalMap(new_address, new_address_end, value);
}
void Unmap(KeyTBase address, KeyTBase address_end) {
Map(address, address_end, null_value);
}
[[nodiscard]] size_t GetContinousSizeFrom(KeyTBase address) const {
const KeyT new_address = static_cast<KeyT>(address);
if (new_address < 0) {
return 0;
}
return ContinousSizeInternal(new_address);
}
[[nodiscard]] ValueT GetValueAt(KeyT address) const {
const KeyT new_address = static_cast<KeyT>(address);
if (new_address < 0) {
return null_value;
}
return GetValueInternal(new_address);
}
private:
using MapType = std::map<KeyT, ValueT>;
using IteratorType = typename MapType::iterator;
using ConstIteratorType = typename MapType::const_iterator;
size_t ContinousSizeInternal(KeyT address) const {
const auto it = GetFirstElementBeforeOrOn(address);
if (it == container.end() || it->second == null_value) {
return 0;
}
const auto it_end = std::next(it);
if (it_end == container.end()) {
return std::numeric_limits<KeyT>::max() - address;
}
return it_end->first - address;
}
ValueT GetValueInternal(KeyT address) const {
const auto it = GetFirstElementBeforeOrOn(address);
if (it == container.end()) {
return null_value;
}
return it->second;
}
ConstIteratorType GetFirstElementBeforeOrOn(KeyT address) const {
auto it = container.lower_bound(address);
if (it == container.begin()) {
return it;
}
if (it != container.end() && (it->first == address)) {
return it;
}
--it;
return it;
}
ValueT GetFirstValueWithin(KeyT address) {
auto it = container.lower_bound(address);
if (it == container.begin()) {
return it->second;
}
if (it == container.end()) [[unlikely]] { // this would be a bug
return null_value;
}
--it;
return it->second;
}
ValueT GetLastValueWithin(KeyT address) {
auto it = container.upper_bound(address);
if (it == container.end()) {
return null_value;
}
if (it == container.begin()) [[unlikely]] { // this would be a bug
return it->second;
}
--it;
return it->second;
}
void InternalMap(KeyT address, KeyT address_end, ValueT value) {
const bool must_add_start = GetFirstValueWithin(address) != value;
const ValueT last_value = GetLastValueWithin(address_end);
const bool must_add_end = last_value != value;
auto it = container.lower_bound(address);
const auto it_end = container.upper_bound(address_end);
while (it != it_end) {
it = container.erase(it);
}
if (must_add_start) {
container.emplace(address, value);
}
if (must_add_end) {
container.emplace(address_end, last_value);
}
}
ValueT null_value;
MapType container;
};
} // namespace Common

View File

@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Renderer
values.fsr_sharpening_slider.SetGlobal(true);
values.renderer_backend.SetGlobal(true);
values.renderer_force_max_clock.SetGlobal(true);
values.vulkan_device.SetGlobal(true);
values.aspect_ratio.SetGlobal(true);
values.max_anisotropy.SetGlobal(true);
@ -200,6 +201,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_asynchronous_shaders.SetGlobal(true);
values.use_fast_gpu_time.SetGlobal(true);
values.use_pessimistic_flushes.SetGlobal(true);
values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
values.bg_red.SetGlobal(true);
values.bg_green.SetGlobal(true);
values.bg_blue.SetGlobal(true);

View File

@ -415,6 +415,7 @@ struct Values {
// Renderer
SwitchableSetting<RendererBackend, true> renderer_backend{
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"};
Setting<bool> renderer_debug{false, "debug"};
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
@ -451,6 +452,8 @@ struct Values {
SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
"use_vulkan_driver_pipeline_cache"};
SwitchableSetting<u8> bg_red{0, "bg_red"};
SwitchableSetting<u8> bg_green{0, "bg_green"};
@ -531,6 +534,7 @@ struct Values {
Setting<bool> reporting_services{false, "reporting_services"};
Setting<bool> quest_flag{false, "quest_flag"};
Setting<bool> disable_macro_jit{false, "disable_macro_jit"};
Setting<bool> disable_macro_hle{false, "disable_macro_hle"};
Setting<bool> extended_logging{false, "extended_logging"};
Setting<bool> use_debug_asserts{false, "use_debug_asserts"};
Setting<bool> use_auto_stub{false, "use_auto_stub"};

View File

@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.enable_cycle_counting = true;
// Code cache size
#ifdef ARCHITECTURE_arm64
config.code_cache_size = 128_MiB;
#else
config.code_cache_size = 512_MiB;
#endif
// Allow memory fault handling to work
if (system.DebuggerEnabled()) {

View File

@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.enable_cycle_counting = true;
// Code cache size
#ifdef ARCHITECTURE_arm64
config.code_cache_size = 128_MiB;
#else
config.code_cache_size = 512_MiB;
#endif
// Allow memory fault handling to work
if (system.DebuggerEnabled()) {

View File

@ -194,9 +194,9 @@ std::size_t VfsFile::WriteBytes(const std::vector<u8>& data, std::size_t offset)
std::string VfsFile::GetFullPath() const {
if (GetContainingDirectory() == nullptr)
return "/" + GetName();
return '/' + GetName();
return GetContainingDirectory()->GetFullPath() + "/" + GetName();
return GetContainingDirectory()->GetFullPath() + '/' + GetName();
}
VirtualFile VfsDirectory::GetFileRelative(std::string_view path) const {
@ -435,7 +435,7 @@ std::string VfsDirectory::GetFullPath() const {
if (IsRoot())
return GetName();
return GetParentDirectory()->GetFullPath() + "/" + GetName();
return GetParentDirectory()->GetFullPath() + '/' + GetName();
}
bool ReadOnlyVfsDirectory::IsWritable() const {

View File

@ -40,6 +40,11 @@ void EmulatedConsole::SetTouchParams() {
touch_params[index++] = std::move(touchscreen_param);
}
if (Settings::values.touch_from_button_maps.empty()) {
LOG_WARNING(Input, "touch_from_button_maps is unset by frontend config");
return;
}
const auto button_index =
static_cast<u64>(Settings::values.touch_from_button_map_index.GetValue());
const auto& touch_buttons = Settings::values.touch_from_button_maps[button_index].buttons;

View File

@ -11,6 +11,11 @@
namespace Core::HID {
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
constexpr s32 HID_TRIGGER_MAX = 0x7fff;
// Use a common UUID for TAS and Virtual Gamepad
constexpr Common::UUID TAS_UUID =
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
constexpr Common::UUID VIRTUAL_UUID =
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {}
@ -348,10 +353,6 @@ void EmulatedController::ReloadInput() {
}
}
// Use a common UUID for TAS
static constexpr Common::UUID TAS_UUID = Common::UUID{
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
// Register TAS devices. No need to force update
for (std::size_t index = 0; index < tas_button_devices.size(); ++index) {
if (!tas_button_devices[index]) {
@ -377,10 +378,6 @@ void EmulatedController::ReloadInput() {
});
}
// Use a common UUID for Virtual Gamepad
static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
// Register virtual devices. No need to force update
for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) {
if (!virtual_button_devices[index]) {
@ -780,7 +777,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback,
// Only read stick values that have the same uuid or are over the threshold to avoid flapping
if (controller.stick_values[index].uuid != uuid) {
if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) {
const bool is_tas = uuid == TAS_UUID;
if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) {
return;
}
if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left &&
!stick_value.right) {
return;
}
}

View File

@ -22,15 +22,19 @@ namespace {
namespace Service::NIFM {
// This is nn::nifm::RequestState
enum class RequestState : u32 {
NotSubmitted = 1,
Error = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
Pending = 2,
Connected = 3,
Invalid = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
OnHold = 2,
Accepted = 3,
Blocking = 4,
};
enum class InternetConnectionType : u8 {
WiFi = 1,
// This is nn::nifm::NetworkInterfaceType
enum class NetworkInterfaceType : u32 {
Invalid = 0,
WiFi_Ieee80211 = 1,
Ethernet = 2,
};
@ -42,14 +46,23 @@ enum class InternetConnectionStatus : u8 {
Connected,
};
// This is nn::nifm::NetworkProfileType
enum class NetworkProfileType : u32 {
User,
SsidList,
Temporary,
};
// This is nn::nifm::IpAddressSetting
struct IpAddressSetting {
bool is_automatic{};
Network::IPv4Address current_address{};
Network::IPv4Address ip_address{};
Network::IPv4Address subnet_mask{};
Network::IPv4Address gateway{};
Network::IPv4Address default_gateway{};
};
static_assert(sizeof(IpAddressSetting) == 0xD, "IpAddressSetting has incorrect size.");
// This is nn::nifm::DnsSetting
struct DnsSetting {
bool is_automatic{};
Network::IPv4Address primary_dns{};
@ -57,18 +70,26 @@ struct DnsSetting {
};
static_assert(sizeof(DnsSetting) == 0x9, "DnsSetting has incorrect size.");
// This is nn::nifm::AuthenticationSetting
struct AuthenticationSetting {
bool is_enabled{};
std::array<char, 0x20> user{};
std::array<char, 0x20> password{};
};
static_assert(sizeof(AuthenticationSetting) == 0x41, "AuthenticationSetting has incorrect size.");
// This is nn::nifm::ProxySetting
struct ProxySetting {
bool enabled{};
bool is_enabled{};
INSERT_PADDING_BYTES(1);
u16 port{};
std::array<char, 0x64> proxy_server{};
bool automatic_auth_enabled{};
std::array<char, 0x20> user{};
std::array<char, 0x20> password{};
AuthenticationSetting authentication{};
INSERT_PADDING_BYTES(1);
};
static_assert(sizeof(ProxySetting) == 0xAA, "ProxySetting has incorrect size.");
// This is nn::nifm::IpSettingData
struct IpSettingData {
IpAddressSetting ip_address_setting{};
DnsSetting dns_setting{};
@ -101,6 +122,7 @@ static_assert(sizeof(NifmWirelessSettingData) == 0x70,
"NifmWirelessSettingData has incorrect size.");
#pragma pack(push, 1)
// This is nn::nifm::detail::sf::NetworkProfileData
struct SfNetworkProfileData {
IpSettingData ip_setting_data{};
u128 uuid{};
@ -114,13 +136,14 @@ struct SfNetworkProfileData {
};
static_assert(sizeof(SfNetworkProfileData) == 0x17C, "SfNetworkProfileData has incorrect size.");
// This is nn::nifm::NetworkProfileData
struct NifmNetworkProfileData {
u128 uuid{};
std::array<char, 0x40> network_name{};
u32 unknown_1{};
u32 unknown_2{};
u8 unknown_3{};
u8 unknown_4{};
NetworkProfileType network_profile_type{};
NetworkInterfaceType network_interface_type{};
bool is_auto_connect{};
bool is_large_capacity{};
INSERT_PADDING_BYTES(2);
NifmWirelessSettingData wireless_setting_data{};
IpSettingData ip_setting_data{};
@ -184,6 +207,7 @@ public:
event1 = CreateKEvent(service_context, "IRequest:Event1");
event2 = CreateKEvent(service_context, "IRequest:Event2");
state = RequestState::NotSubmitted;
}
~IRequest() override {
@ -196,7 +220,7 @@ private:
LOG_WARNING(Service_NIFM, "(STUBBED) called");
if (state == RequestState::NotSubmitted) {
UpdateState(RequestState::Pending);
UpdateState(RequestState::OnHold);
}
IPC::ResponseBuilder rb{ctx, 2};
@ -219,14 +243,14 @@ private:
switch (state) {
case RequestState::NotSubmitted:
return has_connection ? ResultSuccess : ResultNetworkCommunicationDisabled;
case RequestState::Pending:
case RequestState::OnHold:
if (has_connection) {
UpdateState(RequestState::Connected);
UpdateState(RequestState::Accepted);
} else {
UpdateState(RequestState::Error);
UpdateState(RequestState::Invalid);
}
return ResultPendingConnection;
case RequestState::Connected:
case RequestState::Accepted:
default:
return ResultSuccess;
}
@ -338,9 +362,9 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
.ip_setting_data{
.ip_address_setting{
.is_automatic{true},
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
.gateway{Network::TranslateIPv4(net_iface->gateway)},
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
},
.dns_setting{
.is_automatic{true},
@ -348,12 +372,14 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
.secondary_dns{1, 0, 0, 1},
},
.proxy_setting{
.enabled{false},
.is_enabled{false},
.port{},
.proxy_server{},
.automatic_auth_enabled{},
.user{},
.password{},
.authentication{
.is_enabled{},
.user{},
.password{},
},
},
.mtu{1500},
},
@ -370,7 +396,7 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
// When we're connected to a room, spoof the hosts IP address
if (auto room_member = network.GetRoomMember().lock()) {
if (room_member->IsConnected()) {
network_profile_data.ip_setting_data.ip_address_setting.current_address =
network_profile_data.ip_setting_data.ip_address_setting.ip_address =
room_member->GetFakeIpAddress();
}
}
@ -444,9 +470,9 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
return IpConfigInfo{
.ip_address_setting{
.is_automatic{true},
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
.gateway{Network::TranslateIPv4(net_iface->gateway)},
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
},
.dns_setting{
.is_automatic{true},
@ -459,7 +485,7 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
// When we're connected to a room, spoof the hosts IP address
if (auto room_member = network.GetRoomMember().lock()) {
if (room_member->IsConnected()) {
ip_config_info.ip_address_setting.current_address = room_member->GetFakeIpAddress();
ip_config_info.ip_address_setting.ip_address = room_member->GetFakeIpAddress();
}
}
@ -480,7 +506,7 @@ void IGeneralService::GetInternetConnectionStatus(Kernel::HLERequestContext& ctx
LOG_WARNING(Service_NIFM, "(STUBBED) called");
struct Output {
InternetConnectionType type{InternetConnectionType::WiFi};
u8 type{static_cast<u8>(NetworkInterfaceType::WiFi_Ieee80211)};
u8 wifi_strength{3};
InternetConnectionStatus state{InternetConnectionStatus::Connected};
};

View File

@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) {
return Errno::NETUNREACH;
case WSAEMSGSIZE:
return Errno::MSGSIZE;
case WSAETIMEDOUT:
return Errno::TIMEDOUT;
default:
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
return Errno::OTHER;
@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) {
return Errno::NETUNREACH;
case EMSGSIZE:
return Errno::MSGSIZE;
case ETIMEDOUT:
return Errno::TIMEDOUT;
default:
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
return Errno::OTHER;
@ -226,7 +230,7 @@ Errno GetAndLogLastError() {
int e = errno;
#endif
const Errno err = TranslateNativeError(e);
if (err == Errno::AGAIN) {
if (err == Errno::AGAIN || err == Errno::TIMEDOUT) {
return err;
}
LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));

View File

@ -436,7 +436,7 @@ struct Memory::Impl {
}
if (Settings::IsFastmemEnabled()) {
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
}

View File

@ -1,8 +1,6 @@
# SPDX-FileCopyrightText: 2017 Citra Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
add_executable(yuzu-room
precompiled_headers.h
yuzu_room.cpp

View File

@ -4,14 +4,10 @@
add_library(input_common STATIC
drivers/camera.cpp
drivers/camera.h
drivers/gc_adapter.cpp
drivers/gc_adapter.h
drivers/keyboard.cpp
drivers/keyboard.h
drivers/mouse.cpp
drivers/mouse.h
drivers/sdl_driver.cpp
drivers/sdl_driver.h
drivers/tas_input.cpp
drivers/tas_input.h
drivers/touch_screen.cpp
@ -62,8 +58,17 @@ if (ENABLE_SDL2)
target_compile_definitions(input_common PRIVATE HAVE_SDL2)
endif()
if (ENABLE_LIBUSB)
target_sources(input_common PRIVATE
drivers/gc_adapter.cpp
drivers/gc_adapter.h
)
target_link_libraries(input_common PRIVATE libusb::usb)
target_compile_definitions(input_common PRIVATE HAVE_LIBUSB)
endif()
create_target_directory_groups(input_common)
target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost libusb::usb)
target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost)
if (YUZU_USE_PRECOMPILED_HEADERS)
target_precompile_headers(input_common PRIVATE precompiled_headers.h)

View File

@ -5,7 +5,6 @@
#include "common/input.h"
#include "common/param_package.h"
#include "input_common/drivers/camera.h"
#include "input_common/drivers/gc_adapter.h"
#include "input_common/drivers/keyboard.h"
#include "input_common/drivers/mouse.h"
#include "input_common/drivers/tas_input.h"
@ -19,6 +18,10 @@
#include "input_common/input_mapping.h"
#include "input_common/input_poller.h"
#include "input_common/main.h"
#ifdef HAVE_LIBUSB
#include "input_common/drivers/gc_adapter.h"
#endif
#ifdef HAVE_SDL2
#include "input_common/drivers/sdl_driver.h"
#endif
@ -45,7 +48,9 @@ struct InputSubsystem::Impl {
RegisterEngine("keyboard", keyboard);
RegisterEngine("mouse", mouse);
RegisterEngine("touch", touch_screen);
#ifdef HAVE_LIBUSB
RegisterEngine("gcpad", gcadapter);
#endif
RegisterEngine("cemuhookudp", udp_client);
RegisterEngine("tas", tas_input);
RegisterEngine("camera", camera);
@ -72,7 +77,9 @@ struct InputSubsystem::Impl {
UnregisterEngine(keyboard);
UnregisterEngine(mouse);
UnregisterEngine(touch_screen);
#ifdef HAVE_LIBUSB
UnregisterEngine(gcadapter);
#endif
UnregisterEngine(udp_client);
UnregisterEngine(tas_input);
UnregisterEngine(camera);
@ -95,8 +102,10 @@ struct InputSubsystem::Impl {
devices.insert(devices.end(), keyboard_devices.begin(), keyboard_devices.end());
auto mouse_devices = mouse->GetInputDevices();
devices.insert(devices.end(), mouse_devices.begin(), mouse_devices.end());
#ifdef HAVE_LIBUSB
auto gcadapter_devices = gcadapter->GetInputDevices();
devices.insert(devices.end(), gcadapter_devices.begin(), gcadapter_devices.end());
#endif
auto udp_devices = udp_client->GetInputDevices();
devices.insert(devices.end(), udp_devices.begin(), udp_devices.end());
#ifdef HAVE_SDL2
@ -119,9 +128,11 @@ struct InputSubsystem::Impl {
if (engine == mouse->GetEngineName()) {
return mouse;
}
#ifdef HAVE_LIBUSB
if (engine == gcadapter->GetEngineName()) {
return gcadapter;
}
#endif
if (engine == udp_client->GetEngineName()) {
return udp_client;
}
@ -194,9 +205,11 @@ struct InputSubsystem::Impl {
if (engine == mouse->GetEngineName()) {
return true;
}
#ifdef HAVE_LIBUSB
if (engine == gcadapter->GetEngineName()) {
return true;
}
#endif
if (engine == udp_client->GetEngineName()) {
return true;
}
@ -217,7 +230,9 @@ struct InputSubsystem::Impl {
void BeginConfiguration() {
keyboard->BeginConfiguration();
mouse->BeginConfiguration();
#ifdef HAVE_LIBUSB
gcadapter->BeginConfiguration();
#endif
udp_client->BeginConfiguration();
#ifdef HAVE_SDL2
sdl->BeginConfiguration();
@ -227,7 +242,9 @@ struct InputSubsystem::Impl {
void EndConfiguration() {
keyboard->EndConfiguration();
mouse->EndConfiguration();
#ifdef HAVE_LIBUSB
gcadapter->EndConfiguration();
#endif
udp_client->EndConfiguration();
#ifdef HAVE_SDL2
sdl->EndConfiguration();
@ -248,7 +265,6 @@ struct InputSubsystem::Impl {
std::shared_ptr<Keyboard> keyboard;
std::shared_ptr<Mouse> mouse;
std::shared_ptr<GCAdapter> gcadapter;
std::shared_ptr<TouchScreen> touch_screen;
std::shared_ptr<TasInput::Tas> tas_input;
std::shared_ptr<CemuhookUDP::UDPClient> udp_client;
@ -256,6 +272,10 @@ struct InputSubsystem::Impl {
std::shared_ptr<VirtualAmiibo> virtual_amiibo;
std::shared_ptr<VirtualGamepad> virtual_gamepad;
#ifdef HAVE_LIBUSB
std::shared_ptr<GCAdapter> gcadapter;
#endif
#ifdef HAVE_SDL2
std::shared_ptr<SDLDriver> sdl;
#endif

View File

@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
case IR::Attribute::VertexId:
ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
break;
case IR::Attribute::BaseInstance:
ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name);
break;
case IR::Attribute::BaseVertex:
ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name);
break;
case IR::Attribute::DrawID:
ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name);
break;
case IR::Attribute::FrontFace:
ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
break;
@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S
case IR::Attribute::VertexId:
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
break;
case IR::Attribute::BaseInstance:
ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name);
break;
case IR::Attribute::BaseVertex:
ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name);
break;
case IR::Attribute::DrawID:
ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name);
break;
default:
throw NotImplementedException("Get U32 attribute {}", attr);
}

View File

@ -219,7 +219,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR
EmitContext ctx{program, bindings, profile, runtime_info};
Precolor(program);
EmitCode(ctx, program);
const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))};
ctx.header.insert(0, version);
if (program.shared_memory_size > 0) {
const auto requested_size{program.shared_memory_size};

View File

@ -234,6 +234,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
case IR::Attribute::FrontFace:
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
break;
case IR::Attribute::BaseInstance:
ctx.AddF32("{}=itof(gl_BaseInstance);", inst);
break;
case IR::Attribute::BaseVertex:
ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
break;
case IR::Attribute::DrawID:
ctx.AddF32("{}=itof(gl_DrawID);", inst);
break;
default:
throw NotImplementedException("Get attribute {}", attr);
}
@ -250,6 +259,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s
case IR::Attribute::VertexId:
ctx.AddU32("{}=uint(gl_VertexID);", inst);
break;
case IR::Attribute::BaseInstance:
ctx.AddU32("{}=uint(gl_BaseInstance);", inst);
break;
case IR::Attribute::BaseVertex:
ctx.AddU32("{}=uint(gl_BaseVertex);", inst);
break;
case IR::Attribute::DrawID:
ctx.AddU32("{}=uint(gl_DrawID);", inst);
break;
default:
throw NotImplementedException("Get U32 attribute {}", attr);
}

View File

@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
case IR::Attribute::PositionY:
case IR::Attribute::PositionZ:
case IR::Attribute::PositionW:
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
ctx.Const(element)));
return ctx.OpLoad(
ctx.F32[1],
ctx.need_input_position_indirect
? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
ctx.Const(element))
: AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
@ -339,6 +343,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
}
case IR::Attribute::BaseInstance:
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
case IR::Attribute::BaseVertex:
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
case IR::Attribute::DrawID:
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index));
case IR::Attribute::FrontFace:
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
@ -380,6 +390,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
return ctx.OpISub(ctx.U32[1], index, base);
}
case IR::Attribute::BaseInstance:
return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
case IR::Attribute::BaseVertex:
return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
case IR::Attribute::DrawID:
return ctx.OpLoad(ctx.U32[1], ctx.draw_index);
default:
throw NotImplementedException("Read U32 attribute {}", attr);
}

View File

@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
}
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
}
} // Anonymous namespace
@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}

View File

@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
U16 = Name(TypeInt(16, false), "u16");
S16 = Name(TypeInt(16, true), "s16");
}
if (info.uses_int64) {
if (info.uses_int64 && profile.support_int64) {
AddCapability(spv::Capability::Int64);
U64 = Name(TypeInt(64, false), "u64");
}
@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
size_t label_index{0};
if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
AddLabel(labels[label_index]);
const Id pointer{is_array
? OpAccessChain(input_f32, input_position, vertex, masked_index)
: OpAccessChain(input_f32, input_position, masked_index)};
const Id pointer{[&]() {
if (need_input_position_indirect) {
if (is_array)
return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
masked_index);
else
return OpAccessChain(input_f32, input_position, u32_zero_value,
masked_index);
} else {
if (is_array)
return OpAccessChain(input_f32, input_position, vertex, masked_index);
else
return OpAccessChain(input_f32, input_position, masked_index);
}
}()};
const Id result{OpLoad(F32[1], pointer)};
OpReturnValue(result);
++label_index;
@ -1367,30 +1379,56 @@ void EmitContext::DefineInputs(const IR::Program& program) {
Decorate(layer, spv::Decoration::Flat);
}
if (loads.AnyComponent(IR::Attribute::PositionX)) {
const bool is_fragment{stage != Stage::Fragment};
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
input_position = DefineInput(*this, F32[4], true, built_in);
if (profile.support_geometry_shader_passthrough) {
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
Decorate(input_position, spv::Decoration::PassthroughNV);
const bool is_fragment{stage == Stage::Fragment};
if (!is_fragment && profile.has_broken_spirv_position_input) {
need_input_position_indirect = true;
const Id input_position_struct = TypeStruct(F32[4]);
input_position = DefineInput(*this, input_position_struct, true);
MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
static_cast<unsigned>(spv::BuiltIn::Position));
Decorate(input_position_struct, spv::Decoration::Block);
} else {
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
: spv::BuiltIn::Position};
input_position = DefineInput(*this, F32[4], true, built_in);
if (profile.support_geometry_shader_passthrough) {
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
Decorate(input_position, spv::Decoration::PassthroughNV);
}
}
}
}
if (loads[IR::Attribute::InstanceId]) {
if (profile.support_vertex_instance_id) {
instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
if (loads[IR::Attribute::BaseInstance]) {
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
}
} else {
instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
}
} else if (loads[IR::Attribute::BaseInstance]) {
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
}
if (loads[IR::Attribute::VertexId]) {
if (profile.support_vertex_instance_id) {
vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
if (loads[IR::Attribute::BaseVertex]) {
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
}
} else {
vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
}
} else if (loads[IR::Attribute::BaseVertex]) {
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
}
if (loads[IR::Attribute::DrawID]) {
draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex);
}
if (loads[IR::Attribute::FrontFace]) {
front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);

View File

@ -218,6 +218,7 @@ public:
Id base_instance{};
Id vertex_id{};
Id vertex_index{};
Id draw_index{};
Id base_vertex{};
Id front_face{};
Id point_coord{};
@ -279,6 +280,7 @@ public:
Id write_global_func_u32x2{};
Id write_global_func_u32x4{};
bool need_input_position_indirect{};
Id input_position{};
std::array<Id, 32> input_generics{};

View File

@ -34,6 +34,11 @@ public:
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
[[nodiscard]] virtual bool HasHLEMacroState() const = 0;
[[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank,
u32 offset) = 0;
virtual void Dump(u64 hash) = 0;
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
@ -52,11 +57,16 @@ public:
return start_address;
}
[[nodiscard]] bool IsPropietaryDriver() const noexcept {
return is_propietary_driver;
}
protected:
ProgramHeader sph{};
std::array<u32, 8> gp_passthrough_mask{};
Stage stage{};
u32 start_address{};
bool is_propietary_driver{};
};
} // namespace Shader

View File

@ -446,6 +446,12 @@ std::string NameOf(Attribute attribute) {
return "ViewportMask";
case Attribute::FrontFace:
return "FrontFace";
case Attribute::BaseInstance:
return "BaseInstance";
case Attribute::BaseVertex:
return "BaseVertex";
case Attribute::DrawID:
return "DrawID";
}
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
}

View File

@ -219,6 +219,11 @@ enum class Attribute : u64 {
FixedFncTexture9Q = 231,
ViewportMask = 232,
FrontFace = 255,
// Implementation attributes
BaseInstance = 256,
BaseVertex = 257,
DrawID = 258,
};
constexpr size_t NUM_GENERICS = 32;

View File

@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
return GetAttributeU32(attribute, Imm32(0));
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) {
return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex);
}
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
Inst(Opcode::SetAttribute, attribute, value, vertex);
}

View File

@ -74,6 +74,8 @@ public:
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);

View File

@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
}
return mapping;
}
void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
const Shader::VaryingState& passthrough_mask,
bool passthrough_position,
std::optional<IR::Attribute> passthrough_layer_attr) {
for (u32 i = 0; i < program.output_vertices; i++) {
// Assign generics from input
for (u32 j = 0; j < 32; j++) {
if (!passthrough_mask.Generic(j)) {
continue;
}
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
}
if (passthrough_position) {
// Assign position from input
const IR::Attribute attr = IR::Attribute::PositionX;
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
}
if (passthrough_layer_attr) {
// Assign layer
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
ir.Imm32(0));
}
// Emit vertex
ir.EmitVertex(ir.Imm32(0));
}
ir.EndPrimitive(ir.Imm32(0));
}
u32 GetOutputTopologyVertices(OutputTopology output_topology) {
switch (output_topology) {
case OutputTopology::PointList:
return 1;
case OutputTopology::LineStrip:
return 2;
default:
return 3;
}
}
void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::Epilogue) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
EmitGeometryPassthrough(
ir, program, program.info.passthrough,
program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
}
}
}
}
} // Anonymous namespace
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
@ -198,6 +262,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
}
if (!host_info.support_geometry_shader_passthrough) {
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
LowerGeometryPassthrough(program, host_info);
}
}
break;
}
@ -219,11 +288,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
Optimization::SsaRewritePass(program);
Optimization::ConstantPropagationPass(program);
Optimization::ConstantPropagationPass(env, program);
Optimization::PositionPass(env, program);
Optimization::GlobalMemoryToStorageBufferPass(program);
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
Optimization::TexturePass(env, program, host_info);
if (Settings::values.resolution_info.active) {
@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
IR::Program program;
program.stage = Stage::Geometry;
program.output_topology = output_topology;
switch (output_topology) {
case OutputTopology::PointList:
program.output_vertices = 1;
break;
case OutputTopology::LineStrip:
program.output_vertices = 2;
break;
default:
program.output_vertices = 3;
break;
}
program.output_vertices = GetOutputTopologyVertices(output_topology);
program.is_geometry_passthrough = false;
program.info.loads.mask = source_program.info.stores.mask;
@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
node.data.block = current_block;
IR::IREmitter ir{*current_block};
for (u32 i = 0; i < program.output_vertices; i++) {
// Assign generics from input
for (u32 j = 0; j < 32; j++) {
if (!program.info.stores.Generic(j)) {
continue;
}
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
}
// Assign position from input
const IR::Attribute attr = IR::Attribute::PositionX;
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
// Assign layer
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
ir.Imm32(0));
// Emit vertex
ir.EmitVertex(ir.Imm32(0));
}
ir.EndPrimitive(ir.Imm32(0));
EmitGeometryPassthrough(ir, program, program.info.stores, true,
source_program.info.emulated_layer);
IR::Block* return_block{block_pool.Create(inst_pool)};
IR::IREmitter{*return_block}.Epilogue();

View File

@ -15,6 +15,9 @@ struct HostTranslateInfo {
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
};
} // namespace Shader

View File

@ -7,6 +7,7 @@
#include <type_traits>
#include "common/bit_cast.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
@ -515,6 +516,9 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
case IR::Attribute::PrimitiveId:
case IR::Attribute::InstanceId:
case IR::Attribute::VertexId:
case IR::Attribute::BaseVertex:
case IR::Attribute::BaseInstance:
case IR::Attribute::DrawID:
break;
default:
return;
@ -644,7 +648,63 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
}
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
const IR::Value bank{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
if (!bank.IsImmediate() || !offset.IsImmediate()) {
return;
}
const auto bank_value = bank.U32();
const auto offset_value = offset.U32();
auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value);
if (!replacement) {
return;
}
const auto new_attribute = [replacement]() {
switch (*replacement) {
case ReplaceConstant::BaseInstance:
return IR::Attribute::BaseInstance;
case ReplaceConstant::BaseVertex:
return IR::Attribute::BaseVertex;
case ReplaceConstant::DrawID:
return IR::Attribute::DrawID;
default:
throw NotImplementedException("Not implemented replacement variable {}", *replacement);
}
}();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute));
} else {
inst.ReplaceUsesWith(ir.GetAttribute(new_attribute));
}
}
void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
const IR::Value bank{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
if (!bank.IsImmediate() || !offset.IsImmediate()) {
return;
}
const auto bank_value = bank.U32();
if (bank_value != which_bank) {
return;
}
const auto offset_value = offset.U32();
if (offset_value < offset_start || offset_value >= offset_end) {
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
} else {
inst.ReplaceUsesWith(
IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
}
}
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetRegister:
return FoldGetRegister(inst);
@ -789,18 +849,28 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
IR::Opcode::CompositeInsertF16x4);
case IR::Opcode::FSwizzleAdd:
return FoldFSwizzleAdd(block, inst);
case IR::Opcode::GetCbufF32:
case IR::Opcode::GetCbufU32:
if (env.HasHLEMacroState()) {
FoldConstBuffer(env, block, inst);
}
if (env.IsPropietaryDriver()) {
FoldDriverConstBuffer(env, block, inst, 1);
}
break;
default:
break;
}
}
} // Anonymous namespace
void ConstantPropagationPass(IR::Program& program) {
void ConstantPropagationPass(Environment& env, IR::Program& program) {
const auto end{program.post_order_blocks.rend()};
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
IR::Block* const block{*it};
for (IR::Inst& inst : block->Instructions()) {
ConstantPropagation(*block, inst);
ConstantPropagation(env, *block, inst);
}
}
}

View File

@ -11,6 +11,7 @@
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
}
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::U32 offset;
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
}
// Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes.
const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
// Align the offset base to match the host alignment requirements
low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
return ir.ISub(offset, low_cbuf);
}
@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
}
} // Anonymous namespace
void GlobalMemoryToStorageBufferPass(IR::Program& program) {
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
StorageInfo info;
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst};
const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
const IR::U32 offset{
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
Replace(*block, *inst, index, offset);
}
}

View File

@ -13,9 +13,9 @@ struct HostTranslateInfo;
namespace Shader::Optimization {
void CollectShaderInfoPass(Environment& env, IR::Program& program);
void ConstantPropagationPass(IR::Program& program);
void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
void IdentityRemovalPass(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);

View File

@ -55,6 +55,8 @@ struct Profile {
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};
/// The Position builtin needs to be wrapped in a struct when used as an input
bool has_broken_spirv_position_input{};
/// Offset image operands with an unsigned type do not work
bool has_broken_unsigned_image_offsets{};
/// Signed instructions with unsigned data types are misinterpreted

View File

@ -16,6 +16,12 @@
namespace Shader {
enum class ReplaceConstant : u32 {
BaseInstance,
BaseVertex,
DrawID,
};
enum class TextureType : u32 {
Color1D,
ColorArray1D,
@ -59,6 +65,8 @@ enum class Interpolation {
struct ConstantBufferDescriptor {
u32 index;
u32 count;
auto operator<=>(const ConstantBufferDescriptor&) const = default;
};
struct StorageBufferDescriptor {
@ -66,6 +74,8 @@ struct StorageBufferDescriptor {
u32 cbuf_offset;
u32 count;
bool is_written;
auto operator<=>(const StorageBufferDescriptor&) const = default;
};
struct TextureBufferDescriptor {
@ -78,6 +88,8 @@ struct TextureBufferDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
auto operator<=>(const TextureBufferDescriptor&) const = default;
};
using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
@ -89,6 +101,8 @@ struct ImageBufferDescriptor {
u32 cbuf_offset;
u32 count;
u32 size_shift;
auto operator<=>(const ImageBufferDescriptor&) const = default;
};
using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
@ -104,6 +118,8 @@ struct TextureDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
auto operator<=>(const TextureDescriptor&) const = default;
};
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
@ -116,6 +132,8 @@ struct ImageDescriptor {
u32 cbuf_offset;
u32 count;
u32 size_shift;
auto operator<=>(const ImageDescriptor&) const = default;
};
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;

View File

@ -11,7 +11,7 @@
namespace Shader {
struct VaryingState {
std::bitset<256> mask{};
std::bitset<512> mask{};
void Set(IR::Attribute attribute, bool state = true) {
mask[static_cast<size_t>(attribute)] = state;

View File

@ -7,6 +7,7 @@ add_executable(tests
common/fibers.cpp
common/host_memory.cpp
common/param_package.cpp
common/range_map.cpp
common/ring_buffer.cpp
common/scratch_buffer.cpp
common/unique_function.cpp

View File

@ -0,0 +1,70 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <stdexcept>
#include <catch2/catch.hpp>
#include "common/range_map.h"
enum class MappedEnum : u32 {
Invalid = 0,
Valid_1 = 1,
Valid_2 = 2,
Valid_3 = 3,
};
TEST_CASE("Range Map: Setup", "[video_core]") {
Common::RangeMap<u64, MappedEnum> my_map(MappedEnum::Invalid);
my_map.Map(3000, 3500, MappedEnum::Valid_1);
my_map.Unmap(3200, 3600);
my_map.Map(4000, 4500, MappedEnum::Valid_2);
my_map.Map(4200, 4400, MappedEnum::Valid_2);
my_map.Map(4200, 4400, MappedEnum::Valid_1);
REQUIRE(my_map.GetContinousSizeFrom(4200) == 200);
REQUIRE(my_map.GetContinousSizeFrom(3000) == 200);
REQUIRE(my_map.GetContinousSizeFrom(2900) == 0);
REQUIRE(my_map.GetValueAt(2900) == MappedEnum::Invalid);
REQUIRE(my_map.GetValueAt(3100) == MappedEnum::Valid_1);
REQUIRE(my_map.GetValueAt(3000) == MappedEnum::Valid_1);
REQUIRE(my_map.GetValueAt(3200) == MappedEnum::Invalid);
REQUIRE(my_map.GetValueAt(4199) == MappedEnum::Valid_2);
REQUIRE(my_map.GetValueAt(4200) == MappedEnum::Valid_1);
REQUIRE(my_map.GetValueAt(4400) == MappedEnum::Valid_2);
REQUIRE(my_map.GetValueAt(4500) == MappedEnum::Invalid);
REQUIRE(my_map.GetValueAt(4600) == MappedEnum::Invalid);
my_map.Unmap(0, 6000);
for (u64 address = 0; address < 10000; address += 1000) {
REQUIRE(my_map.GetContinousSizeFrom(address) == 0);
}
my_map.Map(1000, 3000, MappedEnum::Valid_1);
my_map.Map(4000, 5000, MappedEnum::Valid_1);
my_map.Map(2500, 4100, MappedEnum::Valid_1);
REQUIRE(my_map.GetContinousSizeFrom(1000) == 4000);
my_map.Map(1000, 3000, MappedEnum::Valid_1);
my_map.Map(4000, 5000, MappedEnum::Valid_2);
my_map.Map(2500, 4100, MappedEnum::Valid_3);
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1500);
REQUIRE(my_map.GetContinousSizeFrom(2500) == 1600);
REQUIRE(my_map.GetContinousSizeFrom(4100) == 900);
REQUIRE(my_map.GetValueAt(900) == MappedEnum::Invalid);
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
REQUIRE(my_map.GetValueAt(2500) == MappedEnum::Valid_3);
REQUIRE(my_map.GetValueAt(4100) == MappedEnum::Valid_2);
REQUIRE(my_map.GetValueAt(5000) == MappedEnum::Invalid);
my_map.Map(2000, 6000, MappedEnum::Valid_3);
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1000);
REQUIRE(my_map.GetContinousSizeFrom(3000) == 3000);
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
REQUIRE(my_map.GetValueAt(1999) == MappedEnum::Valid_1);
REQUIRE(my_map.GetValueAt(1500) == MappedEnum::Valid_1);
REQUIRE(my_map.GetValueAt(2001) == MappedEnum::Valid_3);
REQUIRE(my_map.GetValueAt(5999) == MappedEnum::Valid_3);
REQUIRE(my_map.GetValueAt(6000) == MappedEnum::Invalid);
}

View File

@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
int num = 0;
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
REQUIRE(num == 0);
REQUIRE(num == 1);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
buffer.FlushCachedWrites();

View File

@ -13,6 +13,7 @@ add_library(video_core STATIC
buffer_cache/buffer_base.h
buffer_cache/buffer_cache.cpp
buffer_cache/buffer_cache.h
cache_types.h
cdma_pusher.cpp
cdma_pusher.h
compatible_formats.cpp
@ -84,6 +85,7 @@ add_library(video_core STATIC
gpu.h
gpu_thread.cpp
gpu_thread.h
invalidation_accumulator.h
memory_manager.cpp
memory_manager.h
precompiled_headers.h
@ -189,6 +191,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_texture_cache_base.cpp
renderer_vulkan/vk_turbo_mode.cpp
renderer_vulkan/vk_turbo_mode.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
shader_cache.cpp

View File

@ -430,7 +430,7 @@ private:
if (query_begin >= SizeBytes() || size < 0) {
return;
}
u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@ -483,7 +483,7 @@ private:
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
const u64 word = current_word;
u64 page = page_begin;
page_begin = 0;
@ -531,7 +531,7 @@ private:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@ -539,8 +539,7 @@ private:
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
const u64 word = state_words[word_index];
if (word == 0) {
continue;
}
@ -564,7 +563,7 @@ private:
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@ -574,8 +573,7 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
const u64 word = state_words[word_index];
if (word == 0) {
continue;
}

View File

@ -200,7 +200,16 @@ public:
/// Return true when a CPU region is modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
std::mutex mutex;
void SetDrawIndirect(
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
current_draw_indirect = current_draw_indirect_;
}
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
std::recursive_mutex mutex;
Runtime& runtime;
private:
@ -272,6 +281,8 @@ private:
void BindHostVertexBuffers();
void BindHostDrawIndirectBuffers();
void BindHostGraphicsUniformBuffers(size_t stage);
void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
@ -298,6 +309,8 @@ private:
void UpdateVertexBuffer(u32 index);
void UpdateDrawIndirect();
void UpdateUniformBuffers(size_t stage);
void UpdateStorageBuffers(size_t stage);
@ -372,6 +385,8 @@ private:
SlotVector<Buffer> slot_buffers;
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
u32 last_index_count = 0;
Binding index_buffer;
@ -380,6 +395,8 @@ private:
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
Binding count_buffer_binding;
Binding indirect_buffer_binding;
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
@ -674,6 +691,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
}
BindHostVertexBuffers();
BindHostTransformFeedbackBuffers();
if (current_draw_indirect) {
BindHostDrawIndirectBuffers();
}
}
template <class P>
@ -823,6 +843,7 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
template <class P>
void BufferCache<P>::CommitAsyncFlushesHigh() {
AccumulateFlushes();
if (committed_ranges.empty()) {
return;
}
@ -869,7 +890,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
buffer_id,
});
// Align up to avoid cache conflicts
constexpr u64 align = 256ULL;
constexpr u64 align = 8ULL;
constexpr u64 mask = ~(align - 1ULL);
total_size_bytes += (new_size + align - 1) & mask;
largest_copy = std::max(largest_copy, new_size);
@ -1041,6 +1062,19 @@ void BufferCache<P>::BindHostVertexBuffers() {
}
}
template <class P>
void BufferCache<P>::BindHostDrawIndirectBuffers() {
const auto bind_buffer = [this](const Binding& binding) {
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
};
if (current_draw_indirect->include_count) {
bind_buffer(count_buffer_binding);
}
bind_buffer(indirect_buffer_binding);
}
template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
u32 dirty = ~0U;
@ -1272,6 +1306,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
UpdateStorageBuffers(stage);
UpdateTextureBuffers(stage);
}
if (current_draw_indirect) {
UpdateDrawIndirect();
}
} while (has_deleted_buffers);
}
@ -1289,7 +1326,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const auto& index_array = draw_state.index_buffer;
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
if (!flags[Dirty::IndexBuffer]) {
return;
}
flags[Dirty::IndexBuffer] = false;
@ -1361,6 +1398,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
};
}
template <class P>
void BufferCache<P>::UpdateDrawIndirect() {
const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
binding = NULL_BINDING;
return;
}
binding = Binding{
.cpu_addr = *cpu_addr,
.size = static_cast<u32>(size),
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
};
};
if (current_draw_indirect->include_count) {
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
}
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
indirect_buffer_binding);
}
template <class P>
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
@ -1880,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
bool is_written) const {
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
const u32 alignment = runtime.GetStorageBufferAlignment();
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
const u32 aligned_size =
Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
const Binding binding{
.cpu_addr = *cpu_addr,
.size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
.buffer_id = BufferId{},
};
return binding;
@ -1941,4 +2006,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
}
}
template <class P>
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
}
template <class P>
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
}
} // namespace VideoCommon

View File

@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace VideoCommon {
enum class CacheType : u32 {
None = 0,
TextureCache = 1 << 0,
QueryCache = 1 << 1,
BufferCache = 1 << 2,
ShaderCache = 1 << 3,
NoTextureCache = QueryCache | BufferCache | ShaderCache,
NoBufferCache = TextureCache | QueryCache | ShaderCache,
NoQueryCache = TextureCache | BufferCache | ShaderCache,
All = TextureCache | QueryCache | BufferCache | ShaderCache,
};
DECLARE_ENUM_FLAG_OPERATORS(CacheType)
} // namespace VideoCommon

View File

@ -61,7 +61,7 @@ bool DmaPusher::Step() {
} else {
const CommandListHeader command_list_header{
command_list.command_lists[dma_pushbuffer_subindex++]};
const GPUVAddr dma_get = command_list_header.addr;
dma_state.dma_get = command_list_header.addr;
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
// We've gone through the current list, remove it from the queue
@ -75,12 +75,22 @@ bool DmaPusher::Step() {
// Push buffer non-empty, read a word
command_headers.resize_destructive(command_list_header.size);
if (Settings::IsGPULevelHigh()) {
memory_manager.ReadBlock(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
constexpr u32 MacroRegistersStart = 0xE00;
if (dma_state.method < MacroRegistersStart) {
if (Settings::IsGPULevelHigh()) {
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
} else {
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
}
} else {
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
const size_t copy_size = command_list_header.size * sizeof(u32);
if (subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty =
memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
}
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
}
ProcessCommands(command_headers);
}
@ -94,6 +104,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
if (dma_state.method_count) {
// Data word of methods command
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
if (dma_state.non_incrementing) {
const u32 max_write = static_cast<u32>(
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
@ -132,6 +143,8 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
case SubmissionMode::Inline:
dma_state.method = command_header.method;
dma_state.subchannel = command_header.subchannel;
dma_state.dma_word_offset = static_cast<u64>(
-static_cast<s64>(dma_state.dma_get)); // negate to set address as 0
CallMethod(command_header.arg_count);
dma_state.non_incrementing = true;
dma_increment_once = false;
@ -164,8 +177,14 @@ void DmaPusher::CallMethod(u32 argument) const {
dma_state.method_count,
});
} else {
subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
dma_state.is_last_call);
auto subchannel = subchannels[dma_state.subchannel];
if (!subchannel->execution_mask[dma_state.method]) [[likely]] {
subchannel->method_sink.emplace_back(dma_state.method, argument);
return;
}
subchannel->ConsumeSink();
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
}
}
@ -174,8 +193,11 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
dma_state.method_count);
} else {
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
num_methods, dma_state.method_count);
auto subchannel = subchannels[dma_state.subchannel];
subchannel->ConsumeSink();
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
dma_state.method_count);
}
}

View File

@ -156,6 +156,8 @@ private:
u32 subchannel; ///< Current subchannel
u32 method_count; ///< Current method count
u32 length_pending; ///< Large NI command length pending
GPUVAddr dma_get; ///< Currently read segment
u64 dma_word_offset; ///< Current word ofset from address
bool non_incrementing; ///< Current command's NI flag
bool is_last_call;
};

View File

@ -91,6 +91,23 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
ProcessDraw(true, num_instances);
}
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
draw_state.topology = topology;
ProcessDrawIndirect();
}
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
u32 index_count) {
const auto& regs{maxwell3d->regs};
draw_state.topology = topology;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer.first = index_first;
draw_state.index_buffer.count = index_count;
ProcessDrawIndirect();
}
void DrawManager::SetInlineIndexBuffer(u32 index) {
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
@ -198,4 +215,18 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
}
}
void DrawManager::ProcessDrawIndirect() {
LOG_TRACE(
HW_GPU,
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
indirect_state.buffer_size, indirect_state.max_draw_counts);
UpdateTopology();
if (maxwell3d->ShouldExecute()) {
maxwell3d->rasterizer->DrawIndirect();
}
}
} // namespace Tegra::Engines

View File

@ -32,6 +32,16 @@ public:
std::vector<u8> inline_index_draw_indexes;
};
struct IndirectParams {
bool is_indexed;
bool include_count;
GPUVAddr count_start_address;
GPUVAddr indirect_start_address;
size_t buffer_size;
size_t max_draw_counts;
size_t stride;
};
explicit DrawManager(Maxwell3D* maxwell_3d);
void ProcessMethodCall(u32 method, u32 argument);
@ -46,10 +56,22 @@ public:
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
u32 base_instance, u32 num_instances);
void DrawArrayIndirect(PrimitiveTopology topology);
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
const State& GetDrawState() const {
return draw_state;
}
IndirectParams& GetIndirectParams() {
return indirect_state;
}
const IndirectParams& GetIndirectParams() const {
return indirect_state;
}
private:
void SetInlineIndexBuffer(u32 index);
@ -63,7 +85,10 @@ private:
void ProcessDraw(bool draw_indexed, u32 instance_count);
void ProcessDrawIndirect();
Maxwell3D* maxwell3d{};
State draw_state{};
IndirectParams indirect_state{};
};
} // namespace Tegra::Engines

View File

@ -3,6 +3,10 @@
#pragma once
#include <bitset>
#include <limits>
#include <vector>
#include "common/common_types.h"
namespace Tegra::Engines {
@ -17,6 +21,26 @@ public:
/// Write multiple values to the register identified by method.
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) = 0;
void ConsumeSink() {
if (method_sink.empty()) {
return;
}
ConsumeSinkImpl();
}
std::bitset<std::numeric_limits<u16>::max()> execution_mask{};
std::vector<std::pair<u32, u32>> method_sink{};
bool current_dirty{};
GPUVAddr current_dma_segment;
protected:
virtual void ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
CallMethod(method, value, true);
}
method_sink.clear();
}
};
} // namespace Tegra::Engines

View File

@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
x_elements, regs.line_count, regs.dest.BlockHeight(),
regs.dest.BlockDepth(), regs.line_length_in);
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
}
}

View File

@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/sw_blitter/blitter.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
@ -20,11 +21,14 @@ namespace Tegra::Engines {
using namespace Texture;
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
execution_mask.reset();
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
}
Fermi2D::~Fermi2D() = default;
@ -49,6 +53,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
}
}
void Fermi2D::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void Fermi2D::Blit() {
MICROPROFILE_SCOPE(GPU_BlitEngine);
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
@ -94,6 +105,7 @@ void Fermi2D::Blit() {
config.src_x0 = 0;
}
memory_manager.FlushCaching();
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
sw_blitter->Blit(src, regs.dst, config);
}

View File

@ -305,10 +305,13 @@ public:
private:
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
void Blit();
void ConsumeSinkImpl() override;
};
#define ASSERT_REG_POSITION(field_name, position) \

View File

@ -14,7 +14,12 @@
namespace Tegra::Engines {
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
execution_mask.reset();
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
}
KeplerCompute::~KeplerCompute() = default;
@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
upload_state.BindRasterizer(rasterizer);
}
void KeplerCompute::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");

View File

@ -204,6 +204,8 @@ public:
private:
void ProcessLaunch();
void ConsumeSinkImpl() override;
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;

View File

@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
upload_state.BindRasterizer(rasterizer_);
execution_mask.reset();
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
}
void KeplerMemory::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {

View File

@ -73,6 +73,8 @@ public:
} regs{};
private:
void ConsumeSinkImpl() override;
Core::System& system;
Upload::State upload_state;
};

View File

@ -4,6 +4,8 @@
#include <cstring>
#include <optional>
#include "common/assert.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/dirty_flags.h"
@ -28,6 +30,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
execution_mask.reset();
for (size_t i = 0; i < execution_mask.size(); i++) {
execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
}
}
Maxwell3D::~Maxwell3D() = default;
@ -121,6 +127,71 @@ void Maxwell3D::InitializeRegisterDefaults() {
shadow_state = regs;
}
bool Maxwell3D::IsMethodExecutable(u32 method) {
if (method >= MacroRegistersStart) {
return true;
}
switch (method) {
case MAXWELL3D_REG_INDEX(draw.end):
case MAXWELL3D_REG_INDEX(draw.begin):
case MAXWELL3D_REG_INDEX(vertex_buffer.first):
case MAXWELL3D_REG_INDEX(vertex_buffer.count):
case MAXWELL3D_REG_INDEX(index_buffer.first):
case MAXWELL3D_REG_INDEX(index_buffer.count):
case MAXWELL3D_REG_INDEX(draw_inline_index):
case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer32_first):
case MAXWELL3D_REG_INDEX(index_buffer16_first):
case MAXWELL3D_REG_INDEX(index_buffer8_first):
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
case MAXWELL3D_REG_INDEX(wait_for_idle):
case MAXWELL3D_REG_INDEX(shadow_ram_control):
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
case MAXWELL3D_REG_INDEX(load_mme.instruction):
case MAXWELL3D_REG_INDEX(load_mme.start_address):
case MAXWELL3D_REG_INDEX(falcon[4]):
case MAXWELL3D_REG_INDEX(const_buffer.buffer):
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
case MAXWELL3D_REG_INDEX(topology_override):
case MAXWELL3D_REG_INDEX(clear_surface):
case MAXWELL3D_REG_INDEX(report_semaphore.query):
case MAXWELL3D_REG_INDEX(render_enable.mode):
case MAXWELL3D_REG_INDEX(clear_report_value):
case MAXWELL3D_REG_INDEX(sync_info):
case MAXWELL3D_REG_INDEX(launch_dma):
case MAXWELL3D_REG_INDEX(inline_data):
case MAXWELL3D_REG_INDEX(fragment_barrier):
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return true;
default:
return false;
}
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
@ -130,14 +201,72 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
}
macro_params.insert(macro_params.end(), base_start, base_start + amount);
for (size_t i = 0; i < amount; i++) {
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
}
macro_segments.emplace_back(current_dma_segment, amount);
current_macro_dirty |= current_dirty;
current_dirty = false;
// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
ConsumeSink();
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
macro_addresses.clear();
macro_segments.clear();
current_macro_dirty = false;
}
}
void Maxwell3D::RefreshParametersImpl() {
size_t current_index = 0;
for (auto& segment : macro_segments) {
if (segment.first == 0) {
current_index += segment.second;
continue;
}
memory_manager.ReadBlock(segment.first, &macro_params[current_index],
sizeof(u32) * segment.second);
current_index += segment.second;
}
}
u32 Maxwell3D::GetMaxCurrentVertices() {
u32 num_vertices = 0;
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
const auto& array = regs.vertex_streams[index];
if (array.enable == 0) {
continue;
}
const auto& attribute = regs.vertex_attrib_format[index];
if (attribute.constant) {
num_vertices = std::max(num_vertices, 1U);
continue;
}
const auto& limit = regs.vertex_stream_limits[index];
const GPUVAddr gpu_addr_begin = array.Address();
const GPUVAddr gpu_addr_end = limit.Address() + 1;
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
num_vertices = std::max(
num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value()));
}
return num_vertices;
}
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
constexpr std::array<size_t, 4> max_sizes = {
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
return std::min<size_t>(
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
byte_size,
static_cast<size_t>(end_address - start_address));
}
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
// Keep track of the register value in shadow_state when requested.
const auto control = shadow_state.shadow_ram_control;
@ -152,6 +281,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
return argument;
}
void Maxwell3D::ConsumeSinkImpl() {
SCOPE_EXIT({ method_sink.clear(); });
const auto control = shadow_state.shadow_ram_control;
if (control == Regs::ShadowRamControl::Track ||
control == Regs::ShadowRamControl::TrackWithFilter) {
for (auto [method, value] : method_sink) {
shadow_state.reg_array[method] = value;
ProcessDirtyRegisters(method, value);
}
return;
}
if (control == Regs::ShadowRamControl::Replay) {
for (auto [method, value] : method_sink) {
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
}
return;
}
for (auto [method, value] : method_sink) {
ProcessDirtyRegisters(method, value);
}
}
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
if (regs.reg_array[method] == argument) {
return;
@ -263,7 +415,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
@ -294,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
ProcessCBMultiData(base_start, amount);
break;
case MAXWELL3D_REG_INDEX(inline_data):
case MAXWELL3D_REG_INDEX(inline_data): {
ASSERT(methods_pending == amount);
upload_state.ProcessData(base_start, amount);
return;
}
default:
for (u32 i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - i <= 1);
@ -332,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
}
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
}
switch (regs.report_semaphore.query.operation) {
case Regs::ReportSemaphore::Operation::Release:
if (regs.report_semaphore.query.short_query != 0) {
@ -389,7 +537,11 @@ void Maxwell3D::ProcessQueryCondition() {
case Regs::RenderEnable::Override::NeverRender:
execute_on = false;
break;
case Regs::RenderEnable::Override::UseRenderEnable:
case Regs::RenderEnable::Override::UseRenderEnable: {
if (rasterizer->AccelerateConditionalRendering()) {
execute_on = true;
return;
}
switch (regs.render_enable.mode) {
case Regs::RenderEnable::Mode::True: {
execute_on = true;
@ -427,6 +579,7 @@ void Maxwell3D::ProcessQueryCondition() {
}
break;
}
}
}
void Maxwell3D::ProcessCounterReset() {
@ -463,7 +616,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
}
void Maxwell3D::ProcessCBBind(size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
// stage.
const auto& bind_data = regs.bind_groups[stage_index];
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot];
buffer.enabled = bind_data.valid.Value() != 0;
@ -490,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
const size_t copy_size = amount * sizeof(u32);
memory_manager.WriteBlock(address, start_base, copy_size);
memory_manager.WriteBlockCached(address, start_base, copy_size);
// Increment the current buffer position.
regs.const_buffer.offset += static_cast<u32>(copy_size);
@ -524,4 +678,10 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
HLEReplacementAttributeType name) {
const u64 key = (static_cast<u64>(bank) << 32) | offset;
replace_table.emplace(key, name);
}
} // namespace Tegra::Engines

View File

@ -272,6 +272,7 @@ public:
};
union {
u32 raw;
BitField<0, 1, Mode> mode;
BitField<4, 8, u32> pad;
};
@ -1217,10 +1218,12 @@ public:
struct Window {
union {
u32 raw_x;
BitField<0, 16, u32> x_min;
BitField<16, 16, u32> x_max;
};
union {
u32 raw_y;
BitField<0, 16, u32> y_min;
BitField<16, 16, u32> y_max;
};
@ -2708,7 +2711,7 @@ public:
u32 post_z_pixel_imask; ///< 0x0F1C
INSERT_PADDING_BYTES_NOINIT(0x20);
ConstantColorRendering const_color_rendering; ///< 0x0F40
s32 stencil_back_ref; ///< 0x0F54
u32 stencil_back_ref; ///< 0x0F54
u32 stencil_back_mask; ///< 0x0F58
u32 stencil_back_func_mask; ///< 0x0F5C
INSERT_PADDING_BYTES_NOINIT(0x14);
@ -2832,9 +2835,9 @@ public:
Blend blend; ///< 0x133C
u32 stencil_enable; ///< 0x1380
StencilOp stencil_front_op; ///< 0x1384
s32 stencil_front_ref; ///< 0x1394
s32 stencil_front_func_mask; ///< 0x1398
s32 stencil_front_mask; ///< 0x139C
u32 stencil_front_ref; ///< 0x1394
u32 stencil_front_func_mask; ///< 0x1398
u32 stencil_front_mask; ///< 0x139C
INSERT_PADDING_BYTES_NOINIT(0x4);
u32 draw_auto_start_byte_count; ///< 0x13A4
PsSaturate frag_color_clamp; ///< 0x13A8
@ -3020,6 +3023,24 @@ public:
/// Store temporary hw register values, used by some calls to restore state after a operation
Regs shadow_state;
// None Engine
enum class EngineHint : u32 {
None = 0x0,
OnHLEMacro = 0x1,
};
EngineHint engine_state{EngineHint::None};
enum class HLEReplacementAttributeType : u32 {
BaseVertex = 0x0,
BaseInstance = 0x1,
DrawID = 0x2,
};
void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@ -3067,6 +3088,35 @@ public:
std::unique_ptr<DrawManager> draw_manager;
friend class DrawManager;
GPUVAddr GetMacroAddress(size_t index) const {
return macro_addresses[index];
}
void RefreshParameters() {
if (!current_macro_dirty) {
return;
}
RefreshParametersImpl();
}
bool AnyParametersDirty() const {
return current_macro_dirty;
}
u32 GetMaxCurrentVertices();
size_t EstimateIndexBufferSize();
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers(u32 layer_count);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
/// Handles a write to the CB_DATA[i] register.
void ProcessCBData(u32 value);
void ProcessCBMultiData(const u32* start_base, u32 amount);
private:
void InitializeRegisterDefaults();
@ -3076,6 +3126,8 @@ private:
void ProcessDirtyRegisters(u32 method, u32 argument);
void ConsumeSinkImpl() override;
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
/// Retrieves information about a specific TIC entry from the TIC buffer.
@ -3116,16 +3168,13 @@ private:
/// Handles writes to syncing register.
void ProcessSyncPoint();
/// Handles a write to the CB_DATA[i] register.
void ProcessCBData(u32 value);
void ProcessCBMultiData(const u32* start_base, u32 amount);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
void RefreshParametersImpl();
bool IsMethodExecutable(u32 method);
Core::System& system;
MemoryManager& memory_manager;
@ -3145,6 +3194,10 @@ private:
Upload::State upload_state;
bool execute_on{true};
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
std::vector<GPUVAddr> macro_addresses;
bool current_macro_dirty{};
};
#define ASSERT_REG_POSITION(field_name, position) \

View File

@ -21,7 +21,10 @@ namespace Tegra::Engines {
using namespace Texture;
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_} {}
: system{system_}, memory_manager{memory_manager_} {
execution_mask.reset();
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
}
MaxwellDMA::~MaxwellDMA() = default;
@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
void MaxwellDMA::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
@ -59,7 +69,7 @@ void MaxwellDMA::Launch() {
if (launch.multi_line_enable) {
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
memory_manager.FlushCaching();
if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
CopyBlockLinearToBlockLinear();
@ -94,6 +104,7 @@ void MaxwellDMA::Launch() {
reinterpret_cast<u8*>(tmp_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
memory_manager.FlushCaching();
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
((address & 0x180) >> 1) | ((address & 0x20) << 3);
@ -111,8 +122,8 @@ void MaxwellDMA::Launch() {
memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
tmp_buffer.data(), tmp_buffer.size());
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
tmp_buffer.size());
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
tmp_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@ -122,7 +133,7 @@ void MaxwellDMA::Launch() {
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
tmp_buffer.size());
memory_manager.WriteBlock(
memory_manager.WriteBlockCached(
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
tmp_buffer.data(), tmp_buffer.size());
}
@ -131,8 +142,8 @@ void MaxwellDMA::Launch() {
std::vector<u8> tmp_buffer(regs.line_length_in);
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
regs.line_length_in);
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
regs.line_length_in);
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
regs.line_length_in);
}
}
}
@ -194,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_out);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyPitchToBlockLinear() {
@ -246,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_in);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::FastCopyBlockLinearToPitch() {
@ -277,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
regs.src_params.block_size.height, regs.src_params.block_size.depth,
regs.pitch_out);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@ -337,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
dst.block_size.height, dst.block_size.depth, pitch);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::ReleaseSemaphore() {

View File

@ -231,6 +231,8 @@ private:
void ReleaseSemaphore();
void ConsumeSinkImpl() override;
Core::System& system;
MemoryManager& memory_manager;

Some files were not shown because too many files have changed in this diff Show More