From 1e3d034f9682956e5737126ef8b97d54cdf08d2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Cea=20L=C3=B3pez?= Date: Thu, 12 Dec 2024 21:45:56 +0100 Subject: [PATCH 01/67] Fix HLE buffer copy not executed when there's only 1 copy. (#1754) --- src/video_core/renderer_vulkan/vk_shader_hle.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index df9d40f079..d1d4f9af3b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -60,7 +60,7 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB; u32 batch_start = 0; - u32 batch_end = 1; + u32 batch_end = copies.size() > 1 ? 1 : 0; while (batch_end < copies.size()) { // Place first copy into the current batch @@ -72,7 +72,7 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg for (int i = batch_start + 1; i < copies.size(); i++) { // Compute new src and dst bounds if we were to batch this copy - const auto [src_offset, dst_offset, size] = copies[i]; + const auto& [src_offset, dst_offset, size] = copies[i]; auto new_src_offset_min = std::min(src_offset_min, src_offset); auto new_src_offset_max = std::max(src_offset_max, src_offset + size); if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) { From 2a19d915e8b8420c92ad819775cd9ae6dd7aa734 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Thu, 12 Dec 2024 22:46:20 +0200 Subject: [PATCH 02/67] fix for detecting more that 2 players and play both with player 1 keys (#1750) --- src/core/libraries/pad/pad.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index ec4186f116..98f086dd9d 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -155,6 +155,9 @@ int 
PS4_SYSV_ABI scePadGetFeatureReport() { } int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index) { + if (userId == -1) { + return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE; + } LOG_DEBUG(Lib_Pad, "(DUMMY) called"); return 1; } @@ -246,6 +249,9 @@ int PS4_SYSV_ABI scePadMbusTerm() { int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenParam* pParam) { LOG_INFO(Lib_Pad, "(DUMMY) called user_id = {} type = {} index = {}", userId, type, index); + if (userId == -1) { + return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE; + } if (Config::getUseSpecialPad()) { if (type != ORBIS_PAD_PORT_TYPE_SPECIAL) return ORBIS_PAD_ERROR_DEVICE_NOT_CONNECTED; @@ -346,6 +352,9 @@ int PS4_SYSV_ABI scePadReadHistory() { } int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { + if (handle == ORBIS_PAD_ERROR_DEVICE_NO_HANDLE) { + return ORBIS_PAD_ERROR_INVALID_HANDLE; + } auto* controller = Common::Singleton::Instance(); int connectedCount = 0; bool isConnected = false; From 3f1061de5613c0c4a74d6394a6493491280bc03f Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Fri, 13 Dec 2024 04:46:31 +0800 Subject: [PATCH 03/67] Resubmit - Prevent settings from being saved when close button is pressed instead of save (#1747) * Do not save settings when close button pressed instead of save * Update src/common/config.h Co-authored-by: TheTurtle * Revert "Update src/common/config.h" This reverts commit 125303ea8674b25e93a4c4cf7b93a0357eac19f4. 
--------- Co-authored-by: rainmakerv2 <30595646+jpau02@users.noreply.github.com> Co-authored-by: TheTurtle --- src/common/config.cpp | 52 ++++-- src/common/config.h | 4 +- src/emulator.cpp | 2 +- src/qt_gui/main_window.cpp | 6 +- src/qt_gui/settings_dialog.cpp | 289 ++++++++++++++++----------------- src/qt_gui/settings_dialog.h | 2 + 6 files changed, 193 insertions(+), 162 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 3db98a438e..4d07ba29f4 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -422,6 +422,10 @@ void setEmulatorLanguage(std::string language) { emulator_language = language; } +void setGameInstallDirs(const std::vector& settings_install_dirs_config) { + settings_install_dirs = settings_install_dirs_config; +} + u32 getMainWindowGeometryX() { return main_window_geometry_x; } @@ -673,14 +677,6 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic; data["Debug"]["DebugDump"] = isDebugDump; data["Debug"]["CollectShader"] = isShaderDebug; - data["GUI"]["theme"] = mw_themes; - data["GUI"]["iconSize"] = m_icon_size; - data["GUI"]["sliderPos"] = m_slider_pos; - data["GUI"]["iconSizeGrid"] = m_icon_size_grid; - data["GUI"]["sliderPosGrid"] = m_slider_pos_grid; - data["GUI"]["gameTableMode"] = m_table_mode; - data["GUI"]["mw_width"] = m_window_size_W; - data["GUI"]["mw_height"] = m_window_size_H; std::vector install_dirs; for (const auto& dirString : settings_install_dirs) { @@ -690,6 +686,43 @@ void save(const std::filesystem::path& path) { data["GUI"]["addonInstallDir"] = std::string{fmt::UTF(settings_addon_install_dir.u8string()).data}; + data["GUI"]["emulatorLanguage"] = emulator_language; + data["Settings"]["consoleLanguage"] = m_language; + + std::ofstream file(path, std::ios::binary); + file << data; + file.close(); +} + +void saveMainWindow(const std::filesystem::path& path) { + toml::value data; + + std::error_code error; + if (std::filesystem::exists(path, 
error)) { + try { + std::ifstream ifs; + ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); + ifs.open(path, std::ios_base::binary); + data = toml::parse(ifs, std::string{fmt::UTF(path.filename().u8string()).data}); + } catch (const std::exception& ex) { + fmt::print("Exception trying to parse config file. Exception: {}\n", ex.what()); + return; + } + } else { + if (error) { + fmt::print("Filesystem error: {}\n", error.message()); + } + fmt::print("Saving new configuration file {}\n", fmt::UTF(path.u8string())); + } + + data["GUI"]["mw_width"] = m_window_size_W; + data["GUI"]["mw_height"] = m_window_size_H; + data["GUI"]["theme"] = mw_themes; + data["GUI"]["iconSize"] = m_icon_size; + data["GUI"]["sliderPos"] = m_slider_pos; + data["GUI"]["iconSizeGrid"] = m_icon_size_grid; + data["GUI"]["sliderPosGrid"] = m_slider_pos_grid; + data["GUI"]["gameTableMode"] = m_table_mode; data["GUI"]["geometry_x"] = main_window_geometry_x; data["GUI"]["geometry_y"] = main_window_geometry_y; data["GUI"]["geometry_w"] = main_window_geometry_w; @@ -697,9 +730,6 @@ void save(const std::filesystem::path& path) { data["GUI"]["pkgDirs"] = m_pkg_viewer; data["GUI"]["elfDirs"] = m_elf_viewer; data["GUI"]["recentFiles"] = m_recent_files; - data["GUI"]["emulatorLanguage"] = emulator_language; - - data["Settings"]["consoleLanguage"] = m_language; std::ofstream file(path, std::ios::binary); file << data; diff --git a/src/common/config.h b/src/common/config.h index d98c944804..ff3b3703f3 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -13,6 +13,7 @@ enum HideCursorState : s16 { Never, Idle, Always }; void load(const std::filesystem::path& path); void save(const std::filesystem::path& path); +void saveMainWindow(const std::filesystem::path& path); bool isNeoMode(); bool isFullscreenMode(); @@ -67,6 +68,7 @@ void setNeoMode(bool enable); void setUserName(const std::string& type); void setUpdateChannel(const std::string& type); void setSeparateUpdateEnabled(bool use); 
+void setGameInstallDirs(const std::vector& settings_install_dirs_config); void setCursorState(s16 cursorState); void setCursorHideTimeout(int newcursorHideTimeout); @@ -128,4 +130,4 @@ void setDefaultValues(); // settings u32 GetLanguage(); -}; // namespace Config +}; // namespace Config \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index 60d6e18d73..eeac5973a7 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -100,7 +100,7 @@ Emulator::Emulator() { Emulator::~Emulator() { const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::save(config_dir / "config.toml"); + Config::saveMainWindow(config_dir / "config.toml"); } void Emulator::Run(const std::filesystem::path& file) { diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 4c40084d36..3eb629c0b2 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -35,7 +35,7 @@ MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWi MainWindow::~MainWindow() { SaveWindowState(); const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::save(config_dir / "config.toml"); + Config::saveMainWindow(config_dir / "config.toml"); } bool MainWindow::Init() { @@ -1006,7 +1006,7 @@ void MainWindow::AddRecentFiles(QString filePath) { } Config::setRecentFiles(vec); const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::save(config_dir / "config.toml"); + Config::saveMainWindow(config_dir / "config.toml"); CreateRecentGameActions(); // Refresh the QActions. 
} @@ -1077,4 +1077,4 @@ bool MainWindow::eventFilter(QObject* obj, QEvent* event) { } } return QMainWindow::eventFilter(obj, event); -} +} \ No newline at end of file diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 1fd4b6e8b9..e67c14ccb7 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -12,12 +12,13 @@ #ifdef ENABLE_UPDATER #include "check_update.h" #endif +#include #include "common/logging/backend.h" #include "common/logging/filter.h" +#include "common/logging/formatter.h" #include "main_window.h" #include "settings_dialog.h" #include "ui_settings_dialog.h" - QStringList languageNames = {"Arabic", "Czech", "Danish", @@ -94,13 +95,18 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge connect(ui->buttonBox, &QDialogButtonBox::clicked, this, [this, config_dir](QAbstractButton* button) { if (button == ui->buttonBox->button(QDialogButtonBox::Save)) { + UpdateSettings(); Config::save(config_dir / "config.toml"); QWidget::close(); } else if (button == ui->buttonBox->button(QDialogButtonBox::Apply)) { + UpdateSettings(); Config::save(config_dir / "config.toml"); } else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) { Config::setDefaultValues(); + Config::save(config_dir / "config.toml"); LoadValuesFromConfig(); + } else if (button == ui->buttonBox->button(QDialogButtonBox::Close)) { + ResetInstallFolders(); } if (Common::Log::IsActive()) { Common::Log::Filter filter; @@ -119,35 +125,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge // GENERAL TAB { - connect(ui->userNameLineEdit, &QLineEdit::textChanged, this, - [](const QString& text) { Config::setUserName(text.toStdString()); }); - - connect(ui->consoleLanguageComboBox, QOverload::of(&QComboBox::currentIndexChanged), - this, [](int index) { - if (index >= 0 && index < languageIndexes.size()) { - int languageCode = languageIndexes[index]; - Config::setLanguage(languageCode); - } - }); - - 
connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setFullscreenMode(val); }); - - connect(ui->separateUpdatesCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setSeparateUpdateEnabled(val); }); - - connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setShowSplash(val); }); - - connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setNeoMode(val); }); - - connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this, - [](const QString& text) { Config::setLogType(text.toStdString()); }); - - connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this, - [](const QString& text) { Config::setLogFilter(text.toStdString()); }); - #ifdef ENABLE_UPDATER connect(ui->updateCheckBox, &QCheckBox::stateChanged, this, [](int state) { Config::setAutoUpdate(state == Qt::Checked); }); @@ -163,74 +140,12 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge ui->updaterGroupBox->setVisible(false); ui->GUIgroupBox->setMaximumSize(265, 16777215); #endif - - connect(ui->playBGMCheckBox, &QCheckBox::stateChanged, this, [](int val) { - Config::setPlayBGM(val); - if (val == Qt::Unchecked) { - BackgroundMusicPlayer::getInstance().stopMusic(); - } - }); - - connect(ui->BGMVolumeSlider, &QSlider::valueChanged, this, [](float val) { - Config::setBGMvolume(val); - BackgroundMusicPlayer::getInstance().setVolume(val); - }); - -#ifdef ENABLE_DISCORD_RPC - connect(ui->discordRPCCheckbox, &QCheckBox::stateChanged, this, [](int val) { - Config::setEnableDiscordRPC(val); - auto* rpc = Common::Singleton::Instance(); - if (val == Qt::Checked) { - rpc->init(); - rpc->setStatusIdling(); - } else { - rpc->shutdown(); - } - }); -#endif } // Input TAB { connect(ui->hideCursorComboBox, QOverload::of(&QComboBox::currentIndexChanged), this, - [this](s16 index) { - Config::setCursorState(index); - OnCursorStateChanged(index); - }); - - connect(ui->idleTimeoutSpinBox, 
&QSpinBox::valueChanged, this, - [](int index) { Config::setCursorHideTimeout(index); }); - - connect(ui->backButtonBehaviorComboBox, QOverload::of(&QComboBox::currentIndexChanged), - this, [this](int index) { - if (index >= 0 && index < ui->backButtonBehaviorComboBox->count()) { - QString data = ui->backButtonBehaviorComboBox->itemData(index).toString(); - Config::setBackButtonBehavior(data.toStdString()); - } - }); - } - - // GPU TAB - { - // First options is auto selection -1, so gpuId on the GUI will always have to subtract 1 - // when setting and add 1 when getting to select the correct gpu in Qt - connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this, - [](int index) { Config::setGpuId(index - 1); }); - - connect(ui->widthSpinBox, &QSpinBox::valueChanged, this, - [](int val) { Config::setScreenWidth(val); }); - - connect(ui->heightSpinBox, &QSpinBox::valueChanged, this, - [](int val) { Config::setScreenHeight(val); }); - - connect(ui->vblankSpinBox, &QSpinBox::valueChanged, this, - [](int val) { Config::setVblankDiv(val); }); - - connect(ui->dumpShadersCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setDumpShaders(val); }); - - connect(ui->nullGpuCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setNullGpu(val); }); + [this](s16 index) { OnCursorStateChanged(index); }); } // PATH TAB @@ -262,21 +177,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge }); } - // DEBUG TAB - { - connect(ui->debugDump, &QCheckBox::stateChanged, this, - [](int val) { Config::setDebugDump(val); }); - - connect(ui->vkValidationCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setVkValidation(val); }); - - connect(ui->vkSyncValidationCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setVkSyncValidation(val); }); - - connect(ui->rdocCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setRdocEnabled(val); }); - } - // Descriptions { // General @@ -323,40 +223,69 @@ 
SettingsDialog::SettingsDialog(std::span physical_devices, QWidge } void SettingsDialog::LoadValuesFromConfig() { + + std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + std::error_code error; + if (!std::filesystem::exists(userdir / "Config.toml", error)) { + Config::load(userdir / "Config.toml"); + return; + } + + try { + std::ifstream ifs; + ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); + const toml::value data = toml::parse(userdir / "Config.toml"); + } catch (std::exception& ex) { + fmt::print("Got exception trying to load config file. Exception: {}\n", ex.what()); + return; + } + + const toml::value data = toml::parse(userdir / "Config.toml"); + const QVector languageIndexes = {21, 23, 14, 6, 18, 1, 12, 22, 2, 4, 25, 24, 29, 5, 0, 9, + 15, 16, 17, 7, 26, 8, 11, 20, 3, 13, 27, 10, 19, 30, 28}; + ui->consoleLanguageComboBox->setCurrentIndex( - std::distance( - languageIndexes.begin(), - std::find(languageIndexes.begin(), languageIndexes.end(), Config::GetLanguage())) % + std::distance(languageIndexes.begin(), + std::find(languageIndexes.begin(), languageIndexes.end(), + toml::find_or(data, "Settings", "consoleLanguage", 6))) % languageIndexes.size()); - ui->emulatorLanguageComboBox->setCurrentIndex(languages[Config::getEmulatorLanguage()]); - ui->hideCursorComboBox->setCurrentIndex(Config::getCursorState()); - OnCursorStateChanged(Config::getCursorState()); - ui->idleTimeoutSpinBox->setValue(Config::getCursorHideTimeout()); - ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1); - ui->widthSpinBox->setValue(Config::getScreenWidth()); - ui->heightSpinBox->setValue(Config::getScreenHeight()); - ui->vblankSpinBox->setValue(Config::vblankDiv()); - ui->dumpShadersCheckBox->setChecked(Config::dumpShaders()); - ui->nullGpuCheckBox->setChecked(Config::nullGpu()); - ui->playBGMCheckBox->setChecked(Config::getPlayBGM()); - ui->BGMVolumeSlider->setValue((Config::getBGMvolume())); - 
ui->discordRPCCheckbox->setChecked(Config::getEnableDiscordRPC()); - ui->fullscreenCheckBox->setChecked(Config::isFullscreenMode()); - ui->separateUpdatesCheckBox->setChecked(Config::getSeparateUpdateEnabled()); - ui->showSplashCheckBox->setChecked(Config::showSplash()); - ui->ps4proCheckBox->setChecked(Config::isNeoMode()); - ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType())); - ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter())); - ui->userNameLineEdit->setText(QString::fromStdString(Config::getUserName())); - - ui->debugDump->setChecked(Config::debugDump()); - ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled()); - ui->vkSyncValidationCheckBox->setChecked(Config::vkValidationSyncEnabled()); - ui->rdocCheckBox->setChecked(Config::isRdocEnabled()); + ui->emulatorLanguageComboBox->setCurrentIndex( + languages[toml::find_or(data, "GUI", "emulatorLanguage", "en")]); + ui->hideCursorComboBox->setCurrentIndex(toml::find_or(data, "Input", "cursorState", 1)); + OnCursorStateChanged(toml::find_or(data, "Input", "cursorState", 1)); + ui->idleTimeoutSpinBox->setValue(toml::find_or(data, "Input", "cursorHideTimeout", 5)); + // First options is auto selection -1, so gpuId on the GUI will always have to subtract 1 + // when setting and add 1 when getting to select the correct gpu in Qt + ui->graphicsAdapterBox->setCurrentIndex(toml::find_or(data, "Vulkan", "gpuId", -1) + 1); + ui->widthSpinBox->setValue(toml::find_or(data, "GPU", "screenWidth", 1280)); + ui->heightSpinBox->setValue(toml::find_or(data, "GPU", "screenHeight", 720)); + ui->vblankSpinBox->setValue(toml::find_or(data, "GPU", "vblankDivider", 1)); + ui->dumpShadersCheckBox->setChecked(toml::find_or(data, "GPU", "dumpShaders", false)); + ui->nullGpuCheckBox->setChecked(toml::find_or(data, "GPU", "nullGpu", false)); + ui->playBGMCheckBox->setChecked(toml::find_or(data, "General", "playBGM", false)); + 
ui->BGMVolumeSlider->setValue(toml::find_or(data, "General", "BGMvolume", 50)); + ui->discordRPCCheckbox->setChecked( + toml::find_or(data, "General", "enableDiscordRPC", true)); + ui->fullscreenCheckBox->setChecked(toml::find_or(data, "General", "Fullscreen", false)); + ui->separateUpdatesCheckBox->setChecked( + toml::find_or(data, "General", "separateUpdateEnabled", false)); + ui->showSplashCheckBox->setChecked(toml::find_or(data, "General", "showSplash", false)); + ui->ps4proCheckBox->setChecked(toml::find_or(data, "General", "isPS4Pro", false)); + ui->logTypeComboBox->setCurrentText( + QString::fromStdString(toml::find_or(data, "General", "logType", "async"))); + ui->logFilterLineEdit->setText( + QString::fromStdString(toml::find_or(data, "General", "logFilter", ""))); + ui->userNameLineEdit->setText( + QString::fromStdString(toml::find_or(data, "General", "userName", "shadPS4"))); + ui->debugDump->setChecked(toml::find_or(data, "Debug", "DebugDump", false)); + ui->vkValidationCheckBox->setChecked(toml::find_or(data, "Vulkan", "validation", false)); + ui->vkSyncValidationCheckBox->setChecked( + toml::find_or(data, "Vulkan", "validation_sync", false)); + ui->rdocCheckBox->setChecked(toml::find_or(data, "Vulkan", "rdocEnable", false)); #ifdef ENABLE_UPDATER - ui->updateCheckBox->setChecked(Config::autoUpdate()); - std::string updateChannel = Config::getUpdateChannel(); + ui->updateCheckBox->setChecked(toml::find_or(data, "General", "autoUpdate", false)); + std::string updateChannel = toml::find_or(data, "General", "updateChannel", ""); if (updateChannel != "Release" && updateChannel != "Nightly") { if (Common::isRelease) { updateChannel = "Release"; @@ -367,18 +296,13 @@ void SettingsDialog::LoadValuesFromConfig() { ui->updateComboBox->setCurrentText(QString::fromStdString(updateChannel)); #endif - for (const auto& dir : Config::getGameInstallDirs()) { - QString path_string; - Common::FS::PathToQString(path_string, dir); - QListWidgetItem* item = new 
QListWidgetItem(path_string); - ui->gameFoldersListWidget->addItem(item); - } - - QString backButtonBehavior = QString::fromStdString(Config::getBackButtonBehavior()); + QString backButtonBehavior = QString::fromStdString( + toml::find_or(data, "Input", "backButtonBehavior", "left")); int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior); ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0); ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty()); + ResetInstallFolders(); } void SettingsDialog::InitializeEmulatorLanguages() { @@ -554,3 +478,76 @@ bool SettingsDialog::eventFilter(QObject* obj, QEvent* event) { } return QDialog::eventFilter(obj, event); } + +void SettingsDialog::UpdateSettings() { + + const QVector TouchPadIndex = {"left", "center", "right", "none"}; + Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]); + Config::setNeoMode(ui->ps4proCheckBox->isChecked()); + Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked()); + Config::setPlayBGM(ui->playBGMCheckBox->isChecked()); + Config::setNeoMode(ui->ps4proCheckBox->isChecked()); + Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); + Config::setLogFilter(ui->logFilterLineEdit->text().toStdString()); + Config::setUserName(ui->userNameLineEdit->text().toStdString()); + Config::setCursorState(ui->hideCursorComboBox->currentIndex()); + Config::setCursorHideTimeout(ui->idleTimeoutSpinBox->value()); + Config::setGpuId(ui->graphicsAdapterBox->currentIndex() - 1); + Config::setBGMvolume(ui->BGMVolumeSlider->value()); + Config::setLanguage(languageIndexes[ui->consoleLanguageComboBox->currentIndex()]); + Config::setEnableDiscordRPC(ui->discordRPCCheckbox->isChecked()); + Config::setScreenWidth(ui->widthSpinBox->value()); + Config::setScreenHeight(ui->heightSpinBox->value()); + Config::setVblankDiv(ui->vblankSpinBox->value()); + 
Config::setDumpShaders(ui->dumpShadersCheckBox->isChecked()); + Config::setNullGpu(ui->nullGpuCheckBox->isChecked()); + Config::setSeparateUpdateEnabled(ui->separateUpdatesCheckBox->isChecked()); + Config::setShowSplash(ui->showSplashCheckBox->isChecked()); + Config::setDebugDump(ui->debugDump->isChecked()); + Config::setVkValidation(ui->vkValidationCheckBox->isChecked()); + Config::setVkSyncValidation(ui->vkSyncValidationCheckBox->isChecked()); + Config::setRdocEnabled(ui->rdocCheckBox->isChecked()); + Config::setAutoUpdate(ui->updateCheckBox->isChecked()); + Config::setUpdateChannel(ui->updateComboBox->currentText().toStdString()); + +#ifdef ENABLE_DISCORD_RPC + auto* rpc = Common::Singleton::Instance(); + if (Config::getEnableDiscordRPC()) { + rpc->init(); + rpc->setStatusIdling(); + } else { + rpc->shutdown(); + } +#endif + + BackgroundMusicPlayer::getInstance().setVolume(ui->BGMVolumeSlider->value()); + ResetInstallFolders(); +} + +void SettingsDialog::ResetInstallFolders() { + + std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + const toml::value data = toml::parse(userdir / "Config.toml"); + + if (data.contains("GUI")) { + const toml::value& gui = data.at("GUI"); + const auto install_dir_array = + toml::find_or>(gui, "installDirs", {}); + std::vector settings_install_dirs_config = {}; + + for (const auto& dir : install_dir_array) { + if (std::find(settings_install_dirs_config.begin(), settings_install_dirs_config.end(), + dir) == settings_install_dirs_config.end()) { + settings_install_dirs_config.push_back(dir); + } + } + + for (const auto& dir : settings_install_dirs_config) { + QString path_string; + Common::FS::PathToQString(path_string, dir); + QListWidgetItem* item = new QListWidgetItem(path_string); + ui->gameFoldersListWidget->addItem(item); + } + Config::setGameInstallDirs(settings_install_dirs_config); + } +} \ No newline at end of file diff --git a/src/qt_gui/settings_dialog.h b/src/qt_gui/settings_dialog.h 
index 8cdded9802..987b35d457 100644 --- a/src/qt_gui/settings_dialog.h +++ b/src/qt_gui/settings_dialog.h @@ -31,6 +31,8 @@ class SettingsDialog : public QDialog { private: void LoadValuesFromConfig(); + void UpdateSettings(); + void ResetInstallFolders(); void InitializeEmulatorLanguages(); void OnLanguageChanged(int index); void OnCursorStateChanged(s16 index); From 5be807fc8ac6bada55c37428a51cee081bf64498 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Fri, 13 Dec 2024 00:31:49 +0200 Subject: [PATCH 04/67] hot-fix: Fix order of operands --- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fe2660705e..736410dcd2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -130,8 +130,8 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.AddOffset(ctx, offset); operands.Add(spv::ImageOperandsMask::Lod, lod); + operands.AddOffset(ctx, offset); const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref, operands.mask, operands.operands); const Id sample_typed = texture.is_integer ? 
ctx.OpBitcast(ctx.F32[1], sample) : sample; From 91d57e830be312e5296fa7bfc265dfe87f13582c Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:27:36 +0800 Subject: [PATCH 05/67] Fix lowercase filenames fox Linux (#1760) Fix uppercase config filenames Co-authored-by: rainmakerv2 <30595646+jpau02@users.noreply.github.com> --- src/qt_gui/settings_dialog.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index e67c14ccb7..f74f864351 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -226,21 +226,21 @@ void SettingsDialog::LoadValuesFromConfig() { std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); std::error_code error; - if (!std::filesystem::exists(userdir / "Config.toml", error)) { - Config::load(userdir / "Config.toml"); + if (!std::filesystem::exists(userdir / "config.toml", error)) { + Config::load(userdir / "config.toml"); return; } try { std::ifstream ifs; ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); - const toml::value data = toml::parse(userdir / "Config.toml"); + const toml::value data = toml::parse(userdir / "config.toml"); } catch (std::exception& ex) { fmt::print("Got exception trying to load config file. 
Exception: {}\n", ex.what()); return; } - const toml::value data = toml::parse(userdir / "Config.toml"); + const toml::value data = toml::parse(userdir / "config.toml"); const QVector languageIndexes = {21, 23, 14, 6, 18, 1, 12, 22, 2, 4, 25, 24, 29, 5, 0, 9, 15, 16, 17, 7, 26, 8, 11, 20, 3, 13, 27, 10, 19, 30, 28}; @@ -527,7 +527,7 @@ void SettingsDialog::UpdateSettings() { void SettingsDialog::ResetInstallFolders() { std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - const toml::value data = toml::parse(userdir / "Config.toml"); + const toml::value data = toml::parse(userdir / "config.toml"); if (data.contains("GUI")) { const toml::value& gui = data.at("GUI"); From f587931ed387efbf83e8e947bf9859885bbac297 Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:52:54 +0800 Subject: [PATCH 06/67] Fix for adding game folders (#1761) Co-authored-by: rainmakerv2 <30595646+jpau02@users.noreply.github.com> --- src/qt_gui/settings_dialog.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index f74f864351..09d3674f7f 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -521,7 +521,6 @@ void SettingsDialog::UpdateSettings() { #endif BackgroundMusicPlayer::getInstance().setVolume(ui->BGMVolumeSlider->value()); - ResetInstallFolders(); } void SettingsDialog::ResetInstallFolders() { From bab00dbca8be8c0f3fb8433dea1b7bad24012a71 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Fri, 13 Dec 2024 18:23:01 +0200 Subject: [PATCH 07/67] kernel: Fix module finding Patch by Elbread --- src/core/libraries/kernel/process.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 6c29d93050..ba7964bc40 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -50,6 +50,9 @@ 
s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg return handle; } handle = linker->LoadModule(path, true); + if (handle == -1) { + return ORBIS_KERNEL_ERROR_EINVAL; + } auto* module = linker->GetModule(handle); linker->RelocateAnyImports(module); From 8acefd25e77d527eb4a250572ed91161c342e144 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Fri, 13 Dec 2024 18:26:16 +0200 Subject: [PATCH 08/67] hot-fix the hot-fix --- src/core/libraries/kernel/process.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index ba7964bc40..97cc01ebc9 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -51,7 +51,7 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg } handle = linker->LoadModule(path, true); if (handle == -1) { - return ORBIS_KERNEL_ERROR_EINVAL; + return ORBIS_KERNEL_ERROR_ESRCH; } auto* module = linker->GetModule(handle); linker->RelocateAnyImports(module); From cfbd8691261e4d0b06a6ed29ec68c333ef6cc8d1 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Fri, 13 Dec 2024 18:28:19 +0200 Subject: [PATCH 09/67] texture_cache: Improve support for stencil reads (#1758) * texture_cache: Improve support for stencil reads * libraries: Supress some spammy logs * core: Support loading font libraries * texture_cache: Remove assert --- src/core/libraries/audio3d/audio3d.cpp | 2 +- src/core/libraries/gnmdriver/gnmdriver.cpp | 2 +- .../libraries/libc_internal/libc_internal.cpp | 12 +++++++++++ src/emulator.cpp | 7 +++++-- src/video_core/amdgpu/liverpool.h | 4 ++++ .../renderer_vulkan/vk_rasterizer.cpp | 18 ++++++++++------ src/video_core/texture_cache/image.cpp | 4 +++- src/video_core/texture_cache/image.h | 5 +++++ src/video_core/texture_cache/image_info.cpp | 3 +++ src/video_core/texture_cache/image_info.h | 2 +- src/video_core/texture_cache/image_view.cpp | 2 +- 
.../texture_cache/texture_cache.cpp | 21 +++++++++++++++++++ 12 files changed, 69 insertions(+), 13 deletions(-) diff --git a/src/core/libraries/audio3d/audio3d.cpp b/src/core/libraries/audio3d/audio3d.cpp index 44670d87b9..d896524c63 100644 --- a/src/core/libraries/audio3d/audio3d.cpp +++ b/src/core/libraries/audio3d/audio3d.cpp @@ -80,7 +80,7 @@ int PS4_SYSV_ABI sceAudio3dPortGetAttributesSupported(OrbisAudio3dPortId uiPortI int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel, u32* pQueueAvailable) { - LOG_INFO(Lib_Audio3d, "uiPortId = {}", uiPortId); + LOG_TRACE(Lib_Audio3d, "uiPortId = {}", uiPortId); return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 18035e6ce5..dbf085fb38 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -971,7 +971,7 @@ s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() { } void PS4_SYSV_ABI sceGnmFlushGarlic() { - LOG_WARNING(Lib_GnmDriver, "(STUBBED) called"); + LOG_TRACE(Lib_GnmDriver, "(STUBBED) called"); } int PS4_SYSV_ABI sceGnmGetCoredumpAddress() { diff --git a/src/core/libraries/libc_internal/libc_internal.cpp b/src/core/libraries/libc_internal/libc_internal.cpp index eb6046c7a3..8453a78b99 100644 --- a/src/core/libraries/libc_internal/libc_internal.cpp +++ b/src/core/libraries/libc_internal/libc_internal.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/assert.h" #include "common/logging/log.h" @@ -65,6 +66,15 @@ char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t cou return std::strncpy(dest, src, count); } +int PS4_SYSV_ABI internal_strncpy_s(char* dest, size_t destsz, const char* src, size_t count) { +#ifdef _WIN64 + return strncpy_s(dest, destsz, src, count); +#else + std::strcpy(dest, src); + return 0; +#endif +} + char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) { return 
std::strcat(dest, src); } @@ -237,6 +247,8 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { internal_strlen); LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strncpy); + LIB_FUNCTION("YNzNkJzYqEg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strncpy_s); LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strcat); LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, diff --git a/src/emulator.cpp b/src/emulator.cpp index eeac5973a7..c517bc2840 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -266,7 +266,7 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber}, {"libSceUlt.sprx", nullptr}, @@ -276,7 +276,10 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, {"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc}, - {"libSceCesCs.sprx", nullptr}}}; + {"libSceCesCs.sprx", nullptr}, + {"libSceFont.sprx", nullptr}, + {"libSceFontFt.sprx", nullptr}, + {"libSceFreeTypeOt.sprx", nullptr}}}; std::vector found_modules; const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index ca3b016128..9bc3454d8c 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -431,6 +431,10 @@ struct Liverpool { return u64(z_read_base) << 8; } + u64 StencilAddress() const { + return u64(stencil_read_base) << 8; + } + u32 NumSamples() 
const { return 1u << z_info.num_samples; // spec doesn't say it is a log2 } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9abf1b527d..eb2ef36009 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -616,18 +616,24 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{tsharp, image_desc}); image_id = texture_cache.FindImage(desc); - auto& image = texture_cache.GetImage(image_id); - if (image.binding.is_bound) { + auto* image = &texture_cache.GetImage(image_id); + if (image->depth_id) { + // If this image has an associated depth image, it's a stencil attachment. + // Redirect the access to the actual depth-stencil buffer. + image_id = image->depth_id; + image = &texture_cache.GetImage(image_id); + } + if (image->binding.is_bound) { // The image is already bound. In case if it is about to be used as storage we need // to force general layout on it. - image.binding.force_general |= image_desc.is_storage; + image->binding.force_general |= image_desc.is_storage; } - if (image.binding.is_target) { + if (image->binding.is_target) { // The image is already bound as target. Since we read and output to it need to force // general layout too. 
- image.binding.force_general = 1u; + image->binding.force_general = 1u; } - image.binding.is_bound = 1u; + image->binding.is_bound = 1u; } // Second pass to re-bind images that were updated after binding diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index e7e1ce1dab..03339d280e 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -145,8 +145,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, const ImageInfo& info_) : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()} { + if (info.pixel_format == vk::Format::eUndefined) { + return; + } mip_hashes.resize(info.resources.levels); - ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index a1b1b007f2..473dd731e0 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -92,6 +92,10 @@ struct Image { return image_view_ids[std::distance(image_view_infos.begin(), it)]; } + void AssociateDepth(ImageId image_id) { + depth_id = image_id; + } + boost::container::small_vector GetBarriers( vk::ImageLayout dst_layout, vk::Flags dst_mask, vk::PipelineStageFlags2 dst_stage, std::optional subres_range); @@ -116,6 +120,7 @@ struct Image { VAddr track_addr_end = 0; std::vector image_view_infos; std::vector image_view_ids; + ImageId depth_id{}; // Resource state tracking struct { diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 0ed36ee39b..1445d41cd4 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ 
b/src/video_core/texture_cache/image_info.cpp @@ -298,6 +298,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; + stencil_addr = buffer.StencilAddress(); + stencil_size = pitch * size.height * sizeof(u8); + guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); guest_size_bytes = depth_slice_sz * num_slices; diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index e12ae3be10..a657310a8b 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -69,7 +69,7 @@ struct ImageInfo { } props{}; // Surface properties with impact on various calculation factors vk::Format pixel_format = vk::Format::eUndefined; - vk::ImageType type = vk::ImageType::e1D; + vk::ImageType type = vk::ImageType::e2D; SubresourceExtent resources; Extent3D size{1, 1, 1}; u32 num_bits{}; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 61f1aaafe8..12ad201d1a 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -170,7 +170,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } - if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) { + if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Uint) { format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eStencil; } diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 153314d2bd..897d6f67e9 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -443,6 +443,27 @@ 
ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { } } + // If there is a stencil attachment, link depth and stencil. + if (desc.info.stencil_addr != 0) { + ImageId stencil_id{}; + ForEachImageInRegion(desc.info.stencil_addr, desc.info.stencil_size, + [&](ImageId image_id, Image& image) { + if (image.info.guest_address == desc.info.stencil_addr) { + stencil_id = image_id; + } + }); + if (!stencil_id) { + ImageInfo info{}; + info.guest_address = desc.info.stencil_addr; + info.guest_size_bytes = desc.info.stencil_size; + info.size = desc.info.size; + stencil_id = slot_images.insert(instance, scheduler, info); + RegisterImage(stencil_id); + } + Image& image = slot_images[stencil_id]; + image.AssociateDepth(image_id); + } + return RegisterImageView(image_id, desc.view_info); } From 306279901fccb634b7722de1b4cc17f70dd70f6b Mon Sep 17 00:00:00 2001 From: MajorP93 Date: Fri, 13 Dec 2024 17:30:16 +0100 Subject: [PATCH 10/67] ci: Use link-time optimization for building (#1636) * ci: Use link-time optimization for building * cmake: Set CMP0069 policy to new for external dependencies * This enables LTO also when building external dependencies that do not handle CMP0069 in their CMake scripts. 
--- .github/workflows/build.yml | 12 ++++++------ externals/CMakeLists.txt | 3 +++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 878c108681..bacfbea0da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,7 +89,7 @@ jobs: arch: amd64 - name: Configure CMake - run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS @@ -143,7 +143,7 @@ jobs: arch: amd64 - name: Configure CMake - run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS @@ -201,7 +201,7 @@ jobs: variant: sccache - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 
-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) @@ -265,7 +265,7 @@ jobs: variant: sccache - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) @@ -312,7 +312,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -368,7 +368,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake 
- run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 082be211aa..e1e67f235b 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -8,6 +8,9 @@ set_directory_properties(PROPERTIES SYSTEM ON ) +# Set CMP0069 policy to "NEW" in order to ensure consistent behavior when building external targets with LTO enabled +set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) + if (MSVC) # Silence "deprecation" warnings add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS) From 028be3ba5d7da1a0782c053f43cf606c78d9b71b Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:49:07 -0800 Subject: [PATCH 11/67] shader_recompiler: Emulate unnormalized sampler coordinates in shader. (#1762) * shader_recompiler: Emulate unnormalized sampler coordinates in shader. * Address review comments. 
--- .../spirv/emit_spirv_floating_point.cpp | 8 ++++ .../backend/spirv/emit_spirv_instructions.h | 2 + .../frontend/translate/vector_memory.cpp | 1 + src/shader_recompiler/ir/ir_emitter.cpp | 14 +++++++ src/shader_recompiler/ir/ir_emitter.h | 1 + src/shader_recompiler/ir/opcodes.inc | 2 + .../ir/passes/resource_tracking_pass.cpp | 41 ++++++++++++++----- src/shader_recompiler/ir/reg.h | 3 +- src/shader_recompiler/specialization.h | 16 ++++++++ src/video_core/texture_cache/sampler.cpp | 2 +- 10 files changed, 78 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e822eabef7..1e8f31ddcc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } +Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b)); +} + +Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b)); +} + Id EmitFPNeg16(EmitContext& ctx, Id value) { return ctx.OpFNegate(ctx.F16[1], value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index cc3db880cc..071b430d5c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -189,6 +189,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id 
EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPNeg16(EmitContext& ctx, Id value); Id EmitFPNeg32(EmitContext& ctx, Id value); Id EmitFPNeg64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index b7ad3b36bd..74b9c905d2 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -527,6 +527,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.has_offset.Assign(flags.test(MimgModifier::Offset)); info.has_lod.Assign(flags.any(MimgModifier::Lod)); info.is_array.Assign(mimg.da); + info.is_unnormalized.Assign(mimg.unrm); if (gather) { info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 78e7f2289b..5fa20b7449 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) { } } +F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) { + if (a.Type() != b.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F32: + return Inst(Opcode::FPDiv32, a, b); + case Type::F64: + return Inst(Opcode::FPDiv64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) { if (a.Type() != b.Type() || a.Type() != c.Type()) { UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index cbd3780dea..e6608cba7e 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -158,6 +158,7 @@ class IREmitter { [[nodiscard]] F32F64 FPAdd(const 
F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b); + [[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c); [[nodiscard]] F32F64 FPAbs(const F32F64& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 0283ccd0f6..60232a3a18 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -184,6 +184,8 @@ OPCODE(FPMin32, F32, F32, OPCODE(FPMin64, F64, F64, F64, ) OPCODE(FPMul32, F32, F32, F32, ) OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPDiv32, F32, F32, F32, ) +OPCODE(FPDiv64, F64, F64, F64, ) OPCODE(FPNeg32, F32, F32, ) OPCODE(FPNeg64, F64, F64, ) OPCODE(FPRecip32, F32, F32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 89c5c78a0c..9958510492 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -420,26 +420,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors, const IR::Inst* producer, const u32 image_binding, const AmdGpu::Image& image) { // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions - const u32 sampler_binding = [&] { + const auto [sampler_binding, sampler] = [&] -> std::pair { ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); const IR::Value& handle = producer->Arg(1); // Inline sampler resource. 
if (handle.IsImmediate()) { LOG_WARNING(Render_Vulkan, "Inline sampler detected"); - return descriptors.Add(SamplerResource{ + const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}; + const auto binding = descriptors.Add(SamplerResource{ .sharp_idx = std::numeric_limits::max(), - .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}, + .inline_sampler = inline_sampler, }); + return {binding, inline_sampler}; } // Normal sampler resource. const auto ssharp_handle = handle.InstRecursive(); const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); const auto ssharp = TrackSharp(ssharp_ud, info); - return descriptors.Add(SamplerResource{ + const auto binding = descriptors.Add(SamplerResource{ .sharp_idx = ssharp, .associated_image = image_binding, .disable_aniso = disable_aniso, }); + return {binding, info.ReadUdSharp(ssharp)}; }(); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -539,28 +542,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, } }(); + const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized; + // Query dimensions of image if needed for normalization. + // We can't use the image sharp because it could be bound to a different image later. + const auto dimensions = + unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) + : IR::Value{}; + const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value { + const auto coord = get_addr_reg(idx); + if (unnormalized) { + // Normalize the coordinate for sampling, dividing by its corresponding dimension. 
+ return ir.FPDiv(coord, + ir.BitCast(IR::U32{ir.CompositeExtract(dimensions, dim_idx)})); + } + return coord; + }; + // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler const IR::Value coords = [&] -> IR::Value { switch (image.GetType()) { case AmdGpu::ImageType::Color1D: // x addr_reg = addr_reg + 1; - return get_addr_reg(addr_reg - 1); + return get_coord(addr_reg - 1, 0); case AmdGpu::ImageType::Color1DArray: // x, slice [[fallthrough]]; case AmdGpu::ImageType::Color2D: // x, y addr_reg = addr_reg + 2; - return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1)); + return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1)); case AmdGpu::ImageType::Color2DArray: // x, y, slice [[fallthrough]]; case AmdGpu::ImageType::Color2DMsaa: // x, y, frag - [[fallthrough]]; - case AmdGpu::ImageType::Color3D: // x, y, z addr_reg = addr_reg + 3; - return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), + return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), get_addr_reg(addr_reg - 1)); + case AmdGpu::ImageType::Color3D: // x, y, z + addr_reg = addr_reg + 3; + return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), + get_coord(addr_reg - 1, 2)); case AmdGpu::ImageType::Cube: // x, y, face addr_reg = addr_reg + 3; - return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), + return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), get_addr_reg(addr_reg - 1), false, inst_info.is_array); default: UNREACHABLE(); diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 3004d2b868..ca2e9ceb96 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -40,7 +40,8 @@ union TextureInstInfo { BitField<6, 2, u32> gather_comp; BitField<8, 1, u32> has_derivatives; BitField<9, 1, u32> is_array; - BitField<10, 1, u32> 
is_gather; + BitField<10, 1, u32> is_unnormalized; + BitField<11, 1, u32> is_gather; }; union BufferInstInfo { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 2a3bd62f4f..bc8627c1c8 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -49,6 +49,12 @@ struct FMaskSpecialization { auto operator<=>(const FMaskSpecialization&) const = default; }; +struct SamplerSpecialization { + bool force_unnormalized = false; + + auto operator<=>(const SamplerSpecialization&) const = default; +}; + /** * Alongside runtime information, this structure also checks bound resources * for compatibility. Can be used as a key for storing shader permutations. @@ -67,6 +73,7 @@ struct StageSpecialization { boost::container::small_vector tex_buffers; boost::container::small_vector images; boost::container::small_vector fmasks; + boost::container::small_vector samplers; Backend::Bindings start{}; explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, @@ -107,6 +114,10 @@ struct StageSpecialization { spec.width = sharp.width; spec.height = sharp.height; }); + ForEachSharp(samplers, info->samplers, + [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { + spec.force_unnormalized = sharp.force_unnormalized; + }); } void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { @@ -175,6 +186,11 @@ struct StageSpecialization { return false; } } + for (u32 i = 0; i < samplers.size(); i++) { + if (samplers[i] != other.samplers[i]) { + return false; + } + } return true; } }; diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp index e47f53abf3..9f4bc7a7e8 100644 --- a/src/video_core/texture_cache/sampler.cpp +++ b/src/video_core/texture_cache/sampler.cpp @@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample .minLod = sampler.MinLod(), .maxLod = sampler.MaxLod(), .borderColor = 
LiverpoolToVK::BorderColor(sampler.border_color_type), - .unnormalizedCoordinates = bool(sampler.force_unnormalized), + .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations. }; auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci); ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}", From 722a0e36be3486d2084bae557bc6722d7b895b3d Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 13 Dec 2024 21:49:37 +0200 Subject: [PATCH 12/67] graphics: Improve handling of color buffer and storage image swizzles (#1763) * liverpool_to_vk: Remove wrong component swap formats * shader_recompiler: Handle storage and buffer format swizzles * shader_recompiler: Skip unsupported depth export * image_view: Remove image format swizzle * Platform support is not always guaranteed --- .../frontend/translate/export.cpp | 5 +++ .../ir/passes/resource_tracking_pass.cpp | 42 +++++++++++++++++++ src/shader_recompiler/specialization.h | 11 ++++- src/video_core/amdgpu/resource.h | 9 ++++ .../renderer_vulkan/liverpool_to_vk.cpp | 9 ---- src/video_core/texture_cache/image_view.cpp | 39 ----------------- 6 files changed, 66 insertions(+), 49 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index f82f8fc1b6..f4914577df 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -13,6 +13,11 @@ void Translator::EmitExport(const GcnInst& inst) { const auto& exp = inst.control.exp; const IR::Attribute attrib{exp.target}; + if (attrib == IR::Attribute::Depth && exp.en != 1) { + LOG_WARNING(Render_Vulkan, "Unsupported depth export"); + return; + } + const std::array vsrc = { IR::VectorReg(inst.src[0].code), IR::VectorReg(inst.src[1].code), diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp 
b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 9958510492..398579ad4a 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) { } } +IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { + boost::container::static_vector comps; + for (u32 i = 0; i < 4; i++) { + switch (sharp.GetSwizzle(i)) { + case AmdGpu::CompSwizzle::Zero: + comps.emplace_back(ir.Imm32(0.f)); + break; + case AmdGpu::CompSwizzle::One: + comps.emplace_back(ir.Imm32(1.f)); + break; + case AmdGpu::CompSwizzle::Red: + comps.emplace_back(ir.CompositeExtract(texel, 0)); + break; + case AmdGpu::CompSwizzle::Green: + comps.emplace_back(ir.CompositeExtract(texel, 1)); + break; + case AmdGpu::CompSwizzle::Blue: + comps.emplace_back(ir.CompositeExtract(texel, 2)); + break; + case AmdGpu::CompSwizzle::Alpha: + comps.emplace_back(ir.CompositeExtract(texel, 3)); + break; + default: + UNREACHABLE(); + } + } + return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); +}; + class Descriptors { public: explicit Descriptors(Info& info_) @@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); + + // Apply dst_sel swizzle on formatted buffer instructions + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); + } else { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); + } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -732,6 +770,10 @@ void PatchImageInstruction(IR::Block& block, 
IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2))); + } + if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index bc8627c1c8..9b5dd8fa1f 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -31,6 +31,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; + u32 dst_select = 0; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -38,8 +39,12 @@ struct TextureBufferSpecialization { struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; + u32 dst_select = 0; - auto operator<=>(const ImageSpecialization&) const = default; + bool operator==(const ImageSpecialization& other) const { + return type == other.type && is_integer == other.is_integer && + (dst_select != 0 ? 
dst_select == other.dst_select : true); + } }; struct FMaskSpecialization { @@ -103,11 +108,15 @@ struct StageSpecialization { ForEachSharp(binding, tex_buffers, info->texture_buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + spec.dst_select = sharp.DstSelect(); }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.type = sharp.GetBoundType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + if (desc.is_storage) { + spec.dst_select = sharp.DstSelect(); + } }); ForEachSharp(binding, fmasks, info->fmasks, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index ba87425f2a..5d74175594 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -52,6 +52,10 @@ struct Buffer { return std::memcmp(this, &other, sizeof(Buffer)) == 0; } + u32 DstSelect() const { + return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); + } + CompSwizzle GetSwizzle(u32 comp) const noexcept { const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; return static_cast(select[comp]); @@ -204,6 +208,11 @@ struct Image { return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); } + CompSwizzle GetSwizzle(u32 comp) const noexcept { + const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; + return static_cast(select[comp]); + } + static char SelectComp(u32 sel) { switch (sel) { case 0: diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index fa8d28ba07..ec0bb3bb7d 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -699,15 +699,6 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, default: break; } - } else if (comp_swap_reverse) { - 
switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eA8B8G8R8UnormPack32; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eA8B8G8R8SrgbPack32; - default: - break; - } } return base_format; } diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 12ad201d1a..cc467e9a44 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -50,34 +50,6 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { } } -bool IsIdentityMapping(u32 dst_sel, u32 num_components) { - return (num_components == 1 && dst_sel == 0b001'000'000'100) || - (num_components == 2 && dst_sel == 0b001'000'101'100) || - (num_components == 3 && dst_sel == 0b001'110'101'100) || - (num_components == 4 && dst_sel == 0b111'110'101'100); -} - -vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { - // BGRA - if (dst_sel == 0b111100101110) { - switch (format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eB8G8R8A8Unorm; - case vk::Format::eR8G8B8A8Snorm: - return vk::Format::eB8G8R8A8Snorm; - case vk::Format::eR8G8B8A8Uint: - return vk::Format::eB8G8R8A8Uint; - case vk::Format::eR8G8B8A8Sint: - return vk::Format::eB8G8R8A8Sint; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eB8G8R8A8Srgb; - default: - break; - } - } - return format; -} - ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept : is_storage{desc.is_storage} { const auto dfmt = image.GetDataFmt(); @@ -120,17 +92,6 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso mapping.b = ConvertComponentSwizzle(image.dst_sel_z); mapping.a = ConvertComponentSwizzle(image.dst_sel_w); } - // Check for unfortunate case of storage images being swizzled - const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt()); - const u32 dst_sel = image.DstSelect(); - if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) 
{ - if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) { - format = new_format; - return; - } - LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps, - image.DstSelectName()); - } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { From f1c23d514b204e0f90c8538743978706fabc30b8 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:51:39 -0800 Subject: [PATCH 13/67] shader_recompiler: Implement FREXP instructions. (#1766) --- externals/sirit | 2 +- .../spirv/emit_spirv_floating_point.cpp | 26 ++++++++++++- .../backend/spirv/emit_spirv_instructions.h | 7 +++- .../backend/spirv/spirv_emit_context.cpp | 4 ++ .../backend/spirv/spirv_emit_context.h | 2 + .../frontend/translate/translate.h | 5 +++ .../frontend/translate/vector_alu.cpp | 37 ++++++++++++++++++- src/shader_recompiler/ir/ir_emitter.cpp | 33 ++++++++++++++++- src/shader_recompiler/ir/ir_emitter.h | 4 +- src/shader_recompiler/ir/opcodes.inc | 7 +++- 10 files changed, 119 insertions(+), 8 deletions(-) diff --git a/externals/sirit b/externals/sirit index 6cecb95d67..e12b6b592c 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 6cecb95d679c82c413d1f989e0b7ad9af130600d +Subproject commit e12b6b592ce9917a85303c555259488643c56f47 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 1e8f31ddcc..a63be87e23 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -225,10 +225,34 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) { return ctx.OpTrunc(ctx.F64[1], value); } -Id EmitFPFract(EmitContext& ctx, Id value) { +Id EmitFPFract32(EmitContext& ctx, Id value) { return ctx.OpFract(ctx.F32[1], value); } +Id EmitFPFract64(EmitContext& ctx, 
Id value) { + return ctx.OpFract(ctx.F64[1], value); +} + +Id EmitFPFrexpSig32(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value); + return ctx.OpCompositeExtract(ctx.F32[1], frexp, 0); +} + +Id EmitFPFrexpSig64(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value); + return ctx.OpCompositeExtract(ctx.F64[1], frexp, 0); +} + +Id EmitFPFrexpExp32(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value); + return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1); +} + +Id EmitFPFrexpExp64(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value); + return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1); +} + Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 071b430d5c..4ff53670e1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -222,7 +222,12 @@ Id EmitFPCeil64(EmitContext& ctx, Id value); Id EmitFPTrunc16(EmitContext& ctx, Id value); Id EmitFPTrunc32(EmitContext& ctx, Id value); Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPFract(EmitContext& ctx, Id value); +Id EmitFPFract32(EmitContext& ctx, Id value); +Id EmitFPFract64(EmitContext& ctx, Id value); +Id EmitFPFrexpSig32(EmitContext& ctx, Id value); +Id EmitFPFrexpSig64(EmitContext& ctx, Id value); +Id EmitFPFrexpExp32(EmitContext& ctx, Id value); +Id EmitFPFrexpExp64(EmitContext& ctx, Id value); Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp 
b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5c7278c6bb..1ada2f1f9d 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -147,6 +147,10 @@ void EmitContext::DefineArithmeticTypes() { full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2"); full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2"); + frexp_result_f32 = Name(TypeStruct(F32[1], U32[1]), "frexp_result_f32"); + if (info.uses_fp64) { + frexp_result_f64 = Name(TypeStruct(F64[1], U32[1]), "frexp_result_f64"); + } } void EmitContext::DefineInterfaces() { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 4e5e7dd3b1..cd12933281 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -148,6 +148,8 @@ class EmitContext final : public Sirit::Module { Id full_result_i32x2; Id full_result_u32x2; + Id frexp_result_f32; + Id frexp_result_f64; Id pi_x2; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 43f3ccef24..2f320a6c73 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -200,6 +200,11 @@ class Translator { void V_BFREV_B32(const GcnInst& inst); void V_FFBH_U32(const GcnInst& inst); void V_FFBL_B32(const GcnInst& inst); + void V_FREXP_EXP_I32_F64(const GcnInst& inst); + void V_FREXP_MANT_F64(const GcnInst& inst); + void V_FRACT_F64(const GcnInst& inst); + void V_FREXP_EXP_I32_F32(const GcnInst& inst); + void V_FREXP_MANT_F32(const GcnInst& inst); void V_MOVRELD_B32(const GcnInst& inst); void V_MOVRELS_B32(const GcnInst& inst); void V_MOVRELSD_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp 
b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 8149230db5..3e9e677a76 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -179,6 +179,16 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_FFBH_U32(inst); case Opcode::V_FFBL_B32: return V_FFBL_B32(inst); + case Opcode::V_FREXP_EXP_I32_F64: + return V_FREXP_EXP_I32_F64(inst); + case Opcode::V_FREXP_MANT_F64: + return V_FREXP_MANT_F64(inst); + case Opcode::V_FRACT_F64: + return V_FRACT_F64(inst); + case Opcode::V_FREXP_EXP_I32_F32: + return V_FREXP_EXP_I32_F32(inst); + case Opcode::V_FREXP_MANT_F32: + return V_FREXP_MANT_F32(inst); case Opcode::V_MOVRELD_B32: return V_MOVRELD_B32(inst); case Opcode::V_MOVRELS_B32: @@ -733,7 +743,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { void Translator::V_FRACT_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.Fract(src0)); + SetDst(inst.dst[0], ir.FPFract(src0)); } void Translator::V_TRUNC_F32(const GcnInst& inst) { @@ -822,6 +832,31 @@ void Translator::V_FFBL_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FindILsb(src0)); } +void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpExp(src0)); +} + +void Translator::V_FREXP_MANT_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + SetDst64(inst.dst[0], ir.FPFrexpSig(src0)); +} + +void Translator::V_FRACT_F64(const GcnInst& inst) { + const IR::F32 src0{GetSrc64(inst.src[0])}; + SetDst64(inst.dst[0], ir.FPFract(src0)); +} + +void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpExp(src0)); +} + +void Translator::V_FREXP_MANT_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpSig(src0)); +} + void 
Translator::V_MOVRELD_B32(const GcnInst& inst) { const IR::U32 src_val{GetSrc(inst.src[0])}; u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 5fa20b7449..29b406699f 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -869,8 +869,37 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) { } } -F32 IREmitter::Fract(const F32& value) { - return Inst(Opcode::FPFract, value); +F32F64 IREmitter::FPFract(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFract32, value); + case Type::F64: + return Inst(Opcode::FPFract64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPFrexpSig(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFrexpSig32, value); + case Type::F64: + return Inst(Opcode::FPFrexpSig64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32 IREmitter::FPFrexpExp(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFrexpExp32, value); + case Type::F64: + return Inst(Opcode::FPFrexpExp64, value); + default: + ThrowInvalidType(value.Type()); + } } U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index e6608cba7e..f77e22b824 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -180,7 +180,9 @@ class IREmitter { [[nodiscard]] F32F64 FPFloor(const F32F64& value); [[nodiscard]] F32F64 FPCeil(const F32F64& value); [[nodiscard]] F32F64 FPTrunc(const F32F64& value); - [[nodiscard]] F32 Fract(const F32& value); + [[nodiscard]] F32F64 FPFract(const F32F64& value); + [[nodiscard]] F32F64 FPFrexpSig(const F32F64& value); + [[nodiscard]] U32 FPFrexpExp(const F32F64& value); [[nodiscard]] U1 
FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); [[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 60232a3a18..8f40ed985f 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -210,7 +210,12 @@ OPCODE(FPCeil32, F32, F32, OPCODE(FPCeil64, F64, F64, ) OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) -OPCODE(FPFract, F32, F32, ) +OPCODE(FPFract32, F32, F32, ) +OPCODE(FPFract64, F64, F64, ) +OPCODE(FPFrexpSig32, F32, F32, ) +OPCODE(FPFrexpSig64, F64, F64, ) +OPCODE(FPFrexpExp32, U32, F32, ) +OPCODE(FPFrexpExp64, U32, F64, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) From 715ac8a2795be8a471e8ff9c02c716725215c43e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Cea=20L=C3=B3pez?= Date: Fri, 13 Dec 2024 23:27:09 +0100 Subject: [PATCH 14/67] vk_shader_hle: Don't alter the order of the skipped copies. (#1757) * vk_shader_hle: Don't alter the order of the skipped copies. * Simplification. * Format. * More simplification. --- src/video_core/renderer_vulkan/vk_shader_hle.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index d1d4f9af3b..b863dce211 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -60,7 +60,7 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB; u32 batch_start = 0; - u32 batch_end = copies.size() > 1 ? 
1 : 0; + u32 batch_end = 0; while (batch_end < copies.size()) { // Place first copy into the current batch @@ -70,19 +70,19 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg auto dst_offset_min = copy.dstOffset; auto dst_offset_max = copy.dstOffset + copy.size; - for (int i = batch_start + 1; i < copies.size(); i++) { + for (++batch_end; batch_end < copies.size(); batch_end++) { // Compute new src and dst bounds if we were to batch this copy - const auto& [src_offset, dst_offset, size] = copies[i]; + const auto& [src_offset, dst_offset, size] = copies[batch_end]; auto new_src_offset_min = std::min(src_offset_min, src_offset); auto new_src_offset_max = std::max(src_offset_max, src_offset + size); if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) { - continue; + break; } auto new_dst_offset_min = std::min(dst_offset_min, dst_offset); auto new_dst_offset_max = std::max(dst_offset_max, dst_offset + size); if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) { - continue; + break; } // We can batch this copy @@ -90,10 +90,6 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg src_offset_max = new_src_offset_max; dst_offset_min = new_dst_offset_min; dst_offset_max = new_dst_offset_max; - if (i != batch_end) { - std::swap(copies[i], copies[batch_end]); - } - ++batch_end; } // Obtain buffers for the total source and destination ranges. 
@@ -116,7 +112,6 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg src_offset_max - src_offset_min, dst_offset_max - dst_offset_min); scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), vk_copies); batch_start = batch_end; - ++batch_end; } scheduler.CommandBuffer().pipelineBarrier( From 8bb1e8fcdb511f28c08924789251454315bcc617 Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:17:00 +0800 Subject: [PATCH 15/67] Resize trophy pop up windows based on window size (#1769) --- src/core/libraries/np_trophy/trophy_ui.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/np_trophy/trophy_ui.cpp b/src/core/libraries/np_trophy/trophy_ui.cpp index 618f8db461..55ef7b8de6 100644 --- a/src/core/libraries/np_trophy/trophy_ui.cpp +++ b/src/core/libraries/np_trophy/trophy_ui.cpp @@ -38,21 +38,22 @@ void TrophyUI::Finish() { void TrophyUI::Draw() { const auto& io = GetIO(); + float AdjustWidth = io.DisplaySize.x / 1280; + float AdjustHeight = io.DisplaySize.y / 720; const ImVec2 window_size{ - std::min(io.DisplaySize.x, 250.f), - std::min(io.DisplaySize.y, 70.f), + std::min(io.DisplaySize.x, (300 * AdjustWidth)), + std::min(io.DisplaySize.y, (70 * AdjustHeight)), }; SetNextWindowSize(window_size); SetNextWindowCollapsed(false); - SetNextWindowPos(ImVec2(io.DisplaySize.x - 250, 50)); + SetNextWindowPos(ImVec2(io.DisplaySize.x - (300 * AdjustWidth), (50 * AdjustHeight))); KeepNavHighlight(); - if (Begin("Trophy Window", nullptr, ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoInputs)) { if (trophy_icon) { - Image(trophy_icon.GetTexture().im_id, ImVec2(50, 50)); + Image(trophy_icon.GetTexture().im_id, ImVec2((50 * AdjustWidth), (50 * AdjustHeight))); ImGui::SameLine(); } else { // placeholder @@ -61,6 +62,7 @@ void TrophyUI::Draw() { GetColorU32(ImVec4{0.7f})); ImGui::Indent(60); 
} + SetWindowFontScale((1.2 * AdjustHeight)); TextWrapped("Trophy earned!\n%s", trophy_name.c_str()); } End(); From 32556ad0d86ea01aacb136f00a879082bcca66c0 Mon Sep 17 00:00:00 2001 From: Alexandre Bouvier Date: Sat, 14 Dec 2024 08:18:05 +0000 Subject: [PATCH 16/67] cmake: fix double alias (#1771) --- CMakeLists.txt | 4 ++-- externals/CMakeLists.txt | 2 +- externals/sirit | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b057f55d6a..2e21a33c46 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -875,7 +875,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") @@ -1016,4 +1016,4 @@ if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux") install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo") install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png") install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps") -endif() \ No newline at end of file +endif() diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index e1e67f235b..dcc9d2bc0f 100644 --- 
a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -110,7 +110,7 @@ if (NOT TARGET glslang::glslang) set(ENABLE_OPT OFF CACHE BOOL "") add_subdirectory(glslang) file(COPY glslang/SPIRV DESTINATION glslang/glslang FILES_MATCHING PATTERN "*.h") - target_include_directories(SPIRV INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang") + target_include_directories(glslang INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang") endif() # Robin-map diff --git a/externals/sirit b/externals/sirit index e12b6b592c..5b5ff49a58 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit e12b6b592ce9917a85303c555259488643c56f47 +Subproject commit 5b5ff49a58f5be27af1058794c6ca907dabc05b3 From a57ccf9112ec9971695cb08eeee27a876020ab19 Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:18:34 +0800 Subject: [PATCH 17/67] Save main window together with config to avoid rare crash (#1772) --- src/common/config.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/config.cpp b/src/common/config.cpp index 4d07ba29f4..403b0e32f1 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -692,6 +692,7 @@ void save(const std::filesystem::path& path) { std::ofstream file(path, std::ios::binary); file << data; file.close(); + saveMainWindow(path); } void saveMainWindow(const std::filesystem::path& path) { From 40e8a40ada9a707b7cf3001ed1d8f835d0c7d2ad Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 00:20:04 -0800 Subject: [PATCH 18/67] externals: Add MoltenVK as an external. 
(#1767) --- .github/workflows/build.yml | 14 ++---- .gitmodules | 14 +++++- CMakeLists.txt | 3 +- documents/building-macos.md | 8 ++- externals/CMakeLists.txt | 24 +++++---- externals/MoltenVK/CMakeLists.txt | 81 +++++++++++++++++++++++++++++++ externals/MoltenVK/MoltenVK | 1 + externals/MoltenVK/SPIRV-Cross | 1 + externals/MoltenVK/cereal | 1 + 9 files changed, 119 insertions(+), 28 deletions(-) create mode 100644 externals/MoltenVK/CMakeLists.txt create mode 160000 externals/MoltenVK/MoltenVK create mode 160000 externals/MoltenVK/SPIRV-Cross create mode 160000 externals/MoltenVK/cereal diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bacfbea0da..3b5690438c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -174,11 +174,6 @@ jobs: with: xcode-version: latest - - name: Install MoltenVK - run: | - arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - arch -x86_64 /usr/local/bin/brew install molten-vk - - name: Cache CMake Configuration uses: actions/cache@v4 env: @@ -210,7 +205,7 @@ jobs: run: | mkdir upload mv ${{github.workspace}}/build/shadps4 upload - cp $(arch -x86_64 /usr/local/bin/brew --prefix)/opt/molten-vk/lib/libMoltenVK.dylib upload + cp ${{github.workspace}}/build/externals/MoltenVK/libMoltenVK.dylib upload tar cf shadps4-macos-sdl.tar.gz -C upload . 
- uses: actions/upload-artifact@v4 with: @@ -230,11 +225,8 @@ jobs: with: xcode-version: latest - - name: Install MoltenVK and Setup Qt - run: | - arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - arch -x86_64 /usr/local/bin/brew install molten-vk - - uses: jurplel/install-qt-action@v4 + - name: Setup Qt + uses: jurplel/install-qt-action@v4 with: version: 6.7.3 host: mac diff --git a/.gitmodules b/.gitmodules index 8010250a95..3d0d21c5b6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,4 +106,16 @@ [submodule "externals/libpng"] path = externals/libpng url = https://github.com/pnggroup/libpng - shallow = true \ No newline at end of file + shallow = true +[submodule "externals/MoltenVK/SPIRV-Cross"] + path = externals/MoltenVK/SPIRV-Cross + url = https://github.com/KhronosGroup/SPIRV-Cross + shallow = true +[submodule "externals/MoltenVK/MoltenVK"] + path = externals/MoltenVK/MoltenVK + url = https://github.com/KhronosGroup/MoltenVK + shallow = true +[submodule "externals/MoltenVK/cereal"] + path = externals/MoltenVK/cereal + url = https://github.com/USCiLab/cereal + shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e21a33c46..1e54f7a003 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -894,8 +894,7 @@ if (APPLE) target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1) else() # Link MoltenVK for Vulkan support - find_library(MOLTENVK MoltenVK REQUIRED) - target_link_libraries(shadps4 PRIVATE ${MOLTENVK}) + target_link_libraries(shadps4 PRIVATE MoltenVK) endif() if (ARCHITECTURE STREQUAL "x86_64") diff --git a/documents/building-macos.md b/documents/building-macos.md index d8cc414e2d..9a1a021ee9 100644 --- a/documents/building-macos.md +++ b/documents/building-macos.md @@ -24,23 +24,21 @@ eval $(/opt/homebrew/bin/brew shellenv) brew install clang-format cmake ``` -Next, install x86_64 Homebrew and libraries. +Next, install x86_64 Qt. 
You can skip these steps and move on to **Cloning and compiling** if you do not intend to build the Qt GUI. **If you are on an ARM Mac:** ``` # Installs x86_64 Homebrew to /usr/local arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" # Installs libraries. -arch -x86_64 /usr/local/bin/brew install molten-vk qt@6 +arch -x86_64 /usr/local/bin/brew install qt@6 ``` **If you are on an x86_64 Mac:** ``` -brew install molten-vk qt@6 +brew install qt@6 ``` -If you don't need the Qt GUI you can remove `qt@6` from the last command. - ### Cloning and compiling: Clone the repository recursively: diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index dcc9d2bc0f..1ab23a4038 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -177,15 +177,6 @@ if (NOT TARGET PNG::PNG) add_library(PNG::PNG ALIAS png_static) endif() -if (APPLE) - # date - if (NOT TARGET date::date-tz) - option(BUILD_TZ_LIB "" ON) - option(USE_SYSTEM_TZ_DB "" ON) - add_subdirectory(date) - endif() -endif() - # Dear ImGui add_library(Dear_ImGui dear_imgui/imgui.cpp @@ -232,3 +223,18 @@ if (NOT TARGET stb::headers) target_include_directories(stb INTERFACE stb) add_library(stb::headers ALIAS stb) endif() + +# Apple-only dependencies +if (APPLE) + # date + if (NOT TARGET date::date-tz) + option(BUILD_TZ_LIB "" ON) + option(USE_SYSTEM_TZ_DB "" ON) + add_subdirectory(date) + endif() + + # MoltenVK + if (NOT TARGET MoltenVK) + add_subdirectory(MoltenVK) + endif() +endif() diff --git a/externals/MoltenVK/CMakeLists.txt b/externals/MoltenVK/CMakeLists.txt new file mode 100644 index 0000000000..00e3231eef --- /dev/null +++ b/externals/MoltenVK/CMakeLists.txt @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +# Prepare version information +find_package(Git) +if(GIT_FOUND) + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + OUTPUT_VARIABLE 
MVK_GIT_REV + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +set(MVK_VERSION "1.2.12") +set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated) +file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = \"${MVK_GIT_REV}\";") + +# Find required system libraries +find_library(APPKIT_LIBRARY AppKit REQUIRED) +find_library(FOUNDATION_LIBRARY Foundation REQUIRED) +find_library(IOKIT_LIBRARY IOKit REQUIRED) +find_library(IOSURFACE_LIBRARY IOSurface REQUIRED) +find_library(METAL_LIBRARY Metal REQUIRED) +find_library(QUARTZCORE_LIBRARY QuartzCore REQUIRED) + +# cereal +option(SKIP_PORTABILITY_TEST "" ON) +option(BUILD_DOC "" OFF) +option(BUILD_SANDBOX "" OFF) +option(SKIP_PERFORMANCE_COMPARISON "" ON) +option(SPIRV_CROSS_SKIP_INSTALL "" ON) +add_subdirectory(cereal) + +# SPIRV-Cross +option(SPIRV_CROSS_CLI "" OFF) +option(SPIRV_CROSS_ENABLE_TESTS "" OFF) +option(SPIRV_CROSS_ENABLE_HLSL "" OFF) +option(SPIRV_CROSS_ENABLE_CPP "" OFF) +option(SPIRV_CROSS_SKIP_INSTALL "" ON) +add_subdirectory(SPIRV-Cross) + +# Common +set(MVK_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/Common) +file(GLOB_RECURSE MVK_COMMON_SOURCES CONFIGURE_DEPENDS + ${MVK_COMMON_DIR}/*.cpp + ${MVK_COMMON_DIR}/*.m + ${MVK_COMMON_DIR}/*.mm) +set(MVK_COMMON_INCLUDES ${MVK_COMMON_DIR}) + +add_library(MoltenVKCommon STATIC ${MVK_COMMON_SOURCES}) +target_include_directories(MoltenVKCommon PUBLIC ${MVK_COMMON_INCLUDES}) +target_compile_options(MoltenVKCommon PRIVATE -w) + +# MoltenVKShaderConverter +set(MVK_SHADER_CONVERTER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVKShaderConverter) +file(GLOB_RECURSE MVK_SHADER_CONVERTER_SOURCES CONFIGURE_DEPENDS + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.cpp + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.m + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.mm) +set(MVK_SHADER_CONVERTER_INCLUDES ${MVK_SHADER_CONVERTER_DIR} ${MVK_SHADER_CONVERTER_DIR}/include) + 
+add_library(MoltenVKShaderConverter STATIC ${MVK_SHADER_CONVERTER_SOURCES}) +target_include_directories(MoltenVKShaderConverter PUBLIC ${MVK_SHADER_CONVERTER_INCLUDES}) +target_compile_options(MoltenVKShaderConverter PRIVATE -w) +target_link_libraries(MoltenVKShaderConverter PRIVATE spirv-cross-msl spirv-cross-reflect MoltenVKCommon) +target_compile_definitions(MoltenVKShaderConverter PRIVATE MVK_EXCLUDE_SPIRV_TOOLS=1) + +# MoltenVK +set(MVK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK) +file(GLOB_RECURSE MVK_SOURCES CONFIGURE_DEPENDS + ${MVK_DIR}/MoltenVK/*.cpp + ${MVK_DIR}/MoltenVK/*.m + ${MVK_DIR}/MoltenVK/*.mm) +file(GLOB MVK_SRC_INCLUDES LIST_DIRECTORIES ON ${MVK_DIR}/MoltenVK/*) +set(MVK_INCLUDES ${MVK_SRC_INCLUDES} ${MVK_GENERATED_INCLUDES} ${MVK_DIR}/include) + +add_library(MoltenVK SHARED ${MVK_SOURCES}) +target_include_directories(MoltenVK PRIVATE ${MVK_INCLUDES}) +target_compile_options(MoltenVK PRIVATE -w) +target_link_libraries(MoltenVK PRIVATE + ${APPKIT_LIBRARY} ${FOUNDATION_LIBRARY} ${IOKIT_LIBRARY} ${IOSURFACE_LIBRARY} ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY} + Vulkan::Headers cereal::cereal spirv-cross-msl MoltenVKCommon MoltenVKShaderConverter) +target_compile_definitions(MoltenVK PRIVATE MVK_FRAMEWORK_VERSION=${MVK_VERSION} MVK_USE_METAL_PRIVATE_API=1) diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK new file mode 160000 index 0000000000..5ad3ee5d2f --- /dev/null +++ b/externals/MoltenVK/MoltenVK @@ -0,0 +1 @@ +Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932 diff --git a/externals/MoltenVK/SPIRV-Cross b/externals/MoltenVK/SPIRV-Cross new file mode 160000 index 0000000000..6173e24b31 --- /dev/null +++ b/externals/MoltenVK/SPIRV-Cross @@ -0,0 +1 @@ +Subproject commit 6173e24b31f09a0c3217103a130e74c4ddec14a6 diff --git a/externals/MoltenVK/cereal b/externals/MoltenVK/cereal new file mode 160000 index 0000000000..d1fcec807b --- /dev/null +++ b/externals/MoltenVK/cereal @@ -0,0 +1 @@ +Subproject commit 
d1fcec807b372f04e4c1041b3058e11c12853e6e From 8caca4df32c05a11af8351590dcfa0fa5266eb11 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 02:03:42 -0800 Subject: [PATCH 19/67] shader_recompiler: Support VK_AMD_shader_image_load_store_lod for IMAGE_STORE_MIP (#1770) * shader_recompiler: Support VK_AMD_shader_image_load_store_lod for IMAGE_STORE_MIP * emit_spirv: Fix missing extension declaration. --- .../backend/spirv/emit_spirv.cpp | 4 ++++ .../backend/spirv/emit_spirv_image.cpp | 17 ++++++++++++----- .../backend/spirv/emit_spirv_instructions.h | 8 ++++---- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_memory.cpp | 11 ++++++++--- src/shader_recompiler/ir/ir_emitter.cpp | 17 +++++++++-------- src/shader_recompiler/ir/ir_emitter.h | 10 ++++++---- src/shader_recompiler/ir/opcodes.inc | 6 +++--- .../ir/passes/resource_tracking_pass.cpp | 8 +++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_vulkan/vk_instance.cpp | 1 + src/video_core/renderer_vulkan/vk_instance.h | 6 ++++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 13 files changed, 61 insertions(+), 31 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 23800fc49c..ab9d6afae8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -222,6 +222,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + if (profile.supports_image_load_store_lod) { + ctx.AddExtension("SPV_AMD_shader_image_load_store_lod"); + ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD); + } } if (info.has_texel_buffers) { 
ctx.AddCapability(spv::Capability::SampledBuffer); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 736410dcd2..8da9280d01 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -168,8 +168,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels; } -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms) { +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); @@ -236,15 +236,22 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id return texture.is_integer ? 
ctx.OpBitcast(ctx.F32[4], sample) : sample; } -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod) { UNREACHABLE_MSG("SPIR-V Instruction"); } -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) { +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id color_type = texture.data_types->Get(4); - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color)); + ImageOperands operands; + if (ctx.profile.supports_image_load_store_lod) { + operands.Add(spv::ImageOperandsMask::Lod, lod); + } else if (lod.value != 0) { + LOG_WARNING(Render, "Image write with LOD not supported by driver"); + } + ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, + operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 4ff53670e1..057b0d6929 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -392,14 +392,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, 
IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx, Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color); Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2f320a6c73..198cea276b 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -276,7 +276,7 @@ class Translator { // Image Memory // MIMG void IMAGE_LOAD(bool has_mip, const GcnInst& inst); - void IMAGE_STORE(const GcnInst& inst); + void IMAGE_STORE(bool has_mip, const GcnInst& inst); void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); void IMAGE_SAMPLE(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 74b9c905d2..eadd1c4db0 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -98,7 +98,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { // Buffer store operations case Opcode::IMAGE_STORE: - return IMAGE_STORE(inst); + return IMAGE_STORE(false, inst); + case Opcode::IMAGE_STORE_MIP: + return IMAGE_STORE(true, inst); // Image misc operations case 
Opcode::IMAGE_GET_RESINFO: @@ -423,7 +425,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { } } -void Translator::IMAGE_STORE(const GcnInst& inst) { +void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) { const auto& mimg = inst.control.mimg; IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg data_reg{inst.dst[0].code}; @@ -434,6 +436,9 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); + IR::TextureInstInfo info{}; + info.has_lod.Assign(has_mip); + boost::container::static_vector comps; for (u32 i = 0; i < 4; i++) { if (((mimg.dmask >> i) & 1) == 0) { @@ -443,7 +448,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { comps.push_back(ir.GetVectorReg(data_reg++)); } const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); - ir.ImageWrite(handle, body, value, {}); + ir.ImageWrite(handle, body, {}, value, info); } void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 29b406699f..3ebc82e64d 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1599,9 +1599,9 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } -Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info) { - return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, offset, lod, multisampling); +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, TextureInstInfo info) { + return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, lod, offset, 
multisampling); } Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, @@ -1625,13 +1625,14 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords, offset, lod_clamp); } -Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { - return Inst(Opcode::ImageRead, Flags{info}, handle, coords); +Value IREmitter::ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info) { + return Inst(Opcode::ImageRead, Flags{info}, handle, coords, lod); } -void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { - Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color); +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& lod, + const Value& color, TextureInstInfo info) { + Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, color); } // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f77e22b824..068aba14dd 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -314,14 +314,16 @@ class IREmitter { TextureInstInfo info); [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, const F32& dref, TextureInstInfo info); - [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info); + [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, + TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, const Value& derivatives_dx, const Value& derivatives_dy, const Value& offset, const F32& lod_clamp, TextureInstInfo info); - [[nodiscard]] Value ImageRead(const Value& 
handle, const Value& coords, TextureInstInfo info); - void ImageWrite(const Value& handle, const Value& coords, const Value& color, + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info); + void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const Value& color, TextureInstInfo info); void EmitVertex(); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 8f40ed985f..477275824b 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -334,12 +334,12 @@ OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaq OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, U32, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 398579ad4a..f436db07a0 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -771,14 +771,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip inst.SetArg(1, coords); if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2))); + inst.SetArg(3, 
SwizzleVector(ir, image, inst.Arg(3))); } if (inst_info.has_lod) { - ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); + ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch || + inst.GetOpcode() == IR::Opcode::ImageRead || + inst.GetOpcode() == IR::Opcode::ImageWrite); ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && image.GetType() != AmdGpu::ImageType::Color2DMsaaArray); - inst.SetArg(3, arg); + inst.SetArg(2, arg); } else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa || image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) { inst.SetArg(4, arg); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 96c458d440..c00e37f9c8 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -23,6 +23,7 @@ struct Profile { bool support_fp32_denorm_flush{}; bool support_explicit_workgroup_layout{}; bool support_legacy_vertex_attributes{}; + bool supports_image_load_store_lod{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 81784eb605..2f96950557 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -267,6 +267,7 @@ bool Instance::CreateDevice() { list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); + image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. 
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 81303c9ccb..2b4bd612fd 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -158,6 +158,11 @@ class Instance { return legacy_vertex_attributes; } + /// Returns true when VK_AMD_shader_image_load_store_lod is supported. + bool IsImageLoadStoreLodSupported() const { + return image_load_store_lod; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -327,6 +332,7 @@ class Instance { bool maintenance5{}; bool list_restart{}; bool legacy_vertex_attributes{}; + bool image_load_store_lod{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0fa77e19b7..ff27b742f6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -172,6 +172,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_explicit_workgroup_layout = true, .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), + .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, }; From 3e226225080cca81693e034fdb0f0d0b30b8d4dd Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 02:04:30 -0800 Subject: [PATCH 20/67] renderer_vulkan: Remove some fallbacks and misc format queries that are no longer needed. 
(#1773) --- .../renderer_vulkan/vk_instance.cpp | 26 +++---------------- src/video_core/renderer_vulkan/vk_instance.h | 4 --- src/video_core/texture_cache/image_view.cpp | 3 +-- 3 files changed, 5 insertions(+), 28 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 2f96950557..e844150b24 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -68,11 +68,10 @@ std::unordered_map GetFormatProperties( } // Other miscellaneous formats, e.g. for color buffers, swizzles, or compatibility static constexpr std::array misc_formats = { - vk::Format::eA2R10G10B10UnormPack32, vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eA8B8G8R8SrgbPack32, vk::Format::eB8G8R8A8Unorm, - vk::Format::eB8G8R8A8Snorm, vk::Format::eB8G8R8A8Uint, - vk::Format::eB8G8R8A8Sint, vk::Format::eB8G8R8A8Srgb, - vk::Format::eR5G6B5UnormPack16, vk::Format::eD24UnormS8Uint, + vk::Format::eA2R10G10B10UnormPack32, + vk::Format::eB8G8R8A8Unorm, + vk::Format::eB8G8R8A8Srgb, + vk::Format::eD24UnormS8Uint, }; for (const auto& format : misc_formats) { if (!format_properties.contains(format)) { @@ -583,8 +582,6 @@ bool Instance::IsFormatSupported(const vk::Format format, static vk::Format GetAlternativeFormat(const vk::Format format) { switch (format) { - case vk::Format::eB5G6R5UnormPack16: - return vk::Format::eR5G6B5UnormPack16; case vk::Format::eD16UnormS8Uint: return vk::Format::eD24UnormS8Uint; default: @@ -604,19 +601,4 @@ vk::Format Instance::GetSupportedFormat(const vk::Format format, return format; } -vk::ComponentMapping Instance::GetSupportedComponentSwizzle( - const vk::Format format, const vk::ComponentMapping swizzle, - const vk::FormatFeatureFlags2 flags) const { - if (IsFormatSupported(format, flags)) [[likely]] { - return swizzle; - } - - vk::ComponentMapping supported_swizzle = swizzle; - if (format == vk::Format::eB5G6R5UnormPack16) { - // B5G6R5 -> R5G6B5 - 
std::swap(supported_swizzle.r, supported_swizzle.b); - } - return supported_swizzle; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 2b4bd612fd..54a9b98732 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -33,10 +33,6 @@ class Instance { [[nodiscard]] vk::Format GetSupportedFormat(vk::Format format, vk::FormatFeatureFlags2 flags) const; - /// Re-orders a component swizzle for format compatibility, if needed. - [[nodiscard]] vk::ComponentMapping GetSupportedComponentSwizzle( - vk::Format format, vk::ComponentMapping swizzle, vk::FormatFeatureFlags2 flags) const; - /// Returns the Vulkan instance vk::Instance GetInstance() const { return *instance; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index cc467e9a44..41c45019ed 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -141,8 +141,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .image = image.image, .viewType = info.type, .format = instance.GetSupportedFormat(format, image.format_features), - .components = - instance.GetSupportedComponentSwizzle(format, info.mapping, image.format_features), + .components = info.mapping, .subresourceRange{ .aspectMask = aspect, .baseMipLevel = info.range.base.level, From 3c0c921ef5006f1d30eac356e72edb6140d1da1e Mon Sep 17 00:00:00 2001 From: baggins183 Date: Sat, 14 Dec 2024 02:56:17 -0800 Subject: [PATCH 21/67] Tessellation (#1528) * shader_recompiler: Tessellation WIP * fix compiler errors after merge DONT MERGE set log file to /dev/null DONT MERGE linux pthread bb fix save work DONT MERGE dump ir save more work fix mistake with ES shader skip list add input patch control points dynamic state random stuff * WIP Tessellation partial implementation. 
Squash commits * test: make local/tcs use attr arrays * attr arrays in TCS/TES * dont define empty attr arrays * switch to special opcodes for tess tcs/tes reads and tcs writes * impl tcs/tes read attr insts * rebase fix * save some work * save work probably broken and slow * put Vertex LogicalStage after TCS and TES to fix bindings * more refactors * refactor pattern matching and optimize modulos (disabled) * enable modulo opt * copyright * rebase fixes * remove some prints * remove some stuff * Add TCS/TES support for shader patching and use LogicalStage * refactor and handle wider DS instructions * get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR * stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs * delete some more stuff * update comments for current implementation * some cleanup * uint error * more cleanup * remove patch control points dynamic state (because runtime_info already depends on it) * fix potential problem with determining passthrough --------- Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> --- CMakeLists.txt | 3 + src/core/debug_state.cpp | 9 +- src/core/debug_state.h | 20 +- src/core/devtools/widget/shader_list.cpp | 17 +- src/core/libraries/gnmdriver/gnmdriver.cpp | 7 +- .../backend/spirv/emit_spirv.cpp | 74 +- .../backend/spirv/emit_spirv_barriers.cpp | 13 +- .../spirv/emit_spirv_context_get_set.cpp | 146 +++- .../backend/spirv/emit_spirv_instructions.h | 9 +- .../backend/spirv/spirv_emit_context.cpp | 149 +++- .../backend/spirv/spirv_emit_context.h | 26 +- src/shader_recompiler/frontend/tessellation.h | 38 + .../frontend/translate/data_share.cpp | 9 +- .../frontend/translate/scalar_alu.cpp | 10 +- .../frontend/translate/translate.cpp | 41 +- .../frontend/translate/translate.h | 5 +- .../frontend/translate/vector_alu.cpp | 10 +- .../frontend/translate/vector_memory.cpp | 12 
+- src/shader_recompiler/info.h | 22 +- src/shader_recompiler/ir/attribute.cpp | 12 + src/shader_recompiler/ir/attribute.h | 14 +- src/shader_recompiler/ir/basic_block.cpp | 2 + src/shader_recompiler/ir/ir_emitter.cpp | 35 +- src/shader_recompiler/ir/ir_emitter.h | 15 +- src/shader_recompiler/ir/microinstruction.cpp | 2 + src/shader_recompiler/ir/opcodes.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 4 + .../ir/passes/constant_propagation_pass.cpp | 26 +- .../ir/passes/constant_propogation.h | 4 + .../ir/passes/hull_shader_transform.cpp | 744 ++++++++++++++++++ src/shader_recompiler/ir/passes/ir_passes.h | 3 + .../ir/passes/ring_access_elimination.cpp | 45 +- .../ir/passes/shader_info_collection_pass.cpp | 16 + src/shader_recompiler/ir/patch.cpp | 28 + src/shader_recompiler/ir/patch.h | 173 ++++ src/shader_recompiler/ir/pattern_matching.h | 127 +++ src/shader_recompiler/ir/reg.h | 3 +- src/shader_recompiler/ir/type.h | 2 +- src/shader_recompiler/ir/value.cpp | 2 + src/shader_recompiler/ir/value.h | 9 + src/shader_recompiler/recompiler.cpp | 26 +- src/shader_recompiler/recompiler.h | 2 +- src/shader_recompiler/runtime_info.h | 78 +- src/shader_recompiler/specialization.h | 12 + src/video_core/amdgpu/liverpool.h | 49 +- src/video_core/amdgpu/types.h | 95 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 32 +- .../renderer_vulkan/vk_graphics_pipeline.h | 3 +- .../renderer_vulkan/vk_instance.cpp | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 105 ++- .../renderer_vulkan/vk_pipeline_cache.h | 18 +- .../renderer_vulkan/vk_pipeline_common.h | 10 +- .../renderer_vulkan/vk_rasterizer.cpp | 13 +- 54 files changed, 2146 insertions(+), 189 deletions(-) create mode 100644 src/shader_recompiler/frontend/tessellation.h create mode 100644 src/shader_recompiler/ir/passes/constant_propogation.h create mode 100644 src/shader_recompiler/ir/passes/hull_shader_transform.cpp create mode 100644 src/shader_recompiler/ir/patch.cpp 
create mode 100644 src/shader_recompiler/ir/patch.h create mode 100644 src/shader_recompiler/ir/pattern_matching.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e54f7a003..78d8421a3e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/constant_propagation_pass.cpp src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp + src/shader_recompiler/ir/passes/hull_shader_transform.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/opcodes.cpp src/shader_recompiler/ir/opcodes.h src/shader_recompiler/ir/opcodes.inc + src/shader_recompiler/ir/patch.cpp + src/shader_recompiler/ir/patch.h src/shader_recompiler/ir/post_order.cpp src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 6496249247..c68fd469da 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -177,10 +177,11 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, } } -void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module, - std::span spv, std::span raw_code, - std::span patch_spv, bool is_patched) { - shader_dump_list.emplace_back(name, module, std::vector{spv.begin(), spv.end()}, +void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, + std::span raw_code, std::span patch_spv, + bool is_patched) { + shader_dump_list.emplace_back(name, l_stage, module, std::vector{spv.begin(), spv.end()}, std::vector{raw_code.begin(), raw_code.end()}, std::vector{patch_spv.begin(), 
patch_spv.end()}, is_patched); } diff --git a/src/core/debug_state.h b/src/core/debug_state.h index fa2e5cd9d8..0db5bc4683 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -76,6 +76,7 @@ struct FrameDump { struct ShaderDump { std::string name; + Shader::LogicalStage l_stage; vk::ShaderModule module; std::vector spv; @@ -90,16 +91,17 @@ struct ShaderDump { std::string cache_isa_disasm{}; std::string cache_patch_disasm{}; - ShaderDump(std::string name, vk::ShaderModule module, std::vector spv, - std::vector isa, std::vector patch_spv, bool is_patched) - : name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)), - patch_spv(std::move(patch_spv)), is_patched(is_patched) {} + ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module, + std::vector spv, std::vector isa, std::vector patch_spv, + bool is_patched) + : name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)), + isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {} ShaderDump(const ShaderDump& other) = delete; ShaderDump(ShaderDump&& other) noexcept - : name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)}, - isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)}, - patch_source{std::move(other.patch_source)}, + : name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)}, + spv{std::move(other.spv)}, isa{std::move(other.isa)}, + patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)}, cache_spv_disasm{std::move(other.cache_spv_disasm)}, cache_isa_disasm{std::move(other.cache_isa_disasm)}, cache_patch_disasm{std::move(other.cache_patch_disasm)} {} @@ -108,6 +110,7 @@ struct ShaderDump { if (this == &other) return *this; name = std::move(other.name); + l_stage = other.l_stage; module = std::move(other.module); spv = std::move(other.spv); isa = std::move(other.isa); @@ -203,7 +206,8 @@ class 
DebugStateImpl { void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, const AmdGpu::Liverpool::Regs& regs, bool is_compute = false); - void CollectShader(const std::string& name, vk::ShaderModule module, std::span spv, + void CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, bool is_patched); }; diff --git a/src/core/devtools/widget/shader_list.cpp b/src/core/devtools/widget/shader_list.cpp index 80c939718b..2c97db7fd4 100644 --- a/src/core/devtools/widget/shader_list.cpp +++ b/src/core/devtools/widget/shader_list.cpp @@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) { DebugState.ShowDebugMessage(msg); } if (compile) { - static std::map stage_arg = { - {"vs", "vert"}, - {"gs", "geom"}, - {"fs", "frag"}, - {"cs", "comp"}, + static std::map stage_arg = { + {Shader::LogicalStage::Vertex, "vert"}, + {Shader::LogicalStage::TessellationControl, "tesc"}, + {Shader::LogicalStage::TessellationEval, "tese"}, + {Shader::LogicalStage::Geometry, "geom"}, + {Shader::LogicalStage::Fragment, "frag"}, + {Shader::LogicalStage::Compute, "comp"}, }; - auto stage = stage_arg.find(value.name.substr(0, 2)); + auto stage = stage_arg.find(value.l_stage); if (stage == stage_arg.end()) { - DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} + - value.name.substr(0, 2)); + DebugState.ShowDebugMessage(std::string{"Invalid shader stage"}); } else { std::string cmd = fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 " diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index dbf085fb38..e85b8b8908 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1642,7 +1642,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) { s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* 
hs_regs, u32 param4) { LOG_TRACE(Lib_GnmDriver, "called"); - if (!cmdbuf || size < 0x1E) { return -1; } @@ -1660,11 +1659,13 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3 cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2], hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5], - hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, + hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL + hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG + // right padding? WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ab9d6afae8..e545e8e367 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include #include #include @@ -13,6 +12,7 @@ #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/types.h" namespace Shader::Backend::SPIRV { @@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.VectorReg(); } else if constexpr (std::is_same_v) { return arg.StringLiteral(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } + UNREACHABLE(); } template @@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) { return main; } +spv::ExecutionMode 
ExecutionMode(AmdGpu::TessellationType primitive) { + switch (primitive) { + case AmdGpu::TessellationType::Isoline: + return spv::ExecutionMode::Isolines; + case AmdGpu::TessellationType::Triangle: + return spv::ExecutionMode::Triangles; + case AmdGpu::TessellationType::Quad: + return spv::ExecutionMode::Quads; + } + UNREACHABLE_MSG("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { + switch (spacing) { + case AmdGpu::TessellationPartitioning::Integer: + return spv::ExecutionMode::SpacingEqual; + case AmdGpu::TessellationPartitioning::FracOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case AmdGpu::TessellationPartitioning::FracEven: + return spv::ExecutionMode::SpacingFractionalEven; + default: + break; + } + UNREACHABLE_MSG("Tessellation spacing {}", spacing); +} + void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) { ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); @@ -248,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.uses_group_ballot) { ctx.AddCapability(spv::Capability::GroupNonUniformBallot); } - if (info.stage == Stage::Export || info.stage == Stage::Vertex) { + const auto stage = info.l_stage; + if (stage == LogicalStage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if (info.stage == Stage::Geometry) { + if (stage == LogicalStage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); } + if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { + ctx.AddCapability(spv::Capability::Tessellation); + } } -void DefineEntryPoint(const IR::Program& 
program, EmitContext& ctx, Id main) { - const auto& info = program.info; +void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.info.stage) { - case Stage::Compute: { + switch (info.l_stage) { + case LogicalStage::Compute: { const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); break; } - case Stage::Export: - case Stage::Vertex: + case LogicalStage::Vertex: execution_model = spv::ExecutionModel::Vertex; break; - case Stage::Fragment: + case LogicalStage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, + ctx.runtime_info.hs_info.NumOutputControlPoints()); + break; + case LogicalStage::TessellationEval: { + execution_model = spv::ExecutionModel::TessellationEvaluation; + const auto& vs_info = ctx.runtime_info.vs_info; + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); + ctx.AddExecutionMode(main, + vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw + ? 
spv::ExecutionMode::VertexOrderCcw + : spv::ExecutionMode::VertexOrderCw); + break; + } + case LogicalStage::Fragment: execution_model = spv::ExecutionModel::Fragment; if (ctx.profile.lower_left_origin_mode) { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); @@ -292,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } break; - case Stage::Geometry: + case LogicalStage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive)); ctx.AddExecutionMode(main, @@ -303,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.runtime_info.gs_info.num_invocations); break; default: - throw NotImplementedException("Stage {}", u32(program.info.stage)); + UNREACHABLE_MSG("Stage {}", u32(info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -349,7 +397,7 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in const IR::Program& program, Bindings& binding) { EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(program, ctx, main); + DefineEntryPoint(program.info, ctx, main); SetupCapabilities(program.info, profile, ctx); SetupFloatMode(ctx, profile, runtime_info, main); PatchPhiNodes(program, ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 22b3523aa4..611225e8bb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { void EmitBarrier(EmitContext& ctx) { const auto execution{spv::Scope::Workgroup}; - const auto memory{spv::Scope::Workgroup}; - const auto 
memory_semantics{spv::MemorySemanticsMask::AcquireRelease | - spv::MemorySemanticsMask::WorkgroupMemory}; + spv::Scope memory; + spv::MemorySemanticsMask memory_semantics; + if (ctx.l_stage == Shader::LogicalStage::TessellationControl) { + memory = spv::Scope::Invocation; + memory_semantics = spv::MemorySemanticsMask::MaskNone; + } else { + memory = spv::Scope::Workgroup; + memory_semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory; + } ctx.OpControlBarrier(ctx.ConstU32(static_cast(execution)), ctx.ConstU32(static_cast(memory)), ctx.ConstU32(static_cast(memory_semantics))); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d005169c4f..f3db6af56f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -4,6 +4,9 @@ #include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/patch.h" +#include "shader_recompiler/runtime_info.h" #include @@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - ASSERT(info.num_components > 0); - if (info.num_components == 1) { - return info.id; + const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index), + ctx.ConstU32(element)); } else 
{ - return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + const auto& info{ctx.output_params.at(attr_index)}; + ASSERT(info.num_components > 0); + if (info.num_components == 1) { + return info.id; + } else { + return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + } } } if (IR::IsMrt(attr)) { @@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { std::pair OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - return {info.component_type, info.is_integer}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + return {ctx.F32[1], false}; + } else { + const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; + const auto& info{ctx.output_params.at(index)}; + return {info.component_type, info.is_integer}; + } } if (IR::IsMrt(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)}; @@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { rate_idx == 0 ? 
ctx.u32_zero_value : ctx.u32_one_value)); } -Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { +Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { if (IR::IsPosition(attr)) { ASSERT(attr == IR::Attribute::Position0); const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ - ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; + const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; const auto param = ctx.input_params.at(param_id).id; const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u UNREACHABLE(); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (ctx.info.stage == Stage::Geometry) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { + if (ctx.info.l_stage == LogicalStage::Geometry) { return EmitGetAttributeForGeometry(ctx, attr, comp, index); + } else if (ctx.info.l_stage == LogicalStage::TessellationControl || + ctx.info.l_stage == LogicalStage::TessellationEval) { + if 
(IR::IsTessCoord(attr)) { + const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1; + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + const auto pointer{ + ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; + return ctx.OpLoad(ctx.F32[1], pointer); + } else if (IR::IsParam(attr)) { + const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; + const auto param = ctx.input_params.at(param_id).id; + const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + UNREACHABLE(); } if (IR::IsParam(attr)) { @@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { } return coord; } + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); default: - throw NotImplementedException("Read attribute {}", attr); + UNREACHABLE_MSG("Read attribute {}", attr); } } @@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); case IR::Attribute::PrimitiveId: - ASSERT(ctx.info.stage == Stage::Geometry); return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InvocationId: + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + case 
IR::Attribute::PatchVertices: + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); + case IR::Attribute::PackedHullInvocationInfo: { + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + // [0:8]: patch id within VGT + // [8:12]: output control point id + // But 0:8 should be treated as 0 for attribute addressing purposes + if (ctx.runtime_info.hs_info.IsPassthrough()) { + // Gcn shader would run with 1 thread, but we need to run a thread for + // each output control point. + // If Gcn shader uses this value, we should make sure all threads in the + // Vulkan shader use 0 + return ctx.ConstU32(0u); + } else { + const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u)); + } + } default: - throw NotImplementedException("Read U32 attribute {}", attr); + UNREACHABLE_MSG("Read U32 attribute {}", attr); } } @@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen } } +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array, + vertex_index, attr_index, comp_index)); +} + +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { + // Implied vertex index is invocation_id + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + Id pointer = + ctx.OpAccessChain(component_ptr, ctx.output_attr_array, + ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index); + ctx.OpStore(pointer, value); +} + +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + const Id type{ctx.l_stage 
== LogicalStage::TessellationControl ? ctx.output_f32 + : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.ConstU32(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u)); + default: + UNREACHABLE_MSG("Patch {}", u32(patch)); + } + }()}; + ctx.OpStore(pointer, value); +} + template static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 057b0d6929..f71c61af6f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -9,6 +9,7 @@ namespace Shader::IR { enum class Attribute : u64; enum class ScalarReg : u32; +enum class Patch : u64; class Inst; class Value; } // namespace Shader::IR @@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); void EmitReference(EmitContext&); void 
EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitGetScc(EmitContext& ctx); void EmitGetExec(EmitContext& ctx); void EmitGetVcc(EmitContext& ctx); @@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 1ada2f1f9d..2e09e70a75 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/ir/passes/srt.h" +#include "shader_recompiler/runtime_info.h" #include 
"video_core/amdgpu/types.h" #include @@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) { case Stage::Compute: return "cs"; } - throw InvalidArgument("Invalid stage {}", u32(stage)); + UNREACHABLE_MSG("Invalid hw stage {}", u32(stage)); } static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) { @@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, const Info& info_, Bindings& binding_) : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, - profile{profile_}, stage{info.stage}, binding{binding_} { + profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -268,9 +269,8 @@ void EmitContext::DefineInputs() { U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); } - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); @@ -311,12 +311,11 @@ void EmitContext::DefineInputs() { } input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); - interfaces.push_back(id); } } break; } - case Stage::Fragment: + case LogicalStage::Fragment: frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); @@ -351,15 +350,14 @@ void EmitContext::DefineInputs() { } 
input_params[semantic] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); - interfaces.push_back(attr_id); } break; - case Stage::Compute: + case LogicalStage::Compute: workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input); local_invocation_id = DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input); break; - case Stage::Geometry: { + case LogicalStage::Geometry: { primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); const auto gl_per_vertex = Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), @@ -389,15 +387,129 @@ void EmitContext::DefineInputs() { } break; } + case LogicalStage::TessellationControl: { + invocation_id = + DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input); + patch_vertices = + DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + break; + } + case LogicalStage::TessellationEval: { + tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id 
patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + + u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineInput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_in{}", index)); + patches[index] = id; + } + break; + } default: break; } } void EmitContext::DefineOutputs() { - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { + // No point in defining builtin outputs (i.e. position) unless next stage is fragment? + // Might cause problems linking with tcs + + output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || + info.stores.Get(IR::Attribute::Position2) || + info.stores.Get(IR::Attribute::Position3); + if (has_extra_pos_stores) { + const Id type{TypeArray(F32[1], ConstU32(8U))}; + clip_distances = + DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output); + cull_distances = + DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output); + } + if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) { + const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id type{TypeArray(F32[4], ConstU32(num_attrs))}; + output_attr_array = DefineOutput(type, 0); + Name(output_attr_array, "out_attrs"); + } + } else { + for (u32 i = 0; i < IR::NumParams; i++) { + const IR::Attribute param{IR::Attribute::Param0 + i}; + if (!info.stores.GetAny(param)) { + continue; + } + const u32 num_components = info.stores.NumComponents(param); + const Id id{DefineOutput(F32[num_components], i)}; + Name(id, 
fmt::format("out_attr{}", i)); + output_params[i] = + GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); + } + } + break; + } + case LogicalStage::TessellationControl: { + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], ConstU32(4U))}; + output_tess_level_outer = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], ConstU32(2U))}; + output_tess_level_inner = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + + const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray( + per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))}; + output_attr_array = DefineOutput(patch_array_type, 0); + Name(output_attr_array, "out_attrs"); + } + + u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineOutput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_out{}", index)); + patches[index] = id; + } + break; + } + case LogicalStage::TessellationEval: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || info.stores.Get(IR::Attribute::Position2) || @@ -419,11 +531,10 @@ void EmitContext::DefineOutputs() { Name(id, fmt::format("out_attr{}", i)); output_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); - 
interfaces.push_back(id); } break; } - case Stage::Fragment: + case LogicalStage::Fragment: for (u32 i = 0; i < IR::NumRenderTargets; i++) { const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; if (!info.stores.GetAny(mrt)) { @@ -435,22 +546,22 @@ void EmitContext::DefineOutputs() { const Id id{DefineOutput(type, i)}; Name(id, fmt::format("frag_color{}", i)); frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true); - interfaces.push_back(id); } break; - case Stage::Geometry: { + case LogicalStage::Geometry: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); output_params[attr_id] = {id, output_f32, F32[1], 4u}; - interfaces.push_back(id); } break; } - default: + case LogicalStage::Compute: break; + default: + UNREACHABLE(); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index cd12933281..583d96b99b 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -46,14 +46,18 @@ class EmitContext final : public Sirit::Module { void DefineBufferOffsets(); void DefineInterpolatedAttribs(); - [[nodiscard]] Id DefineInput(Id type, u32 location) { - const Id input_id{DefineVar(type, spv::StorageClass::Input)}; - Decorate(input_id, spv::Decoration::Location, location); + [[nodiscard]] Id DefineInput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)}; + if (location) { + Decorate(input_id, spv::Decoration::Location, *location); + } return input_id; } - [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt) { - const Id output_id{DefineVar(type, 
spv::StorageClass::Output)}; + [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)}; if (location) { Decorate(output_id, spv::Decoration::Location, *location); } @@ -131,7 +135,8 @@ class EmitContext final : public Sirit::Module { const Info& info; const RuntimeInfo& runtime_info; const Profile& profile; - Stage stage{}; + Stage stage; + LogicalStage l_stage{}; Id void_id{}; Id U8{}; @@ -188,8 +193,15 @@ class EmitContext final : public Sirit::Module { Id clip_distances{}; Id cull_distances{}; + Id patch_vertices{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + Id tess_coord; + std::array patches{}; + Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch Id subgroup_local_invocation_id{}; Id image_u32{}; @@ -252,6 +264,8 @@ class EmitContext final : public Sirit::Module { bool is_loaded{}; s32 buffer_handle{-1}; }; + Id input_attr_array; + Id output_attr_array; std::array input_params{}; std::array output_params{}; std::array frag_outputs{}; diff --git a/src/shader_recompiler/frontend/tessellation.h b/src/shader_recompiler/frontend/tessellation.h new file mode 100644 index 0000000000..bfcaa4fdce --- /dev/null +++ b/src/shader_recompiler/frontend/tessellation.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Shader { + +struct TessellationDataConstantBuffer { + u32 ls_stride; + u32 hs_cp_stride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride : + // ls_stride + u32 num_patches; // num patches submitted in threadgroup + u32 hs_output_base; // HullStateConstants::m_numInputCP::m_cpStride != 0 ? 
+ // HullStateConstants::m_numInputCP * ls_stride * num_patches : 0 + // basically 0 when passthrough + u32 patch_const_size; // 16 * num_patch_attrs + u32 patch_const_base; // hs_output_base + patch_output_size + u32 patch_output_size; // output_cp_stride * num_output_cp_per_patch + f32 off_chip_tessellation_factor_threshold; + u32 first_edge_tess_factor_index; +}; + +// Assign names to dword fields of TessellationDataConstantBuffer +enum class TessConstantAttribute : u32 { + LsStride, + HsCpStride, + HsNumPatch, + HsOutputBase, + PatchConstSize, + PatchConstBase, + PatchOutputSize, + OffChipTessellationFactorThreshold, + FirstEdgeTessFactorIndex, +}; + +} // namespace Shader \ No newline at end of file diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 5914f9fe36..116935b94e 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,8 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) { void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { const IR::U32 value{GetSrc(inst.src[0])}; - if (info.stage != Stage::Compute) { - SetDst(inst.dst[0], value); - } else { + if (info.l_stage == LogicalStage::Compute || + info.l_stage == LogicalStage::TessellationControl) { SetDst(inst.dst[0], ir.ReadFirstLane(value)); + } else { + SetDst(inst.dst[0], value); } } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 5b411d83e1..1ef0d82d8f 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ 
b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/assert.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BFM_B32(inst); case Opcode::S_MUL_I32: return S_MUL_I32(inst); + case Opcode::S_BFE_I32: + return S_BFE(inst, true); case Opcode::S_BFE_U32: - return S_BFE_U32(inst); + return S_BFE(inst, false); case Opcode::S_ABSDIFF_I32: return S_ABSDIFF_I32(inst); @@ -434,12 +438,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } -void Translator::S_BFE_U32(const GcnInst& inst) { +void Translator::S_BFE(const GcnInst& inst, bool is_signed) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))}; const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))}; - const IR::U32 result{ir.BitFieldExtract(src0, offset, count)}; + const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)}; SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 97978ff6b6..3031e66439 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -8,6 +8,8 @@ #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -34,9 
+36,8 @@ void Translator::EmitPrologue() { } IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (info.stage) { - case Stage::Vertex: - case Stage::Export: + switch (info.l_stage) { + case LogicalStage::Vertex: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 @@ -52,7 +53,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; - case Stage::Fragment: + case LogicalStage::Fragment: dst_vreg = IR::VectorReg::V0; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { ++dst_vreg; // I @@ -122,7 +123,30 @@ void Translator::EmitPrologue() { } } break; - case Stage::Compute: + case LogicalStage::TessellationControl: { + // Should be laid out like: + // [0:8]: patch id within VGT + // [8:12]: output control point id + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); + // TODO PrimitiveId is probably V2 but haven't seen it yet + break; + } + case LogicalStage::TessellationEval: + ir.SetVectorReg(IR::VectorReg::V0, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); + // V2 is similar to PrimitiveID but not the same. It seems to only be used in + // compiler-generated address calculations. Its probably the patch id within the + // patches running locally on a given VGT (or CU, whichever is the granularity of LDS + // memory) + // Set to 0. See explanation in comment describing hull/domain passes + ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u)); + // V3 is the actual PrimitiveID as intended by the shader author. 
+ ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; + case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); @@ -137,7 +161,7 @@ void Translator::EmitPrologue() { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; - case Stage::Geometry: + case LogicalStage::Geometry: switch (runtime_info.gs_info.out_primitive[0]) { case AmdGpu::GsOutputPrimitiveType::TriangleStrip: ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 @@ -152,7 +176,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; default: - throw NotImplementedException("Unknown shader stage"); + UNREACHABLE_MSG("Unknown shader stage"); } } @@ -503,7 +527,8 @@ void Translate(IR::Block* block, u32 pc, std::span inst_list, Inf // Special case for emitting fetch shader. 
if (inst.opcode == Opcode::S_SWAPPC_B64) { - ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export); + ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export || + info.stage == Stage::Local); translator.EmitFetch(inst); continue; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 198cea276b..60bad18649 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -94,7 +94,8 @@ class Translator { void S_ASHR_I32(const GcnInst& inst); void S_BFM_B32(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); - void S_BFE_U32(const GcnInst& inst); + void S_BFE(const GcnInst& inst, bool is_signed); + void S_BFE_I32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); void S_NOT_B32(const GcnInst& inst); @@ -217,7 +218,7 @@ class Translator { // VOP3a void V_MAD_F32(const GcnInst& inst); - void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false); + void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true); void V_MAD_U32_U24(const GcnInst& inst); void V_CUBEID_F32(const GcnInst& inst); void V_CUBESC_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 3e9e677a76..2b32ca2ce3 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; - const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; + IR::U32 src1{GetSrc(inst.src[1])}; + IR::U32 src2{GetSrc(inst.src[2])}; + if (!src1.IsImmediate()) { + src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F)); + } + 
if (!src2.IsImmediate()) { + src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F)); + } SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index eadd1c4db0..072b1f88e7 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -189,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -247,11 +248,15 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::Value soffset{GetSrc(inst.src[3])}; - if (info.stage != Stage::Export && info.stage != Stage::Geometry) { + if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) { ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); } + if (info.stage == Stage::Hull) { + // printf("here\n"); // break + } + IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); @@ -269,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto 
dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 494bbb4bb8..dbea2af8a6 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -11,6 +11,7 @@ #include "common/types.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/copy_shader.h" +#include "shader_recompiler/frontend/tessellation.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/reg.h" @@ -163,6 +164,7 @@ struct Info { UserDataMask ud_mask{}; CopyShaderData gs_copy_data; + u32 uses_patches{}; BufferResourceList buffers; TextureBufferResourceList texture_buffers; @@ -173,8 +175,12 @@ struct Info { PersistentSrtInfo srt_info; std::vector flattened_ud_buf; + IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; + s32 tess_consts_dword_offset = -1; + std::span user_data; Stage stage; + LogicalStage l_stage; u64 pgm_hash{}; VAddr pgm_base; @@ -190,14 +196,16 @@ struct Info { bool uses_shared{}; bool uses_fp16{}; bool uses_fp64{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool translation_failed{}; // indicates that shader has unsupported instructions bool has_readconst{}; u8 mrt_mask{0u}; bool has_fetch_shader{false}; u32 fetch_shader_sgpr_base{0u}; - explicit Info(Stage stage_, ShaderParams params) - : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) + : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, user_data{params.user_data} {} template @@ -244,6 +252,16 @@ struct Info { srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); } } + + void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const { + ASSERT(tess_consts_dword_offset >= 0); // We've already tracked the V# UD + auto buf = 
ReadUdReg(static_cast(tess_consts_ptr_base), + static_cast(tess_consts_dword_offset)); + VAddr tess_constants_addr = buf.base_address; + memcpy(&tess_constants, + reinterpret_cast(tess_constants_addr), + sizeof(tess_constants)); + } }; constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index e219dfb647..6a267e21b1 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) { return "VertexId"; case Attribute::InstanceId: return "InstanceId"; + case Attribute::PrimitiveId: + return "PrimitiveId"; case Attribute::FragCoord: return "FragCoord"; case Attribute::IsFrontFace: @@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; + case Attribute::InvocationId: + return "InvocationId"; + case Attribute::PatchVertices: + return "PatchVertices"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPointU"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPointV"; + case Attribute::PackedHullInvocationInfo: + return "PackedHullInvocationInfo"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 0890e88f10..bcb2b44a9b 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,8 +72,13 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, - InstanceId0 = 78, // step rate 0 - InstanceId1 = 79, // step rate 1 + InstanceId0 = 78, // step rate 0 + InstanceId1 = 79, // step rate 1 + InvocationId = 80, // TCS id in output patch and instanced geometry shader id + PatchVertices = 81, + TessellationEvaluationPointU = 82, + TessellationEvaluationPointV = 83, + 
PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID Max, }; @@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept { return attribute >= Attribute::Position0 && attribute <= Attribute::Position3; } +constexpr bool IsTessCoord(Attribute attribute) noexcept { + return attribute >= Attribute::TessellationEvaluationPointU && + attribute <= Attribute::TessellationEvaluationPointV; +} + constexpr bool IsParam(Attribute attribute) noexcept { return attribute >= Attribute::Param0 && attribute <= Attribute::Param31; } diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index b4d1a78c78..a312eabde1 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map& inst_to_index, size return fmt::format("{}", arg.VectorReg()); case Type::Attribute: return fmt::format("{}", arg.Attribute()); + case Type::Patch: + return fmt::format("{}", arg.Patch()); default: return ""; } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 3ebc82e64d..21df53391f 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) { Inst(Opcode::SetM0, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) { - return Inst(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index)); +F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) { + return Inst(Opcode::GetAttribute, attribute, Imm32(comp), index); } U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { @@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Inst(Opcode::SetAttribute, attribute, value, Imm32(comp)); } +F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& 
attr_index, + const U32& comp_index) { + return Inst(IR::Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index); +} + +void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, + const U32& comp_index) { + Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); +} + +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { case 32: @@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu } } +Value IREmitter::CompositeConstruct(std::span elements) { + switch (elements.size()) { + case 2: + return CompositeConstruct(elements[0], elements[1]); + case 3: + return CompositeConstruct(elements[0], elements[1], elements[2]); + case 4: + return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]); + default: + UNREACHABLE_MSG("Composite construct with greater than 4 elements"); + } +} + Value IREmitter::CompositeExtract(const Value& vector, size_t element) { const auto read{[&](Opcode opcode, size_t limit) -> Value { if (element >= limit) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 068aba14dd..95713565b7 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -10,6 +10,7 @@ #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/condition.h" +#include "shader_recompiler/ir/patch.h" #include "shader_recompiler/ir/value.h" namespace Shader::IR { @@ -80,10 +81,18 @@ class IREmitter { [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, 
+ IR::Value index = IR::Value(u32(0u))); [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); + [[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); @@ -138,6 +147,8 @@ class IREmitter { [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, const Value& e4); + [[nodiscard]] Value CompositeConstruct(std::span values); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); @@ -335,6 +346,7 @@ class IREmitter { template T Inst(Opcode op, Args... 
args) { auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; + it->SetParent(block); return T{Value{&*it}}; } @@ -352,6 +364,7 @@ class IREmitter { u32 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + it->SetParent(block); return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 9b4ad63d2e..6e7bbe6612 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Discard: case Opcode::DiscardCond: case Opcode::SetAttribute: + case Opcode::SetTcsGenericAttribute: + case Opcode::SetPatch: case Opcode::StoreBufferU32: case Opcode::StoreBufferU32x2: case Opcode::StoreBufferU32x3: diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h index be640297a7..cd73ace7eb 100644 --- a/src/shader_recompiler/ir/opcodes.h +++ b/src/shader_recompiler/ir/opcodes.h @@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque}; constexpr Type ScalarReg{Type::ScalarReg}; constexpr Type VectorReg{Type::VectorReg}; constexpr Type Attribute{Type::Attribute}; -constexpr Type SystemValue{Type::SystemValue}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 477275824b..470f9fbe5d 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32, OPCODE(GetAttribute, F32, Attribute, U32, U32, ) OPCODE(GetAttributeU32, U32, Attribute, U32, ) OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) +OPCODE(GetTessGenericAttribute, F32, 
U32, U32, U32, ) +OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) // Flags OPCODE(GetScc, U1, Void, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 9624ce6a5c..16b07e1a1a 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) { } } +template +void FoldMul(IR::Block& block, IR::Inst& inst) { + if (!FoldCommutative(inst, [](T a, T b) { return a * b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg(rhs) == 0) { + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + return; + } +} + void FoldCmpClass(IR::Block& block, IR::Inst& inst) { ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation"); const auto class_mask = static_cast(inst.Arg(1).U32()); @@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { FoldWhenAllImmediates(inst, [](u32 a) { return static_cast(a); }); return; case IR::Opcode::IMul32: - FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); + FoldMul(block, inst); + return; + case IR::Opcode::UDiv32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0"); + return a / b; + }); + return; + case IR::Opcode::UMod32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0"); + return a % b; + }); return; case IR::Opcode::FPCmpClass32: FoldCmpClass(block, inst); diff --git a/src/shader_recompiler/ir/passes/constant_propogation.h b/src/shader_recompiler/ir/passes/constant_propogation.h new file mode 100644 index 0000000000..313a3cc6a1 --- /dev/null +++ b/src/shader_recompiler/ir/passes/constant_propogation.h @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: 
GPL-2.0-or-later + +#pragma once \ No newline at end of file diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp new file mode 100644 index 0000000000..5cf02b6d0d --- /dev/null +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -0,0 +1,744 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/pattern_matching.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Optimization { + +/** + * Tessellation shaders pass outputs to the next shader using LDS. + * The Hull shader stage receives input control points stored in LDS. + * + * These passes attempt to resolve LDS accesses to attribute accesses and correctly + * write to the tessellation factor tables. + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 + * - ... + * - TCS outputs for patch 0 + * - TCS outputs for patch 1 + * - TCS outputs for patch 2 + * - ... + * - PatchConst TCS outputs for patch 0 + * - PatchConst TCS outputs for patch 1 + * - PatchConst TCS outputs for patch 2 + * + * + * If the Hull stage does not write any new control points the driver will + * optimize LDS layout so input and output control point spaces overlap. + * (Passthrough) + * + * The gnm driver requires a V# holding special constants to be bound + * for reads by the shader. + * The Hull and Domain shaders read values from this buffer which + * contain size and offset information required to address input, output, + * or PatchConst attributes within the current patch. 
+ * See the TessellationDataConstantBuffer struct for the layout of this V#. + * + * Tessellation factors are stored to a special tessellation factor V# that is automatically bound + * by the driver. This is the input to the fixed function tessellator that actually subdivides the + * domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull + * shader. + * The offset into the tess factor buffer determines which factor the shader is writing. + * Additionally, most hull shaders seem to redundantly write tess factors to PatchConst + * attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes. + * + * LDS reads in the Hull shader can be from input control points, and in the Domain shader can + * be hs output control points (output from the perspective of the Hull shader) and patchconst + * values. + * LDS stores in the Hull shader can either be output control point writes or per-patch + * (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless it's + * followed by the geometry stage (but we haven't seen this yet), so nothing special there. + * The address calculations can vary significantly and can't be easily pattern matched. We are at + * the mercy of instruction selection the ps4 compiler wanted to use. 
+ * Generally though, they could look something like this: + * Input control point: + * addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride + * + attr# * 16 + component + * Output control point: + * addr = #patches * input_cp_stride * #input_cp_per_patch + * + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride + * + attr# * 16 + component + * Per patch output: + * addr = #patches * input_cp_stride * #input_cp_per_patch + * + #patches * output_patch_stride + * + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component + * + * output_patch_stride and output_cp_stride are usually compile-time constants in the GCN code + * + * Hull shaders can probably also read output control points corresponding to other threads, like + * shared memory (but we haven't seen this yet). + * ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens. + * They should also be able to read PatchConst values, + * although not sure if this happens in practice. + * + * To determine which type of attribute (input, output, patchconst) we check the users of + * TessConstants V# reads to deduce which type of attribute a given load/store to LDS + * is touching. + * + * In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control + * point id (InvocationId) are packed in VGPR1 by the driver like + * V1 = InvocationId << 8 | PatchIdInVgt + * The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine + * which is which. + * + * This pass does not attempt to deduce the exact attribute referenced in an LDS load/store. + * Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's, + * TES in's, and PatchConst in/out's. 
+ * + * TCS/TES Input attributes: + * We define input attributes using an array in the shader roughly like this: + * // equivalent GLSL in TCS + * layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES]; + * + * Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#. + * We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes. + * For TES, the number of attributes comes from hs_cp_stride / 16. + * The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input + * patch (for Hull) or the hs output patch (for Domain). + * + * For input reads in TCS or TES, we emit SPIR-V like: + * float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2]; + * + * For output writes, we assume the control point index is InvocationId, since high level languages + * impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks + * like this: + * layout (location = 0) out vec4 out_attrs[][NUM_OUTPUT_ATTRIBUTES]; + * out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value; + * + * NUM_OUTPUT_ATTRIBUTES is derived from hs_cp_stride / 16, so it can link with the TES in_attrs + * variable. + * + * Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole + * which contains the attributes from many patches. On the other hand, higher level shading + * languages restrict attribute access to the patch of the current thread, which is naturally a + * restriction in SPIR-V also. + * The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many + * patches' attributes are just implementation details of the ps4 driver/compiler. To deal with + * this, we can replace certain TessConstant V# reads with 0, which only contribute to the base + * address of the current patch's attributes in LDS and not the indexes within the local patch. 
+ * + * (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing + * attributes to buffers and not caring about whether they are hs input, hs output, or patchconst + * attributes) + * + */ + +namespace { + +using namespace Shader::Optimiation::PatternMatching; + +static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset, + Shader::Info& info, Shader::RuntimeInfo& runtime_info, + TessellationDataConstantBuffer& tess_constants) { + info.tess_consts_ptr_base = sharp_ptr_base; + info.tess_consts_dword_offset = sharp_dword_offset; + info.ReadTessConstantBuffer(tess_constants); + if (info.l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + + return; +} + +struct TessSharpLocation { + IR::ScalarReg ptr_base; + u32 dword_off; +}; + +std::optional FindTessConstantSharp(IR::Inst* read_const_buffer) { + IR::Value sharp_ptr_base; + IR::Value sharp_dword_offset; + + IR::Value rv = IR::Value{read_const_buffer}; + IR::Value handle = read_const_buffer->Arg(0); + + if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(), + MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = IR::ScalarReg::Max, + .dword_off = static_cast(sharp_dword_offset.ScalarReg())}; + } else if (M_COMPOSITECONSTRUCTU32X4( + M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)), + MatchIgnore()), + MatchImm(sharp_dword_offset)), + MatchIgnore(), MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(), + .dword_off = sharp_dword_offset.U32()}; + } + return {}; +} + +// Walker that helps deduce what type of attribute a DS instruction is reading +// or writing, which could be an input control point, output control point, +// or per-patch constant (PatchConst). 
+// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users +// recursively and increment a counter on the Load/WriteShared users. +// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase), +// and PatchConstBase (m_patchConstBase). +// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch +// is used as an addend to skip the region for input control points, and similarly +// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region +// for output control points. +// +// TODO: this will break if AMD compiler used distributive property like +// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch) +class TessConstantUseWalker { +public: + void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) { + u32 inc; + switch (attr) { + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + inc = 1; + break; + case TessConstantAttribute::PatchConstBase: + inc = 2; + break; + default: + UNREACHABLE(); + } + + for (IR::Use use : read_const_buffer->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + + ++seq_num; + } + +private: + void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) { + IR::Inst* inst = use.user; + + switch (use.user->GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + u32 counter = inst->Flags(); + inst->SetFlags(counter + inc); + // Stop here + return; + } + case IR::Opcode::Phi: { + struct PhiCounter { + u16 seq_num; + u8 unique_edge; + u8 counter; + }; + + PhiCounter count = inst->Flags(); + ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand); + // the point of seq_num is to tell us if we've already traversed this + // phi on the current walk. 
Alternatively we could keep a set of phi's + // seen on the current walk. This is to handle phi cycles + if (count.seq_num == 0) { + // First time we've encountered this phi + count.seq_num = seq_num; + // Mark the phi as having been traversed originally through this edge + count.unique_edge = use.operand; + count.counter = inc; + } else if (count.seq_num < seq_num) { + count.seq_num = seq_num; + // For now, assume we are visiting this phi via the same edge + // as on other walks. If not, some dataflow analysis might be necessary + ASSERT(count.unique_edge == use.operand); + count.counter += inc; + } else { + // count.seq_num == seq_num + // there's a cycle, and we've already been here on this walk + return; + } + inst->SetFlags(count); + break; + } + default: + break; + } + + for (IR::Use use : inst->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + } + + u32 seq_num{1u}; +}; + +enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst }; + +static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info, + const Shader::RuntimeInfo& runtime_info) { + u32 count = ring_access->Flags(); + if (count == 0) { + return AttributeRegion::InputCP; + } else if (info.l_stage == LogicalStage::TessellationControl && + runtime_info.hs_info.IsPassthrough()) { + ASSERT(count <= 1); + return AttributeRegion::PatchConst; + } else { + ASSERT(count <= 2); + return AttributeRegion(count); + } +} + +static bool IsDivisibleByStride(IR::Value term, u32 stride) { + IR::Value a, b; + if (MatchU32(stride).Match(term)) { + return true; + } else if (M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) || + M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term)) { + return IsDivisibleByStride(a, stride); + } else if (M_IMUL32(MatchValue(a), MatchValue(b)).Match(term)) { + return IsDivisibleByStride(a, stride) || IsDivisibleByStride(b, stride); + } + return false; +} + +// Return true if we can eliminate any 
addends +static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector& addends) { + IR::Value a, b; + if (M_IADD32(MatchValue(a), MatchValue(b)).Match(addend)) { + bool ret = false; + ret = TryOptimizeAddendInModulo(a, stride, addends); + ret |= TryOptimizeAddendInModulo(b, stride, addends); + return ret; + } else if (!IsDivisibleByStride(addend, stride)) { + addends.push_back(IR::U32{addend}); + return false; + } else { + return true; + } +} + +// In calculation (a + b + ...) % stride +// Use this fact +// (a + b) mod N = (a mod N + b mod N) mod N +// If any addend is divisible by stride, then we can replace it with 0 in the attribute +// or component index calculation +static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) { + std::vector addends; + if (TryOptimizeAddendInModulo(addr, stride, addends)) { + addr = ir.Imm32(0); + for (auto& addend : addends) { + addr = ir.IAdd(addr, addend); + } + } + return addr; +} + +// TODO: can optimize div in control point index similarly to mod + +// Read a TCS input (InputCP region) or TES input (OutputCP region) +static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride)); + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = + ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = + ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); +} + +} // namespace + +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + const Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + const auto opcode = 
inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: { + const auto info = inst.Flags(); + if (!info.globally_coherent) { + break; + } + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto GetValue = [&](IR::Value data) -> IR::F32 { + if (auto* inst = data.TryInstRecursive(); + inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) { + return IR::F32{inst->Arg(0)}; + } + return ir.BitCast(IR::U32{data}); + }; + const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1; + IR::U32 index = IR::U32{inst.Arg(1)}; + ASSERT(index.IsImmediate()); + const u32 gcn_factor_idx = (info.inst_offset.Value() + index.U32()) >> 2; + + const IR::Value data = inst.Arg(2); + auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch { + // The hull outputs tess factors in different formats depending on the shader. + // For triangle domains, it seems to pack the entries into 4 consecutive floats, + // with the 3 edge factors followed by the 1 interior factor. + // For quads, it does 4 edge factors then 2 interior. + // There is a tess factor stride member of the GNMX hull constants struct in + // a hull program shader binary archive, but this doesn't seem to be + // communicated to the driver. + // The layout seems to be implied by the type of the abstract domain. 
+ switch (runtime_info.hs_info.tess_type) { + case AmdGpu::TessellationType::Quad: + ASSERT(gcn_factor_idx < 6); + return IR::PatchFactor(gcn_factor_idx); + case AmdGpu::TessellationType::Triangle: + ASSERT(gcn_factor_idx < 4); + if (gcn_factor_idx == 3) { + return IR::Patch::TessellationLodInteriorU; + } + return IR::PatchFactor(gcn_factor_idx); + default: + // Point domain types haven't been seen so far + UNREACHABLE_MSG("Unhandled tess type"); + } + }; + + inst.Invalidate(); + if (num_dwords == 1) { + ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data)); + break; + } + auto* inst = data.TryInstRecursive(); + ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4)); + for (s32 i = 0; i < num_dwords; i++) { + ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(inst->Arg(i))); + } + break; + } + + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 + ? 1 + : (opcode == IR::Opcode::WriteSharedU64 ? 
2 : 4); + const IR::U32 addr{inst.Arg(0)}; + const IR::U32 data{inst.Arg(1).Resolve()}; + + const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind, + u32 off_dw) { + const IR::F32 data_component = ir.BitCast(value); + + if (output_kind == AttributeRegion::OutputCP) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + u32 stride = runtime_info.hs_info.hs_output_cp_stride; + // Invocation ID array index is implicit, handled by SPIRV backend + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = ir.ShiftRightLogical( + ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = ir.ShiftRightLogical( + ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + ir.SetTcsGenericAttribute(data_component, attr_index, comp_index); + } else { + ASSERT(output_kind == AttributeRegion::PatchConst); + ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}", + fmt::ptr(addr.Inst())); + ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component); + } + }; + + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + if (num_dwords == 1) { + SetOutput(addr, data, region, 0); + } else { + for (auto i = 0; i < num_dwords; i++) { + SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i); + } + } + inst.Invalidate(); + break; + } + + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 
2 : 4); + ASSERT_MSG(region == AttributeRegion::InputCP, + "Unhandled read of output or patchconst attribute in hull shader"); + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast( + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + + default: + break; + } + } + } + + if (runtime_info.hs_info.IsPassthrough()) { + // Copy input attributes to output attributes, indexed by InvocationID + // Passthrough should imply that input and output patches have same number of vertices + IR::Block* entry_block = *program.blocks.begin(); + auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Prologue; + }); + ASSERT(it != entry_block->end()); + ++it; + ASSERT(it != entry_block->end()); + ++it; + // Prologue + // SetExec #true + // <- insert here + // ... 
+ IR::IREmitter ir{*entry_block, it}; + + ASSERT(runtime_info.hs_info.ls_stride % 16 == 0); + u32 num_attributes = runtime_info.hs_info.ls_stride / 16; + const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId); + for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) { + for (u32 comp = 0; comp < 4; comp++) { + IR::F32 attr_read = + ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp)); + // InvocationId is implicit index for output control point writes + ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp)); + } + } + // We could wrap the rest of the program in an if stmt + // CopyInputAttrsToOutputs(); // pseudocode + // if (InvocationId == 0) { + // PatchConstFunction(); + // } + // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally + } +} + +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto opcode = inst.GetOpcode(); + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 
2 : 4); + const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { + if (region == AttributeRegion::OutputCP) { + return ReadTessInputComponent( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + } else { + ASSERT(region == AttributeRegion::PatchConst); + return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); + } + }; + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast(GetInput(addr, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = GetInput(addr, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + default: + break; + } + } + } +} + +// Run before either hull or domain transform +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { + TessellationDataConstantBuffer tess_constants; + Shader::Info& info = program.info; + // Find the TessellationDataConstantBuffer V# + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + auto found_tess_consts_sharp = [&]() -> bool { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::Value addr = inst.Arg(0); + auto read_const_buffer = IR::BreadthFirstSearch( + addr, [](IR::Inst* maybe_tess_const) -> std::optional { + if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) { + return maybe_tess_const; + } + return std::nullopt; + }); + if (read_const_buffer) { + auto sharp_location = FindTessConstantSharp(read_const_buffer.value()); + if (sharp_location) { + if (info.tess_consts_dword_offset >= 0) { + // It's possible there's a ReadConstBuffer that contributes to an + // LDS address and isn't a TessConstant V# 
read. Could improve on + // this somehow + ASSERT_MSG(static_cast(sharp_location->dword_off) == + info.tess_consts_dword_offset && + sharp_location->ptr_base == + info.tess_consts_ptr_base, + "TessConstants V# is ambiguous"); + } + InitTessConstants(sharp_location->ptr_base, + static_cast(sharp_location->dword_off), info, + runtime_info, tess_constants); + return true; + } + UNREACHABLE_MSG("Failed to match tess constant sharp"); + } + return false; + } + default: + return false; + } + }(); + + if (found_tess_consts_sharp) { + break; + } + } + } + + ASSERT(info.tess_consts_dword_offset >= 0); + + TessConstantUseWalker walker; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { + auto sharp_location = FindTessConstantSharp(&inst); + if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base && + sharp_location->dword_off == info.tess_consts_dword_offset) { + // The shader is reading from the TessConstants V# + IR::Value index = inst.Arg(1); + + ASSERT_MSG(index.IsImmediate(), + "Tessellation constant read with dynamic index"); + u32 off_dw = index.U32(); + ASSERT(off_dw <= + static_cast(TessConstantAttribute::FirstEdgeTessFactorIndex)); + + auto tess_const_attr = static_cast(off_dw); + switch (tess_const_attr) { + case TessConstantAttribute::LsStride: + // If not, we may need to make this runtime state for TES + ASSERT(info.l_stage == LogicalStage::TessellationControl); + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride)); + break; + case TessConstantAttribute::HsCpStride: + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride)); + break; + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + case TessConstantAttribute::PatchConstBase: + walker.MarkTessAttributeUsers(&inst, tess_const_attr); + // We should be able to safely set these to 0 so that indexing happens only + // within the 
local patch in the recompiled Vulkan shader. This assumes + // these values only contribute to address calculations for in/out + // attributes in the original gcn shader. + // See the explanation for why we set V2 to 0 when emitting the prologue. + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + break; + case Shader::TessConstantAttribute::PatchConstSize: + case Shader::TessConstantAttribute::PatchOutputSize: + case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold: + case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex: + // May need to replace PatchConstSize and PatchOutputSize with 0 + break; + default: + UNREACHABLE_MSG("Read past end of TessConstantsBuffer"); + } + } + } + } + } + + // This pattern matching is necessary for now unless we support dynamic indexing of + // PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single + // vec4 array like in/out attrs. Not sure about tess factors. + if (info.l_stage == LogicalStage::TessellationControl) { + // Replace the BFEs on V1 (packed with patch id within VGT and output cp id) + for (IR::Block* block : program.blocks) { + for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) { + IR::Inst& inst = *it; + if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(0), MatchU32(8)) + .Match(IR::Value{&inst})) { + IR::IREmitter emit(*block, it); + // This is the patch id within the VGT, not the actual PrimitiveId + // in the draw + IR::Value replacement(0u); + inst.ReplaceUsesWithAndRemove(replacement); + } else if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32( + MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(8), MatchU32(5)) + .Match(IR::Value{&inst})) { + IR::IREmitter ir(*block, it); + IR::Value replacement; + if (runtime_info.hs_info.IsPassthrough()) { + // Deal with annoying pattern in BB where InvocationID use makes 
no + // sense (in addr calculation for patchconst or tess factor write) + replacement = ir.Imm32(0); + } else { + replacement = ir.GetAttributeU32(IR::Attribute::InvocationId); + } + inst.ReplaceUsesWithAndRemove(replacement); + } + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 7bd47992c8..61f43e7e48 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -18,5 +18,8 @@ void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, Stage stage); +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index eb1be29674..d6f1efb12f 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -1,11 +1,13 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/recompiler.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Optimization { @@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim }; switch (stage) { + case Stage::Local: { + ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { + const auto opcode = 
inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU32: { + bool is_composite = opcode == IR::Opcode::WriteSharedU64; + u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2; + + u32 offset = 0; + const auto* addr = inst.Arg(0).InstRecursive(); + if (addr->GetOpcode() == IR::Opcode::IAdd32) { + ASSERT(addr->Arg(1).IsImmediate()); + offset = addr->Arg(1).U32(); + } + IR::Value data = inst.Arg(1).Resolve(); + for (s32 i = 0; i < num_components; i++) { + const auto attrib = IR::Attribute::Param0 + (offset / 16); + const auto comp = (offset / 4) % 4; + const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data}; + ir.SetAttribute(attrib, ir.BitCast(value), comp); + offset += 4; + } + inst.Invalidate(); + break; + } + default: + break; + } + }); + break; + } case Stage::Export: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } @@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::LoadBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } const auto shl_inst = inst.Arg(1).TryInstRecursive(); - const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2; + const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2); const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1); const auto bucket = offset.Resolve().U32() / 256u; const auto attrib = bucket < 4 ? 
IR::Attribute::Position0 @@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto buffer_info = inst.Flags(); + if (!buffer_info.system_coherent || !buffer_info.globally_coherent) { break; } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 8b93d72e37..c34b59b88b 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::GetUserData: info.ud_mask.Set(inst.Arg(0).ScalarReg()); break; + case IR::Opcode::SetPatch: { + const auto patch = inst.Arg(0).Patch(); + if (patch <= IR::Patch::TessellationLodBottom) { + info.stores_tess_level_outer = true; + } else if (patch <= IR::Patch::TessellationLodInteriorV) { + info.stores_tess_level_inner = true; + } else { + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + } + break; + } + case IR::Opcode::GetPatch: { + const auto patch = inst.Arg(0).Patch(); + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + break; + } case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/patch.cpp b/src/shader_recompiler/ir/patch.cpp new file mode 100644 index 0000000000..2485bc5b4e --- /dev/null +++ b/src/shader_recompiler/ir/patch.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/patch.h" + +namespace Shader::IR { + +std::string NameOf(Patch patch) { + switch (patch) { + case Patch::TessellationLodLeft: + return "TessellationLodLeft"; + case Patch::TessellationLodTop: + return "TessellationLodTop"; + case Patch::TessellationLodRight: + return 
"TessellationLodRight"; + case Patch::TessellationLodBottom: + return "TessellationLodBottom"; + case Patch::TessellationLodInteriorU: + return "TessellationLodInteriorU"; + case Patch::TessellationLodInteriorV: + return "TessellationLodInteriorV"; + default: + const u32 index = u32(patch) - u32(Patch::Component0); + return fmt::format("Component{}", index); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/patch.h b/src/shader_recompiler/ir/patch.h new file mode 100644 index 0000000000..65d2192e69 --- /dev/null +++ b/src/shader_recompiler/ir/patch.h @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + 
Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 125); + +constexpr bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +constexpr Patch PatchFactor(u32 index) { + return static_cast(index); +} + +constexpr Patch PatchGeneric(u32 index) { + return static_cast(static_cast(Patch::Component0) + index); +} + +constexpr u32 GenericPatchIndex(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +constexpr u32 GenericPatchElement(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +[[nodiscard]] std::string NameOf(Patch patch); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::IR::Patch patch, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch)); + } +}; diff --git a/src/shader_recompiler/ir/pattern_matching.h b/src/shader_recompiler/ir/pattern_matching.h new file mode 100644 index 0000000000..1279f14c3e --- /dev/null +++ b/src/shader_recompiler/ir/pattern_matching.h @@ -0,0 +1,127 @@ +// 
SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::Optimiation::PatternMatching { + +// Attempt at pattern matching for Insts and Values +// Needs improvement, mostly a convenience + +template +struct MatchObject { + inline bool Match(IR::Value v) { + return static_cast(this)->Match(v); + } +}; + +struct MatchValue : MatchObject { + MatchValue(IR::Value& return_val_) : return_val(return_val_) {} + + inline bool Match(IR::Value v) { + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchIgnore : MatchObject { + MatchIgnore() {} + + inline bool Match(IR::Value v) { + return true; + } +}; + +struct MatchImm : MatchObject { + MatchImm(IR::Value& v) : return_val(v) {} + + inline bool Match(IR::Value v) { + if (!v.IsImmediate()) { + return false; + } + + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchAttribute : MatchObject { + MatchAttribute(IR::Attribute attribute_) : attribute(attribute_) {} + + inline bool Match(IR::Value v) { + return v.Type() == IR::Type::Attribute && v.Attribute() == attribute; + } + +private: + IR::Attribute attribute; +}; + +struct MatchU32 : MatchObject { + MatchU32(u32 imm_) : imm(imm_) {} + + inline bool Match(IR::Value v) { + return v.IsImmediate() && v.Type() == IR::Type::U32 && v.U32() == imm; + } + +private: + u32 imm; +}; + +template +struct MatchInstObject : MatchObject> { + static_assert(sizeof...(Args) == IR::NumArgsOf(opcode)); + MatchInstObject(Args&&... 
args) : pattern(std::forward_as_tuple(args...)) {} + + inline bool Match(IR::Value v) { + IR::Inst* inst = v.TryInstRecursive(); + if (!inst || inst->GetOpcode() != opcode) { + return false; + } + + bool matched = true; + + [&](std::index_sequence) { + ((matched = matched && std::get(pattern).Match(inst->Arg(Is))), ...); + }(std::make_index_sequence{}); + + return matched; + } + +private: + using MatchArgs = std::tuple; + MatchArgs pattern; +}; + +template +inline auto MakeInstPattern(Args&&... args) { + return MatchInstObject(std::forward(args)...); +} + +// Conveniences. TODO probably simpler way of doing this +#define M_READCONST(...) MakeInstPattern(__VA_ARGS__) +#define M_GETUSERDATA(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDUEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDSEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_GETATTRIBUTEU32(...) MakeInstPattern(__VA_ARGS__) +#define M_UMOD32(...) MakeInstPattern(__VA_ARGS__) +#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern(__VA_ARGS__) +#define M_IADD32(...) MakeInstPattern(__VA_ARGS__) +#define M_IMUL32(...) MakeInstPattern(__VA_ARGS__) +#define M_BITWISEAND32(...) MakeInstPattern(__VA_ARGS__) +#define M_GETTESSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_SETTCSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X2(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X4(...) 
\ + MakeInstPattern(__VA_ARGS__) + +} // namespace Shader::Optimiation::PatternMatching \ No newline at end of file diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index ca2e9ceb96..19e0da3dd9 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -49,7 +49,8 @@ union BufferInstInfo { BitField<0, 1, u32> index_enable; BitField<1, 1, u32> offset_enable; BitField<2, 12, u32> inst_offset; - BitField<14, 1, u32> ring_access; // global + system coherency + BitField<14, 1, u32> system_coherent; + BitField<15, 1, u32> globally_coherent; }; enum class ScalarReg : u32 { diff --git a/src/shader_recompiler/ir/type.h b/src/shader_recompiler/ir/type.h index ec855a77e4..0f043fb644 100644 --- a/src/shader_recompiler/ir/type.h +++ b/src/shader_recompiler/ir/type.h @@ -15,7 +15,7 @@ enum class Type { ScalarReg = 1 << 1, VectorReg = 1 << 2, Attribute = 1 << 3, - SystemValue = 1 << 4, + Patch = 1 << 4, U1 = 1 << 5, U8 = 1 << 6, U16 = 1 << 7, diff --git a/src/shader_recompiler/ir/value.cpp b/src/shader_recompiler/ir/value.cpp index 889e99556d..8826b80f26 100644 --- a/src/shader_recompiler/ir/value.cpp +++ b/src/shader_recompiler/ir/value.cpp @@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index dbe8b5cc4f..ed1e5536aa 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -16,6 +16,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/patch.h" #include 
"shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" @@ -34,6 +35,7 @@ class Value { explicit Value(IR::ScalarReg reg) noexcept; explicit Value(IR::VectorReg reg) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch patch) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -56,6 +58,7 @@ class Value { [[nodiscard]] IR::ScalarReg ScalarReg() const; [[nodiscard]] IR::VectorReg VectorReg() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -75,6 +78,7 @@ class Value { IR::ScalarReg sreg; IR::VectorReg vreg; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; @@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const { return attribute; } +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + inline bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 64f842c42b..ad57adb6a4 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { } IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile) { + RuntimeInfo& runtime_info, const Profile& profile) { // Ensure 
first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; if (code[0] != token_mov_vcchi) { @@ -60,12 +63,29 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); // Run optimization passes + const auto stage = program.info.stage; + Shader::Optimization::SsaRewritePass(program.post_order_blocks); + Shader::Optimization::IdentityRemovalPass(program.blocks); + if (info.l_stage == LogicalStage::TessellationControl) { + // Tess passes require previous const prop passes for now (for simplicity). TODO allow + // fine grained folding or opportunistic folding we set an operand to an immediate + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::HullShaderTransform(program, runtime_info); + } else if (info.l_stage == LogicalStage::TessellationEval) { + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::DomainShaderTransform(program, runtime_info); + } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - if (program.info.stage != Stage::Compute) { + Shader::Optimization::RingAccessElimination(program, runtime_info, stage); + if (stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } - Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::FlattenExtendedUserdataPass(program); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::IdentityRemovalPass(program.blocks); diff --git 
a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index f8acf6c9ed..8180c29b36 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -28,6 +28,6 @@ struct Pools { }; [[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile); + RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4c779a368b..23e23c1181 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -7,6 +7,7 @@ #include #include #include "common/types.h" +#include "shader_recompiler/frontend/tessellation.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/types.h" @@ -21,12 +22,31 @@ enum class Stage : u32 { Local, Compute, }; -constexpr u32 MaxStageTypes = 7; + +// Vertex intentionally comes after TCS/TES due to order of compilation +enum class LogicalStage : u32 { + Fragment, + TessellationControl, + TessellationEval, + Vertex, + Geometry, + Compute, + NumLogicalStages +}; + +constexpr u32 MaxStageTypes = static_cast(LogicalStage::NumLogicalStages); [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); } +struct LocalRuntimeInfo { + u32 ls_stride; + bool links_with_tcs; + + auto operator<=>(const LocalRuntimeInfo&) const noexcept = default; +}; + struct ExportRuntimeInfo { u32 vertex_data_size; @@ -64,9 +84,57 @@ struct VertexRuntimeInfo { u32 num_outputs; std::array outputs; bool emulate_depth_negative_one_to_one{}; + // Domain + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + AmdGpu::TessellationPartitioning tess_partitioning; + u32 hs_output_cp_stride{}; bool operator==(const VertexRuntimeInfo& other) const noexcept { - return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one; + 
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && + tess_type == other.tess_type && tess_topology == other.tess_topology && + tess_partitioning == other.tess_partitioning && + hs_output_cp_stride == other.hs_output_cp_stride; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + hs_output_cp_stride = tess_constants.hs_cp_stride; + } +}; + +struct HullRuntimeInfo { + // from registers + u32 num_input_control_points; + u32 num_threads; + AmdGpu::TessellationType tess_type; + + // from tess constants buffer + u32 ls_stride; + u32 hs_output_cp_stride; + u32 hs_output_base; + + auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + + // It might be possible for a non-passthrough TCS to have these conditions, in some + // dumb situation. + // In that case, it should be fine to assume passthrough and declare some extra + // output control points and attributes that shouldn't be read by the TES anyway + bool IsPassthrough() const { + return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1; + }; + + // regs.ls_hs_config.hs_output_control_points contains the number of threads, which + // isn't exactly the number of output control points. + // For passthrough shaders, the register field is set to 1, so use the number of + // input control points + u32 NumOutputControlPoints() const { + return IsPassthrough() ? 
num_input_control_points : num_threads; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + ls_stride = tess_constants.ls_stride; + hs_output_cp_stride = tess_constants.hs_cp_stride; + hs_output_base = tess_constants.hs_output_base; } }; @@ -150,8 +218,10 @@ struct RuntimeInfo { AmdGpu::FpDenormMode fp_denorm_mode32; AmdGpu::FpRoundMode fp_round_mode32; union { + LocalRuntimeInfo ls_info; ExportRuntimeInfo es_info; VertexRuntimeInfo vs_info; + HullRuntimeInfo hs_info; GeometryRuntimeInfo gs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; @@ -174,6 +244,10 @@ struct RuntimeInfo { return es_info == other.es_info; case Stage::Geometry: return gs_info == other.gs_info; + case Stage::Hull: + return hs_info == other.hs_info; + case Stage::Local: + return ls_info == other.ls_info; default: return true; } diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 9b5dd8fa1f..5799c4c955 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -127,6 +127,18 @@ struct StageSpecialization { [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { spec.force_unnormalized = sharp.force_unnormalized; }); + + // Initialize runtime_info fields that rely on analysis in tessellation passes + if (info->l_stage == LogicalStage::TessellationControl || + info->l_stage == LogicalStage::TessellationEval) { + Shader::TessellationDataConstantBuffer tess_constants; + info->ReadTessConstantBuffer(tess_constants); + if (info->l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + } } void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9bc3454d8c..b6172d37bc 100644 --- a/src/video_core/amdgpu/liverpool.h +++ 
b/src/video_core/amdgpu/liverpool.h @@ -143,6 +143,13 @@ struct Liverpool { } }; + struct HsTessFactorClamp { + // I've only seen min=0.0, max=1.0 so far. + // TODO why is max set to 1.0? Makes no sense + float hs_max_tess; + float hs_min_tess; + }; + struct ComputeProgram { u32 dispatch_initiator; u32 dim_x; @@ -956,6 +963,7 @@ struct Liverpool { enum VgtStages : u32 { Vs = 0u, // always enabled EsGs = 0xB0u, + LsHs = 0x45u, }; VgtStages raw; @@ -963,7 +971,8 @@ struct Liverpool { BitField<2, 1, u32> hs_en; BitField<3, 2, u32> es_en; BitField<5, 1, u32> gs_en; - BitField<6, 1, u32> vs_en; + BitField<6, 2, u32> vs_en; + BitField<8, 1, u32> dynamic_hs; bool IsStageEnabled(u32 stage) const { switch (stage) { @@ -1059,6 +1068,28 @@ struct Liverpool { }; }; + union LsHsConfig { + u32 raw; + BitField<0, 8, u32> num_patches; + BitField<8, 6, u32> hs_input_control_points; + BitField<14, 6, u32> hs_output_control_points; + }; + + union TessellationConfig { + u32 raw; + BitField<0, 2, TessellationType> type; + BitField<2, 3, TessellationPartitioning> partitioning; + BitField<5, 3, TessellationTopology> topology; + }; + + union TessFactorMemoryBase { + u32 base; + + u64 MemoryBase() const { + return static_cast(base) << 8; + } + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1109,7 +1140,7 @@ struct Liverpool { ShaderProgram es_program; INSERT_PADDING_WORDS(0x2C); ShaderProgram hs_program; - INSERT_PADDING_WORDS(0x2C); + INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); ComputeProgram cs_program; @@ -1176,7 +1207,9 @@ struct Liverpool { PolygonControl polygon_control; ViewportControl viewport_control; VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1); + INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1); + HsTessFactorClamp hs_clamp; + INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2); GsMode vgt_gs_mode; INSERT_PADDING_WORDS(1); ModeControl mode_control; @@ -1200,9 +1233,10 @@ struct 
Liverpool { BitField<0, 11, u32> vgt_gs_max_vert_out; INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1); ShaderStageEnable stage_enable; - INSERT_PADDING_WORDS(1); + LsHsConfig ls_hs_config; u32 vgt_gs_vert_itemsize[4]; - INSERT_PADDING_WORDS(4); + TessellationConfig tess_config; + INSERT_PADDING_WORDS(3); PolygonOffset poly_offset; GsInstances vgt_gs_instance_cnt; StreamOutConfig vgt_strmout_config; @@ -1216,6 +1250,8 @@ struct Liverpool { INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; VgtNumInstances num_instances; + INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); + TessFactorMemoryBase vgt_tf_memory_base; }; std::array reg_array{}; @@ -1431,6 +1467,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); @@ -1445,6 +1482,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); +static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); @@ -1456,6 +1494,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); 
+static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); #undef GFX6_3D_REG_INDEX diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 6b95ed910e..fa8491665c 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -21,6 +23,69 @@ enum class FpDenormMode : u32 { InOutAllow = 3, }; +enum class TessellationType : u32 { + Isoline = 0, + Triangle = 1, + Quad = 2, +}; + +constexpr std::string_view NameOf(TessellationType type) { + switch (type) { + case TessellationType::Isoline: + return "Isoline"; + case TessellationType::Triangle: + return "Triangle"; + case TessellationType::Quad: + return "Quad"; + default: + return "Unknown"; + } +} + +enum class TessellationPartitioning : u32 { + Integer = 0, + Pow2 = 1, + FracOdd = 2, + FracEven = 3, +}; + +constexpr std::string_view NameOf(TessellationPartitioning partitioning) { + switch (partitioning) { + case TessellationPartitioning::Integer: + return "Integer"; + case TessellationPartitioning::Pow2: + return "Pow2"; + case TessellationPartitioning::FracOdd: + return "FracOdd"; + case TessellationPartitioning::FracEven: + return "FracEven"; + default: + return "Unknown"; + } +} + +enum class TessellationTopology : u32 { + Point = 0, + Line = 1, + TriangleCw = 2, + TriangleCcw = 3, +}; + +constexpr std::string_view NameOf(TessellationTopology topology) { + switch (topology) { + case TessellationTopology::Point: + return "Point"; + case TessellationTopology::Line: + return "Line"; + case TessellationTopology::TriangleCw: + return "TriangleCw"; + case TessellationTopology::TriangleCcw: + return "TriangleCcw"; + default: + return "Unknown"; + } +} + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, @@ -118,3 +183,33 @@ enum class NumberFormat : u32 { }; } // namespace AmdGpu + 
+template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationType type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationTopology type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8d495ab060..a39b183786 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler ComputePipelineKey compute_key_, const Shader::Info& info_, vk::ShaderModule module) : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { - auto& info = stages[int(Shader::Stage::Compute)]; + auto& info = stages[int(Shader::LogicalStage::Compute)]; info = &info_; const vk::PipelineShaderStageCreateInfo shader_ci = { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7955375742..222ffb5a9e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/scope_exit.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" 
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector vertex_bindings; boost::container::static_vector vertex_attributes; if (fetch_shader && !instance.IsVertexInputDynamicState()) { - const auto& vs_info = GetStage(Shader::Stage::Vertex); + const auto& vs_info = GetStage(Shader::LogicalStage::Vertex); for (const auto& attrib : fetch_shader->attributes) { if (attrib.UsesStepRates()) { // Skip attribute binding as the data will be pulled by shader @@ -106,6 +107,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul key.primitive_restart_index == 0xFFFFFFFF, "Primitive restart index other than -1 is not supported yet"); + const vk::PipelineTessellationStateCreateInfo tessellation_state = { + .patchControlPoints = key.patch_control_points, + }; + const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, .rasterizerDiscardEnable = false, @@ -204,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector shader_stages; - auto stage = u32(Shader::Stage::Vertex); + auto stage = u32(Shader::LogicalStage::Vertex); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, @@ -212,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Geometry); + stage = u32(Shader::LogicalStage::Geometry); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eGeometry, @@ -220,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Fragment); + stage = 
u32(Shader::LogicalStage::TessellationControl); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationControl, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::TessellationEval); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::Fragment); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, @@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pStages = shader_stages.data(), .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, + .pTessellationState = + stages[u32(Shader::LogicalStage::TessellationControl)] ? 
&tessellation_state : nullptr, .pViewportState = &viewport_info, .pRasterizationState = &raster_state, .pMultisampleState = &multisampling, @@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() { if (!stage) { continue; } - if (stage->has_readconst) { bindings.push_back({ .binding = binding++, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 703a0680e1..444c8517e2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -52,6 +52,7 @@ struct GraphicsPipelineKey { std::array blend_controls; std::array write_masks; std::array vertex_buffer_formats; + u32 patch_control_points; bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(key)) == 0; @@ -73,7 +74,7 @@ class GraphicsPipeline : public Pipeline { bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; - return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; + return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash; } auto GetWriteMasks() const { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index e844150b24..76efb215d1 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -327,6 +327,7 @@ bool Instance::CreateDevice() { .imageCubeArray = features.imageCubeArray, .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, + .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, @@ -378,6 +379,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{ .extendedDynamicState = true, }, + 
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{}, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ .extendedDynamicState3ColorWriteMask = true, }, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ff27b742f6..58473496f2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -22,6 +22,8 @@ extern std::unique_ptr presenter; namespace Vulkan { +using Shader::LogicalStage; +using Shader::Stage; using Shader::VsOutput; constexpr static std::array DescriptorHeapSizes = { @@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); } -Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { +Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) { auto info = Shader::RuntimeInfo{stage}; const auto& regs = liverpool->regs; const auto BuildCommon = [&](const auto& program) { @@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { info.fp_round_mode32 = program.settings.fp_round_mode32; }; switch (stage) { - case Shader::Stage::Export: { + case Stage::Local: { + BuildCommon(regs.ls_program); + if (regs.stage_enable.IsStageEnabled(static_cast(Stage::Hull))) { + info.ls_info.links_with_tcs = true; + Shader::TessellationDataConstantBuffer tess_constants; + const auto* pgm = regs.ProgramForStage(static_cast(Stage::Hull)); + const auto params = Liverpool::GetParams(*pgm); + const auto& hull_info = program_cache.at(params.hash)->info; + hull_info.ReadTessConstantBuffer(tess_constants); + info.ls_info.ls_stride = tess_constants.ls_stride; + } + break; + } + case Stage::Hull: { + BuildCommon(regs.hs_program); + info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + info.hs_info.num_threads = 
regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.tess_type = regs.tess_config.type; + + // We need to initialize most hs_info fields after finding the V# with tess constants + break; + } + case Stage::Export: { BuildCommon(regs.es_program); info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; break; } - case Shader::Stage::Vertex: { + case Stage::Vertex: { BuildCommon(regs.vs_program); GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; + if (l_stage == LogicalStage::TessellationEval) { + info.vs_info.tess_type = regs.tess_config.type; + info.vs_info.tess_topology = regs.tess_config.topology; + info.vs_info.tess_partitioning = regs.tess_config.partitioning; + } break; } - case Shader::Stage::Geometry: { + case Stage::Geometry: { BuildCommon(regs.gs_program); auto& gs_info = info.gs_info; gs_info.output_vertices = regs.vgt_gs_max_vert_out; @@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); break; } - case Shader::Stage::Fragment: { + case Stage::Fragment: { BuildCommon(regs.ps_program); info.fs_info.en_flags = regs.ps_input_ena; info.fs_info.addr_flags = regs.ps_input_addr; @@ -143,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } break; } - case Shader::Stage::Compute: { + case Stage::Compute: { const auto& cs_pgm = regs.cs_program; info.num_user_data = cs_pgm.settings.num_user_regs; info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; @@ -277,6 +306,11 @@ bool PipelineCache::RefreshGraphicsKey() { key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); key.vertex_buffer_formats.fill(vk::Format::eUndefined); + key.patch_control_points = 0; + if (regs.stage_enable.hs_en.Value()) { + 
key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + } + // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader // recompiler. for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { @@ -305,7 +339,7 @@ bool PipelineCache::RefreshGraphicsKey() { fetch_shader = std::nullopt; Shader::Backend::Bindings binding{}; - const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { + const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); const auto stage_out_idx = static_cast(stage_out); if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) { @@ -332,23 +366,23 @@ bool PipelineCache::RefreshGraphicsKey() { auto params = Liverpool::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, - key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding); + key.stage_hashes[stage_out_idx]) = + GetProgram(stage_in, stage_out, params, binding); if (fetch_shader_) { fetch_shader = fetch_shader_; } return true; }; - const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); }; - const auto& IsGsFeaturesSupported = [&]() -> bool { // These checks are temporary until all functionality is implemented. return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw; }; - TryBindStage(Shader::Stage::Fragment); + infos.fill(nullptr); + TryBindStage(Stage::Fragment, LogicalStage::Fragment); - const auto* fs_info = infos[static_cast(Shader::Stage::Fragment)]; + const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; key.mrt_mask = fs_info ? 
fs_info->mrt_mask : 0u; switch (regs.stage_enable.raw) { @@ -356,22 +390,36 @@ bool PipelineCache::RefreshGraphicsKey() { if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) { return false; } - if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) { + if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) { return false; } - if (!TryBindStage(Shader::Stage::Geometry)) { + if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) { + return false; + } + break; + } + case Liverpool::ShaderStageEnable::VgtStages::LsHs: { + if (!instance.IsTessellationSupported()) { + break; + } + if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) { + return false; + } + if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) { + return false; + } + if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) { return false; } break; } default: { - TryBindStage(Shader::Stage::Vertex); - infos[static_cast(Shader::Stage::Geometry)] = nullptr; + TryBindStage(Stage::Vertex, LogicalStage::Vertex); break; } } - const auto vs_info = infos[static_cast(Shader::Stage::Vertex)]; + const auto vs_info = infos[static_cast(Shader::LogicalStage::Vertex)]; if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { u32 vertex_binding = 0; for (const auto& attrib : fetch_shader->attributes) { @@ -424,19 +472,18 @@ bool PipelineCache::RefreshGraphicsKey() { key.num_samples = num_samples; return true; -} +} // namespace Vulkan bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = - GetProgram(Shader::Stage::Compute, cs_params, binding); + GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); return true; } -vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, - const Shader::RuntimeInfo& runtime_info, 
+vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, @@ -461,19 +508,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); if (Config::collectShadersForDebug()) { - DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span{}, - is_patched); + DebugState.CollectShader(name, info.l_stage, module, spv, code, + patch ? *patch : std::span{}, is_patched); } return module; } -std::tuple, u64> -PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding) { - const auto runtime_info = BuildRuntimeInfo(stage); +PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage, + Shader::ShaderParams params, + Shader::Backend::Bindings& binding) { + auto runtime_info = BuildRuntimeInfo(stage, l_stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { - it_pgm.value() = std::make_unique(stage, params); + it_pgm.value() = std::make_unique(stage, l_stage, params); auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); @@ -492,7 +539,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { - auto new_info = Shader::Info(stage, params); + auto new_info = Shader::Info(stage, l_stage, params); module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); program->AddPermut(module, std::move(spec)); } else { diff --git 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c5c2fc98e2..ec4406448e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -34,11 +34,13 @@ struct Program { vk::ShaderModule module; Shader::StageSpecialization spec; }; + using ModuleList = boost::container::small_vector; Shader::Info info; - boost::container::small_vector modules; + ModuleList modules; - explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} + explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params) + : info{stage, l_stage, params} {} void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { modules.emplace_back(module, std::move(spec)); @@ -55,10 +57,10 @@ class PipelineCache { const ComputePipeline* GetComputePipeline(); - std::tuple, - u64> - GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding); + using Result = std::tuple, u64>; + Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage, + Shader::ShaderParams params, Shader::Backend::Bindings& binding); std::optional ReplaceShader(vk::ShaderModule module, std::span spv_code); @@ -71,10 +73,10 @@ class PipelineCache { std::string_view ext); std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, + vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding); - Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage); + Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); private: const Instance& instance; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h 
b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 8c48c83f75..1b13a1797d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -14,9 +14,10 @@ class BufferCache; namespace Vulkan { -static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex | - vk::ShaderStageFlagBits::eGeometry | - vk::ShaderStageFlagBits::eFragment; +static constexpr auto gp_stage_flags = + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl | + vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment; class Instance; class Scheduler; @@ -37,6 +38,7 @@ class Pipeline { } auto GetStages() const { + static_assert(static_cast(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1); if (is_compute) { return std::span{stages.cend() - 1, stages.cend()}; } else { @@ -44,7 +46,7 @@ class Pipeline { } } - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { + const Shader::Info& GetStage(Shader::LogicalStage stage) const noexcept { return *stages[u32(stage)]; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index eb2ef36009..fef4c7ec52 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -4,6 +4,7 @@ #include "common/config.h" #include "common/debug.h" #include "core/memory.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -48,10 +49,6 @@ void Rasterizer::CpSync() { bool Rasterizer::FilterDraw() { const auto& regs = liverpool->regs; - // Tessellation is unsupported so skip the draw to avoid locking up the driver. 
- if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) { - return false; - } // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an // actual draw hence can skip pipeline creation. if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { @@ -214,7 +211,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); @@ -271,7 +268,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); @@ -326,7 +323,7 @@ void Rasterizer::DispatchDirect() { return; } - const auto& cs = pipeline->GetStage(Shader::Stage::Compute); + const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { return; } @@ -387,7 +384,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { const auto& regs = liverpool->regs; if (pipeline->IsCompute()) { - const auto& info = pipeline->GetStage(Shader::Stage::Compute); + const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); // Most of the time when a metadata is updated with a shader it gets cleared. It means // we can skip the whole dispatch and update the tracked state instead. 
Also, it is not From cafd40f2c2f2d0062979ad1ec12b6d755eeb4e81 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sat, 14 Dec 2024 15:33:06 +0300 Subject: [PATCH 22/67] DmaData and Recompiler fixes (#1775) * liverpool: fix dmadata packet handling * recompiler: emit a label right after s_branch to prevent dead code interference * specialize barriers --- .../frontend/control_flow_graph.cpp | 1 + src/video_core/amdgpu/liverpool.cpp | 26 ++-- src/video_core/buffer_cache/buffer_cache.cpp | 124 +++++++++++++++++- src/video_core/buffer_cache/buffer_cache.h | 1 + .../renderer_vulkan/vk_rasterizer.cpp | 4 + .../renderer_vulkan/vk_rasterizer.h | 1 + 6 files changed, 140 insertions(+), 17 deletions(-) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 8c3122b28a..1fb129f6c4 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,6 +80,7 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); + AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8db2d63c43..820903ab79 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -573,21 +573,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, - dma_data->SrcAddress(), - dma_data->NumBytes(), true); + rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true, false); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { 
rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - // LOG_WARNING(Render_Vulkan, "GDS memory read"); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, + dma_data->NumBytes(), false, true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->InlineData(dma_data->DstAddress(), - dma_data->SrcAddress(), - dma_data->NumBytes(), false); + rasterizer->CopyBuffer(dma_data->DstAddress(), + dma_data->SrcAddress(), dma_data->NumBytes(), + false, false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); @@ -731,20 +731,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true); + rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true, false); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - // LOG_WARNING(Render_Vulkan, "GDS memory read"); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, + dma_data->NumBytes(), false, true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->InlineData(dma_data->DstAddress(), - dma_data->SrcAddress(), dma_data->NumBytes(), - false); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->SrcAddress(), + 
dma_data->NumBytes(), false, false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index e9fc064938..31b2a2c586 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -312,8 +312,23 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + const vk::BufferMemoryBarrier2 buf_barrier_before = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &buf_barrier_before, + }); + cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); + const vk::BufferMemoryBarrier2 buf_barrier_after = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, @@ -324,9 +339,96 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier, + .pBufferMemoryBarriers = &buf_barrier_after, + }); +} + +void 
BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { + if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) { + if (!src_gds && !IsRegionRegistered(src, num_bytes)) { + // Both buffers were not transferred to GPU yet. Can safely copy in host memory. + memcpy(std::bit_cast(dst), std::bit_cast(src), num_bytes); + return; + } + // Without a readback there's nothing we can do with this + // Fallback to creating dst buffer on GPU to at least have this data there + } + if (!src_gds && !IsRegionRegistered(src, num_bytes)) { + InlineData(dst, std::bit_cast(src), num_bytes, dst_gds); + return; + } + auto& src_buffer = [&] -> const Buffer& { + if (src_gds) { + return gds_buffer; + } + const BufferId buffer_id = FindBuffer(src, num_bytes); + return slot_buffers[buffer_id]; + }(); + auto& dst_buffer = [&] -> const Buffer& { + if (dst_gds) { + return gds_buffer; + } + const BufferId buffer_id = FindBuffer(dst, num_bytes); + return slot_buffers[buffer_id]; + }(); + vk::BufferCopy region{ + .srcOffset = src_buffer.Offset(src), + .dstOffset = dst_buffer.Offset(dst), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 buf_barriers_before[2] = { + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = dst_buffer.Handle(), + .offset = dst_buffer.Offset(dst), + .size = num_bytes, + }, + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = src_buffer.Handle(), + .offset = src_buffer.Offset(src), + .size = num_bytes, + }, + }; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = 
vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 2, + .pBufferMemoryBarriers = buf_barriers_before, + }); + cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region); + const vk::BufferMemoryBarrier2 buf_barriers_after[2] = { + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = dst_buffer.Handle(), + .offset = dst_buffer.Offset(dst), + .size = num_bytes, + }, + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .buffer = src_buffer.Handle(), + .offset = src_buffer.Offset(src), + .size = num_bytes, + }, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 2, + .pBufferMemoryBarriers = buf_barriers_after, }); - cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -701,8 +803,22 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + 
vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); } return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e62913413a..4c57e9c29e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -87,6 +87,7 @@ class BufferCache { /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); [[nodiscard]] std::pair ObtainHostUBO(std::span data); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fef4c7ec52..9e9b40ca51 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -840,6 +840,10 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo buffer_cache.InlineData(address, value, num_bytes, is_gds); } +void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { + buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds); +} + u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { auto* gds_buf = buffer_cache.GetGdsBuffer(); u32 value; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ec1b5e1341..b5bead6971 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -53,6 +53,7 @@ class Rasterizer { void ScopedMarkerInsertColor(const std::string_view& str, const u32 color); void InlineData(VAddr address, 
const void* value, u32 num_bytes, bool is_gds); + void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); bool IsMapped(VAddr addr, u64 size); From e752f04cde25941818e141cc933ffa380cc321e5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 04:33:24 -0800 Subject: [PATCH 23/67] shader_recompiler: Fixups from stencil changes (#1776) --- src/shader_recompiler/frontend/translate/export.cpp | 2 +- src/video_core/texture_cache/image_view.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index f4914577df..5927aa696a 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -13,7 +13,7 @@ void Translator::EmitExport(const GcnInst& inst) { const auto& exp = inst.control.exp; const IR::Attribute attrib{exp.target}; - if (attrib == IR::Attribute::Depth && exp.en != 1) { + if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) { LOG_WARNING(Render_Vulkan, "Unsupported depth export"); return; } diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 41c45019ed..ec1fda0d8c 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -131,7 +131,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } - if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Uint) { + if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && + (format == vk::Format::eR8Uint || format == vk::Format::eR8Unorm)) { format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eStencil; } 
From 27447537c3e846e0da923fdb1525e4253b555849 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 06:12:41 -0800 Subject: [PATCH 24/67] externals: Update sirit to fix debug assert (#1783) --- externals/sirit | 2 +- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/sirit b/externals/sirit index 5b5ff49a58..1e74f4ef8d 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 5b5ff49a58f5be27af1058794c6ca907dabc05b3 +Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 8da9280d01..e5d4f30778 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -247,7 +247,7 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id ImageOperands operands; if (ctx.profile.supports_image_load_store_lod) { operands.Add(spv::ImageOperandsMask::Lod, lod); - } else if (lod.value != 0) { + } else if (Sirit::ValidId(lod)) { LOG_WARNING(Render, "Image write with LOD not supported by driver"); } ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, From e9ede8d62749d2697c0b807296846d132acf4919 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:17:14 +0200 Subject: [PATCH 25/67] Revert "DmaData and Recompiler fixes (#1775)" (#1784) This reverts commit cafd40f2c2f2d0062979ad1ec12b6d755eeb4e81. 
--- .../frontend/control_flow_graph.cpp | 1 - src/video_core/amdgpu/liverpool.cpp | 26 ++-- src/video_core/buffer_cache/buffer_cache.cpp | 124 +----------------- src/video_core/buffer_cache/buffer_cache.h | 1 - .../renderer_vulkan/vk_rasterizer.cpp | 4 - .../renderer_vulkan/vk_rasterizer.h | 1 - 6 files changed, 17 insertions(+), 140 deletions(-) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 1fb129f6c4..8c3122b28a 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,7 +80,6 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); - AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 820903ab79..8db2d63c43 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -573,21 +573,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true, false); + rasterizer->InlineData(dma_data->dst_addr_lo, + dma_data->SrcAddress(), + dma_data->NumBytes(), true); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, - dma_data->NumBytes(), false, true); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && 
dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), - dma_data->SrcAddress(), dma_data->NumBytes(), - false, false); + rasterizer->InlineData(dma_data->DstAddress(), + dma_data->SrcAddress(), + dma_data->NumBytes(), false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); @@ -731,20 +731,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true, false); + rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, - dma_data->NumBytes(), false, true); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->SrcAddress(), - dma_data->NumBytes(), false, false); + rasterizer->InlineData(dma_data->DstAddress(), + dma_data->SrcAddress(), dma_data->NumBytes(), + false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 31b2a2c586..e9fc064938 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp 
@@ -312,23 +312,8 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier_before = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = buffer->Handle(), - .offset = buffer->Offset(address), - .size = num_bytes, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier_before, - }); - cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); - const vk::BufferMemoryBarrier2 buf_barrier_after = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + const vk::BufferMemoryBarrier2 buf_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, @@ -339,96 +324,9 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier_after, - }); -} - -void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { - if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) { - if (!src_gds && !IsRegionRegistered(src, num_bytes)) { - // Both buffers were not transferred to GPU yet. Can safely copy in host memory. 
- memcpy(std::bit_cast(dst), std::bit_cast(src), num_bytes); - return; - } - // Without a readback there's nothing we can do with this - // Fallback to creating dst buffer on GPU to at least have this data there - } - if (!src_gds && !IsRegionRegistered(src, num_bytes)) { - InlineData(dst, std::bit_cast(src), num_bytes, dst_gds); - return; - } - auto& src_buffer = [&] -> const Buffer& { - if (src_gds) { - return gds_buffer; - } - const BufferId buffer_id = FindBuffer(src, num_bytes); - return slot_buffers[buffer_id]; - }(); - auto& dst_buffer = [&] -> const Buffer& { - if (dst_gds) { - return gds_buffer; - } - const BufferId buffer_id = FindBuffer(dst, num_bytes); - return slot_buffers[buffer_id]; - }(); - vk::BufferCopy region{ - .srcOffset = src_buffer.Offset(src), - .dstOffset = dst_buffer.Offset(dst), - .size = num_bytes, - }; - const vk::BufferMemoryBarrier2 buf_barriers_before[2] = { - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = dst_buffer.Handle(), - .offset = dst_buffer.Offset(dst), - .size = num_bytes, - }, - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferRead, - .buffer = src_buffer.Handle(), - .offset = src_buffer.Offset(src), - .size = num_bytes, - }, - }; - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = buf_barriers_before, - }); - cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region); - const vk::BufferMemoryBarrier2 buf_barriers_after[2] = { - { - .srcStageMask = 
vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = dst_buffer.Handle(), - .offset = dst_buffer.Offset(dst), - .size = num_bytes, - }, - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eTransferRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .buffer = src_buffer.Handle(), - .offset = src_buffer.Offset(src), - .size = num_bytes, - }, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = buf_barriers_after, + .pBufferMemoryBarriers = &buf_barrier, }); + cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -803,22 +701,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, - }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, 
- vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); } return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 4c57e9c29e..e62913413a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -87,7 +87,6 @@ class BufferCache { /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); [[nodiscard]] std::pair ObtainHostUBO(std::span data); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9e9b40ca51..fef4c7ec52 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -840,10 +840,6 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo buffer_cache.InlineData(address, value, num_bytes, is_gds); } -void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { - buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds); -} - u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { auto* gds_buf = buffer_cache.GetGdsBuffer(); u32 value; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b5bead6971..ec1b5e1341 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -53,7 +53,6 @@ class Rasterizer { void ScopedMarkerInsertColor(const std::string_view& str, const u32 color); void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); bool IsMapped(VAddr addr, u64 size); From 
8b88344679af4a45e7d5e35089e778b463eac13b Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:46:19 -0800 Subject: [PATCH 26/67] vk_instance: Remove unused dynamic state 2 features struct (#1791) --- src/video_core/renderer_vulkan/vk_instance.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 76efb215d1..d7bfaee4e2 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -379,7 +379,6 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{ .extendedDynamicState = true, }, - vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{}, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ .extendedDynamicState3ColorWriteMask = true, }, From f93677b95371a83db695151341d4629e133d2203 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:46:35 -0800 Subject: [PATCH 27/67] resource_tracking_pass: Fix converting dimensions to float for normalization. (#1790) --- .../ir/passes/resource_tracking_pass.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f436db07a0..a593989526 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -586,12 +586,13 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, const auto dimensions = unnormalized ? 
ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) : IR::Value{}; - const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value { - const auto coord = get_addr_reg(idx); + const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value { + const auto coord = get_addr_reg(coord_idx); if (unnormalized) { // Normalize the coordinate for sampling, dividing by its corresponding dimension. - return ir.FPDiv(coord, - ir.BitCast(IR::U32{ir.CompositeExtract(dimensions, dim_idx)})); + const auto dim = + ir.ConvertUToF(32, 32, IR::U32{ir.CompositeExtract(dimensions, dim_idx)}); + return ir.FPDiv(coord, dim); } return coord; }; From 876445faf1b0ef63ddb9d0111e35b74bd31b4a42 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sat, 14 Dec 2024 23:46:55 +0300 Subject: [PATCH 28/67] recompiler: emit a label right after s_branch to prevent dead code interferrence (#1785) --- src/shader_recompiler/frontend/control_flow_graph.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 8c3122b28a..0816ec088e 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,6 +80,8 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); + // Emit this label so that the block ends with s_branch instruction + AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; From af26c945b10c400f9720dbb29857876867a57c35 Mon Sep 17 00:00:00 2001 From: Connor Garey Date: Sat, 14 Dec 2024 22:30:17 +0000 Subject: [PATCH 29/67] Fix for "shadPS4" not being given on Linux volume mixers (#1789) --- src/sdl_window.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 
f6b57436fa..4b13844b86 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include #include @@ -68,6 +69,9 @@ static Uint32 SDLCALL PollController(void* userdata, SDL_TimerID timer_id, Uint3 WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_, std::string_view window_title) : width{width_}, height{height_}, controller{controller_} { + if (!SDL_SetHint(SDL_HINT_APP_NAME, "shadPS4")) { + UNREACHABLE_MSG("Failed to set SDL window hint: {}", SDL_GetError()); + } if (!SDL_Init(SDL_INIT_VIDEO)) { UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError()); } From 0fd1ab674bbb6b41f5cc6d46ca5ed4bcd6d6052c Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sat, 14 Dec 2024 23:54:46 +0100 Subject: [PATCH 30/67] GPU processor refactoring (#1787) * coroutine code prettification * asc queues submission refactoring * better asc ring context handling * final touches and review notes * even more simplification for context saving --- src/common/debug.h | 3 + src/core/debug_state.cpp | 68 +++--- src/core/debug_state.h | 8 +- src/core/libraries/gnmdriver/gnmdriver.cpp | 36 ++-- .../backend/spirv/spirv_emit_context.cpp | 2 +- src/video_core/amdgpu/liverpool.cpp | 204 ++++++++++-------- src/video_core/amdgpu/liverpool.h | 25 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 9 +- .../renderer_vulkan/vk_rasterizer.cpp | 6 +- .../renderer_vulkan/vk_shader_hle.cpp | 16 +- .../renderer_vulkan/vk_shader_hle.h | 2 +- src/video_core/texture_cache/tile_manager.cpp | 1 + 12 files changed, 234 insertions(+), 146 deletions(-) diff --git a/src/common/debug.h b/src/common/debug.h index 091c6191de..4d42aa4ab7 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -57,3 +57,6 @@ enum MarkersPalette : int { tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0}; #define FRAME_END FrameMark + 
+#define FIBER_ENTER(name) TracyFiberEnter(name) +#define FIBER_EXIT TracyFiberLeave diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index c68fd469da..daf614bd9d 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -142,41 +142,61 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) { frame.queues.push_back(std::move(dump)); } -void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs, bool is_compute) { - std::scoped_lock lock{frame_dump_list_mutex}; +std::optional DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) { const auto it = waiting_reg_dumps.find(header_addr); if (it == waiting_reg_dumps.end()) { - return; + return std::nullopt; } auto& frame = *it->second; waiting_reg_dumps.erase(it); waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr)); - auto& dump = frame.regs[header_addr - base_addr]; - dump.regs = regs; - if (is_compute) { - dump.is_compute = true; - const auto& cs = dump.regs.cs_program; - dump.cs_data = PipelineComputerProgramDump{ - .cs_program = cs, - .code = std::vector{cs.Code().begin(), cs.Code().end()}, - }; - } else { - for (int i = 0; i < RegDump::MaxShaderStages; i++) { - if (regs.stage_enable.IsStageEnabled(i)) { - auto stage = regs.ProgramForStage(i); - if (stage->address_lo != 0) { - auto code = stage->Code(); - dump.stages[i] = PipelineShaderProgramDump{ - .user_data = *stage, - .code = std::vector{code.begin(), code.end()}, - }; - } + return &frame.regs[header_addr - base_addr]; +} + +void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, + const AmdGpu::Liverpool::Regs& regs) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->regs = regs; + + for (int i = 0; i < RegDump::MaxShaderStages; i++) { + if ((*dump)->regs.stage_enable.IsStageEnabled(i)) { + auto stage = 
(*dump)->regs.ProgramForStage(i); + if (stage->address_lo != 0) { + auto code = stage->Code(); + (*dump)->stages[i] = PipelineShaderProgramDump{ + .user_data = *stage, + .code = std::vector{code.begin(), code.end()}, + }; } } } } +void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, + const CsState& cs_state) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->is_compute = true; + auto& cs = (*dump)->regs.cs_program; + cs = cs_state; + + (*dump)->cs_data = PipelineComputerProgramDump{ + .cs_program = cs, + .code = std::vector{cs.Code().begin(), cs.Code().end()}, + }; +} + void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, diff --git a/src/core/debug_state.h b/src/core/debug_state.h index 0db5bc4683..a0e428b6b5 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -11,7 +11,6 @@ #include #include "common/types.h" -#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #ifdef _WIN32 @@ -204,12 +203,17 @@ class DebugStateImpl { void PushQueueDump(QueueDump dump); void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs, bool is_compute = false); + const AmdGpu::Liverpool::Regs& regs); + using CsState = AmdGpu::Liverpool::ComputeProgram; + void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state); void CollectShader(const std::string& name, Shader::LogicalStage l_stage, vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, bool is_patched); + +private: + std::optional GetRegDump(uintptr_t base_addr, uintptr_t header_addr); }; } // namespace DebugStateType diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 
e85b8b8908..583339dd98 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61); // In case if `submitDone` is issued we need to block submissions until GPU idle static u32 submission_lock{}; std::condition_variable cv_lock{}; -static std::mutex m_submission{}; +std::mutex m_submission{}; static u64 frames_submitted{}; // frame counter static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame static int sdk_version{0}; -struct AscQueueInfo { - VAddr map_addr; - u32* read_addr; - u32 ring_size_dw; -}; -static Common::SlotVector asc_queues{}; +static u32 asc_next_offs_dw[Liverpool::NumComputeRings]; static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF; static constexpr u32 tessellation_offchip_buffer_size = 0x800000u; @@ -506,11 +501,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { } auto vqid = gnm_vqid - 1; - auto& asc_queue = asc_queues[{vqid}]; - const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr + *asc_queue.read_addr); - const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr - : (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr; - const std::span acb_span{acb_ptr, acb_size >> 2u}; + auto& asc_queue = liverpool->asc_queues[{vqid}]; + + const auto& offs_dw = asc_next_offs_dw[vqid]; + + if (next_offs_dw < offs_dw) { + ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer"); + } + + const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr) + offs_dw; + const auto acb_size_dw = (next_offs_dw ? 
next_offs_dw : asc_queue.ring_size_dw) - offs_dw; + const std::span acb_span{acb_ptr, acb_size_dw}; + + asc_next_offs_dw[vqid] = next_offs_dw; if (DebugState.DumpingCurrentFrame()) { static auto last_frame_num = -1LL; @@ -545,9 +548,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { }); } liverpool->SubmitAsc(gnm_vqid, acb_span); - - *asc_queue.read_addr += acb_size; - *asc_queue.read_addr %= asc_queue.ring_size_dw * 4; } void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) { @@ -1266,12 +1266,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR; } - auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw); + const auto vqid = + liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id); // We need to offset index as `dingDong` assumes it to be from the range [1..64] const auto gnm_vqid = vqid.index + 1; LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id, gnm_vqid); + const auto& queue = liverpool->asc_queues[vqid]; + *queue.read_addr = 0u; + return gnm_vqid; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 2e09e70a75..5f0ad298e9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -294,7 +294,7 @@ void EmitContext::DefineInputs() { }); // Note that we pass index rather than Id input_params[attrib.semantic] = SpirvAttribute{ - .id = rate_idx, + .id = {rate_idx}, .pointer_type = input_u32, .component_type = U32[1], .num_components = std::min(attrib.num_elements, num_components), diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8db2d63c43..8cca636c0c 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ 
b/src/video_core/amdgpu/liverpool.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "common/assert.h" #include "common/config.h" #include "common/debug.h" @@ -18,7 +20,32 @@ namespace AmdGpu { static const char* dcb_task_name{"DCB_TASK"}; static const char* ccb_task_name{"CCB_TASK"}; -static const char* acb_task_name{"ACB_TASK"}; + +#define MAX_NAMES 56 +static_assert(Liverpool::NumComputeRings <= MAX_NAMES); + +#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n), +#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)} + +static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES); + +#define YIELD(name) \ + FIBER_EXIT; \ + co_yield {}; \ + FIBER_ENTER(name); + +#define YIELD_CE() YIELD(ccb_task_name) +#define YIELD_GFX() YIELD(dcb_task_name) +#define YIELD_ASC(id) YIELD(acb_task_name[id]) + +#define RESUME(task, name) \ + FIBER_EXIT; \ + task.handle.resume(); \ + FIBER_ENTER(name); + +#define RESUME_CE(task) RESUME(task, ccb_task_name) +#define RESUME_GFX(task) RESUME(task, dcb_task_name) +#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id]) std::array Liverpool::ConstantEngine::constants_heap; @@ -60,7 +87,7 @@ void Liverpool::Process(std::stop_token stoken) { VideoCore::StartCapture(); - int qid = -1; + curr_qid = -1; while (num_submits || num_commands) { @@ -79,9 +106,9 @@ void Liverpool::Process(std::stop_token stoken) { --num_commands; } - qid = (qid + 1) % NumTotalQueues; + curr_qid = (curr_qid + 1) % num_mapped_queues; - auto& queue = mapped_queues[qid]; + auto& queue = mapped_queues[curr_qid]; Task::Handle task{}; { @@ -119,7 +146,7 @@ void Liverpool::Process(std::stop_token stoken) { } Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { - TracyFiberEnter(ccb_task_name); + FIBER_ENTER(ccb_task_name); while (!ccb.empty()) { const auto* header = reinterpret_cast(ccb.data()); @@ -155,9 +182,7 @@ 
Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { case PM4ItOpcode::WaitOnDeCounterDiff: { const auto diff = it_body[0]; while ((cblock.de_count - cblock.ce_count) >= diff) { - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(ccb_task_name); + YIELD_CE(); } break; } @@ -165,13 +190,12 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { const auto* indirect_buffer = reinterpret_cast(header); auto task = ProcessCeUpdate({indirect_buffer->Address(), indirect_buffer->ib_size}); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_CE(task); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(ccb_task_name); - }; + while (!task.handle.done()) { + YIELD_CE(); + RESUME_CE(task); + } break; } default: @@ -182,11 +206,11 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { ccb = NextPacket(ccb, header->type3.NumWords() + 1); } - TracyFiberLeave; + FIBER_EXIT; } Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span ccb) { - TracyFiberEnter(dcb_task_name); + FIBER_ENTER(dcb_task_name); cblock.Reset(); @@ -197,9 +221,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(dcb.data()); @@ -353,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto set_size = (count - 1) * sizeof(u32); + + if (set_data->reg_offset >= 0x200 && + set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { + ASSERT(set_size <= sizeof(ComputeProgram)); + auto* addr = reinterpret_cast(&mapped_queues[GfxQueueId].cs_state) + + (set_data->reg_offset - 0x200); + std::memcpy(addr, header + 2, set_size); + } else { + std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + set_size); + } break; } case PM4ItOpcode::SetUconfigReg: { @@ -474,15 +506,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - regs.cs_program.dim_x
= dispatch_direct->dim_x; - regs.cs_program.dim_y = dispatch_direct->dim_y; - regs.cs_program.dim_z = dispatch_direct->dim_z; - regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + auto& cs_program = GetCsRegs(); + cs_program.dim_x = dispatch_direct->dim_x; + cs_program.dim_y = dispatch_direct->dim_y; + cs_program.dim_z = dispatch_direct->dim_z; + cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, - true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); rasterizer->DispatchDirect(); @@ -493,14 +526,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + auto& cs_program = GetCsRegs(); const auto offset = dispatch_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, - true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DispatchIndirect", cmd_address)); @@ -613,11 +647,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); while (!rewind->Valid()) { - mapped_queues[GfxQueueId].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - regs.cs_program = 
mapped_queues[GfxQueueId].cs_state; + YIELD_GFX(); } break; } @@ -633,11 +663,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanWaitVoLabel([&] { return wait_reg_mem->Test(); }); } while (!wait_reg_mem->Test()) { - mapped_queues[GfxQueueId].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - regs.cs_program = mapped_queues[GfxQueueId].cs_state; + YIELD_GFX(); } break; } @@ -645,13 +671,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); auto task = ProcessGraphics( {indirect_buffer->Address(), indirect_buffer->ib_size}, {}); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_GFX(task); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - }; + while (!task.handle.done()) { + YIELD_GFX(); + RESUME_GFX(task); + } break; } case PM4ItOpcode::IncrementDeCounter: { @@ -660,9 +685,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::span acb, int vqid) { - TracyFiberEnter(acb_task_name); +template +Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { + FIBER_ENTER(acb_task_name[vqid]); + const auto& queue = asc_queues[{vqid}]; auto base_addr = reinterpret_cast(acb.data()); while (!acb.empty()) { @@ -711,15 +736,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { } case PM4ItOpcode::IndirectBuffer: { const auto* indirect_buffer = reinterpret_cast(header); - auto task = ProcessCompute( + auto task = ProcessCompute( {indirect_buffer->Address(), indirect_buffer->ib_size}, vqid); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_ASC(task, vqid); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - }; + while (!task.handle.done()) { + YIELD_ASC(vqid); + RESUME_ASC(task, vqid); + } break; } case PM4ItOpcode::DmaData: { @@ -757,30 +781,38 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { case PM4ItOpcode::Rewind: { const 
PM4CmdRewind* rewind = reinterpret_cast(header); while (!rewind->Valid()) { - mapped_queues[vqid].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - regs.cs_program = mapped_queues[vqid].cs_state; + YIELD_ASC(vqid); } break; } case PM4ItOpcode::SetShReg: { const auto* set_data = reinterpret_cast(header); - std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto set_size = (count - 1) * sizeof(u32); + + if (set_data->reg_offset >= 0x200 && + set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { + ASSERT(set_size <= sizeof(ComputeProgram)); + auto* addr = reinterpret_cast(&mapped_queues[vqid + 1].cs_state) + + (set_data->reg_offset - 0x200); + std::memcpy(addr, header + 2, set_size); + } else { + std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + set_size); + } break; } case PM4ItOpcode::DispatchDirect: { const auto* dispatch_direct = reinterpret_cast(header); - regs.cs_program.dim_x = dispatch_direct->dim_x; - regs.cs_program.dim_y = dispatch_direct->dim_y; - regs.cs_program.dim_z = dispatch_direct->dim_z; - regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + auto& cs_program = GetCsRegs(); + cs_program.dim_x = dispatch_direct->dim_x; + cs_program.dim_y = dispatch_direct->dim_y; + cs_program.dim_z = dispatch_direct->dim_z; + cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); rasterizer->DispatchDirect(); @@ -803,17 +835,13 @@
Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); while (!wait_reg_mem->Test()) { - mapped_queues[vqid].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - regs.cs_program = mapped_queues[vqid].cs_state; + YIELD_ASC(vqid); } break; } case PM4ItOpcode::ReleaseMem: { const auto* release_mem = reinterpret_cast(header); - release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <--- + release_mem->SignalFence(static_cast(queue.pipe_id)); break; } default: @@ -821,10 +849,16 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { static_cast(opcode), count); } - acb = NextPacket(acb, header->type3.NumWords() + 1); + const auto packet_size_dw = header->type3.NumWords() + 1; + acb = NextPacket(acb, packet_size_dw); + + if constexpr (!is_indirect) { + *queue.read_addr += packet_size_dw; + *queue.read_addr %= queue.ring_size_dw; + } } - TracyFiberLeave; + FIBER_EXIT; } std::pair, std::span> Liverpool::CopyCmdBuffers( @@ -881,10 +915,11 @@ void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { submit_cv.notify_one(); } -void Liverpool::SubmitAsc(u32 vqid, std::span acb) { - ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index"); - auto& queue = mapped_queues[vqid]; +void Liverpool::SubmitAsc(u32 gnm_vqid, std::span acb) { + ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index"); + auto& queue = mapped_queues[gnm_vqid]; + const auto vqid = gnm_vqid - 1; const auto& task = ProcessCompute(acb, vqid); { std::scoped_lock lock{queue.m_access}; @@ -892,6 +927,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span acb) { } std::scoped_lock lk{submit_mutex}; + num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1); ++num_submits; submit_cv.notify_one(); } diff --git a/src/video_core/amdgpu/liverpool.h 
b/src/video_core/amdgpu/liverpool.h index b6172d37bc..4c74d37d09 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -16,6 +16,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/polyfill_thread.h" +#include "common/slot_vector.h" #include "common/types.h" #include "common/unique_function.h" #include "shader_recompiler/params.h" @@ -45,7 +46,8 @@ struct Liverpool { static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software static constexpr u32 NumQueuesPerPipe = 8u; - static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe); + static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe; + static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings; static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs static constexpr u32 NumColorBuffers = 8; @@ -1143,7 +1145,7 @@ struct Liverpool { INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); - ComputeProgram cs_program; + ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues` INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; INSERT_PADDING_WORDS(1); @@ -1298,7 +1300,7 @@ struct Liverpool { ~Liverpool(); void SubmitGfx(std::span dcb, std::span ccb); - void SubmitAsc(u32 vqid, std::span acb); + void SubmitAsc(u32 gnm_vqid, std::span acb); void SubmitDone() noexcept { std::scoped_lock lk{submit_mutex}; @@ -1341,6 +1343,18 @@ struct Liverpool { gfx_queue.dcb_buffer.reserve(GfxReservedSize); } + inline ComputeProgram& GetCsRegs() { + return mapped_queues[curr_qid].cs_state; + } + + struct AscQueueInfo { + VAddr map_addr; + u32* read_addr; + u32 ring_size_dw; + u32 pipe_id; + }; + Common::SlotVector asc_queues{}; + private: struct Task { struct promise_type { 
@@ -1378,7 +1392,8 @@ struct Liverpool { std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); - Task ProcessCompute(std::span acb, int vqid); + template + Task ProcessCompute(std::span acb, u32 vqid); void Process(std::stop_token stoken); @@ -1393,6 +1408,7 @@ struct Liverpool { VAddr indirect_args_addr{}; }; std::array mapped_queues{}; + u32 num_mapped_queues{1u}; // GFX is always available struct ConstantEngine { void Reset() { @@ -1421,6 +1437,7 @@ struct Liverpool { std::mutex submit_mutex; std::condition_variable_any submit_cv; std::queue> command_queue{}; + int curr_qid{-1}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 58473496f2..50396287b7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -173,9 +173,9 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_ break; } case Stage::Compute: { - const auto& cs_pgm = regs.cs_program; + const auto& cs_pgm = liverpool->GetCsRegs(); info.num_user_data = cs_pgm.settings.num_user_regs; - info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; + info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4; info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, cs_pgm.num_thread_z.full}; info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), @@ -476,8 +476,8 @@ bool PipelineCache::RefreshGraphicsKey() { bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; - const auto* cs_pgm = &liverpool->regs.cs_program; - const auto cs_params = Liverpool::GetParams(*cs_pgm); + const auto& cs_pgm = liverpool->GetCsRegs(); + const auto cs_params = Liverpool::GetParams(cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = 
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); return true; @@ -529,6 +529,7 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag return std::make_tuple(&program->info, module, spec.fetch_shader_data, HashCombine(params.hash, 0)); } + it_pgm.value()->info.user_data = params.user_data; auto& program = it_pgm.value(); auto& info = program->info; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fef4c7ec52..bd8906f865 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -317,14 +317,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 void Rasterizer::DispatchDirect() { RENDERER_TRACE; - const auto& cs_program = liverpool->regs.cs_program; + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { return; } const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); - if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { + if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) { return; } @@ -344,7 +344,7 @@ void Rasterizer::DispatchDirect() { void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { RENDERER_TRACE; - const auto& cs_program = liverpool->regs.cs_program; + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { return; diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index b863dce211..ff78f5d245 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -2,17 +2,19 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include 
"video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_hle.h" -#include "vk_rasterizer.h" +extern std::unique_ptr liverpool; namespace Vulkan { static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; -bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer) { +static bool ExecuteCopyShaderHLE(const Shader::Info& info, + const AmdGpu::Liverpool::ComputeProgram& cs_program, + Rasterizer& rasterizer) { auto& scheduler = rasterizer.GetScheduler(); auto& buffer_cache = rasterizer.GetBufferCache(); @@ -34,9 +36,9 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg static std::vector copies; copies.clear(); - copies.reserve(regs.cs_program.dim_x); + copies.reserve(cs_program.dim_x); - for (u32 i = 0; i < regs.cs_program.dim_x; i++) { + for (u32 i = 0; i < cs_program.dim_x; i++) { const auto& [dst_idx, src_idx, end] = ctl_buf[i]; const u32 local_dst_offset = dst_idx * buf_stride; const u32 local_src_offset = src_idx * buf_stride; @@ -122,10 +124,10 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg } bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer) { + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) { switch (info.pgm_hash) { case COPY_SHADER_HASH: - return ExecuteCopyShaderHLE(info, regs, rasterizer); + return ExecuteCopyShaderHLE(info, cs_program, rasterizer); default: return false; } diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.h b/src/video_core/renderer_vulkan/vk_shader_hle.h index fda9b1735f..008de8003e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.h +++ b/src/video_core/renderer_vulkan/vk_shader_hle.h @@ -15,6 +15,6 @@ class Rasterizer; /// Attempts to execute a shader using HLE if possible. 
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer); + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer); } // namespace Vulkan diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index d8d23c400f..a5e09e45de 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -212,6 +212,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7UnormBlock: case vk::Format::eBc6HUfloatBlock: + case vk::Format::eR32G32B32A32Uint: case vk::Format::eR32G32B32A32Sfloat: return vk::Format::eR32G32B32A32Uint; default: From d2ac92481b514bd2dacbd60abdc460cd076d9d3c Mon Sep 17 00:00:00 2001 From: Richard Habitzreuter Date: Sun, 15 Dec 2024 11:28:36 -0300 Subject: [PATCH 31/67] style: add Gruvbox theme (#1796) --- src/qt_gui/main_window.cpp | 13 +++++++++++++ src/qt_gui/main_window_themes.cpp | 22 ++++++++++++++++++++-- src/qt_gui/main_window_themes.h | 8 +------- src/qt_gui/main_window_ui.h | 6 ++++++ 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 3eb629c0b2..0b5137c4be 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -111,6 +111,7 @@ void MainWindow::CreateActions() { m_theme_act_group->addAction(ui->setThemeGreen); m_theme_act_group->addAction(ui->setThemeBlue); m_theme_act_group->addAction(ui->setThemeViolet); + m_theme_act_group->addAction(ui->setThemeGruvbox); } void MainWindow::AddUiWidgets() { @@ -540,6 +541,14 @@ void MainWindow::CreateConnects() { isIconBlack = false; } }); + connect(ui->setThemeGruvbox, &QAction::triggered, &m_window_themes, [this]() { + m_window_themes.SetWindowTheme(Theme::Gruvbox, ui->mw_searchbar); + Config::setMainWindowTheme(static_cast(Theme::Gruvbox)); + if (isIconBlack) { + SetUiIcons(false); 
+ isIconBlack = false; + } + }); } void MainWindow::StartGame() { @@ -912,6 +921,10 @@ void MainWindow::SetLastUsedTheme() { ui->setThemeViolet->setChecked(true); isIconBlack = false; SetUiIcons(false); + case Theme::Gruvbox: + ui->setThemeGruvbox->setChecked(true); + isIconBlack = false; + SetUiIcons(false); break; } } diff --git a/src/qt_gui/main_window_themes.cpp b/src/qt_gui/main_window_themes.cpp index 35e64ef741..65dd04269b 100644 --- a/src/qt_gui/main_window_themes.cpp +++ b/src/qt_gui/main_window_themes.cpp @@ -15,7 +15,6 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Window, QColor(50, 50, 50)); themePalette.setColor(QPalette::WindowText, Qt::white); themePalette.setColor(QPalette::Base, QColor(20, 20, 20)); - themePalette.setColor(QPalette::AlternateBase, QColor(25, 25, 25)); themePalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53)); themePalette.setColor(QPalette::ToolTipBase, Qt::white); themePalette.setColor(QPalette::ToolTipText, Qt::white); @@ -28,7 +27,6 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::HighlightedText, Qt::black); qApp->setPalette(themePalette); break; - case Theme::Light: mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background "color: #000000;" // Black text @@ -115,6 +113,26 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text + qApp->setPalette(themePalette); + break; + case Theme::Gruvbox: + mw_searchbar->setStyleSheet("background-color: #1d2021;" + "color: #f9f5d7;" + "border: 2px solid #f9f5d7;" + "padding: 5px;"); + themePalette.setColor(QPalette::Window, QColor(29, 32, 33)); + themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215)); + 
themePalette.setColor(QPalette::Base, QColor(29, 32, 33)); + themePalette.setColor(QPalette::AlternateBase, QColor(50, 48, 47)); + themePalette.setColor(QPalette::ToolTipBase, QColor(249, 245, 215)); + themePalette.setColor(QPalette::ToolTipText, QColor(249, 245, 215)); + themePalette.setColor(QPalette::Text, QColor(249, 245, 215)); + themePalette.setColor(QPalette::Button, QColor(40, 40, 40)); + themePalette.setColor(QPalette::ButtonText, QColor(249, 245, 215)); + themePalette.setColor(QPalette::BrightText, QColor(251, 73, 52)); + themePalette.setColor(QPalette::Link, QColor(131, 165, 152)); + themePalette.setColor(QPalette::Highlight, QColor(131, 165, 152)); + themePalette.setColor(QPalette::HighlightedText, Qt::black); qApp->setPalette(themePalette); break; } diff --git a/src/qt_gui/main_window_themes.h b/src/qt_gui/main_window_themes.h index 6da70e995f..d162da87b0 100644 --- a/src/qt_gui/main_window_themes.h +++ b/src/qt_gui/main_window_themes.h @@ -7,13 +7,7 @@ #include #include -enum class Theme : int { - Dark, - Light, - Green, - Blue, - Violet, -}; +enum class Theme : int { Dark, Light, Green, Blue, Violet, Gruvbox }; class WindowThemes : public QObject { Q_OBJECT diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index 5ff572f86a..df64361fd5 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -36,6 +36,7 @@ class Ui_MainWindow { QAction* setThemeGreen; QAction* setThemeBlue; QAction* setThemeViolet; + QAction* setThemeGruvbox; QWidget* centralWidget; QLineEdit* mw_searchbar; QPushButton* playButton; @@ -158,6 +159,9 @@ class Ui_MainWindow { setThemeViolet = new QAction(MainWindow); setThemeViolet->setObjectName("setThemeViolet"); setThemeViolet->setCheckable(true); + setThemeGruvbox = new QAction(MainWindow); + setThemeGruvbox->setObjectName("setThemeGruvbox"); + setThemeGruvbox->setCheckable(true); centralWidget = new QWidget(MainWindow); centralWidget->setObjectName("centralWidget"); 
sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth()); @@ -282,6 +286,7 @@ class Ui_MainWindow { menuThemes->addAction(setThemeGreen); menuThemes->addAction(setThemeBlue); menuThemes->addAction(setThemeViolet); + menuThemes->addAction(setThemeGruvbox); menuGame_List_Icons->addAction(setIconSizeTinyAct); menuGame_List_Icons->addAction(setIconSizeSmallAct); menuGame_List_Icons->addAction(setIconSizeMediumAct); @@ -368,6 +373,7 @@ class Ui_MainWindow { setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr)); setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr)); setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr)); + setThemeGruvbox->setText("Gruvbox"); toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr)); } // retranslateUi }; From 9aa1c13c7e20079716a55657e1d47bda1aafd3ff Mon Sep 17 00:00:00 2001 From: baggins183 Date: Sun, 15 Dec 2024 06:30:19 -0800 Subject: [PATCH 32/67] Fix some compiler problems with ds3 (#1793) - Implement S_CMOVK_I32 - Handle Isoline abstract patch type --- .../frontend/translate/scalar_alu.cpp | 20 +++++++++++-------- .../frontend/translate/translate.h | 2 +- .../ir/passes/hull_shader_transform.cpp | 10 ++++++---- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 1ef0d82d8f..e731e299aa 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -161,8 +161,9 @@ void Translator::EmitSOPK(const GcnInst& inst) { switch (inst.opcode) { // SOPK case Opcode::S_MOVK_I32: - return S_MOVK(inst); - + return S_MOVK(inst, false); + case Opcode::S_CMOVK_I32: + return S_MOVK(inst, true); case Opcode::S_CMPK_EQ_I32: return S_CMPK(ConditionOp::EQ, true, inst); case Opcode::S_CMPK_LG_I32: @@ -458,13 +459,16 @@ 
void Translator::S_ABSDIFF_I32(const GcnInst& inst) { // SOPK -void Translator::S_MOVK(const GcnInst& inst) { - const auto simm16 = inst.control.sopk.simm; - if (simm16 & (1 << 15)) { - // TODO: need to verify the case of imm sign extension - UNREACHABLE(); +void Translator::S_MOVK(const GcnInst& inst, bool is_conditional) { + const s16 simm16 = inst.control.sopk.simm; + // do the sign extension + const s32 simm32 = static_cast(simm16); + IR::U32 val = ir.Imm32(simm32); + if (is_conditional) { + // if !SCC its a NOP + val = IR::U32{ir.Select(ir.GetScc(), val, GetSrc(inst.dst[0]))}; } - SetDst(inst.dst[0], ir.Imm32(simm16)); + SetDst(inst.dst[0], val); } void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 60bad18649..8e575fcadf 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -100,7 +100,7 @@ class Translator { void S_NOT_B32(const GcnInst& inst); // SOPK - void S_MOVK(const GcnInst& inst); + void S_MOVK(const GcnInst& inst, bool is_conditional); void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst); void S_ADDK_I32(const GcnInst& inst); void S_MULK_I32(const GcnInst& inst); diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 5cf02b6d0d..895c9823e3 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -398,8 +398,8 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { // communicated to the driver. // The layout seems to be implied by the type of the abstract domain. 
switch (runtime_info.hs_info.tess_type) { - case AmdGpu::TessellationType::Quad: - ASSERT(gcn_factor_idx < 6); + case AmdGpu::TessellationType::Isoline: + ASSERT(gcn_factor_idx < 2); return IR::PatchFactor(gcn_factor_idx); case AmdGpu::TessellationType::Triangle: ASSERT(gcn_factor_idx < 4); @@ -407,9 +407,11 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { return IR::Patch::TessellationLodInteriorU; } return IR::PatchFactor(gcn_factor_idx); + case AmdGpu::TessellationType::Quad: + ASSERT(gcn_factor_idx < 6); + return IR::PatchFactor(gcn_factor_idx); default: - // Point domain types haven't been seen so far - UNREACHABLE_MSG("Unhandled tess type"); + UNREACHABLE(); } }; From 3001b007f6a450f62526fa61724753ab532bef20 Mon Sep 17 00:00:00 2001 From: DanielSvoboda Date: Sun, 15 Dec 2024 11:30:53 -0300 Subject: [PATCH 33/67] Keybord on README / Fix Play Time (#1786) * Keybord on README F10_F11_F12 * Update game_list_frame.cpp --- README.md | 7 +++++++ src/qt_gui/game_list_frame.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 18e69546c0..7ef5bdf65f 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,13 @@ For more information on how to test, debug and report issues with the emulator o # Keyboard mapping +| Button | Function | +|-------------|-------------| +F10 | FPS Counter +Ctrl+F10 | Video Debug Info +F11 | Fullscreen +F12 | Trigger RenderDoc Capture + > [!NOTE] > Xbox and DualShock controllers work out of the box. 
diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index 47bfbfef90..63f6b63b8d 100644 --- a/src/qt_gui/game_list_frame.cpp +++ b/src/qt_gui/game_list_frame.cpp @@ -123,7 +123,7 @@ void GameListFrame::PopulateGameList() { formattedPlayTime = formattedPlayTime.trimmed(); m_game_info->m_games[i].play_time = playTime.toStdString(); if (formattedPlayTime.isEmpty()) { - SetTableItem(i, 7, "0"); + SetTableItem(i, 7, QString("%1s").arg(seconds)); } else { SetTableItem(i, 7, formattedPlayTime); } From e7c4ffe032a0dd5605322363b9766425ea5531f9 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 15 Dec 2024 20:53:29 +0100 Subject: [PATCH 34/67] hot-fix: Tracy operation restored; memory leak fix as a bonus --- externals/CMakeLists.txt | 2 +- src/common/debug.h | 7 +++++++ src/core/libraries/gnmdriver/gnmdriver.cpp | 3 +++ src/video_core/amdgpu/liverpool.cpp | 3 ++- src/video_core/renderer_vulkan/vk_instance.cpp | 4 +++- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 1ab23a4038..dbe6794d86 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -193,7 +193,7 @@ option(TRACY_ENABLE "" ON) option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash option(TRACY_ON_DEMAND "" ON) option(TRACY_NO_FRAME_IMAGE "" ON) -option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling +option(TRACY_FIBERS "" OFF) # For AmdGpu frontend profiling, disabled due to instability option(TRACY_NO_SYSTEM_TRACING "" ON) option(TRACY_NO_CALLSTACK "" ON) option(TRACY_NO_CODE_TRANSFER "" ON) diff --git a/src/common/debug.h b/src/common/debug.h index 4d42aa4ab7..882e9e5c4e 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -17,6 +17,8 @@ static inline bool IsProfilerConnected() { return tracy::GetProfiler().IsConnected(); } +#define TRACY_GPU_ENABLED 0 + #define CUSTOM_LOCK(type, varname) \ tracy::LockableCtx varname { \ []() -> const 
tracy::SourceLocationData* { \ @@ -58,5 +60,10 @@ enum MarkersPalette : int { #define FRAME_END FrameMark +#ifdef TRACY_FIBERS #define FIBER_ENTER(name) TracyFiberEnter(name) #define FIBER_EXIT TracyFiberLeave +#else +#define FIBER_ENTER(name) +#define FIBER_EXIT +#endif diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 583339dd98..1a6007bf8f 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -488,6 +488,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() { } void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw); if (gnm_vqid == 0) { @@ -2166,6 +2167,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count, u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes) { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "called"); if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) { @@ -2258,6 +2260,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ } int PS4_SYSV_ABI sceGnmSubmitDone() { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "called"); WaitGpuIdle(); if (!liverpool->IsGpuIdle()) { diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8cca636c0c..5dd3edd6da 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -659,7 +659,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanAddress(); - if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) { + if (vo_port->IsVoLabel(wait_addr) && + num_submits == mapped_queues[GfxQueueId].submits.size()) { vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); } while (!wait_reg_mem->Test()) { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d7bfaee4e2..b479c14647 100644 --- 
a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/config.h" +#include "common/debug.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -261,7 +262,8 @@ bool Instance::CreateDevice() { // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + const bool calibrated_timestamps = + TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false; const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); From 8a4e03228aca87fd31da6730e60723ece601a1c5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 15 Dec 2024 16:11:15 -0800 Subject: [PATCH 35/67] spirv_emit_context: Prevent double-add of GS in attributes to interface. 
(#1800) --- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5f0ad298e9..5d2ec6f96f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -381,9 +381,8 @@ void EmitContext::DefineInputs() { for (int param_id = 0; param_id < num_params; ++param_id) { const Id type{TypeArray(F32[4], ConstU32(num_verts_in))}; const Id id{DefineInput(type, param_id)}; - Name(id, fmt::format("in_attr{}", param_id)); + Name(id, fmt::format("gs_in_attr{}", param_id)); input_params[param_id] = {id, input_f32, F32[1], 4}; - interfaces.push_back(id); } break; } From 5585e42677d3f00c71679fe20aab1446f9a5bca3 Mon Sep 17 00:00:00 2001 From: Richard Habitzreuter Date: Tue, 17 Dec 2024 07:32:30 -0300 Subject: [PATCH 36/67] style: add rounded borders and focus color styling to the search bar (#1804) --- src/qt_gui/main_window.cpp | 1 + src/qt_gui/main_window_themes.cpp | 64 +++++++++++++++++-------------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 0b5137c4be..9c81bcf11c 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -921,6 +921,7 @@ void MainWindow::SetLastUsedTheme() { ui->setThemeViolet->setChecked(true); isIconBlack = false; SetUiIcons(false); + break; case Theme::Gruvbox: ui->setThemeGruvbox->setChecked(true); isIconBlack = false; diff --git a/src/qt_gui/main_window_themes.cpp b/src/qt_gui/main_window_themes.cpp index 65dd04269b..a52b4466ea 100644 --- a/src/qt_gui/main_window_themes.cpp +++ b/src/qt_gui/main_window_themes.cpp @@ -8,10 +8,12 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { switch (theme) { case Theme::Dark: - mw_searchbar->setStyleSheet("background-color: 
#1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #1e1e1e; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(50, 50, 50)); themePalette.setColor(QPalette::WindowText, Qt::white); themePalette.setColor(QPalette::Base, QColor(20, 20, 20)); @@ -28,10 +30,12 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; case Theme::Light: - mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background - "color: #000000;" // Black text - "border: 2px solid #000000;" // Black border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #ffffff; color: #000000; border: 1px solid #000000; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray themePalette.setColor(QPalette::WindowText, Qt::black); // Black themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish @@ -46,12 +50,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::HighlightedText, Qt::white); // White qApp->setPalette(themePalette); break; - case Theme::Green: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #192819; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(53, 69, 53)); // Dark green 
background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(25, 40, 25)); // Darker green base @@ -66,15 +71,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Light blue links themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text - qApp->setPalette(themePalette); break; - case Theme::Blue: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #14283c; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(40, 60, 90)); // Dark blue background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(20, 40, 60)); // Darker blue base @@ -92,12 +97,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; - case Theme::Violet: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #501e5a; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(100, 50, 120)); // Violet background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(80, 30, 90)); // Darker violet base @@ 
-116,10 +122,12 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; case Theme::Gruvbox: - mw_searchbar->setStyleSheet("background-color: #1d2021;" - "color: #f9f5d7;" - "border: 2px solid #f9f5d7;" - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #1d2021; color: #f9f5d7; border: 1px solid #f9f5d7; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #83A598; }"); themePalette.setColor(QPalette::Window, QColor(29, 32, 33)); themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215)); themePalette.setColor(QPalette::Base, QColor(29, 32, 33)); From 3c8e25e8e48f5a7618d84d28a534e3530c538790 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 17 Dec 2024 02:34:43 -0800 Subject: [PATCH 37/67] fs: Fix wrong mounts being matched by partial guest path. (#1809) --- src/core/file_sys/fs.cpp | 21 +++++++++++++++++---- src/core/file_sys/fs.h | 9 ++++++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 0fdbb2783e..92f725cc78 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -10,16 +10,28 @@ namespace Core::FileSys { +std::string RemoveTrailingSlashes(const std::string& path) { + // Remove trailing slashes to make comparisons simpler. 
+ std::string path_sanitized = path; + while (path_sanitized.ends_with("/")) { + path_sanitized.pop_back(); + } + return path_sanitized; +} + void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder, bool read_only) { std::scoped_lock lock{m_mutex}; - m_mnt_pairs.emplace_back(host_folder, guest_folder, read_only); + const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder); + m_mnt_pairs.emplace_back(host_folder, guest_folder_sanitized, read_only); } void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) { std::scoped_lock lock{m_mutex}; - auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), - [&](const MntPair& pair) { return pair.mount == guest_folder; }); + const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder); + auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), [&](const MntPair& pair) { + return pair.mount == guest_folder_sanitized; + }); m_mnt_pairs.erase(it, m_mnt_pairs.end()); } @@ -47,7 +59,8 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea } // Nothing to do if getting the mount itself. 
- if (corrected_path == mount->mount) { + const auto corrected_path_sanitized = RemoveTrailingSlashes(corrected_path); + if (corrected_path_sanitized == mount->mount) { return mount->host_path; } diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index b0153c162a..e219887c8b 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -22,7 +22,7 @@ class MntPoints { public: struct MntPair { std::filesystem::path host_path; - std::string mount; // e.g /app0/ + std::string mount; // e.g /app0 bool read_only; }; @@ -39,8 +39,11 @@ class MntPoints { const MntPair* GetMount(const std::string& guest_path) { std::scoped_lock lock{m_mutex}; - const auto it = std::ranges::find_if( - m_mnt_pairs, [&](const auto& mount) { return guest_path.starts_with(mount.mount); }); + const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) { + // When doing starts-with check, add a trailing slash to make sure we don't match + // against only part of the mount path. + return guest_path == mount.mount || guest_path.starts_with(mount.mount + "/"); + }); return it == m_mnt_pairs.end() ? 
nullptr : &*it; } From aa5c1c10dfa560d55c59da2d7f841c3e9e0485fc Mon Sep 17 00:00:00 2001 From: ElBread3 <92335081+ElBread3@users.noreply.github.com> Date: Tue, 17 Dec 2024 04:42:21 -0600 Subject: [PATCH 38/67] More Fixes for Separate Update (#1487) * handle getdents + fix condition + add info to description * fix not handling dents errors * to not overwrite it, only gather separate update entries when normal folder is done * fix always setting entries to 0 and guest name including "UPDATE" * reset indexes on completion * don't use concat, fixes long standing bug * make sce_module module loading take both paths into account --- src/core/file_sys/fs.cpp | 10 ++++ src/core/file_sys/fs.h | 10 ++++ src/core/libraries/kernel/file_system.cpp | 58 ++++++++++++++++++++++- src/emulator.cpp | 46 +++++++++++++----- src/qt_gui/gui_context_menus.h | 22 ++++----- src/qt_gui/translations/en.ts | 2 +- 6 files changed, 119 insertions(+), 29 deletions(-) diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 92f725cc78..45ba67b931 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -199,4 +199,14 @@ void HandleTable::CreateStdHandles() { setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr } +int HandleTable::GetFileDescriptor(File* file) { + std::scoped_lock lock{m_mutex}; + auto it = std::find(m_files.begin(), m_files.end(), file); + + if (it != m_files.end()) { + return std::distance(m_files.begin(), it); + } + return 0; +} + } // namespace Core::FileSys diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index e219887c8b..56df32ad05 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -9,6 +9,7 @@ #include #include #include "common/io_file.h" +#include "common/logging/formatter.h" #include "core/devices/base_device.h" namespace Core::FileSys { @@ -37,6 +38,14 @@ class MntPoints { std::filesystem::path GetHostPath(std::string_view guest_directory, bool* is_read_only = nullptr); + const MntPair* 
GetMountFromHostPath(const std::string& host_path) { + std::scoped_lock lock{m_mutex}; + const auto it = std::ranges::find_if(m_mnt_pairs, [&](const MntPair& mount) { + return host_path.starts_with(std::string{fmt::UTF(mount.host_path.u8string()).data}); + }); + return it == m_mnt_pairs.end() ? nullptr : &*it; + } + const MntPair* GetMount(const std::string& guest_path) { std::scoped_lock lock{m_mutex}; const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) { @@ -86,6 +95,7 @@ class HandleTable { void DeleteHandle(int d); File* GetFile(int d); File* GetFile(const std::filesystem::path& host_name); + int GetFileDescriptor(File* file); void CreateStdHandles(); diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 5ba9976c60..57efbb6317 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -695,12 +695,66 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) { return sizeof(OrbisKernelDirent); } +static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) { + int dir_entries = 0; + + auto* h = Common::Singleton::Instance(); + auto* mnt = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data}; + auto mount = mnt->GetMountFromHostPath(update_dir_name); + auto suffix = std::string{fmt::UTF(mount->host_path.u8string()).data}; + + size_t pos = update_dir_name.find("-UPDATE"); + if (pos != std::string::npos) { + update_dir_name.erase(pos, 7); + auto guest_name = mount->mount + "/" + update_dir_name.substr(suffix.size() + 1); + int descriptor; + + auto existent_folder = h->GetFile(update_dir_name); + if (!existent_folder) { + u32 handle = h->CreateHandle(); + auto* new_file = h->GetFile(handle); + new_file->type = Core::FileSys::FileType::Directory; + new_file->m_guest_name = guest_name; + new_file->m_host_name = update_dir_name; + if 
(!std::filesystem::is_directory(new_file->m_host_name)) { + h->DeleteHandle(handle); + return dir_entries; + } else { + new_file->dirents = GetDirectoryEntries(new_file->m_host_name); + new_file->dirents_index = 0; + } + new_file->is_opened = true; + descriptor = h->GetFileDescriptor(new_file); + } else { + descriptor = h->GetFileDescriptor(existent_folder); + } + + dir_entries = GetDents(descriptor, buf, nbytes, basep); + if (dir_entries == ORBIS_OK && existent_folder) { + existent_folder->dirents_index = 0; + file->dirents_index = 0; + } + } + + return dir_entries; +} + int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) { - return GetDents(fd, buf, nbytes, nullptr); + int a = GetDents(fd, buf, nbytes, nullptr); + if (a == ORBIS_OK) { + return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr); + } + return a; } int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) { - return GetDents(fd, buf, nbytes, basep); + int a = GetDents(fd, buf, nbytes, basep); + if (a == ORBIS_OK) { + return HandleSeparateUpdateDents(fd, buf, nbytes, basep); + } + return a; } s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { diff --git a/src/emulator.cpp b/src/emulator.cpp index c517bc2840..252a344181 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/config.h" @@ -106,9 +107,11 @@ Emulator::~Emulator() { void Emulator::Run(const std::filesystem::path& file) { // Use the eboot from the separated updates folder if it's there - std::filesystem::path game_patch_folder = file.parent_path().concat("-UPDATE"); - bool use_game_patch = std::filesystem::exists(game_patch_folder / "sce_sys"); - std::filesystem::path eboot_path = use_game_patch ? 
game_patch_folder / file.filename() : file; + std::filesystem::path game_patch_folder = file.parent_path(); + game_patch_folder += "-UPDATE"; + std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename()) + ? game_patch_folder / file.filename() + : file; // Applications expect to be run from /app0 so mount the file's parent path as app0. auto* mnt = Common::Singleton::Instance(); @@ -226,20 +229,37 @@ void Emulator::Run(const std::filesystem::path& file) { LoadSystemModules(eboot_path, game_info.game_serial); // Load all prx from game's sce_module folder - std::filesystem::path sce_module_folder = file.parent_path() / "sce_module"; - if (std::filesystem::is_directory(sce_module_folder)) { - for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) { - std::filesystem::path module_path = entry.path(); - std::filesystem::path update_module_path = - eboot_path.parent_path() / "sce_module" / entry.path().filename(); - if (std::filesystem::exists(update_module_path) && use_game_patch) { - module_path = update_module_path; + std::vector modules_to_load; + std::filesystem::path game_module_folder = file.parent_path() / "sce_module"; + if (std::filesystem::is_directory(game_module_folder)) { + for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) { + if (entry.is_regular_file()) { + modules_to_load.push_back(entry.path()); } - LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string())); - linker->LoadModule(module_path); } } + // Load all prx from separate update's sce_module folder + std::filesystem::path update_module_folder = game_patch_folder / "sce_module"; + if (std::filesystem::is_directory(update_module_folder)) { + for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) { + auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(), + [&entry](const std::filesystem::path& p) { + return p.filename() == entry.path().filename(); + }); + if 
(it != modules_to_load.end()) { + *it = entry.path(); + } else { + modules_to_load.push_back(entry.path()); + } + } + } + + for (const auto& module_path : modules_to_load) { + LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string())); + linker->LoadModule(module_path); + } + #ifdef ENABLE_DISCORD_RPC // Discord RPC if (Config::getEnableDiscordRPC()) { diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 6eef1230c7..3cc12c11e4 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -122,11 +122,11 @@ class GuiContextMenus : public QObject { if (selected == &openSfoViewer) { PSF psf; - QString game_update_path; - Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE")); std::filesystem::path game_folder_path = m_games[itemID].path; - if (std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) { - game_folder_path = Common::FS::PathFromQString(game_update_path); + std::filesystem::path game_update_path = game_folder_path; + game_update_path += "UPDATE"; + if (std::filesystem::exists(game_update_path)) { + game_folder_path = game_update_path; } if (psf.Open(game_folder_path / "sce_sys" / "param.sfo")) { int rows = psf.GetEntries().size(); @@ -320,21 +320,17 @@ class GuiContextMenus : public QObject { bool error = false; QString folder_path, game_update_path, dlc_path; Common::FS::PathToQString(folder_path, m_games[itemID].path); - Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE")); + game_update_path = folder_path + "-UPDATE"; Common::FS::PathToQString( dlc_path, Config::getAddonInstallDir() / Common::FS::PathFromQString(folder_path).parent_path().filename()); QString message_type = tr("Game"); if (selected == deleteUpdate) { - if (!Config::getSeparateUpdateEnabled()) { - QMessageBox::critical(nullptr, tr("Error"), - QString(tr("requiresEnableSeparateUpdateFolder_MSG"))); - error = true; - } else if (!std::filesystem::exists( - 
Common::FS::PathFromQString(game_update_path))) { - QMessageBox::critical(nullptr, tr("Error"), - QString(tr("This game has no update to delete!"))); + if (!std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) { + QMessageBox::critical( + nullptr, tr("Error"), + QString(tr("This game has no separate update to delete!"))); error = true; } else { folder_path = game_update_path; diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index 7ae5830409..9eccec8ea8 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -1159,7 +1159,7 @@ separateUpdatesCheckBox - Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management. + Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.\nThis can be manually created by adding the extracted update to the game folder with the name "CUSA00000-UPDATE" where the CUSA ID matches the game's ID. From 87773a417b96417a14bab695422f70e80697f4e4 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 17 Dec 2024 05:04:19 -0800 Subject: [PATCH 39/67] mac: Choose whether system Vulkan is needed at runtime. (#1780) --- CMakeLists.txt | 13 ++- REUSE.toml | 1 + externals/MoltenVK/CMakeLists.txt | 16 ++- externals/MoltenVK/MoltenVK_icd.json | 8 ++ src/video_core/renderer_vulkan/vk_common.h | 4 - .../renderer_vulkan/vk_platform.cpp | 105 +++++++++++------- 6 files changed, 95 insertions(+), 52 deletions(-) create mode 100644 externals/MoltenVK/MoltenVK_icd.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 78d8421a3e..8f0397e86d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -892,11 +892,16 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") endif() if (APPLE) - option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." 
OFF) - if (USE_SYSTEM_VULKAN_LOADER) - target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1) + if (ENABLE_QT_GUI) + # Include MoltenVK in the app bundle, along with an ICD file so it can be found by the system Vulkan loader if used for loading layers. + target_sources(shadps4 PRIVATE externals/MoltenVK/MoltenVK_icd.json) + set_source_files_properties(externals/MoltenVK/MoltenVK_icd.json + PROPERTIES MACOSX_PACKAGE_LOCATION Resources/vulkan/icd.d) + add_custom_command(TARGET shadps4 POST_BUILD + COMMAND cmake -E copy $ $/Contents/Frameworks/libMoltenVK.dylib) + set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks") else() - # Link MoltenVK for Vulkan support + # For non-bundled SDL build, just do a normal library link. target_link_libraries(shadps4 PRIVATE MoltenVK) endif() diff --git a/REUSE.toml b/REUSE.toml index 747679c8b2..cba63adf1d 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -15,6 +15,7 @@ path = [ "documents/changelog.md", "documents/Quickstart/2.png", "documents/Screenshots/*", + "externals/MoltenVK/MoltenVK_icd.json", "scripts/ps4_names.txt", "src/images/about_icon.png", "src/images/controller_icon.png", diff --git a/externals/MoltenVK/CMakeLists.txt b/externals/MoltenVK/CMakeLists.txt index 00e3231eef..908c2847cf 100644 --- a/externals/MoltenVK/CMakeLists.txt +++ b/externals/MoltenVK/CMakeLists.txt @@ -1,17 +1,29 @@ # SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later -# Prepare version information +# Prepare MoltenVK Git revision find_package(Git) if(GIT_FOUND) execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD OUTPUT_VARIABLE MVK_GIT_REV + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) endif() -set(MVK_VERSION "1.2.12") set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated) file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = 
\"${MVK_GIT_REV}\";") +message(STATUS "MoltenVK revision: ${MVK_GIT_REV}") + +# Prepare MoltenVK version +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK/MoltenVK/API/mvk_private_api.h MVK_PRIVATE_API) +string(REGEX MATCH "#define MVK_VERSION_MAJOR [0-9]+" MVK_VERSION_MAJOR_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_MAJOR "${MVK_VERSION_MAJOR_LINE}") +string(REGEX MATCH "#define MVK_VERSION_MINOR [0-9]+" MVK_VERSION_MINOR_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_MINOR "${MVK_VERSION_MINOR_LINE}") +string(REGEX MATCH "#define MVK_VERSION_PATCH [0-9]+" MVK_VERSION_PATCH_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_PATCH "${MVK_VERSION_PATCH_LINE}") +set(MVK_VERSION "${MVK_VERSION_MAJOR}.${MVK_VERSION_MINOR}.${MVK_VERSION_PATCH}") +message(STATUS "MoltenVK version: ${MVK_VERSION}") # Find required system libraries find_library(APPKIT_LIBRARY AppKit REQUIRED) diff --git a/externals/MoltenVK/MoltenVK_icd.json b/externals/MoltenVK/MoltenVK_icd.json new file mode 100644 index 0000000000..2c33192639 --- /dev/null +++ b/externals/MoltenVK/MoltenVK_icd.json @@ -0,0 +1,8 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "../../../Frameworks/libMoltenVK.dylib", + "api_version": "1.2.0", + "is_portability_driver": true + } +} diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index 9178aeb659..5fe199e0e0 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -3,10 +3,6 @@ #pragma once -#if defined(__APPLE__) && !USE_SYSTEM_VULKAN_LOADER -#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0 -#endif - // Include vulkan-hpp header #define VK_ENABLE_BETA_EXTENSIONS #define VK_NO_PROTOTYPES diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index f5e5136113..dbdabe0d97 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp 
+++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -14,6 +14,7 @@ #endif #include +#include #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" @@ -21,15 +22,6 @@ #include "sdl_window.h" #include "video_core/renderer_vulkan/vk_platform.h" -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL -static vk::detail::DynamicLoader dl; -#else -extern "C" { -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, - const char* pName); -} -#endif - namespace Vulkan { static const char* const VALIDATION_LAYER_NAME = "VK_LAYER_KHRONOS_validation"; @@ -199,15 +191,57 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window return extensions; } +std::vector GetInstanceLayers(bool enable_validation, bool enable_crash_diagnostic) { + const auto [properties_result, properties] = vk::enumerateInstanceLayerProperties(); + if (properties_result != vk::Result::eSuccess || properties.empty()) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties: {}", + vk::to_string(properties_result)); + return {}; + } + + std::vector layers; + layers.reserve(2); + + if (enable_validation) { + layers.push_back(VALIDATION_LAYER_NAME); + } + if (enable_crash_diagnostic) { + layers.push_back(CRASH_DIAGNOSTIC_LAYER_NAME); + } + + // Sanitize layer list + std::erase_if(layers, [&](const char* layer) -> bool { + const auto it = std::ranges::find_if(properties, [layer](const auto& prop) { + return std::strcmp(layer, prop.layerName) == 0; + }); + if (it == properties.end()) { + LOG_ERROR(Render_Vulkan, "Requested layer {} is not available", layer); + return true; + } + return false; + }); + + return layers; +} + vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool enable_validation, bool enable_crash_diagnostic) { LOG_INFO(Render_Vulkan, "Creating vulkan instance"); -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL - auto vkGetInstanceProcAddr = - dl.getProcAddress("vkGetInstanceProcAddr"); +#ifdef __APPLE__ + 
// If the Vulkan loader exists in /usr/local/lib, give it priority. The Vulkan SDK + // installs it here by default but it is not in the default library search path. + // The loader has a clause to check for it, but at a lower priority than the bundled + // libMoltenVK.dylib, so we need to handle it ourselves to give it priority. + static const std::string usr_local_path = "/usr/local/lib/libvulkan.dylib"; + static vk::detail::DynamicLoader dl = std::filesystem::exists(usr_local_path) + ? vk::detail::DynamicLoader(usr_local_path) + : vk::detail::DynamicLoader(); +#else + static vk::detail::DynamicLoader dl; #endif - VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + VULKAN_HPP_DEFAULT_DISPATCHER.init( + dl.getProcAddress("vkGetInstanceProcAddr")); const auto [available_version_result, available_version] = VULKAN_HPP_DEFAULT_DISPATCHER.vkEnumerateInstanceVersion @@ -230,38 +264,25 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e .apiVersion = available_version, }; - u32 num_layers = 0; - std::array layers; + const auto layers = GetInstanceLayers(enable_validation, enable_crash_diagnostic); - vk::Bool32 enable_force_barriers = vk::False; - const char* log_path{}; + const std::string extensions_string = fmt::format("{}", fmt::join(extensions, ", ")); + const std::string layers_string = fmt::format("{}", fmt::join(layers, ", ")); + LOG_INFO(Render_Vulkan, "Enabled instance extensions: {}", extensions_string); + LOG_INFO(Render_Vulkan, "Enabled instance layers: {}", layers_string); -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL - if (enable_validation) { - layers[num_layers++] = VALIDATION_LAYER_NAME; - } + // Validation settings + vk::Bool32 enable_sync = Config::vkValidationSyncEnabled() ? vk::True : vk::False; + vk::Bool32 enable_gpuav = Config::vkValidationSyncEnabled() ? vk::True : vk::False; + const char* gpuav_mode = + Config::vkValidationGpuEnabled() ? 
"GPU_BASED_GPU_ASSISTED" : "GPU_BASED_NONE"; - if (enable_crash_diagnostic) { - layers[num_layers++] = CRASH_DIAGNOSTIC_LAYER_NAME; - static const auto crash_diagnostic_path = - Common::FS::GetUserPathString(Common::FS::PathType::LogDir); - log_path = crash_diagnostic_path.c_str(); - enable_force_barriers = vk::True; - } -#else - if (enable_validation || enable_crash_diagnostic) { - LOG_WARNING(Render_Vulkan, - "Skipping loading Vulkan layers as dynamic loading is not enabled."); - } -#endif + // Crash diagnostics settings + static const auto crash_diagnostic_path = + Common::FS::GetUserPathString(Common::FS::PathType::LogDir); + const char* log_path = crash_diagnostic_path.c_str(); + vk::Bool32 enable_force_barriers = vk::True; - vk::Bool32 enable_sync = - enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - vk::Bool32 enable_gpuav = - enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled() - ? "GPU_BASED_GPU_ASSISTED" - : "GPU_BASED_NONE"; const std::array layer_setings = { vk::LayerSettingEXT{ .pLayerName = VALIDATION_LAYER_NAME, @@ -331,7 +352,7 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e vk::StructureChain instance_ci_chain = { vk::InstanceCreateInfo{ .pApplicationInfo = &application_info, - .enabledLayerCount = num_layers, + .enabledLayerCount = static_cast(layers.size()), .ppEnabledLayerNames = layers.data(), .enabledExtensionCount = static_cast(extensions.size()), .ppEnabledExtensionNames = extensions.data(), From ccfb1bbfa8ab7e536d684fd2c80b90fcfe66b5f6 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 17 Dec 2024 21:56:08 -0800 Subject: [PATCH 40/67] vk_instance: Add additional fallback for missing D16UnormS8Uint. 
(#1810) --- .../renderer_vulkan/vk_instance.cpp | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index b479c14647..790e764004 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -583,23 +583,20 @@ bool Instance::IsFormatSupported(const vk::Format format, return (GetFormatFeatureFlags(format) & flags) == flags; } -static vk::Format GetAlternativeFormat(const vk::Format format) { - switch (format) { - case vk::Format::eD16UnormS8Uint: - return vk::Format::eD24UnormS8Uint; - default: - return format; - } -} - vk::Format Instance::GetSupportedFormat(const vk::Format format, const vk::FormatFeatureFlags2 flags) const { - if (IsFormatSupported(format, flags)) [[likely]] { - return format; - } - const vk::Format alternative = GetAlternativeFormat(format); - if (IsFormatSupported(alternative, flags)) [[likely]] { - return alternative; + if (!IsFormatSupported(format, flags)) [[unlikely]] { + switch (format) { + case vk::Format::eD16UnormS8Uint: + if (IsFormatSupported(vk::Format::eD24UnormS8Uint, flags)) { + return vk::Format::eD24UnormS8Uint; + } + if (IsFormatSupported(vk::Format::eD32SfloatS8Uint, flags)) { + return vk::Format::eD32SfloatS8Uint; + } + default: + break; + } } return format; } From be4c38bf1c4ebd6bf669176cc5123cbf9103bd01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Wed, 18 Dec 2024 20:48:00 +0100 Subject: [PATCH 41/67] Handle 32bit int ImageFormat (#1823) --- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5d2ec6f96f..255a3e2b2e 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ 
b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -696,6 +696,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { return spv::ImageFormat::R32ui; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Sint) { + return spv::ImageFormat::R32i; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::R32f; From b1b4c8c48777a380e26da844e7a71cf3a94e4ce5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Wed, 18 Dec 2024 20:57:58 +0100 Subject: [PATCH 42/67] Handle setting Vcc in Translator::SetDst64 (#1826) --- src/shader_recompiler/frontend/translate/translate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 3031e66439..a14bff7060 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -439,7 +439,8 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi); return ir.SetVectorReg(IR::VectorReg(operand.code), lo); case OperandField::VccLo: - UNREACHABLE(); + ir.SetVccLo(lo); + return ir.SetVccHi(hi); case OperandField::VccHi: UNREACHABLE(); case OperandField::M0: From 32435674f24708b5d0533c3960b9447870ea35f4 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:05:35 -0600 Subject: [PATCH 43/67] Misc UE4 fixes (#1821) * Add ExecLo case to S_SAVEEXEC_B64 Seen in CUSA38209 * S_BCNT1_I32_B32 Turtle said our implementation of S_BCNT1_I32_B64 was meant to be for S_BCNT1_I32_B32, so renaming the opcode is the fix. 
--- src/shader_recompiler/frontend/translate/scalar_alu.cpp | 8 +++++--- src/shader_recompiler/frontend/translate/translate.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index e731e299aa..f96fd0f401 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -98,8 +98,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { break; case Opcode::S_BREV_B32: return S_BREV_B32(inst); - case Opcode::S_BCNT1_I32_B64: - return S_BCNT1_I32_B64(inst); + case Opcode::S_BCNT1_I32_B32: + return S_BCNT1_I32_B32(inst); case Opcode::S_FF1_I32_B32: return S_FF1_I32_B32(inst); case Opcode::S_AND_SAVEEXEC_B64: @@ -579,7 +579,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0]))); } -void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { +void Translator::S_BCNT1_I32_B32(const GcnInst& inst) { const IR::U32 result = ir.BitCount(GetSrc(inst.src[0])); SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); @@ -602,6 +602,8 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in return ir.GetVcc(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + case OperandField::ExecLo: + return ir.GetExec(); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 8e575fcadf..218b66d74b 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -110,7 +110,7 @@ class Translator { void S_MOV_B64(const GcnInst& inst); void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); - void S_BCNT1_I32_B64(const GcnInst& inst); + void S_BCNT1_I32_B32(const GcnInst& 
inst); void S_FF1_I32_B32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); From 1e0809903680dd401641ce24d964b6e75d629482 Mon Sep 17 00:00:00 2001 From: Mahmoud Adel <94652220+AboMedoz@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:06:30 +0200 Subject: [PATCH 44/67] add R8Uint in image Detiling (#1812) used by InFamous, and maybe other games --- src/video_core/texture_cache/tile_manager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index a5e09e45de..94d37c993b 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -172,6 +172,7 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_ vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { + case vk::Format::eR8Uint: case vk::Format::eR8Unorm: return vk::Format::eR8Uint; case vk::Format::eR4G4B4A4UnormPack16: From adf4b635f743ed2bc1d4d8d18ebacdd45649f7b4 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:11:09 +0200 Subject: [PATCH 45/67] hot-fix: Proper abi on init_routine --- src/core/libraries/kernel/threads/pthread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 08886c6eb6..372f05bff8 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -327,7 +327,7 @@ void PS4_SYSV_ABI sched_yield() { std::this_thread::yield(); } -int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void (*init_routine)()) { +int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void PS4_SYSV_ABI (*init_routine)()) { for (;;) { auto state = once_control->state.load(); if 
(state == PthreadOnceState::Done) { From 188eebb92a9258ace01fa78b28c5a61e951acea1 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 19 Dec 2024 10:18:28 +0200 Subject: [PATCH 46/67] ir: Add heuristic based LDS barrier pass (#1801) * ir: Add heuristic based LDS barrier pass * Attempts to insert barriers after zero-depth divergent conditional blocks in shaders that use shared memory * lds_barriers: Limit to nvidia * Intel has historically had problems with cs barriers, will debug other time --- CMakeLists.txt | 1 + src/shader_recompiler/ir/passes/ir_passes.h | 5 ++ .../ir/passes/shared_memory_barrier_pass.cpp | 46 +++++++++++++++++++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/recompiler.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 6 files changed, 55 insertions(+) create mode 100644 src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f0397e86d..cc6fb6b938 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -671,6 +671,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp + src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 61f43e7e48..8a71d9e1f2 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -6,6 +6,10 @@ #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +namespace Shader { +struct Profile; +} + namespace Shader::Optimization { void SsaRewritePass(IR::BlockList& program); @@ -21,5
+25,6 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp new file mode 100644 index 0000000000..cae001e960 --- /dev/null +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Optimization { + +void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { + if (!program.info.uses_shared || !profile.needs_lds_barriers) { + return; + } + using Type = IR::AbstractSyntaxNode::Type; + u32 branch_depth{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + if (node.type == Type::EndIf) { + --branch_depth; + continue; + } + if (node.type != Type::If) { + continue; + } + u32 curr_depth = branch_depth++; + if (curr_depth != 0) { + continue; + } + const IR::U1 cond = node.data.if_node.cond; + const auto insert_barrier = IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && + inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { + return true; + } + return std::nullopt; + }); + if (insert_barrier) { + IR::Block* const merge = node.data.if_node.merge; + auto insert_point = 
std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); + IR::IREmitter ir{*merge, insert_point}; + ir.Barrier(); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c00e37f9c8..fc8c5956e9 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -27,6 +27,7 @@ struct Profile { bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; + bool needs_lds_barriers{}; u64 min_ssbo_alignment{}; }; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index ad57adb6a4..bb027a11e5 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -91,6 +91,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); + Shader::Optimization::SharedMemoryBarrierPass(program, profile); return program; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 50396287b7..4b88bd3746 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -204,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, + .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, }; auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", From b0b74243af97bc71a5681c279f738608dc5c6ebe Mon 
Sep 17 00:00:00 2001 From: georgemoralis Date: Thu, 19 Dec 2024 10:25:03 +0200 Subject: [PATCH 47/67] clang-fix --- src/core/libraries/kernel/threads/pthread.cpp | 3 ++- .../ir/passes/shared_memory_barrier_pass.cpp | 15 ++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 372f05bff8..610e612388 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -327,7 +327,8 @@ void PS4_SYSV_ABI sched_yield() { std::this_thread::yield(); } -int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void PS4_SYSV_ABI (*init_routine)()) { +int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, + void PS4_SYSV_ABI (*init_routine)()) { for (;;) { auto state = once_control->state.load(); if (state == PthreadOnceState::Done) { diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp index cae001e960..aad8fb1487 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -27,13 +27,14 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { continue; } const IR::U1 cond = node.data.if_node.cond; - const auto insert_barrier = IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && - inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { - return true; - } - return std::nullopt; - }); + const auto insert_barrier = + IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && + inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { + return true; + } + return std::nullopt; + }); if (insert_barrier) { IR::Block* const merge = node.data.if_node.merge; auto 
insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); From f2a989b9da5bbcc0b27e64fd37fdde95a9620a53 Mon Sep 17 00:00:00 2001 From: f8ith Date: Thu, 19 Dec 2024 16:58:11 +0800 Subject: [PATCH 48/67] wip: added status column for compat data (#1668) * wip: added basic gui for compat data * data is currently pulled directly from github API, awaiting server infra * removed unused initializer * fixes * fix cmake * wip: add some testing date / version * add tooltip * fix nested QJsonObject * fix tooltip color * fix clang-format * Edit style * Add clickable status * formatting * import order * typo * fix clang format 2 --------- Co-authored-by: georgemoralis --- CMakeLists.txt | 2 + src/qt_gui/compatibility_info.cpp | 227 ++++++++++++++++++++++++++++++ src/qt_gui/compatibility_info.h | 97 +++++++++++++ src/qt_gui/game_info.cpp | 2 + src/qt_gui/game_list_frame.cpp | 143 ++++++++++++++++--- src/qt_gui/game_list_frame.h | 39 +++-- src/qt_gui/game_list_utils.h | 7 + src/qt_gui/main_window.cpp | 4 +- src/qt_gui/main_window.h | 3 + 9 files changed, 488 insertions(+), 36 deletions(-) create mode 100644 src/qt_gui/compatibility_info.cpp create mode 100644 src/qt_gui/compatibility_info.h diff --git a/CMakeLists.txt b/CMakeLists.txt index cc6fb6b938..1727338402 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -811,6 +811,8 @@ set(QT_GUI src/qt_gui/about_dialog.cpp src/qt_gui/background_music_player.h src/qt_gui/cheats_patches.cpp src/qt_gui/cheats_patches.h + src/qt_gui/compatibility_info.cpp + src/qt_gui/compatibility_info.h src/qt_gui/main_window_ui.h src/qt_gui/main_window.cpp src/qt_gui/main_window.h diff --git a/src/qt_gui/compatibility_info.cpp b/src/qt_gui/compatibility_info.cpp new file mode 100644 index 0000000000..c8d6bf36d1 --- /dev/null +++ b/src/qt_gui/compatibility_info.cpp @@ -0,0 +1,227 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include + +#include
"common/path_util.h" +#include "compatibility_info.h" + +CompatibilityInfoClass::CompatibilityInfoClass() + : m_network_manager(new QNetworkAccessManager(this)) { + QStringList file_paths; + std::filesystem::path compatibility_file_path = + Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / "compatibility_data.json"; + Common::FS::PathToQString(m_compatibility_filename, compatibility_file_path); +}; +CompatibilityInfoClass::~CompatibilityInfoClass() = default; + +void CompatibilityInfoClass::UpdateCompatibilityDatabase(QWidget* parent) { + if (LoadCompatibilityFile()) + return; + + QNetworkReply* reply = FetchPage(1); + WaitForReply(reply); + + QProgressDialog dialog(tr("Fetching compatibility data, please wait"), tr("Cancel"), 0, 0, + parent); + dialog.setWindowTitle(tr("Loading...")); + + int remaining_pages = 0; + if (reply->hasRawHeader("link")) { + QRegularExpression last_page_re("(\\d+)(?=>; rel=\"last\")"); + QRegularExpressionMatch last_page_match = + last_page_re.match(QString(reply->rawHeader("link"))); + if (last_page_match.hasMatch()) { + remaining_pages = last_page_match.captured(0).toInt() - 1; + } + } + + if (reply->error() != QNetworkReply::NoError) { + reply->deleteLater(); + QMessageBox::critical(parent, tr("Error"), + tr("Unable to update compatibility data! 
Try again later.")); + // Try loading compatibility_file.json again + LoadCompatibilityFile(); + return; + } + + ExtractCompatibilityInfo(reply->readAll()); + + QVector replies(remaining_pages); + QFutureWatcher future_watcher; + + for (int i = 0; i < remaining_pages; i++) { + replies[i] = FetchPage(i + 2); + } + + future_watcher.setFuture(QtConcurrent::map(replies, WaitForReply)); + connect(&future_watcher, &QFutureWatcher::finished, [&]() { + for (int i = 0; i < remaining_pages; i++) { + if (replies[i]->error() == QNetworkReply::NoError) { + ExtractCompatibilityInfo(replies[i]->readAll()); + } + replies[i]->deleteLater(); + } + + QFile compatibility_file(m_compatibility_filename); + + if (!compatibility_file.open(QIODevice::WriteOnly | QIODevice::Truncate | + QIODevice::Text)) { + QMessageBox::critical(parent, tr("Error"), + tr("Unable to open compatibility.json for writing.")); + return; + } + + QJsonDocument json_doc; + m_compatibility_database["version"] = COMPAT_DB_VERSION; + + json_doc.setObject(m_compatibility_database); + compatibility_file.write(json_doc.toJson()); + compatibility_file.close(); + + dialog.reset(); + }); + connect(&dialog, &QProgressDialog::canceled, &future_watcher, &QFutureWatcher::cancel); + dialog.setRange(0, remaining_pages); + connect(&future_watcher, &QFutureWatcher::progressValueChanged, &dialog, + &QProgressDialog::setValue); + dialog.exec(); +} + +QNetworkReply* CompatibilityInfoClass::FetchPage(int page_num) { + QUrl url = QUrl("https://api.github.com/repos/shadps4-emu/shadps4-game-compatibility/issues"); + QUrlQuery query; + query.addQueryItem("per_page", QString("100")); + query.addQueryItem( + "tags", QString("status-ingame status-playable status-nothing status-boots status-menus")); + query.addQueryItem("page", QString::number(page_num)); + url.setQuery(query); + + QNetworkRequest request(url); + QNetworkReply* reply = m_network_manager->get(request); + + return reply; +} + +void 
CompatibilityInfoClass::WaitForReply(QNetworkReply* reply) { + QEventLoop loop; + connect(reply, &QNetworkReply::finished, &loop, &QEventLoop::quit); + loop.exec(); + return; +}; + +CompatibilityEntry CompatibilityInfoClass::GetCompatibilityInfo(const std::string& serial) { + QString title_id = QString::fromStdString(serial); + if (m_compatibility_database.contains(title_id)) { + { + for (int os_int = 0; os_int != static_cast(OSType::Last); os_int++) { + QString os_string = OSTypeToString.at(static_cast(os_int)); + QJsonObject compatibility_obj = m_compatibility_database[title_id].toObject(); + if (compatibility_obj.contains(os_string)) { + QJsonObject compatibility_entry_obj = compatibility_obj[os_string].toObject(); + CompatibilityEntry compatibility_entry{ + LabelToCompatStatus.at(compatibility_entry_obj["status"].toString()), + compatibility_entry_obj["version"].toString(), + QDateTime::fromString(compatibility_entry_obj["last_tested"].toString(), + Qt::ISODate), + compatibility_entry_obj["url"].toString(), + compatibility_entry_obj["issue_number"].toInt()}; + return compatibility_entry; + } + } + } + } + return CompatibilityEntry{CompatibilityStatus::Unknown}; +} + +bool CompatibilityInfoClass::LoadCompatibilityFile() { + // Returns true if compatibility is loaded succescfully + QFileInfo check_file(m_compatibility_filename); + const auto modified_delta = QDateTime::currentDateTime() - check_file.lastModified(); + if (!check_file.exists() || !check_file.isFile() || + std::chrono::duration_cast(modified_delta).count() > 60) { + return false; + } + + QFile compatibility_file(m_compatibility_filename); + if (!compatibility_file.open(QIODevice::ReadOnly)) { + compatibility_file.close(); + return false; + } + QByteArray json_data = compatibility_file.readAll(); + compatibility_file.close(); + + QJsonDocument json_doc = QJsonDocument::fromJson(json_data); + if (json_doc.isEmpty() || json_doc.isNull()) { + return false; + } + + // Check database version + int 
version_number; + if (json_doc.object()["version"].isDouble()) { + if (json_doc.object()["version"].toInt() < COMPAT_DB_VERSION) + return false; + } else + return false; + + m_compatibility_database = json_doc.object(); + return true; +} + +void CompatibilityInfoClass::ExtractCompatibilityInfo(QByteArray response) { + QJsonDocument json_doc(QJsonDocument::fromJson(response)); + + if (json_doc.isNull()) { + return; + } + + QJsonArray json_arr; + + json_arr = json_doc.array(); + + for (const auto& issue_ref : std::as_const(json_arr)) { + QJsonObject issue_obj = issue_ref.toObject(); + QString title_id; + QRegularExpression title_id_regex("CUSA[0-9]{5}"); + QRegularExpressionMatch title_id_match = + title_id_regex.match(issue_obj["title"].toString()); + QString current_os = "os-unknown"; + QString compatibility_status = "status-unknown"; + if (issue_obj.contains("labels") && title_id_match.hasMatch()) { + title_id = title_id_match.captured(0); + const QJsonArray& label_array = issue_obj["labels"].toArray(); + for (const auto& elem : label_array) { + QString label = elem.toObject()["name"].toString(); + if (LabelToOSType.contains(label)) { + current_os = label; + continue; + } + if (LabelToCompatStatus.contains(label)) { + compatibility_status = label; + continue; + } + } + + // QJson does not support editing nested objects directly.. + + QJsonObject compatibility_obj = m_compatibility_database[title_id].toObject(); + + QJsonObject compatibility_data{ + {{"status", compatibility_status}, + {"last_tested", issue_obj["updated_at"]}, + {"version", issue_obj["milestone"].isNull() + ? 
"unknown" + : issue_obj["milestone"].toObject()["title"].toString()}, + {"url", issue_obj["html_url"]}, + {"issue_number", issue_obj["number"]}}}; + + compatibility_obj[current_os] = compatibility_data; + + m_compatibility_database[title_id] = compatibility_obj; + } + } + + return; +} diff --git a/src/qt_gui/compatibility_info.h b/src/qt_gui/compatibility_info.h new file mode 100644 index 0000000000..2b970670a7 --- /dev/null +++ b/src/qt_gui/compatibility_info.h @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include + +#include "common/config.h" +#include "core/file_format/psf.h" + +static constexpr int COMPAT_DB_VERSION = 1; + +enum class CompatibilityStatus { + Unknown, + Nothing, + Boots, + Menus, + Ingame, + Playable, +}; + +// Prioritize different compatibility reports based on user's platform +enum class OSType { +#ifdef Q_OS_WIN + Win32 = 0, + Unknown, + Linux, + macOS, +#elif defined(Q_OS_LINUX) + Linux = 0, + Unknown, + Win32, + macOS, +#elif defined(Q_OS_MAC) + macOS = 0, + Unknown, + Linux, + Win32, +#endif + // Fake enum to allow for iteration + Last +}; + +struct CompatibilityEntry { + CompatibilityStatus status; + QString version; + QDateTime last_tested; + QString url; + int issue_number; +}; + +class CompatibilityInfoClass : public QObject { + Q_OBJECT +public: + // Please think of a better alternative + inline static const std::unordered_map LabelToCompatStatus = { + {QStringLiteral("status-nothing"), CompatibilityStatus::Nothing}, + {QStringLiteral("status-boots"), CompatibilityStatus::Boots}, + {QStringLiteral("status-menus"), CompatibilityStatus::Menus}, + {QStringLiteral("status-ingame"), CompatibilityStatus::Ingame}, + {QStringLiteral("status-playable"), CompatibilityStatus::Playable}}; + inline static const std::unordered_map LabelToOSType = { + {QStringLiteral("os-linux"), OSType::Linux}, + 
{QStringLiteral("os-macOS"), OSType::macOS}, + {QStringLiteral("os-windows"), OSType::Win32}, + }; + + inline static const std::unordered_map CompatStatusToString = { + {CompatibilityStatus::Unknown, QStringLiteral("Unknown")}, + {CompatibilityStatus::Nothing, QStringLiteral("Nothing")}, + {CompatibilityStatus::Boots, QStringLiteral("Boots")}, + {CompatibilityStatus::Menus, QStringLiteral("Menus")}, + {CompatibilityStatus::Ingame, QStringLiteral("Ingame")}, + {CompatibilityStatus::Playable, QStringLiteral("Playable")}}; + inline static const std::unordered_map OSTypeToString = { + {OSType::Linux, QStringLiteral("os-linux")}, + {OSType::macOS, QStringLiteral("os-macOS")}, + {OSType::Win32, QStringLiteral("os-windows")}, + {OSType::Unknown, QStringLiteral("os-unknown")}}; + + CompatibilityInfoClass(); + ~CompatibilityInfoClass(); + void UpdateCompatibilityDatabase(QWidget* parent = nullptr); + bool LoadCompatibilityFile(); + CompatibilityEntry GetCompatibilityInfo(const std::string& serial); + void ExtractCompatibilityInfo(QByteArray response); + static void WaitForReply(QNetworkReply* reply); + QNetworkReply* FetchPage(int page_num); + +private: + QNetworkAccessManager* m_network_manager; + QString m_compatibility_filename; + QJsonObject m_compatibility_database; +}; \ No newline at end of file diff --git a/src/qt_gui/game_info.cpp b/src/qt_gui/game_info.cpp index 48643f8edf..e4750fa1dd 100644 --- a/src/qt_gui/game_info.cpp +++ b/src/qt_gui/game_info.cpp @@ -4,6 +4,7 @@ #include #include "common/path_util.h" +#include "compatibility_info.h" #include "game_info.h" GameInfoClass::GameInfoClass() = default; @@ -22,6 +23,7 @@ void GameInfoClass::GetGameInfo(QWidget* parent) { } } } + m_games = QtConcurrent::mapped(filePaths, [&](const QString& path) { return readGameInfo(Common::FS::PathFromQString(path)); }).results(); diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index 63f6b63b8d..d43c35ef4a 100644 --- a/src/qt_gui/game_list_frame.cpp 
+++ b/src/qt_gui/game_list_frame.cpp @@ -1,12 +1,17 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/logging/log.h" #include "common/path_util.h" #include "common/string_util.h" #include "game_list_frame.h" +#include "game_list_utils.h" -GameListFrame::GameListFrame(std::shared_ptr game_info_get, QWidget* parent) - : QTableWidget(parent), m_game_info(game_info_get) { +GameListFrame::GameListFrame(std::shared_ptr game_info_get, + std::shared_ptr compat_info_get, + QWidget* parent) + : QTableWidget(parent), m_game_info(game_info_get), m_compat_info(compat_info_get) { icon_size = Config::getIconSize(); this->setShowGrid(false); this->setEditTriggers(QAbstractItemView::NoEditTriggers); @@ -17,29 +22,30 @@ GameListFrame::GameListFrame(std::shared_ptr game_info_get, QWidg this->verticalScrollBar()->installEventFilter(this); this->verticalScrollBar()->setSingleStep(20); this->horizontalScrollBar()->setSingleStep(20); - this->verticalHeader()->setSectionResizeMode(QHeaderView::Fixed); + this->verticalHeader()->setSectionResizeMode(QHeaderView::ResizeToContents); this->verticalHeader()->setVisible(false); this->horizontalHeader()->setContextMenuPolicy(Qt::CustomContextMenu); this->horizontalHeader()->setHighlightSections(false); this->horizontalHeader()->setSortIndicatorShown(true); this->horizontalHeader()->setStretchLastSection(true); this->setContextMenuPolicy(Qt::CustomContextMenu); - this->setColumnCount(9); + this->setColumnCount(10); this->setColumnWidth(1, 300); // Name - this->setColumnWidth(2, 120); // Serial - this->setColumnWidth(3, 90); // Region - this->setColumnWidth(4, 90); // Firmware - this->setColumnWidth(5, 90); // Size - this->setColumnWidth(6, 90); // Version - this->setColumnWidth(7, 120); // Play Time + this->setColumnWidth(2, 140); // Compatibility + this->setColumnWidth(3, 120); // Serial + this->setColumnWidth(4, 90); // Region + this->setColumnWidth(5, 
90); // Firmware + this->setColumnWidth(6, 90); // Size + this->setColumnWidth(7, 90); // Version + this->setColumnWidth(8, 120); // Play Time QStringList headers; - headers << tr("Icon") << tr("Name") << tr("Serial") << tr("Region") << tr("Firmware") - << tr("Size") << tr("Version") << tr("Play Time") << tr("Path"); + headers << tr("Icon") << tr("Name") << tr("Compatibility") << tr("Serial") << tr("Region") + << tr("Firmware") << tr("Size") << tr("Version") << tr("Play Time") << tr("Path"); this->setHorizontalHeaderLabels(headers); this->horizontalHeader()->setSortIndicatorShown(true); this->horizontalHeader()->setSectionResizeMode(0, QHeaderView::ResizeToContents); - this->horizontalHeader()->setSectionResizeMode(2, QHeaderView::Fixed); this->horizontalHeader()->setSectionResizeMode(3, QHeaderView::Fixed); + this->horizontalHeader()->setSectionResizeMode(4, QHeaderView::Fixed); PopulateGameList(); connect(this, &QTableWidget::currentCellChanged, this, &GameListFrame::onCurrentCellChanged); @@ -68,6 +74,12 @@ GameListFrame::GameListFrame(std::shared_ptr game_info_get, QWidg connect(this, &QTableWidget::customContextMenuRequested, this, [=, this](const QPoint& pos) { m_gui_context_menus.RequestGameMenu(pos, m_game_info->m_games, this, true); }); + + connect(this, &QTableWidget::cellClicked, this, [=, this](int row, int column) { + if (column == 2 && !m_game_info->m_games[row].compatibility.url.isEmpty()) { + QDesktopServices::openUrl(QUrl(m_game_info->m_games[row].compatibility.url)); + } + }); } void GameListFrame::onCurrentCellChanged(int currentRow, int currentColumn, int previousRow, @@ -96,16 +108,20 @@ void GameListFrame::PopulateGameList() { for (int i = 0; i < m_game_info->m_games.size(); i++) { SetTableItem(i, 1, QString::fromStdString(m_game_info->m_games[i].name)); - SetTableItem(i, 2, QString::fromStdString(m_game_info->m_games[i].serial)); - SetRegionFlag(i, 3, QString::fromStdString(m_game_info->m_games[i].region)); - SetTableItem(i, 4, 
QString::fromStdString(m_game_info->m_games[i].fw)); - SetTableItem(i, 5, QString::fromStdString(m_game_info->m_games[i].size)); - SetTableItem(i, 6, QString::fromStdString(m_game_info->m_games[i].version)); + SetTableItem(i, 3, QString::fromStdString(m_game_info->m_games[i].serial)); + SetRegionFlag(i, 4, QString::fromStdString(m_game_info->m_games[i].region)); + SetTableItem(i, 5, QString::fromStdString(m_game_info->m_games[i].fw)); + SetTableItem(i, 6, QString::fromStdString(m_game_info->m_games[i].size)); + SetTableItem(i, 7, QString::fromStdString(m_game_info->m_games[i].version)); + + m_game_info->m_games[i].compatibility = + m_compat_info->GetCompatibilityInfo(m_game_info->m_games[i].serial); + SetCompatibilityItem(i, 2, m_game_info->m_games[i].compatibility); QString playTime = GetPlayTime(m_game_info->m_games[i].serial); if (playTime.isEmpty()) { m_game_info->m_games[i].play_time = "0:00:00"; - SetTableItem(i, 7, tr("Never Played")); + SetTableItem(i, 8, tr("Never Played")); } else { QStringList timeParts = playTime.split(':'); int hours = timeParts[0].toInt(); @@ -123,15 +139,15 @@ void GameListFrame::PopulateGameList() { formattedPlayTime = formattedPlayTime.trimmed(); m_game_info->m_games[i].play_time = playTime.toStdString(); if (formattedPlayTime.isEmpty()) { - SetTableItem(i, 7, QString("%1s").arg(seconds)); + SetTableItem(i, 8, QString("%1s").arg(seconds)); } else { - SetTableItem(i, 7, formattedPlayTime); + SetTableItem(i, 8, formattedPlayTime); } } QString path; Common::FS::PathToQString(path, m_game_info->m_games[i].path); - SetTableItem(i, 8, path); + SetTableItem(i, 9, path); } } @@ -203,6 +219,89 @@ void GameListFrame::ResizeIcons(int iconSize) { this->horizontalHeader()->setSectionResizeMode(8, QHeaderView::ResizeToContents); } +void GameListFrame::SetCompatibilityItem(int row, int column, CompatibilityEntry entry) { + QTableWidgetItem* item = new QTableWidgetItem(); + QWidget* widget = new QWidget(this); + QGridLayout* layout = new 
QGridLayout(widget); + + widget->setStyleSheet("QToolTip {background-color: black; color: white;}"); + + QColor color; + QString status_explanation; + + switch (entry.status) { + case CompatibilityStatus::Unknown: + color = QStringLiteral("#000000"); + status_explanation = tr("Compatibility is untested"); + break; + case CompatibilityStatus::Nothing: + color = QStringLiteral("#212121"); + status_explanation = tr("Games does not initialize properly / crashes the emulator"); + break; + case CompatibilityStatus::Boots: + color = QStringLiteral("#828282"); + status_explanation = tr("Game boots, but only displays a blank screen"); + break; + case CompatibilityStatus::Menus: + color = QStringLiteral("#FF0000"); + status_explanation = tr("Game displays an image but does not go past the menu"); + break; + case CompatibilityStatus::Ingame: + color = QStringLiteral("#F2D624"); + status_explanation = tr("Game has game-breaking glitches or unplayable performance"); + break; + case CompatibilityStatus::Playable: + color = QStringLiteral("#47D35C"); + status_explanation = + tr("Game can be completed with playable performance and no major glitches"); + break; + } + + QString tooltip_string; + + if (entry.status == CompatibilityStatus::Unknown) { + tooltip_string = status_explanation; + } else { + tooltip_string = + "

" + tr("Click to go to issue") + "" + "
" + tr("Last updated") + + QString(": %1 (%2)").arg(entry.last_tested.toString("yyyy-MM-dd"), entry.version) + + "
" + status_explanation + "

"; + } + + QPixmap circle_pixmap(16, 16); + circle_pixmap.fill(Qt::transparent); + QPainter painter(&circle_pixmap); + painter.setRenderHint(QPainter::Antialiasing); + painter.setPen(color); + painter.setBrush(color); + painter.drawEllipse({circle_pixmap.width() / 2.0, circle_pixmap.height() / 2.0}, 6.0, 6.0); + + QLabel* dotLabel = new QLabel("", widget); + dotLabel->setPixmap(circle_pixmap); + + QLabel* label = new QLabel(m_compat_info->CompatStatusToString.at(entry.status), widget); + + label->setStyleSheet("color: white; font-size: 16px; font-weight: bold;"); + + // Create shadow effect + QGraphicsDropShadowEffect* shadowEffect = new QGraphicsDropShadowEffect(); + shadowEffect->setBlurRadius(5); // Set the blur radius of the shadow + shadowEffect->setColor(QColor(0, 0, 0, 160)); // Set the color and opacity of the shadow + shadowEffect->setOffset(2, 2); // Set the offset of the shadow + + label->setGraphicsEffect(shadowEffect); // Apply shadow effect to the QLabel + + layout->addWidget(dotLabel, 0, 0, -1, 1); + layout->addWidget(label, 0, 1, 1, 1); + layout->setAlignment(Qt::AlignLeft); + widget->setLayout(layout); + widget->setToolTip(tooltip_string); + this->setItem(row, column, item); + this->setCellWidget(row, column, widget); + + return; +} + void GameListFrame::SetTableItem(int row, int column, QString itemStr) { QTableWidgetItem* item = new QTableWidgetItem(); QWidget* widget = new QWidget(this); diff --git a/src/qt_gui/game_list_frame.h b/src/qt_gui/game_list_frame.h index 6da2734a8e..8c6fcb1e2a 100644 --- a/src/qt_gui/game_list_frame.h +++ b/src/qt_gui/game_list_frame.h @@ -3,9 +3,14 @@ #pragma once +#include +#include +#include +#include #include #include "background_music_player.h" +#include "compatibility_info.h" #include "game_info.h" #include "game_list_utils.h" #include "gui_context_menus.h" @@ -13,7 +18,9 @@ class GameListFrame : public QTableWidget { Q_OBJECT public: - explicit GameListFrame(std::shared_ptr game_info_get, QWidget* parent = 
nullptr); + explicit GameListFrame(std::shared_ptr game_info_get, + std::shared_ptr compat_info_get, + QWidget* parent = nullptr); Q_SIGNALS: void GameListFrameClosed(); @@ -29,6 +36,7 @@ public Q_SLOTS: private: void SetTableItem(int row, int column, QString itemStr); void SetRegionFlag(int row, int column, QString itemStr); + void SetCompatibilityItem(int row, int column, CompatibilityEntry entry); QString GetPlayTime(const std::string& serial); QList m_columnActs; GameInfoClass* game_inf_get = nullptr; @@ -42,6 +50,7 @@ public Q_SLOTS: GameListUtils m_game_list_utils; GuiContextMenus m_gui_context_menus; std::shared_ptr m_game_info; + std::shared_ptr m_compat_info; int icon_size; @@ -59,18 +68,20 @@ public Q_SLOTS: case 1: return a.name < b.name; case 2: - return a.serial.substr(4) < b.serial.substr(4); + return a.compatibility.status < b.compatibility.status; case 3: - return a.region < b.region; + return a.serial.substr(4) < b.serial.substr(4); case 4: - return parseAsFloat(a.fw, 0) < parseAsFloat(b.fw, 0); + return a.region < b.region; case 5: - return parseSizeMB(b.size) < parseSizeMB(a.size); + return parseAsFloat(a.fw, 0) < parseAsFloat(b.fw, 0); case 6: - return a.version < b.version; + return parseSizeMB(b.size) < parseSizeMB(a.size); case 7: - return a.play_time < b.play_time; + return a.version < b.version; case 8: + return a.play_time < b.play_time; + case 9: return a.path < b.path; default: return false; @@ -82,18 +93,20 @@ public Q_SLOTS: case 1: return a.name > b.name; case 2: - return a.serial.substr(4) > b.serial.substr(4); + return a.compatibility.status > b.compatibility.status; case 3: - return a.region > b.region; + return a.serial.substr(4) > b.serial.substr(4); case 4: - return parseAsFloat(a.fw, 0) > parseAsFloat(b.fw, 0); + return a.region > b.region; case 5: - return parseSizeMB(b.size) > parseSizeMB(a.size); + return parseAsFloat(a.fw, 0) > parseAsFloat(b.fw, 0); case 6: - return a.version > b.version; + return parseSizeMB(b.size) > 
parseSizeMB(a.size); case 7: - return a.play_time > b.play_time; + return a.version > b.version; case 8: + return a.play_time > b.play_time; + case 9: return a.path > b.path; default: return false; diff --git a/src/qt_gui/game_list_utils.h b/src/qt_gui/game_list_utils.h index 3d710c5b7d..16c0307c82 100644 --- a/src/qt_gui/game_list_utils.h +++ b/src/qt_gui/game_list_utils.h @@ -3,7 +3,13 @@ #pragma once +#include +#include +#include +#include +#include #include "common/path_util.h" +#include "compatibility_info.h" struct GameInfo { std::filesystem::path path; // root path of game directory @@ -21,6 +27,7 @@ struct GameInfo { std::string fw = "Unknown"; std::string play_time = "Unknown"; + CompatibilityEntry compatibility = CompatibilityEntry{CompatibilityStatus::Unknown}; }; class GameListUtils { diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 9c81bcf11c..90cc947f46 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -138,7 +138,7 @@ void MainWindow::CreateDockWindows() { setCentralWidget(phCentralWidget); m_dock_widget.reset(new QDockWidget(tr("Game List"), this)); - m_game_list_frame.reset(new GameListFrame(m_game_info, this)); + m_game_list_frame.reset(new GameListFrame(m_game_info, m_compat_info, this)); m_game_list_frame->setObjectName("gamelist"); m_game_grid_frame.reset(new GameGridFrame(m_game_info, this)); m_game_grid_frame->setObjectName("gamegridlist"); @@ -184,6 +184,8 @@ void MainWindow::CreateDockWindows() { } void MainWindow::LoadGameLists() { + // Update compatibility database + m_compat_info->UpdateCompatibilityDatabase(this); // Get game info from game folders. 
m_game_info->GetGameInfo(this); if (isTableList) { diff --git a/src/qt_gui/main_window.h b/src/qt_gui/main_window.h index 5ae2540eca..d3623c3d07 100644 --- a/src/qt_gui/main_window.h +++ b/src/qt_gui/main_window.h @@ -10,6 +10,7 @@ #include "background_music_player.h" #include "common/config.h" #include "common/path_util.h" +#include "compatibility_info.h" #include "core/file_format/psf.h" #include "core/file_sys/fs.h" #include "elf_viewer.h" @@ -92,6 +93,8 @@ private Q_SLOTS: PSF psf; std::shared_ptr m_game_info = std::make_shared(); + std::shared_ptr m_compat_info = + std::make_shared(); QTranslator* translator; From 953fb1463850f071e5df2a25532c8e41eb198013 Mon Sep 17 00:00:00 2001 From: Alessandro Ampala <48158436+alessandroampala@users.noreply.github.com> Date: Thu, 19 Dec 2024 09:59:26 +0100 Subject: [PATCH 49/67] Fix update on windows when path contains powershell wildcards (#1391) (#1779) * Fix update process on windows when there are some powershell wildcards in the installation path. (#1391) * Fix coding style error. 
(#1391) --- src/qt_gui/check_update.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp index a9aba0b841..d713f67fb9 100644 --- a/src/qt_gui/check_update.cpp +++ b/src/qt_gui/check_update.cpp @@ -347,7 +347,11 @@ void CheckUpdate::DownloadUpdate(const QString& url) { QString userPath; Common::FS::PathToQString(userPath, Common::FS::GetUserPath(Common::FS::PathType::UserDir)); +#ifdef Q_OS_WIN + QString tempDownloadPath = QString(getenv("LOCALAPPDATA")) + "/Temp/temp_download_update"; +#else QString tempDownloadPath = userPath + "/temp_download_update"; +#endif QDir dir(tempDownloadPath); if (!dir.exists()) { dir.mkpath("."); @@ -393,6 +397,11 @@ void CheckUpdate::Install() { QString processCommand; #ifdef Q_OS_WIN + // On windows, overwrite tempDirPath with AppData/Local/Temp folder + // due to PowerShell Expand-Archive not being able to handle correctly + // paths in square brackets (ie: ./[shadps4]) + tempDirPath = QString(getenv("LOCALAPPDATA")) + "/Temp/temp_download_update"; + // Windows Batch Script scriptFileName = tempDirPath + "/update.ps1"; scriptContent = QStringLiteral( @@ -408,10 +417,11 @@ void CheckUpdate::Install() { "Start-Sleep -Seconds 3\n" "Copy-Item -Recurse -Force '%2\\*' '%3\\'\n" "Start-Sleep -Seconds 2\n" - "Remove-Item -Force '%3\\update.ps1'\n" - "Remove-Item -Force '%3\\temp_download_update.zip'\n" - "Start-Process '%3\\shadps4.exe'\n" - "Remove-Item -Recurse -Force '%2'\n"); + "Remove-Item -Force -LiteralPath '%3\\update.ps1'\n" + "Remove-Item -Force -LiteralPath '%3\\temp_download_update.zip'\n" + "Remove-Item -Recurse -Force '%2'\n" + "Start-Process -FilePath '%3\\shadps4.exe' " + "-WorkingDirectory ([WildcardPattern]::Escape('%3'))\n"); arguments << "-ExecutionPolicy" << "Bypass" << "-File" << scriptFileName; From 39fed1f469104112afe03047644d40d72131409d Mon Sep 17 00:00:00 2001 From: alvinkim101 
<160981360+alvinkim101@users.noreply.github.com> Date: Fri, 20 Dec 2024 10:27:27 -0800 Subject: [PATCH 50/67] Fix splash assert (#1832) --- src/core/file_format/splash.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/file_format/splash.cpp b/src/core/file_format/splash.cpp index b687021579..4eb701cf79 100644 --- a/src/core/file_format/splash.cpp +++ b/src/core/file_format/splash.cpp @@ -9,7 +9,7 @@ #include "splash.h" bool Splash::Open(const std::filesystem::path& filepath) { - ASSERT_MSG(filepath.stem().string() != "png", "Unexpected file format passed"); + ASSERT_MSG(filepath.extension().string() == ".png", "Unexpected file format passed"); Common::FS::IOFile file(filepath, Common::FS::FileAccessMode::Read); if (!file.IsOpen()) { From 8d8bb050554b6c95042dd9462e744055dd35cf8c Mon Sep 17 00:00:00 2001 From: "Daniel R." <47796739+polybiusproxy@users.noreply.github.com> Date: Sat, 21 Dec 2024 10:20:24 +0100 Subject: [PATCH 51/67] renderer_vulkan: add support for Polygon draws (#1798) --- src/video_core/buffer_cache/buffer_cache.cpp | 36 +++++++++++++++---- .../renderer_vulkan/liverpool_to_vk.cpp | 1 + .../renderer_vulkan/liverpool_to_vk.h | 9 +++++ .../renderer_vulkan/vk_rasterizer.cpp | 7 ++-- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index e9fc064938..f265fb68d5 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -235,25 +235,44 @@ bool BufferCache::BindVertexBuffers( } u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { - // Emulate QuadList primitive type with CPU made index buffer. + // Emulate QuadList and Polygon primitive types with CPU made index buffer. 
const auto& regs = liverpool->regs; - if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList && !is_indexed) { - is_indexed = true; + if (!is_indexed) { + bool needs_index_buffer = false; + if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList || + regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { + needs_index_buffer = true; + } + + if (!needs_index_buffer) { + return regs.num_indices; + } // Emit indices. const u32 index_size = 3 * regs.num_indices; const auto [data, offset] = stream_buffer.Map(index_size); - Vulkan::LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); + + switch (regs.primitive_type) { + case AmdGpu::PrimitiveType::QuadList: + Vulkan::LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); + break; + case AmdGpu::PrimitiveType::Polygon: + Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); + break; + default: + UNREACHABLE(); + break; + } + stream_buffer.Commit(); // Bind index buffer. + is_indexed = true; + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16); return index_size / sizeof(u16); } - if (!is_indexed) { - return regs.num_indices; - } // Figure out index type and size. const bool is_index16 = @@ -288,6 +307,9 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, index_type); return new_index_size / index_size; } + if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { + UNREACHABLE(); + } // Bind index buffer. 
const u32 index_buffer_size = regs.num_indices * index_size; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index ec0bb3bb7d..6df89dbae7 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -117,6 +117,7 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { case AmdGpu::PrimitiveType::PatchPrimitive: return vk::PrimitiveTopology::ePatchList; case AmdGpu::PrimitiveType::QuadList: + case AmdGpu::PrimitiveType::Polygon: // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; case AmdGpu::PrimitiveType::RectList: diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index ebd09f0ee0..72bddc6b64 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -98,6 +98,15 @@ void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_ver } } +inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) { + u16* out_data = reinterpret_cast(out_ptr); + for (u16 i = 1; i < num_vertices - 1; i++) { + *out_data++ = 0; + *out_data++ = i; + *out_data++ = i + 1; + } +} + static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { if (fmt == vk::Format::eR32Sfloat) { return vk::Format::eD32Sfloat; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bd8906f865..df05b73a38 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -246,11 +246,12 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 } const auto& regs = liverpool->regs; - if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) { - // For QuadList we use generated index buffer to convert quads to triangles. 
Since it + if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList || + regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { + // We use a generated index buffer to convert quad lists and polygons to triangles. Since it // changes type of the draw, arguments are not valid for this case. We need to run a // conversion pass to repack the indirect arguments buffer first. - LOG_WARNING(Render_Vulkan, "QuadList primitive type is not supported for indirect draw"); + LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw"); return; } From 08182f814f2dc7a15f52c8566f6bf70a19b19fea Mon Sep 17 00:00:00 2001 From: Martin Date: Sun, 22 Dec 2024 01:49:12 +0100 Subject: [PATCH 52/67] Disable userfaultfd again by making it opt-in (#1777) * Disable userfaultfd again * Let userfd be be opt-in instead of disabled --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1727338402..d0c27c503f 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -890,7 +890,8 @@ if (ENABLE_DISCORD_RPC) target_compile_definitions(shadps4 PRIVATE ENABLE_DISCORD_RPC) endif() -if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") +# Optional due to https://github.com/shadps4-emu/shadPS4/issues/1704 +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND ENABLE_USERFAULTFD) target_compile_definitions(shadps4 PRIVATE ENABLE_USERFAULTFD) endif() From 7e890def481367694e2078dec21d2d9160e9fb77 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 21 Dec 2024 16:49:34 -0800 Subject: [PATCH 53/67] fs: Return nullptr when file descriptor is out of bounds. 
(#1842) --- src/core/file_sys/fs.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 45ba67b931..bf340e9e3a 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -171,6 +171,9 @@ void HandleTable::DeleteHandle(int d) { File* HandleTable::GetFile(int d) { std::scoped_lock lock{m_mutex}; + if (d < 0 || d >= m_files.size()) { + return nullptr; + } return m_files.at(d); } From fb2c035c0568cc33cbe8e02db91cc60d0db1db5b Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 22 Dec 2024 02:49:42 +0200 Subject: [PATCH 54/67] vk_rasterizer: Fix stencil clears (#1840) --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 15 ++++++++++++--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 2 +- src/video_core/renderer_vulkan/vk_scheduler.h | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index df05b73a38..f8efd9b811 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -161,10 +161,18 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { state.depth_attachment = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .loadOp = is_clear && regs.depth_control.depth_enable ? vk::AttachmentLoadOp::eClear + : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, + }; + state.stencil_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eUndefined, + .loadOp = is_clear && regs.depth_control.stencil_enable ? 
vk::AttachmentLoadOp::eClear + : vk::AttachmentLoadOp::eLoad, .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, - .stencil = regs.stencil_clear}}, + .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, }; texture_cache.TouchMeta(htile_address, slice, false); state.has_depth = @@ -778,6 +786,7 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& s desc.view_info.range); } state.depth_attachment.imageLayout = image.last_state.layout; + state.stencil_attachment.imageLayout = image.last_state.layout; image.usage.depth_target = true; image.usage.stencil = has_stencil; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 2c4e7a3c6a..81415f8b5e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -47,7 +47,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { ? render_state.color_attachments.data() : nullptr, .pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr, - .pStencilAttachment = render_state.has_stencil ? &render_state.depth_attachment : nullptr, + .pStencilAttachment = render_state.has_stencil ? 
&render_state.stencil_attachment : nullptr, }; current_cmdbuf.beginRendering(rendering_info); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cdd33745a2..fd5e683737 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -21,6 +21,7 @@ class Instance; struct RenderState { std::array color_attachments{}; vk::RenderingAttachmentInfo depth_attachment{}; + vk::RenderingAttachmentInfo stencil_attachment{}; u32 num_color_attachments{}; bool has_depth{}; bool has_stencil{}; From 0931802151f0c54a9adced601d8ee2ca1e80a6fc Mon Sep 17 00:00:00 2001 From: f8ith Date: Sun, 22 Dec 2024 17:05:44 +0800 Subject: [PATCH 55/67] compatibility_data: various fixes (#1847) * temporarily disable status column + various fixes * add configuration option to enable compatibility data * clang format --- src/common/config.cpp | 25 +++++++++++++++ src/common/config.h | 4 +++ src/qt_gui/compatibility_info.cpp | 51 +++++++++++++++++++++++++------ src/qt_gui/compatibility_info.h | 3 +- src/qt_gui/game_list_frame.cpp | 5 +++ src/qt_gui/main_window.cpp | 4 ++- 6 files changed, 81 insertions(+), 11 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 403b0e32f1..9d5a99d9ad 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -64,6 +64,8 @@ static bool vkCrashDiagnostic = false; static s16 cursorState = HideCursorState::Idle; static int cursorHideTimeout = 5; // 5 seconds (default) static bool separateupdatefolder = false; +static bool compatibilityData = false; +static bool checkCompatibilityOnStartup = false; // Gui std::vector settings_install_dirs = {}; @@ -224,6 +226,14 @@ bool getSeparateUpdateEnabled() { return separateupdatefolder; } +bool getCompatibilityEnabled() { + return compatibilityData; +} + +bool getCheckCompatibilityOnStartup() { + return checkCompatibilityOnStartup; +} + void setGpuId(s32 selectedGpuId) { gpuId = 
selectedGpuId; } @@ -344,6 +354,14 @@ void setSeparateUpdateEnabled(bool use) { separateupdatefolder = use; } +void setCompatibilityEnabled(bool use) { + compatibilityData = use; +} + +void setCheckCompatibilityOnStartup(bool use) { + checkCompatibilityOnStartup = use; +} + void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) { main_window_geometry_x = x; main_window_geometry_y = y; @@ -544,6 +562,9 @@ void load(const std::filesystem::path& path) { isShowSplash = toml::find_or(general, "showSplash", true); isAutoUpdate = toml::find_or(general, "autoUpdate", false); separateupdatefolder = toml::find_or(general, "separateUpdateEnabled", false); + compatibilityData = toml::find_or(general, "compatibilityEnabled", false); + checkCompatibilityOnStartup = + toml::find_or(general, "checkCompatibilityOnStartup", false); } if (data.contains("Input")) { @@ -656,6 +677,8 @@ void save(const std::filesystem::path& path) { data["General"]["showSplash"] = isShowSplash; data["General"]["autoUpdate"] = isAutoUpdate; data["General"]["separateUpdateEnabled"] = separateupdatefolder; + data["General"]["compatibilityEnabled"] = compatibilityData; + data["General"]["checkCompatibilityOnStartup"] = checkCompatibilityOnStartup; data["Input"]["cursorState"] = cursorState; data["Input"]["cursorHideTimeout"] = cursorHideTimeout; data["Input"]["backButtonBehavior"] = backButtonBehavior; @@ -775,6 +798,8 @@ void setDefaultValues() { m_language = 1; gpuId = -1; separateupdatefolder = false; + compatibilityData = false; + checkCompatibilityOnStartup = false; } } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index ff3b3703f3..a4e6c3b12c 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -21,6 +21,8 @@ bool getPlayBGM(); int getBGMvolume(); bool getEnableDiscordRPC(); bool getSeparateUpdateEnabled(); +bool getCompatibilityEnabled(); +bool getCheckCompatibilityOnStartup(); std::string getLogFilter(); std::string getLogType(); @@ -69,6 +71,8 @@ void 
setUserName(const std::string& type); void setUpdateChannel(const std::string& type); void setSeparateUpdateEnabled(bool use); void setGameInstallDirs(const std::vector& settings_install_dirs_config); +void setCompatibilityEnabled(bool use); +void setCheckCompatibilityOnStartup(bool use); void setCursorState(s16 cursorState); void setCursorHideTimeout(int newcursorHideTimeout); diff --git a/src/qt_gui/compatibility_info.cpp b/src/qt_gui/compatibility_info.cpp index c8d6bf36d1..aecac60cd2 100644 --- a/src/qt_gui/compatibility_info.cpp +++ b/src/qt_gui/compatibility_info.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "common/path_util.h" #include "compatibility_info.h" @@ -22,7 +23,8 @@ void CompatibilityInfoClass::UpdateCompatibilityDatabase(QWidget* parent) { return; QNetworkReply* reply = FetchPage(1); - WaitForReply(reply); + if (!WaitForReply(reply)) + return; QProgressDialog dialog(tr("Fetching compatibility data, please wait"), tr("Cancel"), 0, 0, parent); @@ -57,12 +59,17 @@ void CompatibilityInfoClass::UpdateCompatibilityDatabase(QWidget* parent) { } future_watcher.setFuture(QtConcurrent::map(replies, WaitForReply)); - connect(&future_watcher, &QFutureWatcher::finished, [&]() { + connect(&future_watcher, &QFutureWatcher::finished, [&]() { for (int i = 0; i < remaining_pages; i++) { - if (replies[i]->error() == QNetworkReply::NoError) { - ExtractCompatibilityInfo(replies[i]->readAll()); + if (replies[i]->bytesAvailable()) { + if (replies[i]->error() == QNetworkReply::NoError) { + ExtractCompatibilityInfo(replies[i]->readAll()); + } + replies[i]->deleteLater(); + } else { + // This means the request timed out + return; } - replies[i]->deleteLater(); } QFile compatibility_file(m_compatibility_filename); @@ -83,6 +90,16 @@ void CompatibilityInfoClass::UpdateCompatibilityDatabase(QWidget* parent) { dialog.reset(); }); + connect(&future_watcher, &QFutureWatcher::canceled, [&]() { + // Cleanup if user cancels pulling data + for (int i = 0; i < 
remaining_pages; i++) { + if (!replies[i]->bytesAvailable()) { + replies[i]->deleteLater(); + } else if (!replies[i]->isFinished()) { + replies[i]->abort(); + } + } + }); connect(&dialog, &QProgressDialog::canceled, &future_watcher, &QFutureWatcher::cancel); dialog.setRange(0, remaining_pages); connect(&future_watcher, &QFutureWatcher::progressValueChanged, &dialog, @@ -105,20 +122,34 @@ QNetworkReply* CompatibilityInfoClass::FetchPage(int page_num) { return reply; } -void CompatibilityInfoClass::WaitForReply(QNetworkReply* reply) { +bool CompatibilityInfoClass::WaitForReply(QNetworkReply* reply) { + // Returns true if reply succeeded, false if reply timed out + QTimer timer; + timer.setSingleShot(true); + QEventLoop loop; connect(reply, &QNetworkReply::finished, &loop, &QEventLoop::quit); + connect(&timer, SIGNAL(timeout()), &loop, SLOT(quit())); + timer.start(5000); loop.exec(); - return; + + if (timer.isActive()) { + timer.stop(); + return true; + } else { + disconnect(reply, SIGNAL(finished()), &loop, SLOT(quit())); + reply->abort(); + return false; + } }; CompatibilityEntry CompatibilityInfoClass::GetCompatibilityInfo(const std::string& serial) { QString title_id = QString::fromStdString(serial); if (m_compatibility_database.contains(title_id)) { { + QJsonObject compatibility_obj = m_compatibility_database[title_id].toObject(); for (int os_int = 0; os_int != static_cast(OSType::Last); os_int++) { QString os_string = OSTypeToString.at(static_cast(os_int)); - QJsonObject compatibility_obj = m_compatibility_database[title_id].toObject(); if (compatibility_obj.contains(os_string)) { QJsonObject compatibility_entry_obj = compatibility_obj[os_string].toObject(); CompatibilityEntry compatibility_entry{ @@ -133,7 +164,9 @@ CompatibilityEntry CompatibilityInfoClass::GetCompatibilityInfo(const std::strin } } } - return CompatibilityEntry{CompatibilityStatus::Unknown}; + + return CompatibilityEntry{CompatibilityStatus::Unknown, "", QDateTime::currentDateTime(), "", + 0}; 
} bool CompatibilityInfoClass::LoadCompatibilityFile() { diff --git a/src/qt_gui/compatibility_info.h b/src/qt_gui/compatibility_info.h index 2b970670a7..dcbaef8472 100644 --- a/src/qt_gui/compatibility_info.h +++ b/src/qt_gui/compatibility_info.h @@ -57,6 +57,7 @@ class CompatibilityInfoClass : public QObject { public: // Please think of a better alternative inline static const std::unordered_map LabelToCompatStatus = { + {QStringLiteral("status-unknown"), CompatibilityStatus::Unknown}, {QStringLiteral("status-nothing"), CompatibilityStatus::Nothing}, {QStringLiteral("status-boots"), CompatibilityStatus::Boots}, {QStringLiteral("status-menus"), CompatibilityStatus::Menus}, @@ -87,7 +88,7 @@ class CompatibilityInfoClass : public QObject { bool LoadCompatibilityFile(); CompatibilityEntry GetCompatibilityInfo(const std::string& serial); void ExtractCompatibilityInfo(QByteArray response); - static void WaitForReply(QNetworkReply* reply); + static bool WaitForReply(QNetworkReply* reply); QNetworkReply* FetchPage(int page_num); private: diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index d43c35ef4a..53159d8e75 100644 --- a/src/qt_gui/game_list_frame.cpp +++ b/src/qt_gui/game_list_frame.cpp @@ -80,6 +80,11 @@ GameListFrame::GameListFrame(std::shared_ptr game_info_get, QDesktopServices::openUrl(QUrl(m_game_info->m_games[row].compatibility.url)); } }); + + // Do not show status column if it is not enabled + if (!Config::getCompatibilityEnabled()) { + this->setColumnHidden(2, true); + } } void GameListFrame::onCurrentCellChanged(int currentRow, int currentColumn, int previousRow, diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 90cc947f46..d7d2a856ab 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -185,7 +185,9 @@ void MainWindow::CreateDockWindows() { void MainWindow::LoadGameLists() { // Update compatibility database - m_compat_info->UpdateCompatibilityDatabase(this); + if 
(Config::getCheckCompatibilityOnStartup()) { + m_compat_info->UpdateCompatibilityDatabase(this); + } // Get game info from game folders. m_game_info->GetGameInfo(this); if (isTableList) { From fac21a5362cae488f4b61b950fc52a46210645e2 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 22 Dec 2024 01:46:28 -0800 Subject: [PATCH 56/67] audio: Move port logic out of SDL backend and define backend interface. (#1848) --- src/core/libraries/audio/audioout.cpp | 169 ++++++++++++++++++-- src/core/libraries/audio/audioout.h | 2 +- src/core/libraries/audio/audioout_backend.h | 19 +++ src/core/libraries/audio/sdl_audio.cpp | 133 +++------------ src/core/libraries/audio/sdl_audio.h | 36 +---- 5 files changed, 203 insertions(+), 156 deletions(-) create mode 100644 src/core/libraries/audio/audioout_backend.h diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index 78b04cc906..db43ee9289 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include +#include #include #include "common/assert.h" @@ -13,7 +15,22 @@ namespace Libraries::AudioOut { -static std::unique_ptr audio; +struct PortOut { + void* impl; + u32 samples_num; + u32 freq; + OrbisAudioOutParamFormat format; + OrbisAudioOutPort type; + int channels_num; + bool is_float; + std::array volume; + u8 sample_size; + bool is_open; +}; +std::shared_mutex ports_mutex; +std::array ports_out{}; + +static std::unique_ptr audio; static std::string_view GetAudioOutPort(OrbisAudioOutPort port) { switch (port) { @@ -70,6 +87,58 @@ static std::string_view GetAudioOutParamAttr(OrbisAudioOutParamAttr attr) { } } +static bool IsFormatFloat(const OrbisAudioOutParamFormat format) { + switch (format) { + case OrbisAudioOutParamFormat::S16Mono: + case OrbisAudioOutParamFormat::S16Stereo: + case OrbisAudioOutParamFormat::S16_8CH: + case 
OrbisAudioOutParamFormat::S16_8CH_Std: + return false; + case OrbisAudioOutParamFormat::FloatMono: + case OrbisAudioOutParamFormat::FloatStereo: + case OrbisAudioOutParamFormat::Float_8CH: + case OrbisAudioOutParamFormat::Float_8CH_Std: + return true; + default: + UNREACHABLE_MSG("Unknown format"); + } +} + +static int GetFormatNumChannels(const OrbisAudioOutParamFormat format) { + switch (format) { + case OrbisAudioOutParamFormat::S16Mono: + case OrbisAudioOutParamFormat::FloatMono: + return 1; + case OrbisAudioOutParamFormat::S16Stereo: + case OrbisAudioOutParamFormat::FloatStereo: + return 2; + case OrbisAudioOutParamFormat::S16_8CH: + case OrbisAudioOutParamFormat::Float_8CH: + case OrbisAudioOutParamFormat::S16_8CH_Std: + case OrbisAudioOutParamFormat::Float_8CH_Std: + return 8; + default: + UNREACHABLE_MSG("Unknown format"); + } +} + +static u8 GetFormatSampleSize(const OrbisAudioOutParamFormat format) { + switch (format) { + case OrbisAudioOutParamFormat::S16Mono: + case OrbisAudioOutParamFormat::S16Stereo: + case OrbisAudioOutParamFormat::S16_8CH: + case OrbisAudioOutParamFormat::S16_8CH_Std: + return 2; + case OrbisAudioOutParamFormat::FloatMono: + case OrbisAudioOutParamFormat::FloatStereo: + case OrbisAudioOutParamFormat::Float_8CH: + case OrbisAudioOutParamFormat::Float_8CH_Std: + return 4; + default: + UNREACHABLE_MSG("Unknown format"); + } +} + int PS4_SYSV_ABI sceAudioOutDeviceIdOpen() { LOG_ERROR(Lib_AudioOut, "(STUBBED) called"); return ORBIS_OK; @@ -110,8 +179,21 @@ int PS4_SYSV_ABI sceAudioOutChangeAppModuleState() { return ORBIS_OK; } -int PS4_SYSV_ABI sceAudioOutClose() { - LOG_ERROR(Lib_AudioOut, "(STUBBED) called"); +int PS4_SYSV_ABI sceAudioOutClose(s32 handle) { + LOG_INFO(Lib_AudioOut, "handle = {}", handle); + if (handle < 1 || handle > SCE_AUDIO_OUT_NUM_PORTS) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + + std::scoped_lock lock(ports_mutex); + auto& port = ports_out.at(handle - 1); + if (!port.is_open) { + return 
ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + + audio->Close(port.impl); + port.impl = nullptr; + port.is_open = false; return ORBIS_OK; } @@ -180,16 +262,21 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - const auto [type, channels_num] = audio->GetStatus(handle); + std::scoped_lock lock(ports_mutex); + const auto& port = ports_out.at(handle - 1); + if (!port.is_open) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + state->rerouteCounter = 0; state->volume = 127; - switch (type) { + switch (port.type) { case OrbisAudioOutPort::Main: case OrbisAudioOutPort::Bgm: case OrbisAudioOutPort::Voice: state->output = 1; - state->channel = (channels_num > 2 ? 2 : channels_num); + state->channel = port.channels_num > 2 ? 2 : port.channels_num; break; case OrbisAudioOutPort::Personal: case OrbisAudioOutPort::Padspk: @@ -276,7 +363,7 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, u32 sample_rate, OrbisAudioOutParamExtendedInformation param_type) { LOG_INFO(Lib_AudioOut, - "AudioOutOpen id = {} port_type = {} index = {} lenght= {} sample_rate = {} " + "id = {} port_type = {} index = {} length = {} sample_rate = {} " "param_type = {} attr = {}", user_id, GetAudioOutPort(port_type), index, length, sample_rate, GetAudioOutParamFormat(param_type.data_format), @@ -310,7 +397,26 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, LOG_ERROR(Lib_AudioOut, "Invalid format attribute"); return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT; } - return audio->Open(port_type, length, sample_rate, format); + + std::scoped_lock lock{ports_mutex}; + const auto port = std::ranges::find(ports_out, false, &PortOut::is_open); + if (port == ports_out.end()) { + LOG_ERROR(Lib_AudioOut, "Audio ports are full"); + return ORBIS_AUDIO_OUT_ERROR_PORT_FULL; + } + + port->is_open = true; + port->type = port_type; + port->samples_num = length; + port->freq = sample_rate; + 
port->format = format; + port->is_float = IsFormatFloat(format); + port->channels_num = GetFormatNumChannels(format); + port->sample_size = GetFormatSampleSize(format); + port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB); + + port->impl = audio->Open(port->is_float, port->channels_num, port->freq); + return std::distance(ports_out.begin(), port) + 1; } int PS4_SYSV_ABI sceAudioOutOpenEx() { @@ -326,7 +432,15 @@ s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, const void* ptr) { // Nothing to output return ORBIS_OK; } - return audio->Output(handle, ptr); + + auto& port = ports_out.at(handle - 1); + if (!port.is_open) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + + const size_t data_size = port.samples_num * port.sample_size * port.channels_num; + audio->Output(port.impl, ptr, data_size); + return ORBIS_OK; } int PS4_SYSV_ABI sceAudioOutOutputs(OrbisAudioOutOutputParam* param, u32 num) { @@ -431,7 +545,42 @@ s32 PS4_SYSV_ABI sceAudioOutSetVolume(s32 handle, s32 flag, s32* vol) { if (handle < 1 || handle > SCE_AUDIO_OUT_NUM_PORTS) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - return audio->SetVolume(handle, flag, vol); + + std::scoped_lock lock(ports_mutex); + auto& port = ports_out.at(handle - 1); + if (!port.is_open) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + + for (int i = 0; i < port.channels_num; i++, flag >>= 1u) { + auto bit = flag & 0x1u; + if (bit == 1) { + int src_index = i; + if (port.format == OrbisAudioOutParamFormat::Float_8CH_Std || + port.format == OrbisAudioOutParamFormat::S16_8CH_Std) { + switch (i) { + case 4: + src_index = 6; + break; + case 5: + src_index = 7; + break; + case 6: + src_index = 4; + break; + case 7: + src_index = 5; + break; + default: + break; + } + } + port.volume[i] = vol[src_index]; + } + } + + audio->SetVolume(port.impl, port.volume); + return ORBIS_OK; } int PS4_SYSV_ABI sceAudioOutSetVolumeDown() { diff --git a/src/core/libraries/audio/audioout.h b/src/core/libraries/audio/audioout.h index e8e718b87b..c66a0e9f5f 
100644 --- a/src/core/libraries/audio/audioout.h +++ b/src/core/libraries/audio/audioout.h @@ -64,7 +64,7 @@ int PS4_SYSV_ABI sceAudioOutA3dExit(); int PS4_SYSV_ABI sceAudioOutA3dInit(); int PS4_SYSV_ABI sceAudioOutAttachToApplicationByPid(); int PS4_SYSV_ABI sceAudioOutChangeAppModuleState(); -int PS4_SYSV_ABI sceAudioOutClose(); +int PS4_SYSV_ABI sceAudioOutClose(s32 handle); int PS4_SYSV_ABI sceAudioOutDetachFromApplicationByPid(); int PS4_SYSV_ABI sceAudioOutExConfigureOutputMode(); int PS4_SYSV_ABI sceAudioOutExGetSystemInfo(); diff --git a/src/core/libraries/audio/audioout_backend.h b/src/core/libraries/audio/audioout_backend.h new file mode 100644 index 0000000000..238ef0201c --- /dev/null +++ b/src/core/libraries/audio/audioout_backend.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +namespace Libraries::AudioOut { + +class AudioOutBackend { +public: + AudioOutBackend() = default; + virtual ~AudioOutBackend() = default; + + virtual void* Open(bool is_float, int num_channels, u32 sample_rate) = 0; + virtual void Close(void* impl) = 0; + virtual void Output(void* impl, const void* ptr, size_t size) = 0; + virtual void SetVolume(void* impl, std::array ch_volumes) = 0; +}; + +} // namespace Libraries::AudioOut diff --git a/src/core/libraries/audio/sdl_audio.cpp b/src/core/libraries/audio/sdl_audio.cpp index 8cc823abeb..ce385ad9cf 100644 --- a/src/core/libraries/audio/sdl_audio.cpp +++ b/src/core/libraries/audio/sdl_audio.cpp @@ -1,141 +1,44 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include #include #include #include "common/assert.h" -#include "core/libraries/audio/audioout_error.h" #include "core/libraries/audio/sdl_audio.h" namespace Libraries::AudioOut { constexpr int AUDIO_STREAM_BUFFER_THRESHOLD = 65536; // Define constant for buffer threshold -s32 
SDLAudioOut::Open(OrbisAudioOutPort type, u32 samples_num, u32 freq, - OrbisAudioOutParamFormat format) { - std::scoped_lock lock{m_mutex}; - const auto port = std::ranges::find(ports_out, false, &PortOut::is_open); - if (port == ports_out.end()) { - LOG_ERROR(Lib_AudioOut, "Audio ports are full"); - return ORBIS_AUDIO_OUT_ERROR_PORT_FULL; - } - - port->is_open = true; - port->type = type; - port->samples_num = samples_num; - port->freq = freq; - port->format = format; - SDL_AudioFormat sampleFormat; - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - sampleFormat = SDL_AUDIO_S16; - port->channels_num = 1; - port->sample_size = 2; - break; - case OrbisAudioOutParamFormat::FloatMono: - sampleFormat = SDL_AUDIO_F32; - port->channels_num = 1; - port->sample_size = 4; - break; - case OrbisAudioOutParamFormat::S16Stereo: - sampleFormat = SDL_AUDIO_S16; - port->channels_num = 2; - port->sample_size = 2; - break; - case OrbisAudioOutParamFormat::FloatStereo: - sampleFormat = SDL_AUDIO_F32; - port->channels_num = 2; - port->sample_size = 4; - break; - case OrbisAudioOutParamFormat::S16_8CH: - sampleFormat = SDL_AUDIO_S16; - port->channels_num = 8; - port->sample_size = 2; - break; - case OrbisAudioOutParamFormat::Float_8CH: - sampleFormat = SDL_AUDIO_F32; - port->channels_num = 8; - port->sample_size = 4; - break; - case OrbisAudioOutParamFormat::S16_8CH_Std: - sampleFormat = SDL_AUDIO_S16; - port->channels_num = 8; - port->sample_size = 2; - break; - case OrbisAudioOutParamFormat::Float_8CH_Std: - sampleFormat = SDL_AUDIO_F32; - port->channels_num = 8; - port->sample_size = 4; - break; - default: - UNREACHABLE_MSG("Unknown format"); - } - - port->volume.fill(Libraries::AudioOut::SCE_AUDIO_OUT_VOLUME_0DB); - +void* SDLAudioOut::Open(bool is_float, int num_channels, u32 sample_rate) { SDL_AudioSpec fmt; SDL_zero(fmt); - fmt.format = sampleFormat; - fmt.channels = port->channels_num; - fmt.freq = freq; - port->stream = 
SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &fmt, NULL, NULL); - SDL_ResumeAudioDevice(SDL_GetAudioStreamDevice(port->stream)); - return std::distance(ports_out.begin(), port) + 1; + fmt.format = is_float ? SDL_AUDIO_F32 : SDL_AUDIO_S16; + fmt.channels = num_channels; + fmt.freq = sample_rate; + + auto* stream = + SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &fmt, nullptr, nullptr); + SDL_ResumeAudioStreamDevice(stream); + return stream; } -s32 SDLAudioOut::Output(s32 handle, const void* ptr) { - auto& port = ports_out.at(handle - 1); - if (!port.is_open) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; - } +void SDLAudioOut::Close(void* impl) { + SDL_DestroyAudioStream(static_cast(impl)); +} - const size_t data_size = port.samples_num * port.sample_size * port.channels_num; - bool result = SDL_PutAudioStreamData(port.stream, ptr, data_size); - while (SDL_GetAudioStreamAvailable(port.stream) > AUDIO_STREAM_BUFFER_THRESHOLD) { +void SDLAudioOut::Output(void* impl, const void* ptr, size_t size) { + auto* stream = static_cast(impl); + SDL_PutAudioStreamData(stream, ptr, size); + while (SDL_GetAudioStreamAvailable(stream) > AUDIO_STREAM_BUFFER_THRESHOLD) { SDL_Delay(0); } - return result ? 
ORBIS_OK : -1; } -s32 SDLAudioOut::SetVolume(s32 handle, s32 bitflag, s32* volume) { - using Libraries::AudioOut::OrbisAudioOutParamFormat; - auto& port = ports_out.at(handle - 1); - if (!port.is_open) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; - } - - for (int i = 0; i < port.channels_num; i++, bitflag >>= 1u) { - auto bit = bitflag & 0x1u; - - if (bit == 1) { - int src_index = i; - if (port.format == OrbisAudioOutParamFormat::Float_8CH_Std || - port.format == OrbisAudioOutParamFormat::S16_8CH_Std) { - switch (i) { - case 4: - src_index = 6; - break; - case 5: - src_index = 7; - break; - case 6: - src_index = 4; - break; - case 7: - src_index = 5; - break; - default: - break; - } - } - port.volume[i] = volume[src_index]; - } - } - - return ORBIS_OK; +void SDLAudioOut::SetVolume(void* impl, std::array ch_volumes) { + // Not yet implemented } } // namespace Libraries::AudioOut diff --git a/src/core/libraries/audio/sdl_audio.h b/src/core/libraries/audio/sdl_audio.h index 2c34f8e298..d55f2f6e3d 100644 --- a/src/core/libraries/audio/sdl_audio.h +++ b/src/core/libraries/audio/sdl_audio.h @@ -3,40 +3,16 @@ #pragma once -#include -#include -#include "core/libraries/audio/audioout.h" +#include "core/libraries/audio/audioout_backend.h" namespace Libraries::AudioOut { -class SDLAudioOut { +class SDLAudioOut final : public AudioOutBackend { public: - explicit SDLAudioOut() = default; - ~SDLAudioOut() = default; - - s32 Open(OrbisAudioOutPort type, u32 samples_num, u32 freq, OrbisAudioOutParamFormat format); - s32 Output(s32 handle, const void* ptr); - s32 SetVolume(s32 handle, s32 bitflag, s32* volume); - - constexpr std::pair GetStatus(s32 handle) const { - const auto& port = ports_out.at(handle - 1); - return std::make_pair(port.type, port.channels_num); - } - -private: - struct PortOut { - SDL_AudioStream* stream; - u32 samples_num; - u32 freq; - OrbisAudioOutParamFormat format; - OrbisAudioOutPort type; - int channels_num; - std::array volume; - u8 sample_size; - bool 
is_open; - }; - std::shared_mutex m_mutex; - std::array ports_out{}; + void* Open(bool is_float, int num_channels, u32 sample_rate) override; + void Close(void* impl) override; + void Output(void* impl, const void* ptr, size_t size) override; + void SetVolume(void* impl, std::array ch_volumes) override; }; } // namespace Libraries::AudioOut From 5eebb04de964b053ec3d091c8c3931ae305c96a9 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 22 Dec 2024 15:31:10 +0200 Subject: [PATCH 57/67] vk_rasterizer: hot fix --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f8efd9b811..b7cfb8cf00 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -152,8 +152,9 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { image.binding.is_target = 1u; const auto slice = image_view.info.range.base.layer; - const bool is_clear = regs.depth_render_control.depth_clear_enable || - texture_cache.IsMetaCleared(htile_address, slice); + const bool is_depth_clear = regs.depth_render_control.depth_clear_enable || + texture_cache.IsMetaCleared(htile_address, slice); + const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable; ASSERT(desc.view_info.range.extent.layers == 1); state.width = std::min(state.width, image.info.size.width); @@ -161,16 +162,14 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { state.depth_attachment = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_clear && regs.depth_control.depth_enable ? vk::AttachmentLoadOp::eClear - : vk::AttachmentLoadOp::eLoad, + .loadOp = is_depth_clear ? 
vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = vk::AttachmentStoreOp::eStore, .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, }; state.stencil_attachment = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_clear && regs.depth_control.stencil_enable ? vk::AttachmentLoadOp::eClear - : vk::AttachmentLoadOp::eLoad, + .loadOp = is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = vk::AttachmentStoreOp::eStore, .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, }; From 433d9459e09cd9ce3a1d02cc78d901b66d25335f Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Sun, 22 Dec 2024 21:46:45 +0800 Subject: [PATCH 58/67] Disable trophy pop-up with config setting (#1834) --- src/common/config.cpp | 12 +++++++++++ src/common/config.h | 2 ++ src/core/libraries/np_trophy/trophy_ui.cpp | 6 +++++- src/qt_gui/settings_dialog.cpp | 8 ++++++- src/qt_gui/settings_dialog.ui | 25 ++++++++++------------ src/qt_gui/translations/en.ts | 5 +++++ 6 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 9d5a99d9ad..deef0fa88b 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -34,6 +34,7 @@ namespace Config { static bool isNeo = false; static bool isFullscreen = false; static bool playBGM = false; +static bool isTrophyPopupDisabled = false; static int BGMvolume = 50; static bool enableDiscordRPC = false; static u32 screenWidth = 1280; @@ -98,6 +99,10 @@ bool isFullscreenMode() { return isFullscreen; } +bool getisTrophyPopupDisabled() { + return isTrophyPopupDisabled; +} + bool getPlayBGM() { return playBGM; } @@ -294,6 +299,10 @@ void setFullscreenMode(bool enable) { isFullscreen = enable; } +void setisTrophyPopupDisabled(bool disable) { + isTrophyPopupDisabled = disable; +} + void setPlayBGM(bool enable) { playBGM = 
enable; } @@ -549,6 +558,7 @@ void load(const std::filesystem::path& path) { isNeo = toml::find_or(general, "isPS4Pro", false); isFullscreen = toml::find_or(general, "Fullscreen", false); playBGM = toml::find_or(general, "playBGM", false); + isTrophyPopupDisabled = toml::find_or(general, "isTrophyPopupDisabled", false); BGMvolume = toml::find_or(general, "BGMvolume", 50); enableDiscordRPC = toml::find_or(general, "enableDiscordRPC", true); logFilter = toml::find_or(general, "logFilter", ""); @@ -667,6 +677,7 @@ void save(const std::filesystem::path& path) { data["General"]["isPS4Pro"] = isNeo; data["General"]["Fullscreen"] = isFullscreen; + data["General"]["isTrophyPopupDisabled"] = isTrophyPopupDisabled; data["General"]["playBGM"] = playBGM; data["General"]["BGMvolume"] = BGMvolume; data["General"]["enableDiscordRPC"] = enableDiscordRPC; @@ -763,6 +774,7 @@ void saveMainWindow(const std::filesystem::path& path) { void setDefaultValues() { isNeo = false; isFullscreen = false; + isTrophyPopupDisabled = false; playBGM = false; BGMvolume = 50; enableDiscordRPC = true; diff --git a/src/common/config.h b/src/common/config.h index a4e6c3b12c..701aadb12c 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -19,6 +19,7 @@ bool isNeoMode(); bool isFullscreenMode(); bool getPlayBGM(); int getBGMvolume(); +bool getisTrophyPopupDisabled(); bool getEnableDiscordRPC(); bool getSeparateUpdateEnabled(); bool getCompatibilityEnabled(); @@ -62,6 +63,7 @@ void setGpuId(s32 selectedGpuId); void setScreenWidth(u32 width); void setScreenHeight(u32 height); void setFullscreenMode(bool enable); +void setisTrophyPopupDisabled(bool disable); void setPlayBGM(bool enable); void setBGMvolume(int volume); void setEnableDiscordRPC(bool enable); diff --git a/src/core/libraries/np_trophy/trophy_ui.cpp b/src/core/libraries/np_trophy/trophy_ui.cpp index 55ef7b8de6..4bb8c8240e 100644 --- a/src/core/libraries/np_trophy/trophy_ui.cpp +++ b/src/core/libraries/np_trophy/trophy_ui.cpp @@ -5,6 
+5,7 @@ #include #include #include "common/assert.h" +#include "common/config.h" #include "common/singleton.h" #include "imgui/imgui_std.h" #include "trophy_ui.h" @@ -82,7 +83,10 @@ void TrophyUI::Draw() { void AddTrophyToQueue(const std::filesystem::path& trophyIconPath, const std::string& trophyName) { std::lock_guard lock(queueMtx); - if (current_trophy_ui.has_value()) { + + if (Config::getisTrophyPopupDisabled()) { + return; + } else if (current_trophy_ui.has_value()) { TrophyInfo new_trophy; new_trophy.trophy_icon_path = trophyIconPath; new_trophy.trophy_name = trophyName; diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 09d3674f7f..97c891e4f5 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -194,6 +194,7 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge ui->updaterGroupBox->installEventFilter(this); #endif ui->GUIgroupBox->installEventFilter(this); + ui->disableTrophycheckBox->installEventFilter(this); // Input ui->hideCursorGroupBox->installEventFilter(this); @@ -263,6 +264,8 @@ void SettingsDialog::LoadValuesFromConfig() { ui->dumpShadersCheckBox->setChecked(toml::find_or(data, "GPU", "dumpShaders", false)); ui->nullGpuCheckBox->setChecked(toml::find_or(data, "GPU", "nullGpu", false)); ui->playBGMCheckBox->setChecked(toml::find_or(data, "General", "playBGM", false)); + ui->disableTrophycheckBox->setChecked( + toml::find_or(data, "General", "isTrophyPopupDisabled", false)); ui->BGMVolumeSlider->setValue(toml::find_or(data, "General", "BGMvolume", 50)); ui->discordRPCCheckbox->setChecked( toml::find_or(data, "General", "enableDiscordRPC", true)); @@ -397,6 +400,8 @@ void SettingsDialog::updateNoteTextEdit(const QString& elementName) { #endif } else if (elementName == "GUIgroupBox") { text = tr("GUIgroupBox"); + } else if (elementName == "disableTrophycheckBox") { + text = tr("disableTrophycheckBox"); } // Input @@ -485,6 +490,7 @@ void SettingsDialog::UpdateSettings() { 
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]); Config::setNeoMode(ui->ps4proCheckBox->isChecked()); Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked()); + Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked()); Config::setPlayBGM(ui->playBGMCheckBox->isChecked()); Config::setNeoMode(ui->ps4proCheckBox->isChecked()); Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); @@ -549,4 +555,4 @@ void SettingsDialog::ResetInstallFolders() { } Config::setGameInstallDirs(settings_install_dirs_config); } -} \ No newline at end of file +} diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui index cce728f650..faa0bf8474 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -59,9 +59,9 @@ 0 - 0 - 832 - 431 + -97 + 815 + 618 @@ -469,6 +469,13 @@ 11 + + + + Disable Trophy Pop-ups + + + @@ -561,16 +568,6 @@ - - - - - 0 - 61 - - - - @@ -696,7 +693,7 @@ 5 - + true diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index 9eccec8ea8..ddaa4fe0d5 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -1201,6 +1201,11 @@ GUIgroupBox Play Title Music:\nIf a game supports it, enable playing special music when selecting the game in the GUI.
+ + + disableTrophycheckBox + Disable Trophy Pop-ups:\nDisable in-game trophy notifications. Trophy progress can still be tracked using the Trophy Viewer (right-click the game in the main window). + hideCursorGroupBox From aba2b290742c3f240cf910d0f81fbd2361eb03be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 22 Dec 2024 20:47:28 +0700 Subject: [PATCH 59/67] gui: start the emulator in new thread (#1829) --- src/qt_gui/main_window.cpp | 25 +++++++++++++++++-------- src/qt_gui/main_window.h | 1 + 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index d7d2a856ab..39f7c7b975 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -556,7 +556,6 @@ void MainWindow::CreateConnects() { } void MainWindow::StartGame() { - isGameRunning = true; BackgroundMusicPlayer::getInstance().stopMusic(); QString gamePath = ""; int table_mode = Config::getTableMode(); @@ -579,13 +578,12 @@ void MainWindow::StartGame() { } if (gamePath != "") { AddRecentFiles(gamePath); - Core::Emulator emulator; const auto path = Common::FS::PathFromQString(gamePath); if (!std::filesystem::exists(path)) { QMessageBox::critical(nullptr, tr("Run Game"), QString(tr("Eboot.bin file not found"))); return; } - emulator.Run(path); + StartEmulator(path); } } @@ -682,13 +680,12 @@ void MainWindow::BootGame() { QString(tr("Only one file can be selected!"))); } else { std::filesystem::path path = Common::FS::PathFromQString(fileNames[0]); - Core::Emulator emulator; if (!std::filesystem::exists(path)) { QMessageBox::critical(nullptr, tr("Run Game"), QString(tr("Eboot.bin file not found"))); return; } - emulator.Run(path); + StartEmulator(path); } } } @@ -1042,12 +1039,11 @@ void MainWindow::CreateRecentGameActions() { connect(m_recent_files_group, &QActionGroup::triggered, this, [this](QAction* action) { auto gamePath = Common::FS::PathFromQString(action->text()); AddRecentFiles(action->text()); // 
Update the list. - Core::Emulator emulator; if (!std::filesystem::exists(gamePath)) { QMessageBox::critical(nullptr, tr("Run Game"), QString(tr("Eboot.bin file not found"))); return; } - emulator.Run(gamePath); + StartEmulator(gamePath); }); } @@ -1095,4 +1091,17 @@ bool MainWindow::eventFilter(QObject* obj, QEvent* event) { } } return QMainWindow::eventFilter(obj, event); -} \ No newline at end of file +} + +void MainWindow::StartEmulator(std::filesystem::path path) { + if (isGameRunning) { + QMessageBox::critical(nullptr, tr("Run Game"), QString(tr("Game is already running!"))); + return; + } + std::thread emulator_thread([=] { + Core::Emulator emulator; + emulator.Run(path); + }); + emulator_thread.detach(); + isGameRunning = true; +} diff --git a/src/qt_gui/main_window.h b/src/qt_gui/main_window.h index d3623c3d07..f4163defad 100644 --- a/src/qt_gui/main_window.h +++ b/src/qt_gui/main_window.h @@ -69,6 +69,7 @@ private Q_SLOTS: void LoadTranslation(); void PlayBackgroundMusic(); QIcon RecolorIcon(const QIcon& icon, bool isWhite); + void StartEmulator(std::filesystem::path); bool isIconBlack = false; bool isTableList = true; bool isGameRunning = false; From 14dc13683227dbe1682aa1c13c85abb667d2aee2 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 22 Dec 2024 06:08:48 -0800 Subject: [PATCH 60/67] renderer_vulkan: Various attachment cleanup and fixes. 
(#1795) --- src/video_core/amdgpu/liverpool.h | 28 ++++ .../renderer_vulkan/vk_graphics_pipeline.h | 4 - .../renderer_vulkan/vk_pipeline_cache.cpp | 37 ++--- .../renderer_vulkan/vk_rasterizer.cpp | 133 +++++++++++------- .../renderer_vulkan/vk_rasterizer.h | 1 - .../renderer_vulkan/vk_scheduler.cpp | 4 +- src/video_core/texture_cache/image_info.cpp | 4 +- 7 files changed, 123 insertions(+), 88 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 4c74d37d09..83271a82dd 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -428,6 +428,14 @@ struct Liverpool { BitField<0, 22, u32> tile_max; } depth_slice; + bool DepthValid() const { + return Address() != 0 && z_info.format != ZFormat::Invalid; + } + + bool StencilValid() const { + return Address() != 0 && stencil_info.format != StencilFormat::Invalid; + } + u32 Pitch() const { return (depth_size.pitch_tile_max + 1) << 3; } @@ -1275,6 +1283,26 @@ struct Liverpool { return nullptr; } + u32 NumSamples() const { + // It seems that the number of samples > 1 set in the AA config doesn't mean we're + // always rendering with MSAA, so we need to derive MS ratio from the CB and DB + // settings. 
+ u32 num_samples = 1u; + if (color_control.mode != ColorControl::OperationMode::Disable) { + for (auto cb = 0u; cb < NumColorBuffers; ++cb) { + const auto& col_buf = color_buffers[cb]; + if (!col_buf) { + continue; + } + num_samples = std::max(num_samples, col_buf.NumSamples()); + } + } + if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) { + num_samples = std::max(num_samples, depth_buffer.NumSamples()); + } + return num_samples; + } + void SetDefaults(); }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 444c8517e2..f25341bbb2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -85,10 +85,6 @@ class GraphicsPipeline : public Pipeline { return key.mrt_mask; } - bool IsDepthEnabled() const { - return key.depth_stencil.depth_enable.Value(); - } - [[nodiscard]] bool IsPrimitiveListTopology() const { return key.prim_type == AmdGpu::PrimitiveType::PointList || key.prim_type == AmdGpu::PrimitiveType::LineList || diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4b88bd3746..43e02dd9da 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -258,32 +258,28 @@ bool PipelineCache::RefreshGraphicsKey() { auto& key = graphics_key; key.depth_stencil = regs.depth_control; + key.stencil = regs.stencil_control; key.depth_stencil.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() && !regs.depth_render_control.depth_clear_enable); key.depth_bias_enable = regs.polygon_control.NeedsBias(); - const auto& db = regs.depth_buffer; - const auto ds_format = instance.GetSupportedFormat( - LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format), + const auto depth_format = instance.GetSupportedFormat( + 
LiverpoolToVK::DepthFormat(regs.depth_buffer.z_info.format, + regs.depth_buffer.stencil_info.format), vk::FormatFeatureFlagBits2::eDepthStencilAttachment); - if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) { - key.depth_format = ds_format; + if (regs.depth_buffer.DepthValid()) { + key.depth_format = depth_format; } else { key.depth_format = vk::Format::eUndefined; + key.depth_stencil.depth_enable.Assign(false); } - if (regs.depth_control.depth_enable) { - key.depth_stencil.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); - } - key.stencil = regs.stencil_control; - - if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) { - key.stencil_format = key.depth_format; + if (regs.depth_buffer.StencilValid()) { + key.stencil_format = depth_format; } else { key.stencil_format = vk::Format::eUndefined; + key.depth_stencil.stencil_enable.Assign(false); } - if (key.depth_stencil.stencil_enable) { - key.depth_stencil.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined); - } + key.prim_type = regs.primitive_type; key.enable_primitive_restart = regs.enable_primitive_restart & 1; key.primitive_restart_index = regs.primitive_restart_index; @@ -291,7 +287,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.cull_mode = regs.polygon_control.CullingMode(); key.clip_space = regs.clipper_control.clip_space; key.front_face = regs.polygon_control.front_face; - key.num_samples = regs.aa_config.NumSamples(); + key.num_samples = regs.NumSamples(); const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; @@ -437,8 +433,6 @@ bool PipelineCache::RefreshGraphicsKey() { } } - u32 num_samples = 1u; - // Second pass to fill remain CB pipeline key data for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -463,15 +457,8 @@ bool PipelineCache::RefreshGraphicsKey() { 
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); ++remapped_cb; - - num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2); } - // It seems that the number of samples > 1 set in the AA config doesn't mean we're always - // rendering with MSAA, so we need to derive MS ratio from the CB settings. - num_samples = std::max(num_samples, regs.depth_buffer.NumSamples()); - key.num_samples = num_samples; - return true; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b7cfb8cf00..a0899f7c8b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -87,9 +87,11 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction"); } + const bool skip_cb_binding = + regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { const auto& col_buf = regs.color_buffers[col_buf_id]; - if (!col_buf) { + if (skip_cb_binding || !col_buf) { continue; } @@ -134,12 +136,8 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { }; } - using ZFormat = AmdGpu::Liverpool::DepthBuffer::ZFormat; - using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat; - if (regs.depth_buffer.Address() != 0 && - ((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) || - (regs.depth_control.stencil_enable && - regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) { + if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) || + (regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid())) { const auto htile_address = regs.depth_htile_data_base.GetAddress(); const 
auto& hint = liverpool->last_db_extent; auto& [image_id, desc] = @@ -159,25 +157,29 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); - state.depth_attachment = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, - }; - state.stencil_attachment = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, - }; + state.has_depth = regs.depth_buffer.DepthValid(); + state.has_stencil = regs.depth_buffer.StencilValid(); + if (state.has_depth) { + state.depth_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eUndefined, + .loadOp = + is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, + }; + } + if (state.has_stencil) { + state.stencil_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eUndefined, + .loadOp = + is_stencil_clear ? 
vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, + }; + } texture_cache.TouchMeta(htile_address, slice, false); - state.has_depth = - regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid; - state.has_stencil = regs.depth_buffer.stencil_info.format != - AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; } return state; @@ -815,34 +817,60 @@ void Rasterizer::Resolve() { mrt1_range.base.layer = liverpool->regs.color_buffers[1].view.slice_start; mrt1_range.extent.layers = liverpool->regs.color_buffers[1].NumSlices() - mrt1_range.base.layer; - vk::ImageResolve region = { - .srcSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt0_range.base.layer, - .layerCount = mrt0_range.extent.layers, - }, - .srcOffset = {0, 0, 0}, - .dstSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt1_range.base.layer, - .layerCount = mrt1_range.extent.layers, - }, - .dstOffset = {0, 0, 0}, - .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, - }; - mrt0_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, mrt0_range); mrt1_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, mrt1_range); - cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image, - vk::ImageLayout::eTransferDstOptimal, region); + if (mrt0_image.info.num_samples == 1) { + // Vulkan does not allow resolve from a single sample image, so change it to a copy. + // Note that resolving a single-sampled image doesn't really make sense, but a game might do + // it. 
+ vk::ImageCopy region = { + .srcSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt0_range.base.layer, + .layerCount = mrt0_range.extent.layers, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt1_range.base.layer, + .layerCount = mrt1_range.extent.layers, + }, + .dstOffset = {0, 0, 0}, + .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, + }; + cmdbuf.copyImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image, + vk::ImageLayout::eTransferDstOptimal, region); + } else { + vk::ImageResolve region = { + .srcSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt0_range.base.layer, + .layerCount = mrt0_range.extent.layers, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt1_range.base.layer, + .layerCount = mrt1_range.extent.layers, + }, + .dstOffset = {0, 0, 0}, + .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, + }; + cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, + mrt1_image.image, vk::ImageLayout::eTransferDstOptimal, region); + } } void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { @@ -998,6 +1026,10 @@ void Rasterizer::UpdateViewportScissorState() { enable_offset ? regs.window_offset.window_y_offset : 0); for (u32 idx = 0; idx < Liverpool::NumViewports; idx++) { + if (regs.viewports[idx].xscale == 0) { + // Scissor and viewport counts should be equal. 
+ continue; + } auto vp_scsr = scsr; if (regs.mode_control.vport_scissor_enable) { vp_scsr.top_left_x = @@ -1020,13 +1052,6 @@ void Rasterizer::UpdateViewportScissorState() { cmdbuf.setScissor(0, scissors); } -void Rasterizer::UpdateDepthStencilState() { - auto& depth = liverpool->regs.depth_control; - - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); -} - void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { if (Config::nullGpu() || !Config::vkMarkersEnabled()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ec1b5e1341..80b22c7d87 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -74,7 +74,6 @@ class Rasterizer { void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); - void UpdateDepthStencilState(); bool FilterDraw(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 81415f8b5e..f6b0edda41 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -30,7 +30,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { is_rendering = true; render_state = new_state; - const auto witdh = + const auto width = render_state.width != std::numeric_limits::max() ? render_state.width : 1; const auto height = render_state.height != std::numeric_limits::max() ? 
render_state.height : 1; @@ -39,7 +39,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { .renderArea = { .offset = {0, 0}, - .extent = {witdh, height}, + .extent = {width, height}, }, .layerCount = 1, .colorAttachmentCount = render_state.num_color_attachments, diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 1445d41cd4..606ede558e 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -266,7 +266,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); - num_samples = 1 << buffer.attrib.num_fragments_log2; + num_samples = buffer.NumSamples(); num_bits = NumBits(buffer.info.format); type = vk::ImageType::e2D; size.width = hint.Valid() ? hint.width : buffer.Pitch(); @@ -289,7 +289,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice props.is_tiled = false; pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); type = vk::ImageType::e2D; - num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2 + num_samples = buffer.NumSamples(); num_bits = buffer.NumBits(); size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height(); From ab5240d8d2e452ac4e0a434b353080a5c3a7e963 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 22 Dec 2024 06:31:59 -0800 Subject: [PATCH 61/67] qt: Do not run emulator on separate thread on Mac. 
(#1849) --- src/qt_gui/main_window.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 39f7c7b975..adb42fc26f 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -1098,10 +1098,16 @@ void MainWindow::StartEmulator(std::filesystem::path path) { QMessageBox::critical(nullptr, tr("Run Game"), QString(tr("Game is already running!"))); return; } + isGameRunning = true; +#ifdef __APPLE__ + // SDL on macOS requires main thread. + Core::Emulator emulator; + emulator.Run(path); +#else std::thread emulator_thread([=] { Core::Emulator emulator; emulator.Run(path); }); emulator_thread.detach(); - isGameRunning = true; +#endif } From 8a409d86d4c34ff6e07f5f107ce55ed68e12f2ba Mon Sep 17 00:00:00 2001 From: setepenre <61476054+setepenre@users.noreply.github.com> Date: Sun, 22 Dec 2024 16:18:07 +0100 Subject: [PATCH 62/67] post-processing: rework gamma correction (#1756) --- src/video_core/host_shaders/post_process.frag | 8 +++++++- src/video_core/renderer_vulkan/vk_presenter.cpp | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/video_core/host_shaders/post_process.frag b/src/video_core/host_shaders/post_process.frag index fcced3232b..d501e98130 100644 --- a/src/video_core/host_shaders/post_process.frag +++ b/src/video_core/host_shaders/post_process.frag @@ -12,8 +12,14 @@ layout(push_constant) uniform settings { float gamma; } pp; +const float cutoff = 0.0031308, a = 1.055, b = 0.055, d = 12.92; +vec3 gamma(vec3 rgb) +{ + return mix(a * pow(rgb, vec3(1.0 / (2.4 + 1.0 - pp.gamma))) - b, d * rgb / pp.gamma, lessThan(rgb, vec3(cutoff))); +} + void main() { vec4 color_linear = texture(texSampler, uv); - color = pow(color_linear, vec4(1.0/(2.2 + 1.0 - pp.gamma))); + color = vec4(gamma(color_linear.rgb), color_linear.a); } diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 
b7d829316c..139fd962d5 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -154,7 +154,7 @@ void Presenter::CreatePostProcessPipeline() { const auto& fs_module = Vulkan::Compile(pp_shaders[1], vk::ShaderStageFlagBits::eFragment, instance.GetDevice()); ASSERT(fs_module); - Vulkan::SetObjectName(instance.GetDevice(), vs_module, "post_process.frag"); + Vulkan::SetObjectName(instance.GetDevice(), fs_module, "post_process.frag"); const std::array shaders_ci{ vk::PipelineShaderStageCreateInfo{ From 7fe4df85abb23a0f9960ed6bdb74dfaa6a71f085 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sun, 22 Dec 2024 20:12:43 +0300 Subject: [PATCH 63/67] Clear color attachment if FCE was invoked before any draws (#1851) * Clear RT if FCE was invoked before any draws Co-authored-by: psucien * address review comments --------- Co-authored-by: psucien --- .../renderer_vulkan/vk_rasterizer.cpp | 31 ++++++++++++++++++- .../renderer_vulkan/vk_rasterizer.h | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a0899f7c8b..ec099b9f6b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -52,7 +52,8 @@ bool Rasterizer::FilterDraw() { // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an // actual draw hence can skip pipeline creation. 
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { - LOG_TRACE(Render_Vulkan, "FCE pass skipped"); + // Clears the render target if FCE is launched before any draws + EliminateFastClear(); return false; } if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) { @@ -201,6 +202,34 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { return {vertex_offset, instance_offset}; } +void Rasterizer::EliminateFastClear() { + auto& col_buf = liverpool->regs.color_buffers[0]; + if (!col_buf || !col_buf.info.fast_clear) { + return; + } + if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) { + return; + } + for (u32 slice = col_buf.view.slice_start; slice <= col_buf.view.slice_max; ++slice) { + texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false); + } + const auto& hint = liverpool->last_cb_extent[0]; + VideoCore::TextureCache::RenderTargetDesc desc(col_buf, hint); + const auto& image_view = texture_cache.FindRenderTarget(desc); + const auto& image = texture_cache.GetImage(image_view.image_id); + const vk::ImageSubresourceRange range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = col_buf.view.slice_start, + .layerCount = col_buf.view.slice_max - col_buf.view.slice_start + 1, + }; + scheduler.EndRendering(); + scheduler.CommandBuffer().clearColorImage(image.image, vk::ImageLayout::eColorAttachmentOptimal, + LiverpoolToVK::ColorBufferClearValue(col_buf).color, + range); +} + void Rasterizer::Draw(bool is_indexed, u32 index_offset) { RENDERER_TRACE; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 80b22c7d87..1bbb90b6ce 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -71,6 +71,7 @@ class Rasterizer { RenderState PrepareRenderState(u32 mrt_mask); void 
BeginRendering(const GraphicsPipeline& pipeline, RenderState& state); void Resolve(); + void EliminateFastClear(); void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); From 8abc43a03decaba8b82b5d507eec22fc6c864f00 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sun, 22 Dec 2024 19:43:44 +0100 Subject: [PATCH 64/67] texture_cache: 32bpp and 64bpp macro detilers (#1852) * added 32bpp macro detiler * added 64bpp macro detiler * consider 3d depth alignment in size calculations --- src/video_core/amdgpu/resource.h | 6 +- src/video_core/host_shaders/CMakeLists.txt | 2 + src/video_core/host_shaders/detile_m32x1.comp | 1 + src/video_core/host_shaders/detile_m32x2.comp | 1 + src/video_core/host_shaders/detile_m32x4.comp | 1 + src/video_core/host_shaders/detile_m8x1.comp | 1 + src/video_core/host_shaders/detile_m8x2.comp | 1 + .../host_shaders/detile_macro32x1.comp | 90 ++++++++ .../host_shaders/detile_macro32x2.comp | 91 ++++++++ src/video_core/texture_cache/image_info.cpp | 3 + src/video_core/texture_cache/tile_manager.cpp | 203 ++++-------------- src/video_core/texture_cache/tile_manager.h | 9 +- 12 files changed, 236 insertions(+), 173 deletions(-) create mode 100644 src/video_core/host_shaders/detile_macro32x1.comp create mode 100644 src/video_core/host_shaders/detile_macro32x2.comp diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 5d74175594..d9a8b7cac4 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -126,6 +126,7 @@ enum class TilingMode : u32 { Display_MacroTiled = 0xAu, Texture_MicroTiled = 0xDu, Texture_MacroTiled = 0xEu, + Texture_Volume = 0x13u, }; constexpr std::string_view NameOf(TilingMode type) { @@ -140,6 +141,8 @@ constexpr std::string_view NameOf(TilingMode type) { return "Texture_MicroTiled"; case TilingMode::Texture_MacroTiled: return "Texture_MacroTiled"; + case TilingMode::Texture_Volume: + 
return "Texture_Volume"; default: return "Unknown"; } @@ -294,9 +297,6 @@ struct Image { return tiling_index == 5 ? TilingMode::Texture_MicroTiled : TilingMode::Depth_MacroTiled; } - if (tiling_index == 0x13) { - return TilingMode::Texture_MicroTiled; - } return static_cast(tiling_index); } diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 4ef8bcdba9..c2a3b53fd3 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -7,6 +7,8 @@ set(SHADER_FILES detile_m32x1.comp detile_m32x2.comp detile_m32x4.comp + detile_macro32x1.comp + detile_macro32x2.comp fs_tri.vert post_process.frag ) diff --git a/src/video_core/host_shaders/detile_m32x1.comp b/src/video_core/host_shaders/detile_m32x1.comp index fecea109bf..802f5f531d 100644 --- a/src/video_core/host_shaders/detile_m32x1.comp +++ b/src/video_core/host_shaders/detile_m32x1.comp @@ -15,6 +15,7 @@ layout(std430, binding = 1) buffer output_buf { layout(push_constant) uniform image_info { uint num_levels; uint pitch; + uint height; uint sizes[14]; } info; diff --git a/src/video_core/host_shaders/detile_m32x2.comp b/src/video_core/host_shaders/detile_m32x2.comp index c2caa62c2b..90063a1859 100644 --- a/src/video_core/host_shaders/detile_m32x2.comp +++ b/src/video_core/host_shaders/detile_m32x2.comp @@ -15,6 +15,7 @@ layout(std430, binding = 1) buffer output_buf { layout(push_constant) uniform image_info { uint num_levels; uint pitch; + uint height; uint sizes[14]; } info; diff --git a/src/video_core/host_shaders/detile_m32x4.comp b/src/video_core/host_shaders/detile_m32x4.comp index 1135387062..e1b9881723 100644 --- a/src/video_core/host_shaders/detile_m32x4.comp +++ b/src/video_core/host_shaders/detile_m32x4.comp @@ -15,6 +15,7 @@ layout(std430, binding = 1) buffer output_buf { layout(push_constant) uniform image_info { uint num_levels; uint pitch; + uint height; uint sizes[14]; } info; diff --git 
a/src/video_core/host_shaders/detile_m8x1.comp b/src/video_core/host_shaders/detile_m8x1.comp index 3ca2e64bdb..39d0aaeb13 100644 --- a/src/video_core/host_shaders/detile_m8x1.comp +++ b/src/video_core/host_shaders/detile_m8x1.comp @@ -18,6 +18,7 @@ layout(std430, binding = 1) buffer output_buf { layout(push_constant) uniform image_info { uint num_levels; uint pitch; + uint height; uint sizes[14]; } info; diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp index ee9b728108..3f8e5ab336 100644 --- a/src/video_core/host_shaders/detile_m8x2.comp +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -17,6 +17,7 @@ layout(std430, binding = 1) buffer output_buf { layout(push_constant) uniform image_info { uint num_levels; uint pitch; + uint height; uint sizes[14]; } info; diff --git a/src/video_core/host_shaders/detile_macro32x1.comp b/src/video_core/host_shaders/detile_macro32x1.comp new file mode 100644 index 0000000000..086fbcfb52 --- /dev/null +++ b/src/video_core/host_shaders/detile_macro32x1.comp @@ -0,0 +1,90 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 0) buffer input_buf { + uint in_data[]; +}; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; + +layout(push_constant) uniform image_info { + uint num_levels; + uint pitch; + uint height; + uint c0; + uint c1; +} info; + +// Each LUT is 64 bytes, so should fit into K$ given tiled slices locality +const uint lut_32bpp[][64] = { + { + 0x00, 0x01, 0x04, 0x05, 0x40, 0x41, 0x44, 0x45, + 0x02, 0x03, 0x06, 0x07, 0x42, 0x43, 0x46, 0x47, + 0x10, 0x11, 0x14, 0x15, 0x50, 0x51, 0x54, 0x55, + 0x12, 0x13, 0x16, 0x17, 0x52, 0x53, 0x56, 0x57, + 0x80, 0x81, 0x84, 0x85, 0xc0, 0xc1, 0xc4, 0xc5, + 0x82, 0x83, 0x86, 0x87, 0xc2, 0xc3, 0xc6, 0xc7, + 0x90, 0x91, 0x94, 0x95, 0xd0, 
0xd1, 0xd4, 0xd5, + 0x92, 0x93, 0x96, 0x97, 0xd2, 0xd3, 0xd6, 0xd7, + }, + { + 0x08, 0x09, 0x0c, 0x0d, 0x48, 0x49, 0x4c, 0x4d, + 0x0a, 0x0b, 0x0e, 0x0f, 0x4a, 0x4b, 0x4e, 0x4f, + 0x18, 0x19, 0x1c, 0x1d, 0x58, 0x59, 0x5c, 0x5d, + 0x1a, 0x1b, 0x1e, 0x1f, 0x5a, 0x5b, 0x5e, 0x5f, + 0x88, 0x89, 0x8c, 0x8d, 0xc8, 0xc9, 0xcc, 0xcd, + 0x8a, 0x8b, 0x8e, 0x8f, 0xca, 0xcb, 0xce, 0xcf, + 0x98, 0x99, 0x9c, 0x9d, 0xd8, 0xd9, 0xdc, 0xdd, + 0x9a, 0x9b, 0x9e, 0x9f, 0xda, 0xdb, 0xde, 0xdf, + }, + { + 0x20, 0x21, 0x24, 0x25, 0x60, 0x61, 0x64, 0x65, + 0x22, 0x23, 0x26, 0x27, 0x62, 0x63, 0x66, 0x67, + 0x30, 0x31, 0x34, 0x35, 0x70, 0x71, 0x74, 0x75, + 0x32, 0x33, 0x36, 0x37, 0x72, 0x73, 0x76, 0x77, + 0xa0, 0xa1, 0xa4, 0xa5, 0xe0, 0xe1, 0xe4, 0xe5, + 0xa2, 0xa3, 0xa6, 0xa7, 0xe2, 0xe3, 0xe6, 0xe7, + 0xb0, 0xb1, 0xb4, 0xb5, 0xf0, 0xf1, 0xf4, 0xf5, + 0xb2, 0xb3, 0xb6, 0xb7, 0xf2, 0xf3, 0xf6, 0xf7, + }, + { + 0x28, 0x29, 0x2c, 0x2d, 0x68, 0x69, 0x6c, 0x6d, + 0x2a, 0x2b, 0x2e, 0x2f, 0x6a, 0x6b, 0x6e, 0x6f, + 0x38, 0x39, 0x3c, 0x3d, 0x78, 0x79, 0x7c, 0x7d, + 0x3a, 0x3b, 0x3e, 0x3f, 0x7a, 0x7b, 0x7e, 0x7f, + 0xa8, 0xa9, 0xac, 0xad, 0xe8, 0xe9, 0xec, 0xed, + 0xaa, 0xab, 0xae, 0xaf, 0xea, 0xeb, 0xee, 0xef, + 0xb8, 0xb9, 0xbc, 0xbd, 0xf8, 0xf9, 0xfc, 0xfd, + 0xba, 0xbb, 0xbe, 0xbf, 0xfa, 0xfb, 0xfe, 0xff, + } +}; + +#define MICRO_TILE_DIM (8) +#define MICRO_TILE_SZ (1024) +#define TEXELS_PER_ELEMENT (1) +#define BPP (32) + +void main() { + uint x = gl_GlobalInvocationID.x % info.pitch; + uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; + uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); + + uint col = bitfieldExtract(x, 0, 3); + uint row = bitfieldExtract(y, 0, 3); + uint lut = bitfieldExtract(z, 0, 2); + uint idx = lut_32bpp[lut][col + row * MICRO_TILE_DIM]; + + uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; + uint tile_row = y / MICRO_TILE_DIM; + uint tile_column = x / MICRO_TILE_DIM; + uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; + 
uint offs = slice_offs + tile_offs + (idx * BPP / 8); + + uint p0 = in_data[offs >> 2u]; + out_data[gl_GlobalInvocationID.x] = p0; +} diff --git a/src/video_core/host_shaders/detile_macro32x2.comp b/src/video_core/host_shaders/detile_macro32x2.comp new file mode 100644 index 0000000000..296311c7ac --- /dev/null +++ b/src/video_core/host_shaders/detile_macro32x2.comp @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 0) buffer input_buf { + uint in_data[]; +}; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; + +layout(push_constant) uniform image_info { + uint num_levels; + uint pitch; + uint height; + uint c0; + uint c1; +} info; + +const uint lut_64bpp[][64] = { + { + 0x00, 0x01, 0x08, 0x09, 0x40, 0x41, 0x48, 0x49, + 0x02, 0x03, 0x0a, 0x0b, 0x42, 0x43, 0x4a, 0x4b, + 0x10, 0x11, 0x18, 0x19, 0x50, 0x51, 0x58, 0x59, + 0x12, 0x13, 0x1a, 0x1b, 0x52, 0x53, 0x5a, 0x5b, + 0x80, 0x81, 0x88, 0x89, 0xc0, 0xc1, 0xc8, 0xc9, + 0x82, 0x83, 0x8a, 0x8b, 0xc2, 0xc3, 0xca, 0xcb, + 0x90, 0x91, 0x98, 0x99, 0xd0, 0xd1, 0xd8, 0xd9, + 0x92, 0x93, 0x9a, 0x9b, 0xd2, 0xd3, 0xda, 0xdb, + }, + { + 0x04, 0x05, 0x0c, 0x0d, 0x44, 0x45, 0x4c, 0x4d, + 0x06, 0x07, 0x0e, 0x0f, 0x46, 0x47, 0x4e, 0x4f, + 0x14, 0x15, 0x1c, 0x1d, 0x54, 0x55, 0x5c, 0x5d, + 0x16, 0x17, 0x1e, 0x1f, 0x56, 0x57, 0x5e, 0x5f, + 0x84, 0x85, 0x8c, 0x8d, 0xc4, 0xc5, 0xcc, 0xcd, + 0x86, 0x87, 0x8e, 0x8f, 0xc6, 0xc7, 0xce, 0xcf, + 0x94, 0x95, 0x9c, 0x9d, 0xd4, 0xd5, 0xdc, 0xdd, + 0x96, 0x97, 0x9e, 0x9f, 0xd6, 0xd7, 0xde, 0xdf, + }, + { + 0x20, 0x21, 0x28, 0x29, 0x60, 0x61, 0x68, 0x69, + 0x22, 0x23, 0x2a, 0x2b, 0x62, 0x63, 0x6a, 0x6b, + 0x30, 0x31, 0x38, 0x39, 0x70, 0x71, 0x78, 0x79, + 0x32, 0x33, 0x3a, 0x3b, 0x72, 0x73, 0x7a, 0x7b, + 0xa0, 0xa1, 0xa8, 0xa9, 0xe0, 0xe1, 0xe8, 0xe9, + 0xa2, 0xa3, 0xaa, 0xab, 0xe2, 0xe3, 
0xea, 0xeb, + 0xb0, 0xb1, 0xb8, 0xb9, 0xf0, 0xf1, 0xf8, 0xf9, + 0xb2, 0xb3, 0xba, 0xbb, 0xf2, 0xf3, 0xfa, 0xfb, + }, + { + 0x24, 0x25, 0x2c, 0x2d, 0x64, 0x65, 0x6c, 0x6d, + 0x26, 0x27, 0x2e, 0x2f, 0x66, 0x67, 0x6e, 0x6f, + 0x34, 0x35, 0x3c, 0x3d, 0x74, 0x75, 0x7c, 0x7d, + 0x36, 0x37, 0x3e, 0x3f, 0x76, 0x77, 0x7e, 0x7f, + 0xa4, 0xa5, 0xac, 0xad, 0xe4, 0xe5, 0xec, 0xed, + 0xa6, 0xa7, 0xae, 0xaf, 0xe6, 0xe7, 0xee, 0xef, + 0xb4, 0xb5, 0xbc, 0xbd, 0xf4, 0xf5, 0xfc, 0xfd, + 0xb6, 0xb7, 0xbe, 0xbf, 0xf6, 0xf7, 0xfe, 0xff, + }, +}; + +#define MICRO_TILE_DIM (8) +#define MICRO_TILE_SZ (2048) +#define TEXELS_PER_ELEMENT (1) +#define BPP (64) + +void main() { + uint x = gl_GlobalInvocationID.x % info.pitch; + uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; + uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); + + uint col = bitfieldExtract(x, 0, 3); + uint row = bitfieldExtract(y, 0, 3); + uint lut = bitfieldExtract(z, 0, 2); + uint idx = lut_64bpp[lut][col + row * MICRO_TILE_DIM]; + + uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; + uint tile_row = y / MICRO_TILE_DIM; + uint tile_column = x / MICRO_TILE_DIM; + uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; + uint offs = slice_offs + tile_offs + (idx * BPP / 8); + + uint p0 = in_data[(offs >> 2) + 0]; + uint p1 = in_data[(offs >> 2) + 1]; + out_data[2 * gl_GlobalInvocationID.x + 0] = p0; + out_data[2 * gl_GlobalInvocationID.x + 1] = p1; +} diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 606ede558e..2cc4aab38a 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -366,6 +366,9 @@ void ImageInfo::UpdateSize() { mip_info.height = mip_h; break; } + case AmdGpu::TilingMode::Texture_Volume: + mip_d += (-mip_d) & 3u; + [[fallthrough]]; case AmdGpu::TilingMode::Texture_MicroTiled: { std::tie(mip_info.pitch, mip_info.size) = ImageSizeMicroTiled(mip_w, mip_h, bpp, 
num_samples); diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 94d37c993b..6a51bae2cc 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -12,6 +12,8 @@ #include "video_core/host_shaders/detile_m32x4_comp.h" #include "video_core/host_shaders/detile_m8x1_comp.h" #include "video_core/host_shaders/detile_m8x2_comp.h" +#include "video_core/host_shaders/detile_macro32x1_comp.h" +#include "video_core/host_shaders/detile_macro32x2_comp.h" #include #include @@ -19,158 +21,7 @@ namespace VideoCore { -class TileManager32 { -public: - u32 m_macro_tile_height = 0; - u32 m_bank_height = 0; - u32 m_num_banks = 0; - u32 m_num_pipes = 0; - u32 m_padded_width = 0; - u32 m_padded_height = 0; - u32 m_pipe_bits = 0; - u32 m_bank_bits = 0; - - void Init(u32 width, u32 height, bool is_neo) { - m_macro_tile_height = (is_neo ? 128 : 64); - m_bank_height = is_neo ? 2 : 1; - m_num_banks = is_neo ? 8 : 16; - m_num_pipes = is_neo ? 16 : 8; - m_padded_width = width; - if (height == 1080) { - m_padded_height = is_neo ? 1152 : 1088; - } - if (height == 720) { - m_padded_height = 768; - } - m_pipe_bits = is_neo ? 4 : 3; - m_bank_bits = is_neo ? 
3 : 4; - } - - static u32 getElementIdx(u32 x, u32 y) { - u32 elem = 0; - elem |= ((x >> 0u) & 0x1u) << 0u; - elem |= ((x >> 1u) & 0x1u) << 1u; - elem |= ((y >> 0u) & 0x1u) << 2u; - elem |= ((x >> 2u) & 0x1u) << 3u; - elem |= ((y >> 1u) & 0x1u) << 4u; - elem |= ((y >> 2u) & 0x1u) << 5u; - - return elem; - } - - static u32 getPipeIdx(u32 x, u32 y, bool is_neo) { - u32 pipe = 0; - - if (!is_neo) { - pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u; - pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u; - pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u; - } else { - pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u; - pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u; - pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u; - pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u; - } - - return pipe; - } - - static u32 getBankIdx(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, - u32 num_pipes) { - const u32 x_shift_offset = std::bit_width(bank_width * num_pipes) - 1; - const u32 y_shift_offset = std::bit_width(bank_height) - 1; - const u32 xs = x >> x_shift_offset; - const u32 ys = y >> y_shift_offset; - u32 bank = 0; - switch (num_banks) { - case 8: - bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u; - bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u; - bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u; - break; - case 16: - bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u; - bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u; - bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u; - bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u; - break; - default:; - } - - return bank; - } - - u64 getTiledOffs(u32 x, u32 y, bool is_neo) const { - u64 element_index = getElementIdx(x, y); - - u32 xh = x; - u32 yh = y; - u64 pipe = getPipeIdx(xh, yh, is_neo); - u64 bank = getBankIdx(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes); - u32 tile_bytes = (8 * 8 * 32 + 7) / 8; - u64 element_offset = (element_index * 32); - u64 tile_split_slice = 0; - - if 
(tile_bytes > 512) { - tile_split_slice = element_offset / (static_cast(512) * 8); - element_offset %= (static_cast(512) * 8); - tile_bytes = 512; - } - - u64 macro_tile_bytes = - (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks); - u64 macro_tiles_per_row = m_padded_width / 128; - u64 macro_tile_row_index = y / m_macro_tile_height; - u64 macro_tile_column_index = x / 128; - u64 macro_tile_index = - (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index; - u64 macro_tile_offset = macro_tile_index * macro_tile_bytes; - u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height); - u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes; - u64 slice_offset = tile_split_slice * slice_bytes; - u64 tile_row_index = (y / 8) % m_bank_height; - u64 tile_index = tile_row_index; - u64 tile_offset = tile_index * tile_bytes; - - u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice; - bank ^= tile_split_slice_rotation; - bank &= (m_num_banks - 1); - - u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset; - u64 bit_offset = total_offset & 0x7u; - total_offset /= 8; - - u64 pipe_interleave_offset = total_offset & 0xffu; - u64 offset = total_offset >> 8u; - u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | - (offset << (8u + m_pipe_bits + m_bank_bits)); - - return ((byte_offset << 3u) | bit_offset) / 8; - } -}; - -void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_neo) { - TileManager32 t; - t.Init(width, height, is_neo); - - for (u32 y = 0; y < height; y++) { - u32 x = 0; - u64 linear_offset = y * width * 4; - - for (; x + 1 < width; x += 2) { - auto tiled_offset = t.getTiledOffs(x, y, is_neo); - - std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64)); - linear_offset += 8; - } - if (x < width) { - auto tiled_offset = t.getTiledOffs(x, y, is_neo); - 
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32)); - } - } -} - -vk::Format DemoteImageFormatForDetiling(vk::Format format) { +static vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { case vk::Format::eR8Uint: case vk::Format::eR8Unorm: @@ -233,7 +84,8 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { const DetilerContext* TileManager::GetDetiler(const Image& image) const { const auto format = DemoteImageFormatForDetiling(image.info.pixel_format); - if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled) { + switch (image.info.tiling_mode) { + case AmdGpu::TilingMode::Texture_MicroTiled: switch (format) { case vk::Format::eR8Uint: return &detilers[DetilerType::Micro8x1]; @@ -248,22 +100,35 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const { default: return nullptr; } + case AmdGpu::TilingMode::Texture_Volume: + switch (format) { + case vk::Format::eR32Uint: + return &detilers[DetilerType::Macro32x1]; + case vk::Format::eR32G32Uint: + return &detilers[DetilerType::Macro32x2]; + default: + return nullptr; + } + break; + default: + return nullptr; } - return nullptr; } struct DetilerParams { u32 num_levels; u32 pitch0; + u32 height; u32 sizes[14]; }; TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) : instance{instance}, scheduler{scheduler} { static const std::array detiler_shaders{ - HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, - HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, - HostShaders::DETILE_M32X4_COMP, + HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, + HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, + HostShaders::DETILE_M32X4_COMP, HostShaders::DETILE_MACRO32X1_COMP, + HostShaders::DETILE_MACRO32X2_COMP, }; boost::container::static_vector bindings{ @@ -447,14 +312,24 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o set_writes); DetilerParams 
params; - params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); params.num_levels = image.info.resources.levels; - - ASSERT(image.info.resources.levels <= 14); - std::memset(&params.sizes, 0, sizeof(params.sizes)); - for (int m = 0; m < image.info.resources.levels; ++m) { - params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + - (m > 0 ? params.sizes[m - 1] : 0); + params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); + params.height = image.info.size.height; + if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { + ASSERT(image.info.resources.levels == 1); + ASSERT(image.info.num_bits >= 32); + const auto tiles_per_row = image.info.pitch / 8u; + const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u); + params.sizes[0] = tiles_per_row; + params.sizes[1] = tiles_per_slice; + } else { + + ASSERT(image.info.resources.levels <= 14); + std::memset(&params.sizes, 0, sizeof(params.sizes)); + for (int m = 0; m < image.info.resources.levels; ++m) { + params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + + (m > 0 ? params.sizes[m - 1] : 0); + } } cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params), diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index ed7e32c44a..72860bca0b 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -11,12 +11,6 @@ namespace VideoCore { class TextureCache; -/// Converts tiled texture data to linear format. -void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool neo); - -/// Converts image format to the one used internally by detiler.
-vk::Format DemoteImageFormatForDetiling(vk::Format format); - enum DetilerType : u32 { Micro8x1, Micro8x2, @@ -24,6 +18,9 @@ enum DetilerType : u32 { Micro32x2, Micro32x4, + Macro32x1, + Macro32x2, + Max }; From 2dc5755799c92a74fddbe5516decd4cb4346c53b Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 22 Dec 2024 22:51:48 +0100 Subject: [PATCH 65/67] build: exclude Tracy from release builds --- externals/CMakeLists.txt | 6 +++++- src/common/debug.h | 4 ++++ src/video_core/renderer_vulkan/vk_scheduler.cpp | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index dbe6794d86..4350948b7e 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -189,7 +189,11 @@ add_library(Dear_ImGui target_include_directories(Dear_ImGui INTERFACE dear_imgui/) # Tracy -option(TRACY_ENABLE "" ON) +if (CMAKE_BUILD_TYPE STREQUAL "Release") + option(TRACY_ENABLE "" OFF) +else() + option(TRACY_ENABLE "" ON) +endif() option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash option(TRACY_ON_DEMAND "" ON) option(TRACY_NO_FRAME_IMAGE "" ON) diff --git a/src/common/debug.h b/src/common/debug.h index 882e9e5c4e..92bf725504 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -14,7 +14,11 @@ #include static inline bool IsProfilerConnected() { +#if TRACY_ENABLE return tracy::GetProfiler().IsConnected(); +#else + return false; +#endif } #define TRACY_GPU_ENABLED 0 diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f6b0edda41..fd84c54ed4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -14,12 +14,16 @@ std::mutex Scheduler::submit_mutex; Scheduler::Scheduler(const Instance& instance) : instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} { +#if TRACY_GPU_ENABLED profiler_scope = 
reinterpret_cast<tracy::VkCtxScope*>(std::malloc(sizeof(tracy::VkCtxScope))); +#endif AllocateWorkerCommandBuffers(); } Scheduler::~Scheduler() { +#if TRACY_GPU_ENABLED std::free(profiler_scope); +#endif } void Scheduler::BeginRendering(const RenderState& new_state) { @@ -93,23 +97,27 @@ void Scheduler::AllocateWorkerCommandBuffers() { ASSERT_MSG(begin_result == vk::Result::eSuccess, "Failed to begin command buffer: {}", vk::to_string(begin_result)); +#if TRACY_GPU_ENABLED auto* profiler_ctx = instance.GetProfilerContext(); if (profiler_ctx) { static const auto scope_loc = GPU_SCOPE_LOCATION("Guest Frame", MarkersPalette::GpuMarkerColor); new (profiler_scope) tracy::VkCtxScope{profiler_ctx, &scope_loc, current_cmdbuf, true}; } +#endif } void Scheduler::SubmitExecution(SubmitInfo& info) { std::scoped_lock lk{submit_mutex}; const u64 signal_value = master_semaphore.NextTick(); +#if TRACY_GPU_ENABLED auto* profiler_ctx = instance.GetProfilerContext(); if (profiler_ctx) { profiler_scope->~VkCtxScope(); TracyVkCollect(profiler_ctx, current_cmdbuf); } +#endif EndRendering(); auto end_result = current_cmdbuf.end(); From 43fd8fa44a4ea7e8bdfba86106e72e57d7e678d5 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 22 Dec 2024 23:11:16 +0100 Subject: [PATCH 66/67] hot-fix: macos release build --- src/core/libraries/kernel/equeue.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 6543cc3199..42a8eed894 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "common/assert.h" #include "common/debug.h" #include "common/logging/log.h" From dfdd819e3e020104d325aeed435eda7ff1d6cb5c Mon Sep 17 00:00:00 2001 From: DanielSvoboda Date: Mon, 23 Dec 2024 04:29:30 -0300 Subject: [PATCH 67/67] Fix AutoUpdate Issue with Non-Latin
Usernames/Directories (#1853) * Fix updater for non-Latin user directories * QStandardPaths --- src/qt_gui/check_update.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp index d713f67fb9..bb07baaf59 100644 --- a/src/qt_gui/check_update.cpp +++ b/src/qt_gui/check_update.cpp @@ -15,6 +15,7 @@ #include #include #include +#include <QStandardPaths> #include #include #include @@ -348,7 +349,9 @@ void CheckUpdate::DownloadUpdate(const QString& url) { QString userPath; Common::FS::PathToQString(userPath, Common::FS::GetUserPath(Common::FS::PathType::UserDir)); #ifdef Q_OS_WIN - QString tempDownloadPath = QString(getenv("LOCALAPPDATA")) + "/Temp/temp_download_update"; + QString tempDownloadPath = + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + + "/Temp/temp_download_update"; #else QString tempDownloadPath = userPath + "/temp_download_update"; #endif @@ -397,10 +400,11 @@ void CheckUpdate::Install() { QString processCommand; #ifdef Q_OS_WIN - // On windows, overwrite tempDirPath with AppData/Local/Temp folder + // On windows, overwrite tempDirPath with AppData/Roaming/shadps4/Temp folder // due to PowerShell Expand-Archive not being able to handle correctly // paths in square brackets (ie: ./[shadps4]) - tempDirPath = QString(getenv("LOCALAPPDATA")) + "/Temp/temp_download_update"; + tempDirPath = QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + + "/Temp/temp_download_update"; // Windows Batch Script scriptFileName = tempDirPath + "/update.ps1"; @@ -536,6 +540,7 @@ void CheckUpdate::Install() { QFile scriptFile(scriptFileName); if (scriptFile.open(QIODevice::WriteOnly | QIODevice::Text)) { QTextStream out(&scriptFile); + scriptFile.write("\xEF\xBB\xBF"); #ifdef Q_OS_WIN out << scriptContent.arg(binaryStartingUpdate).arg(tempDirPath).arg(rootPath); #endif