Merge remote-tracking branch 'jdk/master' into clean-read-table-for-degen

William Kemper 2025-06-10 13:12:58 -07:00
commit 5e2c20a80f
1036 changed files with 38363 additions and 26136 deletions


@@ -63,7 +63,7 @@ env:
jobs:
build-windows:
name: build
runs-on: windows-2019
runs-on: windows-2025
defaults:
run:
shell: bash
@@ -102,7 +102,7 @@ jobs:
id: toolchain-check
run: |
set +e
'/c/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/vc/auxiliary/build/vcvars64.bat' -vcvars_ver=${{ inputs.msvc-toolset-version }}
'/c/Program Files/Microsoft Visual Studio/2022/Enterprise/vc/auxiliary/build/vcvars64.bat' -vcvars_ver=${{ inputs.msvc-toolset-version }}
if [ $? -eq 0 ]; then
echo "Toolchain is already installed"
echo "toolchain-installed=true" >> $GITHUB_OUTPUT
@@ -115,7 +115,7 @@ jobs:
run: |
# Run Visual Studio Installer
'/c/Program Files (x86)/Microsoft Visual Studio/Installer/vs_installer.exe' \
modify --quiet --installPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' \
modify --quiet --installPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' \
--add Microsoft.VisualStudio.Component.VC.${{ inputs.msvc-toolset-version }}.${{ inputs.msvc-toolset-architecture }}
if: steps.toolchain-check.outputs.toolchain-installed != 'true'


@@ -310,7 +310,7 @@ jobs:
uses: ./.github/workflows/build-windows.yml
with:
platform: windows-x64
msvc-toolset-version: '14.29'
msvc-toolset-version: '14.43'
msvc-toolset-architecture: 'x86.x64'
configure-arguments: ${{ github.event.inputs.configure-arguments }}
make-arguments: ${{ github.event.inputs.make-arguments }}
@@ -322,7 +322,7 @@ jobs:
uses: ./.github/workflows/build-windows.yml
with:
platform: windows-aarch64
msvc-toolset-version: '14.29'
msvc-toolset-version: '14.43'
msvc-toolset-architecture: 'arm64'
make-target: 'hotspot'
extra-conf-options: '--openjdk-target=aarch64-unknown-cygwin'
@@ -393,5 +393,5 @@ jobs:
with:
platform: windows-x64
bootjdk-platform: windows-x64
runs-on: windows-2019
runs-on: windows-2025
debug-suffix: -debug


@@ -1,7 +1,7 @@
[general]
project=jdk
jbs=JDK
version=25
version=26
[checks]
error=author,committer,reviewers,merge,issues,executable,symlink,message,hg-tag,whitespace,problemlists,copyright


@@ -0,0 +1,127 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
<title>Explanation of start of release changes</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
/* The extra [class] is a hack that increases specificity enough to
override a similar rule in reveal.js */
ul.task-list[class]{list-style: none;}
ul.task-list li input[type="checkbox"] {
font-size: inherit;
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
.display.math{display: block; text-align: center; margin: 0.5rem auto;}
</style>
<link rel="stylesheet" href="../make/data/docs-resources/resources/jdk-default.css" />
<!--[if lt IE 9]>
<script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
<![endif]-->
</head>
<body>
<header id="title-block-header">
<h1 class="title">Explanation of start of release changes</h1>
</header>
<nav id="TOC" role="doc-toc">
<ul>
<li><a href="#overview" id="toc-overview">Overview</a></li>
<li><a href="#details-and-file-updates"
id="toc-details-and-file-updates">Details and file updates</a>
<ul>
<li><a href="#meta-data-files" id="toc-meta-data-files">Meta-data
files</a></li>
<li><a href="#src-files" id="toc-src-files"><code>src</code>
files</a></li>
<li><a href="#test-files" id="toc-test-files"><code>test</code>
files</a></li>
</ul></li>
</ul>
</nav>
<h2 id="overview">Overview</h2>
<p>The start of release changes, the changes that turn JDK <em>N</em>
into JDK (<em>N</em>+1), are primarily small updates to various files
along with new files to store symbol information to allow
<code>javac --release N ...</code> to run on JDK (<em>N</em>+1).</p>
<p>The updates include changes to files holding meta-data about the
release, files under the <code>src</code> directory for API and tooling
updates, and incidental updates under the <code>test</code>
directory.</p>
<h2 id="details-and-file-updates">Details and file updates</h2>
<p>As a matter of policy, there are a number of semantically distinct
concepts which get incremented separately at the start of a new
release:</p>
<ul>
<li>Feature value of <code>Runtime.version()</code></li>
<li>Highest source version modeled by
<code>javax.lang.model.SourceVersion</code></li>
<li>Highest class file format major version recognized by the
platform</li>
<li>Highest
<code>-source</code>/<code>-target</code>/<code>--release</code>
argument recognized by <code>javac</code> and related tools</li>
</ul>
<p>The expected file updates are listed below. Additional files may need
to be updated for a particular release.</p>
<h3 id="meta-data-files">Meta-data files</h3>
<ul>
<li><code>jcheck/conf</code>: update meta-data used by
<code>jcheck</code> and the Skara tooling</li>
<li><code>make/conf/version-numbers.conf</code>: update to meta-data
used in the build</li>
</ul>
<h3 id="src-files"><code>src</code> files</h3>
<ul>
<li><code>src/hotspot/share/classfile/classFileParser.cpp</code>: add a
<code>#define</code> for the new version</li>
<li><code>src/java.base/share/classes/java/lang/classfile/ClassFile.java</code>:
add a constant for the new class file format version</li>
<li><code>src/java.base/share/classes/java/lang/reflect/ClassFileFormatVersion.java</code>:
add an <code>enum</code> constant for the new class file format
version</li>
<li><code>src/java.compiler/share/classes/javax/lang/model/SourceVersion.java</code>:
add an <code>enum</code> constant for the new source version</li>
<li><code>src/java.compiler/share/classes/javax/lang/model/util/*</code>
visitors: Update <code>@SupportedSourceVersion</code> annotations to
latest value. Note this update is done in lieu of introducing another
set of visitors for each Java SE release.</li>
<li><code>src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java</code>:
add an <code>enum</code> constant for the new source version internal to
<code>javac</code></li>
<li><code>src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassFile.java</code>:
add an <code>enum</code> constant for the new class file format version
internal to <code>javac</code></li>
<li><code>src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/Target.java</code>:
add an <code>enum</code> constant for the new target version internal to
<code>javac</code></li>
<li><code>src/jdk.compiler/share/classes/com/sun/tools/javac/processing/PrintingProcessor.java</code>
update printing processor to support the new source version</li>
<li>The symbol information for <code>--release</code> is stored as new
text files in the <code>src/jdk.compiler/share/data/symbols</code>
directory, one file per module. The README file in that directory
contains directions on how to create the files.</li>
</ul>
<h3 id="test-files"><code>test</code> files</h3>
<ul>
<li><code>test/langtools/tools/javac/api/TestGetSourceVersions.java</code>:
add new <code>SourceVersion</code> constant to test matrix.</li>
<li><code>test/langtools/tools/javac/classfiles/ClassVersionChecker.java</code>:
add new enum constant for the new class file version</li>
<li><code>test/langtools/tools/javac/lib/JavacTestingAbstractProcessor.java</code>
update annotation processor extended by <code>javac</code> tests to
cover the new source version</li>
<li><code>test/langtools/tools/javac/preview/classReaderTest/Client.nopreview.out</code>
and
<code>test/langtools/tools/javac/preview/classReaderTest/Client.preview.out</code>:
update expected messages for preview errors and warnings</li>
</ul>
</body>
</html>


@@ -0,0 +1,68 @@
% Explanation of start of release changes
## Overview
The start of release changes, the changes that turn JDK _N_ into JDK
(_N_+1), are primarily small updates to various files along with new files to
store symbol information to allow `javac --release N ...` to run on
JDK (_N_+1).
The updates include changes to files holding meta-data about the
release, files under the `src` directory for API and tooling updates,
and incidental updates under the `test` directory.
## Details and file updates
As a matter of policy, there are a number of semantically distinct
concepts which get incremented separately at the start of a new
release:
* Feature value of `Runtime.version()`
* Highest source version modeled by `javax.lang.model.SourceVersion`
* Highest class file format major version recognized by the platform
* Highest `-source`/`-target`/`--release` argument recognized by
`javac` and related tools
The expected file updates are listed below. Additional files may need
to be updated for a particular release.
### Meta-data files
* `jcheck/conf`: update meta-data used by `jcheck` and the Skara tooling
* `make/conf/version-numbers.conf`: update to meta-data used in the build
### `src` files
* `src/hotspot/share/classfile/classFileParser.cpp`: add a `#define`
for the new version
* `src/java.base/share/classes/java/lang/classfile/ClassFile.java`:
add a constant for the new class file format version
* `src/java.base/share/classes/java/lang/reflect/ClassFileFormatVersion.java`:
add an `enum` constant for the new class file format version
* `src/java.compiler/share/classes/javax/lang/model/SourceVersion.java`:
add an `enum` constant for the new source version
* `src/java.compiler/share/classes/javax/lang/model/util/*` visitors: Update
`@SupportedSourceVersion` annotations to latest value. Note this update
is done in lieu of introducing another set of visitors for each Java
SE release.
* `src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java`:
add an `enum` constant for the new source version internal to `javac`
* `src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassFile.java`:
add an `enum` constant for the new class file format version internal to `javac`
* `src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/Target.java`:
add an `enum` constant for the new target version internal to `javac`
* `src/jdk.compiler/share/classes/com/sun/tools/javac/processing/PrintingProcessor.java`
update printing processor to support the new source version
* The symbol information for `--release` is stored as new text files in the
`src/jdk.compiler/share/data/symbols` directory, one file per
module. The README file in that directory contains directions on how
to create the files.
### `test` files
* `test/langtools/tools/javac/api/TestGetSourceVersions.java`: add new `SourceVersion` constant to test matrix.
* `test/langtools/tools/javac/classfiles/ClassVersionChecker.java`: add new enum constant for the new class file version
* `test/langtools/tools/javac/lib/JavacTestingAbstractProcessor.java`
update annotation processor extended by `javac` tests to cover the new source version
* `test/langtools/tools/javac/preview/classReaderTest/Client.nopreview.out` and `test/langtools/tools/javac/preview/classReaderTest/Client.preview.out`: update expected messages for preview errors and warnings
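Editor's note: to make the first `src` item above concrete, the HotSpot side of the bump is typically a one-line macro addition. The sketch below is illustrative, not the actual diff; the macro names follow the existing `JAVA_*_VERSION` convention in `classFileParser.cpp`, while `is_supported_major` is a hypothetical stand-in for the parser's real version gate.

```cpp
#include <cstdio>

// Hypothetical sketch of the classFileParser.cpp change for JDK 26.
// Class file major version = feature release + 44, so JDK 26 -> 70.
#define JAVA_25_VERSION 69
#define JAVA_26_VERSION 70   // new constant added at the start of the release

// Stand-in for the parser's version gate (the real check lives in
// ClassFileParser and also validates the minor version and preview bit).
static bool is_supported_major(int major) {
  return major >= 45 /* class files go back to JDK 1.1 */ &&
         major <= JAVA_26_VERSION;
}

int main() {
  std::printf("major 70 accepted: %d\n", is_supported_major(70)); // 1
  std::printf("major 71 accepted: %d\n", is_supported_major(71)); // 0
  return 0;
}
```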


@@ -72,9 +72,11 @@ id="toc-notes-for-specific-tests">Notes for Specific Tests</a>
<li><a href="#non-us-locale" id="toc-non-us-locale">Non-US
locale</a></li>
<li><a href="#pkcs11-tests" id="toc-pkcs11-tests">PKCS11 Tests</a></li>
</ul></li>
<li><a href="#testing-ahead-of-time-optimizations"
id="toc-testing-ahead-of-time-optimizations">Testing Ahead-of-time
Optimizations</a></li>
id="toc-testing-ahead-of-time-optimizations">### Testing Ahead-of-time
Optimizations</a>
<ul>
<li><a href="#testing-with-alternative-security-providers"
id="toc-testing-with-alternative-security-providers">Testing with
alternative security providers</a></li>
@@ -599,8 +601,8 @@ element of the appropriate <code>@Artifact</code> class. (See
JTREG=&quot;JAVA_OPTIONS=-Djdk.test.lib.artifacts.nsslib-linux_aarch64=/path/to/NSS-libs&quot;</code></pre>
<p>For more notes about the PKCS11 tests, please refer to
test/jdk/sun/security/pkcs11/README.</p>
<h3 id="testing-ahead-of-time-optimizations">Testing Ahead-of-time
Optimizations</h3>
<h2 id="testing-ahead-of-time-optimizations">### Testing Ahead-of-time
Optimizations</h2>
<p>One way to improve test coverage of ahead-of-time (AOT) optimizations
in the JDK is to run existing jtreg test cases in a special "AOT_JDK"
mode. Example:</p>


@@ -110,7 +110,18 @@ reconfigure:
CUSTOM_CONFIG_DIR="$(CUSTOM_CONFIG_DIR)" \
$(RECONFIGURE_COMMAND) )
.PHONY: print-modules print-targets print-tests print-configuration reconfigure
# Create files that are needed to run most targets in Main.gmk
create-make-helpers:
( cd $(TOPDIR) && \
$(MAKE) $(MAKE_ARGS) -j 1 -f make/GenerateFindTests.gmk \
$(USER_MAKE_VARS) )
( cd $(TOPDIR) && \
$(MAKE) $(MAKE_ARGS) -j 1 -f make/Main.gmk $(USER_MAKE_VARS) \
UPDATE_MODULE_DEPS=true NO_RECIPES=true \
create-main-targets-include )
.PHONY: print-modules print-targets print-tests print-configuration \
reconfigure create-make-helpers
##############################################################################
# The main target. This will delegate all other targets into Main.gmk.
@@ -130,7 +141,7 @@ TARGET_DESCRIPTION := target$(if $(word 2, $(MAIN_TARGETS)),s) \
# variables are explicitly propagated using $(USER_MAKE_VARS).
main: MAKEOVERRIDES :=
main: $(INIT_TARGETS)
main: $(INIT_TARGETS) create-make-helpers
ifneq ($(SEQUENTIAL_TARGETS)$(PARALLEL_TARGETS), )
$(call RotateLogFiles)
$(ECHO) "Building $(TARGET_DESCRIPTION)" $(BUILD_LOG_PIPE_SIMPLE)
@@ -142,12 +153,7 @@ main: $(INIT_TARGETS)
$(SEQUENTIAL_TARGETS) )
# We might have cleaned away essential files, recreate them.
( cd $(TOPDIR) && \
$(MAKE) $(MAKE_ARGS) -j 1 -f make/GenerateFindTests.gmk \
$(USER_MAKE_VARS) )
( cd $(TOPDIR) && \
$(MAKE) $(MAKE_ARGS) -j 1 -f make/Main.gmk $(USER_MAKE_VARS) \
UPDATE_MODULE_DEPS=true NO_RECIPES=true \
create-main-targets-include )
$(MAKE) $(MAKE_ARGS) -j 1 -f make/Init.gmk create-make-helpers )
endif
ifneq ($(PARALLEL_TARGETS), )
$(call PrepareFailureLogs)


@@ -417,12 +417,14 @@ $(eval $(call SetupTarget, create-source-revision-tracker, \
))
BOOTCYCLE_TARGET := product-images
BOOTCYCLE_SPEC := $(dir $(SPEC))bootcycle-spec.gmk
bootcycle-images:
ifneq ($(COMPILE_TYPE), cross)
$(call LogWarn, Boot cycle build step 2: Building a new JDK image using previously built image)
$(call MakeDir, $(OUTPUTDIR)/bootcycle-build)
+$(MAKE) $(MAKE_ARGS) -f $(TOPDIR)/make/Init.gmk PARALLEL_TARGETS=$(BOOTCYCLE_TARGET) \
LOG_PREFIX="[bootcycle] " JOBS= SPEC=$(dir $(SPEC))bootcycle-spec.gmk main
LOG_PREFIX="[bootcycle] " JOBS= SPEC=$(BOOTCYCLE_SPEC) main
else
$(call LogWarn, Boot cycle build disabled when cross compiling)
endif


@@ -50,7 +50,8 @@ include $(TOPDIR)/make/Global.gmk
# Targets provided by Init.gmk.
ALL_INIT_TARGETS := print-modules print-targets print-configuration \
print-tests reconfigure pre-compare-build post-compare-build
print-tests reconfigure pre-compare-build post-compare-build \
create-make-helpers
# CALLED_TARGETS is the list of targets that the user provided,
# or "default" if unspecified.


@@ -28,7 +28,7 @@
################################################################################
# Minimum supported versions
JTREG_MINIMUM_VERSION=7.5.1
JTREG_MINIMUM_VERSION=7.5.2
GTEST_MINIMUM_VERSION=1.14.0
################################################################################


@@ -26,7 +26,7 @@
# Versions and download locations for dependencies used by GitHub Actions (GHA)
GTEST_VERSION=1.14.0
JTREG_VERSION=7.5.1+1
JTREG_VERSION=7.5.2+1
LINUX_X64_BOOT_JDK_EXT=tar.gz
LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk24/1f9ff9062db4449d8ca828c504ffae90/36/GPL/openjdk-24_linux-x64_bin.tar.gz


@@ -1174,9 +1174,9 @@ var getJibProfilesDependencies = function (input, common) {
jtreg: {
server: "jpg",
product: "jtreg",
version: "7.5.1",
version: "7.5.2",
build_number: "1",
file: "bundles/jtreg-7.5.1+1.zip",
file: "bundles/jtreg-7.5.2+1.zip",
environment_name: "JT_HOME",
environment_path: input.get("jtreg", "home_path") + "/bin",
configure_args: "--with-jtreg=" + input.get("jtreg", "home_path"),
@@ -1192,8 +1192,8 @@ var getJibProfilesDependencies = function (input, common) {
server: "jpg",
product: "jcov",
version: "3.0",
build_number: "1",
file: "bundles/jcov-3.0+1.zip",
build_number: "3",
file: "bundles/jcov-3.0+3.zip",
environment_name: "JCOV_HOME",
},


@@ -26,17 +26,17 @@
# Default version, product, and vendor information to use,
# unless overridden by configure
DEFAULT_VERSION_FEATURE=25
DEFAULT_VERSION_FEATURE=26
DEFAULT_VERSION_INTERIM=0
DEFAULT_VERSION_UPDATE=0
DEFAULT_VERSION_PATCH=0
DEFAULT_VERSION_EXTRA1=0
DEFAULT_VERSION_EXTRA2=0
DEFAULT_VERSION_EXTRA3=0
DEFAULT_VERSION_DATE=2025-09-16
DEFAULT_VERSION_CLASSFILE_MAJOR=69 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
DEFAULT_VERSION_DATE=2026-03-17
DEFAULT_VERSION_CLASSFILE_MAJOR=70 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
DEFAULT_VERSION_CLASSFILE_MINOR=0
DEFAULT_VERSION_DOCS_API_SINCE=11
DEFAULT_ACCEPTABLE_BOOT_VERSIONS="24 25"
DEFAULT_JDK_SOURCE_TARGET_VERSION=25
DEFAULT_ACCEPTABLE_BOOT_VERSIONS="24 25 26"
DEFAULT_JDK_SOURCE_TARGET_VERSION=26
DEFAULT_PROMOTED_VERSION_PRE=ea


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1190,7 +1190,7 @@ OUTER: for (int i = 0; i < n; i += m) {
if (Csyntax)
result.append(" static ");
else
result.append(" static final ");
result.append(" @Stable static final ");
result.append(atype);
result.append(" ").append(name).append("[");
if (Csyntax)
@@ -1347,7 +1347,7 @@
}
static void genCaseMapTableDeclaration(StringBuffer result) {
result.append(" static final char[][][] charMap;\n");
result.append(" @Stable static final char[][][] charMap;\n");
}
static void genCaseMapTable(StringBuffer result, SpecialCaseMap[] specialCaseMaps){


@@ -46,6 +46,8 @@ CLDR_GEN_DONE := $(GENSRC_DIR)/_cldr-gensrc.marker
TZ_DATA_DIR := $(MODULE_SRC)/share/data/tzdata
ZONENAME_TEMPLATE := $(MODULE_SRC)/share/classes/java/time/format/ZoneName.java.template
# The `-utf8` option is used even for US English, as some names
# may contain non-ASCII characters, such as “Türkiye”.
$(CLDR_GEN_DONE): $(wildcard $(CLDR_DATA_DIR)/dtd/*.dtd) \
$(wildcard $(CLDR_DATA_DIR)/main/en*.xml) \
$(wildcard $(CLDR_DATA_DIR)/supplemental/*.xml) \
@@ -61,7 +63,8 @@ $(CLDR_GEN_DONE): $(wildcard $(CLDR_DATA_DIR)/dtd/*.dtd) \
-basemodule \
-year $(COPYRIGHT_YEAR) \
-zntempfile $(ZONENAME_TEMPLATE) \
-tzdatadir $(TZ_DATA_DIR))
-tzdatadir $(TZ_DATA_DIR) \
-utf8)
$(TOUCH) $@
TARGETS += $(CLDR_GEN_DONE)


@@ -45,7 +45,8 @@ $(CLDR_GEN_DONE): $(wildcard $(CLDR_DATA_DIR)/dtd/*.dtd) \
-baselocales "en-US" \
-year $(COPYRIGHT_YEAR) \
-o $(GENSRC_DIR) \
-tzdatadir $(TZ_DATA_DIR))
-tzdatadir $(TZ_DATA_DIR) \
-utf8)
$(TOUCH) $@
TARGETS += $(CLDR_GEN_DONE)


@@ -187,22 +187,18 @@ public class HelloWorld {
new Run("none", "Hello from Cupertino")
}),
new Paragraph("title", new Run[] {
new Run("none", "\u53F0\u5317\u554F\u5019\u60A8\u0021")
new Run("none", "台北問候您!")
}),
new Paragraph("title", new Run[] {
new Run("none", "\u0391\u03B8\u03B7\u03BD\u03B1\u03B9\u0020" // Greek
+ "\u03B1\u03C3\u03C0\u03B1\u03B6\u03BF\u03BD"
+ "\u03C4\u03B1\u03B9\u0020\u03C5\u03BC\u03B1"
+ "\u03C2\u0021")
new Run("none", "Αθηναι ασπαζονται υμας!") // Greek
}),
new Paragraph("title", new Run[] {
new Run("none", "\u6771\u4eac\u304b\u3089\u4eca\u65e5\u306f")
new Run("none", "東京から今日は")
}),
new Paragraph("title", new Run[] {
new Run("none", "\u05e9\u05dc\u05d5\u05dd \u05de\u05d9\u05e8\u05d5"
+ "\u05e9\u05dc\u05d9\u05dd")
new Run("none", "שלום מירושלים")
}),
new Paragraph("title", new Run[] {
new Run("none", "\u0633\u0644\u0627\u0645")
new Run("none", "سلام")
}), };
}


@@ -3921,6 +3921,10 @@ ins_attrib ins_alignment(4); // Required alignment attribute (must
// compute_padding() function must be
// provided for the instruction
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
@@ -7761,14 +7765,12 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
format %{ "movw $src, $src\n\t"
"mov $tmp, $src\t# vector (1D)\n\t"
format %{ "fmovs $tmp, $src\t# vector (1S)\n\t"
"cnt $tmp, $tmp\t# vector (8B)\n\t"
"addv $tmp, $tmp\t# vector (8B)\n\t"
"mov $dst, $tmp\t# vector (1D)" %}
ins_encode %{
__ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
__ mov($tmp$$FloatRegister, __ D, 0, $src$$Register);
__ fmovs($tmp$$FloatRegister, $src$$Register);
__ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
__ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
__ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
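Editor's note: the rewritten `popCountI` drops the explicit `movw` zero-extension because `fmovs` moves the 32-bit w-register into the low S lane and zeroes the rest of the vector register, so `cnt`/`addv` only ever see the 32 input bits. A scalar model of the sequence, as a sketch (all names below are this sketch's own):

```cpp
#include <cstdint>
#include <cassert>

// Scalar model of the fmovs/cnt/addv sequence in popCountI: fmovs
// zero-extends the 32-bit input into a 64-bit lane, cnt takes a per-byte
// popcount, and addv sums the byte counts horizontally.
static int popcount_byte(uint8_t b) {
  int c = 0;
  while (b) { c += b & 1; b >>= 1; }
  return c;
}

int popcount32_via_bytes(uint32_t x) {
  uint64_t lane = x;  // fmovs: upper 32 bits are implicitly zero
  int sum = 0;
  for (int i = 0; i < 8; i++)                           // cnt ... T8B
    sum += popcount_byte((lane >> (8 * i)) & 0xff);
  return sum;                                           // addv ... T8B
}

int main() {
  assert(popcount32_via_bytes(0xffffffffu) == 32);
  assert(popcount32_via_bytes(0x80000001u) == 2);
  return 0;
}
```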


@@ -106,6 +106,13 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
match(Set dst (LoadP mem));
predicate(UseZGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
effect(TEMP dst, KILL cr);
// The main load is a candidate to implement implicit null checks, as long as
// legitimize_address() does not require a preceding lea instruction to
// materialize the memory operand. The absence of a preceding lea instruction
// is guaranteed for immLoffset8 memory operands, because these do not lead to
// out-of-range offsets (see definition of immLoffset8). Fortunately,
// immLoffset8 memory operands are the most common ones in practice.
ins_is_late_expanded_null_check_candidate(opnd_array(1)->opcode() == INDOFFL8);
ins_cost(4 * INSN_COST);
@@ -117,7 +124,11 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
// Fix up any out-of-range offsets.
assert_different_registers(rscratch2, as_Register($mem$$base));
assert_different_registers(rscratch2, $dst$$Register);
ref_addr = __ legitimize_address(ref_addr, 8, rscratch2);
int size = 8;
assert(!this->is_late_expanded_null_check_candidate() ||
!MacroAssembler::legitimize_address_requires_lea(ref_addr, size),
"an instruction that can be used for implicit null checking should emit the candidate memory access first");
ref_addr = __ legitimize_address(ref_addr, size, rscratch2);
}
__ ldr($dst$$Register, ref_addr);
z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1);


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -83,8 +83,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
range, \
constraint) \
\
product(bool, NearCpool, true, \
"constant pool is close to instructions") \
product(bool, UseCRC32, false, \
"Use CRC32 instructions for CRC32 computation") \
product(bool, UseCryptoPmullForCRC32, false, \
@@ -97,6 +95,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Use simplest and shortest implementation for array equals") \
product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \
"Use SIMD instructions for left/right shift of BigInteger") \
product(bool, UseSIMDForSHA3Intrinsic, true, \
"Use SIMD SHA3 instructions for SHA3 intrinsic") \
product(bool, AvoidUnalignedAccesses, false, \
"Avoid generating unaligned memory accesses") \
product(bool, UseLSE, false, \


@@ -926,60 +926,26 @@ void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
int constant,
bool decrement) {
increment_mdp_data_at(mdp_in, noreg, constant, decrement);
int constant) {
increment_mdp_data_at(mdp_in, noreg, constant);
}
void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
Register reg,
int constant,
bool decrement) {
Register index,
int constant) {
assert(ProfileInterpreter, "must be profiling interpreter");
// %%% this does 64bit counters at best it is wasting space
// at worst it is a rare bug when counters overflow
assert_different_registers(rscratch2, rscratch1, mdp_in, reg);
assert_different_registers(rscratch2, rscratch1, mdp_in, index);
Address addr1(mdp_in, constant);
Address addr2(rscratch2, reg, Address::lsl(0));
Address addr2(rscratch2, index, Address::lsl(0));
Address &addr = addr1;
if (reg != noreg) {
if (index != noreg) {
lea(rscratch2, addr1);
addr = addr2;
}
if (decrement) {
// Decrement the register. Set condition codes.
// Intel does this
// addptr(data, (int32_t) -DataLayout::counter_increment);
// If the decrement causes the counter to overflow, stay negative
// Label L;
// jcc(Assembler::negative, L);
// addptr(data, (int32_t) DataLayout::counter_increment);
// so we do this
ldr(rscratch1, addr);
subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment);
Label L;
br(Assembler::LO, L); // skip store if counter underflow
str(rscratch1, addr);
bind(L);
} else {
assert(DataLayout::counter_increment == 1,
"flow-free idiom only works with 1");
// Intel does this
// Increment the register. Set carry flag.
// addptr(data, DataLayout::counter_increment);
// If the increment causes the counter to overflow, pull back by 1.
// sbbptr(data, (int32_t)0);
// so we do this
ldr(rscratch1, addr);
adds(rscratch1, rscratch1, DataLayout::counter_increment);
Label L;
br(Assembler::CS, L); // skip store if counter overflow
str(rscratch1, addr);
bind(L);
}
increment(addr, DataLayout::counter_increment);
}
void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
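Editor's note: for reference, the deleted hand-rolled sequences implemented saturating counter updates; the conditional branch on the carry flag skipped the store when an increment overflowed (or a decrement underflowed), pinning the counter at its extreme instead of wrapping. A scalar sketch of the removed semantics:

```cpp
#include <cstdint>

// Semantics of the removed adds/br(CS)/str and subs/br(LO)/str sequences:
// profile counters stick at their bounds rather than wrapping around.
uint64_t saturating_increment(uint64_t counter, uint64_t increment) {
  uint64_t sum = counter + increment;
  return sum < counter ? counter : sum;   // br(Assembler::CS, L) skipped the store
}

uint64_t saturating_decrement(uint64_t counter, uint64_t decrement) {
  return decrement > counter ? counter    // br(Assembler::LO, L) skipped the store
                             : counter - decrement;
}
```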


@@ -247,11 +247,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
void verify_method_data_pointer();
void set_mdp_data_at(Register mdp_in, int constant, Register value);
void increment_mdp_data_at(Address data, bool decrement = false);
void increment_mdp_data_at(Register mdp_in, int constant,
bool decrement = false);
void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
bool decrement = false);
void increment_mdp_data_at(Register mdp_in, int constant);
void increment_mdp_data_at(Register mdp_in, Register index, int constant);
void increment_mask_and_jump(Address counter_addr,
int increment, Address mask,
Register scratch, Register scratch2,


@@ -129,17 +129,22 @@ class MacroAssembler: public Assembler {
a.lea(this, r);
}
// Whether materializing the given address for a LDR/STR requires an
// additional lea instruction.
static bool legitimize_address_requires_lea(const Address &a, int size) {
return a.getMode() == Address::base_plus_offset &&
!Address::offset_ok_for_immed(a.offset(), exact_log2(size));
}
/* Sometimes we get misaligned loads and stores, usually from Unsafe
accesses, and these can exceed the offset range. */
Address legitimize_address(const Address &a, int size, Register scratch) {
if (a.getMode() == Address::base_plus_offset) {
if (! Address::offset_ok_for_immed(a.offset(), exact_log2(size))) {
if (legitimize_address_requires_lea(a, size)) {
block_comment("legitimize_address {");
lea(scratch, a);
block_comment("} legitimize_address");
return Address(scratch);
}
}
return a;
}
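Editor's note: `legitimize_address_requires_lea` only names a check `legitimize_address` already performed, so callers like the zLoadP assert above can query it without emitting code. A sketch of the underlying rule, under the usual AArch64 encoding assumptions (the authoritative logic is `Address::offset_ok_for_immed`; the exact bounds below are this sketch's assumption):

```cpp
#include <cstdint>

// Sketch of the offset test behind legitimize_address_requires_lea, for an
// access of (1 << shift) bytes. AArch64 LDR/STR immediates come in two
// forms: a signed 9-bit unscaled offset (the LDUR encoding) or an unsigned
// 12-bit offset scaled by the access size. Anything else needs a preceding
// lea into a scratch register.
bool offset_fits_immediate(int64_t offset, int shift) {
  const bool unscaled = offset >= -256 && offset <= 255;
  const bool scaled   = offset >= 0 &&
                        offset <= (int64_t(4095) << shift) &&
                        (offset & ((int64_t(1) << shift) - 1)) == 0;
  return unscaled || scaled;
}
```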
@@ -323,6 +328,27 @@ class MacroAssembler: public Assembler {
extr(Rd, Rn, Rn, imm);
}
inline void rolw(Register Rd, Register Rn, unsigned imm) {
extrw(Rd, Rn, Rn, (32 - imm));
}
inline void rol(Register Rd, Register Rn, unsigned imm) {
extr(Rd, Rn, Rn, (64 - imm));
}
using Assembler::rax1;
using Assembler::eor3;
inline void rax1(Register Rd, Register Rn, Register Rm) {
eor(Rd, Rn, Rm, ROR, 63); // Rd = Rn ^ rol(Rm, 1)
}
inline void eor3(Register Rd, Register Rn, Register Rm, Register Rk) {
assert(Rd != Rn, "Use tmp register");
eor(Rd, Rm, Rk);
eor(Rd, Rd, Rn);
}
inline void sxtbw(Register Rd, Register Rn) {
sbfmw(Rd, Rn, 0, 7);
}
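Editor's note: the new aliases rest on two identities: a left rotate by `imm` equals a right rotate by `width - imm` (hence `extr(Rd, Rn, Rn, 64 - imm)`), and SHA3's RAX1 step `Rn ^ rol(Rm, 1)` folds into a single `eor` with a ROR-63 shifted operand. A quick sanity check, as a sketch:

```cpp
#include <cstdint>
#include <cassert>

static uint64_t rol64(uint64_t x, unsigned n) { return n ? (x << n) | (x >> (64 - n)) : x; }
static uint64_t ror64(uint64_t x, unsigned n) { return n ? (x >> n) | (x << (64 - n)) : x; }

int main() {
  const uint64_t x = 0x0123456789abcdefULL, y = 0xfedcba9876543210ULL;
  assert(rol64(y, 13) == ror64(y, 64 - 13));        // rol via extr(Rd, Rn, Rn, 64 - imm)
  assert((x ^ rol64(y, 1)) == (x ^ ror64(y, 63)));  // rax1 via eor(Rd, Rn, Rm, ROR, 63)
  return 0;
}
```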


@@ -7081,6 +7081,366 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
void bcax5(Register a0, Register a1, Register a2, Register a3, Register a4,
Register tmp0, Register tmp1, Register tmp2) {
__ bic(tmp0, a2, a1); // for a0
__ bic(tmp1, a3, a2); // for a1
__ bic(tmp2, a4, a3); // for a2
__ eor(a2, a2, tmp2);
__ bic(tmp2, a0, a4); // for a3
__ eor(a3, a3, tmp2);
__ bic(tmp2, a1, a0); // for a4
__ eor(a0, a0, tmp0);
__ eor(a1, a1, tmp1);
__ eor(a4, a4, tmp2);
}
void keccak_round_gpr(bool can_use_fp, bool can_use_r18, Register rc,
Register a0, Register a1, Register a2, Register a3, Register a4,
Register a5, Register a6, Register a7, Register a8, Register a9,
Register a10, Register a11, Register a12, Register a13, Register a14,
Register a15, Register a16, Register a17, Register a18, Register a19,
Register a20, Register a21, Register a22, Register a23, Register a24,
Register tmp0, Register tmp1, Register tmp2) {
__ eor3(tmp1, a4, a9, a14);
__ eor3(tmp0, tmp1, a19, a24); // tmp0 = a4^a9^a14^a19^a24 = c4
__ eor3(tmp2, a1, a6, a11);
__ eor3(tmp1, tmp2, a16, a21); // tmp1 = a1^a6^a11^a16^a21 = c1
__ rax1(tmp2, tmp0, tmp1); // d0
{
Register tmp3, tmp4;
if (can_use_fp && can_use_r18) {
tmp3 = rfp;
tmp4 = r18_tls;
} else {
tmp3 = a4;
tmp4 = a9;
__ stp(tmp3, tmp4, __ pre(sp, -16));
}
__ eor3(tmp3, a0, a5, a10);
__ eor3(tmp4, tmp3, a15, a20); // tmp4 = a0^a5^a10^a15^a20 = c0
__ eor(a0, a0, tmp2);
__ eor(a5, a5, tmp2);
__ eor(a10, a10, tmp2);
__ eor(a15, a15, tmp2);
__ eor(a20, a20, tmp2); // d0(tmp2)
__ eor3(tmp3, a2, a7, a12);
__ eor3(tmp2, tmp3, a17, a22); // tmp2 = a2^a7^a12^a17^a22 = c2
__ rax1(tmp3, tmp4, tmp2); // d1
__ eor(a1, a1, tmp3);
__ eor(a6, a6, tmp3);
__ eor(a11, a11, tmp3);
__ eor(a16, a16, tmp3);
__ eor(a21, a21, tmp3); // d1(tmp3)
__ rax1(tmp3, tmp2, tmp0); // d3
__ eor3(tmp2, a3, a8, a13);
__ eor3(tmp0, tmp2, a18, a23); // tmp0 = a3^a8^a13^a18^a23 = c3
__ eor(a3, a3, tmp3);
__ eor(a8, a8, tmp3);
__ eor(a13, a13, tmp3);
__ eor(a18, a18, tmp3);
__ eor(a23, a23, tmp3);
__ rax1(tmp2, tmp1, tmp0); // d2
__ eor(a2, a2, tmp2);
__ eor(a7, a7, tmp2);
__ eor(a12, a12, tmp2);
__ rax1(tmp0, tmp0, tmp4); // d4
if (!can_use_fp || !can_use_r18) {
__ ldp(tmp3, tmp4, __ post(sp, 16));
}
__ eor(a17, a17, tmp2);
__ eor(a22, a22, tmp2);
__ eor(a4, a4, tmp0);
__ eor(a9, a9, tmp0);
__ eor(a14, a14, tmp0);
__ eor(a19, a19, tmp0);
__ eor(a24, a24, tmp0);
}
__ rol(tmp0, a10, 3);
__ rol(a10, a1, 1);
__ rol(a1, a6, 44);
__ rol(a6, a9, 20);
__ rol(a9, a22, 61);
__ rol(a22, a14, 39);
__ rol(a14, a20, 18);
__ rol(a20, a2, 62);
__ rol(a2, a12, 43);
__ rol(a12, a13, 25);
__ rol(a13, a19, 8) ;
__ rol(a19, a23, 56);
__ rol(a23, a15, 41);
__ rol(a15, a4, 27);
__ rol(a4, a24, 14);
__ rol(a24, a21, 2);
__ rol(a21, a8, 55);
__ rol(a8, a16, 45);
__ rol(a16, a5, 36);
__ rol(a5, a3, 28);
__ rol(a3, a18, 21);
__ rol(a18, a17, 15);
__ rol(a17, a11, 10);
__ rol(a11, a7, 6);
__ mov(a7, tmp0);
bcax5(a0, a1, a2, a3, a4, tmp0, tmp1, tmp2);
bcax5(a5, a6, a7, a8, a9, tmp0, tmp1, tmp2);
bcax5(a10, a11, a12, a13, a14, tmp0, tmp1, tmp2);
bcax5(a15, a16, a17, a18, a19, tmp0, tmp1, tmp2);
bcax5(a20, a21, a22, a23, a24, tmp0, tmp1, tmp2);
__ ldr(tmp1, __ post(rc, 8));
__ eor(a0, a0, tmp1);
}
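Editor's note: for orientation, here is a compact reference of the round that `keccak_round_gpr` computes. The hand-scheduled assembly fuses theta's `c ^ rol(c, 1)` into `rax1`, merges rho and pi into the register-renaming `rol` chain, and implements chi with the `bic`/`eor` pairs of `bcax5`; this sketch spells the same steps out lane by lane.

```cpp
#include <cstdint>

static uint64_t rol64(uint64_t x, unsigned n) {
  return n ? (x << n) | (x >> (64 - n)) : x;
}

// One Keccak-f[1600] round over the 5x5 lane state a[x + 5*y].
void keccak_round(uint64_t a[25], uint64_t rc) {
  // theta: column parities, then d[x] = c[x-1] ^ rol(c[x+1], 1)  (rax1 pattern)
  uint64_t c[5], d[5];
  for (int x = 0; x < 5; x++)
    c[x] = a[x] ^ a[x + 5] ^ a[x + 10] ^ a[x + 15] ^ a[x + 20];
  for (int x = 0; x < 5; x++)
    d[x] = c[(x + 4) % 5] ^ rol64(c[(x + 1) % 5], 1);
  for (int i = 0; i < 25; i++)
    a[i] ^= d[i % 5];
  // rho + pi: rotate each lane, then move it to position (y, 2x + 3y)
  static const unsigned rot[25] = {
     0,  1, 62, 28, 27,
    36, 44,  6, 55, 20,
     3, 10, 43, 25, 39,
    41, 45, 15, 21,  8,
    18,  2, 61, 56, 14};
  uint64_t b[25];
  for (int x = 0; x < 5; x++)
    for (int y = 0; y < 5; y++)
      b[y + 5 * ((2 * x + 3 * y) % 5)] = rol64(a[x + 5 * y], rot[x + 5 * y]);
  // chi: a = b ^ (~b[x+1] & b[x+2])  (the bic/eor pairs in bcax5)
  for (int y = 0; y < 5; y++)
    for (int x = 0; x < 5; x++)
      a[x + 5 * y] = b[x + 5 * y] ^
                     (~b[(x + 1) % 5 + 5 * y] & b[(x + 2) % 5 + 5 * y]);
  // iota: fold the round constant into lane (0,0), like the final ldr/eor
  a[0] ^= rc;
}
```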
// Arguments:
//
// Inputs:
// c_rarg0 - byte[] source+offset
// c_rarg1 - byte[] SHA.state
// c_rarg2 - int block_size
// c_rarg3 - int offset
// c_rarg4 - int limit
//
address generate_sha3_implCompress_gpr(StubGenStubId stub_id) {
bool multi_block;
switch (stub_id) {
case sha3_implCompress_id:
multi_block = false;
break;
case sha3_implCompressMB_id:
multi_block = true;
break;
default:
ShouldNotReachHere();
}
static const uint64_t round_consts[24] = {
0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
};
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
address start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
Register block_size = c_rarg2;
Register ofs = c_rarg3;
Register limit = c_rarg4;
// use r3..r17, r19..r28 to keep a0..a24.
// a0..a24 are respective locals from SHA3.java
Register a0 = r25,
a1 = r26,
a2 = r27,
a3 = r3,
a4 = r4,
a5 = r5,
a6 = r6,
a7 = r7,
a8 = rscratch1, // r8
a9 = rscratch2, // r9
a10 = r10,
a11 = r11,
a12 = r12,
a13 = r13,
a14 = r14,
a15 = r15,
a16 = r16,
a17 = r17,
a18 = r28,
a19 = r19,
a20 = r20,
a21 = r21,
a22 = r22,
a23 = r23,
a24 = r24;
Register tmp0 = block_size, tmp1 = buf, tmp2 = state, tmp3 = r30;
Label sha3_loop, rounds24_preloop, loop_body;
Label sha3_512_or_sha3_384, shake128;
bool can_use_r18 = false;
#ifndef R18_RESERVED
can_use_r18 = true;
#endif
bool can_use_fp = !PreserveFramePointer;
__ enter();
// save almost all yet unsaved gpr registers on stack
__ str(block_size, __ pre(sp, -128));
if (multi_block) {
__ stpw(ofs, limit, Address(sp, 8));
}
// 8 bytes at sp+16 will be used to keep buf
__ stp(r19, r20, Address(sp, 32));
__ stp(r21, r22, Address(sp, 48));
__ stp(r23, r24, Address(sp, 64));
__ stp(r25, r26, Address(sp, 80));
__ stp(r27, r28, Address(sp, 96));
if (can_use_r18 && can_use_fp) {
__ stp(r18_tls, state, Address(sp, 112));
} else {
__ str(state, Address(sp, 112));
}
// begin sha3 calculations: loading a0..a24 from the state array
__ ldp(a0, a1, state);
__ ldp(a2, a3, Address(state, 16));
__ ldp(a4, a5, Address(state, 32));
__ ldp(a6, a7, Address(state, 48));
__ ldp(a8, a9, Address(state, 64));
__ ldp(a10, a11, Address(state, 80));
__ ldp(a12, a13, Address(state, 96));
__ ldp(a14, a15, Address(state, 112));
__ ldp(a16, a17, Address(state, 128));
__ ldp(a18, a19, Address(state, 144));
__ ldp(a20, a21, Address(state, 160));
__ ldp(a22, a23, Address(state, 176));
__ ldr(a24, Address(state, 192));
__ BIND(sha3_loop);
// load input
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a0, a0, tmp3);
__ eor(a1, a1, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a2, a2, tmp3);
__ eor(a3, a3, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a4, a4, tmp3);
__ eor(a5, a5, tmp2);
__ ldr(tmp3, __ post(buf, 8));
__ eor(a6, a6, tmp3);
// block_size == 72, SHA3-512; block_size == 104, SHA3-384
__ tbz(block_size, 7, sha3_512_or_sha3_384);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a7, a7, tmp3);
__ eor(a8, a8, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a9, a9, tmp3);
__ eor(a10, a10, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a11, a11, tmp3);
__ eor(a12, a12, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a13, a13, tmp3);
__ eor(a14, a14, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a15, a15, tmp3);
__ eor(a16, a16, tmp2);
// block_size == 136, bit4 == 0 and bit5 == 0, SHA3-256 or SHAKE256
__ andw(tmp2, block_size, 48);
__ cbzw(tmp2, rounds24_preloop);
__ tbnz(block_size, 5, shake128);
// block_size == 144, bit5 == 0, SHA3-224
__ ldr(tmp3, __ post(buf, 8));
__ eor(a17, a17, tmp3);
__ b(rounds24_preloop);
__ BIND(shake128);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a17, a17, tmp3);
__ eor(a18, a18, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a19, a19, tmp3);
__ eor(a20, a20, tmp2);
__ b(rounds24_preloop); // block_size == 168, SHAKE128
__ BIND(sha3_512_or_sha3_384);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a7, a7, tmp3);
__ eor(a8, a8, tmp2);
__ tbz(block_size, 5, rounds24_preloop); // SHA3-512
// SHA3-384
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a9, a9, tmp3);
__ eor(a10, a10, tmp2);
__ ldp(tmp3, tmp2, __ post(buf, 16));
__ eor(a11, a11, tmp3);
__ eor(a12, a12, tmp2);
__ BIND(rounds24_preloop);
__ fmovs(v0, 24.0); // float loop counter,
__ fmovs(v1, 1.0); // exact representation
__ str(buf, Address(sp, 16));
__ lea(tmp3, ExternalAddress((address) round_consts));
__ BIND(loop_body);
keccak_round_gpr(can_use_fp, can_use_r18, tmp3,
a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24,
tmp0, tmp1, tmp2);
__ fsubs(v0, v0, v1);
__ fcmps(v0, 0.0);
__ br(__ NE, loop_body);
if (multi_block) {
__ ldrw(block_size, sp); // block_size
__ ldpw(tmp2, tmp1, Address(sp, 8)); // offset, limit
__ addw(tmp2, tmp2, block_size);
__ cmpw(tmp2, tmp1);
__ strw(tmp2, Address(sp, 8)); // store offset in case we're jumping
__ ldr(buf, Address(sp, 16)); // restore buf in case we're jumping
__ br(Assembler::LE, sha3_loop);
__ movw(c_rarg0, tmp2); // return offset
}
if (can_use_fp && can_use_r18) {
__ ldp(r18_tls, state, Address(sp, 112));
} else {
__ ldr(state, Address(sp, 112));
}
// save calculated sha3 state
__ stp(a0, a1, Address(state));
__ stp(a2, a3, Address(state, 16));
__ stp(a4, a5, Address(state, 32));
__ stp(a6, a7, Address(state, 48));
__ stp(a8, a9, Address(state, 64));
__ stp(a10, a11, Address(state, 80));
__ stp(a12, a13, Address(state, 96));
__ stp(a14, a15, Address(state, 112));
__ stp(a16, a17, Address(state, 128));
__ stp(a18, a19, Address(state, 144));
__ stp(a20, a21, Address(state, 160));
__ stp(a22, a23, Address(state, 176));
__ str(a24, Address(state, 192));
// restore required registers from stack
__ ldp(r19, r20, Address(sp, 32));
__ ldp(r21, r22, Address(sp, 48));
__ ldp(r23, r24, Address(sp, 64));
__ ldp(r25, r26, Address(sp, 80));
__ ldp(r27, r28, Address(sp, 96));
if (can_use_fp && can_use_r18) {
__ add(rfp, sp, 128); // leave() will copy rfp to sp below
} // else no need to recalculate rfp, since it wasn't changed
__ leave();
__ ret(lr);
return start;
}
/**
* Arguments:
*
@@ -11512,9 +11872,15 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(StubGenStubId::sha512_implCompressMB_id);
}
if (UseSHA3Intrinsics) {
StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubGenStubId::sha3_implCompress_id);
StubRoutines::_double_keccak = generate_double_keccak();
if (UseSIMDForSHA3Intrinsic) {
StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubGenStubId::sha3_implCompress_id);
StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(StubGenStubId::sha3_implCompressMB_id);
} else {
StubRoutines::_sha3_implCompress = generate_sha3_implCompress_gpr(StubGenStubId::sha3_implCompress_id);
StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress_gpr(StubGenStubId::sha3_implCompressMB_id);
}
}
if (UsePoly1305Intrinsics) {
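Editor's note: the `block_size` values the absorb loop branches on are the Keccak sponge rates, `rate = 200 - 2 * digest_bytes`; the `tbz`/`andw`/`tbnz` bit tests are just a fast decoder for the five possible rates (72, 104, 136, 144, 168). A sketch verifying the mapping:

```cpp
// Keccak rates in bytes: the state is 200 bytes and the capacity is twice
// the digest length. These are exactly the block_size values distinguished
// by tbz(block_size, 7, ...), andw(..., 48) and tbnz(block_size, 5, ...).
constexpr int keccak_rate_bytes(int digest_bits) {
  return 200 - 2 * (digest_bits / 8);
}

static_assert(keccak_rate_bytes(512) == 72,  "SHA3-512");
static_assert(keccak_rate_bytes(384) == 104, "SHA3-384");
static_assert(keccak_rate_bytes(256) == 136, "SHA3-256 and SHAKE256");
static_assert(keccak_rate_bytes(224) == 144, "SHA3-224");
static_assert(keccak_rate_bytes(128) == 168, "SHAKE128");
```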


@@ -865,6 +865,10 @@ void TemplateInterpreterGenerator::lock_method() {
// rcpool: cp cache
// stack_pointer: previous sp
void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
// Save ConstMethod* in r5_const_method for later use to avoid loading multiple times
Register r5_const_method = r5;
__ ldr(r5_const_method, Address(rmethod, Method::const_offset()));
// initialize fixed part of activation frame
if (native_call) {
__ sub(esp, sp, 14 * wordSize);
@@ -875,8 +879,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ stp(zr, zr, Address(sp, 12 * wordSize));
} else {
__ sub(esp, sp, 12 * wordSize);
__ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod
__ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase
__ add(rbcp, r5_const_method, in_bytes(ConstMethod::codes_offset())); // get codebase
__ mov(rscratch1, frame::interpreter_frame_initial_sp_offset);
__ stp(rscratch1, rbcp, Address(__ pre(sp, -12 * wordSize)));
}
@@ -896,9 +899,10 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ stp(rfp, lr, Address(sp, 10 * wordSize));
__ lea(rfp, Address(sp, 10 * wordSize));
__ ldr(rcpool, Address(rmethod, Method::const_offset()));
__ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset()));
__ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset()));
// Save ConstantPool* in r11_constants for later use to avoid loading multiple times
Register r11_constants = r11;
__ ldr(r11_constants, Address(r5_const_method, ConstMethod::constants_offset()));
__ ldr(rcpool, Address(r11_constants, ConstantPool::cache_offset()));
__ sub(rscratch1, rlocals, rfp);
__ lsr(rscratch1, rscratch1, Interpreter::logStackElementSize); // rscratch1 = rlocals - fp();
// Store relativized rlocals, see frame::interpreter_frame_locals().
@@ -908,11 +912,12 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
// leave last_sp as null
__ stp(zr, r19_sender_sp, Address(sp, 8 * wordSize));
// Get mirror
__ load_mirror(r10, rmethod, r5, rscratch2);
// Get mirror. Resolve ConstantPool* -> InstanceKlass* -> Java mirror.
__ ldr(r10, Address(r11_constants, ConstantPool::pool_holder_offset()));
__ ldr(r10, Address(r10, in_bytes(Klass::java_mirror_offset())));
__ resolve_oop_handle(r10, rscratch1, rscratch2);
if (! native_call) {
__ ldr(rscratch1, Address(rmethod, Method::const_offset()));
__ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset()));
__ ldrh(rscratch1, Address(r5_const_method, ConstMethod::max_stack_offset()));
__ add(rscratch1, rscratch1, MAX2(3, Method::extra_stack_entries()));
__ sub(rscratch1, sp, rscratch1, ext::uxtw, 3);
__ andr(rscratch1, rscratch1, -16);
@@ -1888,6 +1893,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
Interpreter::_remove_activation_preserving_args_entry = __ pc();
__ empty_expression_stack();
__ restore_bcp(); // We could have returned from deoptimizing this frame, so restore rbcp.
// Set the popframe_processing bit in pending_popframe_condition
// indicating that we are currently handling popframe, so that
// call_VMs that may happen later do not trigger new popframe


@@ -1144,6 +1144,7 @@ void TemplateTable::aastore() {
// Get the value we will store
__ ldr(r0, at_tos());
// Now store using the appropriate barrier
// Clobbers: r10, r11, r3
do_oop_store(_masm, element_address, r0, IS_ARRAY);
__ b(done);
@@ -1152,6 +1153,7 @@ void TemplateTable::aastore() {
__ profile_null_seen(r2);
// Store a null
// Clobbers: r10, r11, r3
do_oop_store(_masm, element_address, noreg, IS_ARRAY);
// Pop stack arguments
@@ -2882,6 +2884,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
__ pop(atos);
if (!is_static) pop_and_check_object(obj);
// Store into the field
// Clobbers: r10, r11, r3
do_oop_store(_masm, field, r0, IN_HEAP);
if (rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
@@ -3077,12 +3080,12 @@ void TemplateTable::fast_storefield(TosState state)
// access constant pool cache
__ load_field_entry(r2, r1);
// R1: field offset, R2: field holder, R3: flags
load_resolved_field_entry(r2, r2, noreg, r1, r3);
// R1: field offset, R2: field holder, R5: flags
load_resolved_field_entry(r2, r2, noreg, r1, r5);
{
Label notVolatile;
__ tbz(r3, ResolvedFieldEntry::is_volatile_shift, notVolatile);
__ tbz(r5, ResolvedFieldEntry::is_volatile_shift, notVolatile);
__ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
__ bind(notVolatile);
}
@@ -3098,6 +3101,7 @@ void TemplateTable::fast_storefield(TosState state)
// access field
switch (bytecode()) {
case Bytecodes::_fast_aputfield:
// Clobbers: r10, r11, r3
do_oop_store(_masm, field, r0, IN_HEAP);
break;
case Bytecodes::_fast_lputfield:
@@ -3130,7 +3134,7 @@ void TemplateTable::fast_storefield(TosState state)
{
Label notVolatile;
__ tbz(r3, ResolvedFieldEntry::is_volatile_shift, notVolatile);
__ tbz(r5, ResolvedFieldEntry::is_volatile_shift, notVolatile);
__ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore);
__ bind(notVolatile);
}


@@ -379,7 +379,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
}
}
} else if (UseSHA3Intrinsics) {
} else if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic) {
warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
}


@@ -175,6 +175,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_fmaD:
case Interpreter::java_lang_math_fmaF:
case Interpreter::java_lang_math_tanh:
case Interpreter::java_lang_math_cbrt:
// TODO: Implement intrinsic
break;
default:


@@ -591,6 +591,10 @@ class Assembler : public AbstractAssembler {
XVRDPIC_OPCODE = (60u << OPCODE_SHIFT | 235u << 2),
XVRDPIM_OPCODE = (60u << OPCODE_SHIFT | 249u << 2),
XVRDPIP_OPCODE = (60u << OPCODE_SHIFT | 233u << 2),
XVMINSP_OPCODE = (60u << OPCODE_SHIFT | 200u << 3),
XVMINDP_OPCODE = (60u << OPCODE_SHIFT | 232u << 3),
XVMAXSP_OPCODE = (60u << OPCODE_SHIFT | 192u << 3),
XVMAXDP_OPCODE = (60u << OPCODE_SHIFT | 224u << 3),
// Deliver A Random Number (introduced with POWER9)
DARN_OPCODE = (31u << OPCODE_SHIFT | 755u << 1),
@@ -699,15 +703,19 @@ class Assembler : public AbstractAssembler {
VMAXSB_OPCODE = (4u << OPCODE_SHIFT | 258u ),
VMAXSW_OPCODE = (4u << OPCODE_SHIFT | 386u ),
VMAXSH_OPCODE = (4u << OPCODE_SHIFT | 322u ),
VMAXSD_OPCODE = (4u << OPCODE_SHIFT | 450u ),
VMAXUB_OPCODE = (4u << OPCODE_SHIFT | 2u ),
VMAXUW_OPCODE = (4u << OPCODE_SHIFT | 130u ),
VMAXUH_OPCODE = (4u << OPCODE_SHIFT | 66u ),
VMAXUD_OPCODE = (4u << OPCODE_SHIFT | 194u ),
VMINSB_OPCODE = (4u << OPCODE_SHIFT | 770u ),
VMINSW_OPCODE = (4u << OPCODE_SHIFT | 898u ),
VMINSH_OPCODE = (4u << OPCODE_SHIFT | 834u ),
VMINSD_OPCODE = (4u << OPCODE_SHIFT | 962u ),
VMINUB_OPCODE = (4u << OPCODE_SHIFT | 514u ),
VMINUW_OPCODE = (4u << OPCODE_SHIFT | 642u ),
VMINUH_OPCODE = (4u << OPCODE_SHIFT | 578u ),
VMINUD_OPCODE = (4u << OPCODE_SHIFT | 706u ),
VCMPEQUB_OPCODE= (4u << OPCODE_SHIFT | 6u ),
VCMPEQUH_OPCODE= (4u << OPCODE_SHIFT | 70u ),
@@ -2302,15 +2310,19 @@ class Assembler : public AbstractAssembler {
inline void vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vmaxsd( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vmaxub( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vmaxud( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminsb( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminsw( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminsh( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminsd( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminub( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminuw( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminuh( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vminud( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vcmpequb( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vcmpequh( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vcmpequw( VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2435,6 +2447,12 @@ class Assembler : public AbstractAssembler {
inline void xvrdpim( VectorSRegister d, VectorSRegister b);
inline void xvrdpip( VectorSRegister d, VectorSRegister b);
// The following functions do not match exactly the Java.math semantics.
inline void xvminsp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xvmindp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xvmaxsp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xvmaxdp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
// VSX Extended Mnemonics
inline void xxspltd( VectorSRegister d, VectorSRegister a, int x);
inline void xxmrghd( VectorSRegister d, VectorSRegister a, VectorSRegister b);


@@ -908,6 +908,11 @@ inline void Assembler::xvrdpic( VectorSRegister d, VectorSRegister b)
inline void Assembler::xvrdpim( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIM_OPCODE | vsrt(d) | vsrb(b)); }
inline void Assembler::xvrdpip( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIP_OPCODE | vsrt(d) | vsrb(b)); }
inline void Assembler::xvminsp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMINSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xvmindp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMINDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xvmaxsp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMAXSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xvmaxdp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMAXDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mtvrwz( VectorRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@@ -1022,15 +1027,19 @@ inline void Assembler::vavguh( VectorRegister d, VectorRegist
inline void Assembler::vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vmaxsd( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSD_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vmaxub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vmaxud( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUD_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminsd( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSD_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vminud( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUD_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vcmpequb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
inline void Assembler::vcmpequh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
inline void Assembler::vcmpequw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }


@@ -619,3 +619,48 @@ void C2_MacroAssembler::count_positives(Register src, Register cnt, Register res
bind(Ldone);
subf(result, src, result); // Result is offset from src.
}
void C2_MacroAssembler::reduceI(int opcode, Register dst, Register iSrc, VectorRegister vSrc,
VectorRegister vTmp1, VectorRegister vTmp2) {
auto fn_vec_op = [this](int opcode, const VectorRegister &dst, const VectorRegister &a, const VectorRegister &b) {
switch(opcode) {
case Op_AddReductionVI: vadduwm(dst, a, b); break;
case Op_MulReductionVI: vmuluwm(dst, a , b); break;
case Op_AndReductionV: vand(dst, a, b); break;
case Op_OrReductionV: vor(dst, a, b); break;
case Op_XorReductionV: vxor(dst, a, b); break;
case Op_MinReductionV: vminsw(dst, a, b); break;
case Op_MaxReductionV: vmaxsw(dst, a, b); break;
default: assert(false, "wrong opcode");
}
};
auto fn_scalar_op = [this](int opcode, const Register &dst, const Register &a, const Register &b) {
switch (opcode) {
case Op_AddReductionVI: add(dst, a, b); break;
case Op_MulReductionVI: mullw(dst, a, b); break;
case Op_AndReductionV: andr(dst, a, b); break;
case Op_OrReductionV: orr(dst, a, b); break;
case Op_XorReductionV: xorr(dst, a, b); break;
case Op_MinReductionV:
cmpw(CR0, a, b);
isel(dst, CR0, Assembler::less, /*invert*/false, a, b);
break;
case Op_MaxReductionV:
cmpw(CR0, a, b);
isel(dst, CR0, Assembler::greater, /*invert*/false, a, b);
break;
default: assert(false, "wrong opcode");
}
};
// vSrc = [i0,i1,i2,i3]
vsldoi(vTmp1, vSrc, vSrc, 8); // vTmp1 <- [i2,i3,i0,i1]
fn_vec_op(opcode, vTmp2, vSrc, vTmp1); // vTmp2 <- [op(i0,i2), op(i1,i3), op(i2,i0), op(i3,i1)]
vsldoi(vTmp1, vTmp2, vTmp2, 4); // vTmp1 <- [op(i1,i3), op(i2,i0), op(i3,i1), op(i0,i2)]
fn_vec_op(opcode, vTmp1, vTmp1, vTmp2); // vTmp1 <- [op(i0,i1,i2,i3), op(i0,i1,i2,i3), op(i0,i1,i2,i3), op(i0,i1,i2,i3)]
mfvsrwz(R0, vTmp1.to_vsr()); // R0 <- op(i0,i1,i2,i3)
fn_scalar_op(opcode, dst, iSrc, R0); // dst <- op(iSrc, R0)
}
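Editor's note: the reduction needs only two `vsldoi` rotations because each lane-wise op halves the number of distinct partial results: after combining with the 2-lane rotation every lane holds a pair, and after the 1-lane rotation every lane holds the full reduction. A scalar model, as a sketch (`op` stands in for whichever opcode is dispatched):

```cpp
// Scalar model of C2_MacroAssembler::reduceI for four int lanes.
static int op(int a, int b) { return a + b; }  // stand-in for add/mul/and/or/xor/min/max

int reduce4(const int v[4], int iSrc) {
  int t1[4], t2[4];
  for (int i = 0; i < 4; i++) t1[i] = v[(i + 2) % 4];   // vsldoi by 8 bytes (2 lanes)
  for (int i = 0; i < 4; i++) t2[i] = op(v[i], t1[i]);  // each lane pairs opposite halves
  for (int i = 0; i < 4; i++) t1[i] = t2[(i + 1) % 4];  // vsldoi by 4 bytes (1 lane)
  for (int i = 0; i < 4; i++) t1[i] = op(t1[i], t2[i]); // every lane == op(v0,v1,v2,v3)
  return op(iSrc, t1[0]);                               // mfvsrwz + scalar op with iSrc
}
```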


@@ -73,4 +73,6 @@
void count_positives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
void reduceI(int opcode, Register dst, Register iSrc, VectorRegister vSrc, VectorRegister vTmp1, VectorRegister vTmp2);
#endif // CPU_PPC_C2_MACROASSEMBLER_PPC_HPP


@@ -34,9 +34,11 @@
#include <sys/mman.h>
#endif // LINUX
// Default value if probing is not implemented for a certain platform: 128TB
static const size_t DEFAULT_MAX_ADDRESS_BIT = 47;
// Minimum value returned, if probing fails: 64GB
// Default value if probing is not implemented for a certain platform.
// The max address bit is restricted by implicit assumptions in the code, for
// instance the bit layout of ZForwardingEntry or the partial array entry in
// the mark stack (see ZMarkStackEntry).
static const size_t DEFAULT_MAX_ADDRESS_BIT = 46;
// Minimum value returned if probing fails
static const size_t MINIMUM_MAX_ADDRESS_BIT = 36;
static size_t probe_valid_max_address_bit() {
@@ -91,10 +93,15 @@ static size_t probe_valid_max_address_bit() {
size_t ZPlatformAddressOffsetBits() {
static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
#ifdef ADDRESS_SANITIZER
// The max supported value is 44 because of other internal data structures.
return MIN2(valid_max_address_offset_bits, (size_t)44);
#else
const size_t min_address_offset_bits = max_address_offset_bits - 2;
const size_t address_offset = ZGlobalsPointers::min_address_offset_request();
const size_t address_offset_bits = log2i_exact(address_offset);
return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
#endif
}
size_t ZPlatformAddressHeapBaseShift() {
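Editor's note: worked through with the new default, a probed max address bit of 46 gives 47 valid offset bits, a maximum of 44 for the heap offset, and a minimum of 42, into which the requested offset size is clamped (the ASAN build short-circuits to at most 44). A sketch of the arithmetic:

```cpp
#include <algorithm>
#include <cstddef>

// Model of ZPlatformAddressOffsetBits() in the non-ASAN path. With the new
// DEFAULT_MAX_ADDRESS_BIT = 46: valid = 47, max = 44, min = 42.
size_t address_offset_bits(size_t probed_max_address_bit, size_t requested_bits) {
  const size_t valid_max_offset_bits = probed_max_address_bit + 1;
  const size_t max_offset_bits = valid_max_offset_bits - 3;
  const size_t min_offset_bits = max_offset_bits - 2;
  return std::clamp(requested_bits, min_offset_bits, max_offset_bits);
}
```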


@@ -141,6 +141,7 @@ instruct zLoadP(iRegPdst dst, memoryAlg4 mem, flagsRegCR0 cr0)
%{
match(Set dst (LoadP mem));
effect(TEMP_DEF dst, KILL cr0);
ins_is_late_expanded_null_check_candidate(true);
ins_cost(MEMORY_REF_COST);
predicate((UseZGC && n->as_Load()->barrier_data() != 0)
@@ -160,6 +161,7 @@ instruct zLoadP_acq(iRegPdst dst, memoryAlg4 mem, flagsRegCR0 cr0)
%{
match(Set dst (LoadP mem));
effect(TEMP_DEF dst, KILL cr0);
ins_is_late_expanded_null_check_candidate(true);
ins_cost(3 * MEMORY_REF_COST);
// Predicate on instruction order is implicitly present due to the predicate of the cheaper zLoadP operation

View File

@@ -115,8 +115,8 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
"Use static branch prediction hints for uncommon paths.") \
\
/* special instructions */ \
product(bool, SuperwordUseVSX, true, \
"Use Power8 VSX instructions for superword optimization.") \
product(bool, SuperwordUseVSX, false, \
"Use VSX instructions for superword optimization.") \
\
product(bool, UseByteReverseInstructions, false, DIAGNOSTIC, \
"Use byte reverse instructions.") \

View File

@@ -359,7 +359,9 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
? -1 // enforce receiver null check
: oopDesc::klass_offset_in_bytes(); // regular null-checking behavior
__ null_check_throw(receiver_reg, klass_offset, temp1, Interpreter::throw_NullPointerException_entry());
address NullPointerException_entry = for_compiler_entry ? SharedRuntime::throw_NullPointerException_at_call_entry()
: Interpreter::throw_NullPointerException_entry();
__ null_check_throw(receiver_reg, klass_offset, temp1, NullPointerException_entry);
if (iid != vmIntrinsics::_linkToSpecial || VerifyMethodHandles) {
__ load_klass(temp1_recv_klass, receiver_reg);

View File

@@ -2399,6 +2399,18 @@ bool Matcher::match_rule_supported(int opcode) {
case Op_SubVL:
case Op_MulVI:
case Op_RoundDoubleModeV:
case Op_MinV:
case Op_MaxV:
case Op_AndV:
case Op_OrV:
case Op_XorV:
case Op_AddReductionVI:
case Op_MulReductionVI:
case Op_AndReductionV:
case Op_OrReductionV:
case Op_XorReductionV:
case Op_MinReductionV:
case Op_MaxReductionV:
return SuperwordUseVSX;
case Op_PopCountVI:
case Op_PopCountVL:
@@ -2440,6 +2452,22 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
return false;
}
// Special cases
switch (opcode) {
// Reductions only support INT at the moment.
case Op_AddReductionVI:
case Op_MulReductionVI:
case Op_AndReductionV:
case Op_OrReductionV:
case Op_XorReductionV:
case Op_MinReductionV:
case Op_MaxReductionV:
return bt == T_INT;
// MaxV, MinV require the element type to be INT or LONG.
case Op_MaxV:
case Op_MinV:
return bt == T_INT || bt == T_LONG;
}
return true; // By default, match rules are supported.
}
@@ -4008,6 +4036,10 @@ ins_attrib ins_field_cbuf_insts_offset(-1);
ins_attrib ins_field_load_ic_hi_node(0);
ins_attrib ins_field_load_ic_node(0);
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct
// parsing in the ADLC because operands constitute user defined types
@@ -13485,6 +13517,113 @@ instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
ins_pipe(pipe_class_default);
%}
// Vector Min / Max Instructions
instruct vmin_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (MinV src1 src2));
format %{ "VMIN $dst,$src1,$src2\t// vector min" %}
size(4);
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
switch (bt) {
case T_INT:
__ vminsw($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
break;
case T_LONG:
__ vminsd($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
break;
default:
ShouldNotReachHere();
}
%}
ins_pipe(pipe_class_default);
%}
instruct vmax_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (MaxV src1 src2));
format %{ "VMAX $dst,$src1,$src2\t// vector max" %}
size(4);
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
switch (bt) {
case T_INT:
__ vmaxsw($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
break;
case T_LONG:
__ vmaxsd($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
break;
default:
ShouldNotReachHere();
}
%}
ins_pipe(pipe_class_default);
%}
instruct vand(vecX dst, vecX src1, vecX src2) %{
match(Set dst (AndV src1 src2));
size(4);
format %{ "VAND $dst,$src1,$src2\t// and vectors" %}
ins_encode %{
__ vand($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
%}
ins_pipe(pipe_class_default);
%}
instruct vor(vecX dst, vecX src1, vecX src2) %{
match(Set dst (OrV src1 src2));
size(4);
format %{ "VOR $dst,$src1,$src2\t// or vectors" %}
ins_encode %{
__ vor($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
%}
ins_pipe(pipe_class_default);
%}
instruct vxor(vecX dst, vecX src1, vecX src2) %{
match(Set dst (XorV src1 src2));
size(4);
format %{ "VXOR $dst,$src1,$src2\t// xor vectors" %}
ins_encode %{
__ vxor($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
%}
ins_pipe(pipe_class_default);
%}
instruct reductionI_arith_logic(iRegIdst dst, iRegIsrc srcInt, vecX srcVec, vecX tmp1, vecX tmp2) %{
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT);
match(Set dst (AddReductionVI srcInt srcVec));
match(Set dst (MulReductionVI srcInt srcVec));
match(Set dst (AndReductionV srcInt srcVec));
match(Set dst ( OrReductionV srcInt srcVec));
match(Set dst (XorReductionV srcInt srcVec));
effect(TEMP tmp1, TEMP tmp2);
ins_cost(DEFAULT_COST * 6);
format %{ "REDUCEI_ARITH_LOGIC // $dst,$srcInt,$srcVec,$tmp1,$tmp2\t// reduce vector int add/mul/and/or/xor" %}
size(24);
ins_encode %{
int opcode = this->ideal_Opcode();
__ reduceI(opcode, $dst$$Register, $srcInt$$Register, $srcVec$$VectorSRegister->to_vr(),
$tmp1$$VectorSRegister->to_vr(), $tmp2$$VectorSRegister->to_vr());
%}
ins_pipe(pipe_class_default);
%}
instruct reductionI_min_max(iRegIdst dst, iRegIsrc srcInt, vecX srcVec, vecX tmp1, vecX tmp2, flagsRegCR0 cr0) %{
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT);
match(Set dst (MinReductionV srcInt srcVec));
match(Set dst (MaxReductionV srcInt srcVec));
effect(TEMP tmp1, TEMP tmp2, KILL cr0);
ins_cost(DEFAULT_COST * 7);
format %{ "REDUCEI_MINMAX // $dst,$srcInt,$srcVec,$tmp1,$tmp2,cr0\t// reduce vector int min/max" %}
size(28);
ins_encode %{
int opcode = this->ideal_Opcode();
__ reduceI(opcode, $dst$$Register, $srcInt$$Register, $srcVec$$VectorSRegister->to_vr(),
$tmp1$$VectorSRegister->to_vr(), $tmp2$$VectorSRegister->to_vr());
%}
ins_pipe(pipe_class_default);
%}
// Vector Absolute Instructions
instruct vabs4F_reg(vecX dst, vecX src) %{

View File

@@ -523,7 +523,7 @@ constexpr FloatRegister F11_ARG11 = F11; // volatile
constexpr FloatRegister F12_ARG12 = F12; // volatile
constexpr FloatRegister F13_ARG13 = F13; // volatile
// Register declarations to be used in frame manager assembly code.
// Register declarations to be used in template interpreter assembly code.
// Use only non-volatile registers in order to keep values across C-calls.
constexpr Register R14_bcp = R14;
constexpr Register R15_esp = R15; // slot below top of expression stack for ld/st with update
@@ -533,7 +533,7 @@ constexpr Register R17_tos = R17; // The interpreter's top of (expres
constexpr Register R18_locals = R18; // address of first param slot (receiver).
constexpr Register R19_method = R19; // address of current method
// Temporary registers to be used within frame manager. We can use
// Temporary registers to be used within template interpreter. We can use
// the non-volatiles because the call stub has saved them.
// Use only non-volatile registers in order to keep values across C-calls.
constexpr Register R21_tmp1 = R21;

View File

@@ -2935,7 +2935,7 @@ static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
__ cmpdi(CR0, number_of_frames_reg, 0);
__ bne(CR0, loop);
// Get the return address pointing into the frame manager.
// Get the return address pointing into the template interpreter.
__ ld(R0, 0, pcs_reg);
// Store it in the top interpreter frame.
__ std(R0, _abi0(lr), R1_SP);

View File

@@ -86,7 +86,7 @@ class StubGenerator: public StubCodeGenerator {
// R10 - thread : Thread*
//
address generate_call_stub(address& return_address) {
// Setup a new c frame, copy java arguments, call frame manager or
// Set up a new C frame, copy Java arguments, call template interpreter or
// native_entry, and process result.
StubGenStubId stub_id = StubGenStubId::call_stub_id;
@@ -215,11 +215,10 @@
}
{
BLOCK_COMMENT("Call frame manager or native entry.");
// Call frame manager or native entry.
BLOCK_COMMENT("Call template interpreter or native entry.");
assert_different_registers(r_arg_entry, r_top_of_arguments_addr, r_arg_method, r_arg_thread);
// Register state on entry to frame manager / native entry:
// Register state on entry to template interpreter / native entry:
//
// tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
// R19_method - Method
@@ -242,7 +241,7 @@
// Set R15_prev_state to 0 for simplifying checks in callee.
__ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R0);
// Stack on entry to frame manager / native entry:
// Stack on entry to template interpreter / native entry:
//
// F0 [TOP_IJAVA_FRAME_ABI]
// alignment (optional)
@@ -262,7 +261,7 @@
__ mr(R21_sender_SP, R1_SP);
// Do a light-weight C-call here, r_arg_entry holds the address
// of the interpreter entry point (frame manager or native entry)
// of the interpreter entry point (template interpreter or native entry)
// and save runtime-value of LR in return_address.
assert(r_arg_entry != tos && r_arg_entry != R19_method && r_arg_entry != R16_thread,
"trashed r_arg_entry");
@@ -270,11 +269,10 @@
}
{
BLOCK_COMMENT("Returned from frame manager or native entry.");
// Returned from frame manager or native entry.
BLOCK_COMMENT("Returned from template interpreter or native entry.");
// Now pop frame, process result, and return to caller.
// Stack on exit from frame manager / native entry:
// Stack on exit from template interpreter / native entry:
//
// F0 [ABI]
// ...
@@ -295,7 +293,7 @@
Register r_cr = R12_scratch2;
// Reload some volatile registers which we've spilled before the call
// to frame manager / native entry.
// to template interpreter / native entry.
// Access all locals via frame pointer, because we know nothing about
// the topmost frame's size.
__ ld(r_entryframe_fp, _abi0(callers_sp), R1_SP); // restore after call

View File

@@ -1090,6 +1090,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break;
case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break;
case Interpreter::java_lang_math_tanh : /* run interpreted */ break;
case Interpreter::java_lang_math_cbrt : /* run interpreted */ break;
case Interpreter::java_lang_math_abs : /* run interpreted */ break;
case Interpreter::java_lang_math_sqrt : /* run interpreted */ break;
case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break;

View File

@@ -1035,7 +1035,7 @@ void TemplateTable::bastore() {
// Need to check whether array is boolean or byte
// since both types share the bastore bytecode.
__ load_klass(Rscratch, Rarray);
__ load_klass_check_null_throw(Rscratch, Rarray, Rscratch);
__ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch);
int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit());
__ testbitdi(CR0, R0, Rscratch, diffbit);

View File

@@ -95,6 +95,13 @@ void VM_Version::initialize() {
FLAG_SET_ERGO(TrapBasedRangeChecks, false);
}
if (PowerArchitecturePPC64 >= 9) {
// Performance is good since Power9.
if (FLAG_IS_DEFAULT(SuperwordUseVSX)) {
FLAG_SET_ERGO(SuperwordUseVSX, true);
}
}
MaxVectorSize = SuperwordUseVSX ? 16 : 8;
if (FLAG_IS_DEFAULT(AlignVector)) {
FLAG_SET_ERGO(AlignVector, false);
@@ -479,29 +486,10 @@ void VM_Version::determine_features() {
// Emit code.
void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry();
uint32_t *code = (uint32_t *)a->pc();
// Don't use R0 in ldarx.
// Keep R3_ARG1 unmodified, it contains &field (see below).
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->fsqrts(F3, F4); // code[1] -> fsqrts_m
a->isel(R7, R5, R6, 0); // code[2] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
a->cmpb(R7, R5, R6); // code[4] -> cmpb
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
// arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16
a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m
a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher
a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb
a->mfdscr(R0); // code[12] -> mfdscr
a->lxvd2x(VSR0, R3_ARG1); // code[13] -> vsx
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[14] -> ldbrx
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> stdbrx
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[16] -> vshasig
a->darn(R7); // code[17] -> darn
a->brw(R5, R6); // code[18] -> brw
a->darn(R7);
a->brw(R5, R6);
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -536,23 +524,6 @@ void VM_Version::determine_features() {
// determine which instructions are legal.
int feature_cntr = 0;
if (code[feature_cntr++]) features |= fsqrt_m;
if (code[feature_cntr++]) features |= fsqrts_m;
if (code[feature_cntr++]) features |= isel_m;
if (code[feature_cntr++]) features |= lxarxeh_m;
if (code[feature_cntr++]) features |= cmpb_m;
if (code[feature_cntr++]) features |= popcntb_m;
if (code[feature_cntr++]) features |= popcntw_m;
if (code[feature_cntr++]) features |= fcfids_m;
if (code[feature_cntr++]) features |= vand_m;
if (code[feature_cntr++]) features |= lqarx_m;
if (code[feature_cntr++]) features |= vcipher_m;
if (code[feature_cntr++]) features |= vpmsumb_m;
if (code[feature_cntr++]) features |= mfdscr_m;
if (code[feature_cntr++]) features |= vsx_m;
if (code[feature_cntr++]) features |= ldbrx_m;
if (code[feature_cntr++]) features |= stdbrx_m;
if (code[feature_cntr++]) features |= vshasig_m;
if (code[feature_cntr++]) features |= darn_m;
if (code[feature_cntr++]) features |= brw_m;
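The probe works by executing each candidate instruction and recording which ones trap as illegal. A generic POSIX sketch of that idea (illustrative only, and not HotSpot's exact mechanism, which inspects a result slot per emitted instruction):

#include <setjmp.h>
#include <signal.h>

static sigjmp_buf probe_env;

static void sigill_handler(int) {
  siglongjmp(probe_env, 1); // the candidate instruction trapped
}

static bool instruction_supported(void (*candidate)()) {
  struct sigaction sa, old_sa;
  sa.sa_handler = sigill_handler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = 0;
  sigaction(SIGILL, &sa, &old_sa);
  bool supported = true;
  if (sigsetjmp(probe_env, 1) == 0) {
    candidate();       // run the probed instruction
  } else {
    supported = false; // SIGILL fired: not implemented on this CPU
  }
  sigaction(SIGILL, &old_sa, nullptr);
  return supported;
}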

View File

@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2024 SAP SE. All rights reserved.
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,46 +32,12 @@
class VM_Version: public Abstract_VM_Version {
protected:
enum Feature_Flag {
fsqrt,
fsqrts,
isel,
lxarxeh,
cmpb,
popcntb,
popcntw,
fcfids,
vand,
lqarx,
vcipher,
vpmsumb,
mfdscr,
vsx,
ldbrx,
stdbrx,
vshasig,
darn,
brw,
num_features // last entry to count features
};
enum Feature_Flag_Set {
unknown_m = 0,
fsqrt_m = (1 << fsqrt ),
fsqrts_m = (1 << fsqrts ),
isel_m = (1 << isel ),
lxarxeh_m = (1 << lxarxeh),
cmpb_m = (1 << cmpb ),
popcntb_m = (1 << popcntb),
popcntw_m = (1 << popcntw),
fcfids_m = (1 << fcfids ),
vand_m = (1 << vand ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
vpmsumb_m = (1 << vpmsumb),
mfdscr_m = (1 << mfdscr ),
vsx_m = (1 << vsx ),
ldbrx_m = (1 << ldbrx ),
stdbrx_m = (1 << stdbrx ),
vshasig_m = (1 << vshasig),
darn_m = (1 << darn ),
brw_m = (1 << brw ),
all_features_m = (unsigned long)-1
@@ -101,7 +67,6 @@ public:
static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
// CPU instruction support
static bool has_mfdscr() { return (_features & mfdscr_m) != 0; }
static bool has_darn() { return (_features & darn_m) != 0; }
static bool has_brw() { return (_features & brw_m) != 0; }

View File

@@ -963,80 +963,238 @@ protected:
enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11};
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
patch_reg((address)&insn, 20, Rs2); \
patch((address)&insn, 31, 27, funct7); \
patch((address)&insn, 26, 25, memory_order); \
emit(insn); \
private:
enum AmoWidthFunct3 : uint8_t {
AMO_WIDTH_BYTE = 0b000, // Zabha extension
AMO_WIDTH_HALFWORD = 0b001, // Zabha extension
AMO_WIDTH_WORD = 0b010,
AMO_WIDTH_DOUBLEWORD = 0b011,
AMO_WIDTH_QUADWORD = 0b100,
// 0b101 to 0b111 are reserved
};
enum AmoOperationFunct5 : uint8_t {
AMO_ADD = 0b00000,
AMO_SWAP = 0b00001,
AMO_LR = 0b00010,
AMO_SC = 0b00011,
AMO_XOR = 0b00100,
AMO_OR = 0b01000,
AMO_AND = 0b01100,
AMO_MIN = 0b10000,
AMO_MAX = 0b10100,
AMO_MINU = 0b11000,
AMO_MAXU = 0b11100,
AMO_CAS = 0b00101 // Zacas
};
static constexpr uint32_t OP_AMO_MAJOR = 0b0101111;
template <AmoOperationFunct5 funct5, AmoWidthFunct3 width>
void amo_base(Register Rd, Register Rs1, uint8_t Rs2, Aqrl memory_order = aqrl) {
assert(width > AMO_WIDTH_HALFWORD || UseZabha, "Must be");
assert(funct5 != AMO_CAS || UseZacas, "Must be");
unsigned insn = 0;
patch((address)&insn, 6, 0, OP_AMO_MAJOR);
patch_reg((address)&insn, 7, Rd);
patch((address)&insn, 14, 12, width);
patch_reg((address)&insn, 15, Rs1);
patch((address)&insn, 24, 20, Rs2);
patch((address)&insn, 26, 25, memory_order);
patch((address)&insn, 31, 27, funct5);
emit(insn);
}
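
For reference, amo_base packs funct5 into bits 31..27, aq/rl into 26..25, rs2 into 24..20, rs1 into 19..15, the width funct3 into 14..12, rd into 11..7, and the major opcode 0b0101111 into 6..0 (field positions taken from the patch() calls above). An illustrative stand-alone encoder with plain register numbers in place of HotSpot Register objects:

#include <cstdint>
#include <cstdio>

static uint32_t encode_amo(uint8_t funct5, uint8_t aqrl, uint8_t rs2,
                           uint8_t rs1, uint8_t width, uint8_t rd) {
  return (uint32_t(funct5) << 27) | (uint32_t(aqrl) << 25) |
         (uint32_t(rs2)    << 20) | (uint32_t(rs1)  << 15) |
         (uint32_t(width)  << 12) | (uint32_t(rd)   <<  7) |
         0b0101111;                                    // OP_AMO_MAJOR
}

int main() {
  // amoadd.w.aqrl x10, x12, (x11): funct5 = AMO_ADD, width = word, aq|rl = 0b11
  printf("0x%08x\n", encode_amo(0b00000, 0b11, 12, 11, 0b010, 10));
  return 0;
}
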
INSN(amoswap_w, 0b0101111, 0b010, 0b00001);
INSN(amoadd_w, 0b0101111, 0b010, 0b00000);
INSN(amoxor_w, 0b0101111, 0b010, 0b00100);
INSN(amoand_w, 0b0101111, 0b010, 0b01100);
INSN(amoor_w, 0b0101111, 0b010, 0b01000);
INSN(amomin_w, 0b0101111, 0b010, 0b10000);
INSN(amomax_w, 0b0101111, 0b010, 0b10100);
INSN(amominu_w, 0b0101111, 0b010, 0b11000);
INSN(amomaxu_w, 0b0101111, 0b010, 0b11100);
INSN(amoswap_d, 0b0101111, 0b011, 0b00001);
INSN(amoadd_d, 0b0101111, 0b011, 0b00000);
INSN(amoxor_d, 0b0101111, 0b011, 0b00100);
INSN(amoand_d, 0b0101111, 0b011, 0b01100);
INSN(amoor_d, 0b0101111, 0b011, 0b01000);
INSN(amomin_d, 0b0101111, 0b011, 0b10000);
INSN(amomax_d , 0b0101111, 0b011, 0b10100);
INSN(amominu_d, 0b0101111, 0b011, 0b11000);
INSN(amomaxu_d, 0b0101111, 0b011, 0b11100);
INSN(amocas_w, 0b0101111, 0b010, 0b00101);
INSN(amocas_d, 0b0101111, 0b011, 0b00101);
#undef INSN
template <AmoOperationFunct5 funct5, AmoWidthFunct3 width>
void amo_base(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<funct5, width>(Rd, Rs1, Rs2->raw_encoding(), memory_order);
}
public:
void amoadd_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoadd_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoadd_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoadd_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoor_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoand_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoand_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoand_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoand_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomin_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amomin_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomin_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amomin_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amominu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amominu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amominu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amominu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomax_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amomax_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomax_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amomax_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
protected:
void lr_w(Register Rd, Register Rs1, Aqrl memory_order = aqrl) {
amo_base<AMO_LR, AMO_WIDTH_WORD>(Rd, Rs1, 0, memory_order);
}
void lr_d(Register Rd, Register Rs1, Aqrl memory_order = aqrl) {
amo_base<AMO_LR, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, 0, memory_order);
}
void sc_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SC, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void sc_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SC, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amocas_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amocas_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amocas_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amocas_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
public:
enum operand_size { int8, int16, int32, uint32, int64 };
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) { \
unsigned insn = 0; \
uint32_t val = memory_order & 0x3; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
patch((address)&insn, 25, 20, 0b00000); \
patch((address)&insn, 31, 27, funct7); \
patch((address)&insn, 26, 25, val); \
emit(insn); \
}
INSN(lr_w, 0b0101111, 0b010, 0b00010);
INSN(lr_d, 0b0101111, 0b011, 0b00010);
#undef INSN
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = relaxed) { \
unsigned insn = 0; \
uint32_t val = memory_order & 0x3; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs2); \
patch_reg((address)&insn, 20, Rs1); \
patch((address)&insn, 31, 27, funct7); \
patch((address)&insn, 26, 25, val); \
emit(insn); \
}
INSN(sc_w, 0b0101111, 0b010, 0b00011);
INSN(sc_d, 0b0101111, 0b011, 0b00011);
#undef INSN
// Immediate Instruction
#define INSN(NAME, op, funct3) \
void NAME(Register Rd, Register Rs1, int64_t imm) { \

View File

@@ -36,9 +36,11 @@
#include <sys/mman.h>
#endif // LINUX
// Default value if probe is not implemented for a certain platform: 128TB
static const size_t DEFAULT_MAX_ADDRESS_BIT = 47;
// Minimum value returned, if probing fails: 64GB
// Default value if probing is not implemented for a certain platform
// Max address bit is restricted by implicit assumptions in the code, for instance
// the bit layout of ZForwardingEntry or the partial array entry (see ZMarkStackEntry) in the mark stack
static const size_t DEFAULT_MAX_ADDRESS_BIT = 46;
// Minimum value returned, if probing fails
static const size_t MINIMUM_MAX_ADDRESS_BIT = 36;
static size_t probe_valid_max_address_bit() {

View File

@@ -96,6 +96,7 @@ instruct zLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp, rFlagsReg cr)
match(Set dst (LoadP mem));
predicate(UseZGC && n->as_Load()->barrier_data() != 0);
effect(TEMP dst, TEMP tmp, KILL cr);
ins_is_late_expanded_null_check_candidate(true);
ins_cost(4 * DEFAULT_COST);

View File

@@ -107,6 +107,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
product(bool, UseZfh, false, DIAGNOSTIC, "Use Zfh instructions") \
product(bool, UseZfhmin, false, DIAGNOSTIC, "Use Zfhmin instructions") \
product(bool, UseZacas, false, EXPERIMENTAL, "Use Zacas instructions") \
product(bool, UseZabha, false, EXPERIMENTAL, "Use Zabha instructions") \
product(bool, UseZcb, false, EXPERIMENTAL, "Use Zcb instructions") \
product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \
product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \

View File

@@ -955,47 +955,29 @@ void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
int constant,
bool decrement) {
increment_mdp_data_at(mdp_in, noreg, constant, decrement);
int constant) {
increment_mdp_data_at(mdp_in, noreg, constant);
}
void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
Register reg,
int constant,
bool decrement) {
Register index,
int constant) {
assert(ProfileInterpreter, "must be profiling interpreter");
// %%% this uses 64-bit counters; at best it wastes space,
// at worst it is a rare bug when counters overflow
assert_different_registers(t1, t0, mdp_in, reg);
assert_different_registers(t1, t0, mdp_in, index);
Address addr1(mdp_in, constant);
Address addr2(t1, 0);
Address &addr = addr1;
if (reg != noreg) {
if (index != noreg) {
la(t1, addr1);
add(t1, t1, reg);
add(t1, t1, index);
addr = addr2;
}
if (decrement) {
ld(t0, addr);
subi(t0, t0, DataLayout::counter_increment);
Label L;
bltz(t0, L); // skip store if counter underflow
sd(t0, addr);
bind(L);
} else {
assert(DataLayout::counter_increment == 1,
"flow-free idiom only works with 1");
ld(t0, addr);
addi(t0, t0, DataLayout::counter_increment);
Label L;
blez(t0, L); // skip store if counter overflow
sd(t0, addr);
bind(L);
}
}
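
The surviving increment path is the usual saturating profile-counter update: add the increment, then skip the store when the result wrapped, so a saturated counter stays pinned instead of going negative. A minimal sketch (illustrative C++, not HotSpot code):

#include <cstdint>
#include <cstdio>

static void increment_saturating(int64_t& counter) {
  // wraparound add, like the hardware addi in the idiom above
  int64_t incremented = (int64_t)((uint64_t)counter + 1);
  if (incremented > 0) {     // skip the store when the add wrapped (the blez)
    counter = incremented;
  }
}

int main() {
  int64_t c = INT64_MAX;     // an already-saturated counter
  increment_saturating(c);
  printf("%lld\n", (long long)c); // still INT64_MAX
  return 0;
}
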
void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,

View File

@@ -233,11 +233,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
void verify_method_data_pointer();
void set_mdp_data_at(Register mdp_in, int constant, Register value);
void increment_mdp_data_at(Address data, bool decrement = false);
void increment_mdp_data_at(Register mdp_in, int constant,
bool decrement = false);
void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
bool decrement = false);
void increment_mdp_data_at(Register mdp_in, int constant);
void increment_mdp_data_at(Register mdp_in, Register index, int constant);
void increment_mask_and_jump(Address counter_addr,
int increment, Address mask,
Register tmp1, Register tmp2,

View File

@@ -3798,7 +3798,7 @@ void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register o
void MacroAssembler::load_reserved(Register dst,
Register addr,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire) {
switch (size) {
case int64:
@@ -3819,15 +3819,15 @@
void MacroAssembler::store_conditional(Register dst,
Register new_val,
Register addr,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl release) {
switch (size) {
case int64:
sc_d(dst, new_val, addr, release);
sc_d(dst, addr, new_val, release);
break;
case int32:
case uint32:
sc_w(dst, new_val, addr, release);
sc_w(dst, addr, new_val, release);
break;
default:
ShouldNotReachHere();
@@ -3836,7 +3836,7 @@
void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
enum operand_size size,
Assembler::operand_size size,
Register shift, Register mask, Register aligned_addr) {
assert(size == int8 || size == int16, "unsupported operand size");
@@ -3866,10 +3866,11 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte
// which are forced to work with 4-byte-aligned addresses.
void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result, bool result_as_bool,
Register tmp1, Register tmp2, Register tmp3) {
assert(!(UseZacas && UseZabha), "Use amocas");
assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1);
Register scratch0 = t0, aligned_addr = t1;
@@ -3902,13 +3903,13 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
notr(scratch1, mask);
bind(retry);
lr_w(result, aligned_addr, acquire);
load_reserved(result, aligned_addr, operand_size::int32, acquire);
andr(scratch0, result, mask);
bne(scratch0, expected, fail);
andr(scratch0, result, scratch1); // scratch1 is ~mask
orr(scratch0, scratch0, new_val);
sc_w(scratch0, scratch0, aligned_addr, release);
store_conditional(scratch0, scratch0, aligned_addr, operand_size::int32, release);
bnez(scratch0, retry);
}
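
Without Zabha, a byte or short CAS has to go through the enclosing 4-byte-aligned word: the helper derives an aligned address, a lane shift, and a mask, and the retry loop compares only the masked lane, splicing the new value into the untouched bytes before the store-conditional. An illustrative sketch of that arithmetic (little-endian lane placement assumed, not HotSpot code):

#include <cstdint>
#include <cstdio>

int main() {
  uintptr_t addr = 0x1000 + 3;                  // the byte lives at offset 3 in its word
  uintptr_t aligned_addr = addr & ~uintptr_t(3);
  unsigned shift = unsigned(addr & 3) * 8;      // bit position of the lane in the word
  uint32_t mask = 0xFFu << shift;               // lane to compare and replace
  uint32_t word = 0xAABBCCDD;                   // value lr.w would load from aligned_addr
  uint32_t expected = 0xAA, new_val = 0x11;
  if (((word & mask) >> shift) == expected) {   // compare only the narrow lane
    word = (word & ~mask) | (new_val << shift); // splice new byte into untouched lanes
  }
  printf("0x%08x\n", word);                     // prints 0x11bbccdd
  (void)aligned_addr;                           // sc.w would store `word` back here
  return 0;
}
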
@@ -3940,10 +3941,11 @@
// failed.
void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result,
Register tmp1, Register tmp2, Register tmp3) {
assert(!(UseZacas && UseZabha), "Use amocas");
assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1);
Register scratch0 = t0, aligned_addr = t1;
@@ -3974,13 +3976,13 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
} else {
notr(scratch1, mask);
lr_w(result, aligned_addr, acquire);
load_reserved(result, aligned_addr, operand_size::int32, acquire);
andr(scratch0, result, mask);
bne(scratch0, expected, fail);
andr(scratch0, result, scratch1); // scratch1 is ~mask
orr(scratch0, scratch0, new_val);
sc_w(scratch0, scratch0, aligned_addr, release);
store_conditional(scratch0, scratch0, aligned_addr, operand_size::int32, release);
bnez(scratch0, fail);
}
@@ -3997,10 +3999,10 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
void MacroAssembler::cmpxchg(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result, bool result_as_bool) {
assert(size != int8 && size != int16, "unsupported operand size");
assert((UseZacas && UseZabha) || (size != int8 && size != int16), "unsupported operand size");
assert_different_registers(addr, t0);
assert_different_registers(expected, t0);
assert_different_registers(new_val, t0);
@@ -4058,10 +4060,10 @@ void MacroAssembler::cmpxchg(Register addr, Register expected,
void MacroAssembler::weak_cmpxchg(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result) {
assert((UseZacas && UseZabha) || (size != int8 && size != int16), "unsupported operand size");
assert_different_registers(addr, t0);
assert_different_registers(expected, t0);
assert_different_registers(new_val, t0);
@@ -4134,7 +4136,7 @@ ATOMIC_XCHGU(xchgalwu, xchgalw)
#undef ATOMIC_XCHGU
void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr,
enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) {
Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) {
switch (size) {
case int64:
amocas_d(prev, addr, newv, (Assembler::Aqrl)(acquire | release));
@@ -4146,6 +4148,12 @@ void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr,
amocas_w(prev, addr, newv, (Assembler::Aqrl)(acquire | release));
zext(prev, prev, 32);
break;
case int16:
amocas_h(prev, addr, newv, (Assembler::Aqrl)(acquire | release));
break;
case int8:
amocas_b(prev, addr, newv, (Assembler::Aqrl)(acquire | release));
break;
default:
ShouldNotReachHere();
}
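
One detail in the uint32 case above: per the RISC-V spec, 32-bit AMO results are sign-extended into the 64-bit destination register, so an unsigned 32-bit value (such as a compressed oop) with bit 31 set needs the explicit zero-extension that the zext call provides. A small illustrative sketch of that fix-up:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t loaded = 0x80000000u;                 // 32-bit value with bit 31 set
  int64_t in_register = (int32_t)loaded;         // what amocas.w leaves in rd (sign-extended)
  uint64_t after_zext = (uint64_t)in_register & 0xFFFFFFFFull; // the zext(prev, prev, 32)
  printf("0x%016llx -> 0x%016llx\n",
         (unsigned long long)in_register, (unsigned long long)after_zext);
  return 0;
}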

View File

@@ -666,7 +666,7 @@ class MacroAssembler: public Assembler {
// We try to follow risc-v asm mnemonics.
// But as we don't lay out a reachable GOT,
// we often need to resort to movptr, li <48imm>.
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc
// HotSpot only uses the standard calling convention using x1/ra.
// The alternative calling convention using x5/t0 is not used.
@@ -1187,26 +1187,26 @@ public:
void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
void cmpxchg(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result, bool result_as_bool = false);
void weak_cmpxchg(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result);
void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
enum operand_size size,
Assembler::operand_size size,
Register shift, Register mask, Register aligned_addr);
void cmpxchg_narrow_value(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result, bool result_as_bool,
Register tmp1, Register tmp2, Register tmp3);
void weak_cmpxchg_narrow_value(Register addr, Register expected,
Register new_val,
enum operand_size size,
Assembler::operand_size size,
Assembler::Aqrl acquire, Assembler::Aqrl release,
Register result,
Register tmp1, Register tmp2, Register tmp3);
@@ -1223,7 +1223,7 @@ public:
void atomic_xchgwu(Register prev, Register newv, Register addr);
void atomic_xchgalwu(Register prev, Register newv, Register addr);
void atomic_cas(Register prev, Register newv, Register addr, enum operand_size size,
void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size,
Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
// Emit a far call/jump. Only invalidates the tmp register which
@@ -1636,8 +1636,8 @@ private:
int bitset_to_regs(unsigned int bitset, unsigned char* regs);
Address add_memory_helper(const Address dst, Register tmp);
void load_reserved(Register dst, Register addr, enum operand_size size, Assembler::Aqrl acquire);
void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release);
void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire);
void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release);
public:
void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);

View File

@@ -2304,42 +2304,6 @@ encode %{
}
%}
enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
// compare and branch instruction encodings
enc_class riscv_enc_j(label lbl) %{
@@ -2655,6 +2619,10 @@ ins_attrib ins_alignment(4); // Required alignment attribute (must
// compute_padding() function must be
// provided for the instruction
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
@@ -5250,18 +5218,20 @@ instruct prefetchalloc( memory mem ) %{
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndSwapB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(!UseZabha || !UseZacas);
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
format %{
"cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB_narrow"
%}
ins_encode %{
@@ -5273,18 +5243,42 @@ instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1
ins_pipe(pipe_slow);
%}
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(UseZabha && UseZacas);
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
true /* result as bool */);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(!UseZabha || !UseZacas);
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
format %{
"cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS_narrow"
%}
ins_encode %{
@@ -5296,18 +5290,44 @@ instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1
ins_pipe(pipe_slow);
%}
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(UseZabha && UseZacas);
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
true /* result as bool */);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
match(Set res (CompareAndSwapI mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI"
%}
ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
@@ -5316,14 +5336,18 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval
%{
match(Set res (CompareAndSwapL mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL"
%}
ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
@@ -5334,14 +5358,18 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP"
%}
ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
@@ -5349,35 +5377,40 @@
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN"
%}
ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
// alternative CompareAndSwapX when we are eliding barriers
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndSwapBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(needs_acquiring_load_reserved(n));
predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq_narrow"
%}
ins_encode %{
@@ -5389,20 +5422,42 @@ instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI
ins_pipe(pipe_slow);
%}
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
true /* result as bool */);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(needs_acquiring_load_reserved(n));
predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq_narrow"
%}
ins_encode %{
@@ -5414,20 +5469,46 @@ instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI
ins_pipe(pipe_slow);
%}
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
true /* result as bool */);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(needs_acquiring_load_reserved(n));
match(Set res (CompareAndSwapI mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq"
%}
ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
@@ -5438,14 +5519,18 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL new
match(Set res (CompareAndSwapL mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq"
%}
ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
@@ -5456,14 +5541,18 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq"
%}
ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
@@ -5474,14 +5563,18 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN new
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
"mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq"
%}
ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval));
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
/*result as bool*/ true);
%}
ins_pipe(pipe_slow);
%}
@@ -5492,17 +5585,19 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN new
// no trailing StoreLoad barrier emitted by C2. Unfortunately we
// can't check the type of memory ordering here, so we always emit a
// sc_d(w) with rl bit set.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndExchangeB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(!UseZabha || !UseZacas);
match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB"
"cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB_narrow"
%}
ins_encode %{
@@ -5514,17 +5609,39 @@ instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(UseZabha && UseZacas);
match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(!UseZabha || !UseZacas);
match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS"
"cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS_narrow"
%}
ins_encode %{
@ -5536,13 +5653,31 @@ instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(UseZabha && UseZacas);
match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI"
@ -5560,9 +5695,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne
%{
match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL"
@ -5579,11 +5712,10 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN"
@ -5600,11 +5732,10 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP"
@ -5618,19 +5749,19 @@ instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP ne
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndExchangeBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(needs_acquiring_load_reserved(n));
predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq"
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq_narrow"
%}
ins_encode %{
@ -5642,19 +5773,39 @@ instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(needs_acquiring_load_reserved(n));
predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq"
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq_narrow"
%}
ins_encode %{
@ -5666,15 +5817,33 @@ instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq"
%}
ins_encode %{
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(needs_acquiring_load_reserved(n));
match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq"
@ -5694,9 +5863,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL
match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq"
@ -5716,9 +5883,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq"
@ -5738,9 +5903,7 @@ instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
effect(TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq"
@ -5754,18 +5917,20 @@ instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct weakCompareAndSwapB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(!UseZabha || !UseZacas);
match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapB"
"# $res == 1 when success, #@weakCompareAndSwapB_narrow"
%}
ins_encode %{
@ -5777,18 +5942,41 @@ instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(UseZabha && UseZacas);
match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapB"
%}
ins_encode %{
__ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(!UseZabha || !UseZacas);
match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapS"
"# $res == 1 when success, #@weakCompareAndSwapS_narrow"
%}
ins_encode %{
@ -5800,11 +5988,32 @@ instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(UseZabha && UseZacas);
match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapS"
%}
ins_encode %{
__ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -5823,7 +6032,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne
%{
match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -5841,9 +6050,10 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -5861,9 +6071,10 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -5878,20 +6089,20 @@ instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct weakCompareAndSwapBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(needs_acquiring_load_reserved(n));
predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapBAcq"
"# $res == 1 when success, #@weakCompareAndSwapBAcq_narrow"
%}
ins_encode %{
@ -5903,20 +6114,41 @@ instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapBAcq"
%}
ins_encode %{
__ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
predicate(needs_acquiring_load_reserved(n));
predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapSAcq"
"# $res == 1 when success, #@weakCompareAndSwapSAcq_narrow"
%}
ins_encode %{
@ -5928,13 +6160,34 @@ instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n));
match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"# $res == 1 when success, #@weakCompareAndSwapSAcq"
%}
ins_encode %{
__ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
/*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
predicate(needs_acquiring_load_reserved(n));
match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -5955,7 +6208,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL
match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -5976,7 +6229,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -5997,7 +6250,7 @@ instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
ins_cost(2 * VOLATILE_REF_COST);
format %{
"weak_cmpxchg_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
@ -765,6 +765,10 @@ void TemplateInterpreterGenerator::lock_method() {
// xcpool: cp cache
// stack_pointer: previous sp
void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
// Save ConstMethod* in x15_const_method for later use to avoid loading multiple times
Register x15_const_method = x15;
__ ld(x15_const_method, Address(xmethod, Method::const_offset()));
// initialize fixed part of activation frame
if (native_call) {
__ subi(esp, sp, 14 * wordSize);
@ -775,8 +779,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ sd(zr, Address(sp, 12 * wordSize));
} else {
__ subi(esp, sp, 12 * wordSize);
__ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod
__ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase
__ add(xbcp, x15_const_method, in_bytes(ConstMethod::codes_offset())); // get codebase
__ subi(sp, sp, 12 * wordSize);
}
__ sd(xbcp, Address(sp, wordSize));
@ -798,9 +801,10 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ sd(fp, Address(sp, 10 * wordSize));
__ la(fp, Address(sp, 12 * wordSize)); // include ra & fp
__ ld(xcpool, Address(xmethod, Method::const_offset()));
__ ld(xcpool, Address(xcpool, ConstMethod::constants_offset()));
__ ld(xcpool, Address(xcpool, ConstantPool::cache_offset()));
// Save ConstantPool* in x28_constants for later use to avoid loading multiple times
Register x28_constants = x28;
__ ld(x28_constants, Address(x15_const_method, ConstMethod::constants_offset()));
__ ld(xcpool, Address(x28_constants, ConstantPool::cache_offset()));
__ sd(xcpool, Address(sp, 3 * wordSize));
__ sub(t0, xlocals, fp);
__ srai(t0, t0, Interpreter::logStackElementSize); // t0 = xlocals - fp();
@ -812,13 +816,15 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ sd(x19_sender_sp, Address(sp, 9 * wordSize));
__ sd(zr, Address(sp, 8 * wordSize));
// Get mirror and store it in the frame as GC root for this Method*
__ load_mirror(t2, xmethod, x15, t1);
// Get mirror: resolve ConstantPool* -> InstanceKlass* -> Java mirror
// and store it in the frame as GC root for this Method*
__ ld(t2, Address(x28_constants, ConstantPool::pool_holder_offset()));
__ ld(t2, Address(t2, in_bytes(Klass::java_mirror_offset())));
__ resolve_oop_handle(t2, t0, t1);
__ sd(t2, Address(sp, 4 * wordSize));
if (!native_call) {
__ ld(t0, Address(xmethod, Method::const_offset()));
__ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
__ lhu(t0, Address(x15_const_method, ConstMethod::max_stack_offset()));
__ add(t0, t0, MAX2(3, Method::extra_stack_entries()));
__ slli(t0, t0, 3);
__ sub(t0, sp, t0);
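// Worked example of the limit computation above (hypothetical numbers): with
// max_stack == 10 and Method::extra_stack_entries() == 4, t0 becomes
// (10 + 4) << 3 == 112, so the stack limit is sp - 112 bytes.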
@ -1640,6 +1646,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
Interpreter::_remove_activation_preserving_args_entry = __ pc();
__ empty_expression_stack();
__ restore_bcp(); // We could have returned from deoptimizing this frame, so restore xbcp.
// Set the popframe_processing bit in pending_popframe_condition
// indicating that we are currently handling popframe, so that
// call_VMs that may happen later do not trigger new popframe
@ -125,24 +125,6 @@ static inline Address at_tos_p5() {
return Address(esp, Interpreter::expr_offset_in_bytes(5));
}
// Miscellaneous helper routines
// Store an oop (or null) at the Address described by obj.
// If val == noreg this means store a null
static void do_oop_store(InterpreterMacroAssembler* _masm,
Address dst,
Register val,
DecoratorSet decorators) {
assert(val == noreg || val == x10, "parameter is just for looks");
__ store_heap_oop(dst, val, x28, x29, x13, decorators);
}
static void do_oop_load(InterpreterMacroAssembler* _masm,
Address src,
Register dst,
DecoratorSet decorators) {
__ load_heap_oop(dst, src, x28, x29, decorators);
}
Address TemplateTable::at_bcp(int offset) {
assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
return Address(xbcp, offset);
@ -787,7 +769,7 @@ void TemplateTable::aaload() {
index_check(x10, x11); // leaves index in x11
__ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
__ shadd(x10, x11, x10, t0, LogBytesPerHeapOop);
do_oop_load(_masm, Address(x10), x10, IS_ARRAY);
__ load_heap_oop(x10, Address(x10), x28, x29, IS_ARRAY);
}
void TemplateTable::baload() {
@ -1099,7 +1081,7 @@ void TemplateTable::aastore() {
// Get the value we will store
__ ld(x10, at_tos());
// Now store using the appropriate barrier
do_oop_store(_masm, element_address, x10, IS_ARRAY);
__ store_heap_oop(element_address, x10, x28, x29, x13, IS_ARRAY);
__ j(done);
// Have a null in x10, x13=array, x12=index. Store null at ary[idx]
@ -1107,7 +1089,7 @@ void TemplateTable::aastore() {
__ profile_null_seen(x12);
// Store a null
do_oop_store(_masm, element_address, noreg, IS_ARRAY);
__ store_heap_oop(element_address, noreg, x28, x29, x13, IS_ARRAY);
// Pop stack arguments
__ bind(done);
@ -2565,7 +2547,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
__ subi(t0, tos_state, (u1)atos);
__ bnez(t0, notObj);
// atos
do_oop_load(_masm, field, x10, IN_HEAP);
__ load_heap_oop(x10, field, x28, x29, IN_HEAP);
__ push(atos);
if (rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_agetfield, bc, x11);
@ -2809,7 +2791,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
__ add(off, obj, off); // if static, obj from cache, else obj from stack.
const Address field(off, 0);
// Store into the field
do_oop_store(_masm, field, x10, IN_HEAP);
__ store_heap_oop(field, x10, x28, x29, x13, IN_HEAP);
if (rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no);
}
@ -3051,10 +3033,10 @@ void TemplateTable::fast_storefield(TosState state) {
__ add(x11, x12, x11);
const Address field(x11, 0);
// access field
// access field, must not clobber x13 - flags
switch (bytecode()) {
case Bytecodes::_fast_aputfield:
do_oop_store(_masm, field, x10, IN_HEAP);
__ store_heap_oop(field, x10, x28, x29, x15, IN_HEAP);
break;
case Bytecodes::_fast_lputfield:
__ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg);
@ -3133,7 +3115,7 @@ void TemplateTable::fast_accessfield(TosState state) {
// access field
switch (bytecode()) {
case Bytecodes::_fast_agetfield:
do_oop_load(_masm, field, x10, IN_HEAP);
__ load_heap_oop(x10, field, x28, x29, IN_HEAP);
__ verify_oop(x10);
break;
case Bytecodes::_fast_lgetfield:
@ -3191,7 +3173,7 @@ void TemplateTable::fast_xaccess(TosState state) {
break;
case atos:
__ add(x10, x10, x11);
do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP);
__ load_heap_oop(x10, Address(x10, 0), x28, x29, IN_HEAP);
__ verify_oop(x10);
break;
case ftos:
@ -221,13 +221,13 @@ class VM_Version : public Abstract_VM_Version {
FLAG_SET_DEFAULT(UseExtension, true); \
} \
// https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva20-profiles
// https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva20-profiles
#define RV_USE_RVA20U64 \
RV_ENABLE_EXTENSION(UseRVC) \
static void useRVA20U64Profile();
// https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva22-profiles
// https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva22-profiles
#define RV_USE_RVA22U64 \
RV_ENABLE_EXTENSION(UseRVC) \
RV_ENABLE_EXTENSION(UseZba) \
@ -241,7 +241,7 @@ class VM_Version : public Abstract_VM_Version {
static void useRVA22U64Profile();
// https://github.com/riscv/riscv-profiles/blob/main/rva23-profile.adoc#rva23u64-profile
// https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc#rva23u64-profile
#define RV_USE_RVA23U64 \
RV_ENABLE_EXTENSION(UseRVC) \
RV_ENABLE_EXTENSION(UseRVV) \
@ -410,7 +410,7 @@
// C2I adapter frames:
//
// STACK (interpreted called from compiled, on entry to frame manager):
// STACK (interpreted called from compiled, on entry to template interpreter):
//
// [TOP_C2I_FRAME]
// [JIT_FRAME]
@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -414,7 +414,7 @@ constexpr FloatRegister Z_FARG2 = Z_F2;
constexpr FloatRegister Z_FARG3 = Z_F4;
constexpr FloatRegister Z_FARG4 = Z_F6;
// Register declarations to be used in frame manager assembly code.
// Register declarations to be used in template interpreter assembly code.
// Use only non-volatile registers in order to keep values across C-calls.
// Register to cache the integer value on top of the operand stack.
@ -439,7 +439,7 @@ constexpr Register Z_bcp = Z_R13;
// Bytecode which is dispatched (short lived!).
constexpr Register Z_bytecode = Z_R14;
// Temporary registers to be used within frame manager. We can use
// Temporary registers to be used within template interpreter. We can use
// the nonvolatile ones because the call stub has saved them.
// Use only non-volatile registers in order to keep values across C-calls.
constexpr Register Z_tmp_1 = Z_R10;
@ -118,7 +118,7 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() {
__ z_lgr(Z_SP, saved_sp);
// A null [Z_RET] was possible in hotspot5 but not in sapjvm6.
// C2I adapter extensions are now removed by a resize in the frame manager
// C2I adapter extensions are now removed by a resize in the template interpreter
// (unwind_initial_activation_pending_exception).
#ifdef ASSERT
__ z_ltgr(handle_exception, handle_exception);
@ -2139,7 +2139,7 @@ static address gen_c2i_adapter(MacroAssembler *masm,
Register value = Z_R12;
// Remember the senderSP so we can pop the interpreter arguments off of the stack.
// In addition, frame manager expects initial_caller_sp in Z_R10.
// In addition, template interpreter expects initial_caller_sp in Z_R10.
__ z_lgr(sender_SP, Z_SP);
// This should always fit in 14 bit immediate.
@ -115,7 +115,7 @@ class StubGenerator: public StubCodeGenerator {
// [SP+176] - thread : Thread*
//
address generate_call_stub(address& return_address) {
// Set up a new C frame, copy Java arguments, call frame manager
// Set up a new C frame, copy Java arguments, call template interpreter
// or native_entry, and process result.
StubGenStubId stub_id = StubGenStubId::call_stub_id;
@ -272,10 +272,10 @@ class StubGenerator: public StubCodeGenerator {
BLOCK_COMMENT("call {");
{
// Call frame manager or native entry.
// Call template interpreter or native entry.
//
// Register state on entry to frame manager / native entry:
// Register state on entry to template interpreter / native entry:
//
// Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed)
// Lesp = (SP) + copied_arguments_offset - 8
@ -290,7 +290,7 @@ class StubGenerator: public StubCodeGenerator {
__ z_lgr(Z_esp, r_top_of_arguments_addr);
//
// Stack on entry to frame manager / native entry:
// Stack on entry to template interpreter / native entry:
//
// F0 [TOP_IJAVA_FRAME_ABI]
// [outgoing Java arguments]
@ -300,7 +300,7 @@ class StubGenerator: public StubCodeGenerator {
//
// Do a light-weight C-call here, r_new_arg_entry holds the address
// of the interpreter entry point (frame manager or native entry)
// of the interpreter entry point (template interpreter or native entry)
// and save runtime-value of return_pc in return_address
// (call by reference argument).
return_address = __ call_stub(r_new_arg_entry);
@ -309,11 +309,11 @@ class StubGenerator: public StubCodeGenerator {
{
BLOCK_COMMENT("restore registers {");
// Returned from frame manager or native entry.
// Returned from template interpreter or native entry.
// Now pop frame, process result, and return to caller.
//
// Stack on exit from frame manager / native entry:
// Stack on exit from template interpreter / native entry:
//
// F0 [ABI]
// ...
@ -330,7 +330,7 @@ class StubGenerator: public StubCodeGenerator {
__ pop_frame();
// Reload some volatile registers which we've spilled before the call
// to frame manager / native entry.
// to template interpreter / native entry.
// Access all locals via frame pointer, because we know nothing about
// the topmost frame's size.
__ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
@ -1468,11 +1468,120 @@ class StubGenerator: public StubCodeGenerator {
return __ addr_at(start_off);
}
//
// Generate 'unsafe' set memory stub
// Though just as safe as the other stubs, it takes an unscaled
// size_t (# bytes) argument instead of an element count.
//
// Input:
// Z_ARG1 - destination array address
// Z_ARG2 - byte count (size_t)
// Z_ARG3 - byte value
//
address generate_unsafe_setmemory(address unsafe_byte_fill) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, StubGenStubId::unsafe_setmemory_id);
unsigned int start_off = __ offset();
// bump this on entry, not on exit:
// inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr);
const Register dest = Z_ARG1;
const Register size = Z_ARG2;
const Register byteVal = Z_ARG3;
NearLabel tail, finished;
// fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char)
// Mark the remaining code as performing Unsafe accesses.
UnsafeMemoryAccessMark umam(this, true, false);
__ z_vlvgb(Z_V0, byteVal, 0);
__ z_vrepb(Z_V0, Z_V0, 0);
__ z_aghi(size, -32);
__ z_brl(tail);
{
NearLabel again;
__ bind(again);
__ z_vst(Z_V0, Address(dest, 0));
__ z_vst(Z_V0, Address(dest, 16));
__ z_aghi(dest, 32);
__ z_aghi(size, -32);
__ z_brnl(again);
}
__ bind(tail);
{
NearLabel dont;
__ testbit(size, 4);
__ z_brz(dont);
__ z_vst(Z_V0, Address(dest, 0));
__ z_aghi(dest, 16);
__ bind(dont);
}
{
NearLabel dont;
__ testbit(size, 3);
__ z_brz(dont);
__ z_vsteg(Z_V0, 0, Z_R0, dest, 0);
__ z_aghi(dest, 8);
__ bind(dont);
}
__ z_tmll(size, 7);
__ z_brc(Assembler::bcondAllZero, finished);
{
NearLabel dont;
__ testbit(size, 2);
__ z_brz(dont);
__ z_vstef(Z_V0, 0, Z_R0, dest, 0);
__ z_aghi(dest, 4);
__ bind(dont);
}
{
NearLabel dont;
__ testbit(size, 1);
__ z_brz(dont);
__ z_vsteh(Z_V0, 0, Z_R0, dest, 0);
__ z_aghi(dest, 2);
__ bind(dont);
}
{
NearLabel dont;
__ testbit(size, 0);
__ z_brz(dont);
__ z_vsteb(Z_V0, 0, Z_R0, dest, 0);
__ bind(dont);
}
__ bind(finished);
__ z_br(Z_R14);
return __ addr_at(start_off);
}
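// A scalar sketch of the fill strategy above (plain C for illustration; the
// stub itself uses z/Architecture vector stores): 32-byte chunks while they
// last, then a power-of-two tail driven by the bits of the remaining count.
// The low five bits of `size` survive the biased loop counter, which is why
// the testbit sequence works on the negative leftover.
//
//   #include <string.h>
//   static void fill_sketch(unsigned char* dest, size_t size, unsigned char val) {
//     while (size >= 32) { memset(dest, val, 32); dest += 32; size -= 32; }
//     if (size & 16) { memset(dest, val, 16); dest += 16; }
//     if (size & 8)  { memset(dest, val, 8);  dest += 8;  }
//     if (size & 4)  { memset(dest, val, 4);  dest += 4;  }
//     if (size & 2)  { memset(dest, val, 2);  dest += 2;  }
//     if (size & 1)  { *dest = val; }
//   }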
// This is the common error exit stub for UnsafeMemoryAccess.
address generate_unsafecopy_common_error_exit() {
unsigned int start_off = __ offset();
__ z_lghi(Z_RET, 0); // return 0
__ z_br(Z_R14);
return __ addr_at(start_off);
}
void generate_arraycopy_stubs() {
// Note: the disjoint stubs must be generated first, as some of
// the conjoint stubs use them.
address ucm_common_error_exit = generate_unsafecopy_common_error_exit();
UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit);
StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_nonoop_copy (StubGenStubId::jbyte_disjoint_arraycopy_id);
StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_nonoop_copy(StubGenStubId::jshort_disjoint_arraycopy_id);
StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_nonoop_copy (StubGenStubId::jint_disjoint_arraycopy_id);
@ -1500,6 +1609,12 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_nonoop_copy(StubGenStubId::arrayof_jlong_arraycopy_id);
StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_id);
StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_uninit_id);
#ifdef COMPILER2
StubRoutines::_unsafe_setmemory =
VM_Version::has_VectorFacility() ? generate_unsafe_setmemory(StubRoutines::_jbyte_fill) : nullptr;
#endif // COMPILER2
}
// Call interface for AES_encryptBlock, AES_decryptBlock stubs.
@ -3184,6 +3299,10 @@ class StubGenerator: public StubCodeGenerator {
//----------------------------------------------------------------------
// Entry points that are platform specific.
if (UnsafeMemoryAccess::_table == nullptr) {
UnsafeMemoryAccess::create_table(4); // 4 for setMemory
}
if (UseCRC32Intrinsics) {
StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes();
@ -1217,7 +1217,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
// Various method entries
// Math function, frame manager must set up an interpreter state, etc.
// Math function, template interpreter must set up an interpreter state, etc.
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
// Decide what to do: Use same platform specific instructions and runtime calls as compilers.
@ -1240,6 +1240,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break;
case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break;
case Interpreter::java_lang_math_tanh : /* run interpreted */ break;
case Interpreter::java_lang_math_cbrt : /* run interpreted */ break;
case Interpreter::java_lang_math_abs : /* run interpreted */ break;
case Interpreter::java_lang_math_sqrt : /* runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); not available */ break;
case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break;
File diff suppressed because it is too large.
@ -772,10 +772,10 @@ private:
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_v, bool evex_r, bool evex_b,
bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool no_flags = false);
void evex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
void eevex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
void evex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
void eevex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
@ -785,11 +785,28 @@ private:
VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool src_is_gpr = false, bool nds_is_ndd = false, bool no_flags = false);
int evex_prefix_and_encode_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
int eevex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
int evex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false);
int emit_eevex_prefix_ndd(int dst_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags = false);
int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false);
int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false);
void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
int size, int op1, int op2, bool no_flags);
void emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
int size, int opcode_byte, bool no_flags = false, bool is_map1 = false);
void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false);
void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8_t imm8, VexSimdPrefix pre, VexOpcode opc,
int size, int opcode_byte, bool no_flags, bool is_map1 = false);
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
@ -798,10 +815,10 @@ private:
VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr = false);
// Helper functions for groups of instructions
bool is_demotable(bool no_flags, int dst_enc, int nds_enc);
void emit_arith_b(int op1, int op2, Register dst, int imm8);
void emit_arith(int op1, int op2, Register dst, int32_t imm32);
void emit_arith_ndd(int op1, int op2, Register dst, int32_t imm32);
void emit_arith(int op1, int op2, Register dst, int32_t imm32, bool optimize_rax_dst = true);
// Force generation of a 4 byte immediate value even if it fits into 8bit
void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
void emit_arith(int op1, int op2, Register dst, Register src);
@ -950,6 +967,7 @@ private:
// New cpus require use of movaps and movapd to avoid partial register stall
// when moving between registers.
void movaps(XMMRegister dst, XMMRegister src);
void movapd(XMMRegister dst, Address src);
void movapd(XMMRegister dst, XMMRegister src);
// End avoid using directly
@ -2450,6 +2468,9 @@ private:
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Bitwise Logical OR of Packed Floating-Point Values
void orpd(XMMRegister dst, XMMRegister src);
void unpckhpd(XMMRegister dst, XMMRegister src);
void unpcklpd(XMMRegister dst, XMMRegister src);
@ -720,7 +720,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dtanh
x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dtanh ||
x->id() == vmIntrinsics::_dcbrt
) {
do_LibmIntrinsic(x);
return;
@ -840,6 +841,12 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(StubRoutines::dtanh(), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dcbrt:
assert(StubRoutines::dcbrt() != nullptr, "cbrt intrinsic not found");
if (StubRoutines::dcbrt() != nullptr) {
__ call_runtime_leaf(StubRoutines::dcbrt(), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
* Copyright (c) 2023, 2025, Red Hat, Inc. All rights reserved.
* Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -25,6 +25,7 @@
#ifdef _LP64
#include "memory/metaspace.hpp"
#include "oops/compressedKlass.hpp"
#include "utilities/globalDefinitions.hpp"
@ -32,15 +33,25 @@ char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size
char* result = nullptr;
// Optimize for unscaled encoding; failing that, for zero-based encoding:
if (optimize_for_zero_base) {
result = reserve_address_space_for_unscaled_encoding(size, aslr);
if (result == nullptr) {
assert(CompressedKlassPointers::narrow_klass_pointer_bits() == 32 ||
CompressedKlassPointers::narrow_klass_pointer_bits() == 22, "Rethink if we ever use different nKlass bit sizes");
// Unconditionally attempting to reserve in the lower 4G first always makes sense:
// -CDS -COH: Try to get unscaled mode (zero base, zero shift)
// +CDS -COH: No zero base possible (CDS prevents it); but we still benefit from small base pointers (imm32 movabs)
// -CDS +COH: No zero base possible (22bit nKlass + zero base zero shift = 4MB encoding range, way too small);
// but we still benefit from small base pointers (imm32 movabs)
// +CDS +COH: No zero base possible for multiple reasons (CDS prevents it and encoding range too small);
// but we still benefit from small base pointers (imm32 movabs)
result = reserve_address_space_below_4G(size, aslr);
if (result == nullptr && optimize_for_zero_base) {
// Failing that, if we are running without CDS, attempt to allocate below 32G.
// This allows us to use zero-based encoding with a non-zero shift.
result = reserve_address_space_for_zerobased_encoding(size, aslr);
}
} // end: low-address reservation
// Nothing more to optimize for on x64. If base != 0, we will always emit the full 64-bit immediate.
return result;
}
@ -30,11 +30,15 @@
size_t ZPointerLoadShift;
size_t ZPlatformAddressOffsetBits() {
#ifdef ADDRESS_SANITIZER
return 44;
#else
const size_t min_address_offset_bits = 42; // 4TB
const size_t max_address_offset_bits = 44; // 16TB
const size_t address_offset = ZGlobalsPointers::min_address_offset_request();
const size_t address_offset_bits = log2i_exact(address_offset);
return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
#endif
}
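// Worked example (hypothetical request): a minimum offset request of 8TB
// (2^43 bytes) gives address_offset_bits == 43, which the clamp to [42, 44]
// leaves unchanged; a 2TB (2^41) request would be raised to the 42-bit floor.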
size_t ZPlatformAddressHeapBaseShift() {
@ -118,6 +118,10 @@ instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr)
predicate(UseZGC && n->as_Load()->barrier_data() != 0);
match(Set dst (LoadP mem));
effect(TEMP dst, KILL cr);
// The main load is a candidate to implement implicit null checks. The
// barrier's slow path includes an identical reload, which does not need to be
// registered in the exception table because it is dominated by the main one.
ins_is_late_expanded_null_check_candidate(true);
ins_cost(125);
@ -1355,25 +1355,15 @@ void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
}
void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
Register bumped_count) {
void InterpreterMacroAssembler::profile_taken_branch(Register mdp) {
if (ProfileInterpreter) {
Label profile_continue;
// If no method data exists, go to profile_continue.
// Otherwise, assign to mdp
test_method_data_pointer(mdp, profile_continue);
// We are taking a branch. Increment the taken count.
// We inline increment_mdp_data_at to return bumped_count in a register
//increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
Address data(mdp, in_bytes(JumpData::taken_offset()));
movptr(bumped_count, data);
assert(DataLayout::counter_increment == 1,
"flow-free idiom only works with 1");
addptr(bumped_count, DataLayout::counter_increment);
sbbptr(bumped_count, 0);
movptr(data, bumped_count); // Store back out
increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
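// The removed inline sequence was the usual saturating-counter idiom: addptr
// sets the carry flag on unsigned wrap-around, and the following
// sbbptr(bumped_count, 0) subtracts that carry again, pinning the counter at
// its maximum instead of letting it wrap to zero.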
// The method data pointer needs to be updated to reflect the new target.
update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
@ -1389,7 +1379,7 @@ void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);
// We are taking a branch. Increment the not taken count.
// We are not taking a branch. Increment the not taken count.
increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
// The method data pointer needs to be updated to correspond to
@ -236,7 +236,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void update_mdp_by_constant(Register mdp_in, int constant);
void update_mdp_for_ret(Register return_bci);
void profile_taken_branch(Register mdp, Register bumped_count);
void profile_taken_branch(Register mdp);
void profile_not_taken_branch(Register mdp);
void profile_call(Register mdp);
void profile_final_call(Register mdp);
@ -2250,6 +2250,16 @@ void MacroAssembler::evmovdqaq(XMMRegister dst, AddressLiteral src, int vector_l
}
}
void MacroAssembler::movapd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
if (reachable(src)) {
Assembler::movapd(dst, as_Address(src));
} else {
lea(rscratch, src);
Assembler::movapd(dst, Address(rscratch, 0));
}
}
void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
@ -5402,24 +5412,27 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
}
void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
BLOCK_COMMENT("encode_klass_not_null {");
assert_different_registers(r, tmp);
if (CompressedKlassPointers::base() != nullptr) {
if (AOTCodeCache::is_on_for_dump()) {
movptr(tmp, ExternalAddress(CompressedKlassPointers::base_addr()));
} else {
mov64(tmp, (int64_t)CompressedKlassPointers::base());
movptr(tmp, (intptr_t)CompressedKlassPointers::base());
}
subq(r, tmp);
}
if (CompressedKlassPointers::shift() != 0) {
shrq(r, CompressedKlassPointers::shift());
}
BLOCK_COMMENT("} encode_klass_not_null");
}
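// Worked example (hypothetical values): with base == 0x8'0000'0000 and
// shift == 3, a Klass* at 0x8'0000'1000 encodes as
// (0x800001000 - 0x800000000) >> 3 == 0x200; decoding reverses the two steps.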
void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) {
BLOCK_COMMENT("encode_and_move_klass_not_null {");
assert_different_registers(src, dst);
if (CompressedKlassPointers::base() != nullptr) {
mov64(dst, -(int64_t)CompressedKlassPointers::base());
movptr(dst, -(intptr_t)CompressedKlassPointers::base());
addq(dst, src);
} else {
movptr(dst, src);
@ -5427,9 +5440,11 @@ void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src)
if (CompressedKlassPointers::shift() != 0) {
shrq(dst, CompressedKlassPointers::shift());
}
BLOCK_COMMENT("} encode_and_move_klass_not_null");
}
void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
BLOCK_COMMENT("decode_klass_not_null {");
assert_different_registers(r, tmp);
// Note: it will change flags
assert(UseCompressedClassPointers, "should only be used for compressed headers");
@ -5443,13 +5458,15 @@ void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
if (AOTCodeCache::is_on_for_dump()) {
movptr(tmp, ExternalAddress(CompressedKlassPointers::base_addr()));
} else {
mov64(tmp, (int64_t)CompressedKlassPointers::base());
movptr(tmp, (intptr_t)CompressedKlassPointers::base());
}
addq(r, tmp);
}
BLOCK_COMMENT("} decode_klass_not_null");
}
void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) {
BLOCK_COMMENT("decode_and_move_klass_not_null {");
assert_different_registers(src, dst);
// Note: it will change flags
assert (UseCompressedClassPointers, "should only be used for compressed headers");
@ -5465,7 +5482,7 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src)
} else {
if (CompressedKlassPointers::shift() <= Address::times_8) {
if (CompressedKlassPointers::base() != nullptr) {
mov64(dst, (int64_t)CompressedKlassPointers::base());
movptr(dst, (intptr_t)CompressedKlassPointers::base());
} else {
xorq(dst, dst);
}
@ -5477,9 +5494,9 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src)
}
} else {
if (CompressedKlassPointers::base() != nullptr) {
const uint64_t base_right_shifted =
(uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
mov64(dst, base_right_shifted);
const intptr_t base_right_shifted =
(intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
movptr(dst, base_right_shifted);
} else {
xorq(dst, dst);
}
@ -5487,6 +5504,7 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src)
shlq(dst, CompressedKlassPointers::shift());
}
}
BLOCK_COMMENT("} decode_and_move_klass_not_null");
}
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -995,6 +995,8 @@ public:
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
void andpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void andnpd(XMMRegister dst, XMMRegister src) { Assembler::andnpd(dst, src); }
void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
@ -1007,6 +1009,8 @@ public:
void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
void comisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void orpd(XMMRegister dst, XMMRegister src) { Assembler::orpd(dst, src); }
void cmp32_mxcsr_std(Address mxcsr_save, Register tmp, Register rscratch = noreg);
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
void ldmxcsr(AddressLiteral src, Register rscratch = noreg);
@ -1241,6 +1245,9 @@ public:
void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
void evmovdqaq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
using Assembler::movapd;
void movapd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
@ -3692,6 +3692,9 @@ void StubGenerator::generate_libm_stubs() {
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtanh)) {
StubRoutines::_dtanh = generate_libmTanh(); // from stubGenerator_x86_64_tanh.cpp
}
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcbrt)) {
StubRoutines::_dcbrt = generate_libmCbrt(); // from stubGenerator_x86_64_cbrt.cpp
}
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
StubRoutines::_dexp = generate_libmExp(); // from stubGenerator_x86_64_exp.cpp
}
@ -556,6 +556,7 @@ class StubGenerator: public StubCodeGenerator {
address generate_libmCos();
address generate_libmTan();
address generate_libmTanh();
address generate_libmCbrt();
address generate_libmExp();
address generate_libmPow();
address generate_libmLog();
@ -0,0 +1,366 @@
/*
* Copyright (c) 2025, Intel Corporation. All rights reserved.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "macroAssembler_x86.hpp"
#include "stubGenerator_x86_64.hpp"
/******************************************************************************/
// ALGORITHM DESCRIPTION
// ---------------------
//
// x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52
// Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5],
// where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision
// cbrt(2^j * 1. b1 b2 .. b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5]
// (T stores the high 53 bits, D stores the low order bits)
// Result=2^k*T+(2^k*T*r)*P+2^k*D
// where P=p1+p2*r+..+p8*r^7
//
// Special cases:
// cbrt(NaN) = quiet NaN
// cbrt(+/-INF) = +/-INF
// cbrt(+/-0) = +/-0
//
/******************************************************************************/
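// Restating the polynomial step in the form it is usually evaluated (a
// sketch; p1..p8 are the eight doubles in _coeff_table below, T and D come
// from _cbrt_table and _D_table indexed by (j, b1..b5)):
//
//   P = p1 + r*(p2 + r*(p3 + r*(p4 + r*(p5 + r*(p6 + r*(p7 + r*p8))))))
//   cbrt(x) ~= 2^k * T + (2^k * T * r) * P + 2^k * D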
ATTRIBUTE_ALIGNED(4) static const juint _SIG_MASK[] =
{
0, 1032192
};
ATTRIBUTE_ALIGNED(4) static const juint _EXP_MASK[] =
{
0, 3220176896
};
ATTRIBUTE_ALIGNED(4) static const juint _EXP_MSK2[] =
{
0, 3220193280
};
ATTRIBUTE_ALIGNED(4) static const juint _EXP_MSK3[] =
{
4294967295, 1048575
};
ATTRIBUTE_ALIGNED(4) static const juint _SCALE63[] =
{
0, 1138753536
};
ATTRIBUTE_ALIGNED(4) static const juint _ZERON[] =
{
0, 2147483648
};
ATTRIBUTE_ALIGNED(4) static const juint _INF[] =
{
0, 2146435072
};
ATTRIBUTE_ALIGNED(4) static const juint _NEG_INF[] =
{
0, 4293918720
};
ATTRIBUTE_ALIGNED(16) static const juint _coeff_table[] =
{
1553778919, 3213899486, 3534952507, 3215266280, 1646371399,
3214412045, 477218588, 3216798151, 3582521621, 1066628362,
1007461464, 1068473053, 889629714, 1067378449, 1431655765,
1070945621
};
ATTRIBUTE_ALIGNED(4) static const juint _rcp_table[] =
{
528611360, 3220144632, 2884679527, 3220082993, 1991868891, 3220024928,
2298714891, 3219970134, 58835168, 3219918343, 3035110223, 3219869313,
1617585086, 3219822831, 2500867033, 3219778702, 4241943008, 3219736752,
258732970, 3219696825, 404232216, 3219658776, 2172167368, 3219622476,
1544257904, 3219587808, 377579543, 3219554664, 1616385542, 3219522945,
813783277, 3219492562, 3940743189, 3219463431, 2689777499, 3219435478,
1700977147, 3219408632, 3169102082, 3219382828, 327235604, 3219358008,
1244336319, 3219334115, 1300311200, 3219311099, 3095471925, 3219288912,
2166487928, 3219267511, 2913108253, 3219246854, 293672978, 3219226904,
288737297, 3219207624, 1810275472, 3219188981, 174592167, 3219170945,
3539053052, 3219153485, 2164392968, 3219136576
};
ATTRIBUTE_ALIGNED(4) static const juint _cbrt_table[] =
{
572345495, 1072698681, 1998204467, 1072709382, 3861501553, 1072719872,
2268192434, 1072730162, 2981979308, 1072740260, 270859143, 1072750176,
2958651392, 1072759916, 313113243, 1072769490, 919449400, 1072778903,
2809328903, 1072788162, 2222981587, 1072797274, 2352530781, 1072806244,
594152517, 1072815078, 1555767199, 1072823780, 4282421314, 1072832355,
2355578597, 1072840809, 1162590619, 1072849145, 797864051, 1072857367,
431273680, 1072865479, 2669831148, 1072873484, 733477752, 1072881387,
4280220604, 1072889189, 801961634, 1072896896, 2915370760, 1072904508,
1159613482, 1072912030, 2689944798, 1072919463, 1248687822, 1072926811,
2967951030, 1072934075, 630170432, 1072941259, 3760898254, 1072948363,
0, 1072955392, 2370273294, 1072962345, 1261754802, 1072972640,
546334065, 1072986123, 1054893830, 1072999340, 1571187597, 1073012304,
1107975175, 1073025027, 3606909377, 1073037519, 1113616747, 1073049792,
4154744632, 1073061853, 3358931423, 1073073713, 4060702372, 1073085379,
747576176, 1073096860, 3023138255, 1073108161, 1419988548, 1073119291,
1914185305, 1073130255, 294389948, 1073141060, 3761802570, 1073151710,
978281566, 1073162213, 823148820, 1073172572, 2420954441, 1073182792,
3815449908, 1073192878, 2046058587, 1073202835, 1807524753, 1073212666,
2628681401, 1073222375, 3225667357, 1073231966, 1555307421, 1073241443,
3454043099, 1073250808, 1208137896, 1073260066, 3659916772, 1073269218,
1886261264, 1073278269, 3593647839, 1073287220, 3086012205, 1073296075,
2769796922, 1073304836, 888716057, 1073317807, 2201465623, 1073334794,
164369365, 1073351447, 3462666733, 1073367780, 2773905457, 1073383810,
1342879088, 1073399550, 2543933975, 1073415012, 1684477781, 1073430209,
3532178543, 1073445151, 1147747300, 1073459850, 1928031793, 1073474314,
2079717015, 1073488553, 4016765315, 1073502575, 3670431139, 1073516389,
3549227225, 1073530002, 11637607, 1073543422, 588220169, 1073556654,
2635407503, 1073569705, 2042029317, 1073582582, 1925128962, 1073595290,
4136375664, 1073607834, 759964600, 1073620221, 4257606771, 1073632453,
297278907, 1073644538, 3655053093, 1073656477, 2442253172, 1073668277,
1111876799, 1073679941, 3330973139, 1073691472, 3438879452, 1073702875,
3671565478, 1073714153, 1317849547, 1073725310, 1642364115, 1073736348
};
ATTRIBUTE_ALIGNED(4) static const juint _D_table[] =
{
4050900474, 1014427190, 1157977860, 1016444461, 1374568199, 1017271387,
2809163288, 1016882676, 3742377377, 1013168191, 3101606597, 1017541672,
65224358, 1017217597, 2691591250, 1017266643, 4020758549, 1017689313,
1316310992, 1018030788, 1031537856, 1014090882, 3261395239, 1016413641,
886424999, 1016313335, 3114776834, 1014195875, 1681120620, 1017825416,
1329600273, 1016625740, 465474623, 1017097119, 4251633980, 1017169077,
1986990133, 1017710645, 752958613, 1017159641, 2216216792, 1018020163,
4282860129, 1015924861, 1557627859, 1016039538, 3889219754, 1018086237,
3684996408, 1017353275, 723532103, 1017717141, 2951149676, 1012528470,
831890937, 1017830553, 1031212645, 1017387331, 2741737450, 1017604974,
2863311531, 1003776682, 4276736099, 1013153088, 4111778382, 1015673686,
1728065769, 1016413986, 2708718031, 1018078833, 1069335005, 1015291224,
700037144, 1016482032, 2904566452, 1017226861, 4074156649, 1017622651,
25019565, 1015245366, 3601952608, 1015771755, 3267129373, 1017904664,
503203103, 1014921629, 2122011730, 1018027866, 3927295461, 1014189456,
2790625147, 1016024251, 1330460186, 1016940346, 4033568463, 1015538390,
3695818227, 1017509621, 257573361, 1017208868, 3227697852, 1017337964,
234118548, 1017169577, 4009025803, 1017278524, 1948343394, 1017749310,
678398162, 1018144239, 3083864863, 1016669086, 2415453452, 1017890370,
175467344, 1017330033, 3197359580, 1010339928, 2071276951, 1015941358,
268372543, 1016737773, 938132959, 1017389108, 1816750559, 1017337448,
4119203749, 1017152174, 2578653878, 1013108497, 2470331096, 1014678606,
123855735, 1016553320, 1265650889, 1014782687, 3414398172, 1017182638,
1040773369, 1016158401, 3483628886, 1016886550, 4140499405, 1016191425,
3893477850, 1016964495, 3935319771, 1009634717, 2978982660, 1015027112,
2452709923, 1017990229, 3190365712, 1015835149, 4237588139, 1015832925,
2610678389, 1017962711, 2127316774, 1017405770, 824267502, 1017959463,
2165924042, 1017912225, 2774007076, 1013257418, 4123916326, 1017582284,
1976417958, 1016959909, 4092806412, 1017711279, 119251817, 1015363631,
3475418768, 1017675415, 1972580503, 1015470684, 815541017, 1017517969,
2429917451, 1017397776, 4062888482, 1016749897, 68284153, 1017925678,
2207779246, 1016320298, 1183466520, 1017408657, 143326427, 1017060403
};
#define __ _masm->
address StubGenerator::generate_libmCbrt() {
StubGenStubId stub_id = StubGenStubId::dcbrt_id;
StubCodeMark mark(this, stub_id);
address start = __ pc();
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1;
Label B1_1, B1_2, B1_4;
address SIG_MASK = (address)_SIG_MASK;
address EXP_MASK = (address)_EXP_MASK;
address EXP_MSK2 = (address)_EXP_MSK2;
address EXP_MSK3 = (address)_EXP_MSK3;
address SCALE63 = (address)_SCALE63;
address ZERON = (address)_ZERON;
address INF = (address)_INF;
address NEG_INF = (address)_NEG_INF;
address coeff_table = (address)_coeff_table;
address rcp_table = (address)_rcp_table;
address cbrt_table = (address)_cbrt_table;
address D_table = (address)_D_table;
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ bind(B1_1);
__ subq(rsp, 24);
__ movsd(Address(rsp), xmm0);
__ bind(B1_2);
__ movq(xmm7, xmm0);
__ movl(rdx, 524032);
__ movsd(xmm5, ExternalAddress(EXP_MSK3), r11 /*rscratch*/);
__ movsd(xmm3, ExternalAddress(EXP_MSK2), r11 /*rscratch*/);
__ psrlq(xmm7, 44);
__ pextrw(rcx, xmm7, 0);
__ movdl(rax, xmm7);
__ movsd(xmm1, ExternalAddress(EXP_MASK), r11 /*rscratch*/);
__ movsd(xmm2, ExternalAddress(SIG_MASK), r11 /*rscratch*/);
__ andl(rcx, 248);
__ lea(r8, ExternalAddress(rcp_table));
__ movsd(xmm4, Address(rcx, r8, Address::times_1));
__ movq(r9, rax);
__ andl(rdx, rax);
__ cmpl(rdx, 0);
__ jcc(Assembler::equal, L_2TAG_PACKET_0_0_1); // Branch only if |x| is denormalized
__ cmpl(rdx, 524032);
__ jcc(Assembler::equal, L_2TAG_PACKET_1_0_1); // Branch only if |x| is INF or NaN
__ shrl(rdx, 8);
__ shrq(r9, 8);
__ andpd(xmm2, xmm0);
__ andpd(xmm0, xmm5);
__ orpd(xmm3, xmm2);
__ orpd(xmm1, xmm0);
__ movapd(xmm5, ExternalAddress(coeff_table), r11 /*rscratch*/);
__ movl(rax, 5462);
__ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/);
__ mull(rdx);
__ movq(rdx, r9);
__ andq(r9, 2047);
__ shrl(rax, 14);
__ andl(rdx, 2048);
__ subq(r9, rax);
__ subq(r9, rax);
__ subq(r9, rax);
__ shlq(r9, 8);
__ addl(rax, 682);
__ orl(rax, rdx);
__ movdl(xmm7, rax);
__ addq(rcx, r9);
__ psllq(xmm7, 52);
__ bind(L_2TAG_PACKET_2_0_1);
__ movapd(xmm2, ExternalAddress(coeff_table + 32), r11 /*rscratch*/);
__ movapd(xmm0, ExternalAddress(coeff_table + 48), r11 /*rscratch*/);
__ subsd(xmm1, xmm3);
__ movq(xmm3, xmm7);
__ lea(r8, ExternalAddress(cbrt_table));
__ mulsd(xmm7, Address(rcx, r8, Address::times_1));
__ mulsd(xmm1, xmm4);
__ lea(r8, ExternalAddress(D_table));
__ mulsd(xmm3, Address(rcx, r8, Address::times_1));
__ movapd(xmm4, xmm1);
__ unpcklpd(xmm1, xmm1);
__ mulpd(xmm5, xmm1);
__ mulpd(xmm6, xmm1);
__ mulpd(xmm1, xmm1);
__ addpd(xmm2, xmm5);
__ addpd(xmm0, xmm6);
__ mulpd(xmm2, xmm1);
__ mulpd(xmm1, xmm1);
__ mulsd(xmm4, xmm7);
__ addpd(xmm0, xmm2);
__ mulsd(xmm1, xmm0);
__ unpckhpd(xmm0, xmm0);
__ addsd(xmm0, xmm1);
__ mulsd(xmm0, xmm4);
__ addsd(xmm0, xmm3);
__ addsd(xmm0, xmm7);
__ jmp(B1_4);
__ bind(L_2TAG_PACKET_0_0_1);
__ mulsd(xmm0, ExternalAddress(SCALE63), r11 /*rscratch*/);
__ movq(xmm7, xmm0);
__ movl(rdx, 524032);
__ psrlq(xmm7, 44);
__ pextrw(rcx, xmm7, 0);
__ movdl(rax, xmm7);
__ andl(rcx, 248);
__ lea(r8, ExternalAddress(rcp_table));
__ movsd(xmm4, Address(rcx, r8, Address::times_1));
__ movq(r9, rax);
__ andl(rdx, rax);
__ shrl(rdx, 8);
__ shrq(r9, 8);
__ cmpl(rdx, 0);
__ jcc(Assembler::equal, L_2TAG_PACKET_3_0_1); // Branch only if |x| is zero
__ andpd(xmm2, xmm0);
__ andpd(xmm0, xmm5);
__ orpd(xmm3, xmm2);
__ orpd(xmm1, xmm0);
__ movapd(xmm5, ExternalAddress(coeff_table), r11 /*rscratch*/);
__ movl(rax, 5462);
__ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/);
__ mull(rdx);
__ movq(rdx, r9);
__ andq(r9, 2047);
__ shrl(rax, 14);
__ andl(rdx, 2048);
__ subq(r9, rax);
__ subq(r9, rax);
__ subq(r9, rax);
__ shlq(r9, 8);
__ addl(rax, 661);
__ orl(rax, rdx);
__ movdl(xmm7, rax);
__ addq(rcx, r9);
__ psllq(xmm7, 52);
__ jmp(L_2TAG_PACKET_2_0_1);
__ bind(L_2TAG_PACKET_3_0_1);
__ cmpq(r9, 0);
__ jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1); // Branch only if x is negative zero
__ xorpd(xmm0, xmm0);
__ jmp(B1_4);
__ bind(L_2TAG_PACKET_4_0_1);
__ movsd(xmm0, ExternalAddress(ZERON), r11 /*rscratch*/);
__ jmp(B1_4);
__ bind(L_2TAG_PACKET_1_0_1);
__ movl(rax, Address(rsp, 4));
__ movl(rdx, Address(rsp));
__ movl(rcx, rax);
__ andl(rcx, 2147483647);
__ cmpl(rcx, 2146435072);
__ jcc(Assembler::above, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN
__ cmpl(rdx, 0);
__ jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN
__ cmpl(rax, 2146435072);
__ jcc(Assembler::notEqual, L_2TAG_PACKET_6_0_1); // Branch only if x is negative INF
__ movsd(xmm0, ExternalAddress(INF), r11 /*rscratch*/);
__ jmp(B1_4);
__ bind(L_2TAG_PACKET_6_0_1);
__ movsd(xmm0, ExternalAddress(NEG_INF), r11 /*rscratch*/);
__ jmp(B1_4);
__ bind(L_2TAG_PACKET_5_0_1);
__ movsd(xmm0, Address(rsp));
__ addsd(xmm0, xmm0);
__ movq(Address(rsp, 8), xmm0);
__ bind(B1_4);
__ addq(rsp, 24);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
#undef __
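The entry returned above is installed as StubRoutines::dcbrt() and called with the native calling convention (argument and result in xmm0), as the interpreter hunk further below shows. A hypothetical sketch of how such a unary libm stub is consumed from C++ (the typedef name is invented; CAST_TO_FN_PTR is HotSpot's function-pointer cast macro):

    typedef double (*unary_math_stub_t)(double);

    static double call_dcbrt(address entry, double x) {
      // The generated stub behaves like an ordinary double(double) function.
      return CAST_TO_FN_PTR(unary_math_stub_t, entry)(x);
    }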

View File

@ -1441,6 +1441,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
Interpreter::_remove_activation_preserving_args_entry = __ pc();
__ empty_expression_stack();
__ restore_bcp(); // We could have returned from deoptimizing this frame, so restore rbcp.
// Set the popframe_processing bit in pending_popframe_condition
// indicating that we are currently handling popframe, so that
// call_VMs that may happen later do not trigger new popframe

View File

@ -468,6 +468,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
assert(StubRoutines::dtanh() != nullptr, "not initialized");
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtanh())));
} else if (kind == Interpreter::java_lang_math_cbrt) {
assert(StubRoutines::dcbrt() != nullptr, "not initialized");
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcbrt())));
} else if (kind == Interpreter::java_lang_math_abs) {
assert(StubRoutines::x86::double_sign_mask() != nullptr, "not initialized");
__ movdbl(xmm0, Address(rsp, wordSize));

View File

@ -1687,8 +1687,7 @@ void TemplateTable::float_cmp(bool is_float, int unordered_result) {
void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ get_method(rcx); // rcx holds method
__ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
// holds bumped taken count
__ profile_taken_branch(rax); // rax holds updated MDP
const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
InvocationCounter::counter_offset();
@ -1739,7 +1738,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
if (UseLoopCounter) {
// increment backedge counter for backward branches
// rax: MDO
// rbx: MDO bumped taken-count
// rcx: method
// rdx: target offset
// r13: target bcp

View File

@ -909,7 +909,7 @@ void VM_Version::get_processor_features() {
}
// Check if processor has Intel Ecore
if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
(_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
_model == 0xCC || _model == 0xDD)) {
FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
@ -1594,7 +1594,7 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
UseStoreImmI16 = false; // don't use it on Intel cpus
}
if (cpu_family() == 6 || cpu_family() == 15) {
if (is_intel_server_family() || cpu_family() == 15) {
if (FLAG_IS_DEFAULT(UseAddressNop)) {
// Use it on all Intel cpus starting from PentiumPro
UseAddressNop = true;
@ -1610,7 +1610,7 @@ void VM_Version::get_processor_features() {
UseXmmRegToRegMoveAll = false;
}
}
if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(MaxLoopPad)) {
// For new Intel cpus do the next optimization:
@ -1848,7 +1848,7 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
}
if (is_intel() && cpu_family() == 6 && supports_sse3()) {
if (is_intel() && is_intel_server_family() && supports_sse3()) {
if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
supports_sse4_2() && supports_ht()) { // Nehalem based cpus
FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
@ -3262,7 +3262,7 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
return 128; // Athlon
}
} else { // Intel
if (supports_sse3() && cpu_family() == 6) {
if (supports_sse3() && is_intel_server_family()) {
if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
return 192;
} else if (use_watermark_prefetch) { // watermark prefetching on Core
@ -3270,7 +3270,7 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
}
}
if (supports_sse2()) {
if (cpu_family() == 6) {
if (is_intel_server_family()) {
return 256; // Pentium M, Core, Core2
} else {
return 512; // Pentium 4

View File

@ -791,6 +791,7 @@ public:
static uint32_t cpu_stepping() { return _cpuid_info.cpu_stepping(); }
static int cpu_family() { return _cpu;}
static bool is_P6() { return cpu_family() >= 6; }
static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 19; }
static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
static bool is_amd_family() { return is_amd() || is_hygon(); }
@ -946,7 +947,7 @@ public:
}
// Intel Core and newer cpus have fast IDIV instruction (excluding Atom).
static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 &&
static bool has_fast_idiv() { return is_intel() && is_intel_server_family() &&
supports_sse3() && _model != 0x1C; }
static bool supports_compare_and_exchange() { return true; }

View File

@ -2055,6 +2055,10 @@ ins_attrib ins_alignment(1); // Required alignment attribute (must
// compute_padding() function must be
// provided for the instruction
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
@ -7052,21 +7056,6 @@ instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
ins_pipe(ialu_reg_mem);
%}
instruct addI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (AddI (LoadI src1) src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
ins_cost(150);
format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
__ eaddl($dst$$Register, $src1$$Address, $src2$$Register, false);
%}
ins_pipe(ialu_reg_mem);
%}
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
predicate(UseAPX);
@ -7370,21 +7359,6 @@ instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
ins_pipe(ialu_reg_mem);
%}
instruct addL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (AddL (LoadL src1) src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
ins_cost(150);
format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
__ eaddq($dst$$Register, $src1$$Address, $src2$$Register, false);
%}
ins_pipe(ialu_reg_mem);
%}
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
match(Set dst (StoreL dst (AddL (LoadL dst) src)));
@ -8596,7 +8570,6 @@ instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
predicate(!UseAPX);
match(Set dst (MulI src imm));
effect(KILL cr);
@ -8608,20 +8581,6 @@ instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
ins_pipe(ialu_reg_reg_alu0);
%}
instruct mulI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (MulI src1 src2));
effect(KILL cr);
ins_cost(300);
format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
__ eimull($dst$$Register, $src1$$Register, $src2$$constant, false);
%}
ins_pipe(ialu_reg_reg_alu0);
%}
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
predicate(!UseAPX);
@ -8652,7 +8611,6 @@ instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
predicate(!UseAPX);
match(Set dst (MulI (LoadI src) imm));
effect(KILL cr);
@ -8664,20 +8622,6 @@ instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
ins_pipe(ialu_reg_mem_alu0);
%}
instruct mulI_rReg_mem_imm(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (MulI (LoadI src1) src2));
effect(KILL cr);
ins_cost(300);
format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
__ eimull($dst$$Register, $src1$$Address, $src2$$constant, false);
%}
ins_pipe(ialu_reg_mem_alu0);
%}
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
@ -8718,7 +8662,6 @@ instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
predicate(!UseAPX);
match(Set dst (MulL src imm));
effect(KILL cr);
@ -8730,20 +8673,6 @@ instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
ins_pipe(ialu_reg_reg_alu0);
%}
instruct mulL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (MulL src1 src2));
effect(KILL cr);
ins_cost(300);
format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
__ eimulq($dst$$Register, $src1$$Register, $src2$$constant, false);
%}
ins_pipe(ialu_reg_reg_alu0);
%}
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
predicate(!UseAPX);
@ -8774,7 +8703,6 @@ instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
predicate(!UseAPX);
match(Set dst (MulL (LoadL src) imm));
effect(KILL cr);
@ -8786,20 +8714,6 @@ instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
ins_pipe(ialu_reg_mem_alu0);
%}
instruct mulL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (MulL (LoadL src1) src2));
effect(KILL cr);
ins_cost(300);
format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
__ eimulq($dst$$Register, $src1$$Address, $src2$$constant, false);
%}
ins_pipe(ialu_reg_mem_alu0);
%}
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
match(Set dst (MulHiL src rax));
@ -10689,21 +10603,6 @@ instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
ins_pipe(ialu_reg_mem);
%}
instruct xorI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (XorI (LoadI src1) src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
ins_cost(150);
format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
__ exorl($dst$$Register, $src1$$Address, $src2$$Register, false);
%}
ins_pipe(ialu_reg_mem);
%}
// Xor Memory with Register
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
@ -10883,21 +10782,6 @@ instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
ins_pipe(ialu_reg_mem);
%}
instruct andL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (AndL (LoadL src1) src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
ins_cost(150);
format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
__ eandq($dst$$Register, $src1$$Address, $src2$$Register, false);
%}
ins_pipe(ialu_reg_mem);
%}
// And Memory with Register
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
@ -11393,21 +11277,6 @@ instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
ins_pipe(ialu_reg_mem);
%}
instruct xorL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
%{
predicate(UseAPX);
match(Set dst (XorL (LoadL src1) src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
ins_cost(150);
format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
__ exorq($dst$$Register, $src1$$Address, $src2$$Register, false);
%}
ins_pipe(ialu_reg_mem);
%}
// Xor Memory with Register
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{

View File

@ -1261,69 +1261,6 @@ void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) {
// Nothing to do beyond of what os::print_cpu_info() does.
}
static char saved_jvm_path[MAXPATHLEN] = {0};
// Find the full path to the current module, libjvm.so.
void os::jvm_path(char *buf, jint buflen) {
// Error checking.
if (buflen < MAXPATHLEN) {
assert(false, "must use a large-enough buffer");
buf[0] = '\0';
return;
}
// Lazy resolve the path to current module.
if (saved_jvm_path[0] != 0) {
strcpy(buf, saved_jvm_path);
return;
}
Dl_info dlinfo;
int ret = dladdr(CAST_FROM_FN_PTR(void *, os::jvm_path), &dlinfo);
assert(ret != 0, "cannot locate libjvm");
char* rp = os::realpath((char *)dlinfo.dli_fname, buf, buflen);
assert(rp != nullptr, "error in realpath(): maybe the 'path' argument is too long?");
// If executing unit tests we require JAVA_HOME to point to the real JDK.
if (Arguments::executing_unit_tests()) {
// Look for JAVA_HOME in the environment.
char* java_home_var = ::getenv("JAVA_HOME");
if (java_home_var != nullptr && java_home_var[0] != 0) {
// Check the current module name "libjvm.so".
const char* p = strrchr(buf, '/');
if (p == nullptr) {
return;
}
assert(strstr(p, "/libjvm") == p, "invalid library name");
stringStream ss(buf, buflen);
rp = os::realpath(java_home_var, buf, buflen);
if (rp == nullptr) {
return;
}
assert((int)strlen(buf) < buflen, "Ran out of buffer room");
ss.print("%s/lib", buf);
if (0 == access(buf, F_OK)) {
// Use current module name "libjvm.so"
ss.print("/%s/libjvm%s", Abstract_VM_Version::vm_variant(), JNI_LIB_SUFFIX);
assert(strcmp(buf + strlen(buf) - strlen(JNI_LIB_SUFFIX), JNI_LIB_SUFFIX) == 0,
"buf has been truncated");
} else {
// Go back to path of .so
rp = os::realpath((char *)dlinfo.dli_fname, buf, buflen);
if (rp == nullptr) {
return;
}
}
}
}
strncpy(saved_jvm_path, buf, sizeof(saved_jvm_path));
saved_jvm_path[sizeof(saved_jvm_path) - 1] = '\0';
}
////////////////////////////////////////////////////////////////////////////////
// Virtual Memory

View File

@ -1482,83 +1482,6 @@ void os::print_memory_info(outputStream* st) {
st->cr();
}
static char saved_jvm_path[MAXPATHLEN] = {0};
// Find the full path to the current module, libjvm
void os::jvm_path(char *buf, jint buflen) {
// Error checking.
if (buflen < MAXPATHLEN) {
assert(false, "must use a large-enough buffer");
buf[0] = '\0';
return;
}
// Lazy resolve the path to current module.
if (saved_jvm_path[0] != 0) {
strcpy(buf, saved_jvm_path);
return;
}
char dli_fname[MAXPATHLEN];
dli_fname[0] = '\0';
bool ret = dll_address_to_library_name(
CAST_FROM_FN_PTR(address, os::jvm_path),
dli_fname, sizeof(dli_fname), nullptr);
assert(ret, "cannot locate libjvm");
char *rp = nullptr;
if (ret && dli_fname[0] != '\0') {
rp = os::realpath(dli_fname, buf, buflen);
}
if (rp == nullptr) {
return;
}
// If executing unit tests we require JAVA_HOME to point to the real JDK.
if (Arguments::executing_unit_tests()) {
// Look for JAVA_HOME in the environment.
char* java_home_var = ::getenv("JAVA_HOME");
if (java_home_var != nullptr && java_home_var[0] != 0) {
// Check the current module name "libjvm"
const char* p = strrchr(buf, '/');
assert(strstr(p, "/libjvm") == p, "invalid library name");
stringStream ss(buf, buflen);
rp = os::realpath(java_home_var, buf, buflen);
if (rp == nullptr) {
return;
}
assert((int)strlen(buf) < buflen, "Ran out of buffer space");
// Add the appropriate library and JVM variant subdirs
ss.print("%s/lib/%s", buf, Abstract_VM_Version::vm_variant());
if (0 != access(buf, F_OK)) {
ss.reset();
ss.print("%s/lib", buf);
}
// If the path exists within JAVA_HOME, add the JVM library name
// to complete the path to JVM being overridden. Otherwise fallback
// to the path to the current library.
if (0 == access(buf, F_OK)) {
// Use current module name "libjvm"
ss.print("/libjvm%s", JNI_LIB_SUFFIX);
assert(strcmp(buf + strlen(buf) - strlen(JNI_LIB_SUFFIX), JNI_LIB_SUFFIX) == 0,
"buf has been truncated");
} else {
// Fall back to path of current library
rp = os::realpath(dli_fname, buf, buflen);
if (rp == nullptr) {
return;
}
}
}
}
strncpy(saved_jvm_path, buf, MAXPATHLEN);
saved_jvm_path[MAXPATHLEN - 1] = '\0';
}
////////////////////////////////////////////////////////////////////////////////
// Virtual Memory

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,9 +35,6 @@
range, \
constraint) \
\
product(bool, UseOprofile, false, \
"(Deprecated) enable support for Oprofile profiler") \
\
product(bool, UseTransparentHugePages, false, \
"Use MADV_HUGEPAGE for large pages") \
\

View File

@ -2746,118 +2746,9 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) {
#endif
}
static char saved_jvm_path[MAXPATHLEN] = {0};
// Find the full path to the current module, libjvm.so
void os::jvm_path(char *buf, jint buflen) {
// Error checking.
if (buflen < MAXPATHLEN) {
assert(false, "must use a large-enough buffer");
buf[0] = '\0';
return;
}
// Lazy resolve the path to current module.
if (saved_jvm_path[0] != 0) {
strcpy(buf, saved_jvm_path);
return;
}
char dli_fname[MAXPATHLEN];
dli_fname[0] = '\0';
bool ret = dll_address_to_library_name(
CAST_FROM_FN_PTR(address, os::jvm_path),
dli_fname, sizeof(dli_fname), nullptr);
assert(ret, "cannot locate libjvm");
char *rp = nullptr;
if (ret && dli_fname[0] != '\0') {
rp = os::realpath(dli_fname, buf, buflen);
}
if (rp == nullptr) {
return;
}
// If executing unit tests we require JAVA_HOME to point to the real JDK.
if (Arguments::executing_unit_tests()) {
// Look for JAVA_HOME in the environment.
char* java_home_var = ::getenv("JAVA_HOME");
if (java_home_var != nullptr && java_home_var[0] != 0) {
// Check the current module name "libjvm.so".
const char* p = strrchr(buf, '/');
if (p == nullptr) {
return;
}
assert(strstr(p, "/libjvm") == p, "invalid library name");
stringStream ss(buf, buflen);
rp = os::realpath(java_home_var, buf, buflen);
if (rp == nullptr) {
return;
}
assert((int)strlen(buf) < buflen, "Ran out of buffer room");
ss.print("%s/lib", buf);
if (0 == access(buf, F_OK)) {
// Use current module name "libjvm.so"
ss.print("/%s/libjvm%s", Abstract_VM_Version::vm_variant(), JNI_LIB_SUFFIX);
assert(strcmp(buf + strlen(buf) - strlen(JNI_LIB_SUFFIX), JNI_LIB_SUFFIX) == 0,
"buf has been truncated");
} else {
// Go back to path of .so
rp = os::realpath(dli_fname, buf, buflen);
if (rp == nullptr) {
return;
}
}
}
}
strncpy(saved_jvm_path, buf, MAXPATHLEN);
saved_jvm_path[MAXPATHLEN - 1] = '\0';
}
////////////////////////////////////////////////////////////////////////////////
// Virtual Memory
// Rationale behind this function:
// current (Mon Apr 25 20:12:18 MSD 2005) oprofile drops samples without executable
// mapping for address (see lookup_dcookie() in the kernel module), thus we cannot get
// samples for JITted code. Here we create private executable mapping over the code cache
// and then we can use standard (well, almost, as mapping can change) way to provide
// info for the reporting script by storing timestamp and location of symbol
void linux_wrap_code(char* base, size_t size) {
static volatile jint cnt = 0;
static_assert(sizeof(off_t) == 8, "Expected Large File Support in this file");
if (!UseOprofile) {
return;
}
char buf[PATH_MAX+1];
int num = Atomic::add(&cnt, 1);
snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d",
os::get_temp_directory(), os::current_process_id(), num);
unlink(buf);
int fd = ::open(buf, O_CREAT | O_RDWR, S_IRWXU);
if (fd != -1) {
off_t rv = ::lseek(fd, size-2, SEEK_SET);
if (rv != (off_t)-1) {
if (::write(fd, "", 1) == 1) {
mmap(base, size,
PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE, fd, 0);
}
}
::close(fd);
unlink(buf);
}
}
static bool recoverable_mmap_error(int err) {
// See if the error is one we can let the caller handle. This
// list of errno values comes from JBS-6843484. I can't find a

View File

@ -1060,6 +1060,95 @@ bool os::same_files(const char* file1, const char* file2) {
return is_same;
}
static char saved_jvm_path[MAXPATHLEN] = {0};
// Find the full path to the current module, libjvm.so
void os::jvm_path(char *buf, jint buflen) {
// Error checking.
if (buflen < MAXPATHLEN) {
assert(false, "must use a large-enough buffer");
buf[0] = '\0';
return;
}
// Lazy resolve the path to current module.
if (saved_jvm_path[0] != 0) {
strcpy(buf, saved_jvm_path);
return;
}
char* fname;
#ifdef AIX
Dl_info dlinfo;
int ret = dladdr(CAST_FROM_FN_PTR(void *, os::jvm_path), &dlinfo);
assert(ret != 0, "cannot locate libjvm");
if (ret == 0) {
return;
}
fname = dlinfo.dli_fname;
#else
char dli_fname[MAXPATHLEN];
dli_fname[0] = '\0';
bool ret = dll_address_to_library_name(
CAST_FROM_FN_PTR(address, os::jvm_path),
dli_fname, sizeof(dli_fname), nullptr);
assert(ret, "cannot locate libjvm");
if (!ret) {
return;
}
fname = dli_fname;
#endif // AIX
char* rp = nullptr;
if (fname[0] != '\0') {
rp = os::realpath(fname, buf, buflen);
}
if (rp == nullptr) {
return;
}
// If executing unit tests we require JAVA_HOME to point to the real JDK.
if (Arguments::executing_unit_tests()) {
// Look for JAVA_HOME in the environment.
char* java_home_var = ::getenv("JAVA_HOME");
if (java_home_var != nullptr && java_home_var[0] != 0) {
// Check the current module name "libjvm.so".
const char* p = strrchr(buf, '/');
if (p == nullptr) {
return;
}
assert(strstr(p, "/libjvm") == p, "invalid library name");
stringStream ss(buf, buflen);
rp = os::realpath(java_home_var, buf, buflen);
if (rp == nullptr) {
return;
}
assert((int)strlen(buf) < buflen, "Ran out of buffer room");
ss.print("%s/lib", buf);
// If the path exists within JAVA_HOME, add the VM variant directory and JVM
// library name to complete the path to JVM being overridden. Otherwise fall
// back to the path to the current library.
if (0 == access(buf, F_OK)) {
// Use current module name "libjvm.so"
ss.print("/%s/libjvm%s", Abstract_VM_Version::vm_variant(), JNI_LIB_SUFFIX);
assert(strcmp(buf + strlen(buf) - strlen(JNI_LIB_SUFFIX), JNI_LIB_SUFFIX) == 0,
"buf has been truncated");
} else {
// Go back to path of .so
rp = os::realpath(fname, buf, buflen);
if (rp == nullptr) {
return;
}
}
}
}
strncpy(saved_jvm_path, buf, MAXPATHLEN);
saved_jvm_path[MAXPATHLEN - 1] = '\0';
}
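For example (hypothetical layout): with JAVA_HOME=/opt/jdk and VM variant "server" on Linux, the override path assembled above becomes /opt/jdk/lib/server/libjvm.so, and it is used only if /opt/jdk/lib exists; otherwise the function falls back to the real path of the library currently loaded.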
// Called when creating the thread. The minimum stack sizes have already been calculated
size_t os::Posix::get_initial_stack_size(ThreadType thr_type, size_t req_stack_size) {
size_t stack_size;

View File

@ -1505,6 +1505,14 @@ bool PosixSignals::is_sig_ignored(int sig) {
}
}
void* PosixSignals::get_signal_handler_for_signal(int sig) {
struct sigaction oact;
if (sigaction(sig, (struct sigaction*)nullptr, &oact) == -1) {
return nullptr;
}
return get_signal_handler(&oact);
}
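The query relies on the POSIX guarantee that sigaction() with a null new action only reads the current disposition without changing it. A standalone sketch of the same pattern (the SA_SIGINFO decoding mirrors what a helper like get_signal_handler() has to do; details here are illustrative):

    #include <csignal>
    #include <cstdio>

    int main() {
      struct sigaction oact;
      if (sigaction(SIGSEGV, nullptr, &oact) == -1) {
        return 1; // query failed, no handler information available
      }
      // A handler installed with SA_SIGINFO lives in sa_sigaction,
      // otherwise in sa_handler.
      void* h = (oact.sa_flags & SA_SIGINFO)
          ? reinterpret_cast<void*>(oact.sa_sigaction)
          : reinterpret_cast<void*>(oact.sa_handler);
      std::printf("SIGSEGV handler: %p\n", h);
      return 0;
    }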
static void signal_sets_init() {
sigemptyset(&preinstalled_sigs);

View File

@ -52,6 +52,8 @@ public:
static bool is_sig_ignored(int sig);
static void* get_signal_handler_for_signal(int sig);
static void hotspot_sigmask(Thread* thread);
static void print_signal_handler(outputStream* st, int sig, char* buf, size_t buflen);

View File

@ -81,14 +81,12 @@
#endif
#define SPELL_REG_SP "sp"
#define SPELL_REG_FP "fp"
#ifdef __APPLE__
// see darwin-xnu/osfmk/mach/arm/_structs.h
// 10.5 UNIX03 member name prefixes
#define DU3_PREFIX(s, m) __ ## s.__ ## m
#endif
#define context_x uc_mcontext->DU3_PREFIX(ss,x)
#define context_fp uc_mcontext->DU3_PREFIX(ss,fp)
@ -97,6 +95,31 @@
#define context_pc uc_mcontext->DU3_PREFIX(ss,pc)
#define context_cpsr uc_mcontext->DU3_PREFIX(ss,cpsr)
#define context_esr uc_mcontext->DU3_PREFIX(es,esr)
#endif
#ifdef __FreeBSD__
# define context_x uc_mcontext.mc_gpregs.gp_x
# define context_fp context_x[REG_FP]
# define context_lr uc_mcontext.mc_gpregs.gp_lr
# define context_sp uc_mcontext.mc_gpregs.gp_sp
# define context_pc uc_mcontext.mc_gpregs.gp_elr
#endif
#ifdef __NetBSD__
# define context_x uc_mcontext.__gregs
# define context_fp uc_mcontext.__gregs[_REG_FP]
# define context_lr uc_mcontext.__gregs[_REG_LR]
# define context_sp uc_mcontext.__gregs[_REG_SP]
# define context_pc uc_mcontext.__gregs[_REG_ELR]
#endif
#ifdef __OpenBSD__
# define context_x sc_x
# define context_fp sc_x[REG_FP]
# define context_lr sc_lr
# define context_sp sc_sp
# define context_pc sc_elr
#endif
#define REG_BCP context_x[22]
@ -497,9 +520,11 @@ int os::extra_bang_size_in_bytes() {
return 0;
}
#ifdef __APPLE__
void os::current_thread_enable_wx(WXMode mode) {
pthread_jit_write_protect_np(mode == WXExec);
}
#endif
static inline void atomic_copy64(const volatile void *src, volatile void *dst) {
*(jlong *) dst = *(const jlong *) src;

View File

@ -481,7 +481,3 @@ int get_legal_text(FileBuff &fbuf, char **legal_text)
*legal_text = legal_start;
return (int) (legal_end - legal_start);
}
void *operator new( size_t size, int, const char *, int ) throw() {
return ::operator new( size );
}

View File

@ -1626,6 +1626,8 @@ void ArchDesc::declareClasses(FILE *fp) {
while (attr != nullptr) {
if (strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") == 0) {
fprintf(fp, " virtual bool is_TrapBasedCheckNode() const { return %s; }\n", attr->_val);
} else if (strcmp (attr->_ident, "ins_is_late_expanded_null_check_candidate") == 0) {
fprintf(fp, " virtual bool is_late_expanded_null_check_candidate() const { return %s; }\n", attr->_val);
} else if (strcmp (attr->_ident, "ins_cost") != 0 &&
strncmp(attr->_ident, "ins_field_", 10) != 0 &&
// Must match function in node.hpp: return type bool, no prefix "ins_".
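Concretely, for an instruct that sets the attribute to true, the generated machine-node class gains a virtual override answering the query; a sketch of the output shape (the node class name is invented for illustration):

    class someLoadNode : public MachNode {
      // ... other adlc-generated members ...
      virtual bool is_late_expanded_null_check_candidate() const { return true; }
    };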

View File

@ -29,6 +29,7 @@
#include "compiler/disassembler.hpp"
#include "logging/log.hpp"
#include "oops/klass.inline.hpp"
#include "oops/methodCounters.hpp"
#include "oops/methodData.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/icache.hpp"
@ -537,6 +538,9 @@ void CodeBuffer::finalize_oop_references(const methodHandle& mh) {
if (m->is_methodData()) {
m = ((MethodData*)m)->method();
}
if (m->is_methodCounters()) {
m = ((MethodCounters*)m)->method();
}
if (m->is_method()) {
m = ((Method*)m)->method_holder();
}
@ -561,6 +565,9 @@ void CodeBuffer::finalize_oop_references(const methodHandle& mh) {
if (m->is_methodData()) {
m = ((MethodData*)m)->method();
}
if (m->is_methodCounters()) {
m = ((MethodCounters*)m)->method();
}
if (m->is_method()) {
m = ((Method*)m)->method_holder();
}
@ -1099,7 +1106,8 @@ CHeapString::~CHeapString() {
// offset is a byte offset into an instruction stream (CodeBuffer, CodeBlob or
// other memory buffer) and remark is a string (comment).
//
AsmRemarks::AsmRemarks() : _remarks(new AsmRemarkCollection()) {
AsmRemarks::AsmRemarks() {
init();
assert(_remarks != nullptr, "Allocation failure!");
}
@ -1107,6 +1115,10 @@ AsmRemarks::~AsmRemarks() {
assert(_remarks == nullptr, "Must 'clear()' before deleting!");
}
void AsmRemarks::init() {
_remarks = new AsmRemarkCollection();
}
const char* AsmRemarks::insert(uint offset, const char* remstr) {
precond(remstr != nullptr);
return _remarks->insert(offset, remstr);
@ -1151,7 +1163,8 @@ uint AsmRemarks::print(uint offset, outputStream* strm) const {
// Acting as interface to reference counted collection of (debug) strings used
// in the code generated, and thus requiring a fixed address.
//
DbgStrings::DbgStrings() : _strings(new DbgStringCollection()) {
DbgStrings::DbgStrings() {
init();
assert(_strings != nullptr, "Allocation failure!");
}
@ -1159,6 +1172,10 @@ DbgStrings::~DbgStrings() {
assert(_strings == nullptr, "Must 'clear()' before deleting!");
}
void DbgStrings::init() {
_strings = new DbgStringCollection();
}
const char* DbgStrings::insert(const char* dbgstr) {
const char* str = _strings->lookup(dbgstr);
return str != nullptr ? str : _strings->insert(dbgstr);

View File

@ -426,6 +426,8 @@ class AsmRemarks {
AsmRemarks();
~AsmRemarks();
void init();
const char* insert(uint offset, const char* remstr);
bool is_empty() const;
@ -452,6 +454,8 @@ class DbgStrings {
DbgStrings();
~DbgStrings();
void init();
const char* insert(const char* dbgstr);
bool is_empty() const;

View File

@ -168,6 +168,7 @@ bool Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_dtan:
#if defined(AMD64)
case vmIntrinsics::_dtanh:
case vmIntrinsics::_dcbrt:
#endif
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:

View File

@ -3298,6 +3298,7 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope)
case vmIntrinsics::_dcos : // fall through
case vmIntrinsics::_dtan : // fall through
case vmIntrinsics::_dtanh : // fall through
case vmIntrinsics::_dcbrt : // fall through
case vmIntrinsics::_dlog : // fall through
case vmIntrinsics::_dlog10 : // fall through
case vmIntrinsics::_dexp : // fall through

View File

@ -2870,6 +2870,7 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_dtanh: // fall through
case vmIntrinsics::_dsin : // fall through
case vmIntrinsics::_dcos : // fall through
case vmIntrinsics::_dcbrt : // fall through
case vmIntrinsics::_dexp : // fall through
case vmIntrinsics::_dpow : do_MathIntrinsic(x); break;
case vmIntrinsics::_arraycopy: do_ArrayCopy(x); break;

View File

@ -365,6 +365,7 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, StubRoutines::dcos());
FUNCTION_CASE(entry, StubRoutines::dtan());
FUNCTION_CASE(entry, StubRoutines::dtanh());
FUNCTION_CASE(entry, StubRoutines::dcbrt());
#undef FUNCTION_CASE
@ -508,7 +509,7 @@ static nmethod* counter_overflow_helper(JavaThread* current, int branch_bci, Met
JRT_BLOCK_ENTRY(address, Runtime1::counter_overflow(JavaThread* current, int bci, Method* method))
nmethod* osr_nm;
JRT_BLOCK
JRT_BLOCK_NO_ASYNC
osr_nm = counter_overflow_helper(current, bci, method);
if (osr_nm != nullptr) {
RegisterMap map(current,
@ -817,7 +818,7 @@ JRT_ENTRY(void, Runtime1::deoptimize(JavaThread* current, jint trap_request))
Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(trap_request);
if (action == Deoptimization::Action_make_not_entrant) {
if (nm->make_not_entrant("C1 deoptimize")) {
if (nm->make_not_entrant(nmethod::ChangeReason::C1_deoptimize)) {
if (reason == Deoptimization::Reason_tenured) {
MethodData* trap_mdo = Deoptimization::get_method_data(current, method, true /*create_if_missing*/);
if (trap_mdo != nullptr) {
@ -1109,7 +1110,7 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, C1StubId stub_id ))
// safepoint, but if it's still alive then make it not_entrant.
nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
if (nm != nullptr) {
nm->make_not_entrant("C1 code patch");
nm->make_not_entrant(nmethod::ChangeReason::C1_codepatch);
}
Deoptimization::deoptimize_frame(current, caller_frame.id());
@ -1357,7 +1358,7 @@ void Runtime1::patch_code(JavaThread* current, C1StubId stub_id) {
// Make sure the nmethod is invalidated, i.e. made not entrant.
nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
if (nm != nullptr) {
nm->make_not_entrant("C1 deoptimize for patching");
nm->make_not_entrant(nmethod::ChangeReason::C1_deoptimize_for_patching);
}
}
@ -1485,7 +1486,7 @@ JRT_ENTRY(void, Runtime1::predicate_failed_trap(JavaThread* current))
nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
assert (nm != nullptr, "no more nmethod?");
nm->make_not_entrant("C1 predicate failed trap");
nm->make_not_entrant(nmethod::ChangeReason::C1_predicate_failed_trap);
methodHandle m(current, nm->method());
MethodData* mdo = m->method_data();

View File

@ -464,6 +464,7 @@ void AOTClassLocationConfig::dumptime_init_helper(TRAPS) {
AOTClassLocation* jrt = AOTClassLocation::allocate(THREAD, ClassLoader::get_jrt_entry()->name(),
0, Group::MODULES_IMAGE,
/*from_cpattr*/false, /*is_jrt*/true);
log_info(class, path)("path [%d] = (modules image)", tmp_array.length());
tmp_array.append(jrt);
parse(THREAD, tmp_array, all_css.boot_cp(), Group::BOOT_CLASSPATH, /*parse_manifest*/true);
@ -573,6 +574,7 @@ void AOTClassLocationConfig::parse(JavaThread* current, GrowableClassLocationArr
void AOTClassLocationConfig::add_class_location(JavaThread* current, GrowableClassLocationArray& tmp_array,
const char* path, Group group, bool parse_manifest, bool from_cpattr) {
AOTClassLocation* cs = AOTClassLocation::allocate(current, path, tmp_array.length(), group, from_cpattr);
log_info(class, path)("path [%d] = %s%s", tmp_array.length(), path, from_cpattr ? " (from cpattr)" : "");
tmp_array.append(cs);
if (!parse_manifest) {
@ -726,6 +728,8 @@ bool AOTClassLocationConfig::is_valid_classpath_index(int classpath_index, Insta
}
AOTClassLocationConfig* AOTClassLocationConfig::write_to_archive() const {
log_locations(CDSConfig::output_archive_path(), /*is_write=*/true);
Array<AOTClassLocation*>* archived_copy = ArchiveBuilder::new_ro_array<AOTClassLocation*>(_class_locations->length());
for (int i = 0; i < _class_locations->length(); i++) {
archived_copy->at_put(i, _class_locations->at(i)->write_to_archive());
@ -773,7 +777,7 @@ bool AOTClassLocationConfig::check_classpaths(bool is_boot_classpath, bool has_a
effective_dumptime_path = substitute(effective_dumptime_path, _dumptime_lcp_len, runtime_lcp, runtime_lcp_len);
}
log_info(class, path)("Checking '%s' %s%s", effective_dumptime_path, cs->file_type_string(),
log_info(class, path)("Checking [%d] '%s' %s%s", i, effective_dumptime_path, cs->file_type_string(),
cs->from_cpattr() ? " (from JAR manifest ClassPath attribute)" : "");
if (!cs->from_cpattr() && file_exists(effective_dumptime_path)) {
if (!runtime_css.has_next()) {
@ -961,11 +965,14 @@ bool AOTClassLocationConfig::need_lcp_match_helper(int start, int end, ClassLoca
return true;
}
bool AOTClassLocationConfig::validate(bool has_aot_linked_classes, bool* has_extra_module_paths) const {
bool AOTClassLocationConfig::validate(const char* cache_filename, bool has_aot_linked_classes, bool* has_extra_module_paths) const {
ResourceMark rm;
AllClassLocationStreams all_css;
log_locations(cache_filename, /*is_write=*/false);
const char* jrt = ClassLoader::get_jrt_entry()->name();
log_info(class, path)("Checking [0] (modules image)");
bool success = class_location_at(0)->check(jrt, has_aot_linked_classes);
log_info(class, path)("Modules image %s validation: %s", jrt, success ? "passed" : "failed");
if (!success) {
@ -1036,6 +1043,17 @@ bool AOTClassLocationConfig::validate(bool has_aot_linked_classes, bool* has_ext
return success;
}
void AOTClassLocationConfig::log_locations(const char* cache_filename, bool is_write) const {
if (log_is_enabled(Info, class, path)) {
LogStreamHandle(Info, class, path) st;
st.print_cr("%s classpath(s) %s %s (size = %d)",
is_write ? "Writing" : "Reading",
is_write ? "into" : "from",
cache_filename, class_locations()->length());
print_on(&st);
}
}
void AOTClassLocationConfig::print() {
if (CDSConfig::is_dumping_archive()) {
tty->print_cr("AOTClassLocationConfig::_dumptime_instance = %p", _dumptime_instance);
@ -1052,8 +1070,15 @@ void AOTClassLocationConfig::print() {
}
void AOTClassLocationConfig::print_on(outputStream* st) const {
const char* type = "boot";
int n = class_locations()->length();
for (int i = 0; i < n; i++) {
if (i >= boot_cp_end_index()) {
type = "app";
}
if (i >= app_cp_end_index()) {
type = "module";
}
const AOTClassLocation* cs = class_location_at(i);
const char* path;
if (i == 0) {
@ -1061,12 +1086,6 @@ void AOTClassLocationConfig::print_on(outputStream* st) const {
} else {
path = cs->path();
}
st->print_cr("[%d] = %s", i, path);
if (i == boot_cp_end_index() && i < n) {
st->print_cr("--- end of boot");
}
if (i == app_cp_end_index() && i < n) {
st->print_cr("--- end of app");
}
st->print_cr("(%-6s) [%d] = %s", type, i, path);
}
}

View File

@ -204,6 +204,7 @@ class AOTClassLocationConfig : public CHeapObj<mtClassShared> {
const char* prepend, size_t prepend_len) const;
void print_on(outputStream* st) const;
void log_locations(const char* cache_filename, bool is_writing) const;
public:
static AOTClassLocationConfig* dumptime() {
@ -269,7 +270,7 @@ public:
AOTClassLocationConfig* write_to_archive() const;
// Functions used only during runtime
bool validate(bool has_aot_linked_classes, bool* has_extra_module_paths) const;
bool validate(const char* cache_filename, bool has_aot_linked_classes, bool* has_extra_module_paths) const;
bool is_valid_classpath_index(int classpath_index, InstanceKlass* ik);

View File

@ -110,12 +110,24 @@ const char* CDSConfig::default_archive_path() {
// before CDSConfig::ergo_initialize() is called.
assert(_cds_ergo_initialize_started, "sanity");
if (_default_archive_path == nullptr) {
stringStream tmp;
if (is_vm_statically_linked()) {
// It's easier to form the path using JAVA_HOME as os::jvm_path
// gives the path to the launcher executable on static JDK.
const char* subdir = WINDOWS_ONLY("bin") NOT_WINDOWS("lib");
tmp.print("%s%s%s%s%s%sclasses",
Arguments::get_java_home(), os::file_separator(),
subdir, os::file_separator(),
Abstract_VM_Version::vm_variant(), os::file_separator());
} else {
// Assume .jsa is in the same directory where libjvm resides on
// non-static JDK.
char jvm_path[JVM_MAXPATHLEN];
os::jvm_path(jvm_path, sizeof(jvm_path));
char *end = strrchr(jvm_path, *os::file_separator());
if (end != nullptr) *end = '\0';
stringStream tmp;
tmp.print("%s%sclasses", jvm_path, os::file_separator());
}
#ifdef _LP64
if (!UseCompressedOops) {
tmp.print_raw("_nocoops");

Some files were not shown because too many files have changed in this diff.