Skip to content

Commit 39e79f3

Browse files
author
Datadog Syncup Service
committed
Merge branch 'upstream-master'
2 parents bbb2807 + fea5f2b commit 39e79f3

File tree

269 files changed

+2675
-4119
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

269 files changed

+2675
-4119
lines changed

src/hotspot/cpu/riscv/globals_riscv.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
117117
product(bool, UseZvfh, false, DIAGNOSTIC, "Use Zvfh instructions") \
118118
product(bool, UseZvkn, false, EXPERIMENTAL, \
119119
"Use Zvkn group extension, Zvkned, Zvknhb, Zvkb, Zvkt") \
120-
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
121-
"Use RVV instructions for left/right shift of BigInteger") \
122120
product(bool, UseCtxFencei, false, EXPERIMENTAL, \
123121
"Use PR_RISCV_CTX_SW_FENCEI_ON to avoid explicit icache flush")
124122

src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6502,7 +6502,7 @@ static const int64_t right_3_bits = right_n_bits(3);
65026502
StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
65036503
}
65046504

6505-
if (UseRVVForBigIntegerShiftIntrinsics) {
6505+
if (UseRVV) {
65066506
StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
65076507
StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
65086508
}

src/hotspot/cpu/riscv/vm_version_riscv.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,6 @@ void VM_Version::c2_initialize() {
233233

234234
if (!UseRVV) {
235235
FLAG_SET_DEFAULT(MaxVectorSize, 0);
236-
FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false);
237236
} else {
238237
if (!FLAG_IS_DEFAULT(MaxVectorSize) && MaxVectorSize != _initial_vector_length) {
239238
warning("Current system does not support RVV vector length for MaxVectorSize %d. Set MaxVectorSize to %d",

src/hotspot/cpu/s390/vm_version_s390.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,12 @@ void VM_Version::initialize() {
320320
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
321321
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
322322
}
323+
324+
// The OptoScheduling information is not maintained in s390.ad.
325+
if (OptoScheduling) {
326+
warning("OptoScheduling is not supported on this CPU.");
327+
FLAG_SET_DEFAULT(OptoScheduling, false);
328+
}
323329
}
324330

325331

src/hotspot/os/windows/attachListener_windows.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class Win32AttachOperation: public AttachOperation {
161161

162162

163163
// Win32AttachOperationRequest is an element of AttachOperation request list.
164-
class Win32AttachOperationRequest {
164+
class Win32AttachOperationRequest: public CHeapObj<mtServiceability> {
165165
private:
166166
AttachAPIVersion _ver;
167167
char _name[AttachOperation::name_length_max + 1];

src/hotspot/share/cds/archiveBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -897,7 +897,7 @@ void ArchiveBuilder::make_klasses_shareable() {
897897
assert(HeapShared::is_archivable_hidden_klass(ik), "sanity");
898898
} else {
899899
// Legacy CDS support for lambda proxies
900-
assert(HeapShared::is_lambda_proxy_klass(ik), "sanity");
900+
CDS_JAVA_HEAP_ONLY(assert(HeapShared::is_lambda_proxy_klass(ik), "sanity");)
901901
}
902902
} else if (ik->is_shared_boot_class()) {
903903
type = "boot";

src/hotspot/share/cds/archiveUtils.cpp

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,3 +399,188 @@ size_t HeapRootSegments::segment_offset(size_t seg_idx) {
399399
return _base_offset + seg_idx * _max_size_in_bytes;
400400
}
401401

402+
ArchiveWorkers ArchiveWorkers::_workers;
403+
404+
ArchiveWorkers::ArchiveWorkers() :
405+
_start_semaphore(0),
406+
_end_semaphore(0),
407+
_num_workers(0),
408+
_started_workers(0),
409+
_waiting_workers(0),
410+
_running_workers(0),
411+
_state(NOT_READY),
412+
_task(nullptr) {
413+
}
414+
415+
void ArchiveWorkers::initialize() {
416+
assert(Atomic::load(&_state) == NOT_READY, "Should be");
417+
418+
Atomic::store(&_num_workers, max_workers());
419+
Atomic::store(&_state, READY);
420+
421+
// Kick off pool startup by creating a single worker.
422+
start_worker_if_needed();
423+
}
424+
425+
int ArchiveWorkers::max_workers() {
426+
// The pool is used for short-lived bursty tasks. We do not want to spend
427+
// too much time creating and waking up threads unnecessarily. Plus, we do
428+
// not want to overwhelm large machines. This is why we want to be very
429+
// conservative about the number of workers actually needed.
430+
return MAX2(0, log2i_graceful(os::active_processor_count()));
431+
}
432+
433+
bool ArchiveWorkers::is_parallel() {
434+
return _num_workers > 0;
435+
}
436+
437+
void ArchiveWorkers::shutdown() {
438+
while (true) {
439+
State state = Atomic::load(&_state);
440+
if (state == SHUTDOWN) {
441+
// Already shut down.
442+
return;
443+
}
444+
if (Atomic::cmpxchg(&_state, state, SHUTDOWN, memory_order_relaxed) == state) {
445+
if (is_parallel()) {
446+
// Execute a shutdown task and block until all workers respond.
447+
run_task(&_shutdown_task);
448+
}
449+
}
450+
}
451+
}
452+
453+
void ArchiveWorkers::start_worker_if_needed() {
454+
while (true) {
455+
int cur = Atomic::load(&_started_workers);
456+
if (cur >= _num_workers) {
457+
return;
458+
}
459+
if (Atomic::cmpxchg(&_started_workers, cur, cur + 1, memory_order_relaxed) == cur) {
460+
new ArchiveWorkerThread(this);
461+
return;
462+
}
463+
}
464+
}
465+
466+
void ArchiveWorkers::signal_worker_if_needed() {
467+
while (true) {
468+
int cur = Atomic::load(&_waiting_workers);
469+
if (cur == 0) {
470+
return;
471+
}
472+
if (Atomic::cmpxchg(&_waiting_workers, cur, cur - 1, memory_order_relaxed) == cur) {
473+
_start_semaphore.signal(1);
474+
return;
475+
}
476+
}
477+
}
478+
479+
void ArchiveWorkers::run_task(ArchiveWorkerTask* task) {
480+
assert((Atomic::load(&_state) == READY) ||
481+
((Atomic::load(&_state) == SHUTDOWN) && (task == &_shutdown_task)),
482+
"Should be in correct state");
483+
assert(Atomic::load(&_task) == nullptr, "Should not have running tasks");
484+
485+
if (is_parallel()) {
486+
run_task_multi(task);
487+
} else {
488+
run_task_single(task);
489+
}
490+
}
491+
492+
void ArchiveWorkers::run_task_single(ArchiveWorkerTask* task) {
493+
// Single thread needs no chunking.
494+
task->configure_max_chunks(1);
495+
496+
// Execute the task ourselves, as there are no workers.
497+
task->work(0, 1);
498+
}
499+
500+
void ArchiveWorkers::run_task_multi(ArchiveWorkerTask* task) {
501+
// Multiple threads can work with multiple chunks.
502+
task->configure_max_chunks(_num_workers * CHUNKS_PER_WORKER);
503+
504+
// Set up the run and publish the task.
505+
Atomic::store(&_waiting_workers, _num_workers);
506+
Atomic::store(&_running_workers, _num_workers);
507+
Atomic::release_store(&_task, task);
508+
509+
// Kick off pool wakeup by signaling a single worker, and proceed
510+
// immediately to executing the task locally.
511+
signal_worker_if_needed();
512+
513+
// Execute the task ourselves, while workers are catching up.
514+
// This allows us to hide parts of task handoff latency.
515+
task->run();
516+
517+
// Done executing task locally, wait for any remaining workers to complete,
518+
// and then do the final housekeeping.
519+
_end_semaphore.wait();
520+
Atomic::store(&_task, (ArchiveWorkerTask *) nullptr);
521+
OrderAccess::fence();
522+
523+
assert(Atomic::load(&_waiting_workers) == 0, "All workers were signaled");
524+
assert(Atomic::load(&_running_workers) == 0, "No workers are running");
525+
}
526+
527+
void ArchiveWorkerTask::run() {
528+
while (true) {
529+
int chunk = Atomic::load(&_chunk);
530+
if (chunk >= _max_chunks) {
531+
return;
532+
}
533+
if (Atomic::cmpxchg(&_chunk, chunk, chunk + 1, memory_order_relaxed) == chunk) {
534+
assert(0 <= chunk && chunk < _max_chunks, "Sanity");
535+
work(chunk, _max_chunks);
536+
}
537+
}
538+
}
539+
540+
void ArchiveWorkerTask::configure_max_chunks(int max_chunks) {
541+
if (_max_chunks == 0) {
542+
_max_chunks = max_chunks;
543+
}
544+
}
545+
546+
bool ArchiveWorkers::run_as_worker() {
547+
assert(is_parallel(), "Should be in parallel mode");
548+
_start_semaphore.wait();
549+
550+
// Avalanche wakeups: each worker signals two others.
551+
signal_worker_if_needed();
552+
signal_worker_if_needed();
553+
554+
ArchiveWorkerTask* task = Atomic::load_acquire(&_task);
555+
task->run();
556+
557+
// All work done in threads should be visible to caller.
558+
OrderAccess::fence();
559+
560+
// Signal the pool that the tasks are complete, if this is the last worker.
561+
if (Atomic::sub(&_running_workers, 1, memory_order_relaxed) == 0) {
562+
_end_semaphore.signal();
563+
}
564+
565+
// Continue if task was not a termination task.
566+
return (task != &_shutdown_task);
567+
}
568+
569+
ArchiveWorkerThread::ArchiveWorkerThread(ArchiveWorkers* pool) : NamedThread(), _pool(pool) {
570+
set_name("ArchiveWorkerThread");
571+
os::create_thread(this, os::os_thread);
572+
os::start_thread(this);
573+
}
574+
575+
void ArchiveWorkerThread::run() {
576+
// Avalanche thread startup: each starting worker starts two others.
577+
_pool->start_worker_if_needed();
578+
_pool->start_worker_if_needed();
579+
580+
// Set ourselves up.
581+
os::set_priority(this, NearMaxPriority);
582+
583+
while (_pool->run_as_worker()) {
584+
// Work until terminated.
585+
}
586+
}

src/hotspot/share/cds/archiveUtils.hpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include "utilities/bitMap.hpp"
3434
#include "utilities/exceptions.hpp"
3535
#include "utilities/macros.hpp"
36+
#include "runtime/nonJavaThread.hpp"
37+
#include "runtime/semaphore.hpp"
3638

3739
class BootstrapInfo;
3840
class ReservedSpace;
@@ -319,4 +321,95 @@ class HeapRootSegments {
319321
HeapRootSegments& operator=(const HeapRootSegments&) = default;
320322
};
321323

324+
class ArchiveWorkers;
325+
326+
// A task to be worked on by worker threads
327+
class ArchiveWorkerTask : public CHeapObj<mtInternal> {
328+
friend class ArchiveWorkers;
329+
friend class ArchiveWorkerShutdownTask;
330+
private:
331+
const char* _name;
332+
int _max_chunks;
333+
volatile int _chunk;
334+
335+
void run();
336+
337+
void configure_max_chunks(int max_chunks);
338+
339+
public:
340+
ArchiveWorkerTask(const char* name) :
341+
_name(name), _max_chunks(0), _chunk(0) {}
342+
const char* name() const { return _name; }
343+
virtual void work(int chunk, int max_chunks) = 0;
344+
};
345+
346+
class ArchiveWorkerThread : public NamedThread {
347+
friend class ArchiveWorkers;
348+
private:
349+
ArchiveWorkers* const _pool;
350+
351+
public:
352+
ArchiveWorkerThread(ArchiveWorkers* pool);
353+
const char* type_name() const override { return "Archive Worker Thread"; }
354+
void run() override;
355+
};
356+
357+
class ArchiveWorkerShutdownTask : public ArchiveWorkerTask {
358+
public:
359+
ArchiveWorkerShutdownTask() : ArchiveWorkerTask("Archive Worker Shutdown") {
360+
// This task always has only one chunk.
361+
configure_max_chunks(1);
362+
}
363+
void work(int chunk, int max_chunks) override {
364+
// Do nothing.
365+
}
366+
};
367+
368+
// Special worker pool for archive workers. The goal for this pool is to
369+
// start up fast, distribute spiky workloads efficiently, and be able to
370+
// shut down after use. This makes the implementation quite different from
371+
// the normal GC worker pool.
372+
class ArchiveWorkers {
373+
friend class ArchiveWorkerThread;
374+
private:
375+
// Target number of chunks per worker. This should be large enough to even
376+
// out work imbalance, and small enough to keep bookkeeping overheads low.
377+
static constexpr int CHUNKS_PER_WORKER = 4;
378+
static int max_workers();
379+
380+
// Global shared instance. Can be uninitialized, can be shut down.
381+
static ArchiveWorkers _workers;
382+
383+
ArchiveWorkerShutdownTask _shutdown_task;
384+
Semaphore _start_semaphore;
385+
Semaphore _end_semaphore;
386+
387+
int _num_workers;
388+
int _started_workers;
389+
int _waiting_workers;
390+
int _running_workers;
391+
392+
typedef enum { NOT_READY, READY, SHUTDOWN } State;
393+
volatile State _state;
394+
395+
ArchiveWorkerTask* _task;
396+
397+
bool run_as_worker();
398+
void start_worker_if_needed();
399+
void signal_worker_if_needed();
400+
401+
void run_task_single(ArchiveWorkerTask* task);
402+
void run_task_multi(ArchiveWorkerTask* task);
403+
404+
bool is_parallel();
405+
406+
ArchiveWorkers();
407+
408+
public:
409+
static ArchiveWorkers* workers() { return &_workers; }
410+
void initialize();
411+
void shutdown();
412+
void run_task(ArchiveWorkerTask* task);
413+
};
414+
322415
#endif // SHARE_CDS_ARCHIVEUTILS_HPP

src/hotspot/share/cds/cds_globals.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,10 @@
117117
product(bool, AOTClassLinking, false, \
118118
"Load/link all archived classes for the boot/platform/app " \
119119
"loaders before application main") \
120-
120+
\
121+
product(bool, AOTCacheParallelRelocation, true, DIAGNOSTIC, \
122+
"Use parallel relocation code to speed up startup.") \
123+
\
121124
// end of CDS_FLAGS
122125

123126
DECLARE_FLAGS(CDS_FLAGS)

0 commit comments

Comments
 (0)