Fix #107356: Cycles: improve oneAPI error handling

This commit is contained in:
Nikita Sirgienko 2023-05-03 12:05:04 +02:00
parent 8775cf804e
commit 1dcc8e6ffa

View File

@ -302,6 +302,12 @@ void OneapiDevice::mem_copy_to(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")"; << string_human_readable_size(mem.memory_size()) << ")";
} }
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
if (mem.type == MEM_GLOBAL) { if (mem.type == MEM_GLOBAL) {
global_free(mem); global_free(mem);
global_alloc(mem); global_alloc(mem);
@ -334,6 +340,12 @@ void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t
<< " data " << size << " bytes"; << " data " << size << " bytes";
} }
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
assert(device_queue_); assert(device_queue_);
assert(size != 0); assert(size != 0);
@ -357,6 +369,12 @@ void OneapiDevice::mem_zero(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")\n"; << string_human_readable_size(mem.memory_size()) << ")\n";
} }
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
if (!mem.device_pointer) { if (!mem.device_pointer) {
mem_alloc(mem); mem_alloc(mem);
} }
@ -602,19 +620,14 @@ bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t n
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
OneapiDevice::check_usm(queue_, dest, true); OneapiDevice::check_usm(queue_, dest, true);
OneapiDevice::check_usm(queue_, src, true); OneapiDevice::check_usm(queue_, src, true);
try {
sycl::event mem_event = queue->memcpy(dest, src, num_bytes); sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
# ifdef WITH_CYCLES_DEBUG # ifdef WITH_CYCLES_DEBUG
try {
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug. * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
*/ */
mem_event.wait_and_throw(); mem_event.wait_and_throw();
return true; return true;
}
catch (sycl::exception const &e) {
oneapi_error_string_ = e.what();
return false;
}
# else # else
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context()); sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context()); sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
@ -630,6 +643,11 @@ bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t n
return true; return true;
# endif # endif
} }
catch (sycl::exception const &e) {
oneapi_error_string_ = e.what();
return false;
}
}
bool OneapiDevice::usm_memset(SyclQueue *queue_, bool OneapiDevice::usm_memset(SyclQueue *queue_,
void *usm_ptr, void *usm_ptr,
@ -639,23 +657,22 @@ bool OneapiDevice::usm_memset(SyclQueue *queue_,
assert(queue_); assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
OneapiDevice::check_usm(queue_, usm_ptr, true); OneapiDevice::check_usm(queue_, usm_ptr, true);
try {
sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes); sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
# ifdef WITH_CYCLES_DEBUG # ifdef WITH_CYCLES_DEBUG
try {
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug. * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
*/ */
mem_event.wait_and_throw(); mem_event.wait_and_throw();
# else
(void)mem_event;
# endif
return true; return true;
} }
catch (sycl::exception const &e) { catch (sycl::exception const &e) {
oneapi_error_string_ = e.what(); oneapi_error_string_ = e.what();
return false; return false;
} }
# else
(void)mem_event;
return true;
# endif
} }
bool OneapiDevice::queue_synchronize(SyclQueue *queue_) bool OneapiDevice::queue_synchronize(SyclQueue *queue_)