Description of problem
I am building a retina simulator and am successfully running brian2cuda in WSL2 under Windows 11; the results look correct. However, the device-related INFO output includes error lines (see below), and I would like to understand these errors before continuing.
The error lines appear when executing the following expression:
b2.device.run(
run_args={
x_input: this_x_vec,
neuron_group.HS: this_HS,
neuron_group.TS: this_TS * b2u.ms,
}
)
Additional note: I had to call np.ascontiguousarray on the 2D views of my 3D array before passing them in run_args; this requirement is not mentioned in the brian2cuda documentation.
Minimal code to reproduce problem
Original method:
“”"
def create_dynamic_temporal_signal(
    self, tvec, svec, _dt, params, show_impulse=False, impulse_contrast=1.0
):
    """
    Dynamic temporal signal for midget units.

    The stimulus is first convolved with a normalized low-pass kernel per
    unit and domain (center, surround). The result is then fed through a
    high-pass stage that is integrated with Brian2 on the ``cuda_standalone``
    device: the network is built once and the compiled binary is executed
    once per domain, with the domain-specific input and parameters supplied
    through ``run_args``.

    Parameters
    ----------
    tvec : numpy.ndarray
        Time vector; the length of its last axis defines ``n_timepoints``.
    svec : numpy.ndarray
        Either a visual stimulus with ``n_units * n_timepoints * 2`` elements
        (indexed below as ``[unit, time, domain]``) or an impulse with
        ``n_timepoints`` elements, which is tiled over units and domains.
    _dt : float
        Time step as a plain number; it is multiplied by ``b2u.ms`` before
        being handed to Brian2.
    params : numpy.ndarray
        ``[n_units, n_features, n_domains]`` array. Feature order is
        ``["NL", "NLTL", "TS", "HS", "D", "A"]``; domains are
        ``[center, surround]``.
    show_impulse : bool, optional
        When ``True``, return the summed center + surround response without
        applying any delay.
    impulse_contrast : float, optional
        Accepted for interface compatibility; not used in this method.

    Returns
    -------
    numpy.ndarray
        ``[n_units, n_timepoints]`` when ``show_impulse is True``; otherwise
        ``[n_units, n_timepoints + max_delay]`` where ``max_delay`` is the
        largest delay (in samples) over all units and domains.

    Raises
    ------
    ValueError
        If ``params`` is not 3D, if the size of ``svec`` matches neither
        supported layout, or if a delay is negative.
    """
    # params is [n_units, n_features, n_domains]; domains are [center, surround].
    if params.ndim != 3:
        raise ValueError("params array must be 3D for midget units, aborting...")

    n_units = params.shape[0]
    n_timepoints = tvec.shape[-1]

    if svec.size == n_units * n_timepoints * 2:
        # Visual stimulus
        svecs = svec
    elif svec.size == n_timepoints:
        # Impulse response: replicate the single trace for every unit and domain
        svecs = np.tile(
            np.expand_dims(svec, -1), (params.shape[0], 1, params.shape[2])
        )
    else:
        raise ValueError(
            "svec size matches neither visual simulus or impulse, aborting..."
        )

    tvec = np.expand_dims(tvec, axis=0)
    lp = np.zeros((n_units, n_timepoints, 2))

    # parameter name order for midget ["NL", "NLTL", "TS", "HS", "D", "A"]
    for domain_idx in range(2):
        NL = np.int32(params[:, 0, domain_idx])
        TL = params[:, 1, domain_idx] / params[:, 0, domain_idx]  # TL = NLTL / NL
        NL = np.expand_dims(NL, axis=-1)
        TL = np.expand_dims(TL, axis=-1)
        lp[:, :, domain_idx] = self._create_lowpass_response(tvec, NL, TL)

    # Normalize each kernel by its sum over time.
    lp_total = np.sum(lp, axis=-2)
    # h_cen and h_sur are [n_units, n_timepoints]
    h_cen = lp[:, :, 0] / np.expand_dims(lp_total[:, 0], -1)
    h_sur = lp[:, :, 1] / np.expand_dims(lp_total[:, 1], -1)

    # Each of these is [n_units, n_domains] because params is 3D.
    _HS = params[:, 3]
    _TS = params[:, 2]
    D = params[:, 4]

    # Pad the stimulus at the start so that the "valid" convolution returns
    # exactly n_timepoints samples.
    padding_size = n_timepoints - 1
    svec_padded = np.pad(svecs, ((0, 0), (padding_size, 0), (0, 0)), mode="edge")

    # Initialize the output arrays
    x_vec = np.empty((n_units, n_timepoints, 2))
    yvecs = np.zeros([n_units, n_timepoints, 2])

    for domain_idx, h in enumerate([h_cen, h_sur]):
        # Convolve the stimulus with the low-pass kernel for all units
        for this_unit in range(n_units):
            x_vec[this_unit, :, domain_idx] = fftconvolve(
                svec_padded[this_unit, :, domain_idx], h[this_unit, :], mode="valid"
            )

    b2.set_device("cuda_standalone", build_on_run=False)

    # Define the equations for the high-pass stage
    eqs = """
    dy/dt = (-y + TS * x_derivative + (1 - HS) * x_input(t, i)) / TS : 1
    x_derivative = (x_input(t, i) - x_input(t-dt, i)) / dt : Hz
    # Parameters
    HS : 1
    TS : second
    """

    # Keep the plain-number time step (`_dt`) separate from the Brian2
    # quantity: the delay computation at the end needs the plain number.
    dt_quantity = _dt * b2u.ms
    simulation_duration = n_timepoints * dt_quantity

    # Placeholder input and parameters; the real per-domain values are
    # supplied through run_args for each execution of the compiled binary.
    x_input = b2.TimedArray(np.zeros_like(x_vec[:, :, 0].T), dt=dt_quantity)
    neuron_group = b2.NeuronGroup(n_units, eqs, method="euler", dt=dt_quantity)
    neuron_group.HS = _HS[:, 0]
    neuron_group.TS = _TS[:, 0] * b2u.ms
    state_monitor = b2.StateMonitor(neuron_group, ["y"], record=True)

    b2.run(simulation_duration)
    b2.device.build(directory="cuda_standalone", compile=True, run=False)

    for domain_idx in range(2):
        # The per-domain slices are strided views of 3D/2D arrays; pass
        # contiguous copies to run_args.
        this_x_vec = np.ascontiguousarray(x_vec[:, :, domain_idx].T)
        this_HS = np.ascontiguousarray(_HS[:, domain_idx])
        this_TS = np.ascontiguousarray(_TS[:, domain_idx])
        b2.device.run(
            run_args={
                x_input: this_x_vec,
                neuron_group.HS: this_HS,
                neuron_group.TS: this_TS * b2u.ms,
            }
        )
        yvecs[:, :, domain_idx] = state_monitor.y

    if show_impulse is True:
        # Sum center and surround responses, no delay
        return np.sum(yvecs, axis=-1)

    # Add delay.
    # NOTE(review): assumes D is expressed in the same unit as `_dt` (ms, like
    # TS) and is meant as a per-unit, per-domain delay -- confirm. When all
    # delays are equal this is identical to summing first and shifting once.
    delay_timepoints = np.rint(np.asarray(D, dtype=float) / _dt).astype(int)
    if np.any(delay_timepoints < 0):
        raise ValueError("negative delay in params, aborting...")
    max_delay = int(delay_timepoints.max()) if delay_timepoints.size else 0

    generator_potential = np.zeros((n_units, n_timepoints + max_delay))
    for domain_idx in range(2):
        for this_unit in range(n_units):
            start = int(delay_timepoints[this_unit, domain_idx])
            generator_potential[this_unit, start : start + n_timepoints] += yvecs[
                this_unit, :, domain_idx
            ]

    return generator_potential
“”"
What you have already tried
The files mentioned in the error messages exist under cuda_standalone\static_arrays
They are binary and non-empty
Expected output (if relevant)
No error lines (see below)
Actual output (if relevant)
“”"
INFO CUDA installation directory found in standard location: /usr/local/cuda [brian2cuda.utils.gputools]
INFO Compiling device code for GPU 0 (NVIDIA GeForce RTX 3080 Ti Laptop GPU) [brian2cuda.utils.gputools]
INFO Compiling device code for compute capability 8.6 (compiler flags: [‘-arch=sm_86’]) [brian2cuda.device]
INFO Using the following preferences for CUDA standalone: [brian2cuda.device]
INFO devices.cuda_standalone.SM_multiplier = 1 [brian2cuda.device]
INFO devices.cuda_standalone.parallel_blocks = 1 [brian2cuda.device]
INFO devices.cuda_standalone.launch_bounds = False [brian2cuda.device]
INFO devices.cuda_standalone.syn_launch_bounds = False [brian2cuda.device]
INFO devices.cuda_standalone.calc_occupancy = True [brian2cuda.device]
INFO devices.cuda_standalone.extra_threshold_kernel = True [brian2cuda.device]
INFO devices.cuda_standalone.random_number_generator_type = CURAND_RNG_PSEUDO_DEFAULT [brian2cuda.device]
INFO devices.cuda_standalone.random_number_generator_ordering = False [brian2cuda.device]
INFO devices.cuda_standalone.push_synapse_bundles = True [brian2cuda.device]
INFO devices.cuda_standalone.threads_per_synapse_bundle = {max} [brian2cuda.device]
INFO devices.cuda_standalone.bundle_threads_warp_multiple = False [brian2cuda.device]
INFO devices.cuda_standalone.no_pre_references = False [brian2cuda.device]
INFO devices.cuda_standalone.no_post_references = False [brian2cuda.device]
INFO devices.cuda_standalone.default_functions_integral_convertion = <class ‘numpy.float64’> [brian2cuda.device]
INFO devices.cuda_standalone.use_atomics = True [brian2cuda.device]
INFO devices.cuda_standalone.profile_statemonitor_copy_to_host = None [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.gpu_heap_size = 128 [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.detect_gpus = True [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.gpu_id = None [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.extra_compile_args_nvcc = [‘-w’, ‘-use_fast_math’] [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.compute_capability = None [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.detect_cuda = True [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.cuda_path = None [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.cuda_runtime_version = None [brian2cuda.device]
INFO devices.cuda_standalone.cuda_backend.device_query_path = None [brian2cuda.device]
INFO: _init_arrays() took 0.002866s
Error reading ‘static_arrays/init_neurongroup_hs_044988eab6cb3bdced31d4879772c676.dat’: file size -1 does not match expected size 1624
Error reading ‘static_arrays/init_neurongroup_ts_2a79f1fc2454b46fa1072205f0f40dae.dat’: file size -1 does not match expected size 1624
INFO _run_kernel_statemonitor_codeobject
1 blocks
768 threads
12 registers per thread
0 bytes statically-allocated shared memory per block
0 bytes local memory per thread
504 bytes user-allocated constant memory
1.000 theoretical occupancy
INFO _run_kernel_neurongroup_stateupdater_codeobject
1 blocks
640 threads
42 registers per thread
0 bytes statically-allocated shared memory per block
0 bytes local memory per thread
504 bytes user-allocated constant memory
0.833 theoretical occupancy
INFO: _init_arrays() took 0.002349s
Error reading ‘static_arrays/init_neurongroup_hs_1ba5a51a89c69ee8012b4b83610e7327.dat’: file size -1 does not match expected size 1624
Error reading ‘static_arrays/init_neurongroup_ts_11d5277e27b4bf373e1731435da7d725.dat’: file size -1 does not match expected size 1624
INFO _run_kernel_statemonitor_codeobject
1 blocks
768 threads
12 registers per thread
0 bytes statically-allocated shared memory per block
0 bytes local memory per thread
504 bytes user-allocated constant memory
1.000 theoretical occupancy
INFO _run_kernel_neurongroup_stateupdater_codeobject
1 blocks
640 threads
42 registers per thread
0 bytes statically-allocated shared memory per block
0 bytes local memory per thread
504 bytes user-allocated constant memory
0.833 theoretical occupancy
“”"