Description of problem
Hi,
I have a couple of issues that appear to be related, stemming from the use of multiprocess:
- parallelizing simulations with multiprocess.Pool produces a lot of warning messages, but it doesn’t kill the process and the code runs to completion when calling via “python my_simulation.py”. An example of the warning message is attached at the bottom, and this may be related to Investigate issues with parallel processes using Cython · Issue #1154 · brian-team/brian2 · GitHub.
This only happens sometimes, i.e., repeating the multiprocess call multiple times in a for loop, the error only shows up in some iterations, and typically not the first iteration.
- executing a shell script that defines compute resources on a slurm cluster and then calling the same python file will result in complete hanging of the process. Not a single iteration completes, and eventually the job is terminated as it runs out of time. It also produces an error file thousands of lines long, but I’ve attached the beginning of it at the bottom here in case it’s helpful.
The simulation code is a slightly modified version of the standard brunel network, adapted from https://github.com/EPFL-LCN/neuronaldynamics-exercises/blob/master/neurodynex3/brunel_model/LIF_spiking_network.py
It’s run with a network of 5000 neurons, for 10 seconds with a dt of 0.1 ms.
Sorry for the mess of error messages at the end!!
Minimal code to reproduce problem
for num_repeats in range(10):
    with Pool(num_cores) as pool:
        pool_collect = pool.map(sim_brunel_gnu, dict_list)
What you have already tried
manually setting b2.prefs.codegen.target = "numpy", but this does not get rid of the warning messages when run via python, and the cluster job still hangs
Expected output (if relevant)
Actual output (if relevant)
Full traceback of error (if relevant)
example warning when run via python:
Traceback (most recent call last):
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/logging/handlers.py”, line 70, in emit
self.doRollover()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/logging/handlers.py”, line 170, in doRollover
os.remove(dfn)
FileNotFoundError: [Errno 2] No such file or directory: ‘/tmp/brian_debug_m6zje3k0.log.1’
Call stack:
File “simulate_brunel_gnu.py”, line 156, in
with Pool(num_cores) as pool:
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/context.py”, line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 212, in init
self._repopulate_pool()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 326, in _repopulate_pool_static
w.start()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 121, in start
self._popen = self._Popen(self)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/context.py”, line 277, in _Popen
return Popen(process_obj)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/popen_fork.py”, line 19, in init
self._launch(process_obj)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/popen_fork.py”, line 75, in _launch
code = process_obj._bootstrap(parent_sentinel=child_r)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 315, in _bootstrap
self.run()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 108, in run
self._target(*self._args, **self._kwargs)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 125, in worker
result = (True, func(*args, **kwds))
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 48, in mapstar
return list(map(*args))
File “simulate_brunel_gnu.py”, line 26, in sim_brunel_gnu
network = b2.NeuronGroup(
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/groups/neurongroup.py”, line 481, in init
Group.init(self, dt=dt, clock=clock, when=‘start’, order=order,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/units/fundamentalunits.py”, line 2434, in new_f
result = f(*args, **kwds)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/base.py”, line 94, in init
Nameable.init(self, name)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/names.py”, line 77, in init
logger.diagnostic("Created object of class “+self.class.name+” with name "+self._name)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/utils/logger.py”, line 374, in diagnostic
self._log(‘DIAGNOSTIC’, msg, name_suffix, once)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/utils/logger.py”, line 358, in _log
the_logger.log(LOG_LEVELS[log_level], msg)
Message: ‘Created object of class NeuronGroup with name neurongroup’
Arguments: ()
File “simulate_brunel_gnu.py”, line 156, in
with Pool(num_cores) as pool:
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/context.py”, line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 212, in init
self._repopulate_pool()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 326, in _repopulate_pool_static
w.start()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 121, in start
self._popen = self._Popen(self)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/context.py”, line 277, in _Popen
return Popen(process_obj)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/popen_fork.py”, line 19, in init
self._launch(process_obj)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/popen_fork.py”, line 75, in _launch
code = process_obj._bootstrap(parent_sentinel=child_r)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 315, in _bootstrap
self.run()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 108, in run
self._target(*self._args, **self._kwargs)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 125, in worker
result = (True, func(*args, **kwds))
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 48, in mapstar
return list(map(*args))
File “simulate_brunel_gnu.py”, line 26, in sim_brunel_gnu
network = b2.NeuronGroup(
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/groups/neurongroup.py”, line 481, in init
Group.init(self, dt=dt, clock=clock, when=‘start’, order=order,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/units/fundamentalunits.py”, line 2434, in new_f
result = f(*args, **kwds)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/base.py”, line 94, in init
Nameable.init(self, name)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/names.py”, line 77, in init
logger.diagnostic("Created object of class “+self.class.name+” with name "+self._name)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/utils/logger.py”, line 374, in diagnostic
self._log(‘DIAGNOSTIC’, msg, name_suffix, once)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/utils/logger.py”, line 358, in _log
the_logger.log(LOG_LEVELS[log_level], msg)
Message: ‘Created object of class NeuronGroup with name neurongroup’
Arguments: ()
warning when run via sbatch script.sh:
INFO Cache size for target “cython”: 2491 MB.
You can call “clear_cache(‘cython’)” to delete all files from the cache or manually delete files in the “/home/macke/rgao97/.cython/brian_extensions” directory. [brian2]
— Logging error —
Traceback (most recent call last):
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/logging/handlers.py”, line 70, in emit
self.doRollover()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/logging/handlers.py”, line 170, in doRollover
os.remove(dfn)
FileNotFoundError: [Errno 2] No such file or directory: ‘/tmp/brian_debug_n9p6jvf6.log.1’
Call stack:
File “simulate_brunel_gnu.py”, line 152, in
with Pool(num_cores) as pool:
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/context.py”, line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 212, in init
self._repopulate_pool()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 326, in _repopulate_pool_static
w.start()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 121, in start
self._popen = self._Popen(self)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/context.py”, line 277, in _Popen
return Popen(process_obj)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/popen_fork.py”, line 19, in init
self._launch(process_obj)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/popen_fork.py”, line 75, in _launch
code = process_obj.bootstrap(parent_sentinel=child_r)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 315, in bootstrap
self.run()
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/process.py”, line 108, in run
self.target(*self.args, **self.kwargs)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 125, in worker
result = (True, func(*args, **kwds))
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/multiprocessing/pool.py”, line 48, in mapstar
return list(map(*args))
File “simulate_brunel_gnu.py”, line 60, in sim_brunel_gnu
net_collect.run(param_dict[‘sim_time’])
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/base.py”, line 278, in device_override_decorated_function
return func(*args, **kwds)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/units/fundamentalunits.py”, line 2434, in new_f
result = f(*args, **kwds)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/network.py”, line 1008, in run
self.before_run(namespace)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/base.py”, line 278, in device_override_decorated_function
return func(*args, **kwds)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/network.py”, line 897, in before_run
obj.before_run(run_namespace)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/core/base.py”, line 278, in device_override_decorated_function
return func(*args, **kwds)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/synapses/synapses.py”, line 320, in before_run
super(SynapticPathway, self).before_run(run_namespace)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/groups/group.py”, line 1143, in before_run
self.create_code_objects(run_namespace)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/synapses/synapses.py”, line 333, in create_code_objects
self.pushspikes_codeobj = create_runner_codeobj(self,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/codegen/codeobject.py”, line 441, in create_runner_codeobj
return device.code_object(owner=group,
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/devices/device.py”, line 321, in code_object
logger.diagnostic(‘%s code:\n%s’ % (name, indent(code_representation(code))))
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/utils/logger.py”, line 374, in diagnostic
self.log(‘DIAGNOSTIC’, msg, name_suffix, once)
File “/home/macke/rgao97/.conda/envs/automind_slurm/lib/python3.8/site-packages/brian2/utils/logger.py”, line 358, in log
the_logger.log(LOG_LEVELS[log_level], msg)
Message: 'synapses_1_pre_push_spikes code:\n imports:\n import numpy as numpy\n cimport numpy as numpy\n from libc.math cimport fabs, sin, cos, tan, sinh, cosh, tanh, exp, log, log10, expm1, log1p, sqrt, asin, acos, atan, fmod, floor, ceil, isinf\n cdef extern from “math.h”:\n double M_PI\n # Import the two versions of std::abs\n from libc.stdlib cimport abs # For integers\n from libc.math cimport abs # For floating point values\n from libc.limits cimport INT_MIN, INT_MAX\n from libcpp cimport bool\n numpy.import_array()\n cdef extern from “numpy/ndarraytypes.h”:\n void PyArray_CLEARFLAGS(numpy.PyArrayObject *arr, int flags)\n from libc.stdlib cimport free\n cdef extern from “numpy/npy_math.h”:\n bint npy_isinf(double x)\n double NPY_INFINITY\n cdef extern from “stdint_compat.h”:\n # Longness only used for type promotion\n # Actual compile time size used for conversion\n ctypedef signed int int32_t\n ctypedef signed long int64_t\n ctypedef unsigned long uint64_t\n # It seems we cannot used a fused type here\n cdef int int(bool)\n cdef int int(char)\n cdef int int(short)\n cdef int int(int)\n cdef int int(long)\n cdef int int(float)\n cdef int int(double)\n cdef int int(long double)\n run:\n #cython: language_level=3\n #cython: boundscheck=False\n #cython: wraparound=False\n #cython: cdivision=False\n #cython: infer_types=True\n import numpy as numpy\n cimport numpy as numpy\n from libc.math cimport fabs, sin, cos, tan, sinh, cosh, tanh, exp, log, log10, expm1, log1p, sqrt, asin, acos, atan, fmod, floor, ceil, isinf\n cdef extern from “math.h”:\n double M_PI\n # Import the two versions of std::abs\n from libc.stdlib cimport abs # For integers\n from libc.math cimport abs # For floating point values\n from libc.limits cimport INT_MIN, INT_MAX\n from libcpp cimport bool\n numpy.import_array()\n cdef extern from “numpy/ndarraytypes.h”:\n void PyArray_CLEARFLAGS(numpy.PyArrayObject *arr, int flags)\n from libc.stdlib cimport free\n cdef extern from “numpy/npy_math.h”:\n 
bint npy_isinf(double x)\n double NPY_INFINITY\n cdef extern from “stdint_compat.h”:\n # Longness only used for type promotion\n # Actual compile time size used for conversion\n ctypedef signed int int32_t\n ctypedef signed long int64_t\n ctypedef unsigned long uint64_t\n # It seems we cannot used a fused type here\n cdef int int(bool)\n cdef int int(char)\n cdef int int(short)\n cdef int int(int)\n cdef int int(long)\n cdef int int(float)\n cdef int int(double)\n cdef int int(long double)\n # support code\n # template-specific support code\n def main(_namespace):\n cdef size_t _idx\n cdef size_t _vectorisation_idx\n _var__spikespace = _namespace[“_var__spikespace”]\n cdef _numpy.ndarray[int32_t, ndim=1, mode='c'] _buf__array_neurongroup__spikespace = _namespace['_array_neurongroup__spikespace']\n cdef int32_t * _array_neurongroup__spikespace = <int32_t *> _buf__array_neurongroup__spikespace.data\n cdef size_t _num_array_neurongroup__spikespace = len(_namespace['_array_neurongroup__spikespace'])\n cdef int32_t _spikespace\n if '_owner' in _namespace:\n owner = namespace['owner']\n owner.push_spikes()\n cython_directives:\n #cython: language_level=3\n #cython: boundscheck=False\n #cython: wraparound=False\n #cython: cdivision=False\n #cython: infer_types=True\n after_run:\n #cython: language_level=3\n #cython: boundscheck=False\n #cython: wraparound=False\n #cython: cdivision=False\n #cython: infer_types=True\n import numpy as numpy\n cimport numpy as numpy\n from libc.math cimport fabs, sin, cos, tan, sinh, cosh, tanh, exp, log, log10, expm1, log1p, sqrt, asin, acos, atan, fmod, floor, ceil, isinf\n cdef extern from “math.h”:\n double M_PI\n # Import the two versions of std::abs\n from libc.stdlib cimport abs # For integers\n from libc.math cimport abs # For floating point values\n from libc.limits cimport INT_MIN, INT_MAX\n from libcpp cimport bool\n numpy.import_array()\n cdef extern from “numpy/ndarraytypes.h”:\n void PyArray_CLEARFLAGS(numpy.PyArrayObject *arr, 
int flags)\n from libc.stdlib cimport free\n cdef extern from “numpy/npy_math.h”:\n bint npy_isinf(double x)\n double NPY_INFINITY\n cdef extern from “stdint_compat.h”:\n # Longness only used for type promotion\n # Actual compile time size used for conversion\n ctypedef signed int int32_t\n ctypedef signed long int64_t\n ctypedef unsigned long uint64_t\n # It seems we cannot used a fused type here\n cdef int int(bool)\n cdef int int(char)\n cdef int int(short)\n cdef int int(int)\n cdef int int(long)\n cdef int int(float)\n cdef int int(double)\n cdef int int(long double)\n def main(_namespace):\n _var__spikespace = _namespace[“_var__spikespace”]\n cdef _numpy.ndarray[int32_t, ndim=1, mode='c'] _buf__array_neurongroup__spikespace = _namespace['_array_neurongroup__spikespace']\n cdef int32_t * _array_neurongroup__spikespace = <int32_t *> _buf__array_neurongroup__spikespace.data\n cdef size_t _num_array_neurongroup__spikespace = len(_namespace['_array_neurongroup__spikespace'])\n cdef int32_t _spikespace\n if '_owner' in namespace:\n owner = namespace['owner']\n # EMPTY_CODE_BLOCK – overwrite in child template\n before_run:\n #cython: language_level=3\n #cython: boundscheck=False\n #cython: wraparound=False\n #cython: cdivision=False\n #cython: infer_types=True\n import numpy as numpy\n cimport numpy as numpy\n from libc.math cimport fabs, sin, cos, tan, sinh, cosh, tanh, exp, log, log10, expm1, log1p, sqrt, asin, acos, atan, fmod, floor, ceil, isinf\n cdef extern from “math.h”:\n double M_PI\n # Import the two versions of std::abs\n from libc.stdlib cimport abs # For integers\n from libc.math cimport abs # For floating point values\n from libc.limits cimport INT_MIN, INT_MAX\n from libcpp cimport bool\n numpy.import_array()\n cdef extern from “numpy/ndarraytypes.h”:\n void PyArray_CLEARFLAGS(numpy.PyArrayObject *arr, int flags)\n from libc.stdlib cimport free\n cdef extern from “numpy/npy_math.h”:\n bint npy_isinf(double x)\n double NPY_INFINITY\n cdef extern from 
“stdint_compat.h”:\n # Longness only used for type promotion\n # Actual compile time size used for conversion\n ctypedef signed int int32_t\n ctypedef signed long int64_t\n ctypedef unsigned long uint64_t\n # It seems we cannot used a fused type here\n cdef int int(bool)\n cdef int int(char)\n cdef int int(short)\n cdef int int(int)\n cdef int int(long)\n cdef int int(float)\n cdef int int(double)\n cdef int int(long double)\n def main(_namespace):\n _var__spikespace = _namespace[“_var__spikespace”]\n cdef _numpy.ndarray[int32_t, ndim=1, mode='c'] _buf__array_neurongroup__spikespace = _namespace['_array_neurongroup__spikespace']\n cdef int32_t * _array_neurongroup__spikespace = <int32_t *> _buf__array_neurongroup__spikespace.data\n cdef size_t _num_array_neurongroup__spikespace = len(_namespace['_array_neurongroup__spikespace'])\n cdef int32_t _spikespace\n if '_owner' in _namespace:\n _owner = _namespace['_owner']\n _owner.initialise_queue()’
Arguments: ()