Reputation: 11
My data has 1000 features and 1000 samples, with random values from 0 to 100. I am applying a function that returns a bool to each row of a dask_cudf DataFrame, but I get an error in the terminal that includes <source missing, REPL/exec in use?>. Any ideas on how to fix this error? This is the whole code:
>>> from collections import Counter
>>> import dask_cudf
>>> def change(row, thresholds):
...     return 100.0 - (100.0 * Counter(row).most_common(1)[0][1] / len(row)) > thresholds
...
>>> data = dask_cudf.read_csv("file1.csv")
>>> data.head()
Unnamed: 0 0 1 2 3 4 5 6 7 8 ... 990 991 992 993 994 995 996 997 998 999
0 0 68 92 21 43 47 39 78 36 37 ... 15 74 25 16 36 29 76 79 69 45
1 1 97 11 92 54 87 80 37 79 31 ... 20 8 40 53 94 2 22 15 33 78
2 2 20 19 45 29 43 56 25 76 4 ... 42 6 88 95 84 15 31 63 79 7
3 3 91 50 20 37 51 58 81 48 79 ... 28 7 87 64 66 3 59 5 59 44
4 4 32 22 60 52 32 7 87 88 63 ... 94 36 44 59 88 40 79 66 92 4
[5 rows x 1001 columns]
>>> data = data[data.apply(change, axis=1, args=(5.0,), meta=(None, 'bool'))]
>>> data.head()
Traceback (most recent call last):
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/cudf/core/indexed_frame.py", line 1096, in _apply
kernel, retty = _compile_or_get(
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/cudf/core/udf/utils.py", line 202, in _compile_or_get
kernel, scalar_return_type = kernel_getter(frame, func, args)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/cudf/core/udf/row_function.py", line 129, in _get_row_kernel
scalar_return_type = _get_udf_return_type(row_type, func, args)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/cudf/core/udf/utils.py", line 53, in _get_udf_return_type
ptx, output_type = cudautils.compile_udf(func, compile_sig)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/cudf/utils/cudautils.py", line 248, in compile_udf
ptx_code, return_type = cuda.compile_ptx_for_current_device(
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/cuda/compiler.py", line 290, in compile_ptx_for_current_device
return compile_ptx(pyfunc, args, debug=debug, lineinfo=lineinfo,
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
return func(*args, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/cuda/compiler.py", line 267, in compile_ptx
cres = compile_cuda(pyfunc, None, args, debug=debug, lineinfo=lineinfo,
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
return func(*args, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/cuda/compiler.py", line 202, in compile_cuda
cres = compiler.compile_extra(typingctx=typingctx,
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler.py", line 693, in compile_extra
return pipeline.compile_extra(func)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler.py", line 429, in compile_extra
return self._compile_bytecode()
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler.py", line 497, in _compile_bytecode
return self._compile_core()
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler.py", line 476, in _compile_core
raise e
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler.py", line 463, in _compile_core
pm.run(self.state)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler_machinery.py", line 353, in run
raise patched_exception
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler_machinery.py", line 341, in run
self._runPass(idx, pass_inst, state)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
return func(*args, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler_machinery.py", line 296, in _runPass
mutated |= check(pss.run_pass, internal_state)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/compiler_machinery.py", line 269, in check
mangled = func(compiler_state)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/typed_passes.py", line 105, in run_pass
typemap, return_type, calltypes, errs = type_inference_stage(
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/typed_passes.py", line 81, in type_inference_stage
infer.build_constraint()
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/typeinfer.py", line 1039, in build_constraint
self.constrain_statement(inst)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/typeinfer.py", line 1386, in constrain_statement
self.typeof_assign(inst)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/typeinfer.py", line 1459, in typeof_assign
self.typeof_global(inst, inst.target, value)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/typeinfer.py", line 1559, in typeof_global
typ = self.resolve_value_type(inst, gvar.value)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/numba/core/typeinfer.py", line 1480, in resolve_value_type
raise TypingError(msg, loc=inst.loc)
numba.core.errors.TypingError: Failed in cuda mode pipeline (step: nopython frontend)
Untyped global name 'Counter': Cannot determine Numba type of <class 'type'>
File "<stdin>", line 2:
<source missing, REPL/exec in use?>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/dataframe/core.py", line 1219, in head
return self._head(n=n, npartitions=npartitions, compute=compute, safe=safe)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/dataframe/core.py", line 1253, in _head
result = result.compute()
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/base.py", line 312, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/base.py", line 600, in compute
results = schedule(dsk, keys, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/local.py", line 554, in get_sync
return get_async(
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/local.py", line 497, in get_async
for key, res_info, failed in queue_get(queue).result():
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/concurrent/futures/_base.py", line 439, in result
return self.__get_result()
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/concurrent/futures/_base.py", line 391, in __get_result
raise self._exception
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/local.py", line 539, in submit
fut.set_result(fn(*args, **kwargs))
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/local.py", line 235, in batch_execute_tasks
return [execute_task(*a) for a in it]
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/local.py", line 235, in <listcomp>
return [execute_task(*a) for a in it]
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/local.py", line 226, in execute_task
result = pack_exception(e, dumps)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/local.py", line 221, in execute_task
result = _execute_task(task, data)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/optimization.py", line 990, in __call__
return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/core.py", line 149, in get
result = _execute_task(task, cache)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/core.py", line 119, in <genexpr>
return func(*(_execute_task(a, cache) for a in args))
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/utils.py", line 41, in apply
return func(*args, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/dataframe/core.py", line 6533, in apply_and_enforce
df = func(*args, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/dask/utils.py", line 1053, in __call__
return getattr(__obj, self.method)(*args, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/cudf/core/dataframe.py", line 3826, in apply
return self._apply(func, _get_row_kernel, *args, **kwargs)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/home/software/compilers/anaconda3.9/envs/rapids-22.06/lib/python3.9/site-packages/cudf/core/indexed_frame.py", line 1100, in _apply
raise ValueError(
ValueError: user defined function compilation failed.
>>>
Upvotes: 0
Views: 470
Reputation: 1291
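The actual error is in the middle of your traceback: Untyped global name 'Counter': Cannot determine Numba type of <class 'type'>. cuDF compiles the function you pass to apply with Numba into a GPU kernel, and Numba can only compile operations it knows how to type. An arbitrary Python class such as collections.Counter is not supported inside that function.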
A better solution is to skip the apply and use .mode() from the pandas/dask API to find the most common integer in each row. https://docs.dask.org/en/latest/generated/dask.dataframe.DataFrame.mode.html
However, cuDF has incorrect documentation, keeping it from emulating the pandas example below, and I made a docs request. https://github.com/rapidsai/cudf/issues/11570
import numpy as np
import pandas as pd


def exceeds_threshold(frame, threshold):
    """Flag rows whose share of non-modal values (in percent) exceeds *threshold*.

    For every row this computes ``100 - 100 * count / n_columns``, where
    ``count`` is how many times the row's most common value occurs, and
    compares it with ``threshold``. Returns a boolean Series indexed like
    ``frame``.
    """
    n_columns = len(frame.columns)
    # mode(axis=1) returns extra columns when a row has tied modes; column 0
    # is always filled, so it is used as the row's most common value.
    row_mode = frame.mode(axis=1)[0]
    # The formula needs how OFTEN the mode occurs, not the mode value itself,
    # so count the cells in each row that equal that row's mode.
    mode_count = frame.eq(row_mode, axis=0).sum(axis=1)
    return 100.0 - (100.0 * mode_count / n_columns) > threshold


df = pd.DataFrame(np.random.randint(4, 10, size=(15, 10)), columns=list('ABCDEFGHIJ'))
print(df.mode(axis=1)[0])  # most common value per row (first one on ties)
output = exceeds_threshold(df, 5)
print(output)
In your next question, please include the data or a way to generate the data :)
Upvotes: 1