Reputation: 23567
In my for loop, my code generates a list like this one:
list([0.0,0.0]/sum([0.0,0.0]))
The loop generates all sort of other number vectors but it also generates [nan,nan]
, and to avoid it I tried to put in a conditional to prevent it like the one below, but it doesn't return true.
nan in list([0.0,0.0]/sum([0.0,0.0]))
>>> False
Shouldn't it return true?
Libraries I've loaded:
import PerformanceAnalytics as perf
import DataAnalyticsHelpers
import DataHelpers as data
import OptimizationHelpers as optim
from matplotlib.pylab import *
from pandas.io.data import DataReader
from datetime import datetime,date,time
import tradingWithPython as twp
import tradingWithPython.lib.yahooFinance as data_downloader # used to get data from yahoo finance
import pandas as pd # as always.
import numpy as np
import zipline as zp
from scipy.optimize import minimize
from itertools import product, combinations
import time
from math import isnan
Upvotes: 25
Views: 136802
Reputation: 2617
has_nan = any(each!=each for each in your_list)
# Demonstration: the `in` operator on a list checks identity first, then
# equality. Because nan != nan (IEEE 754), a NaN is only "found" when the
# probe is the *same object* as the stored element.
import math
my_nan = float("NaN")  # a fresh NaN object, distinct from math.nan
list1 = [ math.nan ]
list2 = [ float("NaN") ]
list3 = [ my_nan ]
math.nan in list1 # True
math.nan in list2 # False
math.nan in list3 # False
float("NaN") in list1 # False
float("NaN") in list2 # False
float("NaN") in list3 # False
my_nan in list1 # False
my_nan in list2 # False
my_nan in list3 # True
# also makes sets really annoying:
# (set deduplication also uses identity-then-equality, so repeats of the
#  same NaN object collapse while distinct NaN objects all survive)
set1 = set([math.nan , math.nan , math.nan ])
set2 = set([my_nan , float("nan"), my_nan ])
set3 = set([float("nan"), float("nan"), float("nan") ])
len(set1) # >>> 1
len(set2) # >>> 2
len(set3) # >>> 3
For whatever reason, Python treats NaNs similarly to JavaScript's Symbols: every `float("nan")` is a distinct object, and container lookups match by identity first.
If your data is in a list/tuple/set this is the fastest way to check*
has_nan = any(each!=each for each in your_list)
# from math import isnan #<- is slow
Okay so, that^ is the fastest way unless the list is large. For large lists, this is slightly faster:
# NOTE: can *average* 4x slower for small lists
has_nan = numpy.isnan(numpy.array(your_list)).any()
However, if the data is already inside a numpy array, then this is the fastest way:
# when len() == 20,000 this is literally 100 times faster than the pure-python approach
has_nan = numpy.isnan(your_array).any()
Here's the performance/chart code (numpy 1.23 on python 3.11):
# Benchmark: time four ways of detecting a NaN in a sequence, for list
# sizes 2**1 .. 2**14, then plot computation time vs. size (log-log).
import timeit
from math import isnan
import numpy
import random
from statistics import mean as average
import json
values = []        # one row per list size, feeds the final DataFrame
sample_size = 200  # independent timing samples averaged per list size
for exponent in range(1,15):
    list_size = 2**exponent
    source = list(range(0,list_size))
    cases = {
        "pure python: !=":[],
        "pure python: isnan":[],
        "numpy convert to array":[],
        "numpy (data already in array)":[],
    }
    for _ in range(0,sample_size):
        # A negative position means this sample contains no NaN at all,
        # so both the hit and miss paths are measured.
        nan_position = random.randint(-(list_size-1),list_size-1)
        local_source = list(source)
        if nan_position >= 0:
            local_source[nan_position] = float("NaN")
        local_source = tuple(local_source)
        prebuilt = numpy.array(local_source)  # conversion cost excluded for the last case
        cases["pure python: !="].append(timeit.timeit(lambda: any(each!=each for each in local_source ), number=1_000))
        cases["pure python: isnan"].append(timeit.timeit(lambda: any(isnan(each) for each in local_source ), number=1_000))
        cases["numpy convert to array"].append(timeit.timeit(lambda: numpy.isnan(numpy.array(local_source)).any(), number=1_000))
        cases["numpy (data already in array)"].append(timeit.timeit(lambda: numpy.isnan(prebuilt).any(), number=1_000))
    # Collapse each case's samples to their mean before reporting.
    for each_key, each_value in cases.items():
        cases[each_key] = average(each_value)
    print(json.dumps({ "list_size":list_size, **cases,}))
    values.append({ "number of elements":list_size, **cases },)
# draw chart
import pandas
import plotly.express as px
df = pandas.DataFrame(values)
# Long format: one (variable, number of elements, value) row per measurement.
df = pandas.melt(df, value_vars=list(cases.keys()), id_vars=['number of elements'])
df["computation time"] = df["value"]
df.sort_values(by=["variable","number of elements"], inplace=True)
fig = px.line(df, color="variable",x="number of elements",y="computation time")
fig.update_layout(xaxis_type="log",yaxis_type="log")
fig.show()
Upvotes: 1
Reputation: 41
Maybe this is what you are looking for...
a = [2,3,np.nan]
# Reduce the element-wise isnan mask with .any(); bool() converts the
# numpy scalar to a plain Python bool. (The original
# `True if True in np.isnan(...) else False` was a redundant ternary that
# also relied on fragile `in`-on-array membership semantics.)
b = bool(np.isnan(np.array(a)).any())
print(b)
Upvotes: 4
Reputation: 352959
I think this makes sense because you are pulling numpy into scope indirectly via the star import.
>>> import numpy as np
>>> [0.0,0.0]/0
Traceback (most recent call last):
File "<ipython-input-3-aae9e30b3430>", line 1, in <module>
[0.0,0.0]/0
TypeError: unsupported operand type(s) for /: 'list' and 'int'
>>> [0.0,0.0]/np.float64(0)
array([ nan, nan])
When you did
from matplotlib.pylab import *
it pulled in numpy.sum
:
>>> from matplotlib.pylab import *
>>> sum is np.sum
True
>>> [0.0,0.0]/sum([0.0, 0.0])
array([ nan, nan])
You can test that this nan
object (nan
isn't unique in general) is in a list via identity, but if you try it in an array
it seems to test via equality, and nan != nan
:
>>> nan == nan
False
>>> nan == nan, nan is nan
(False, True)
>>> nan in [nan]
True
>>> nan in np.array([nan])
False
You could use np.isnan
:
>>> np.isnan([nan, nan])
array([ True, True], dtype=bool)
>>> np.isnan([nan, nan]).any()
True
Upvotes: 39