Reputation: 186
I'm trying to plot GPS displacement for each day as a scatter plot. I wrote a function to convert each day to a decimal date.
However, when I plot the scatter plot, it draws every single date on the x axis, so the axis just looks like a black bar. Is it possible to change the increment of the x axis?
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import datetime
import matplotlib.dates as dates
import random
# Observation dates as 'YYYY/MM/DD' strings, listed in no particular order.
lst1 = [
    '2015/01/01', '2016/01/01', '2017/01/01', '2015/01/02',
    '2016/01/02', '2017/01/02', '2015/01/03', '2016/01/03', '2017/01/03',
    '2015/01/04', '2015/01/05', '2017/01/04', '2016/01/04', '2016/01/05',
    '2015/01/06', '2017/01/05', '2016/01/06', '2015/01/07', '2017/01/06',
    '2017/01/07', '2016/01/07', '2015/01/08', '2017/01/08', '2016/01/08',
    '2015/01/09', '2016/01/09', '2017/01/09', '2016/01/10', '2017/01/10',
    '2015/01/11', '2016/01/11', '2017/01/11', '2015/01/12', '2016/01/12',
    '2015/01/13', '2017/01/12', '2016/01/13', '92017/01/13', '2016/01/14',
    '2015/01/14', '2017/01/14', '2015/01/15', '2016/01/15', '2017/01/15',
    '2016/01/16', '2015/01/16', '2017/01/16', '2017/01/17', '2016/01/17',
    '2015/01/18', '2016/01/18', '2017/01/18', '2015/01/19', '2016/01/19',
    '2017/01/19', '2015/01/20', '2016/01/20', '2017/01/20', '2015/01/21',
    '2016/01/21', '2017/01/21', '2015/01/22', '2016/01/22', '2017/01/22',
    '2015/01/23', '2016/01/23', '2017/01/23', '2015/01/24', '2016/01/24',
    '2017/01/24', '2015/01/25', '2016/01/25',
]
# One elevation value per date: a random permutation of 0..71.
lst2 = random.sample(range(72), 72)
def date2decdate(date: str) -> str:
    """Convert a 'YYYY/MM/DD' date string to an approximate decimal year.

    Every month is counted as 30 days and the year as 365 days, so the
    result is only an approximation of the true position within the year.

    Args:
        date: Date text whose first three '/'-separated fields are the
            year, month and day.

    Returns:
        The decimal year formatted as a string.

    Raises:
        IndexError: If ``date`` has fewer than three '/'-separated fields.
        ValueError: If a field cannot be parsed as a number.
    """
    fields = date.split('/')
    year = float(fields[0])
    month = float(fields[1])
    day = float(fields[2])
    # NOTE: the value is returned as text, so anything plotting it will see
    # strings rather than numbers.
    decdate = str(year + ((month - 1) * 30 + day) / 365)
    return decdate
# Build the table of raw observations: one row per date.
df = pd.DataFrame(
    {
        'Date': lst1,
        'Elevation': lst2,
    }
)
print(df.Date)
# convert displacement to centimeters
df['Elevation'] *= 100
#df['Northing']*=100
#df['Easting']*=100
# calculate displacement relative to the first observation
# (.iloc[0] picks the scalar directly; float() on a one-element Series is
# deprecated in recent pandas)
h = float(df['Elevation'].iloc[0])
df['Elevation'] -= h
# Remove outliers by keeping data points that are within +-3 standard deviations
# of the median of the column Elevation.
# .copy() makes the filtered frame independent, so the column assignment
# below does not trigger SettingWithCopyWarning.
df = df[np.abs(df.Elevation - df.Elevation.median()) <= (3 * df.Elevation.std())].copy()
df['Date'] = df['Date'].apply(date2decdate)  # converts Dates to decimal date
plt.scatter(df.Date, df.Elevation)
plt.xlabel('Dates')
plt.ylabel('Displacement(cm)')
plt.show()
Upvotes: 0
Views: 1190
Reputation: 339160
You are plotting the dates on the x axis as strings. This leads matplotlib to treat them as a categorical variable (e.g. ["apple", "banana", "cherry"]), so it shows every label (which would be meaningful in such cases).
Here you don't want categories but true dates or numbers. First you need to make sure that those strings actually represent dates or numbers - remove something like '92017/01/13' from the list.
To use usual decimal numbers, remove the str
cast from your function.
# Keep the result numeric (no str cast); each month is approximated as 30 days.
decdate = year + ((month-1)*30+day)/365.
Complete code for reproduction:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import random
# Observation dates as 'YYYY/MM/DD' strings, listed in no particular order.
lst1 = [
    '2015/01/01', '2016/01/01', '2017/01/01', '2015/01/02',
    '2016/01/02', '2017/01/02', '2015/01/03', '2016/01/03', '2017/01/03',
    '2015/01/04', '2015/01/05', '2017/01/04', '2016/01/04', '2016/01/05',
    '2015/01/06', '2017/01/05', '2016/01/06', '2015/01/07', '2017/01/06',
    '2017/01/07', '2016/01/07', '2015/01/08', '2017/01/08', '2016/01/08',
    '2015/01/09', '2016/01/09', '2017/01/09', '2016/01/10', '2017/01/10',
    '2015/01/11', '2016/01/11', '2017/01/11', '2015/01/12', '2016/01/12',
    '2015/01/13', '2017/01/12', '2016/01/13', '2017/01/13', '2016/01/14',
    '2015/01/14', '2017/01/14', '2015/01/15', '2016/01/15', '2017/01/15',
    '2016/01/16', '2015/01/16', '2017/01/16', '2017/01/17', '2016/01/17',
    '2015/01/18', '2016/01/18', '2017/01/18', '2015/01/19', '2016/01/19',
    '2017/01/19', '2015/01/20', '2016/01/20', '2017/01/20', '2015/01/21',
    '2016/01/21', '2017/01/21', '2015/01/22', '2016/01/22', '2017/01/22',
    '2015/01/23', '2016/01/23', '2017/01/23', '2015/01/24', '2016/01/24',
    '2017/01/24', '2015/01/25', '2016/01/25',
]
# One elevation value per date: a random permutation of 0..71.
lst2 = random.sample(range(72), 72)
def date2decdate(date: str) -> float:
    """Convert a 'YYYY/MM/DD' date string to an approximate decimal year.

    Every month is counted as 30 days and the year as 365 days, so the
    result is only an approximation of the true position within the year.

    Args:
        date: Date text whose first three '/'-separated fields are the
            year, month and day.

    Returns:
        The decimal year as a float, suitable for a numeric plot axis.

    Raises:
        IndexError: If ``date`` has fewer than three '/'-separated fields.
        ValueError: If a field cannot be parsed as a number.
    """
    fields = date.split('/')
    year = float(fields[0])
    month = float(fields[1])
    day = float(fields[2])
    decdate = year + ((month - 1) * 30 + day) / 365.
    return decdate
# Build the table of raw observations: one row per date.
df = pd.DataFrame({'Date': lst1, 'Elevation': lst2})
# Scale the elevation by 100, then express it relative to the first row.
df['Elevation'] *= 100
# .iloc[0] picks the scalar directly; float() on a one-element Series is
# deprecated in recent pandas.
h = float(df['Elevation'].iloc[0])
df['Elevation'] -= h
# Keep rows within 3 standard deviations of the median elevation.
# .copy() makes the filtered frame independent, so the column assignment
# below does not trigger SettingWithCopyWarning.
df = df[np.abs(df.Elevation - df.Elevation.median()) <= (3 * df.Elevation.std())].copy()
df['Date'] = df['Date'].apply(date2decdate)  # converts Dates to decimal date
plt.scatter(df.Date, df.Elevation)
plt.xlabel('Dates')
plt.ylabel('Displacement(cm)')
plt.show()
In many cases, it is advantageous to use real dates. You can convert the column to datetime,
# Parse the 'YYYY/MM/DD' strings into real datetime values.
df['Date'] = pd.to_datetime(df["Date"], format="%Y/%m/%d")
This can then be plotted directly via
# plot Elevation against Date through the pandas plotting wrapper
df.plot(x="Date", y="Elevation")
# or, if you want scatter points
df.plot(x="Date", y="Elevation", ls="", marker="o")
Complete code for reproduction:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import random
# Observation dates as 'YYYY/MM/DD' strings, listed in no particular order.
lst1 = [
    '2015/01/01', '2016/01/01', '2017/01/01', '2015/01/02',
    '2016/01/02', '2017/01/02', '2015/01/03', '2016/01/03', '2017/01/03',
    '2015/01/04', '2015/01/05', '2017/01/04', '2016/01/04', '2016/01/05',
    '2015/01/06', '2017/01/05', '2016/01/06', '2015/01/07', '2017/01/06',
    '2017/01/07', '2016/01/07', '2015/01/08', '2017/01/08', '2016/01/08',
    '2015/01/09', '2016/01/09', '2017/01/09', '2016/01/10', '2017/01/10',
    '2015/01/11', '2016/01/11', '2017/01/11', '2015/01/12', '2016/01/12',
    '2015/01/13', '2017/01/12', '2016/01/13', '2017/01/13', '2016/01/14',
    '2015/01/14', '2017/01/14', '2015/01/15', '2016/01/15', '2017/01/15',
    '2016/01/16', '2015/01/16', '2017/01/16', '2017/01/17', '2016/01/17',
    '2015/01/18', '2016/01/18', '2017/01/18', '2015/01/19', '2016/01/19',
    '2017/01/19', '2015/01/20', '2016/01/20', '2017/01/20', '2015/01/21',
    '2016/01/21', '2017/01/21', '2015/01/22', '2016/01/22', '2017/01/22',
    '2015/01/23', '2016/01/23', '2017/01/23', '2015/01/24', '2016/01/24',
    '2017/01/24', '2015/01/25', '2016/01/25',
]
# One elevation value per date: a random permutation of 0..71.
lst2 = random.sample(range(72), 72)
df = pd.DataFrame({'Date': lst1, 'Elevation': lst2})
# Scale the elevation by 100, then express it relative to the first row.
df['Elevation'] *= 100
# .iloc[0] picks the scalar directly; float() on a one-element Series is
# deprecated in recent pandas.
h = float(df['Elevation'].iloc[0])
df['Elevation'] -= h
# Keep rows within 3 standard deviations of the median elevation.
# .copy() makes the filtered frame independent, so the column assignment
# below does not trigger SettingWithCopyWarning.
df = df[np.abs(df.Elevation - df.Elevation.median()) <= (3 * df.Elevation.std())].copy()
#Convert to datetime
df['Date'] = pd.to_datetime(df["Date"], format="%Y/%m/%d")
#plot with pandas wrapper
df.plot(x="Date", y="Elevation", ls="", marker="o")
plt.xlabel('Dates')
plt.ylabel('Displacement(cm)')
plt.show()
To have more control over the appearance of the dates on the axes, you may use matplotlib directly. E.g. to place a tick on the first day of January and of July, and to use the date format with slashes, use
# Scatter the elevations against the datetime values.
plt.scatter(df['Date'].values, df['Elevation'])
# Grab the current axes once and configure its x axis.
ax = plt.gca()
# Major ticks only in months 1 and 7 (January and July).
ax.xaxis.set_major_locator(dates.MonthLocator((1, 7)))
# Label each tick as year/month/day separated by slashes.
ax.xaxis.set_major_formatter(dates.DateFormatter("%Y/%m/%d"))
# Let the figure adjust the date labels' layout.
fig = plt.gcf()
fig.autofmt_xdate()
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.dates as dates
import random
# Observation dates as 'YYYY/MM/DD' strings, listed in no particular order.
lst1 = [
    '2015/01/01', '2016/01/01', '2017/01/01', '2015/01/02',
    '2016/01/02', '2017/01/02', '2015/01/03', '2016/01/03', '2017/01/03',
    '2015/01/04', '2015/01/05', '2017/01/04', '2016/01/04', '2016/01/05',
    '2015/01/06', '2017/01/05', '2016/01/06', '2015/01/07', '2017/01/06',
    '2017/01/07', '2016/01/07', '2015/01/08', '2017/01/08', '2016/01/08',
    '2015/01/09', '2016/01/09', '2017/01/09', '2016/01/10', '2017/01/10',
    '2015/01/11', '2016/01/11', '2017/01/11', '2015/01/12', '2016/01/12',
    '2015/01/13', '2017/01/12', '2016/01/13', '2017/01/13', '2016/01/14',
    '2015/01/14', '2017/01/14', '2015/01/15', '2016/01/15', '2017/01/15',
    '2016/01/16', '2015/01/16', '2017/01/16', '2017/01/17', '2016/01/17',
    '2015/01/18', '2016/01/18', '2017/01/18', '2015/01/19', '2016/01/19',
    '2017/01/19', '2015/01/20', '2016/01/20', '2017/01/20', '2015/01/21',
    '2016/01/21', '2017/01/21', '2015/01/22', '2016/01/22', '2017/01/22',
    '2015/01/23', '2016/01/23', '2017/01/23', '2015/01/24', '2016/01/24',
    '2017/01/24', '2015/01/25', '2016/01/25',
]
# One elevation value per date: a random permutation of 0..71.
lst2 = random.sample(range(72), 72)
df = pd.DataFrame({'Date': lst1, 'Elevation': lst2})
# Scale the elevation by 100, then express it relative to the first row.
df['Elevation'] *= 100
# .iloc[0] picks the scalar directly; float() on a one-element Series is
# deprecated in recent pandas.
h = float(df['Elevation'].iloc[0])
df['Elevation'] -= h
# Keep rows within 3 standard deviations of the median elevation.
# .copy() makes the filtered frame independent, so the column assignment
# below does not trigger SettingWithCopyWarning.
df = df[np.abs(df.Elevation - df.Elevation.median()) <= (3 * df.Elevation.std())].copy()
# Parse the 'YYYY/MM/DD' strings into real datetime values.
df['Date'] = pd.to_datetime(df["Date"], format="%Y/%m/%d")
plt.scatter(df['Date'].values, df['Elevation'])
# Major ticks only in months 1 and 7 (January and July), labelled with
# slash-separated year/month/day.
plt.gca().xaxis.set_major_locator(dates.MonthLocator((1, 7)))
plt.gca().xaxis.set_major_formatter(dates.DateFormatter("%Y/%m/%d"))
plt.gcf().autofmt_xdate()
plt.xlabel('Dates')
plt.ylabel('Displacement(cm)')
plt.show()
Upvotes: 1