Reputation: 206
I have a DataFrame, and I need to compare the median of a column computed on a filtered subset of the rows with the median of the same column computed on all the rows of the original DataFrame.
Here is the furthest I have got. I currently produce two separate plots, which in my opinion should be combined into a single plot:
# Boolean mask selecting only the properties that have a waterfront
filt_waterfront = df['waterfront'] == 1

# Two side-by-side axes: filtered data on the left, all data on the right
fig, axs = plt.subplots(1, 2)
sns.boxplot(y='price', data=df[filt_waterfront], ax=axs[0], color='red')
sns.boxplot(y='price', data=df, ax=axs[1], color='orange')

fig.set_size_inches(9, 6)
fig.suptitle('Price plots of properties with waterfront and general properties')

# Label each subplot so the two price distributions can be told apart
axs[0].set_ylabel("Price")
axs[0].set_xlabel("Properties with Waterfront")
axs[1].set_ylabel("Price")
axs[1].set_xlabel("General Properties")
My filter selects the properties that have a waterfront. The plot on the right shows general properties (the original, unfiltered column), and the plot on the left shows the filtered ones. I'm trying to find a way to put both of these box plots into one graph, because it would look much cleaner and there's no real reason to present two graphs other than my not knowing how to combine them.
Any help is really appreciated, thanks in advance!
Upvotes: 2
Views: 683
Reputation: 62553
# Keep only the waterfront rows, restricted to the two columns that get plotted
filtered = df.loc[df.waterfront.eq(1), ['price', 'waterfront']].copy()
`comb` is created by selecting only the specific columns to be plotted, to prevent creating a potentially large DataFrame with unnecessary information, as would happen with `comb = df.assign(waterfront="All")`.
Tested in python 3.11, pandas 1.5.3, matplotlib 3.7.0, seaborn 0.12.2.
import pandas as pd
import seaborn as sns
# The OP's sample data has many columns; only these two are needed for the plot
plot_cols = df[['price', 'waterfront']]

# Second copy of every price, with the category replaced by the label "All"
all_rows = plot_cols[['price']].assign(waterfront="All")

# Stack the original categories on top of the "All" rows, renumbering the index
comb = pd.concat([plot_cols, all_rows], ignore_index=True)

# One box per waterfront category plus one box for "All", on a single axes
ax = sns.boxplot(x='waterfront', y='price', data=comb)
comb.head()
price waterfront
0 221900.0 0
1 538000.0 1
2 180000.0 0
3 604000.0 1
4 510000.0 0
comb.tail()
price waterfront
95 488000.0 All
96 210490.0 All
97 785000.0 All
98 450000.0 All
99 1350000.0 All
data = {'id': [7129300520, 6414100192, 5631500400, 2487200875, 1954400510, 7237550310, 1321400060, 2008000270, 2414600126, 3793500160, 1736800520, 9212900260, 114101516, 6054650070, 1175000570, 9297300055, 1875500060, 6865200140, 16000397, 7983200060, 6300500875, 2524049179, 7137970340, 8091400200, 3814700200, 1202000200, 1794500383, 3303700376, 5101402488, 1873100390, 8562750320, 2426039314, 461000390, 7589200193, 7955080270, 9547205180, 9435300030, 2768000400, 7895500070, 2078500320, 5547700270, 7766200013, 7203220400, 9270200160, 1432701230, 8035350320, 8945200830, 4178300310, 9215400105, 822039084], 'date': ['20141013T000000', '20141209T000000', '20150225T000000', '20141209T000000', '20150218T000000', '20140512T000000', '20140627T000000', '20150115T000000', '20150415T000000', '20150312T000000', '20150403T000000', '20140527T000000', '20140528T000000', '20141007T000000', '20150312T000000', '20150124T000000', '20140731T000000', '20140529T000000', '20141205T000000', '20150424T000000', '20140514T000000', '20140826T000000', '20140703T000000', '20140516T000000', '20141120T000000', '20141103T000000', '20140626T000000', '20141201T000000', '20140624T000000', '20150302T000000', '20141110T000000', '20141201T000000', '20140624T000000', '20141110T000000', '20141203T000000', '20140613T000000', '20140528T000000', '20141230T000000', '20150213T000000', '20140620T000000', '20140715T000000', '20140811T000000', '20140707T000000', '20141028T000000', '20140729T000000', '20140718T000000', '20150325T000000', '20140716T000000', '20150428T000000', '20150311T000000'], 'price': [221900.0, 538000.0, 180000.0, 604000.0, 510000.0, 1225000.0, 257500.0, 291850.0, 229500.0, 323000.0, 662500.0, 468000.0, 310000.0, 400000.0, 530000.0, 650000.0, 395000.0, 485000.0, 189000.0, 230000.0, 385000.0, 2000000.0, 285000.0, 252700.0, 329000.0, 233000.0, 937000.0, 667000.0, 438000.0, 719000.0, 580500.0, 280000.0, 687500.0, 535000.0, 322500.0, 696000.0, 550000.0, 640000.0, 240000.0, 605000.0, 625000.0, 
775000.0, 861990.0, 685000.0, 309000.0, 488000.0, 210490.0, 785000.0, 450000.0, 1350000.0], 'bedrooms': [3, 3, 2, 4, 3, 4, 3, 3, 3, 3, 3, 2, 3, 3, 5, 4, 3, 4, 2, 3, 4, 3, 5, 2, 3, 3, 3, 3, 3, 4, 3, 2, 4, 3, 4, 3, 4, 4, 4, 4, 4, 4, 5, 3, 3, 3, 3, 4, 3, 3], 'bathrooms': [1.0, 2.25, 1.0, 3.0, 2.0, 4.5, 2.25, 1.5, 1.0, 2.5, 2.5, 1.0, 1.0, 1.75, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.75, 2.75, 2.5, 1.5, 2.25, 2.0, 1.75, 1.0, 1.75, 2.5, 2.5, 1.5, 1.75, 1.0, 2.75, 2.5, 1.0, 2.0, 1.0, 2.5, 2.5, 2.25, 2.75, 1.0, 1.0, 2.5, 1.0, 2.5, 1.75, 2.5], 'sqmeters_living': [109.624675, 238.758826, 71.534745, 182.088443, 156.075808, 503.530286, 159.327388, 98.476403, 165.366035, 175.585284, 330.73207, 107.76663, 132.850242, 127.276106, 168.153103, 274.061687, 175.585284, 148.643627, 111.48272, 116.127834, 150.501672, 283.351914, 210.888146, 99.405425, 227.610554, 158.862876, 227.610554, 130.063174, 141.211446, 238.758826, 215.533259, 110.553698, 216.462282, 101.263471, 191.37867, 213.675214, 154.217763, 219.24935, 113.340766, 243.403939, 238.758826, 392.047566, 333.983649, 145.856559, 118.914902, 293.571163, 91.973244, 212.746191, 116.127834, 255.759941], 'sqmeters_lot': [524.897808, 672.798216, 929.022668, 464.511334, 750.650316, 9469.528056, 633.500557, 902.173913, 693.979933, 609.43887, 910.070606, 557.413601, 1848.848012, 899.293943, 450.575994, 464.511334, 1304.347826, 399.479747, 915.087328, 908.026756, 462.653289, 4168.246005, 585.284281, 895.856559, 603.864734, 436.361947, 250.0, 146.878484, 592.716462, 666.38796, 369.751022, 117.521368, 464.511334, 278.7068, 618.636195, 284.280936, 3237.458194, 557.413601, 750.185805, 701.690821, 512.820513, 2246.934225, 523.875883, 211.817168, 897.064288, 1263.749535, 792.270531, 1246.376812, 553.976217, 6039.111854], 'floors': [1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.5, 1.0, 1.5, 2.0, 2.0, 1.5, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.5, 2.0, 1.5, 1.0, 2.0, 2.0, 3.0, 1.5, 1.5, 1.0, 1.5, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 
2.0, 1.0, 2.0, 1.0, 1.0], 'waterfront': [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], 'view': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2], 'grade': [7, 7, 6, 7, 8, 11, 7, 7, 7, 7, 8, 7, 7, 7, 7, 9, 7, 7, 7, 7, 7, 9, 8, 7, 8, 6, 8, 8, 7, 8, 8, 7, 7, 8, 7, 8, 5, 8, 7, 8, 9, 8, 9, 7, 6, 8, 6, 9, 7, 9], 'sqmeters_above': [109.624675, 201.597919, 71.534745, 97.54738, 156.075808, 361.389818, 159.327388, 98.476403, 97.54738, 175.585284, 172.798216, 79.895949, 132.850242, 127.276106, 168.153103, 183.946488, 175.585284, 148.643627, 111.48272, 116.127834, 79.895949, 216.462282, 210.888146, 99.405425, 227.610554, 158.862876, 162.578967, 130.063174, 73.392791, 238.758826, 215.533259, 110.553698, 140.282423, 101.263471, 118.914902, 140.282423, 86.399108, 219.24935, 82.683017, 243.403939, 238.758826, 241.545894, 333.983649, 145.856559, 85.470085, 293.571163, 91.973244, 212.746191, 116.127834, 201.133408], 'sqmeters_basement': [0.0, 37.160907, 0.0, 84.541063, 0.0, 142.140468, 0.0, 0.0, 67.818655, 0.0, 157.933854, 27.87068, 0.0, 0.0, 0.0, 90.115199, 0.0, 0.0, 0.0, 0.0, 70.605723, 66.889632, 0.0, 0.0, 0.0, 0.0, 65.031587, 0.0, 67.818655, 0.0, 0.0, 0.0, 76.179859, 0.0, 72.463768, 73.392791, 67.818655, 0.0, 30.657748, 0.0, 0.0, 150.501672, 0.0, 0.0, 33.444816, 0.0, 0.0, 0.0, 0.0, 0.0], 'yr_built': [1955.0, 1951.0, 1933.0, 1965.0, 1987.0, 2001.0, 1995.0, 1963.0, 1960.0, 2003.0, 1965.0, 1942.0, 1927.0, 1977.0, 1900.0, 1979.0, 1994.0, 1916.0, 1921.0, 1969.0, 1947.0, 1968.0, 1995.0, 1985.0, 1985.0, 1941.0, 1915.0, 1909.0, 1948.0, 2005.0, 2003.0, 2005.0, 1929.0, 1929.0, 1981.0, 1930.0, 1933.0, 1904.0, 1969.0, 1996.0, 2000.0, 1984.0, 2014.0, 1922.0, 1959.0, 2003.0, 1966.0, 1981.0, 1953.0, 0.0], 'yr_renovated': [0.0, 1991.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2002.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 'zipcode': [98178.0, 98125.0, 98028.0, 98136.0, 98074.0, 98053.0, 98003.0, 98198.0, 98146.0, 98038.0, 98007.0, 98115.0, 98028.0, 98074.0, 98107.0, 98126.0, 98019.0, 98103.0, 98002.0, 98003.0, 98133.0, 98040.0, 98092.0, 98030.0, 98030.0, 98002.0, 98119.0, 98112.0, 98115.0, 98052.0, 98027.0, 98133.0, 98117.0, 98117.0, 98058.0, 98115.0, 98052.0, 98107.0, 98001.0, 98056.0, 98074.0, 98166.0, 98053.0, 98119.0, 98058.0, 98019.0, 98023.0, 98007.0, 98115.0, 0.0], 'lat': [47.5112, 47.721, 47.7379, 47.5208, 47.6168, 47.6561, 47.3097, 47.4095, 47.5123, 47.3684, 47.6007, 47.69, 47.7558, 47.6127, 47.67, 47.5714, 47.7277, 47.6648, 47.3089, 47.3343, 47.7025, 47.5316, 47.3266, 47.3533, 47.3739, 47.3048, 47.6386, 47.6221, 47.695, 47.7073, 47.5391, 47.7274, 47.6823, 47.6889, 47.4276, 47.6827, 47.6621, 47.6702, 47.3341, 47.5301, 47.6145, 47.445, 47.6848, 47.6413, 47.4485, 47.7443, 47.3066, 47.6194, 47.6796, 0.0], 'long': [-122.257, -122.319, -122.233, -122.393, -122.045, -122.005, -122.327, -122.315, -122.337, -122.031, -122.145, -122.292, -122.229, -122.045, -122.394, -122.375, -121.962, -122.343, -122.21, -122.306, -122.341, -122.233, -122.169, -122.166, -122.172, -122.218, -122.36, -122.314, -122.304, -122.11, -122.07, -122.357, -122.368, -122.375, -122.157, -122.31, -122.132, -122.362, -122.282, -122.18, -122.027, -122.347, -122.016, -122.364, -122.175, -121.977, -122.371, -122.151, -122.301, 0.0], 'sqmeters_living15': [124.489038, 157.004831, 252.694166, 126.347083, 167.22408, 442.21479, 207.915273, 153.28874, 165.366035, 222.036418, 205.31401, 123.560015, 165.366035, 127.276106, 126.347083, 198.810851, 175.585284, 149.57265, 98.476403, 118.914902, 130.063174, 381.828317, 208.101078, 113.340766, 204.384987, 95.689335, 163.50799, 172.798216, 141.211446, 244.332962, 239.687848, 129.134151, 
135.63731, 145.856559, 187.662579, 147.714604, 200.668896, 160.720922, 119.843924, 243.403939, 229.468599, 223.894463, 336.770717, 146.785582, 124.489038, 283.351914, 114.083984, 248.978075, 90.115199, 0.0], 'sqmeters_lot15': [524.897808, 709.680416, 748.978075, 464.511334, 697.045708, 9469.528056, 633.500557, 902.173913, 753.716091, 703.27016, 829.152731, 557.413601, 1179.580082, 948.34634, 450.575994, 371.609067, 1302.303976, 399.479747, 473.337049, 822.185061, 462.653289, 1889.260498, 650.780379, 779.07841, 637.774062, 437.105165, 331.939799, 358.695652, 579.245634, 559.82906, 369.751022, 163.136381, 464.511334, 471.943515, 810.107767, 303.232999, 1065.310294, 436.640654, 724.637681, 1104.050539, 526.662951, 2844.388703, 523.875883, 245.261984, 818.283166, 857.673727, 821.256039, 1271.367521, 473.801561, 0.0]}
# Build the sample DataFrame from the `data` dictionary of column lists
df = pd.DataFrame(data)
# Preview of the two columns used for plotting (output shown below)
# display(df[['price', 'waterfront']].head())
price waterfront
0 221900.0 0
1 538000.0 1
2 180000.0 0
3 604000.0 1
4 510000.0 0
Upvotes: 4
Reputation: 6369
Use the `hue` parameter of `sns.boxplot`:
# Split the price distribution by the waterfront mask via `hue`.
# `df`, `axs` and `filt_waterfront` are the names defined in the question's code.
sns.boxplot(y='price', data=df, ax=axs[1], color='orange', hue=filt_waterfront)
Upvotes: 0