Analyzing a Data Set: Airbnbs in New York City
import pandas as pd
# Option 1: load the data straight from the URL (what we did in the workshop).
# df = pd.read_csv('http://bit.ly/airbnbcsv')
# Option 2: load a local copy (the CSV must be in the same folder as this notebook).
df = pd.read_csv('airbnb.csv')
# Show the DataFrame to get a first look at the rows and columns.
df
id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2539 | Clean & quiet apt home by the park | 2787 | John | Brooklyn | Kensington | 40.64749 | -73.97237 | Private room | 149 | 1 | 9 | 2018-10-19 | 0.21 | 6 | 365 |
1 | 2595 | Skylit Midtown Castle | 2845 | Jennifer | Manhattan | Midtown | 40.75362 | -73.98377 | Entire home/apt | 225 | 1 | 45 | 2019-05-21 | 0.38 | 2 | 355 |
2 | 3647 | THE VILLAGE OF HARLEM....NEW YORK ! | 4632 | Elisabeth | Manhattan | Harlem | 40.80902 | -73.94190 | Private room | 150 | 3 | 0 | NaN | NaN | 1 | 365 |
3 | 3831 | Cozy Entire Floor of Brownstone | 4869 | LisaRoxanne | Brooklyn | Clinton Hill | 40.68514 | -73.95976 | Entire home/apt | 89 | 1 | 270 | 2019-07-05 | 4.64 | 1 | 194 |
4 | 5022 | Entire Apt: Spacious Studio/Loft by central park | 7192 | Laura | Manhattan | East Harlem | 40.79851 | -73.94399 | Entire home/apt | 80 | 10 | 9 | 2018-11-19 | 0.10 | 1 | 0 |
5 | 5099 | Large Cozy 1 BR Apartment In Midtown East | 7322 | Chris | Manhattan | Murray Hill | 40.74767 | -73.97500 | Entire home/apt | 200 | 3 | 74 | 2019-06-22 | 0.59 | 1 | 129 |
6 | 5121 | BlissArtsSpace! | 7356 | Garon | Brooklyn | Bedford-Stuyvesant | 40.68688 | -73.95596 | Private room | 60 | 45 | 49 | 2017-10-05 | 0.40 | 1 | 0 |
7 | 5178 | Large Furnished Room Near B'way | 8967 | Shunichi | Manhattan | Hell's Kitchen | 40.76489 | -73.98493 | Private room | 79 | 2 | 430 | 2019-06-24 | 3.47 | 1 | 220 |
8 | 5203 | Cozy Clean Guest Room - Family Apt | 7490 | MaryEllen | Manhattan | Upper West Side | 40.80178 | -73.96723 | Private room | 79 | 2 | 118 | 2017-07-21 | 0.99 | 1 | 0 |
9 | 5238 | Cute & Cozy Lower East Side 1 bdrm | 7549 | Ben | Manhattan | Chinatown | 40.71344 | -73.99037 | Entire home/apt | 150 | 1 | 160 | 2019-06-09 | 1.33 | 4 | 188 |
10 | 5295 | Beautiful 1br on Upper West Side | 7702 | Lena | Manhattan | Upper West Side | 40.80316 | -73.96545 | Entire home/apt | 135 | 5 | 53 | 2019-06-22 | 0.43 | 1 | 6 |
11 | 5441 | Central Manhattan/near Broadway | 7989 | Kate | Manhattan | Hell's Kitchen | 40.76076 | -73.98867 | Private room | 85 | 2 | 188 | 2019-06-23 | 1.50 | 1 | 39 |
12 | 5803 | Lovely Room 1, Garden, Best Area, Legal rental | 9744 | Laurie | Brooklyn | South Slope | 40.66829 | -73.98779 | Private room | 89 | 4 | 167 | 2019-06-24 | 1.34 | 3 | 314 |
13 | 6021 | Wonderful Guest Bedroom in Manhattan for SINGLES | 11528 | Claudio | Manhattan | Upper West Side | 40.79826 | -73.96113 | Private room | 85 | 2 | 113 | 2019-07-05 | 0.91 | 1 | 333 |
14 | 6090 | West Village Nest - Superhost | 11975 | Alina | Manhattan | West Village | 40.73530 | -74.00525 | Entire home/apt | 120 | 90 | 27 | 2018-10-31 | 0.22 | 1 | 0 |
15 | 6848 | Only 2 stops to Manhattan studio | 15991 | Allen & Irina | Brooklyn | Williamsburg | 40.70837 | -73.95352 | Entire home/apt | 140 | 2 | 148 | 2019-06-29 | 1.20 | 1 | 46 |
16 | 7097 | Perfect for Your Parents + Garden | 17571 | Jane | Brooklyn | Fort Greene | 40.69169 | -73.97185 | Entire home/apt | 215 | 2 | 198 | 2019-06-28 | 1.72 | 1 | 321 |
17 | 7322 | Chelsea Perfect | 18946 | Doti | Manhattan | Chelsea | 40.74192 | -73.99501 | Private room | 140 | 1 | 260 | 2019-07-01 | 2.12 | 1 | 12 |
18 | 7726 | Hip Historic Brownstone Apartment with Backyard | 20950 | Adam And Charity | Brooklyn | Crown Heights | 40.67592 | -73.94694 | Entire home/apt | 99 | 3 | 53 | 2019-06-22 | 4.44 | 1 | 21 |
19 | 7750 | Huge 2 BR Upper East Cental Park | 17985 | Sing | Manhattan | East Harlem | 40.79685 | -73.94872 | Entire home/apt | 190 | 7 | 0 | NaN | NaN | 2 | 249 |
20 | 7801 | Sweet and Spacious Brooklyn Loft | 21207 | Chaya | Brooklyn | Williamsburg | 40.71842 | -73.95718 | Entire home/apt | 299 | 3 | 9 | 2011-12-28 | 0.07 | 1 | 0 |
21 | 8024 | CBG CtyBGd HelpsHaiti rm#1:1-4 | 22486 | Lisel | Brooklyn | Park Slope | 40.68069 | -73.97706 | Private room | 130 | 2 | 130 | 2019-07-01 | 1.09 | 6 | 347 |
22 | 8025 | CBG Helps Haiti Room#2.5 | 22486 | Lisel | Brooklyn | Park Slope | 40.67989 | -73.97798 | Private room | 80 | 1 | 39 | 2019-01-01 | 0.37 | 6 | 364 |
23 | 8110 | CBG Helps Haiti Rm #2 | 22486 | Lisel | Brooklyn | Park Slope | 40.68001 | -73.97865 | Private room | 110 | 2 | 71 | 2019-07-02 | 0.61 | 6 | 304 |
24 | 8490 | MAISON DES SIRENES1,bohemian apartment | 25183 | Nathalie | Brooklyn | Bedford-Stuyvesant | 40.68371 | -73.94028 | Entire home/apt | 120 | 2 | 88 | 2019-06-19 | 0.73 | 2 | 233 |
25 | 8505 | Sunny Bedroom Across Prospect Park | 25326 | Gregory | Brooklyn | Windsor Terrace | 40.65599 | -73.97519 | Private room | 60 | 1 | 19 | 2019-06-23 | 1.37 | 2 | 85 |
26 | 8700 | Magnifique Suite au N de Manhattan - vue Cloitres | 26394 | Claude & Sophie | Manhattan | Inwood | 40.86754 | -73.92639 | Private room | 80 | 4 | 0 | NaN | NaN | 1 | 0 |
27 | 9357 | Midtown Pied-a-terre | 30193 | Tommi | Manhattan | Hell's Kitchen | 40.76715 | -73.98533 | Entire home/apt | 150 | 10 | 58 | 2017-08-13 | 0.49 | 1 | 75 |
28 | 9518 | SPACIOUS, LOVELY FURNISHED MANHATTAN BEDROOM | 31374 | Shon | Manhattan | Inwood | 40.86482 | -73.92106 | Private room | 44 | 3 | 108 | 2019-06-15 | 1.11 | 3 | 311 |
29 | 9657 | Modern 1 BR / NYC / EAST VILLAGE | 21904 | Dana | Manhattan | East Village | 40.72920 | -73.98542 | Entire home/apt | 180 | 14 | 29 | 2019-04-19 | 0.24 | 1 | 67 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
48865 | 36472171 | 1 bedroom in sunlit apartment | 99144947 | Brenda | Manhattan | Inwood | 40.86845 | -73.92449 | Private room | 80 | 1 | 0 | NaN | NaN | 1 | 79 |
48866 | 36472710 | CozyHideAway Suite | 274225617 | Alberth | Queens | Briarwood | 40.70786 | -73.81448 | Entire home/apt | 58 | 1 | 0 | NaN | NaN | 1 | 159 |
48867 | 36473044 | The place you were dreaming for.(only for guys) | 261338177 | Diana | Brooklyn | Gravesend | 40.59080 | -73.97116 | Shared room | 25 | 1 | 0 | NaN | NaN | 6 | 338 |
48868 | 36473253 | Heaven for you(only for guy) | 261338177 | Diana | Brooklyn | Gravesend | 40.59118 | -73.97119 | Shared room | 25 | 7 | 0 | NaN | NaN | 6 | 365 |
48869 | 36474023 | Cozy, Sunny Brooklyn Escape | 1550580 | Julia | Brooklyn | Bedford-Stuyvesant | 40.68759 | -73.95705 | Private room | 45 | 4 | 0 | NaN | NaN | 1 | 7 |
48870 | 36474911 | Cozy, clean Williamsburg 1- bedroom apartment | 1273444 | Tanja | Brooklyn | Williamsburg | 40.71197 | -73.94946 | Entire home/apt | 99 | 4 | 0 | NaN | NaN | 1 | 22 |
48871 | 36475746 | A LARGE ROOM - 1 MONTH MINIMUM - WASHER&DRYER | 144008701 | Ozzy Ciao | Manhattan | Harlem | 40.82233 | -73.94687 | Private room | 35 | 29 | 0 | NaN | NaN | 2 | 31 |
48872 | 36476675 | Nycity-MyHome | 8636072 | Ben | Manhattan | Hell's Kitchen | 40.76236 | -73.99255 | Entire home/apt | 260 | 3 | 0 | NaN | NaN | 1 | 9 |
48873 | 36477307 | Brooklyn paradise | 241945355 | Clement & Rose | Brooklyn | Flatlands | 40.63116 | -73.92616 | Entire home/apt | 170 | 1 | 0 | NaN | NaN | 2 | 363 |
48874 | 36477588 | Short Term Rental in East Harlem | 214535893 | Jeffrey | Manhattan | East Harlem | 40.79760 | -73.93947 | Private room | 50 | 7 | 0 | NaN | NaN | 1 | 22 |
48875 | 36478343 | Welcome all as family | 274273284 | Anastasia | Manhattan | East Harlem | 40.78749 | -73.94749 | Private room | 140 | 1 | 0 | NaN | NaN | 1 | 180 |
48876 | 36478357 | Cozy, Air-Conditioned Private Bedroom in Harlem | 177932088 | Joseph | Manhattan | Harlem | 40.80953 | -73.95410 | Private room | 60 | 1 | 0 | NaN | NaN | 1 | 26 |
48877 | 36479230 | Studio sized room with beautiful light | 65767720 | Melanie | Brooklyn | Bushwick | 40.70418 | -73.91471 | Private room | 42 | 7 | 0 | NaN | NaN | 1 | 16 |
48878 | 36479723 | Room for rest | 41326856 | Jeerathinan | Queens | Elmhurst | 40.74477 | -73.87727 | Private room | 45 | 1 | 0 | NaN | NaN | 5 | 172 |
48879 | 36480292 | Gorgeous 1.5 Bdr with a private yard- Williams... | 540335 | Lee | Brooklyn | Williamsburg | 40.71728 | -73.94394 | Entire home/apt | 120 | 20 | 0 | NaN | NaN | 1 | 22 |
48880 | 36481315 | The Raccoon Artist Studio in Williamsburg New ... | 208514239 | Melki | Brooklyn | Williamsburg | 40.71232 | -73.94220 | Entire home/apt | 120 | 1 | 0 | NaN | NaN | 3 | 365 |
48881 | 36481615 | Peaceful space in Greenpoint, BK | 274298453 | Adrien | Brooklyn | Greenpoint | 40.72585 | -73.94001 | Private room | 54 | 6 | 0 | NaN | NaN | 1 | 15 |
48882 | 36482231 | Bushwick _ Myrtle-Wyckoff | 66058896 | Luisa | Brooklyn | Bushwick | 40.69652 | -73.91079 | Private room | 40 | 20 | 0 | NaN | NaN | 1 | 31 |
48883 | 36482416 | Sunny Bedroom NYC! Walking to Central Park!! | 131529729 | Kendall | Manhattan | East Harlem | 40.79755 | -73.93614 | Private room | 75 | 2 | 0 | NaN | NaN | 2 | 364 |
48884 | 36482783 | Brooklyn Oasis in the heart of Williamsburg | 274307600 | Jonathan | Brooklyn | Williamsburg | 40.71790 | -73.96238 | Private room | 190 | 7 | 0 | NaN | NaN | 1 | 341 |
48885 | 36482809 | Stunning Bedroom NYC! Walking to Central Park!! | 131529729 | Kendall | Manhattan | East Harlem | 40.79633 | -73.93605 | Private room | 75 | 2 | 0 | NaN | NaN | 2 | 353 |
48886 | 36483010 | Comfy 1 Bedroom in Midtown East | 274311461 | Scott | Manhattan | Midtown | 40.75561 | -73.96723 | Entire home/apt | 200 | 6 | 0 | NaN | NaN | 1 | 176 |
48887 | 36483152 | Garden Jewel Apartment in Williamsburg New York | 208514239 | Melki | Brooklyn | Williamsburg | 40.71232 | -73.94220 | Entire home/apt | 170 | 1 | 0 | NaN | NaN | 3 | 365 |
48888 | 36484087 | Spacious Room w/ Private Rooftop, Central loca... | 274321313 | Kat | Manhattan | Hell's Kitchen | 40.76392 | -73.99183 | Private room | 125 | 4 | 0 | NaN | NaN | 1 | 31 |
48889 | 36484363 | QUIT PRIVATE HOUSE | 107716952 | Michael | Queens | Jamaica | 40.69137 | -73.80844 | Private room | 65 | 1 | 0 | NaN | NaN | 2 | 163 |
48890 | 36484665 | Charming one bedroom - newly renovated rowhouse | 8232441 | Sabrina | Brooklyn | Bedford-Stuyvesant | 40.67853 | -73.94995 | Private room | 70 | 2 | 0 | NaN | NaN | 2 | 9 |
48891 | 36485057 | Affordable room in Bushwick/East Williamsburg | 6570630 | Marisol | Brooklyn | Bushwick | 40.70184 | -73.93317 | Private room | 40 | 4 | 0 | NaN | NaN | 2 | 36 |
48892 | 36485431 | Sunny Studio at Historical Neighborhood | 23492952 | Ilgar & Aysel | Manhattan | Harlem | 40.81475 | -73.94867 | Entire home/apt | 115 | 10 | 0 | NaN | NaN | 1 | 27 |
48893 | 36485609 | 43rd St. Time Square-cozy single bed | 30985759 | Taz | Manhattan | Hell's Kitchen | 40.75751 | -73.99112 | Shared room | 55 | 1 | 0 | NaN | NaN | 6 | 2 |
48894 | 36487245 | Trendy duplex in the very heart of Hell's Kitchen | 68119814 | Christophe | Manhattan | Hell's Kitchen | 40.76404 | -73.98933 | Private room | 90 | 7 | 0 | NaN | NaN | 1 | 23 |
48895 rows × 16 columns
# Print each column's name, non-null count, and dtype, plus overall memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48895 entries, 0 to 48894
Data columns (total 16 columns):
id 48895 non-null int64
name 48879 non-null object
host_id 48895 non-null int64
host_name 48874 non-null object
neighbourhood_group 48895 non-null object
neighbourhood 48895 non-null object
latitude 48895 non-null float64
longitude 48895 non-null float64
room_type 48895 non-null object
price 48895 non-null int64
minimum_nights 48895 non-null int64
number_of_reviews 48895 non-null int64
last_review 38843 non-null object
reviews_per_month 38843 non-null float64
calculated_host_listings_count 48895 non-null int64
availability_365 48895 non-null int64
dtypes: float64(3), int64(7), object(6)
memory usage: 6.0+ MB
# Summary statistics (count, mean, std, min, quartiles, max) for the price column.
# NOTE(review): the output recorded below reports count 48884 and min 10, which
# matches the post-filter output further down rather than the 48895 rows loaded
# above. It looks like this cell was re-run after the zero-price rows were
# removed; re-run the notebook top to bottom to refresh it.
df['price'].describe()
count 48884.000000
mean 152.755053
std 240.170260
min 10.000000
25% 69.000000
50% 106.000000
75% 175.000000
max 10000.000000
Name: price, dtype: float64
Remove rooms that cost zero per night
# Boolean mask: True for every listing whose price is exactly zero.
price_is_zero = df['price'].eq(0)
# `~` is the bitwise NOT operator; applied to a boolean Series it flips every
# True/False value, so the inverted mask keeps only rows with a nonzero price.
df = df.loc[~price_is_zero]
# Re-check the price statistics after dropping the zero-price rows.
df['price'].describe()
count 48884.000000
mean 152.755053
std 240.170260
min 10.000000
25% 69.000000
50% 106.000000
75% 175.000000
max 10000.000000
Name: price, dtype: float64
How many entries in each neighborhood?
# Count the listings in each neighbourhood; the result is ordered from most to fewest.
df['neighbourhood'].value_counts()
Williamsburg 3919
Bedford-Stuyvesant 3710
Harlem 2658
Bushwick 2462
Upper West Side 1971
Hell's Kitchen 1958
East Village 1853
Upper East Side 1798
Crown Heights 1564
Midtown 1545
East Harlem 1117
Greenpoint 1114
Chelsea 1113
Lower East Side 911
Astoria 900
Washington Heights 899
West Village 768
Financial District 744
Flatbush 621
Clinton Hill 572
Long Island City 537
Prospect-Lefferts Gardens 535
Park Slope 506
East Flatbush 500
Fort Greene 489
Murray Hill 484
Kips Bay 470
Flushing 426
Ridgewood 423
Greenwich Village 392
...
Emerson Hill 5
New Dorp Beach 5
New Brighton 5
Oakwood 5
Prince's Bay 4
Olinville 4
Castleton Corners 4
Arden Heights 4
Holliswood 4
Mill Basin 4
Todt Hill 4
Spuyten Duyvil 4
Neponsit 3
Huguenot 3
Eltingville 3
Graniteville 3
Breezy Point 3
West Farms 2
Lighthouse Hill 2
Silver Lake 2
Co-op City 2
Howland Hook 2
Westerleigh 2
Bay Terrace, Staten Island 2
Woodrow 1
Richmondtown 1
Willowbrook 1
Fort Wadsworth 1
Rossville 1
New Dorp 1
Name: neighbourhood, Length: 221, dtype: int64
Is Morningside Heights in the data set?
# Series.unique() returns the distinct values in the column; `in` then checks
# whether the neighbourhood name is among them.
'Morningside Heights' in df['neighbourhood'].unique()
True
How many neighborhoods are in the data set?
# Number of distinct neighbourhood names in the column.
len(df['neighbourhood'].unique())
221
Let's find out the most pricey neighborhoods
# Average listing price within each neighbourhood...
mean_price_by_neighbourhood = df.groupby('neighbourhood')['price'].mean()
# ...shown with the most expensive neighbourhoods first.
mean_price_by_neighbourhood.sort_values(ascending=False)
neighbourhood
Fort Wadsworth 800.000000
Woodrow 700.000000
Tribeca 490.638418
Sea Gate 487.857143
Riverdale 442.090909
Prince's Bay 409.500000
Battery Park City 367.557143
Flatiron District 341.925000
Randall Manor 336.000000
NoHo 295.717949
SoHo 287.103352
Midtown 282.719094
Neponsit 274.666667
West Village 267.682292
Greenwich Village 263.405612
Chelsea 249.738544
Willowbrook 249.000000
Theater District 248.013889
Nolita 230.138340
Financial District 225.490591
Gramercy 222.754438
Little Italy 222.066116
Murray Hill 221.415289
Breezy Point 213.333333
Cobble Hill 211.929293
Upper West Side 210.918316
Brooklyn Heights 209.064935
Hell's Kitchen 204.794178
Kips Bay 202.408511
DUMBO 196.305556
...
Fieldston 75.083333
Rossville 75.000000
Concourse Village 73.781250
Westerleigh 71.500000
Highbridge 71.111111
Silver Lake 70.000000
University Heights 69.571429
Fordham 69.444444
Morris Park 69.333333
Schuylerville 69.230769
Parkchester 69.076923
Graniteville 68.666667
Emerson Hill 68.200000
Arden Heights 67.250000
Woodhaven 67.170455
Olinville 64.000000
Borough Park 63.066176
Castle Hill 63.000000
Woodlawn 60.090909
Corona 59.171875
Mount Eden 58.500000
Concord 58.192308
Grant City 57.666667
New Dorp Beach 57.400000
Bronxdale 57.105263
New Dorp 57.000000
Soundview 53.466667
Tremont 51.545455
Hunts Point 50.500000
Bull's Head 47.333333
Name: price, Length: 221, dtype: float64
What's the average price in my old neighborhood (Sunnyside)?
# Boolean mask selecting the listings located in Sunnyside.
in_sunnyside = df['neighbourhood'] == 'Sunnyside'
sunnyside = df[in_sunnyside]
# Average price across the Sunnyside listings.
sunnyside['price'].mean()
84.86501377410468
# Full summary statistics for Sunnyside prices, to see the spread behind the mean.
sunnyside['price'].describe()
count 363.000000
mean 84.865014
std 52.227837
min 12.000000
25% 50.000000
50% 75.000000
75% 100.000000
max 600.000000
Name: price, dtype: float64
From FAQ: How do we get a dataframe with only certain columns?
# Index with a *list* of column names (hence the double brackets) to get a
# DataFrame containing only those columns.
df[['price', 'neighbourhood']]
price | neighbourhood | |
---|---|---|
0 | 149 | Kensington |
1 | 225 | Midtown |
2 | 150 | Harlem |
3 | 89 | Clinton Hill |
4 | 80 | East Harlem |
5 | 200 | Murray Hill |
6 | 60 | Bedford-Stuyvesant |
7 | 79 | Hell's Kitchen |
8 | 79 | Upper West Side |
9 | 150 | Chinatown |
10 | 135 | Upper West Side |
11 | 85 | Hell's Kitchen |
12 | 89 | South Slope |
13 | 85 | Upper West Side |
14 | 120 | West Village |
15 | 140 | Williamsburg |
16 | 215 | Fort Greene |
17 | 140 | Chelsea |
18 | 99 | Crown Heights |
19 | 190 | East Harlem |
20 | 299 | Williamsburg |
21 | 130 | Park Slope |
22 | 80 | Park Slope |
23 | 110 | Park Slope |
24 | 120 | Bedford-Stuyvesant |
25 | 60 | Windsor Terrace |
26 | 80 | Inwood |
27 | 150 | Hell's Kitchen |
28 | 44 | Inwood |
29 | 180 | East Village |
... | ... | ... |
48865 | 80 | Inwood |
48866 | 58 | Briarwood |
48867 | 25 | Gravesend |
48868 | 25 | Gravesend |
48869 | 45 | Bedford-Stuyvesant |
48870 | 99 | Williamsburg |
48871 | 35 | Harlem |
48872 | 260 | Hell's Kitchen |
48873 | 170 | Flatlands |
48874 | 50 | East Harlem |
48875 | 140 | East Harlem |
48876 | 60 | Harlem |
48877 | 42 | Bushwick |
48878 | 45 | Elmhurst |
48879 | 120 | Williamsburg |
48880 | 120 | Williamsburg |
48881 | 54 | Greenpoint |
48882 | 40 | Bushwick |
48883 | 75 | East Harlem |
48884 | 190 | Williamsburg |
48885 | 75 | East Harlem |
48886 | 200 | Midtown |
48887 | 170 | Williamsburg |
48888 | 125 | Hell's Kitchen |
48889 | 65 | Jamaica |
48890 | 70 | Bedford-Stuyvesant |
48891 | 40 | Bushwick |
48892 | 115 | Harlem |
48893 | 55 | Hell's Kitchen |
48894 | 90 | Hell's Kitchen |
48884 rows × 2 columns