import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.pyplot import figure

# Load the listings table from the working directory into a DataFrame.
df=pd.read_csv('listings.csv')

# Preview the first rows to check that the file parsed as expected.
df.head()

def remove_sign(x, sign):
    """Convert a currency-like string such as ``'$1,250.00'`` to a float.

    Args:
        x: Value to clean. Strings (including ``str`` subclasses such as
            ``numpy.str_``) are parsed; anything else is returned untouched,
            which lets already-numeric or missing values pass through.
        sign: Substring to strip before parsing, e.g. ``'$'``.

    Returns:
        The parsed ``float`` for string input, otherwise ``x`` unchanged.

    Raises:
        ValueError: If the string is not numeric once ``sign`` and the
            thousands separators have been removed.
    """
    # isinstance() rather than an exact type check so str subclasses are
    # parsed too instead of silently being returned as text.
    if isinstance(x, str):
        x = float(x.replace(sign, '').replace(',', ''))
    return x

# Inspect the raw price column: it is stored as text (dtype object, e.g.
# '$250.00'), so it has to be parsed before any numeric work.
df['price']

0       $250.00 
1        $65.00 
2        $65.00 
3        $75.00 
4        $79.00 
          ...   
3580     $69.00 
3581    $150.00 
3582    $198.00 
3583     $65.00 
3584     $65.00 
Name: price, Length: 3585, dtype: object

# Work on an independent two-column frame: price and the grouping variable.
df = df[['price', 'property_type']].copy()

# Turn the '$1,234.00'-style strings into floats.
df['price'] = df['price'].apply(remove_sign, sign='$')

# One box per property type, before any outlier removal.
figure(figsize=(12, 8), dpi=80)
sns.boxplot(y='price', x='property_type', data=df)
plt.xticks(rotation=90)
plt.ylabel('Price ($)')

Text(0, 0.5, 'Price ($)')

def remove_outlier_IQR(df):
    """Drop values lying outside the 1.5 * IQR fences.

    Args:
        df: Numeric pandas object exposing ``quantile`` and boolean-mask
            indexing (the script passes a Series of prices).

    Returns:
        ``df`` indexed by the "not an outlier" mask, i.e. without the entries
        below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR``.
    """
    lower_q, upper_q = df.quantile(0.25), df.quantile(0.75)
    spread = upper_q - lower_q

    below_fence = df < (lower_q - 1.5 * spread)
    above_fence = df > (upper_q + 1.5 * spread)

    # Negating the outlier mask (rather than testing "inside the fences")
    # keeps entries that compare False on both sides, such as NaN.
    return df[~(below_fence | above_fence)]

# Prices that survive the IQR filter; only their index is needed below.
df_outlier_removed = remove_outlier_IQR(df.price)
df_outlier_removed = pd.DataFrame(df_outlier_removed)

# Index labels present in df but missing after filtering = the outliers.
ind_diff = df.index.difference(df_outlier_removed.index)

figure(figsize=(12, 8), dpi=80)

# Drop all flagged rows in one call. This also defines df_final when
# ind_diff is empty, and avoids copying the frame once per dropped row.
df_final = df.drop(ind_diff)
df = df_final

sns.boxplot(y='price', x='property_type', data=df_final)
plt.xticks(rotation=90)
plt.ylabel('Price ($)')

Text(0, 0.5, 'Price ($)')

def remove_outlier_Hampel(df, threshold=4.5):
    """Drop values whose distance from the median exceeds a MAD-based cutoff.

    The cutoff is ``threshold`` times the median absolute deviation (MAD) of
    the data. The surviving *original* values are returned (with their
    original index), mirroring ``remove_outlier_IQR``.

    Args:
        df: Numeric pandas object supporting ``median`` and boolean-mask
            indexing (the script passes a Series of prices).
        threshold: Multiplier applied to the MAD. The default of 4.5 is the
            value that was previously hard-coded.

    Returns:
        ``df`` without the entries whose absolute deviation from the median is
        greater than ``threshold * MAD``. Entries that compare False against
        the cutoff (e.g. NaN) are kept. If the MAD is 0, every value that
        differs from the median is removed.
    """
    center = df.median()
    abs_dev = (df - center).abs()
    cutoff = abs_dev.median() * threshold
    # Index the input itself, not the deviations, so callers get real data
    # back; the index of the result is the same either way.
    return df[~(abs_dev > cutoff)]

# Start again from the untouched file so this method is judged independently
# of the IQR filtering done earlier.
df = pd.read_csv('listings.csv')

df['price'] = df['price'].apply(remove_sign, sign='$')

# Only the index of the filter result is used: labels missing from it are
# the rows the Hampel rule rejected.
df_outlier_removed = remove_outlier_Hampel(df.price)
df_outlier_removed = pd.DataFrame(df_outlier_removed)
ind_diff = df.index.difference(df_outlier_removed.index)

# Drop all flagged rows in one call. This also defines df_final when
# ind_diff is empty, and avoids copying the frame once per dropped row.
df_final = df.drop(ind_diff)
df = df_final

sns.boxplot(y='price', x='property_type', data=df_final)
plt.xticks(rotation=90)
plt.ylabel('Price ($)')

Text(0, 0.5, 'Price ($)')

# Number of index labels the Hampel filter removed from df.
len(ind_diff)

95

from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

def remove_outliers_DBSCAN(df,eps,min_samples):
    """Cluster one-dimensional data with DBSCAN and return the labels.

    Despite the name, nothing is removed here: the caller receives one cluster
    label per observation and decides which labels to discard (the script
    treats label ``-1`` as the outlier group).

    Args:
        df: pandas object whose ``.values`` are flattened into a single
            feature column (the script passes the price Series).
        eps: Forwarded to ``DBSCAN(eps=...)``.
        min_samples: Forwarded to ``DBSCAN(min_samples=...)``.

    Returns:
        A Series named ``'cluster'`` holding the label of each observation.
        When there is exactly one label per input row, the Series carries the
        input's index so labels can be matched back to rows by index label;
        otherwise it falls back to a default 0..n-1 index.
    """
    outlier_detection = DBSCAN(eps=eps, min_samples=min_samples)
    clusters = outlier_detection.fit_predict(df.values.reshape(-1, 1))

    # Re-use the caller's index when the shapes line up. A multi-column input
    # is flattened by reshape(-1, 1) and no longer maps 1:1 onto rows.
    index = df.index if len(df.index) == len(clusters) else None
    return pd.Series(clusters, index=index, name='cluster')

# Fresh copy of the data with prices parsed to floats.
df = pd.read_csv('listings.csv')
df['price'] = df['price'].apply(remove_sign, sign='$')

# Label every price with its DBSCAN cluster, then list cluster sizes from
# largest to smallest.
clusters = remove_outliers_DBSCAN(df['price'], eps=0.5, min_samples=5)
clusters.value_counts().sort_values(ascending=False)

-1      384
 9      144
 21     117
 4      101
 0       95
       ... 
 81       6
 56       5
 82       5
 124      5
 8        5
Name: cluster, Length: 127, dtype: int64

# Plot the cluster label of each observation against its position; the label
# -1 is the one treated as "outlier" further down.
plt.plot(clusters)

[<matplotlib.lines.Line2D at 0x282abed7748>]

# Single-column frame of labels, kept under the name later cells expect.
df_cluster = clusters.to_frame()

# Index labels of the observations that received the label -1.
is_noise = df_cluster['cluster'] == -1
ind_outlier = df_cluster.index[is_noise]
ind_outlier

Int64Index([  12,   40,   70,   75,   81,   84,   96,   99,  100,  107,
            ...
            3501, 3529, 3532, 3539, 3550, 3552, 3565, 3572, 3576, 3582],
           dtype='int64', length=384)

# Drop all rows labelled -1 in one call. This also defines df_final when
# ind_outlier is empty, and avoids copying the frame once per dropped row.
df_final = df.drop(ind_outlier)
df = df_final

sns.boxplot(y='price', x='property_type', data=df_final)
plt.xticks(rotation=90)
plt.ylabel('Price ($)')

Text(0, 0.5, 'Price ($)')

# Number of observations that were labelled -1 and removed above.
len(ind_outlier)

384

# Prices as a single-feature matrix (n_samples, 1), as the estimator expects.
a = df.price.values.reshape(-1, 1)

# Distances from every price to its 3 nearest prices in the same set.
neigh = NearestNeighbors(n_neighbors=3)
nbrs = neigh.fit(a)
distances, indices = nbrs.kneighbors(a)

# Keep the second column of the distance matrix and sort it ascending, which
# gives the sorted neighbour-distance curve.
distances = np.sort(distances[:, 1])
plt.plot(distances)

[<matplotlib.lines.Line2D at 0x28297a2c548>]

iris = pd.read_csv("Iris.csv")

# Keep a single species and look at sepal width against sepal length.
is_virginica = iris['Species'] == 'Iris-virginica'
df = iris[is_virginica]
x, y = df['SepalLengthCm'], df['SepalWidthCm']
plt.scatter(x, y)

<matplotlib.collections.PathCollection at 0x282af0ba9c8>

# Least-squares fit of a degree-1 polynomial (straight line) to y as a
# function of x, wrapped in a callable so it can be evaluated at any x.
coef = np.polyfit(x,y,1)
poly1d_fn = np.poly1d(coef) 

# Data as yellow circles, fitted line as a dashed black line.
plt.plot(x,y, 'yo', x, poly1d_fn(x), '--k')

[<matplotlib.lines.Line2D at 0x282af057f48>,
 <matplotlib.lines.Line2D at 0x282af02bec8>]

# Add one extra observation at (20, 6.08) to the data.
# pd.concat replaces Series.append, which was removed in pandas 2.0; the
# result (values and index labels) is the same.
x = pd.concat([x, pd.Series([20])])
y = pd.concat([y, pd.Series([6.08])])

# Show the coefficients of the degree-1 fit (highest power first, i.e.
# slope followed by intercept).
coef

array([0.23161465, 1.44811456])

# Evaluate a line by hand at x = 20.
# NOTE(review): the literals 0.23189 and 1.446 are hand-typed and do not match
# the coefficients displayed for coef (0.23161465, 1.44811456); evaluating
# poly1d_fn(20) would avoid the drift - confirm which value is intended.
20*0.23189+1.446

6.0838

# Refit the straight line, now including the extra point appended to x and y.
coef = np.polyfit(x,y,1)
poly1d_fn = np.poly1d(coef) 

# Data as yellow circles, refitted line as a dashed black line.
plt.plot(x,y, 'yo', x, poly1d_fn(x), '--k')

[<matplotlib.lines.Line2D at 0x282af0f6bc8>,
 <matplotlib.lines.Line2D at 0x282af165d08>]

	id	listing_url	scrape_id	last_scraped	name	summary	space	description	experiences_offered	neighborhood_overview	...	review_scores_value	requires_license	license	jurisdiction_names	instant_bookable	cancellation_policy	require_guest_profile_picture	require_guest_phone_verification	calculated_host_listings_count	reviews_per_month
0	12147973	https://www.airbnb.com/rooms/12147973	2.016090e+13	9/7/2016	Sunny Bungalow in the City	Cozy, sunny, family home. Master bedroom high...	The house has an open and cozy feel at the sam...	Cozy, sunny, family home. Master bedroom high...	none	Roslindale is quiet, convenient and friendly. ...	...	NaN	f	NaN	NaN	f	moderate	f	f	1	NaN
1	3075044	https://www.airbnb.com/rooms/3075044	2.016090e+13	9/7/2016	Charming room in pet friendly apt	Charming and quiet room in a second floor 1910...	Small but cozy and quite room with a full size...	Charming and quiet room in a second floor 1910...	none	The room is in Roslindale, a diverse and prima...	...	9.0	f	NaN	NaN	t	moderate	f	f	1	1.30
2	6976	https://www.airbnb.com/rooms/6976	2.016090e+13	9/7/2016	Mexican Folk Art Haven in Boston	Come stay with a friendly, middle-aged guy in ...	Come stay with a friendly, middle-aged guy in ...	Come stay with a friendly, middle-aged guy in ...	none	The LOCATION: Roslindale is a safe and diverse...	...	10.0	f	NaN	NaN	f	moderate	t	f	1	0.47
3	1436513	https://www.airbnb.com/rooms/1436513	2.016090e+13	9/7/2016	Spacious Sunny Bedroom Suite in Historic Home	Come experience the comforts of home away from...	Most places you find in Boston are small howev...	Come experience the comforts of home away from...	none	Roslindale is a lovely little neighborhood loc...	...	10.0	f	NaN	NaN	f	moderate	f	f	1	1.00
4	7651065	https://www.airbnb.com/rooms/7651065	2.016090e+13	9/7/2016	Come Home to Boston	My comfy, clean and relaxing home is one block...	Clean, attractive, private room, one block fro...	My comfy, clean and relaxing home is one block...	none	I love the proximity to downtown, the neighbor...	...	10.0	f	NaN	NaN	f	flexible	f	f	1	2.25