In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.offline import iplot
import warnings
warnings.filterwarnings('ignore')
# Load the used-car sales records. The path is relative, so the CSV is expected
# to sit in the notebook's working directory.
df = pd.read_csv("UserCarData.csv")
# Preview the first rows to check the available columns and value formats.
df.head()
Out[1]:
| | Sales_ID | name | year | selling_price | km_driven | Region | State or Province | City | fuel | seller_type | transmission | owner | mileage | engine | max_power | torque | seats | sold |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Maruti | 2014 | 450000 | 145500 | East | District of Columbia | Washington | Diesel | Individual | Manual | First_Owner | 23.40 | 1248 | 74.00 | 190Nm@ 2000rpm | 5 | Y |
| 1 | 2 | Skoda | 2014 | 370000 | 120000 | East | New York | New York City | Diesel | Individual | Manual | Second_Owner | 21.14 | 1498 | 103.52 | 250Nm@ 1500-2500rpm | 5 | Y |
| 2 | 3 | Honda | 2006 | 158000 | 140000 | Central | Illinois | Chicago | Petrol | Individual | Manual | Third_Owner | 17.70 | 1497 | 78.00 | 12.7@ 2,700(kgm@ rpm) | 5 | Y |
| 3 | 4 | Hyundai | 2010 | 225000 | 127000 | Central | Illinois | Chicago | Diesel | Individual | Manual | First_Owner | 23.00 | 1396 | 90.00 | 22.4 kgm at 1750-2750rpm | 5 | Y |
| 4 | 5 | Maruti | 2007 | 130000 | 120000 | East | New York | New York City | Petrol | Individual | Manual | First_Owner | 16.10 | 1298 | 88.20 | 11.5@ 4,500(kgm@ rpm) | 5 | Y |
In [2]:
# Used-car name analysis: most / least frequent names and a bar chart of the 20 most frequent.
name_counts = df['name'].value_counts()
most_sold_name = name_counts.idxmax()
least_sold_name = name_counts.idxmin()
print(f"Most Used Sold Car '{most_sold_name}'")
print(f"Lowest Used Car Sold '{least_sold_name}'")

# value_counts() orders by frequency (highest first), so the first 20 entries
# are the 20 most common names.
top_names = name_counts.head(20)
name_fig = px.bar(
    top_names,
    labels={'value': 'Count', 'name': 'Name'},
    # One colour per bar, keyed by the car name.
    color=name_counts.index[:20],
    text_auto=True,
    title='Top Used Cars Sold',
)
iplot(name_fig)
Most Used Sold Car 'Maruti' Lowest Used Car Sold 'Ashok'
In [3]:
# Used-car year analysis: most / least frequent years and a bar chart of the 20 most frequent.
year_counts = df['year'].value_counts()
most_common_year = year_counts.idxmax()
rarest_year = year_counts.idxmin()
print(f"Most Year Used Sold Car '{most_common_year}'")
print(f"Lowest Year Used Car Sold '{rarest_year}'")

# Keep the 20 most frequent years, then order them newest-first for display.
years_to_plot = year_counts[:20].sort_index(ascending=False)
year_fig = px.bar(
    years_to_plot,
    labels={'value': 'Count', 'year': 'Year'},
    color_discrete_sequence=['#c72320'],
    text_auto=True,
    title='Top Year Used Cars Sold',
)
# Treat the year axis as categorical rather than numeric.
year_fig.update_xaxes(type='category')
iplot(year_fig)
Most Year Used Sold Car '2017' Lowest Year Used Car Sold '1995'
In [4]:
# Region analysis: share of listings per region.
region_counts = df['Region'].value_counts()
iplot(px.pie(values=region_counts,
             # Take the labels from the counted Series itself instead of a
             # hand-written list: value_counts() orders by frequency, so a fixed
             # list only matches if the data happens to rank in that exact order.
             names=region_counts.index,
             title='Region where Sold Used Car'
             ).update_traces(textinfo='label+percent'))
In [5]:
# State / province analysis: most and least common locations plus a bar chart of the top 30.
state_counts = df['State or Province'].value_counts()
print(f"Top State or Province where Sold Used car '{state_counts.idxmax()}'")
print(f"Least State or Province where Sold Used car '{state_counts.idxmin()}'")

# value_counts() is ordered most-frequent first, so head(30) selects the real
# top 30 (and simply returns everything when there are fewer than 30 states).
# The ascending sort is applied only AFTER the selection and only controls the
# drawing order, so the longest bar is expected to end up at the top of the
# horizontal chart. Sorting ascending before slicing would pick the 30 least
# common states instead.
top_states = state_counts.head(30).sort_values(ascending=True)
iplot(px.bar(top_states,
             orientation='h',
             # Colour each bar by its state name. The index of the plotted Series
             # always has the same length and order as the bars themselves.
             color=top_states.index,
             title='Top State or Province Sold Used Car',
             labels={'value':'Count'}
             ))
Top State or Province where Sold Used car 'California' Least State or Province where Sold Used car 'Wyoming'
In [6]:
# City analysis: most and least common cities plus a bar chart of the top 20.
city_counts = df['City'].value_counts()
print(f"Top City where Sold Used car '{city_counts.idxmax()}'")
print(f"Least City where Sold Used car '{city_counts.idxmin()}'")

# value_counts() is already ordered most-frequent first; head(20) keeps the top
# 20 and tolerates data with fewer than 20 distinct cities.
top_cities = city_counts.head(20)
iplot(px.bar(top_cities,
             # Colour each bar by its city name. Using the index of the plotted
             # Series (rather than row labels of the raw DataFrame) keeps the
             # colours tied to the bars and always of matching length.
             color=top_cities.index,
             title='Top City Sold Used Car',
             labels={'value':'Count'},
             text_auto=True
             ))
Top City where Sold Used car 'New York City' Least City where Sold Used car 'West Springfield'
In [7]:
# Fuel analysis: most / least common fuel type and the share of each.
fuel_counts = df['fuel'].value_counts()
print(f"Top Fuel Used in Used car '{fuel_counts.idxmax()}'")
print(f"Least Fuel Used in Used car '{fuel_counts.idxmin()}'")
iplot(px.pie(values=fuel_counts,
             # Labels come from the counted Series so every slice carries the
             # fuel type it was actually counted for, whatever the frequency
             # ranking or number of fuel types in the data.
             names=fuel_counts.index,
             title='Fuel Used in Used Car'
             ).update_traces(textinfo='label+percent'))
Top Fuel Used in Used car 'Diesel' Least Fuel Used in Used car 'LPG'
In [8]:
# Seller type analysis: most common seller type and the share of each.
seller_counts = df['seller_type'].value_counts()
print(f"Most Type of Seller '{seller_counts.idxmax()}'")
iplot(px.pie(values=seller_counts,
             # Labels come from the counted Series so they cannot drift out of
             # step with the counts if the frequency ranking changes.
             names=seller_counts.index,
             title='Types of Seller Used Cars'
             ).update_traces(textinfo='label+percent'))
Most Type of Seller 'Individual'
In [9]:
# Transmission type analysis: share of each transmission type.
transmission_counts = df['transmission'].value_counts()
iplot(px.pie(values=transmission_counts,
             # Labels come from the counted Series; a fixed ['Manual', 'Automatic']
             # list would swap the labels if automatics ever outnumbered manuals.
             names=transmission_counts.index,
             title='Types of Transmissions Used in Used Sold Cars'
             ).update_traces(textinfo='label+percent'))
In [10]:
# Owner type analysis: most common owner category and the share of each.
owner_counts = df['owner'].value_counts()
print(f"Most Owner Sold Used Cars '{owner_counts.idxmax()}'")

# Build the display labels from the data values themselves so each slice keeps
# the label of the category it was counted for. Underscores are replaced with
# spaces for readability (e.g. 'First_Owner' -> 'First Owner').
# NOTE(review): assumes the remaining categories follow the same underscore
# naming as 'First_Owner'; confirm against df['owner'].unique().
owner_labels = owner_counts.index.str.replace('_', ' ', regex=False)
iplot(px.pie(values=owner_counts,
             names=owner_labels,
             title='Owners of Used Cars'
             ).update_traces(textinfo='label+percent'))
Most Owner Sold Used Cars 'First_Owner'
In [11]:
# Torque analysis: bar chart of the 20 most frequent torque specifications.
# value_counts() is already ordered most-frequent first; head(20) keeps the top
# 20 and tolerates data with fewer than 20 distinct values.
top_torques = df['torque'].value_counts().head(20)
iplot(px.bar(top_torques,
             orientation='h',
             # Colour each bar by its torque value. Using the index of the plotted
             # Series (rather than row labels of the raw DataFrame) keeps the
             # colours tied to the bars and always of matching length.
             color=top_torques.index,
             title='Top Torque Used in Used Sold Cars',
             labels={'value':'Count','torque':'Torque'}
             ))
In [12]:
# Mileage analysis: kernel density estimate of the 'mileage' column.
fig, ax = plt.subplots(figsize=(15, 6))
# Draw the filled density curve on the explicitly created axes.
sns.kdeplot(df['mileage'], fill=True, ax=ax)
ax.set_xlabel("Mileage")
plt.show()
In [ ]: