import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import warnings
from plotly.offline import init_notebook_mode
# Session setup: switch Plotly's offline notebook mode on, silence every
# warning category, and use seaborn's "darkgrid" theme for matplotlib figures.
init_notebook_mode(connected=True)
warnings.simplefilter('ignore')
sns.set_style(style="darkgrid")

# Load the wicket table. Several column names are referenced with a trailing
# space further down (e.g. 'Country ', 'Player ', 'Wickets ').
data = pd.read_csv(
    'C:/Users/Ambuj nayan mishra/Downloads/Most Wickets in Test Cricket .csv'
)
# Count plot of how many rows each country has, most frequent country first.
f, ax = plt.subplots(1, 1, figsize=(25, 10))
sns.countplot(
    x='Country ',
    data=data,
    palette='bright',
    order=data['Country '].value_counts().index,
    saturation=0.95,
    ax=ax,  # draw explicitly on the axes whose bars are labelled below
)
# Annotate every bar with its count. The loop body must be indented; the
# previous flat layout was not valid Python.
for container in ax.containers:
    ax.bar_label(container, color='black', size=20)
# Donut chart of the share of rows per country.
# Labels and values are both taken from one value_counts() result so every
# slice carries its own country's name. A hand-written label list does not
# follow value_counts()'s count-descending order and mislabels the slices.
country_counts = data['Country '].value_counts()
# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=country_counts.index,
                             values=country_counts.values,
                             hole=.3)])
fig.show()
# Notebook display of the per-country counts.
data['Country '].value_counts()
Australia 18 England 15 India 11 South Africa 9 West Indies 8 New Zealand 7 Pakistan 7 Sri Lanka 3 Zimbabwe 1 Bangladesh 1 Name: Country , dtype: int64
# Group the rows by country; the per-country selections below reuse this object.
Group_data = data.groupby(by='Country ')

# Display the Australian rows (the group key includes the trailing space).
Group_data.get_group('Australia ')
| Rank | Player | Country | Matches | Innings | Balls | Runs | Wickets | Average | Econ | SR | 5 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 2 | S Warne | Australia | 145 | 273 | 40705 | 17995 | 708 | 25.41 | 2.65 | 57.4 | 37 | 10 |
| 4 | 5 | GD McGrath | Australia | 124 | 243 | 29248 | 12186 | 563 | 21.64 | 2.49 | 51.9 | 29 | 3 |
| 12 | 13 | NM Lyon | Australia | 108 | 204 | 27983 | 13732 | 427 | 32.15 | 2.94 | 65.5 | 19 | 3 |
| 23 | 24 | DK Lillee | Australia | 70 | 132 | 18467 | 8493 | 355 | 23.92 | 2.75 | 52.0 | 23 | 7 |
| 28 | 29 | MG Johnson | Australia | 73 | 140 | 16001 | 8891 | 313 | 28.40 | 3.33 | 51.1 | 12 | 3 |
| 31 | 32 | B Lee | Australia | 76 | 150 | 16531 | 9554 | 310 | 30.81 | 3.46 | 53.3 | 10 | 0 |
| 38 | 39 | CJ McDermott | Australia | 71 | 124 | 16586 | 8332 | 291 | 28.63 | 3.01 | 56.9 | 14 | 2 |
| 39 | 40 | MA Starc | Australia | 69 | 133 | 14104 | 7786 | 282 | 27.60 | 3.31 | 50.0 | 13 | 2 |
| 43 | 44 | JN Gillespie | Australia | 71 | 137 | 14234 | 6770 | 259 | 26.13 | 2.85 | 54.9 | 8 | 0 |
| 47 | 48 | R Benaud | Australia | 63 | 116 | 19108 | 6704 | 248 | 27.03 | 2.10 | 77.0 | 16 | 1 |
| 49 | 50 | GD McKenzie | Australia | 60 | 113 | 17681 | 7328 | 246 | 29.78 | 2.48 | 71.8 | 16 | 3 |
| 63 | 64 | RR Lindwall | Australia | 61 | 113 | 13650 | 5251 | 228 | 23.03 | 2.30 | 59.8 | 12 | 0 |
| 67 | 68 | PM Siddle | Australia | 67 | 126 | 13907 | 6777 | 221 | 30.66 | 2.92 | 62.9 | 8 | 0 |
| 69 | 70 | CV Grimmett | Australia | 37 | 67 | 14513 | 5231 | 216 | 24.21 | 2.16 | 67.1 | 21 | 7 |
| 71 | 72 | JR Hazlewood | Australia | 57 | 107 | 12235 | 5573 | 215 | 25.92 | 2.73 | 56.9 | 9 | 0 |
| 74 | 75 | MG Hughes | Australia | 53 | 97 | 12285 | 6017 | 212 | 28.38 | 2.93 | 57.9 | 7 | 1 |
| 75 | 76 | SCG MacGill | Australia | 44 | 85 | 11237 | 6038 | 208 | 29.02 | 3.22 | 54.0 | 12 | 2 |
| 79 | 80 | JR Thomson | Australia | 51 | 90 | 10535 | 5601 | 200 | 28.00 | 3.18 | 52.6 | 8 | 0 |
# Player names from the first two rows of every country group
# (GroupBy.head keeps the rows in their original table order).
Group_data['Player '].head(2).values
array(['M Muralidaran ', 'S Warne', 'JM Anderson ', 'A Kumble ',
'GD McGrath', 'SCJ Broad', 'CA Walsh ', 'R Ashwin ', 'DW Steyn ',
'HMRKB Herath ', 'Sir RJ Hadlee', 'SM Pollock ', 'Wasim Akram ',
'CEL Ambrose', 'Waqar Younis ', 'DL Vettori', 'HH Streak ',
'Shakib Al Hasan '], dtype=object)
# Wicket totals from the first two rows of every country group.
Group_data['Wickets '].head(2).values
array([800, 708, 640, 619, 563, 537, 519, 442, 439, 433, 431, 421, 414,
405, 373, 362, 216, 215], dtype=int64)
# Bar chart of wickets for the first two rows of every country group.
# px.bar() documents its first argument as a DataFrame, array-like or dict;
# a GroupBy object is none of those. The selected rows are therefore
# materialised as a DataFrame and the columns are referenced by name.
# `labels` is keyed by column name to keep the axis titles 'Player'/'Wickets'.
# The unused `df = px.data.tips()` sample-data load was removed.
first_two_per_country = Group_data.head(2)
fig = px.bar(
    first_two_per_country,
    x='Player ',
    y='Wickets ',
    color='Wickets ',
    labels={'Wickets ': 'Wickets', 'Player ': 'Player'},
    height=600,
)
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)
fig.show()
# Horizontal bar chart of wickets for the first row of every country group,
# coloured by country.
# The rows are passed as a real DataFrame (px.bar() expects a DataFrame,
# array-like or dict, not a GroupBy object) and columns are named directly, so
# the legend is titled 'Country' instead of the generic 'color'.
# The unused `df = px.data.tips()` sample-data load was removed.
first_per_country = Group_data.head(1)
fig = px.bar(
    first_per_country,
    x='Wickets ',
    y='Player ',
    color='Country ',
    orientation='h',
    height=600,
    labels={'Player ': 'Players', 'Wickets ': 'Wickets', 'Country ': 'Country'},
)
fig.show()
# Funnel chart with one stage per player, sized by that player's wickets.
player_names = data['Player '].values
wicket_totals = data['Wickets '].values
fig = go.Figure(go.Funnel(y=player_names, x=wicket_totals))
fig.show()
Observation: The leading wicket-taker is M. Muralidaran.
Observation: The second-highest wicket-taker is S. Warne.
Group_data['Matches'].max().sort_values(ascending=False)
Country England 169 South Africa 166 Australia 145 Sri Lanka 133 India 132 West Indies 132 New Zealand 113 Pakistan 104 Zimbabwe 65 Bangladesh 59 Name: Matches, dtype: int64
# Matches played, from the first two rows of every country group.
Group_data['Matches'].head(2).values
array([133, 145, 169, 132, 124, 152, 132, 86, 93, 93, 86, 108, 104,
98, 87, 113, 65, 59], dtype=int64)
# Player names from the first two rows of every country group, shown again
# for reference next to the match counts.
Group_data['Player '].head(2).values
array(['M Muralidaran ', 'S Warne', 'JM Anderson ', 'A Kumble ',
'GD McGrath', 'SCJ Broad', 'CA Walsh ', 'R Ashwin ', 'DW Steyn ',
'HMRKB Herath ', 'Sir RJ Hadlee', 'SM Pollock ', 'Wasim Akram ',
'CEL Ambrose', 'Waqar Younis ', 'DL Vettori', 'HH Streak ',
'Shakib Al Hasan '], dtype=object)
# Bar chart of matches played for the first row of every country group.
# The rows are passed as a real DataFrame (px.bar() expects a DataFrame,
# array-like or dict, not a GroupBy object) and columns are named directly.
# The unused `df = px.data.tips()` sample-data load was removed.
first_per_country = Group_data.head(1)
fig = px.bar(
    first_per_country,
    x='Player ',
    y='Matches',
    color='Matches',
    labels={'Player ': 'Players', 'Matches': 'Matches'},
    height=600,
    color_continuous_scale=px.colors.sequential.Inferno,
)
fig.show()
Observation: The player who has played the most matches is JM Anderson.
f,ax=plt.subplots(1,1,figsize=(30,16))
# Bar plot of balls bowled for the first two rows of every country group.
# `ax` is the axes object created by the plt.subplots call immediately before
# this block; it is passed explicitly so the labelled bars are the ones drawn
# here.
first_two_per_country = Group_data.head(2)
sns.barplot(
    x=first_two_per_country['Player '].values,
    y=first_two_per_country['Balls '].values,
    palette='bright',
    saturation=0.95,
    ax=ax,
)
# Annotate every bar with its value. The loop body must be indented; the
# previous flat layout was not valid Python.
for container in ax.containers:
    ax.bar_label(container, color='black', size=20)
Observation: The bowler who has bowled the most balls is M. Muralidaran.
df = px.data.tips()
# Bar chart of economy rate for the first row of every country group.
# The rows are passed as a real DataFrame (px.bar() expects a DataFrame,
# array-like or dict, not a GroupBy object) and columns are named directly;
# `labels` is keyed by column name to keep the 'Economical'/'Player' titles.
first_per_country = Group_data.head(1)
fig = px.bar(
    first_per_country,
    x='Player ',
    y='Econ',
    color='Econ',
    labels={'Econ': 'Economical', 'Player ': 'Player'},
    height=600,
    color_continuous_scale=px.colors.sequential.Magenta,
)
fig.show()
Observation: The most economical bowler is M. Muralidaran.
df = px.data.tips()
# Bar chart of the '5' column (players on the y axis) for the first two rows
# of every country group.
# The rows are passed as a real DataFrame (px.bar() expects a DataFrame,
# array-like or dict, not a GroupBy object) and columns are named directly;
# `labels` is keyed by column name to keep the '5 Wickets'/'Player' titles.
first_two_per_country = Group_data.head(2)
fig = px.bar(
    first_two_per_country,
    x='5',
    y='Player ',
    color='5',
    labels={'5': '5 Wickets', 'Player ': 'Player'},
    height=600,
    color_continuous_scale=px.colors.sequential.Jet,
)
fig.show()
# Bar chart of the '10' column for the first two rows of every country group.
# The rows are passed as a real DataFrame (px.bar() expects a DataFrame,
# array-like or dict, not a GroupBy object) and columns are named directly;
# `labels` is keyed by column name to keep the '10 Wickets'/'Player' titles.
# The unused `df = px.data.tips()` sample-data load was removed.
first_two_per_country = Group_data.head(2)
fig = px.bar(
    first_two_per_country,
    x='Player ',
    y='10',
    color='10',
    labels={'10': '10 Wickets', 'Player ': 'Player'},
    height=600,
    color_continuous_scale=px.colors.sequential.Blackbody,
)
fig.show()
Observation: M. Muralidaran has taken the most 5-wicket and 10-wicket hauls in a match.
df = px.data.iris()
# Bubble chart of wickets against matches; marker size and colour both encode
# the wicket total.
fig = px.scatter(
    data,
    x='Matches',
    y='Wickets ',
    size='Wickets ',
    color='Wickets ',
)
fig.show()
# Line plot of wickets against matches, one line per country.
f, ax = plt.subplots(1, 1, figsize=(18, 10))
# x and y are passed by keyword: seaborn 0.12+ makes every lineplot argument
# after `data` keyword-only, so the positional form raises TypeError there.
sns.lineplot(data=data, x='Matches', y='Wickets ', hue='Country ', ax=ax)
<AxesSubplot:xlabel='Matches', ylabel='Wickets '>
# Scatter of wickets against matches with an ordinary-least-squares trendline.
axis_titles = {"Matches": "Matches", "Wickets ": "Wickets"}
fig = px.scatter(
    data,
    x="Matches",
    y="Wickets ",
    trendline="ols",
    labels=axis_titles,
)

# Centre the title and enlarge its font.
fig.update_layout(
    title_text='Relationship between Matches and Wickets',
    title_x=0.5,
    title_font=dict(size=20),
)

# Recolour the figure's second trace (presumably the fitted trendline that px
# adds after the markers) in red.
fig.data[1].line.color = 'red'
fig.show()
Observation: As the plot shows, players who have played more matches tend to have taken more wickets.
df = px.data.gapminder()
# Wickets against balls bowled, with one coloured series (and marker) per player.
fig = px.line(
    data,
    x='Balls ',
    y='Wickets ',
    color='Player ',
    markers=True,
)
fig.show()
# Make Blackbody the default continuous colour scale for later px figures.
px.defaults.color_continuous_scale = px.colors.sequential.Blackbody
df = px.data.iris()

# Bubble chart of wickets against balls bowled; this figure overrides the
# default scale with Jet.
fig = px.scatter(
    data,
    x='Balls ',
    y='Wickets ',
    size='Wickets ',
    color='Wickets ',
    color_continuous_scale=px.colors.sequential.Jet,
)
fig.show()
df = px.data.tips()

# Un-normalised ECDF over matches, weighted by wickets, one curve per country.
fig = px.ecdf(
    data,
    x="Matches",
    y='Wickets ',
    color='Country ',
    ecdfnorm=None,
)
fig.show()
df = px.data.gapminder()

# Sunburst drilling down country -> player -> matches -> wickets, with sector
# size and colour both taken from the wicket total.
hierarchy = ['Country ', 'Player ', 'Matches', 'Wickets ']
fig = px.sunburst(
    data,
    path=hierarchy,
    values='Wickets ',
    color='Wickets ',
    hover_data=['Player '],
    height=800,
)
fig.show()