In [26]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
In [3]:
# Read the event records from the relative path '.data.csv' into a DataFrame.
df = pd.read_csv(filepath_or_buffer='.data.csv')

# Show the column labels of the freshly loaded frame.
df.columns
Out[3]:
Index(['event_id', 'date', 'province', 'district', 'latitude', 'longitude',
       'peak_rainfall_mm_per_hr', 'duration_min', 'area_km2', 'deaths',
       'injuries', 'notes'],
      dtype='object')
In [10]:
# Preview the first five rows of the frame.
df.head(n=5)
Out[10]:
event_id date province district latitude longitude peak_rainfall_mm_per_hr duration_min area_km2 deaths injuries notes year month
0 2010-01-10_143 2010-01-10 Punjab Multan 36.4547 68.6467 364.8 60 23.8 6 0 Cloudburst in Multan, Punjab; short-duration e... 2010 1
1 2010-01-15_117 2010-01-15 Khyber Pakhtunkhwa Kohat 28.2284 69.7835 100.8 45 58.2 0 0 Cloudburst in Kohat, Khyber Pakhtunkhwa; short... 2010 1
2 2010-02-10_093 2010-02-10 Punjab Multan 34.2131 67.3667 148.8 120 11.7 1 1 Cloudburst in Multan, Punjab; short-duration e... 2010 2
3 2010-04-07_118 2010-04-07 Islamabad Islamabad 27.1652 66.6173 327.8 90 22.5 0 1 Cloudburst in Islamabad, Islamabad; short-dura... 2010 4
4 2010-04-27_087 2010-04-27 Punjab Multan 29.3843 66.5263 192.1 180 29.6 0 1 Cloudburst in Multan, Punjab; short-duration e... 2010 4

Temporal Analysis: are cloudbursts becoming more frequent?¶

In [5]:
# Convert the 'date' column to pandas datetimes so the .dt accessor works on it.
df['date'] = df['date'].pipe(pd.to_datetime)
In [6]:
# Derive calendar year and month-number columns from the parsed event dates.
event_dates = df['date'].dt
df['year'] = event_dates.year
df['month'] = event_dates.month
In [21]:
def get_season(month):
    """Return the season label for a month number.

    Months 6-9 map to 'Summer', 12/1/2 to 'Winter', 3-5 to 'Spring' and
    10-11 to 'before winter'. Any other value yields 'Other'.
    """
    # (label, months) pairs, checked in order; the first match wins.
    season_table = (
        ('Summer', (6, 7, 8, 9)),
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('before winter', (10, 11)),
    )
    for label, months in season_table:
        if month in months:
            return label
    return 'Other'
In [22]:
# Label every event with its season, derived element-wise from the month number.
df['season'] = df['month'].map(get_season)
In [86]:
# Number of events (rows) per year, per month number and per season label.
events_by_year, events_by_month, events_by_season = (
    df.groupby(key).size() for key in ('year', 'month', 'season')
)

Trend Analysis¶

In [14]:
# Line chart of the number of events recorded in each year.
fig, ax = plt.subplots(figsize=(10, 6))
events_by_year.plot(ax=ax, kind='line', marker='o', color='#4e79a7', linewidth=2)
ax.set_title('Cloudburst Events by Year', fontsize=14)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Number of Events', fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()
No description has been provided for this image

Seasonal Analysis¶

In [15]:
# Bar chart of the number of events per calendar month.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Reindex onto months 1-12 so a month with no recorded events still gets a
# zero-height bar. Without this, the twelve fixed tick labels below are paired
# positionally with whatever months happen to be present, and a gap would
# shift every later label onto the wrong bar.
monthly_counts = events_by_month.reindex(range(1, 13), fill_value=0)

fig, ax = plt.subplots(figsize=(10, 6))
monthly_counts.plot(ax=ax, kind='bar', color='#f28e2b', edgecolor='black')
ax.set_title('Cloudburst Events by Month', fontsize=14)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Number of Events', fontsize=12)
# Bar plots place the bars at positions 0..11, one per month.
ax.set_xticks(list(range(len(month_labels))))
ax.set_xticklabels(month_labels, rotation=45)
ax.grid(axis='y')
fig.tight_layout()
plt.show()
No description has been provided for this image
In [25]:
# Bar chart of the number of events per season label.
fig, ax = plt.subplots(figsize=(10, 6))
events_by_season.plot(ax=ax, kind='bar', color='#f28e2b', edgecolor='black')
ax.set_title('Cloudburst Events by Season', fontsize=14)
ax.set_xlabel('Season', fontsize=12)
ax.set_ylabel('Number of Events', fontsize=12)
ax.grid(axis='y')
fig.tight_layout()
plt.show()
No description has been provided for this image
In [85]:
# Per-year summary of event duration and peak rainfall intensity:
# mean, median, minimum and maximum of each measure.
measures = ['duration_min', 'peak_rainfall_mm_per_hr']
summary_funcs = ['mean', 'median', 'min', 'max']
rainfall_stats_by_year = df.groupby('year')[measures].agg(summary_funcs)

# Flatten the two-level (measure, statistic) header into single names
# such as 'duration_min_mean'.
rainfall_stats_by_year.columns = [
    f'{measure}_{stat}' for measure, stat in rainfall_stats_by_year.columns
]
rainfall_stats_by_year.head()
Out[85]:
duration_min_mean duration_min_median duration_min_min duration_min_max peak_rainfall_mm_per_hr_mean peak_rainfall_mm_per_hr_median peak_rainfall_mm_per_hr_min peak_rainfall_mm_per_hr_max
year
2010 84.230769 90.0 30 180 223.269231 214.30 100.8 364.8
2011 82.500000 75.0 30 180 242.875000 254.80 110.5 368.6
2012 84.375000 52.5 30 180 212.150000 198.25 50.0 322.0
2013 121.250000 120.0 45 180 209.483333 199.35 109.9 334.3
2014 87.500000 90.0 30 180 197.933333 196.65 60.8 365.8
In [74]:
# Two stacked panels: yearly mean/median duration on top,
# yearly mean/median peak rainfall underneath.
fig, (ax_duration, ax_rainfall) = plt.subplots(2, 1, figsize=(12, 8))
years = rainfall_stats_by_year.index

# Top panel: duration
ax_duration.plot(years, rainfall_stats_by_year['duration_min_mean'],
                 marker='o', color='#4e79a7', label='Mean Duration')
ax_duration.plot(years, rainfall_stats_by_year['duration_min_median'],
                 marker='s', color='#76b7b2', label='Median Duration')
ax_duration.set_title('Cloudburst Duration by Year', fontsize=14)
ax_duration.set_xlabel('Year', fontsize=12)
ax_duration.set_ylabel('Duration (Minutes)', fontsize=12)
ax_duration.legend()
ax_duration.grid(True)

# Bottom panel: peak rainfall
ax_rainfall.plot(years, rainfall_stats_by_year['peak_rainfall_mm_per_hr_mean'],
                 marker='o', color='#f28e2b', label='Mean Peak Rainfall')
ax_rainfall.plot(years, rainfall_stats_by_year['peak_rainfall_mm_per_hr_median'],
                 marker='s', color='#e15759', label='Median Peak Rainfall')
ax_rainfall.set_title('Peak Rainfall Intensity by Year', fontsize=14)
ax_rainfall.set_xlabel('Year', fontsize=12)
ax_rainfall.set_ylabel('Peak Rainfall (mm/hr)', fontsize=12)
ax_rainfall.legend()
ax_rainfall.grid(True)

fig.tight_layout()
plt.show()
No description has been provided for this image
In [77]:
# Yearly totals of deaths and injuries.
impact_stats_by_year = df.groupby('year')[['deaths', 'injuries']].agg('sum')

# Attach the casualty totals to the yearly rainfall statistics (matched on year).
combined_stats = rainfall_stats_by_year.join(impact_stats_by_year, how='left')
In [83]:
def _plot_metric_vs_impact(ax, stats, metric_column, metric_label, metric_color, title):
    """Plot one yearly metric against deaths and injuries on a twin y-axis.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes that receives the metric line (left y-axis).
    stats : DataFrame
        Frame indexed by year containing `metric_column`, 'deaths' and 'injuries'.
    metric_column : str
        Column of `stats` drawn on the left y-axis.
    metric_label : str
        Text used both as the line's legend label and as the left y-axis label.
    metric_color : str
        Colour of the metric line, its y-axis label and its tick labels.
    title : str
        Title of the panel.

    Returns
    -------
    matplotlib Axes
        The twin axes carrying the deaths and injuries lines.
    """
    years = stats.index

    # Left axis: the metric itself.
    ax.plot(years, stats[metric_column], marker='o', color=metric_color, label=metric_label)
    ax.set_xlabel('Year', fontsize=12)
    ax.set_ylabel(metric_label, fontsize=12, color=metric_color)
    ax.tick_params(axis='y', labelcolor=metric_color)
    ax.grid(True)

    # Right axis: deaths and injuries share one scale.
    impact_ax = ax.twinx()
    impact_ax.plot(years, stats['deaths'], marker='s', color='#e15759', label='Deaths')
    impact_ax.plot(years, stats['injuries'], marker='^', color='#76b7b2', label='Injuries')
    impact_ax.set_ylabel('Deaths / Injuries', fontsize=12, color='#e15759')
    impact_ax.tick_params(axis='y', labelcolor='#e15759')
    ax.set_title(title, fontsize=14)

    # One legend listing the lines from both axes; it is attached to the twin axes.
    metric_handles, metric_labels = ax.get_legend_handles_labels()
    impact_handles, impact_labels = impact_ax.get_legend_handles_labels()
    impact_ax.legend(metric_handles + impact_handles,
                     metric_labels + impact_labels, loc='upper left')
    return impact_ax


fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

# Top panel: mean duration vs. deaths and injuries.
ax1_twin = _plot_metric_vs_impact(
    ax1, combined_stats, 'duration_min_mean', 'Mean Duration (min)', '#4e79a7',
    'Mean Cloudburst Duration vs. Deaths and Injuries by Year')

# Bottom panel: mean peak rainfall vs. deaths and injuries.
ax2_twin = _plot_metric_vs_impact(
    ax2, combined_stats, 'peak_rainfall_mm_per_hr_mean', 'Mean Peak Rainfall (mm/hr)', '#f28e2b',
    'Mean Peak Rainfall vs. Deaths and Injuries by Year')

fig.tight_layout()
plt.show()
No description has been provided for this image
In [91]:
# Per-province aggregates: number of events (non-null event_id values)
# and total deaths / injuries.
province_groups = df.groupby('province')
events_by_province = province_groups['event_id'].count()
deaths_injury_province = province_groups[['deaths', 'injuries']].sum()
Out[91]:
deaths injuries
province
Balochistan 16 40
Gilgit-Baltistan 17 52
Islamabad 21 33
Khyber Pakhtunkhwa 22 47
Punjab 16 36
Sindh 22 49
In [92]:
# Combine events_by_province and deaths_injury_province into one DataFrame
combined_province_stats = deaths_injury_province.join(events_by_province.rename('events'))

plt.figure(figsize=(12, 10))

# Subplot 1: Number of Events
plt.subplot(3, 1, 1)
combined_province_stats['events'].plot(kind='bar', color='#4e79a7', edgecolor='black')
plt.title('Number of Cloudburst Events by Province', fontsize=14)
plt.xlabel('Province', fontsize=12)
plt.ylabel('Number of Events', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y')

# Subplot 2: Total Deaths
plt.subplot(3, 1, 2)
combined_province_stats['deaths'].plot(kind='bar', color='#e15759', edgecolor='black')
plt.title('Total Deaths by Province', fontsize=14)
plt.xlabel('Province', fontsize=12)
plt.ylabel('Total Deaths', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y')

# Subplot 3: Total Injuries
plt.subplot(3, 1, 3)
combined_province_stats['injuries'].plot(kind='bar', color='#76b7b2', edgecolor='black')
plt.title('Total Injuries by Province', fontsize=14)
plt.xlabel('Province', fontsize=12)
plt.ylabel('Total Injuries', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [93]:
# Grouped bar chart comparing events, deaths and injuries for each province.
#
# The axes are created up front and handed to pandas through ax=. Called
# without ax, DataFrame.plot() opens a figure of its own, which left the
# figure made by plt.figure() empty (the stray
# "<Figure size 1200x800 with 0 Axes>" output) and drew the chart at the
# default size instead of the requested 12x8.
fig, ax = plt.subplots(figsize=(12, 8))
combined_province_stats[['events', 'deaths', 'injuries']].plot(
    kind='bar',
    color=['#4e79a7', '#e15759', '#76b7b2'],
    edgecolor='black',
    ax=ax,
)
ax.set_title('Cloudburst Events, Deaths, and Injuries by Province', fontsize=14)
ax.set_xlabel('Province', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
# Slant the province names and right-align them under their bar groups.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend(['Events', 'Deaths', 'Injuries'])
ax.grid(axis='y')
fig.tight_layout()
plt.show()
<Figure size 1200x800 with 0 Axes>
No description has been provided for this image
In [94]:
# Province-level scatter plots, side by side: event count against total deaths
# (left) and against total injuries (right).
# Each entry: (column, marker colour, panel title, y-axis label).
scatter_panels = [
    ('deaths', '#e15759', 'Cloudburst Events vs. Deaths by Province', 'Total Deaths'),
    ('injuries', '#76b7b2', 'Cloudburst Events vs. Injuries by Province', 'Total Injuries'),
]
event_counts = combined_province_stats['events']

fig, axes = plt.subplots(1, 2, figsize=(12, 6))
for ax, (column, dot_color, title, y_label) in zip(axes, scatter_panels):
    totals = combined_province_stats[column]
    ax.scatter(event_counts, totals, color=dot_color, s=100)
    # Write each province's name at its own point.
    for province, x_pos, y_pos in zip(combined_province_stats.index, event_counts, totals):
        ax.text(x_pos, y_pos, province, fontsize=9)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('Number of Events', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.grid(True)

fig.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
 
In [97]:
# Scale every column (events, deaths, injuries) by its own maximum so the
# three metrics share a common scale whose largest value is 1.0 and can be
# compared side by side. A single column-wise division replaces three
# copy-pasted per-column assignments; the input frame itself is not modified.
# NOTE: a column whose maximum is 0 would produce NaN values here.
normalized_stats = combined_province_stats.div(combined_province_stats.max(), axis='columns')

normalized_stats
Out[97]:
deaths injuries events
province
Balochistan 0.727273 0.769231 1.000000
Gilgit-Baltistan 0.772727 1.000000 0.862069
Islamabad 0.954545 0.634615 0.758621
Khyber Pakhtunkhwa 1.000000 0.903846 0.862069
Punjab 0.727273 0.692308 0.758621
Sindh 1.000000 0.942308 0.931034
In [96]:
# Plot heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(normalized_stats[['events', 'deaths', 'injuries']],
            annot=True, cmap='YlOrRd', cbar_kws={'label': 'Normalized Value'})
plt.title('Normalized Cloudburst Events, Deaths, and Injuries by Province', fontsize=14)
plt.xlabel('Metric', fontsize=12)
plt.ylabel('Province', fontsize=12)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]: