import pandas as pd
import matplotlib.pyplot as plt

# Load the cloudburst event records into a DataFrame.
# NOTE(review): the leading dot makes this a hidden file in the working
# directory -- confirm the path is intentional.
DATA_PATH = '.data.csv'
df = pd.read_csv(DATA_PATH)

# Notebook-style inspection of the available column names.
df.columns

Index(['event_id', 'date', 'province', 'district', 'latitude', 'longitude',
       'peak_rainfall_mm_per_hr', 'duration_min', 'area_km2', 'deaths',
       'injuries', 'notes'],
      dtype='object')

# Notebook-style preview of the first rows.
df.head()

# Parse the date column once, then derive the calendar year and month
# columns that the groupings further down rely on.
event_dates = pd.to_datetime(df['date'])
df['date'] = event_dates
df['year'] = event_dates.dt.year
df['month'] = event_dates.dt.month

def get_season(month):
    """Return the season label for a calendar month number.

    Args:
        month: Month number; 1-12 map to a named season.

    Returns:
        'Summer' for 6-9, 'Winter' for 12, 1, 2, 'Spring' for 3-5,
        'before winter' for 10-11, and 'Other' for any value that is not
        one of those month numbers.
    """
    season_months = (
        ('Summer', (6, 7, 8, 9)),
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('before winter', (10, 11)),
    )
    for label, months in season_months:
        if month in months:
            return label
    # Anything outside 1-12 (including missing values) falls through here.
    return 'Other'

# Tag every event with its season label.
df['season'] = df['month'].apply(get_season)

# Number of events per year, per month and per season (one groupby each).
events_by_year, events_by_month, events_by_season = (
    df.groupby(key).size() for key in ('year', 'month', 'season')
)

# plt.style.use('seaborn')

# Plot 1: yearly event counts drawn as a line chart.
fig, ax = plt.subplots(figsize=(10, 6))
events_by_year.plot(ax=ax, kind='line', marker='o', color='#4e79a7', linewidth=2)
ax.set_title('Cloudburst Events by Year', fontsize=14)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Number of Events', fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()

# Plot 2: monthly event counts drawn as a bar chart.
#
# A month with no recorded events is absent from the groupby result, which
# would leave fewer than 12 bars while the tick labels below assume one bar
# per calendar month. Reindexing to months 1..12 (gaps filled with 0) keeps
# bar position i aligned with month label i.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
monthly_counts = events_by_month.reindex(range(1, 13), fill_value=0)

plt.figure(figsize=(10, 6))
monthly_counts.plot(kind='bar', color='#f28e2b', edgecolor='black')
plt.title('Cloudburst Events by Month', fontsize=14)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Number of Events', fontsize=12)
# Bar charts place bars at positions 0..11, hence range(12) for the ticks.
plt.xticks(ticks=range(len(month_labels)), labels=month_labels, rotation=45)
plt.grid(axis='y')
plt.tight_layout()
plt.show()

# Plot 3: event counts per season label drawn as a bar chart.
fig, ax = plt.subplots(figsize=(10, 6))
events_by_season.plot(ax=ax, kind='bar', color='#f28e2b', edgecolor='black')
ax.set_title('Cloudburst Events by Season', fontsize=14)
ax.set_xlabel('Season', fontsize=12)
ax.set_ylabel('Number of Events', fontsize=12)
ax.grid(axis='y')
fig.tight_layout()
plt.show()

# Per-year summary statistics (mean, median, min, max) for both the event
# duration and the peak rainfall intensity.
summary_funcs = ['mean', 'median', 'min', 'max']
measured_columns = ['duration_min', 'peak_rainfall_mm_per_hr']
rainfall_stats_by_year = df.groupby('year')[measured_columns].agg(summary_funcs)

# Flatten the (column, statistic) column pairs into single names such as
# 'duration_min_mean'.
rainfall_stats_by_year.columns = rainfall_stats_by_year.columns.map('_'.join)
rainfall_stats_by_year.head()

# Yearly mean/median trends: duration in the top panel, peak rainfall below.
# Each panel lists its title, y-label and the (column, marker, color, label)
# of every line drawn in it.
trend_panels = [
    (
        'Cloudburst Duration by Year',
        'Duration (Minutes)',
        [
            ('duration_min_mean', 'o', '#4e79a7', 'Mean Duration'),
            ('duration_min_median', 's', '#76b7b2', 'Median Duration'),
        ],
    ),
    (
        'Peak Rainfall Intensity by Year',
        'Peak Rainfall (mm/hr)',
        [
            ('peak_rainfall_mm_per_hr_mean', 'o', '#f28e2b', 'Mean Peak Rainfall'),
            ('peak_rainfall_mm_per_hr_median', 's', '#e15759', 'Median Peak Rainfall'),
        ],
    ),
]

plt.figure(figsize=(12, 8))
years = rainfall_stats_by_year.index

for row, (panel_title, y_label, line_specs) in enumerate(trend_panels, start=1):
    plt.subplot(2, 1, row)
    for column, marker, line_color, line_label in line_specs:
        plt.plot(years, rainfall_stats_by_year[column],
                 marker=marker, color=line_color, label=line_label)
    plt.title(panel_title, fontsize=14)
    plt.xlabel('Year', fontsize=12)
    plt.ylabel(y_label, fontsize=12)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

# Yearly casualty totals, placed alongside the yearly rainfall statistics
# (joined on the shared 'year' index).
casualty_columns = ['deaths', 'injuries']
impact_stats_by_year = df.groupby('year')[casualty_columns].agg('sum')
combined_stats = rainfall_stats_by_year.join(impact_stats_by_year, how='left')

# Two stacked panels. Each draws one yearly rainfall metric on the left
# y-axis and the yearly death/injury totals on a twin right y-axis.
# Panel spec: (metric column, metric color, metric label, panel title).
impact_panels = [
    ('duration_min_mean', '#4e79a7', 'Mean Duration (min)',
     'Mean Cloudburst Duration vs. Deaths and Injuries by Year'),
    ('peak_rainfall_mm_per_hr_mean', '#f28e2b', 'Mean Peak Rainfall (mm/hr)',
     'Mean Peak Rainfall vs. Deaths and Injuries by Year'),
]
# Casualty lines shared by both panels: (column, marker, color, label).
casualty_lines = [
    ('deaths', 's', '#e15759', 'Deaths'),
    ('injuries', '^', '#76b7b2', 'Injuries'),
]

plt.figure(figsize=(12, 10))
years = combined_stats.index

for row, (metric, metric_color, metric_label, panel_title) in enumerate(impact_panels, start=1):
    # Left axis: the rainfall metric.
    left_ax = plt.subplot(2, 1, row)
    left_ax.plot(years, combined_stats[metric], marker='o', color=metric_color, label=metric_label)
    left_ax.set_xlabel('Year', fontsize=12)
    left_ax.set_ylabel(metric_label, fontsize=12, color=metric_color)
    left_ax.tick_params(axis='y', labelcolor=metric_color)
    left_ax.grid(True)

    # Right (twin) axis: deaths and injuries.
    right_ax = left_ax.twinx()
    for column, marker, line_color, line_label in casualty_lines:
        right_ax.plot(years, combined_stats[column], marker=marker, color=line_color, label=line_label)
    right_ax.set_ylabel('Deaths / Injuries', fontsize=12, color='#e15759')
    right_ax.tick_params(axis='y', labelcolor='#e15759')
    left_ax.set_title(panel_title, fontsize=14)

    # One legend listing the lines from both axes.
    left_handles, left_labels = left_ax.get_legend_handles_labels()
    right_handles, right_labels = right_ax.get_legend_handles_labels()
    right_ax.legend(left_handles + right_handles, left_labels + right_labels, loc='upper left')

plt.tight_layout()
plt.show()

# Per-province aggregates: the number of events (count of non-null event_id
# values) and the death/injury totals.
province_groups = df.groupby('province')
events_by_province = province_groups['event_id'].count()
deaths_injury_province = province_groups[['deaths', 'injuries']].sum()

# Attach the event counts to the casualty totals as an 'events' column.
province_event_counts = events_by_province.rename('events')
combined_province_stats = deaths_injury_province.join(province_event_counts)

# Three stacked bar charts by province: events, deaths, injuries.
# Panel spec: (column, bar color, panel title, y-axis label).
province_panels = [
    ('events', '#4e79a7', 'Number of Cloudburst Events by Province', 'Number of Events'),
    ('deaths', '#e15759', 'Total Deaths by Province', 'Total Deaths'),
    ('injuries', '#76b7b2', 'Total Injuries by Province', 'Total Injuries'),
]

plt.figure(figsize=(12, 10))

for row, (column, bar_color, panel_title, y_label) in enumerate(province_panels, start=1):
    plt.subplot(3, 1, row)
    combined_province_stats[column].plot(kind='bar', color=bar_color, edgecolor='black')
    plt.title(panel_title, fontsize=14)
    plt.xlabel('Province', fontsize=12)
    plt.ylabel(y_label, fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y')

plt.tight_layout()
plt.show()

# Grouped bar chart comparing events, deaths and injuries for each province.
#
# DataFrame.plot opens its own figure when no `ax` is supplied, so a
# preceding plt.figure(figsize=...) call is left behind as an empty figure
# and its size never reaches the chart. The size is therefore passed to
# DataFrame.plot directly.
combined_province_stats[['events', 'deaths', 'injuries']].plot(
    kind='bar',
    figsize=(12, 8),
    color=['#4e79a7', '#e15759', '#76b7b2'],
    edgecolor='black',
)
# The pyplot calls below act on the figure DataFrame.plot just created.
plt.title('Cloudburst Events, Deaths, and Injuries by Province', fontsize=14)
plt.xlabel('Province', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.legend(['Events', 'Deaths', 'Injuries'])
plt.grid(axis='y')
plt.tight_layout()
plt.show()

<Figure size 1200x800 with 0 Axes>

# Side-by-side scatter plots of event count against casualty totals, one
# point per province, each point annotated with the province name.
# Panel spec: (outcome column, point color, panel title, y-axis label).
scatter_panels = [
    ('deaths', '#e15759', 'Cloudburst Events vs. Deaths by Province', 'Total Deaths'),
    ('injuries', '#76b7b2', 'Cloudburst Events vs. Injuries by Province', 'Total Injuries'),
]

plt.figure(figsize=(12, 6))
events_per_province = combined_province_stats['events']

for col_pos, (outcome, point_color, panel_title, y_label) in enumerate(scatter_panels, start=1):
    outcome_totals = combined_province_stats[outcome]
    plt.subplot(1, 2, col_pos)
    plt.scatter(events_per_province, outcome_totals, color=point_color, s=100)
    # Label every point with its province.
    for province, x_pos, y_pos in zip(combined_province_stats.index,
                                      events_per_province, outcome_totals):
        plt.text(x_pos, y_pos, province, fontsize=9)
    plt.title(panel_title, fontsize=14)
    plt.xlabel('Number of Events', fontsize=12)
    plt.ylabel(y_label, fontsize=12)
    plt.grid(True)

plt.tight_layout()
plt.show()

import seaborn as sns

# Put the three metrics on a common scale: every value is divided by the
# largest value of its own column, so the top province per metric becomes 1.0.
normalized_stats = combined_province_stats.copy()
for metric in ('events', 'deaths', 'injuries'):
    column_peak = normalized_stats[metric].max()
    normalized_stats[metric] = normalized_stats[metric] / column_peak

# Notebook-style display of the rescaled table.
normalized_stats

# Heatmap of the normalized metrics: provinces as rows, metrics as columns,
# with each cell annotated with its value.
fig, ax = plt.subplots(figsize=(10, 6))
heatmap_columns = ['events', 'deaths', 'injuries']
sns.heatmap(
    normalized_stats[heatmap_columns],
    annot=True,
    cmap='YlOrRd',
    cbar_kws={'label': 'Normalized Value'},
    ax=ax,
)
ax.set_title('Normalized Cloudburst Events, Deaths, and Injuries by Province', fontsize=14)
ax.set_xlabel('Metric', fontsize=12)
ax.set_ylabel('Province', fontsize=12)
fig.tight_layout()
plt.show()

	event_id	date	province	district	latitude	longitude	peak_rainfall_mm_per_hr	duration_min	area_km2	deaths	injuries	notes	year	month
0	2010-01-10_143	2010-01-10	Punjab	Multan	36.4547	68.6467	364.8	60	23.8	6	0	Cloudburst in Multan, Punjab; short-duration e...	2010	1
1	2010-01-15_117	2010-01-15	Khyber Pakhtunkhwa	Kohat	28.2284	69.7835	100.8	45	58.2	0	0	Cloudburst in Kohat, Khyber Pakhtunkhwa; short...	2010	1
2	2010-02-10_093	2010-02-10	Punjab	Multan	34.2131	67.3667	148.8	120	11.7	1	1	Cloudburst in Multan, Punjab; short-duration e...	2010	2
3	2010-04-07_118	2010-04-07	Islamabad	Islamabad	27.1652	66.6173	327.8	90	22.5	0	1	Cloudburst in Islamabad, Islamabad; short-dura...	2010	4
4	2010-04-27_087	2010-04-27	Punjab	Multan	29.3843	66.5263	192.1	180	29.6	0	1	Cloudburst in Multan, Punjab; short-duration e...	2010	4

	duration_min_mean	duration_min_median	duration_min_min	duration_min_max	peak_rainfall_mm_per_hr_mean	peak_rainfall_mm_per_hr_median	peak_rainfall_mm_per_hr_min	peak_rainfall_mm_per_hr_max
year
2010	84.230769	90.0	30	180	223.269231	214.30	100.8	364.8
2011	82.500000	75.0	30	180	242.875000	254.80	110.5	368.6
2012	84.375000	52.5	30	180	212.150000	198.25	50.0	322.0
2013	121.250000	120.0	45	180	209.483333	199.35	109.9	334.3
2014	87.500000	90.0	30	180	197.933333	196.65	60.8	365.8

	deaths	injuries
province
Balochistan	16	40
Gilgit-Baltistan	17	52
Islamabad	21	33
Khyber Pakhtunkhwa	22	47
Punjab	16	36
Sindh	22	49

	deaths	injuries	events
province
Balochistan	0.727273	0.769231	1.000000
Gilgit-Baltistan	0.772727	1.000000	0.862069
Islamabad	0.954545	0.634615	0.758621
Khyber Pakhtunkhwa	1.000000	0.903846	0.862069
Punjab	0.727273	0.692308	0.758621
Sindh	1.000000	0.942308	0.931034

Temporal Analysis (e.g., are cloudbursts becoming more frequent?)

Trend Analysis

Seasonal Analysis