import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
# ---------------------------------------------------------------------------
# Load the events table, clean it, and scale every column into [0, 1].
# ---------------------------------------------------------------------------
data_dir = os.path.abspath(os.path.join(os.getcwd(), 'data'))
df = pd.read_csv(os.path.join(data_dir, 'SHARPevents.csv'))

# Seed all three RNGs. Python's own `random` is included because
# `keras.utils.set_random_seed` treats it as part of the state required for
# reproducible runs (NOTE(review): relevant for Keras 3 default layer seeds).
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Columns that are excluded from the feature matrix.
df = df.drop(
    columns=[
        'event_date', 'start_time', 'peak_time', 'end_time', 'goes_location',
        'T_REC', 'NOAA_ARS', 'goes_class', 'FlareNumber', 'NOAA_NUM', 'QUALITY',
    ]
)

# One-hot encode the categorical columns. 'classFlare' is listed last so its
# dummy columns ('classFlare_*') are the trailing columns of the encoded frame.
categorical_columns = ['noaa_active_region', 'classFlare']
df = pd.get_dummies(df, columns=categorical_columns)

# Turn +/-inf into NaN *before* recording which columns have gaps, so that
# `nan_columns` also lists columns whose only bad values were infinities.
df.replace([np.inf, -np.inf], np.nan, inplace=True)
nan_columns = df.columns[df.isna().any()].tolist()

# Impute with the column mean. A column that is entirely NaN has a NaN mean and
# would stay NaN (and MinMaxScaler passes NaN through), so fall back to 0 there.
df.fillna(df.mean(), inplace=True)
df.fillna(0, inplace=True)

# NOTE(review): the scaler is fit on every row, including rows that later land
# in the test split, so test statistics influence the scaling. Consider fitting
# on the training portion only.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)
def create_sequences(data, seq_length):
    """Build sliding windows and their next-row targets from row-ordered data.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``; ``len(data) - seq_length`` pairs are produced.

    Args:
        data: Array-like indexed by row along the first axis (any number of
            trailing dimensions).
        seq_length: Number of consecutive rows per window; must be >= 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, seq_length, *data.shape[1:])`` and ``y`` has shape
        ``(n_windows, *data.shape[1:])``. When ``data`` has ``seq_length`` rows
        or fewer, both arrays are empty along the first axis but keep the
        trailing dimensions, so downstream shape lookups still work.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row i of `window_index` holds the row numbers i, i+1, ..., i+seq_length-1,
    # so a single fancy-index gather materialises every window at once.
    window_index = np.arange(n_windows)[:, None] + np.arange(seq_length)
    X = data[window_index]

    # The target of window i is the row right after it; copy so the result
    # does not alias the caller's array.
    y = data[seq_length:seq_length + n_windows].copy()
    return X, y
# ---------------------------------------------------------------------------
# Build windows over the scaled rows and train an LSTM to predict the row that
# follows each window.
# ---------------------------------------------------------------------------
# Number of consecutive rows of `scaled_data` that make up one input window.
seq_length = 10
# X holds the windows, y the row immediately after each window.
X, y = create_sequences(scaled_data, seq_length)
# shuffle=False keeps the original row order: the first 80% of windows are used
# for training and the remaining 20% for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
# Two stacked LSTM layers, each followed by dropout, then a dense layer with
# one output per input column (X.shape[2]) so the network emits a full row.
model = Sequential()
# return_sequences=True so the second LSTM receives the whole sequence.
model.add(LSTM(50, return_sequences=True, input_shape=(seq_length, X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(X.shape[2]))
# Regression objective: mean squared error between predicted and actual row.
model.compile(optimizer='adam', loss='mean_squared_error')
# validation_split=0.2 asks Keras to hold out a fraction of the training data
# for validation during fitting.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)
# MSE on the held-out test windows (stored, not printed).
loss = model.evaluate(X_test, y_test)
# ---------------------------------------------------------------------------
# Assemble the three feature sets (raw windows, LSTM output, both) and derive
# the integer class labels for the random-forest classifiers.
# ---------------------------------------------------------------------------
# The LSTM's predicted next row for each window serves as a learned feature vector.
lstm_features_train = model.predict(X_train)
lstm_features_test = model.predict(X_test)

# Flatten every (seq_length, n_columns) window into a single feature row.
original_features_train = X_train.reshape(X_train.shape[0], -1)
original_features_test = X_test.reshape(X_test.shape[0], -1)

combined_features_train = np.concatenate((original_features_train, lstm_features_train), axis=1)
combined_features_test = np.concatenate((original_features_test, lstm_features_test), axis=1)

original_feature_names = [f'Original_Feature_{i+1}' for i in range(original_features_train.shape[1])]
lstm_feature_names = [f'LSTM_Feature_{i+1}' for i in range(lstm_features_train.shape[1])]
feature_columns = original_feature_names + lstm_feature_names

classFlare_columns = [col for col in df.columns if col.startswith('classFlare_')]
if not classFlare_columns:
    # Without this guard an empty list would make the label slice below cover
    # *every* column and yield meaningless class indices.
    raise ValueError("no 'classFlare_*' columns found in df; cannot derive class labels")

# Sample k (window starting at row k) is labelled by row k + seq_length of df,
# and the split was made without shuffling, so the label rows for train/test
# are two consecutive slices that start at `seq_length`, not at 0.
n_train = len(combined_features_train)
n_test = len(combined_features_test)
train_rows = slice(seq_length, seq_length + n_train)
test_rows = slice(seq_length + n_train, seq_length + n_train + n_test)

# Attach the one-hot label columns in a single concat (avoids inserting columns
# one at a time into an already wide frame).
combined_df_train = pd.concat(
    [
        pd.DataFrame(combined_features_train, columns=feature_columns),
        df[classFlare_columns].iloc[train_rows].reset_index(drop=True),
    ],
    axis=1,
)
combined_df_test = pd.concat(
    [
        pd.DataFrame(combined_features_test, columns=feature_columns),
        df[classFlare_columns].iloc[test_rows].reset_index(drop=True),
    ],
    axis=1,
)

# Locate the label columns by name instead of assuming they are the trailing
# columns of the scaled matrix; class index i corresponds to classFlare_columns[i].
class_positions = [df.columns.get_loc(col) for col in classFlare_columns]
y_train_class = np.argmax(y_train[:, class_positions], axis=1)
y_test_class = np.argmax(y_test[:, class_positions], axis=1)
# ---------------------------------------------------------------------------
# Train one random forest per feature set and score each on the test windows.
# ---------------------------------------------------------------------------
# The combined frames also carry the one-hot label columns; strip them once so
# the classifier only sees features.
combined_train_inputs = combined_df_train.drop(columns=classFlare_columns)
combined_test_inputs = combined_df_test.drop(columns=classFlare_columns)

# Classifier on raw-window features + LSTM features.
rf_model_combined = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_combined.fit(combined_train_inputs, y_train_class)
y_pred_combined = rf_model_combined.predict(combined_test_inputs)

# Classifier on LSTM features only.
rf_model_lstm = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_lstm.fit(lstm_features_train, y_train_class)
y_pred_lstm = rf_model_lstm.predict(lstm_features_test)

# Classifier on raw-window features only.
rf_model_non_lstm = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_non_lstm.fit(original_features_train, y_train_class)
y_pred_non_lstm = rf_model_non_lstm.predict(original_features_test)

# Accuracy and class-frequency-weighted precision for each classifier.
accuracy_combined = accuracy_score(y_test_class, y_pred_combined)
precision_combined = precision_score(y_test_class, y_pred_combined, average='weighted')

accuracy_lstm = accuracy_score(y_test_class, y_pred_lstm)
precision_lstm = precision_score(y_test_class, y_pred_lstm, average='weighted')

accuracy_non_lstm = accuracy_score(y_test_class, y_pred_non_lstm)
precision_non_lstm = precision_score(y_test_class, y_pred_non_lstm, average='weighted')

# Report: all accuracies first, then all precisions.
print(f"Accuracy with combined features: {accuracy_combined}")
print(f"Accuracy with LSTM features: {accuracy_lstm}")
print(f"Accuracy with non-LSTM features: {accuracy_non_lstm}")
print(f"Precision with combined features: {precision_combined}")
print(f"Precision with LSTM features: {precision_lstm}")
print(f"Precision with non-LSTM features: {precision_non_lstm}")
# ---------------------------------------------------------------------------
# Side-by-side bar charts of accuracy and precision for the three classifiers.
# ---------------------------------------------------------------------------
metrics = {
    'Combined Features': {'Accuracy': accuracy_combined, 'Precision': precision_combined},
    'LSTM Features': {'Accuracy': accuracy_lstm, 'Precision': precision_lstm},
    'Non-LSTM Features': {'Accuracy': accuracy_non_lstm, 'Precision': precision_non_lstm},
}

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('images', exist_ok=True)

classifier_names = list(metrics)
bar_colors = ['blue', 'green', 'red']

fig, ax = plt.subplots(1, 2, figsize=(14, 6))
# One panel per metric; both panels share the same layout and [0, 1] y-range.
for panel, metric_name, panel_title in zip(
    ax,
    ('Accuracy', 'Precision'),
    ('Classifier Accuracies', 'Classifier Precisions'),
):
    panel.bar(
        classifier_names,
        [metrics[name][metric_name] for name in classifier_names],
        color=bar_colors,
    )
    panel.set_title(panel_title)
    panel.set_xlabel('Classifier')
    panel.set_ylabel(metric_name)
    panel.set_ylim(0, 1)

plt.tight_layout()
plt.savefig('images/classifier_comparison.png')
plt.close()
# ---------------------------------------------------------------------------
# Top-10 feature-importance bar charts, one per random-forest classifier.
# ---------------------------------------------------------------------------
def _rank_importances(feature_names, importances):
    """Return a Feature/Importance table sorted from most to least important."""
    table = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
    return table.sort_values(by='Importance', ascending=False)


def _save_importance_chart(top_rows, title, output_path):
    """Draw `top_rows` (Feature/Importance columns) as a bar chart and save it."""
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    plt.figure(figsize=(12, 6))
    plt.bar(top_rows['Feature'], top_rows['Importance'])
    plt.title(title)
    plt.xlabel('Feature')
    plt.ylabel('Importance')
    plt.xticks(rotation=90)  # feature names are long; rotate so they do not overlap
    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()


# Combined classifier: feature names are the combined frame's columns minus the
# one-hot label columns (the same columns the classifier was trained on).
feature_importances_combined = rf_model_combined.feature_importances_
features_combined = _rank_importances(
    combined_df_train.drop(columns=classFlare_columns).columns,
    feature_importances_combined,
)
top10fc = features_combined.head(10)
_save_importance_chart(
    top10fc,
    'Feature Importances (Combined Features)',
    'images/feature_importances_combined.png',
)

# LSTM-only classifier.
feature_importances_lstm = rf_model_lstm.feature_importances_
features_lstm = _rank_importances(lstm_feature_names, feature_importances_lstm)
top10fc = features_lstm.head(10)
_save_importance_chart(
    top10fc,
    'Feature Importances (LSTM Features)',
    'images/feature_importances_lstm.png',
)

# Raw-window-only classifier.
feature_importances_non_lstm = rf_model_non_lstm.feature_importances_
features_non_lstm = _rank_importances(original_feature_names, feature_importances_non_lstm)
top10fc = features_non_lstm.head(10)
_save_importance_chart(
    top10fc,
    'Feature Importances (Non-LSTM Features)',
    'images/feature_importances_non_lstm.png',
)
# ---------------------------------------------------------------------------
# Stand-alone bar chart of the three classifier accuracies.
# ---------------------------------------------------------------------------
accuracies = {
    'Combined Features': accuracy_combined,
    'LSTM Features': accuracy_lstm,
    'Non-LSTM Features': accuracy_non_lstm,
}

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('images', exist_ok=True)

plt.figure(figsize=(10, 6))
# Materialise the dict views as lists so matplotlib receives plain sequences.
plt.bar(list(accuracies), list(accuracies.values()), color=['blue', 'green', 'red'])
plt.title('Classifier Accuracies')
plt.xlabel('Classifier')
plt.ylabel('Accuracy')
plt.ylim(0, 1)  # accuracy is a fraction; fix the axis so charts are comparable
plt.tight_layout()
plt.savefig('images/classifier_accuracies.png')
plt.close()
# ---------------------------------------------------------------------------
# Confusion-matrix heatmaps for the LSTM-only and raw-window-only classifiers.
# ---------------------------------------------------------------------------
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('images', exist_ok=True)

# Class index i was produced by argmax over the classFlare_* columns, so it
# corresponds to classFlare_columns[i]. Passing the full index list as `labels`
# gives every matrix one row/column per class even if a class never occurs in
# the test labels or predictions, which keeps axis positions meaningful.
class_indices = list(range(len(classFlare_columns)))
class_names = [col[len('classFlare_'):] for col in classFlare_columns]

conf_matrix_lstm = confusion_matrix(y_test_class, y_pred_lstm, labels=class_indices)
conf_matrix_non_lstm = confusion_matrix(y_test_class, y_pred_non_lstm, labels=class_indices)

for matrix, chart_title, output_path in (
    (conf_matrix_lstm, 'Confusion Matrix (LSTM Features)', 'images/confusion_matrix_lstm.png'),
    (conf_matrix_non_lstm, 'Confusion Matrix (Non-LSTM Features)', 'images/confusion_matrix_non_lstm.png'),
):
    plt.figure(figsize=(10, 7))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',  # cells are integer counts
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title(chart_title)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(output_path)
    plt.close()
# Language: Python