from Data_Preprocessing import DataPreprocessing
import pandas as pd
import os
from datetime import datetime
import concurrent.futures
from tabulate import tabulate
# Do not truncate the column list when DataFrames are printed.
pd.set_option('display.max_columns', None)

# Prefer a key supplied through the NASA_API_KEY environment variable so the
# credential does not have to be committed with the script. The literal is
# kept as the fallback, so runs without the variable behave as before.
api_key = os.environ.get("NASA_API_KEY", "**********")
NASA_Data = DataPreprocessing(api_key)

# Query window passed to every fetch: fixed start, "today" as the end,
# both formatted as YYYY-MM-DD strings.
start_date = "2020-01-01"
end_date = datetime.today().strftime('%Y-%m-%d')

# Event type codes requested below; "FLR" feeds solar_flare_df and "CME"
# feeds cme_df.
event_types = ["FLR", "CME"]
def fetch_data(event_type):
    """Fetch one event type for the module-level date window.

    Calls ``NASA_Data.get_data`` with ``event_type`` and the module-level
    ``start_date`` / ``end_date``.

    Returns:
        A ``(event_type, data)`` tuple. ``data`` is whatever ``get_data``
        returned (presumably a DataFrame -- confirm against
        ``DataPreprocessing``). If the call raises any ``Exception``, the
        error is printed and an empty ``pd.DataFrame`` is returned instead,
        so one failed fetch does not abort the others.
    """
    try:
        fetched = NASA_Data.get_data(event_type, start_date, end_date)
    except Exception as e:
        # Best-effort: report the failure and hand back an empty frame.
        print(f"Error fetching data for {event_type}: {e}")
        fetched = pd.DataFrame()
    return event_type, fetched
# Run the fetches on a thread pool; each call yields an
# (event_type, frame) pair, which is collected into a dict keyed by type.
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    results = executor.map(fetch_data, event_types)
    dataframes = dict(results)

# Fall back to an empty frame if a type is somehow absent from the results.
solar_flare_df = dataframes.get("FLR", pd.DataFrame())
cme_df = dataframes.get("CME", pd.DataFrame())

# Persist the raw frames before any reshaping.
# NOTE(review): the third argument looks like an output folder name -- confirm
# against DataPreprocessing.save_to_csv.
for raw_frame, file_stem in (
    (solar_flare_df, "solar_flare_data"),
    (cme_df, "cme_data"),
):
    NASA_Data.save_to_csv(raw_frame, file_stem, "data")
# Relative path '../data'; not referenced again in the visible script.
csv_file_path = os.path.join('..', 'data')

# Columns handed to explode_columns for each data set.
# NOTE(review): presumably these hold nested list/dict values -- confirm
# against DataPreprocessing.explode_columns.
cme_nested_columns = ['cmeAnalyses', 'instruments']
solar_nested_columns = ['instruments', 'linkedEvents']

cme_mod = NASA_Data.explode_columns(cme_df, cme_nested_columns)
NASA_Data.save_to_csv(cme_mod, "cme_exploded", "data")

solar_flare_mod = NASA_Data.explode_columns(solar_flare_df, solar_nested_columns)
NASA_Data.save_to_csv(solar_flare_mod, "solar_exploded", "data")
# Columns selected from the exploded CME frame before de-duplication.
unique_fields = [
    'activityID',
    'startTime',
    'sourceLocation',
    'activeRegionNum',
    'isMostAccurate',
    'time21_5',
    'latitude',
    'longitude',
    'halfAngle',
    'speed',
    'type',
    'levelOfData',
    'tilt',
    'minorHalfWidth',
    'speedMeasuredAtHeight',
]

# Columns selected from the exploded solar-flare frame; 'activityID' is the
# key later used to join the flare rows to the CME rows.
unique_fields_solar = [
    'flrID',
    'beginTime',
    'peakTime',
    'endTime',
    'classType',
    'sourceLocation',
    'activeRegionNum',
    'activityID',
]
# Keep only the selected columns, drop duplicate rows and renumber the index
# from zero.
# NOTE(review): fetch_data returns an empty DataFrame when a fetch fails; if
# the selected columns are then missing, these lookups raise KeyError.
cme_clean = cme_mod[unique_fields].drop_duplicates().reset_index(drop=True)
solar_clean = (
    solar_flare_mod[unique_fields_solar].drop_duplicates().reset_index(drop=True)
)
NASA_Data.save_to_csv(cme_clean, "cme_clean", "data")

# Join flare rows (first in the list) with CME rows on 'activityID'.
# NOTE(review): "left" is presumably the join type -- confirm against
# DataPreprocessing.join_data.
frames_to_join = [solar_clean, cme_clean]
joined_data = NASA_Data.join_data(frames_to_join, "activityID", "left")

# Called for its side effects only; the return value is not used.
NASA_Data.check_missing_values(joined_data)

# NOTE(review): presumably drops rows whose 'speed' is null -- confirm against
# DataPreprocessing.remove_null_rows.
null_removed = NASA_Data.remove_null_rows(joined_data, "speed")
NASA_Data.save_to_csv(null_removed, "null_removed", "data")
# Language: Python