from Data_Preprocessing import DataPreprocessing
import pandas as pd
import os
from datetime import datetime
import concurrent.futures
from tabulate import tabulate


pd.set_option('display.max_columns', None)  # show every column when printing DataFrames
api_key = "**********"  # NASA DONKI API key (redacted) — consider loading from an env var instead of hard-coding
NASA_Data = DataPreprocessing(api_key)

# Query window: fixed start date through today.
start_date = "2020-01-01"
end_date = datetime.today().strftime('%Y-%m-%d')  # gets the current date

# Solar Phenomenon event types
#event_types = ["FLR", "CME", "CMEAnalysis", "GST", "SEP"]
event_types = ["FLR", "CME"]

# Function to fetch data for each event type
def fetch_data(event_type):
    """Fetch DONKI records for a single event type.

    Returns an ``(event_type, DataFrame)`` pair. On any failure the error
    is printed and an empty DataFrame is returned instead, so one failing
    event type does not abort the whole pipeline.
    """
    try:
        frame = NASA_Data.get_data(event_type, start_date, end_date)
    except Exception as e:
        print(f"Error fetching data for {event_type}: {e}")
        frame = pd.DataFrame()
    return event_type, frame

# Fetch data simultaneously for all event types
# Fan out one request per event type on a small thread pool, then gather
# the (event_type, DataFrame) pairs into a dictionary keyed by event type.
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    pairs = executor.map(fetch_data, event_types)

dataframes = dict(pairs)

# Access specific dataframes; missing keys fall back to an empty DataFrame
# so downstream steps never receive None.
solar_flare_df = dataframes.get("FLR", pd.DataFrame())
cme_df = dataframes.get("CME", pd.DataFrame())
# cme_analysis_df = dataframes.get("CMEAnalysis", pd.DataFrame())
# gst_df = dataframes.get("GST", pd.DataFrame())
# sep_df = dataframes.get("SEP", pd.DataFrame())

# Save DataFrames to CSV files.
# NOTE(review): the third argument "data" is presumably the output directory —
# confirm against DataPreprocessing.save_to_csv.
NASA_Data.save_to_csv(solar_flare_df, "solar_flare_data","data")
NASA_Data.save_to_csv(cme_df, "cme_data","data")
# NASA_Data.save_to_csv(cme_analysis_df, "cme_analysis_data","data")
# NASA_Data.save_to_csv(gst_df, "gst_data","data")
# NASA_Data.save_to_csv(sep_df, "sep_data","data")
csv_file_path = os.path.join('..', 'data')  # only referenced by the commented-out reads below

# NOTE(review): if re-enabled, these paths embed literal backslashes ('\c', '\s')
# inside f-strings — use os.path.join (or forward slashes) instead for correctness
# and portability.
#cme_df = pd.read_csv(f'{csv_file_path}\cme_data.csv')
#solar_flare_df = pd.read_csv(f'{csv_file_path}\solar_flare_data.csv')

# Explode the nested/list-valued columns into one row per element, then persist.
cme_mod = NASA_Data.explode_columns(cme_df, ['cmeAnalyses','instruments'])
NASA_Data.save_to_csv(cme_mod, "cme_exploded","data")

solar_flare_mod = NASA_Data.explode_columns(solar_flare_df, ['instruments','linkedEvents'])
NASA_Data.save_to_csv(solar_flare_mod, "solar_exploded","data")

#print(tabulate(cme_df.head(100), headers='keys', tablefmt='pretty'))

# Disabled branches of the pipeline for the other DONKI event types;
# re-enable together with the extra entries in event_types above.
# sep_mod = NASA_Data.explode_columns(sep_df, ['linkedEvents'])
# NASA_Data.save_to_csv(sep_mod, "sep_exploded","data")
#
# gst_mod = NASA_Data.explode_columns(gst_df, ['allKpIndex','linkedEvents','parsed_allKpIndex'])
# NASA_Data.save_to_csv(gst_mod, "gst_exploded","data")
#
# # Read the CSV file into a DataFrame
# cme_data = pd.read_csv(f'{csv_file_path}\cme_exploded.csv')
# solar_data = pd.read_csv(f'{csv_file_path}\solar_exploded.csv')

# Column subsets that identify a record in each exploded dataset.
unique_fields = [
    'activityID', 'startTime', 'sourceLocation', 'activeRegionNum',
    'isMostAccurate', 'time21_5', 'latitude', 'longitude', 'halfAngle',
    'speed', 'type', 'levelOfData', 'tilt', 'minorHalfWidth',
    'speedMeasuredAtHeight'
]

unique_fields_solar = [
    'flrID','beginTime','peakTime','endTime','classType',
    'sourceLocation','activeRegionNum','activityID'
]

# Keep only the identifying columns, drop duplicate rows, and renumber the
# index from zero so downstream joins see a clean 0..n-1 index.
cme_clean = cme_mod[unique_fields].drop_duplicates().reset_index(drop=True)
solar_clean = solar_flare_mod[unique_fields_solar].drop_duplicates().reset_index(drop=True)

NASA_Data.save_to_csv(cme_clean, "cme_clean","data")

# Left-join the flare records onto the CME records on the shared
# activityID key (exact join semantics live in DataPreprocessing.join_data).
joined_data = NASA_Data.join_data([solar_clean,cme_clean],"activityID","left")

# Presumably reports missing values per column; the return value is ignored
# here — confirm against DataPreprocessing.check_missing_values.
NASA_Data.check_missing_values(joined_data)

# Drop rows that lack a value in the "speed" column (CME speed).
null_removed = NASA_Data.remove_null_rows(joined_data,"speed")

NASA_Data.save_to_csv(null_removed, "null_removed","data")

# Disabled: derive a date/date_time column from the flrID timestamp.
# null_removed.loc[:, 'date_time'] = null_removed['flrID'].str.extract(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})')
#
# null_removed.loc[:, 'date'] = null_removed['date_time'].str.split('T').str[0]
#

#NASA_Data.save_to_csv(joined_data, "joined_data","data")