import http.client
import json
import os

import pandas as pd

class DataPreprocessing:
    """
    Class for preprocessing data
    """
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "api.nasa.gov"

    # Fetch data from the NASA DONKI API
    def get_data(self, data_string, start_date, end_date):
        """
        Fetches records from a DONKI endpoint (reuses Jordan's API code).
        :param data_string: DONKI endpoint name, e.g. "CME" or "FLR"
        :param start_date: start of the query window (YYYY-MM-DD)
        :param end_date: end of the query window (YYYY-MM-DD)
        :return: DataFrame of the normalized JSON response
        """
        conn = http.client.HTTPSConnection(self.base_url)

        conn.request("GET", f"/DONKI/{data_string}?startDate={start_date}&endDate={end_date}&api_key={self.api_key}")
        response = conn.getresponse()
        data = response.read().decode("utf-8")
        conn.close()

        # Parse the JSON response into Python objects
        parsed_data = json.loads(data)

        # Flatten nested JSON into a DataFrame
        df = pd.json_normalize(parsed_data)
        return df
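
    # A minimal usage sketch (assumed values): "CME" is one of the DONKI
    # endpoint names and "DEMO_KEY" is NASA's public demo key.
    #
    #   dp = DataPreprocessing(api_key="DEMO_KEY")
    #   cme_df = dp.get_data("CME", "2024-05-01", "2024-05-31")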

    def save_to_csv(self, df, file_name, folder):
        """
        Saves a DataFrame to a CSV file.
        :param df: the DataFrame to save
        :param file_name: file name without the .csv extension
        :param folder: target folder, created under the parent directory if needed
        :return: None; the file is written to the user-defined location
        """
        # Build the target directory under the parent folder and create it if needed
        directory = os.path.join("..", folder)
        os.makedirs(directory, exist_ok=True)

        file_path = os.path.join(directory, f"{file_name}.csv")
        df.to_csv(file_path, index=False)
        print(f"Data saved to {file_path}")
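
    # For example (hypothetical names), dp.save_to_csv(cme_df, "cme_may_2024", "data")
    # writes ../data/cme_may_2024.csv relative to the working directory.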


    def join_data(self, df_list, join_field, join_type='inner'):
        """
        Joins multiple DataFrames together on a shared field.
        :param df_list: list of DataFrames; the first is the left-most table
        :param join_field: column name to join on
        :param join_type: merge strategy passed to pandas ('inner', 'left', ...)
        :return: joined DataFrame
        """
        join_df = df_list[0]

        for df in df_list[1:]:
            join_df = pd.merge(join_df, df, how=join_type, on=join_field)

        return join_df
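
    # A sketch assuming each frame carries an "activityID" column (a common
    # DONKI identifier; adjust the field to your data):
    #
    #   merged = dp.join_data([cme_df, gst_df], join_field="activityID")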

    def check_missing_values(self, df):
        """
        Summarizes missing values and provides a report of missing counts per column.
        :param df: The DataFrame to check for missing values.
        :return: Series with missing value counts for each column.
        """
        missing_values = df.isnull().sum()
        print(f"Missing values per column:\n{missing_values}")
        return missing_values

    def summarize_columns(self, df):
        """
        Provides summary statistics for each column, including null counts, distinct values, and data types.
        :param df: The DataFrame to summarize.
        :return: DataFrame with column summaries.
        """
        summary = pd.DataFrame({
            'Data Type': df.dtypes,
            'Null Count': df.isnull().sum(),
            'Distinct Values': df.nunique()
        })
        print(f"Column Summary:\n{summary}")
        return summary

    def remove_null_rows(self, df, column_name):
        """
        Removes rows with null values in the specified column.
        :param df: The DataFrame to process.
        :param column_name: The column to check for null values.
        :return: DataFrame with rows removed where the specified column has null values.
        """
        cleaned_df = df.dropna(subset=[column_name])
        print(f"Rows with null values in column '{column_name}' have been removed.")
        return cleaned_df
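
    # A typical cleaning pass (the column name is illustrative):
    #
    #   dp.check_missing_values(cme_df)
    #   cme_df = dp.remove_null_rows(cme_df, "startTime")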

    def explode_columns(self, df, columns):
        """
        Explodes list-valued columns and flattens the resulting dicts. Several
        columns in this dataset hold lists of key-value pairs that are more
        useful as top-level columns.
        :param df: the DataFrame to process
        :param columns: column names to explode and normalize
        :return: exploded and flattened DataFrame
        """
        df_exploded = df.copy()

        for column in columns:
            if column in df_exploded.columns:
                df_exploded = df_exploded.explode(column).reset_index(drop=True)
                # Flatten the dicts and concatenate immediately so row counts
                # stay aligned even if a later column is exploded and
                # multiplies the rows (assumes the entries are dicts).
                normalized_df = pd.json_normalize(df_exploded[column])
                df_exploded = pd.concat(
                    [df_exploded.drop(columns=[column]), normalized_df], axis=1
                )
            else:
                print(f"Warning: Column '{column}' not found in DataFrame.")

        return df_exploded
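
    # For instance (hypothetical column), a frame whose "linkedEvents" column
    # holds lists of {"activityID": ...} dicts can be flattened with:
    #
    #   flat = dp.explode_columns(gst_df, ["linkedEvents"])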

    def explode_columns1(self, df, columns):
        """
        Variant of explode_columns that checks whether a column actually holds
        list or dict data before exploding it.
        :param df: the DataFrame to process
        :param columns: column names to explode and normalize
        :return: exploded and flattened DataFrame
        """
        df_exploded = df.copy()

        for column in columns:
            if column in df_exploded.columns:
                # Only explode columns that contain list- or dict-like values
                if df_exploded[column].apply(lambda x: isinstance(x, (list, dict))).any():
                    df_exploded = df_exploded.explode(column).reset_index(drop=True)
                    normalized_df = pd.json_normalize(df_exploded[column])
                    print(f"Normalized Data for {column}:\n", normalized_df.head())
                    # Concatenate immediately so row counts stay aligned if a
                    # later column is exploded as well.
                    df_exploded = pd.concat(
                        [df_exploded.drop(columns=[column]), normalized_df], axis=1
                    )
                else:
                    print(f"Column '{column}' does not contain list or dict data.")
            else:
                print(f"Warning: Column '{column}' not found in DataFrame.")

        return df_exploded
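
# A minimal end-to-end sketch, assuming a valid NASA api_key. "GST" is a real
# DONKI endpoint and "linkedEvents" is a field its records commonly carry, but
# the column and file names here are illustrative, not part of the class.
if __name__ == "__main__":
    dp = DataPreprocessing(api_key="DEMO_KEY")

    # Fetch geomagnetic storm records for a one-month window.
    gst_df = dp.get_data("GST", "2024-05-01", "2024-05-31")

    # Inspect the raw frame before cleaning.
    dp.summarize_columns(gst_df)
    dp.check_missing_values(gst_df)

    # Flatten the nested linkedEvents column and persist the result.
    flat_df = dp.explode_columns1(gst_df, ["linkedEvents"])
    dp.save_to_csv(flat_df, "gst_may_2024", "data")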