# By: Riasat Ullah
# This module defines an object that can be used to analyze multiple instances together.

from utils import constants, errors, helpers, times, var_names
import configuration as configs
import datetime
import numpy
import pandas


class InstanceAnalyzer(object):

    standard_columns = [
        var_names.instance_id, var_names.organization_instance_id, var_names.instance_timestamp,
        var_names.resolved_on, var_names.assignee_level, var_names.service_ref_id, var_names.task_id, var_names.task_title,
        var_names.urgency_level, var_names.service_id, var_names.service_name, var_names.tags,
        var_names.assignees, var_names.events, var_names.impacted_business_services
    ]

    def __init__(self, timezone, instance_list, columns=None):
        self.timezone = timezone
        self.pd = pandas.DataFrame(
            instance_list,
            columns=columns if columns is not None else self.standard_columns
        )

        self.pd[var_names.regional_timestamp] = self.pd[var_names.instance_timestamp].apply(
            lambda x: times.utc_to_region_time(x, self.timezone))
        self.pd[var_names.regional_date] = self.pd[var_names.regional_timestamp].apply(lambda x: x.date())
        self.create_duration_column()
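
    # Illustrative construction (a sketch, not part of the class API): `instance_list`
    # is assumed to hold rows matching `standard_columns`, and `timezone` a value
    # accepted by times.utc_to_region_time, e.g. an IANA timezone name.
    #
    #   analyzer = InstanceAnalyzer('Asia/Dhaka', instance_list)
    #   analyzer.pd.head()  # regional_timestamp, regional_date and duration are pre-computed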

    def get_id_of_instances_on_service_between_utc_timestamps(self, service_id, start_timestamp, end_timestamp):
        '''
        Get the IDs of instances that occurred on a service within a given time period.
        :param service_id: ID of the service
        :param start_timestamp: (datetime.datetime) start timestamp (inclusive)
        :param end_timestamp: (datetime.datetime) end timestamp (not inclusive)
        :return: (list) of instance IDs
        '''
        # Clamp to the largest timestamp pandas can represent so the comparison
        # below does not go out of bounds.
        if end_timestamp > pandas.Timestamp.max:
            end_timestamp = pandas.Timestamp.max
        return self.pd[(self.pd[var_names.instance_timestamp] >= start_timestamp) &
                       (self.pd[var_names.instance_timestamp] < end_timestamp) &
                       (self.pd[var_names.service_id] == service_id)][var_names.instance_id].values.tolist()

    def get_id_of_instances_on_escalation_policy_between_utc_timestamps(self, esc_pol_id, start_timestamp,
                                                                        end_timestamp):
        '''
        Get the IDs of instances that were assigned to an escalation policy within a given time period.
        :param esc_pol_id: ID of the escalation policy
        :param start_timestamp: (datetime.datetime) start timestamp (inclusive)
        :param end_timestamp: (datetime.datetime) end timestamp (not inclusive)
        :return: (list) of instance IDs
        '''
        if end_timestamp > pandas.Timestamp.max:
            end_timestamp = pandas.Timestamp.max
        return self.pd[(self.pd[var_names.instance_timestamp] >= start_timestamp) &
                       (self.pd[var_names.instance_timestamp] < end_timestamp) &
                       (self.pd[var_names.assignees].apply(
                           lambda x: esc_pol_id in {y[var_names.for_policyid] for y in x}
                       ))][var_names.instance_id].values.tolist()
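
    # Example of the assignee records the filters in this class expect; the key
    # names come from var_names and the concrete values here are hypothetical:
    #
    #   assignees = [
    #       {for_policyid: 7, user_policyid: 42, valid_start: t0, valid_end: t1,
    #        timezone: 'America/New_York'},
    #   ]
    #
    #   ids = analyzer.get_id_of_instances_on_escalation_policy_between_utc_timestamps(
    #       esc_pol_id=7, start_timestamp=start, end_timestamp=end)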

    def filter_by_instance_ids(self, instance_ids):
        '''
        Filter the DataFrame by a list of instance IDs. The DataFrame is reset to the filtered result.
        :param instance_ids: (list) of instance IDs to filter by
        '''
        self.pd = self.pd[self.pd[var_names.instance_id].isin(instance_ids)]

    def filter_by_urgency_level(self, urgency_level):
        '''
        Filter the DataFrame by urgency level.
        :param urgency_level: (int or list) urgency level(s)
        '''
        if isinstance(urgency_level, int):
            urgency_level = [urgency_level]

        self.pd = self.pd[self.pd[var_names.urgency_level].isin(urgency_level)]

    def filter_by_period(self, start_date, end_date):
        '''
        Filter the DataFrame to a period of dates.
        :param start_date: (datetime.date) start date of the period (inclusive)
        :param end_date: (datetime.date) end date of the period (inclusive)
        '''
        self.pd = self.pd[(self.pd[var_names.regional_date] >= start_date) &
                          (self.pd[var_names.regional_date] <= end_date)]

    def filter_by_teams(self, all_team_components, team_refs_to_filter_by):
        '''
        Filter the DataFrame to only retain instances that occurred on certain teams.
        :param all_team_components: (dict) team components keyed by team reference ID; each value holds the
            team's escalation policies and services as (ID, start timestamp, end timestamp) tuples
        :param team_refs_to_filter_by: (list) of team reference IDs (used to look up the component IDs)
        '''
        filter_by_inst_ids = []
        for ref in team_refs_to_filter_by:
            if ref not in all_team_components:
                raise LookupError(errors.err_unknown_resource)

            for item in all_team_components[ref][var_names.policies]:
                filter_by_inst_ids += self.get_id_of_instances_on_escalation_policy_between_utc_timestamps(
                    esc_pol_id=item[0], start_timestamp=item[1], end_timestamp=item[2])

            for item in all_team_components[ref][var_names.services]:
                filter_by_inst_ids += self.get_id_of_instances_on_service_between_utc_timestamps(
                    service_id=item[0], start_timestamp=item[1], end_timestamp=item[2])

        self.filter_by_instance_ids(list(set(filter_by_inst_ids)))
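
    # The expected shape of `all_team_components`, inferred from the loops above
    # (the IDs and timestamps are hypothetical):
    #
    #   all_team_components = {
    #       team_ref: {
    #           var_names.policies: [(esc_pol_id, start_timestamp, end_timestamp), ...],
    #           var_names.services: [(service_id, start_timestamp, end_timestamp), ...],
    #       },
    #   }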

    def filter_by_user_policy_id(self, user_pol_id):
        '''
        Filter the DataFrame to only retain the instances that were assigned to a particular user policy ID.
        :param user_pol_id: (int) the user policy ID
        '''
        pd_columns = self.pd.columns

        self.pd = self.pd[self.pd[var_names.assignees].apply(
            lambda x: x is not None and user_pol_id in [y[var_names.user_policyid] for y in x])]

        # Rebuild an empty frame with the original columns so the column
        # assignment below still works on an empty result.
        if len(self.pd) == 0:
            self.pd = pandas.DataFrame([], columns=pd_columns)

        # The earliest time this user policy became a valid assignee on each instance.
        self.pd[var_names.valid_start] = self.pd[var_names.assignees].apply(
            lambda x: min([y[var_names.valid_start] for y in x if y[var_names.user_policyid] == user_pol_id]))
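
    # A minimal usage sketch (hypothetical IDs): filtering by a user policy also
    # creates the `valid_start` column that the user-level metrics below rely on.
    #
    #   analyzer.filter_by_user_policy_id(user_pol_id=42)
    #   analyzer.create_user_interruption_minutes_column(user_pol_id=42)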

    def filter_by_escalation_policy_id(self, esc_pol_id):
        '''
        Filter the DataFrame to only retain the instances that were assigned to a particular escalation policy.
        :param esc_pol_id: (int) the escalation policy ID
        '''
        if len(self.pd) > 0:
            self.pd = self.pd[self.pd[var_names.assignees].apply(
                lambda x: x is not None and esc_pol_id in [y[var_names.for_policyid] for y in x])]

    def create_duration_column(self):
        '''
        Calculate and create the column for the duration (in minutes) of each instance.
        Columns needed: 'resolved_on', 'instance_timestamp'
        '''
        self.pd[var_names.duration] =\
            (self.pd[var_names.resolved_on] - self.pd[var_names.instance_timestamp])/numpy.timedelta64(1, 's')/60

    def create_escalation_count_column(self):
        '''
        Calculate and create the column for the number of escalations that happened in each instance.
        Columns needed: 'events'
        '''
        self.pd[var_names.escalation_count] = self.pd[var_names.events].apply(
            lambda x: sum(1 for y in x if y[var_names.event_type] == constants.escalate_event))
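
    # Example of the event records the event-based columns expect; key names come
    # from var_names, the values are hypothetical:
    #
    #   events = [
    #       {event_type: constants.escalate_event, event_timestamp: t0,
    #        event_by: 42, display_name: 'Jane Doe'},
    #       {event_type: constants.resolve_event, event_timestamp: t1,
    #        event_by: 42, display_name: 'Jane Doe'},
    #   ]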

    def create_acknowledgement_time_columns(self):
        '''
        Find out the time the instance was acknowledged (or resolved if resolved directly without acknowledgement)
        and put it in the 'acknowledged_on' column. Also calculate the number of minutes it took to get to
        acknowledgement and put that in the 'acknowledgement_time' column.
        Columns needed: 'events', 'instance_timestamp'
        '''
        # Assumes every instance has at least one acknowledge, snooze, or resolve event;
        # min() would raise a ValueError on an instance with none of these.
        self.pd[var_names.acknowledged_on] = self.pd[var_names.events].apply(
            lambda x: min([y[var_names.event_timestamp] for y in x
                           if y[var_names.event_type] in
                           [constants.acknowledge_event, constants.snooze_event, constants.resolve_event]])
        ).astype('datetime64[ns]')
        self.pd[var_names.acknowledgement_time] =\
            (self.pd[var_names.acknowledged_on] - self.pd[var_names.instance_timestamp])/numpy.timedelta64(1, 's')/60

    def create_off_hour_interruption_column(self):
        '''
        Create the column for off-hour interruption. It stores a boolean value of
        whether the instance occurred during sleep hours or after hours.
        '''
        assignees_dict = self.pd[[var_names.instance_id, var_names.instance_timestamp,
                                  var_names.assignees]].to_dict('records')

        off_hour_map = dict()
        for item in assignees_dict:
            inst_id = item[var_names.instance_id]
            off_hour_map[inst_id] = False

            for sub_item in item[var_names.assignees]:
                user_region_tmsp = times.utc_to_region_time(item[var_names.instance_timestamp],
                                                            sub_item[var_names.timezone])
                if configs.sleep_hours[0] <= user_region_tmsp.time() < configs.sleep_hours[1] or\
                        configs.after_hours[0] <= user_region_tmsp.time() < configs.after_hours[1]:
                    off_hour_map[inst_id] = True
                    break

        self.pd[var_names.off_hour_interruption] = self.pd[var_names.instance_id].map(off_hour_map)
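
    # configs.sleep_hours and configs.after_hours are assumed to be (start, end)
    # pairs of datetime.time values, e.g.:
    #
    #   sleep_hours = (datetime.time(0, 0), datetime.time(6, 0))
    #   after_hours = (datetime.time(18, 0), datetime.time(23, 59))
    #
    # Note that the `start <= t < end` checks above do not handle intervals that
    # wrap past midnight.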

    def create_resolved_by_column(self, with_display_name=False):
        '''
        Create a column to hold the name of the user who resolved the instance.
        Column needed: events
        '''
        col_name = var_names.event_by
        if with_display_name:
            col_name = var_names.display_name

        self.pd[var_names.resolved_by] = self.pd[var_names.events].apply(
            lambda x: [y[col_name] for y in x if y[var_names.event_type] == constants.resolve_event]
        ).apply(lambda z: z[0] if len(z) > 0 else None)

    def create_regional_time_column(self):
        '''
        Create the regional_time column. Only puts the time part of the regional timestamp without the date.
        '''
        self.pd[var_names.regional_time] = self.pd[var_names.regional_timestamp].apply(lambda x: x.time())

    def create_user_acknowledgement_and_resolution_columns(self, user_id):
        '''
        Create the 'user_acknowledged_on' and 'user_acknowledgement_time' columns. The user acknowledgement time
        is calculated relative to the time the instance was assigned to them specifically, not the instance timestamp.
        :param user_id: (int) ID of the user
        '''
        self.pd[var_names.user_acknowledged_on] = self.pd[var_names.events].apply(
            lambda x: [y[var_names.event_timestamp] for y in x
                       if y[var_names.event_type] in [constants.acknowledge_event, constants.snooze_event,
                                                      constants.resolve_event]
                       and y[var_names.event_by] == user_id]
        ).apply(lambda z: min(z) if len(z) > 0 else None)

        # Timestamp values provided directly in columns are read as datetime64, while those that are read
        # from the list of dicts are read as objects. Subtracting these different data types throws errors.
        # Hence, we convert them to datetime64 here before subtracting them.

        self.pd[var_names.user_acknowledgement_time] =\
            ((self.pd[var_names.user_acknowledged_on].astype('datetime64[ns]') -
              self.pd[var_names.valid_start].astype('datetime64[ns]'))/numpy.timedelta64(1, 's')/60)\
            .where(self.pd[var_names.user_acknowledged_on].notnull())

        self.create_resolved_by_column()
        self.pd[var_names.user_resolution_time] =\
            ((self.pd[var_names.resolved_on].astype('datetime64[ns]') -
              self.pd[var_names.valid_start].astype('datetime64[ns]'))/numpy.timedelta64(1, 's')/60)\
            .where(self.pd[var_names.resolved_by] == user_id)

    def create_user_interruption_minutes_column(self, user_pol_id):
        '''
        Calculate the number of minutes that the user was interrupted for in total in an instance
        and put it in a new column called 'user_interruption_minutes'.
        :param user_pol_id: (int) policy ID of the user
        '''
        self.pd[var_names.user_interruption_minutes] = self.pd[var_names.assignees].apply(
            lambda x: sum([(y[var_names.valid_end] - y[var_names.valid_start]).total_seconds()/60
                           for y in x if y[var_names.user_policyid] == user_pol_id]))

    def create_user_rest_minutes_column(self):
        '''
        Calculate the number of minutes that the user gets to rest in between instances
        and put it in a new column called 'user_rest_minutes'.
        '''
        # Make sure the DataFrame is sorted by the valid start.
        assignees_dict = self.pd.sort_values(var_names.valid_start)[[var_names.instance_id, var_names.assignees]]\
            .to_dict('records')

        rest_minutes_map = dict()
        for i in range(0, len(assignees_dict)):
            item = assignees_dict[i]
            inst_id = item[var_names.instance_id]
            rest_minutes_map[inst_id] = None

            if i != 0:
                prev_valid_end = max([x[var_names.valid_end] for x in assignees_dict[i - 1][var_names.assignees]])
                curr_valid_start = min([x[var_names.valid_start] for x in assignees_dict[i][var_names.assignees]])
                rest_minutes_map[inst_id] = round((curr_valid_start - prev_valid_end).total_seconds()/60, 2)\
                    if prev_valid_end < curr_valid_start else 0

        self.pd[var_names.user_rest_minutes] = self.pd[var_names.instance_id].map(rest_minutes_map)
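
    # A worked example of the rest computation above: if the previous instance's
    # last assignment ended at 10:00 and the current instance's first assignment
    # started at 10:45, the rest is 45.0 minutes; overlapping assignments yield 0.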

    def get_aggregate_instance_count(self):
        '''
        Get the total number of instances in the DataFrame.
        :return: (int) total instance count
        '''
        return len(self.pd)

    def get_aggregate_response_effort(self, in_hours=False):
        '''
        Get the total duration of all instances.
        Columns needed: duration
        :param in_hours: (bool) if True, return the total in hours instead of minutes
        :return: (float) total duration of all instances
        '''
        if in_hours:
            return self.pd[var_names.duration].sum()/60
        return self.pd[var_names.duration].sum()

    def get_aggregate_count_of_escalated_instances(self):
        '''
        Get the total number of instances that were escalated at least once.
        Columns needed: escalation_count
        :return: (int) total number of instances escalated
        '''
        return len(self.pd[self.pd[var_names.escalation_count] > 0])

    def get_aggregate_count_of_instances_in_off_hours(self):
        '''
        Get the total number of instances that occurred (started) in off hours.
        :return: (int) total number of off-hour incidents
        '''
        return len(self.pd[self.pd[var_names.off_hour_interruption]])

    def get_aggregate_count_of_instances_acknowledged_by_user(self):
        '''
        Get the total number of instances that were acknowledged by a user.
        Columns needed: user_acknowledgement_time
        :return: (int) number of instances
        '''
        return len(self.pd[self.pd[var_names.user_acknowledgement_time].notnull()])

    def get_aggregate_count_of_instances_resolved_by_user(self):
        '''
        Get the total number of instances that were resolved by a user.
        Columns needed: user_resolution_time
        :return: (int) number of instances
        '''
        return len(self.pd[self.pd[var_names.user_resolution_time].notnull()])

    def get_daily_instance_count(self):
        '''
        Get the number (count) of instances that occurred daily.
        :return: (dict) -> {date: count, ...}
        '''
        return dict() if len(self.pd) == 0\
            else self.pd.groupby(var_names.regional_date)[var_names.instance_id].count().to_dict()
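
    # The daily getters return plain dicts keyed by regional date, e.g.
    # (hypothetical values):
    #
    #   {datetime.date(2023, 1, 1): 4, datetime.date(2023, 1, 2): 7}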

    def get_daily_response_effort(self):
        '''
        Get the total duration of all instances per date.
        :return: (dict) -> {date: total minutes, ...}
        '''
        return dict() if len(self.pd) == 0\
            else self.pd.groupby(var_names.regional_date)[var_names.duration].sum().apply(lambda x: x/60).to_dict()

    def get_daily_count_of_escalated_instances(self):
        '''
        Get the number of instances that were escalated per date.
        :return: (dict) -> {date: count, ...}
        '''
        return dict() if len(self.pd) == 0 else self.pd[self.pd[var_names.escalation_count] > 0]\
            .groupby(var_names.regional_date)[var_names.instance_id].count().to_dict()

    def get_daily_count_of_instances_in_off_hours(self):
        '''
        Get the number of instances that occurred in off hours per date.
        Columns needed: off_hour_interruption, regional_date
        :return: (dict) -> {date: count, ...}
        '''
        return dict() if len(self.pd) == 0 else self.pd[self.pd[var_names.off_hour_interruption]]\
            .groupby(var_names.regional_date)[var_names.instance_id].count().to_dict()

    def get_daily_mean_resolution_time(self):
        '''
        Get the daily mean resolution time of instances.
        :return: (dict) -> {regional date: mean resolution time (minutes), ...}
        '''
        return dict() if len(self.pd) == 0\
            else self.pd.groupby(var_names.regional_date)[var_names.duration].mean().to_dict()

    def get_daily_mean_acknowledgement_time(self):
        '''
        Get the daily mean acknowledgement time of instances.
        :return: (dict) -> {regional date: mean acknowledgement time (minutes), ...}
        '''
        return dict() if len(self.pd) == 0\
            else self.pd.groupby(var_names.regional_date)[var_names.acknowledgement_time].mean().to_dict()

    def get_average_response_effort(self):
        '''
        Get the average response effort of all instances.
        Columns needed: duration
        :return: (float) -> average duration
        '''
        return self.pd[var_names.duration].mean()

    def get_average_acknowledgement_time(self):
        '''
        Get the average acknowledgement time of all instances.
        Columns needed: acknowledgement_time
        :return: (float) -> average ack time
        '''
        return self.pd[var_names.acknowledgement_time].mean()

    def get_average_user_resolution_time(self):
        '''
        Get the average time it takes a user to resolve instances assigned to them.
        Columns needed: user_resolution_time
        :return: (float) -> average user resolution time
        '''
        return self.pd[var_names.user_resolution_time].mean()

    def get_average_user_acknowledgement_time(self):
        '''
        Get the average time it takes a user to acknowledge instances assigned to them.
        Columns needed: user_acknowledgement_time
        :return: (float) -> average user ack time
        '''
        return self.pd[var_names.user_acknowledgement_time].mean()

    def get_longest_response_effort(self):
        '''
        Get the longest duration among all instances.
        Columns needed: duration
        :return: (float) -> max duration
        '''
        return self.pd[var_names.duration].max()

    def get_aggregate_instance_count_within_period(self, start_date, end_date):
        '''
        Get the total number of instances that occurred within a given period. The end date is inclusive.
        Columns needed: regional_date
        :param start_date: (datetime.date) start date
        :param end_date: (datetime.date) end date (inclusive)
        :return: (int) -> total number of instances
        '''
        return len(self.pd[(self.pd[var_names.regional_date] >= start_date) &
                           (self.pd[var_names.regional_date] <= end_date)])

    def get_all_major_instances(self):
        '''
        Get the details of all major (high and critical urgency) instances, sorted by duration in descending order.
        Columns needed: urgency_level, duration, resolved_by, escalation_count
        :return: (list of dict) -> [ {organization_instance_id: , task_title: , service_name: , duration: , ...}, ...]
        '''
        major_urgency_levels = [constants.high_urgency, constants.critical_urgency]
        return self.pd[self.pd[var_names.urgency_level].isin(major_urgency_levels)][[
                   var_names.organization_instance_id, var_names.task_title, var_names.service_name,
                   var_names.urgency_level, var_names.instance_timestamp, var_names.resolved_on, var_names.duration,
                   var_names.resolved_by, var_names.escalation_count]]\
            .sort_values(var_names.duration, ascending=False).to_dict('records')

    def get_longest_major_instances(self, count):
        '''
        Get the details of the major (high and critical urgency) instances that had the longest duration.
        Columns needed: urgency_level, duration
        :param count: (int) number of the longest instances to get
        :return: (list of dict) -> [ {organization_instance_id: , task_title: , service_name: , duration: }, ...]
        '''
        major_urgency_levels = [constants.high_urgency, constants.critical_urgency]
        return self.pd[self.pd[var_names.urgency_level].isin(major_urgency_levels)][[
                   var_names.organization_instance_id, var_names.task_title,
                   var_names.service_name, var_names.duration]].sort_values(var_names.duration, ascending=False)[:count]\
            .to_dict('records')

    def get_loudest_services_instance_count(self, count):
        '''
        Get the services that had the most instances occur on them.
        Columns needed: instance_id, service_id
        :param count: (int) number of loudest services to get
        :return: (list of dict) -> [{service name: , count: }, ...]
        '''
        # Sort by count (descending) before slicing so the loudest services are kept.
        loudest_servs = dict() if len(self.pd) == 0 else\
            self.pd.groupby([var_names.service_id, var_names.service_name])[
                var_names.instance_id].count().sort_values(ascending=False)[:count].to_dict()

        # We are doing this extra step to make sure that when the name of the service changes in the middle,
        # they are not falsely identified as different services. Identification should be done only on the service ID.
        pre_data = dict()
        for key in loudest_servs:
            srv_id = key[0]
            srv_name = key[1]
            if srv_id not in pre_data:
                pre_data[srv_id] = {var_names.service_name: [], var_names.count: 0}
            pre_data[srv_id][var_names.service_name].append(srv_name)
            pre_data[srv_id][var_names.count] += loudest_servs[key]

        data = list(pre_data.values())
        for item in data:
            item[var_names.service_name] = ', '.join(item[var_names.service_name])
        data = helpers.sorted_list_of_dict(data, var_names.count, descending=True)
        return data
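
    # A worked example of the renaming merge above (hypothetical values): if
    # service 5 appears both as (5, 'Payments') with 3 instances and as
    # (5, 'Payments v2') with 2, the result keeps a single entry with
    # service_name 'Payments, Payments v2' and count 5.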

    def get_loudest_services_details(self, count):
        '''
        Get the details of the loudest services determined by the number of times instances have occurred on them.
        :param count: (int) number of results to return
        :return: (list of dict)
        '''
        # Sort by count (descending) before slicing so the loudest services are kept.
        inst_counts = dict() if len(self.pd) == 0\
            else self.pd.groupby(var_names.service_id)[var_names.instance_id].count()\
            .sort_values(ascending=False)[:count].to_dict()
        serv_ids = list(inst_counts.keys())

        serv_names = self.pd[self.pd[var_names.service_id].isin(serv_ids)].groupby(var_names.service_id)[
            var_names.service_name].max().to_dict()

        serv_ref_ids = self.pd[self.pd[var_names.service_id].isin(serv_ids)].groupby(var_names.service_id)[
            var_names.service_ref_id].max().to_dict()

        total_duration = self.pd[self.pd[var_names.service_id].isin(serv_ids)].groupby(var_names.service_id)[
            var_names.duration].sum().to_dict()

        # Summing a column of lists concatenates the per-instance impact records of each service.
        impacted_instances = self.pd[(self.pd[var_names.service_id].isin(serv_ids)) &
                                     (self.pd[var_names.impacted_business_services].notnull())]\
            .groupby(var_names.service_id)[var_names.impacted_business_services].sum().to_dict()
        impact_duration = dict()
        for key in impacted_instances:
            impact_duration[key] = sum([(item[var_names.valid_end] - item[var_names.valid_start]).total_seconds()/60
                                        for item in impacted_instances[key]])

        new_data = []
        for srv in inst_counts:
            new_data.append({
                var_names.service_id: srv,
                var_names.service_name: serv_names[srv],
                var_names.service_ref_id: serv_ref_ids[srv],
                var_names.count: inst_counts[srv],
                var_names.duration: total_duration[srv],
                var_names.downtime: impact_duration[srv] if srv in impact_duration else 0
            })

        new_data = helpers.sorted_list_of_dict(new_data, var_names.count, descending=True)
        return new_data

    def get_most_business_impacting_services(self, count):
        '''
        Get the list of services that have impacted business services the most.
        :param count: (int) number of most impacting services to get
        :return: (list of dict) -> [{service name: , count: , business services: []}, ...]
        '''
        # Sort by impact count (descending) before slicing so the most impacting services are kept.
        impact_count_dict = self.pd[self.pd[var_names.impacted_business_services].notnull()]\
                                .groupby([var_names.service_id, var_names.service_name])[
                                var_names.impacted_business_services].count()\
                                .sort_values(ascending=False)[:count].to_dict()
        # Keep the full per-service impact lists; they are looked up by key below.
        impact_bus_dict = self.pd[self.pd[var_names.impacted_business_services].notnull()]\
                              .groupby([var_names.service_id, var_names.service_name])[
                              var_names.impacted_business_services].apply(list).to_dict()

        ord_impact_count_items = sorted(impact_count_dict.items(), key=lambda y: y[1], reverse=True)
        ord_keys = [x[0] for x in ord_impact_count_items]

        data = []
        for key in ord_keys:
            # Create business services as list of dicts with name and ref_id
            unique_bus_services = {}
            for item in impact_bus_dict[key]:
                for bus_serv in item:
                    bus_id = bus_serv[var_names.business_service_id]
                    if bus_id not in unique_bus_services:
                        unique_bus_services[bus_id] = {
                            var_names.business_service_name: bus_serv[var_names.business_service_name],
                            var_names.business_service_ref_id: bus_serv[var_names.business_service_ref_id]
                        }
            business_services = list(unique_bus_services.values())

            data.append({
                var_names.service_name: key[1],
                var_names.count: impact_count_dict[key],
                var_names.business_services: business_services
            })

        return data

    def get_longest_impacted_business_services(self, count=None):
        '''
        Get the longest business impacts.
        :param count: (int) number of the longest business impacts to get
        :return: (list of dict) -> [{ impact details }, ...]
        '''
        impacted_instances = self.pd[self.pd[var_names.impacted_business_services].notnull()].to_dict('records')

        new_data = []
        for item in impacted_instances:
            for imp_bus in item[var_names.impacted_business_services]:
                service_dict = {
                    var_names.service_name: item[var_names.service_name],
                    var_names.service_ref_id: item[var_names.service_ref_id]
                }
                business_service_dict = {
                    var_names.business_service_name: imp_bus[var_names.business_service_name],
                    var_names.business_service_ref_id: imp_bus[var_names.business_service_ref_id]
                }
                new_data.append({
                    var_names.organization_instance_id: item[var_names.organization_instance_id],
                    var_names.task_title: item[var_names.task_title],
                    var_names.created_on: item[var_names.instance_timestamp],
                    var_names.urgency_level: item[var_names.urgency_level],
                    var_names.service_name: service_dict,
                    var_names.business_service_name: business_service_dict,
                    var_names.duration: round((imp_bus[var_names.valid_end] -
                                              imp_bus[var_names.valid_start]).total_seconds()/60, 2)
                })

        new_data = helpers.sorted_list_of_dict(new_data, sort_by_key=var_names.duration, descending=True)

        return new_data if count is None else new_data[:count]

    def get_aggregate_business_impacting_instance_count(self):
        '''
        Get the number of instances that impacted business services.
        :return: (int) number of instances
        '''
        return len(self.pd[self.pd[var_names.impacted_business_services].notnull()])

    def get_aggregate_business_downtime(self):
        '''
        Get the total number of minutes business services were impacted.
        Only select the max impact duration per instance.
        :return: (float) number of minutes of downtime
        '''
        impacted_instances = self.pd[self.pd[var_names.impacted_business_services].notnull()][[
            var_names.instance_id, var_names.impacted_business_services]].to_dict('records')

        downtime = 0
        for item in impacted_instances:
            downtime += max([(imp_bus[var_names.valid_end] - imp_bus[var_names.valid_start]).total_seconds()/60
                             for imp_bus in item[var_names.impacted_business_services]])
        return round(downtime, 2)

    def get_business_impacting_instances(self):
        '''
        Get all the instances that impacted business services.
        :return: (list of dict) -> [{ instance details }, ...]
        '''
        impacted_instances = self.pd[self.pd[var_names.impacted_business_services].notnull()].to_dict('records')
        return impacted_instances

    def get_weekly_interval_instance_count(self, interval_start, interval_end):
        '''
        Get the average number of incidents that occurred per week within a daily time interval.
        :param interval_start: (datetime.time) start time of the interval (inclusive)
        :param interval_end: (datetime.time) end time of the interval (inclusive)
        :return: (int) average weekly number of incidents, rounded
        '''
        temp_pd = self.pd[(self.pd[var_names.regional_time] >= interval_start) &
                          (self.pd[var_names.regional_time] <= interval_end)]
        if len(temp_pd) == 0:
            count = 0
        else:
            # Resample by week on the regional timestamp and average the weekly counts.
            count = temp_pd\
                .set_index(var_names.regional_timestamp)\
                .resample('W')[var_names.regional_date]\
                .count()\
                .fillna(0)\
                .mean()
        return 0 if numpy.isnan(count) else int(round(count))
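
    # A sketch of the weekly averaging above (hypothetical counts): interval
    # instances falling into weeks of 3, 5 and 4 incidents average to 4.
    # Note that resample('W') requires the regional_timestamp index to be
    # datetime-like.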

    @staticmethod
    def regular_and_fatigued_instance_count(actual_count, ideal_count):
        '''
        Splits the total number of instance counts into acceptable regular and fatigue counts
        as per the provided ideal count.
        :param actual_count: (int) number of instances that actually occurred
        :param ideal_count: (int) number of instances that are accepted as the ideal count
        :return: (tuple) -> acceptable count, fatigued count
        '''
        if actual_count > ideal_count:
            regular = ideal_count
            return regular, actual_count - regular
        return actual_count, 0
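
    # For example, regular_and_fatigued_instance_count(7, 5) returns (5, 2),
    # while regular_and_fatigued_instance_count(3, 5) returns (3, 0).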

    def weekly_daytime_instance_stats(self):
        '''
        Get the number of incidents that occurred during the daytime/work hours.
        :return: (tuple) -> total, regular, fatigued incident counts
        '''
        total = self.get_weekly_interval_instance_count(configs.daytime_hours[0], configs.daytime_hours[1])
        regular, fatigued = self.regular_and_fatigued_instance_count(
            total, configs.ideal_weekly_daytime_hours_incident_count
        )
        return total, regular, fatigued

    def weekly_after_hours_instance_stats(self):
        '''
        Get the number of incidents that occurred after hours.
        :return: (tuple) -> total, regular, fatigued incident counts
        '''
        total = self.get_weekly_interval_instance_count(configs.after_hours[0], configs.after_hours[1])
        regular, fatigued = self.regular_and_fatigued_instance_count(
            total, configs.ideal_weekly_after_hours_incident_count
        )
        return total, regular, fatigued

    def weekly_sleep_hours_instance_stats(self):
        '''
        Get the number of incidents that occurred during sleep hours.
        :return: (tuple) -> total, regular, fatigued incident counts
        '''
        total = self.get_weekly_interval_instance_count(configs.sleep_hours[0], configs.sleep_hours[1])
        regular, fatigued = self.regular_and_fatigued_instance_count(
            total, configs.ideal_weekly_sleep_hours_incident_count
        )
        return total, regular, fatigued

    def get_average_user_interruption_minutes_per_day(self):
        '''
        Get the average number of minutes a user is interrupted for per day.
        Columns needed: user_interruption_minutes
        :return: (float) number of minutes
        '''
        return round((self.pd.groupby(var_names.regional_date)[var_names.user_interruption_minutes].mean()).mean(), 2)

    def get_average_rest_minutes_between_instances(self):
        '''
        Get the average number of minutes of rest a user gets in between instances.
        Columns needed: user_rest_minutes, valid_start
        :return: (float) number of minutes
        '''
        return round((self.pd.groupby(var_names.valid_start)[var_names.user_rest_minutes].mean()).mean(), 2)

    def get_user_hourly_effectiveness(self):
        '''
        Get the effectiveness of a user per hour of the day, measured as the percentage
        of instances in that hour that the user resolved.
        :return: (list) of list -> [ [hour, resolved percentage], ... ]
        '''
        hourly_data = []
        for i in range(0, 24):
            start_ = datetime.time(i, 0)
            # Use the last representable moment of the hour so times with seconds
            # (e.g. 10:59:30) are not dropped from the bucket.
            end_ = datetime.time(i, 59, 59, 999999)

            sub_frame = self.pd[(self.pd[var_names.regional_time] >= start_) &
                                (self.pd[var_names.regional_time] <= end_)]
            incident_count = len(sub_frame)
            resolved_count = len(sub_frame[sub_frame[var_names.user_resolution_time].notnull()])

            hourly_data.append([i, round(100.0 * resolved_count / incident_count, 2) if incident_count != 0 else 0])

        return hourly_data
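

# A minimal end-to-end usage sketch (illustrative only; the timezone, dates and
# data shapes are hypothetical):
#
#   analyzer = InstanceAnalyzer('Asia/Dhaka', instance_list)
#   analyzer.filter_by_period(datetime.date(2023, 1, 1), datetime.date(2023, 1, 31))
#   analyzer.create_escalation_count_column()
#   analyzer.create_acknowledgement_time_columns()
#   print(analyzer.get_aggregate_instance_count())
#   print(analyzer.get_daily_mean_acknowledgement_time())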
