Coverage for src/spectroflat/utils/collections.py: 95%
38 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-03-28 07:59 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2024-03-28 07:59 +0000
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3"""
4The `Collections` utility provides methods to deal with Collections (lists, dictionaries, arrays, ...)
6@author: hoelken
7"""
9from typing import Callable
11import numpy as np
class Collections:
    """
    Static utility for handling collections.
    """

    @staticmethod
    def as_float_array(orig) -> np.ndarray:
        """
        Creates a copy of the orig and converts all values to `float32`

        ### Params
        - orig: an object that can be converted to a list

        ### Returns
        Array with float values converted from the orig
        """
        return np.array(list(orig), dtype=np.float32)

    @staticmethod
    def as_int_array(orig) -> np.ndarray:
        """
        Creates a copy of the orig and converts all values to `int`

        ### Params
        - orig: an object that can be converted to a list

        ### Returns
        Array with int values converted from the orig
        """
        return np.array(list(orig), dtype=int)

    @staticmethod
    def bin(orig: np.ndarray, binning: list, method: Callable = np.mean) -> np.ndarray:
        """
        Bins along a given set of axis.

        The axes are processed one after another (axis 0 first), each via `Collections.bin_axis`.

        ### Params
        - orig: The original numpy array
        - binning: A list of binning values.
            - Length of the list must match the number of axis (i.e. the length of the `orig.shape`).
            - Per axis set `1` for no binning, `-1` for bin all and any positive number
              to specify the bin size along the axis.
        - method: The function to apply to the bin (e.g. np.max for max pooling, np.mean for average)

        ### Returns
        The binned array. If every entry of `binning` is `1` the original object itself
        (not a copy) is returned.

        ### Raises
        - ValueError: if the length of `binning` does not match the number of axis of `orig`
        """
        if np.all(np.array(binning) == 1):
            # no binning whatsoever, return original
            return orig

        if len(orig.shape) != len(binning):
            raise ValueError(f"Shape {orig.shape} and number of binning axis {binning} don't match.")

        data = orig
        for ax, binsize in enumerate(binning):
            data = Collections.bin_axis(data, binsize, axis=ax, method=method)
        return data

    @staticmethod
    def bin_axis(data: np.ndarray, binsize: int, axis: int = 0, method: Callable = np.mean) -> np.ndarray:
        """
        Bins an array along a given axis.

        The number of dimensions is preserved: the binned axis stays at position `axis`
        and only its length changes.

        ### Params
        - data: The original numpy array
        - binsize: The size of each bin. Any negative value collapses the whole axis into one bin.
          Trailing entries that do not fill a complete bin are dropped.
        - axis: The axis to bin along
        - method: The function to apply to the bin (e.g. np.max for max pooling, np.mean for average).
          It is called as `method(array, axis)` and must reduce along that axis.

        ### Returns
        The binned array

        ### Raises
        - ValueError: if `binsize` is `0`
        """
        if binsize == 0:
            raise ValueError("binsize must be a positive number or negative (bin all), not 0.")

        if binsize < 0:
            # Collapse the full axis, then re-insert it with length 1 at the *same* position,
            # so the layout matches what the positive-binsize path below produces.
            return np.expand_dims(method(data, axis=axis), axis)

        n_bins = int(data.shape[axis] // binsize)
        # Move the target axis to the front so every bin is a block of leading entries.
        swapped = np.swapaxes(data, 0, axis)
        binned = [
            method(np.take(swapped, np.arange(int(i * binsize), int(i * binsize + binsize)), 0), 0)
            for i in range(n_bins)
        ]
        # Swapping the same two axes again restores the original axis order.
        return np.swapaxes(np.array(binned), 0, axis)

    @staticmethod
    def remove_sigma_outliers(data: np.ndarray, s: float = 5) -> np.ndarray:
        """
        Removes outliers from the data set.

        Every value that deviates from the mean of `data` by more than `s` times the
        standard deviation of `data` is replaced by that mean. Mean and standard deviation
        are computed over the complete input, i.e. including the outliers themselves.

        ### Params
        - data: The data to clean
        - s: the factor of sigma to clean for. Default is 5 Sigma (99.99994%)

        ### Returns
        A cleaned copy of the dataset, the input is not modified
        """
        mean_val = np.mean(data)
        threshold = s * np.std(data)
        cleaned = data.copy()
        cleaned[np.abs(data - mean_val) > threshold] = mean_val
        return cleaned