Coverage for src/spectroflat/utils/collections.py: 95%

38 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-03-28 07:59 +0000

1#!/usr/bin/env python3 

2# -*- coding: utf-8 -*- 

3""" 

4The `Collections` utility provides methods to deal with Collections (lists, dictionaries, arrays, ...) 

5 

6@author: hoelken 

7""" 

8 

9from typing import Callable 

10 

11import numpy as np 

12 

13 

class Collections:
    """
    Static utility for handling collections.

    All methods are static; the class only serves as a namespace.
    """

    @staticmethod
    def as_float_array(orig) -> np.ndarray:
        """
        Creates a copy of the orig and converts all values to `float32`

        ### Params
        - orig: an object that can be converted to a list

        ### Returns
        Array with float values converted from the orig
        """
        return np.array(list(orig), dtype=np.float32)

    @staticmethod
    def as_int_array(orig) -> np.ndarray:
        """
        Creates a copy of the orig and converts all values to `int`

        ### Params
        - orig: an object that can be converted to a list

        ### Returns
        Array with int values converted from the orig
        """
        return np.array(list(orig), dtype=int)

    @staticmethod
    def bin(orig: np.ndarray, binning: list, method: Callable = np.mean) -> np.ndarray:
        """
        Bins along a given set of axis.

        ### Params
        - orig: The original numpy array
        - binning: A list of binning values.
            - Length of the list must match the number of axis (i.e. the length of the `orig.shape`).
            - Per axis set `1` for no binning, `-1` for bin all and any positive number
              to specify the bin size along the axis.
        - method: The function to apply to the bin (e.g. np.max for max pooling, np.mean for average)
          It is called with the data and an `axis` argument.

        ### Returns
        The binned array. If every entry of `binning` is `1` the original
        array object itself is returned (no copy is made).

        ### Raises
        - ValueError: if the length of `binning` does not match the number of axis of `orig`.
        """
        if np.all(np.array(binning) == 1):
            # no binning whatsoever, return original
            return orig

        if len(orig.shape) != len(binning):
            raise ValueError(f"Shape {orig.shape} and number of binning axis {binning} don't match.")

        data = orig
        for ax, binsize in enumerate(binning):
            data = Collections.bin_axis(data, binsize, axis=ax, method=method)
        return data

    @staticmethod
    def bin_axis(data: np.ndarray, binsize: int, axis: int = 0, method: Callable = np.mean) -> np.ndarray:
        """
        Bins an array along a given axis.

        ### Params
        - data: The original numpy array
        - binsize: The size of each bin. A negative value bins the full axis into one bin.
        - axis: The axis to bin along
        - method: The function to apply to the bin (e.g. np.max for max pooling, np.mean for average)

        ### Returns
        The binned array. Elements at the end of the axis that do not fill
        a complete bin are dropped.

        ### Raises
        - ValueError: if `binsize` is `0`.
        """
        if binsize == 0:
            # A bin size of zero would otherwise end in a division by zero
            # followed by an unrelated (or no) error further down.
            raise ValueError("binsize must not be 0.")

        if binsize < 0:
            # NOTE(review): the reduced result is wrapped in a new *leading* axis,
            # i.e. for axis != 0 the collapsed axis ends up at position 0 and not
            # at position `axis`. Kept as is to not alter the shape callers
            # receive -- confirm whether this is intended.
            return np.array([method(data, axis=axis)])

        # Swap the requested axis with axis 0, so that the bins can be taken
        # along the first axis.
        order = np.arange(data.ndim)
        order[0], order[axis] = order[axis], order[0]
        swapped = data.transpose(order)

        n_bins = data.shape[axis] // binsize
        binned = [
            method(np.take(swapped, np.arange(int(i * binsize), int(i * binsize + binsize)), 0), 0)
            for i in np.arange(n_bins)
        ]
        # The swap is its own inverse: applying it again restores the axis order.
        return np.array(binned).transpose(order)

    @staticmethod
    def remove_sigma_outliers(data: np.ndarray, s: float = 5) -> np.ndarray:
        """
        Removes outliers from the data set.

        Every value that deviates more than `s` times the standard deviation
        from the mean of the data is replaced by that mean.

        ### Params
        - data: The data to clean
        - s: the factor of sigma to clean for. Default is 5 Sigma (99.99994%)

        ### Returns
        A cleaned copy of the dataset (the input is not modified)
        """
        cleaned = data.copy()
        mean_val = np.mean(data)
        threshold = s * np.std(data)
        cleaned[np.abs(data - mean_val) > threshold] = mean_val
        return cleaned