import sys
import os
import errno
import numpy as np
from blimpy import read_header, Waterfall
def split_fil_generator(fil_fn, f_window, f_shift=None):
    """
    Create a generator of smaller Waterfall objects by 'splitting' an input
    filterbank file according to the number of frequency samples.

    Each chunk is opened with its own `f_start`/`f_stop` range, so only one
    window of `f_window` channels is requested from `Waterfall` at a time
    rather than the whole observation. This is what makes the function
    usable on very large files.

    Parameters
    ----------
    fil_fn : str
        Filterbank filename with .fil extension
    f_window : int
        Number of frequency samples per new filterbank file
    f_shift : int, optional
        Number of samples to shift when splitting filterbank. If
        None, defaults to `f_shift=f_window` so that there is no
        overlap between new filterbank files

    Yields
    ------
    split : Waterfall
        A blimpy Waterfall object containing a smaller section of the data

    Raises
    ------
    ValueError
        If `f_window` or `f_shift` is not positive. Raised on the first
        iteration, before the file is touched.
    """
    if f_shift is None:
        f_shift = f_window
    # A non-positive step would never move the window past the end of the
    # band, so the loop below would never terminate.
    if f_window <= 0:
        raise ValueError('f_window must be a positive number of samples')
    if f_shift <= 0:
        raise ValueError('f_shift must be a positive number of samples')

    # Read the header once instead of re-parsing the file for every key.
    header = read_header(fil_fn)
    fch1 = header[b'fch1']
    nchans = header[b'nchans']
    df = header[b'foff']

    # Note that df is negative!
    f_start = fch1 + f_window * df
    f_stop = fch1
    f_lowest = fch1 + nchans * df
    f_step = f_shift * df

    # Iterates down frequencies, starting from highest
    while f_start >= f_lowest:
        split = Waterfall(fil_fn, f_start=f_start, f_stop=f_stop)

        # Fix some header values so they describe the chunk rather than
        # the original file
        split.header[b'fch1'] = split.file_header[b'fch1'] = f_stop
        split.header[b'nchans'] = split.file_header[b'nchans'] = f_window
        yield split

        f_start += f_step
        f_stop += f_step
def split_fil(fil_fn, output_dir, f_window, f_shift=None):
    """
    Create a set of new filterbank files by 'splitting' an input filterbank
    file according to the number of frequency samples.

    Output files are named ``<f_window>_<index>.fil`` (index zero-padded to
    four digits) and are written in the order the chunks are generated.

    Parameters
    ----------
    fil_fn : str
        Filterbank filename with .fil extension
    output_dir : str
        Directory for new filterbank files. It is created if it does not
        exist. An empty string means the current working directory.
    f_window : int
        Number of frequency samples per new filterbank file
    f_shift : int, optional
        Number of samples to shift when splitting filterbank. If
        None, defaults to `f_shift=f_window` so that there is no
        overlap between new filterbank files

    Returns
    -------
    split_fns : list of str
        List of new filenames
    """
    # An empty output_dir is left as-is (files land in the current
    # directory); indexing output_dir[-1] on '' would raise IndexError.
    if output_dir and not output_dir.endswith('/'):
        output_dir = output_dir + '/'

    if output_dir:
        try:
            os.makedirs(output_dir)
        except OSError as e:
            # An already-existing directory is fine; anything else is not.
            if e.errno != errno.EEXIST:
                raise

    split_generator = split_fil_generator(fil_fn,
                                          f_window,
                                          f_shift=f_shift)

    # Iterates down frequencies, starting from highest
    split_fns = []
    for i, split in enumerate(split_generator):
        output_fn = output_dir + '%s_%04d.fil' % (f_window, i)
        split.write_to_fil(output_fn)
        split_fns.append(output_fn)
        print('Saved %s' % output_fn)
    return split_fns
def _window_bounds(length, window, shift):
    """Return (start, stop) index pairs for windows sliding along one axis.

    Windows start at 0 and advance by `shift`. A window that would run past
    `length` is clipped, and no window is produced that starts at or beyond
    `length`. The first window is always returned, even when `length` is 0.
    """
    bounds = []
    start = 0
    while True:
        stop = min(start + window, length)
        bounds.append((start, stop))
        # The axis is fully covered once a window reaches its end.
        if stop >= length:
            break
        start += shift
        # With shift > window the next start can jump past the end; such a
        # window would be empty, so stop here instead of emitting it.
        if start >= length:
            break
    return bounds


def split_array(data, f_sample_num=None, t_sample_num=None,
                f_shift=None, t_shift=None,
                f_trim=False, t_trim=False):
    """
    Split a NumPy array into a list of smaller arrays according to limits in
    frequency and time. This doesn't reduce/combine data, it simply cuts the
    data into smaller chunks.

    Frames are produced in row-major order: for each window along the first
    (time) axis, every window along the second (frequency) axis. Frames at
    the trailing edges may be smaller than requested unless trimming is
    enabled.

    Parameters
    ----------
    data : ndarray
        Two-dimensional time-frequency data, indexed as [time, frequency]
    f_sample_num : int, optional
        Number of frequency samples (columns) per frame. If None, defaults
        to the full width of `data`
    t_sample_num : int, optional
        Number of time samples (rows) per frame. If None, defaults to the
        full height of `data`
    f_shift : int, optional
        Number of columns between the starts of consecutive frames. If
        None, defaults to `f_sample_num` (no overlap)
    t_shift : int, optional
        Number of rows between the starts of consecutive frames. If None,
        defaults to `t_sample_num` (no overlap)
    f_trim : bool, optional
        If True, drop frames whose width differs from `f_sample_num`
    t_trim : bool, optional
        If True, drop frames whose height differs from `t_sample_num`

    Returns
    -------
    split_data : list of ndarray
        List of new time-frequency data frames, obtained by slicing `data`

    Raises
    ------
    SystemExit
        If `data` is not a NumPy array, or if a shift or sample number is
        given and is not positive.
    """
    if not isinstance(data, np.ndarray):
        sys.exit("Input data must be a NumPy array!")
    height, width = data.shape

    f_window = width if f_sample_num is None else f_sample_num
    t_window = height if t_sample_num is None else t_sample_num

    if f_shift is None:
        f_shift = f_window
    elif f_shift <= 0:
        sys.exit("Invalid x-direction shift!")
    if t_shift is None:
        t_shift = t_window
    elif t_shift <= 0:
        sys.exit("Invalid y-direction shift!")

    # An explicit non-positive window also makes the defaulted shift
    # non-positive, which would keep the sliding window from ever advancing.
    if f_sample_num is not None and f_sample_num <= 0:
        sys.exit("Invalid number of frequency samples!")
    if t_sample_num is not None and t_sample_num <= 0:
        sys.exit("Invalid number of time samples!")

    t_bounds = _window_bounds(height, t_window, t_shift)
    f_bounds = _window_bounds(width, f_window, f_shift)

    split_data = [data[y_start:y_stop, x_start:x_stop]
                  for y_start, y_stop in t_bounds
                  for x_start, x_stop in f_bounds]

    # Filter out frames that aren't the specified size
    if t_trim:
        split_data = [frame for frame in split_data
                      if frame.shape[0] == t_window]
    if f_trim:
        split_data = [frame for frame in split_data
                      if frame.shape[1] == f_window]
    return split_data