"""
Input/Output handling for raw Echoview files.
"""
# This file is part of Echofilter.
#
# Copyright (C) 2020-2022 Scott C. Lowe and Offshore Energy Research Association (OERA)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import csv
import datetime
import os
import textwrap
import warnings
from collections import OrderedDict
import numpy as np
import pandas as pd
import scipy.interpolate
import scipy.ndimage
import skimage.measure
from ..ui import style
from . import utils
ROOT_DATA_DIR = "/data/dsforce/surveyExports"
TRANSECT_FIELD_TYPES = {
"Ping_index": int,
"Distance_gps": float,
"Distance_vl": float,
"Ping_date": str,
"Ping_time": str,
"Ping_milliseconds": float,
"Latitude": float,
"Longitude": float,
"Depth_start": float,
"Depth_stop": float,
"Range_start": float,
"Range_stop": float,
"Sample_count": int,
}


def transect_reader(fname):
"""
    Create a generator which iterates through a survey CSV file.
Parameters
----------
fname: str
Path to survey CSV file.
Returns
-------
generator
        Yields a tuple of ``(metadata, data)``, where ``metadata`` is a dict
        and ``data`` is a :class:`numpy.ndarray`. Each yield corresponds to
        a single row in the file; every row except the header is yielded.
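
    Examples
    --------
    A minimal sketch of iterating over a survey file; the path here is
    hypothetical:

    >>> # "survey_Sv_raw.csv" is a placeholder path
    >>> for metadata, data in transect_reader("survey_Sv_raw.csv"):
    ...     print(metadata["Ping_index"], data.shape)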
"""
metadata_header = []
with open(fname, "rb") as hf:
for i_row, row in enumerate(hf):
try:
row = row.decode("utf-8-sig" if i_row == 0 else "utf-8")
except Exception:
if i_row == 0:
raise
                print(
                    "Row {} of {} contains invalid UTF-8 and will be"
                    " skipped.".format(i_row, fname)
                )
continue
row = row.split(",")
row = [entry.strip() for entry in row]
if i_row == 0:
metadata_header = row
continue
metadata = row[: len(metadata_header)]
metadata_d = OrderedDict()
for k, v in zip(metadata_header, metadata):
if k in TRANSECT_FIELD_TYPES:
metadata_d[k] = TRANSECT_FIELD_TYPES[k](v)
else:
metadata_d[k] = v
data = np.array([float(x) for x in row[len(metadata_header) :]])
yield metadata_d, data


def count_lines(filename):
"""
Count the number of lines in a file.
Parameters
----------
filename : str
Path to file.
Returns
-------
int
Number of lines in file.
"""
    with open(filename, "rb") as f:
        # Sum over an iterator so empty files correctly count as 0 lines
        return sum(1 for _ in f)


def transect_loader(
fname,
skip_lines=0,
warn_row_overflow=None,
row_len_selector="mode",
):
"""
Load an entire survey transect CSV.
Parameters
----------
fname : str
Path to survey CSV file.
skip_lines : int, optional
Number of initial entries to skip. Default is 0.
warn_row_overflow : bool or int, optional
Whether to print a warning message if the number of elements in a
row exceeds the expected number. If this is an int, this is the number
        of times to display the warnings before they are suppressed. If this
is ``True``, the number of outputs is unlimited. If ``None``, the
maximum number of underflow and overflow warnings differ: if
``row_len_selector`` is ``"init"`` or ``"min"``, underflow always produces a
message and the overflow messages stop at 2; otherwise the values are
reversed. Default is ``None``.
row_len_selector : {"init", "min", "max", "median", "mode"}, optional
The method used to determine which row length (number of depth samples)
to use. Default is ``"mode"``, the most common row length across all
the measurement timepoints.
Returns
-------
numpy.ndarray
Timestamps for each row, in seconds. Note: not corrected for timezone
(so make sure your timezones are internally consistent).
numpy.ndarray
Depth of each column, in metres.
numpy.ndarray
Survey signal (Sv, for instance). Units match that of the file.
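
    Examples
    --------
    A minimal usage sketch; the file name is hypothetical:

    >>> # "transect_Sv_raw.csv" is a placeholder path
    >>> timestamps, depths, signals = transect_loader("transect_Sv_raw.csv")
    >>> signals.shape == (len(timestamps), len(depths))
    True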
"""
row_len_selector = row_len_selector.lower()
if row_len_selector in {"init", "min"}:
expand_for_overflow = False
else:
expand_for_overflow = True
if warn_row_overflow is True:
warn_row_overflow = np.inf
if warn_row_overflow is not None:
warn_row_underflow = warn_row_overflow
elif expand_for_overflow:
warn_row_underflow = 2
warn_row_overflow = np.inf
else:
warn_row_underflow = np.inf
warn_row_overflow = 2
# We remove one from the line count because of the header
# which is excluded from output
n_lines = count_lines(fname) - 1
    # Determine the initial row length
for i_line, (_, row) in enumerate(transect_reader(fname)):
if i_line < min(n_lines, max(1, skip_lines)):
continue
n_depths_init = len(row)
break
n_depth_exp = n_depths_init
data = np.empty((n_lines - skip_lines, n_depth_exp))
data[:] = np.nan
timestamps = np.empty((n_lines - skip_lines))
timestamps[:] = np.nan
    row_lengths = np.empty((n_lines - skip_lines), dtype=int)
row_depth_starts = np.empty((n_lines - skip_lines))
row_depth_ends = np.empty((n_lines - skip_lines))
n_warn_overflow = 0
n_warn_underflow = 0
n_entry = 0
for i_line, (meta, row) in enumerate(transect_reader(fname)):
if i_line < skip_lines:
continue
i_entry = i_line - skip_lines
# Track the range of depths used in the row with this length
row_lengths[i_entry] = len(row)
row_depth_starts[i_entry] = meta["Depth_start"]
row_depth_ends[i_entry] = meta["Depth_stop"]
if len(row) > n_depth_exp:
if n_warn_overflow < warn_row_overflow:
print(
"Row {} of {} exceeds expected n_depth of {} with {}".format(
i_line, fname, n_depth_exp, len(row)
)
)
n_warn_overflow += 1
if expand_for_overflow:
data = np.pad(
data,
((0, 0), (0, len(row) - n_depth_exp)),
mode="constant",
constant_values=np.nan,
)
n_depth_exp = len(row)
if len(row) < n_depth_exp:
if n_warn_underflow < warn_row_underflow:
print(
"Row {} of {} shorter than expected n_depth_exp of {} with {}".format(
i_line, fname, n_depth_exp, len(row)
)
)
n_warn_underflow += 1
data[i_entry, : len(row)] = row
else:
data[i_entry, :] = row[:n_depth_exp]
timestamps[i_entry] = datetime.datetime.strptime(
"{}T{}.{:06d}".format(
meta["Ping_date"],
meta["Ping_time"],
int(1000 * float(meta["Ping_milliseconds"])),
),
"%Y-%m-%dT%H:%M:%S.%f",
).timestamp()
n_entry += 1
    # Convert the special missing-data values (extremely large magnitude) into NaNs
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "invalid value encountered in less")
warnings.filterwarnings("ignore", "invalid value encountered in greater")
# 9.9e+37 and -9.9e+37 are special values indicating missing data
# https://support.echoview.com/WebHelp/Reference/File_formats/Export_file_formats/Special_Export_Values.htm
data[data < -1e37] = np.nan
data[data > 1e37] = np.nan
# Trim timestamps dimension down to size
timestamps = timestamps[:n_entry]
data = data[:n_entry]
row_lengths = row_lengths[:n_entry]
row_depth_starts = row_depth_starts[:n_entry]
row_depth_ends = row_depth_ends[:n_entry]
# Work out what row length we should return
if row_len_selector == "init":
n_depth_use = n_depths_init
elif row_len_selector == "min":
n_depth_use = np.min(row_lengths)
elif row_len_selector == "max":
n_depth_use = np.max(row_lengths)
elif row_len_selector == "median":
n_depth_use = np.median(row_lengths)
# If the median is half-way between two values, round up
if n_depth_use not in row_depth_starts:
n_depth_use = int(np.round(n_depth_use))
# If the median is still not between values, drop the last value
# to make the array be odd, guaranteeing the median is an observed
# value, not an intermediary.
if n_depth_use not in row_depth_starts:
n_depth_use = np.median(row_lengths[:-1])
elif row_len_selector == "mode":
n_depth_use = utils.mode(row_lengths)
else:
raise ValueError(
"Unsupported row_len_selector value: {}".format(row_len_selector)
)
    # Use the depth range declared by the rows whose length matches the
    # selected number of entries.
if row_len_selector == "median":
d_start = np.median(row_depth_starts[row_lengths == n_depth_use])
d_stop = np.median(row_depth_ends[row_lengths == n_depth_use])
else:
d_start = utils.mode(row_depth_starts[row_lengths == n_depth_use])
d_stop = utils.mode(row_depth_ends[row_lengths == n_depth_use])
depths = np.linspace(d_start, d_stop, n_depth_use)
# Interpolate depths to get a consistent sampling grid
interp_kwargs = dict(nan_threshold=0.3, assume_sorted=True)
for i_entry, (nd, d0, d1) in enumerate(
zip(row_lengths, row_depth_starts, row_depth_ends)
):
if d0 < d1:
data[i_entry, :n_depth_use] = utils.interp1d_preserve_nan(
np.linspace(d0, d1, nd),
data[i_entry, :nd],
depths,
**interp_kwargs,
)
else:
data[i_entry, :n_depth_use] = utils.interp1d_preserve_nan(
np.linspace(d1, d0, nd),
data[i_entry, :nd][::-1],
depths,
**interp_kwargs,
)
# Crop the data down to size
data = data[:, :n_depth_use]
return timestamps, depths, data


def evl_reader(fname):
"""
EVL file reader.
Parameters
----------
fname : str
Path to .evl file.
Returns
-------
generator
A generator which yields the timestamp (in seconds), depth (in
metres), and status (int) for each entry. Note that the timestamp is
not corrected for timezone (so make sure your timezones are internally
consistent).
"""
with open(fname, "r") as hf:
continuance = True
for i_row, row in enumerate(csv.reader(hf, delimiter=" ")):
if i_row == 0:
continue
if len(row) < 4:
if not continuance:
raise ValueError("Trying to skip data after parsing began")
continue
continuance = False
timestamp = evdtstr2timestamp(row[0], row[1])
if len(row[2]) > 0:
raise ValueError("row[2] was non-empty: {}".format(row[2]))
yield timestamp, float(row[3]), int(row[4])


def evl_loader(fname, special_to_nan=True, return_status=False):
"""
EVL file loader.
Parameters
----------
fname : str
Path to .evl file.
    special_to_nan : bool, optional
        Whether to replace the special value, ``-10000.99``, which indicates
        no depth value, with NaN. Default is ``True``.
        https://support.echoview.com/WebHelp/Reference/File_formats/Export_file_formats/Special_Export_Values.htm
    return_status : bool, optional
        Whether to also return the status codes. Default is ``False``.
    Returns
    -------
    numpy.ndarray of floats
        Timestamps, in seconds.
    numpy.ndarray of floats
        Depth, in metres.
    numpy.ndarray of ints, optional
        Status codes. Only returned if ``return_status=True``.
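
    Examples
    --------
    A sketch of both call signatures; the path is hypothetical:

    >>> # "line.evl" is a placeholder path
    >>> timestamps, depths = evl_loader("line.evl")
    >>> timestamps, depths, statuses = evl_loader("line.evl", return_status=True)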
"""
timestamps = []
values = []
statuses = []
for timestamp, value, status in evl_reader(fname):
timestamps.append(timestamp)
values.append(value)
statuses.append(status)
timestamps = np.array(timestamps)
values = np.array(values)
statuses = np.array(statuses)
if special_to_nan:
# Replace the special value -10000.99 with NaN
# https://support.echoview.com/WebHelp/Reference/File_formats/Export_file_formats/Special_Export_Values.htm
values[np.isclose(values, -10000.99)] = np.nan
if return_status:
return timestamps, values, statuses
return timestamps, values


def timestamp2evdtstr(timestamp):
"""
Convert a timestamp into an Echoview-compatible datetime string.
The output is in the format "CCYYMMDD HHmmSSssss", where:
| CC: century
| YY: year
| MM: month
| DD: day
| HH: hour
| mm: minute
| SS: second
| ssss: 0.1 milliseconds
Parameters
----------
timestamp : float
Number of seconds since Unix epoch.
Returns
-------
datetimestring : str
Datetime string in the Echoview-compatible format
"CCYYMMDD HHmmSSssss".
"""
# Datetime must be in the format CCYYMMDD HHmmSSssss
# where ssss = 0.1 milliseconds.
# We have to manually determine the number of "0.1 milliseconds"
# from the microsecond component.
dt = datetime.datetime.fromtimestamp(timestamp)
return "{}{:04d}".format(dt.strftime("%Y%m%d %H%M%S"), round(dt.microsecond / 100))


def evdtstr2timestamp(datestr, timestr=None):
"""
Convert an Echoview-compatible datetime string into a Unix epoch timestamp.
Parameters
----------
datestr : str
Datetime string in the Echoview-compatible format
``"CCYYMMDD HHmmSSssss"``, or (if timestr is also provided) just
the date part, ``"CCYYMMDD"``.
timestr : str, optional
Time string in the Echoview-compatible format "HHmmSSssss".
Returns
-------
timestamp : float
Number of seconds since Unix epoch.
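
    Examples
    --------
    Round-tripping through :func:`timestamp2evdtstr`; both functions use
    local time, so the pair is self-consistent on any one machine:

    >>> ts = evdtstr2timestamp("20200102", "0304056789")
    >>> timestamp2evdtstr(ts)
    '20200102 0304056789'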
"""
if timestr:
datestr = datestr + " " + timestr
return datetime.datetime.strptime(datestr, "%Y%m%d %H%M%S%f").timestamp()


def evr_reader(fname, parse_echofilter_regions=True):
"""
Echoview region file (EVR) reader.
Parameters
----------
fname : str
Path to .evr file.
    parse_echofilter_regions : bool, optional
        Whether to separate out echofilter-generated regions (passive,
        removed vertical bands, and removed patches) from other regions.
        Default is ``True``.
Returns
-------
    regions_passive : list of tuples, optional
        Start and end timestamps for passive regions. Only returned if
        ``parse_echofilter_regions=True``.
    regions_removed : list of tuples, optional
        Start and end timestamps for removed vertical bands. Only returned
        if ``parse_echofilter_regions=True``.
    regions_patch : list of lists, optional
        Points defining each bad data patch. Only returned if
        ``parse_echofilter_regions=True``.
    regions_other : list of dicts
        Dictionaries mapping creation type to the points defining each
        region. If ``parse_echofilter_regions=False``, this is the only
        output and contains all regions.
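
    Examples
    --------
    A sketch of both call modes; the path is hypothetical:

    >>> # "transect.evr" is a placeholder path
    >>> passive, removed, patches, other = evr_reader("transect.evr")
    >>> all_regions = evr_reader("transect.evr", parse_echofilter_regions=False)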
"""
regions_passive = []
regions_removed = []
regions_patch = []
regions_other = []
# Line 1: EVRG file version header
with open(fname, "r") as hf:
line = hf.readline()
if "EVRG" not in line:
raise EnvironmentError("This is not an EVR/EVRG file")
# Line 2: Number of regions
line = hf.readline().strip("\n\r")
n_regions = int(line)
for _ in range(n_regions):
# Line 3: Intentionally left blank
line = hf.readline().strip("\n\r")
if len(line):
print(
"Badly formatted EVRG file. Separating line is not blank: {}".format(
line
)
)
# Individual region
region_is_passive = False
region_is_removed = False
region_is_patch = False
# Region header
line = hf.readline().strip("\n\r")
            line = line.replace("  ", " ")
lparts = line.split(" ")
if lparts[0] != "13":
raise EnvironmentError(
"Only Region structure version 13 is supported ({} given).".format(
lparts[0]
)
)
n_points = int(lparts[1])
_region_id = int(lparts[2]) # noqa: F841
# Selected indicator: lparts[3] == 0
region_ctype = int(lparts[4])
# Dummy: lparts[5] == -1
_has_bbox = bool(int(lparts[6])) # noqa: F841
_left = evdtstr2timestamp(lparts[7], lparts[8]) # noqa: F841
_top = float(lparts[9]) # noqa: F841
_right = evdtstr2timestamp(lparts[10], lparts[11]) # noqa: F841
_bottom = float(lparts[12]) # noqa: F841
# Notes
line = hf.readline().strip("\n\r")
n_notes = int(line)
notes = "\n".join(hf.readline() for _ in range(n_notes))
if notes.startswith("Passive data"):
region_is_passive = True
if notes.startswith("Removed data block"):
region_is_removed = True
if notes.startswith("Removed patch"):
region_is_patch = True
# Detection settings
line = hf.readline().strip("\n\r")
n_detset = int(line)
for _ in range(n_detset):
hf.readline()
# Region classification
_region_classification = hf.readline().strip("\n\r") # noqa: F841
# Points
            points = hf.readline().strip().replace("  ", " ").split(" ")
_region_status = int(points.pop()) # noqa: F841
if len(points) % 3 != 0:
print("Points not composed correctly")
if len(points) // 3 != n_points:
print(
"Different number of points to expected: {} vs {}".format(
len(points), n_points
)
)
points = [
(
evdtstr2timestamp(points[i * 3], points[i * 3 + 1]),
float(points[i * 3 + 2]),
)
for i in range(n_points)
]
_region_name = hf.readline().strip("\n\r") # noqa: F841
# Add region to list
if not parse_echofilter_regions:
regions_other.append({region_ctype: points})
continue
if region_is_passive:
if region_ctype != 4:
print(
"Warning: Region creation type is {} not 4".format(region_ctype)
)
regions_passive.append((points[0][0], points[-1][0]))
elif region_is_removed:
if region_ctype != 4:
print(
"Warning: Region creation type is {} not 4".format(region_ctype)
)
regions_removed.append((points[0][0], points[-1][0]))
elif region_is_patch:
if region_ctype != 2:
print(
"Warning: Region creation type is {} not 2".format(region_ctype)
)
regions_patch.append(points)
else:
regions_other.append({region_ctype: points})
if not parse_echofilter_regions:
return regions_other
return regions_passive, regions_removed, regions_patch, regions_other


def regions2mask(
timestamps,
depths,
regions_passive=None,
regions_removed=None,
regions_patch=None,
regions_other=None,
):
"""
Convert regions to mask.
    Takes the output from :func:`evr_reader` and returns a set of masks.
Parameters
----------
timestamps : array_like
Timestamps for each node in the line.
depths : array_like
Depths (in meters) for each node in the line.
regions_passive : list of tuples, optional
Start and end timestamps for passive regions.
regions_removed : list of tuples, optional
Start and end timestamps for removed vertical bands.
regions_patch : list of lists, optional
Start and end timestamps for bad data patches.
    regions_other : list of dicts, optional
        Dictionary mapping creation type to points defining each region.
        Not currently supported; must be ``None``.
Returns
-------
transect : dict
A dictionary with keys:
- "is_passive" : numpy.ndarray
Logical array showing whether a timepoint is of passive data.
Shaped ``(num_timestamps, )``. All passive recording data should
be excluded by the mask.
- "is_removed" : numpy.ndarray
Logical array showing whether a timepoint is entirely removed
by the mask. Shaped ``(num_timestamps, )``.
- "mask_patches" : numpy.ndarray
Logical array indicating which datapoints are inside a patch
from regions_patch (``True``) and should be excluded by the
mask.
Shaped ``(num_timestamps, num_depths)``.
- "mask" : numpy.ndarray
Logical array indicating which datapoints should be kept
(``True``) and which are marked as removed
(``False``) by one of the other three outputs.
Shaped ``(num_timestamps, num_depths)``.
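
    Examples
    --------
    A minimal sketch with synthetic inputs; all values are hypothetical:

    >>> import numpy as np
    >>> timestamps = np.arange(10.0)
    >>> depths = np.linspace(0.0, 50.0, 6)
    >>> # A rectangular bad-data patch given as contour points
    >>> patch = [(2.0, 10.0), (2.0, 30.0), (6.0, 30.0), (6.0, 10.0)]
    >>> transect = regions2mask(
    ...     timestamps,
    ...     depths,
    ...     regions_passive=[],
    ...     regions_removed=[],
    ...     regions_patch=[patch],
    ... )
    >>> transect["mask"].shape
    (10, 6)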
"""
    if regions_other is not None:
        raise NotImplementedError("Other regions are not yet supported.")
    # Treat the default None values as empty lists of regions
    if regions_passive is None:
        regions_passive = []
    if regions_removed is None:
        regions_removed = []
    if regions_patch is None:
        regions_patch = []
is_passive = np.zeros(timestamps.shape, dtype=bool)
is_removed = np.zeros(timestamps.shape, dtype=bool)
for ts_start, ts_end in regions_passive:
is_passive[(ts_start <= timestamps) & (timestamps <= ts_end)] = 1
for ts_start, ts_end in regions_removed:
is_removed[(ts_start <= timestamps) & (timestamps <= ts_end)] = 1
# Create an empty image to store the masked array
mask_patches = np.zeros((len(timestamps), len(depths)), dtype=bool)
dt = np.abs(timestamps[-1] - timestamps[-2])
dd = np.abs(depths[-1] - depths[-2])
for patch in regions_patch:
patch = np.asarray(patch)
# Find closest indices to the contour coordinates given
x = np.searchsorted(timestamps + dt / 2, patch[:, 0], side="left")
y = np.searchsorted(depths + dd / 2, patch[:, 1], side="left")
# Make a mask for this patch
mask_i = np.zeros_like(mask_patches, dtype="bool")
# Create a contour image by using the contour coordinates rounded to their nearest integer value
mask_i[x, y] = 1
# Fill in the hole created by the contour boundary
mask_i = scipy.ndimage.binary_fill_holes(mask_i)
# Add this patch to the overall mask for all patches
mask_patches |= mask_i
# mask_patches shows where bad patches are; mask shows where good data is
mask = ~mask_patches
mask[is_passive | is_removed] = 0
transect = {
"is_passive": is_passive,
"is_removed": is_removed,
"mask": mask,
"mask_patches": mask_patches,
}
return transect


def evl_writer(fname, timestamps, depths, status=1, line_ending="\r\n", pad=False):
r"""
EVL file writer.
Parameters
----------
fname : str
Destination of output file.
timestamps : array_like
Timestamps for each node in the line.
depths : array_like
Depths (in meters) for each node in the line.
status : 0, 1, 2, or 3; optional
Status for the line.
- ``0`` : none
- ``1`` : unverified
- ``2`` : bad
- ``3`` : good
Default is ``1`` (unverified). For more details on line status, see
https://support.echoview.com/WebHelp/Using_Echoview/Echogram/Lines/About_Line_Status.htm
pad : bool, optional
Whether to pad the line with an extra datapoint half a pixel before the
first and after the last given timestamp. Default is ``False``.
line_ending : str, optional
        Line ending. Default is ``"\r\n"``, the standard line ending on Windows/DOS,
as per the specification for the file format.
https://support.echoview.com/WebHelp/Using_Echoview/Exporting/Exporting_data/Exporting_line_data.htm
Set to ``"\n"`` to get Unix-style line endings instead.
Notes
-----
For more details on the format specification, see
https://support.echoview.com/WebHelp/Using_Echoview/Exporting/Exporting_data/Exporting_line_data.htm#Line_definition_file_format
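
    Examples
    --------
    A minimal sketch writing a three-point line; the path and values are
    hypothetical:

    >>> import numpy as np
    >>> timestamps = 1.6e9 + np.array([0.0, 1.0, 2.0])  # seconds since epoch
    >>> depths = np.array([10.0, 10.5, 11.0])  # metres
    >>> evl_writer("line.evl", timestamps, depths, status=1)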
"""
if len(timestamps) != len(depths):
raise ValueError(
"Number of timestamps ({}) and depths ({}) are not equal".format(
len(timestamps), len(depths)
)
)
if pad and len(timestamps) > 1:
timestamps = timestamps[:]
timestamps = np.r_[
timestamps[0] - (timestamps[1] - timestamps[0]) / 2,
timestamps,
timestamps[-1] + (timestamps[-1] - timestamps[-2]) / 2,
]
depths = np.r_[depths[0], depths, depths[-1]]
# The file object will automatically replace \n with our chosen line ending
with open(fname, "w+", encoding="utf-8-sig", newline=line_ending) as hf:
# Write header
hf.write("EVBD 3 10.0.270.37090\n")
n_row = len(depths)
hf.write(str(n_row) + "\n")
# Write each row
for timestamp, depth in zip(timestamps, depths):
# Datetime must be in the format CCYYMMDD HHmmSSssss
# where ssss = 0.1 milliseconds.
# We have to manually determine the number of "0.1 milliseconds"
# from the microsecond component.
hf.write("{} {} {} \n".format(timestamp2evdtstr(timestamp), depth, status))


def evr_writer(
fname,
rectangles=None,
contours=None,
common_notes="",
default_region_type=0,
line_ending="\r\n",
):
r"""
EVR file writer.
Writes regions to an Echoview region file.
Parameters
----------
fname : str
Destination of output file.
rectangles : list of dictionaries, optional
Rectangle region definitions. Default is an empty list. Each rectangle
region must implement fields ``"depths"`` and ``"timestamps"``, which
indicate the extent of the rectangle. Optionally, ``"creation_type"``,
``"region_name"``, ``"region_type"``, and ``"notes"`` may be set.
If these are not given, the default creation_type is 4 and region_type
is set by ``default_region_type``.
    contours : list of dictionaries, optional
Contour region definitions. Default is an empty list. Each contour
region must implement a ``"points"`` field containing a :class:`numpy.ndarray`
shaped `(n, 2)` defining the co-ordinates of nodes along the (open)
contour in units of timestamp and depth. Optionally, ``"creation_type"``,
``"region_name"``, ``"region_type"``, and ``"notes"`` may be set.
If these are not given, the default creation_type is 2 and region_type
is set by ``default_region_type``.
common_notes : str, optional
Notes to include for every region. Default is ``""``, an empty string.
default_region_type : int, optional
The region type to use for rectangles and contours which do not define
a ``"region_type"`` field. Possible region types are
- ``0`` : bad (no data)
- ``1`` : analysis
- ``2`` : marker
- ``3`` : fishtracks
- ``4`` : bad (empty water)
Default is ``0``.
line_ending : str, optional
        Line ending. Default is ``"\r\n"``, the standard line ending on Windows/DOS,
as per the specification for the file format.
https://support.echoview.com/WebHelp/Using_Echoview/Exporting/Exporting_data/Exporting_line_data.htm
Set to ``"\n"`` to get Unix-style line endings instead.
Notes
-----
For more details on the format specification, see:
https://support.echoview.com/WebHelp/Reference/File_formats/Export_file_formats/2D_Region_definition_file_format.htm
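
    Examples
    --------
    A minimal sketch writing a single rectangle region; all values are
    hypothetical:

    >>> import numpy as np
    >>> rect = {
    ...     "timestamps": np.array([1.6e9, 1.6e9 + 60.0]),
    ...     "depths": np.array([5.0, 25.0]),
    ...     "region_name": "Example rectangle",
    ... }
    >>> evr_writer("regions.evr", rectangles=[rect])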
"""
if rectangles is None:
rectangles = []
if contours is None:
contours = []
# Remove leading/trailing new lines, since we will join with our own line ending
common_notes = common_notes.strip("\r\n")
# Standardize line endings to be \n, regardless of input
common_notes = common_notes.replace("\r\n", "\n").replace("\r", "\n")
    if len(common_notes) == 0:
        n_lines_common_notes = 0
    else:
        # Count lines using "\n", since the line endings were standardized above
        n_lines_common_notes = 1 + common_notes.count("\n")
n_regions = len(rectangles) + len(contours)
i_region = 0
# The file object will automatically replace \n with our chosen line ending
with open(fname, "w+", encoding="utf-8-sig", newline=line_ending) as hf:
# Write header
hf.write("EVRG 7 10.0.283.37689\n")
hf.write(str(n_regions) + "\n")
# Write each rectangle
for region in rectangles:
# Regions are indexed from 1, so increment the counter first
i_region += 1
hf.write("\n") # Blank line separates regions
# Determine extent of rectangle
left = timestamp2evdtstr(np.min(region["timestamps"]))
right = timestamp2evdtstr(np.max(region["timestamps"]))
top = np.min(region["depths"])
bottom = np.max(region["depths"])
# Region header
hf.write(
"13 4 {i} 0 {type} -1 1 {left} {top} {right} {bottom}".format(
i=i_region,
type=region.get("creation_type", 4),
left=left,
right=right,
top=top,
bottom=bottom,
)
+ "\n"
)
# Notes
notes = region.get("notes", "")
if len(notes) == 0:
notes = common_notes
n_lines_notes = n_lines_common_notes
else:
notes = notes.strip("\n")
if len(common_notes) > 0:
notes += "\n" + common_notes
n_lines_notes = 1 + notes.count("\n")
hf.write(str(n_lines_notes) + "\n") # Number of lines of notes
if len(notes) > 0:
hf.write(notes + "\n")
# Detection settings
hf.write("0\n") # Number of lines of detection settings
# Region classification string
hf.write("Unclassified regions\n")
# The points defining the region itself
hf.write(
"{left} {top} {left} {bottom} {right} {bottom} {right} {top} ".format(
left=left,
right=right,
top=top,
bottom=bottom,
) # Terminates with a space, not a new line
)
# Region type
hf.write(str(region.get("region_type", default_region_type)) + "\n")
# Region name
hf.write(
str(region.get("region_name", "Region {}".format(i_region))) + "\n"
)
# Write each contour
for region in contours:
# Regions are indexed from 1, so increment the counter first
i_region += 1
hf.write("\n") # Blank line separates regions
# Header line
hf.write(
"13 {n} {i} 0 {type} -1 1 {left} {top} {right} {bottom}".format(
n=region["points"].shape[0],
i=i_region,
type=region.get("creation_type", 2),
left=timestamp2evdtstr(np.min(region["points"][:, 0])),
right=timestamp2evdtstr(np.max(region["points"][:, 0])),
top=np.min(region["points"][:, 1]),
bottom=np.max(region["points"][:, 1]),
)
+ "\n"
)
# Notes
notes = region.get("notes", "")
if len(notes) == 0:
notes = common_notes
n_lines_notes = n_lines_common_notes
else:
notes = notes.strip("\n")
if len(common_notes) > 0:
notes += "\n" + common_notes
n_lines_notes = 1 + notes.count("\n")
hf.write(str(n_lines_notes) + "\n") # Number of lines of notes
if len(notes) > 0:
hf.write(notes + "\n")
# Detection settings
hf.write("0\n") # Number of lines of detection settings
# Region classification string
hf.write("Unclassified regions\n")
# The region itself
for point in region["points"]:
hf.write("{} {} ".format(timestamp2evdtstr(point[0]), point[1]))
# Region type
hf.write(str(region.get("region_type", default_region_type)) + "\n")
# Region name
hf.write(
str(region.get("region_name", "Region {}".format(i_region))) + "\n"
)


def write_transect_regions(
fname,
transect,
depth_range=None,
passive_key="is_passive",
removed_key="is_removed",
patches_key="mask_patches",
collate_passive_length=0,
collate_removed_length=0,
minimum_passive_length=0,
minimum_removed_length=0,
minimum_patch_area=0,
name_suffix="",
common_notes="",
line_ending="\r\n",
verbose=0,
verbose_indent=0,
):
r"""
Convert a transect dictionary to a set of regions and write as an EVR file.
Parameters
----------
fname : str
Destination of output file.
transect : dict
Transect dictionary.
depth_range : array_like or None, optional
The minimum and maximum depth extents (in any order) of the passive and
removed block regions. If this is ``None`` (default), the minimum and
maximum of ``transect["depths"]`` is used.
passive_key : str, optional
Field name to use for passive data identification. Default is
``"is_passive"``.
removed_key : str, optional
Field name to use for removed blocks. Default is ``"is_removed"``.
patches_key : str, optional
Field name to use for the mask of patch regions. Default is
``"mask_patches"``.
collate_passive_length : int, optional
Maximum distance (in indices) over which passive regions should be
merged together, closing small gaps between them. Default is ``0``.
collate_removed_length : int, optional
Maximum distance (in indices) over which removed blocks should be
merged together, closing small gaps between them. Default is ``0``.
minimum_passive_length : int, optional
Minimum length (in indices) a passive region must have to be included
in the output. Set to -1 to omit all passive regions from the output.
Default is ``0``.
minimum_removed_length : int, optional
Minimum length (in indices) a removed block must have to be included in
the output. Set to -1 to omit all removed regions from the output.
Default is ``0``.
minimum_patch_area : float, optional
Minimum amount of area (in input pixel space) that a patch must occupy
in order to be included in the output. Set to ``0`` to include all
patches, no matter their area. Set to ``-1`` to omit all patches.
Default is ``0``.
name_suffix : str, optional
Suffix to append to variable names. Default is ``""``, an empty string.
common_notes : str, optional
Notes to include for every region. Default is ``""``, an empty string.
line_ending : str, optional
        Line ending. Default is ``"\r\n"``, the standard line ending on Windows/DOS,
as per the specification for the file format,
https://support.echoview.com/WebHelp/Using_Echoview/Exporting/Exporting_data/Exporting_line_data.htm
Set to ``"\n"`` to get Unix-style line endings instead.
verbose : int, optional
Verbosity level. Default is ``0``.
verbose_indent : int, optional
Level of indentation (number of preceding spaces) before verbosity
messages. Default is ``0``.
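
    Examples
    --------
    A sketch with a synthetic transect containing no marked regions; all
    values are hypothetical:

    >>> import numpy as np
    >>> transect = {
    ...     "timestamps": 1.6e9 + np.arange(100.0),
    ...     "depths": np.linspace(0.0, 50.0, 64),
    ...     "is_passive": np.zeros(100, dtype=bool),
    ...     "is_removed": np.zeros(100, dtype=bool),
    ...     "mask_patches": np.zeros((100, 64), dtype=bool),
    ... }
    >>> write_transect_regions("empty_regions.evr", transect)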
"""
if depth_range is None:
depth_range = transect["depths"]
depth_range = [np.min(depth_range), np.max(depth_range)]
rectangles = []
# Regions around each period of passive data
key = passive_key
if key not in transect:
key = "p_" + key
if key not in transect:
raise ValueError("Key {} and {} not found in transect.".format(key[2:], key))
is_passive = transect[key] > 0.5
is_passive = ~utils.squash_gaps(~is_passive, collate_passive_length)
passive_starts, passive_ends = utils.get_indicator_onoffsets(is_passive)
i_passive = 1
n_passive_skipped = 0
for start_index, end_index in zip(passive_starts, passive_ends):
start_index -= 0.5
end_index += 0.5
if minimum_passive_length == -1:
# No passive regions
break
if end_index - start_index <= minimum_passive_length:
n_passive_skipped += 1
continue
region = {}
region["region_name"] = "Passive{} {}".format(name_suffix, i_passive)
region["creation_type"] = 4
region["region_type"] = 0
region["depths"] = depth_range
region["timestamps"] = scipy.interpolate.interp1d(
np.arange(len(transect["timestamps"])),
transect["timestamps"],
fill_value="extrapolate",
)([start_index, end_index])
region["notes"] = textwrap.dedent(
"""
Passive data
Length in pixels: {}
Duration in seconds: {}
""".format(
end_index - start_index,
region["timestamps"][1] - region["timestamps"][0],
)
)
rectangles.append(region)
i_passive += 1
# Regions around each period of removed data
key = removed_key
if key not in transect:
key = "p_" + key
if key not in transect:
raise ValueError("Key {} and {} not found in transect.".format(key[2:], key))
is_removed = transect[key] > 0.5
is_removed = ~utils.squash_gaps(~is_removed, collate_removed_length)
removed_starts, removed_ends = utils.get_indicator_onoffsets(is_removed)
i_removed = 1
n_removed_skipped = 0
for start_index, end_index in zip(removed_starts, removed_ends):
start_index -= 0.5
end_index += 0.5
if minimum_removed_length == -1:
            # No removed regions
break
if end_index - start_index <= minimum_removed_length:
n_removed_skipped += 1
continue
region = {}
region["region_name"] = "Removed block{} {}".format(name_suffix, i_removed)
region["creation_type"] = 4
region["region_type"] = 0
region["depths"] = depth_range
region["timestamps"] = scipy.interpolate.interp1d(
np.arange(len(transect["timestamps"])),
transect["timestamps"],
fill_value="extrapolate",
)([start_index, end_index])
region["notes"] = textwrap.dedent(
"""
Removed data block
Length in pixels: {}
Duration in seconds: {}
""".format(
end_index - start_index,
region["timestamps"][1] - region["timestamps"][0],
)
)
rectangles.append(region)
i_removed += 1
# Contours around each removed patch
if patches_key not in transect:
raise ValueError("Key {} not found in transect.".format(patches_key))
patches = transect[patches_key]
patches = scipy.ndimage.binary_fill_holes(patches > 0.5)
contours_coords = skimage.measure.find_contours(patches, 0.5)
contour_dicts = []
i_contour = 1
n_contour_skipped = 0
for contour in contours_coords:
if minimum_patch_area == -1:
# No patches
break
area = utils.integrate_area_of_contour(
contour[:, 0], contour[:, 1], closed=False
)
if area < minimum_patch_area:
n_contour_skipped += 1
continue
region = {}
region["region_name"] = "Removed patch{} {}".format(name_suffix, i_contour)
region["creation_type"] = 2
region["region_type"] = 0
x = scipy.interpolate.interp1d(
np.arange(len(transect["timestamps"])),
transect["timestamps"],
fill_value="extrapolate",
)(contour[:, 0])
y = scipy.interpolate.interp1d(
np.arange(len(transect["depths"])),
transect["depths"],
fill_value="extrapolate",
)(contour[:, 1])
region["points"] = np.stack([x, y], axis=-1)
region["notes"] = textwrap.dedent(
"""
Removed patch
Area in pixels: {}
Area in meter-seconds: {}
""".format(
area, utils.integrate_area_of_contour(x, y, closed=False)
)
)
contour_dicts.append(region)
i_contour += 1
if verbose >= 1:
print(
" " * verbose_indent + "Outputting {} region{}:"
" {} passive, {} removed blocks, {} removed patches".format(
len(rectangles) + len(contour_dicts),
"" if len(rectangles) + len(contour_dicts) == 1 else "s",
i_passive - 1,
i_removed - 1,
i_contour - 1,
)
)
n_skipped = n_passive_skipped + n_removed_skipped + n_contour_skipped
if n_skipped > 0:
print(
" " * verbose_indent
+ style.skip_fmt(
"There {} {} skipped (too small) region{}:"
" {} passive, {} removed blocks, {} removed patches".format(
"was" if n_skipped == 1 else "were",
n_skipped,
"" if n_skipped == 1 else "s",
n_passive_skipped,
n_removed_skipped,
n_contour_skipped,
)
)
)
# Write the output
return evr_writer(
fname,
rectangles=rectangles,
contours=contour_dicts,
common_notes=common_notes,
line_ending=line_ending,
)


def load_transect_data(transect_pth, dataset="mobile", root_data_dir=ROOT_DATA_DIR):
"""
Load all data for one transect.
Parameters
----------
transect_pth : str
Relative path to transect, excluding ``"_Sv_raw.csv"``.
dataset : str, optional
Name of dataset. Default is ``"mobile"``.
root_data_dir : str
Path to root directory where data is located.
Returns
-------
timestamps : numpy.ndarray
Timestamps (in seconds since Unix epoch), with each entry
corresponding to each row in the ``signals`` data.
depths : numpy.ndarray
Depths from the surface (in metres), with each entry corresponding
to each column in the ``signals`` data.
signals : numpy.ndarray
Echogram Sv data, shaped (num_timestamps, num_depths).
turbulence : numpy.ndarray
Depth of turbulence line, shaped (num_timestamps, ).
bottom : numpy.ndarray
Depth of bottom line, shaped (num_timestamps, ).
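
    Examples
    --------
    A usage sketch; the transect path is hypothetical:

    >>> # "surveyA/transect01" is a placeholder relative path
    >>> ts, depths, Sv, turbulence, bottom = load_transect_data(
    ...     "surveyA/transect01", dataset="mobile"
    ... )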
"""
dirname = os.path.join(root_data_dir, dataset)
raw_fname = os.path.join(dirname, transect_pth + "_Sv_raw.csv")
bottom_fname = os.path.join(dirname, transect_pth + "_bottom.evl")
turbulence_fname = os.path.join(dirname, transect_pth + "_turbulence.evl")
timestamps, depths, signals = transect_loader(raw_fname)
t_bottom, d_bottom = evl_loader(bottom_fname)
t_turbulence, d_turbulence = evl_loader(turbulence_fname)
return (
timestamps,
depths,
signals,
np.interp(timestamps, t_turbulence, d_turbulence),
np.interp(timestamps, t_bottom, d_bottom),
)


def get_partition_data(
partition,
dataset="mobile",
partitioning_version="firstpass",
root_data_dir=ROOT_DATA_DIR,
):
"""
Load partition metadata.
Parameters
----------
    partition : str
        Name of the partition to load; corresponds to a text file listing
        the samples in the partition.
dataset : str, optional
Name of dataset. Default is ``"mobile"``.
    partitioning_version : str, optional
        Name of partitioning method. Default is ``"firstpass"``.
root_data_dir : str
Path to root directory where data is located.
Returns
-------
pandas.DataFrame
Metadata for all transects in the partition. Each row is a single
sample.
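
    Examples
    --------
    A usage sketch; the partition name is hypothetical:

    >>> # "train" is a placeholder partition name
    >>> df = get_partition_data("train", dataset="mobile")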
"""
dirname = os.path.join(root_data_dir, dataset, "sets", partitioning_version)
fname_partition = os.path.join(dirname, partition + ".txt")
fname_header = os.path.join(dirname, "header.txt")
with open(fname_header, "r") as hf:
for row in csv.reader(hf):
header = [entry.strip() for entry in row]
break
df = pd.read_csv(fname_partition, header=None, names=header)
return df


def remove_trailing_slash(s):
"""
Remove trailing forward slashes from a string.
Parameters
----------
s : str
String representing a path, possibly with trailing slashes.
Returns
-------
str
Same as ``s``, but without trailing forward slashes.
"""
    while s and (s[-1] == "/" or s[-1] == os.path.sep):
s = s[:-1]
return s


def list_from_file(fname):
"""
Get a list from a file.
Parameters
----------
fname : str
Path to file.
Returns
-------
list
Contents of the file, one line per entry in the list. Trailing
whitespace is removed from each end of each line.
"""
with open(fname, "r") as hf:
contents = hf.readlines()
contents = [x.strip() for x in contents]
return contents


def get_partition_list(
partition,
dataset="mobile",
full_path=False,
partitioning_version="firstpass",
root_data_dir=ROOT_DATA_DIR,
sharded=False,
):
"""
Get a list of transects in a single partition.
Parameters
----------
    partition : str
        Name of the partition to load; corresponds to a text file listing
        the samples in the partition.
dataset : str, optional
Name of dataset. Default is ``"mobile"``.
full_path : bool, optional
Whether to return the full path to the sample. If ``False``, only the
relative path (from the dataset directory) is returned.
Default is ``False``.
    partitioning_version : str, optional
        Name of partitioning method. Default is ``"firstpass"``.
root_data_dir : str, optional
Path to root directory where data is located.
sharded : bool, optional
Whether to return path to sharded version of data. Default is ``False``.
Returns
-------
list
Path for each sample in the partition.
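
    Examples
    --------
    A usage sketch; the partition name is hypothetical:

    >>> # "train" is a placeholder partition name
    >>> fnames = get_partition_list("train", dataset="mobile", full_path=True)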
"""
if dataset == "mobile":
df = get_partition_data(
partition,
dataset=dataset,
partitioning_version=partitioning_version,
root_data_dir=root_data_dir,
)
fnames = df["Filename"]
fnames = [os.path.join(f.split("_")[0], f.strip()) for f in fnames]
else:
partition_file = os.path.join(
root_data_dir,
dataset,
"sets",
partitioning_version,
partition + ".txt",
)
fnames = list_from_file(partition_file)
fnames = [f.replace("_Sv_raw.csv", "") for f in fnames]
if full_path and sharded:
root_data_dir = remove_trailing_slash(root_data_dir)
fnames = [os.path.join(root_data_dir + "_sharded", dataset, f) for f in fnames]
elif full_path:
fnames = [os.path.join(root_data_dir, dataset, f) for f in fnames]
return fnames