Commit bfaa1b8a authored by Tilmann Sager

Cleaned up repository

parent 8f6caa54
......@@ -2,5 +2,4 @@ __pycache__/
.idea/
*.png
*.csv
snippets
_deprecated/
*.txt
import os
import re
import time
from collections import deque
from csv import DictWriter
from json import dump
import dask.dataframe as dd
import pandas as pd
def create_path(path: list) -> str:
return os.path.join(*path)
def check_path(path: list, create: bool) -> str:
path = create_path(path)
if create:
os.makedirs(path, exist_ok=True)
if os.path.exists(path):
return path
else:
print(path + ' not found')
def init_writer(path: str, columns: list):
output_csv = open(path, 'w', newline='')
writer = DictWriter(output_csv, fieldnames=columns)
writer.writeheader()
return writer, output_csv
def write_results(output_file: str, results: list, columns: list):
with open(output_file, 'w', newline='') as output_csv:
writer = DictWriter(output_csv, fieldnames=columns)
writer.writeheader()
for row in results:
writer.writerow({key: value for key, value in row.items() if key in columns})
def write_config(output_file: str, params):
with open(output_file, "w") as output_json:
dump(params, output_json)
# def dicts_to_csv(output_file, dict_lst):
# with open(output_file, 'w', newline='') as csv_file:
# fieldnames = dict_lst[0].keys()
# writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
#
# writer.writeheader()
# for row in dict_lst:
# writer.writerow(row)
def read_csv(path, dtypes=None):
return pd.read_csv(path, dtype=dtypes)
def write_csv(df: pd.DataFrame, path: str):
df.to_csv(path, index=False)
return os.path.exists(path)
def _is_faulty(path: str) -> bool:
is_faulty = False
with open(path, 'r') as f:
# faulty files end in a truncated line whose first field (FLIGHT_ID) is not numeric
if not re.match("^[0-9]+$", deque(f, 1)[0].split(',')[0]):
is_faulty = True
return is_faulty
def _remove_last_line(path: str):
last_line_deleted = False
with open(path, "r+", encoding="utf-8") as file:
# find end of file
file.seek(0, os.SEEK_END)
# skip the last character
pos = file.tell() - 1
# search for the newline character (i.e. the last row)
while pos > 0 and file.read(1) != "\n":
pos -= 1
file.seek(pos, os.SEEK_SET)
# delete the last line
if pos > 0:
file.seek(pos, os.SEEK_SET)
file.truncate()
last_line_deleted = True
# wait until operation is finished
time.sleep(5)
return last_line_deleted
def read_dask_csv(path, dtypes=None, skiprows=None):
if _is_faulty(path):
_remove_last_line(path)
try:
df = dd.read_csv(path, skiprows=[skiprows], dtype=dtypes)
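# force evaluation of the last partition so that obvious parsing problems surface here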
df.tail(10)
except TypeError:
df = None
return df
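# Usage sketch (illustrative, not part of the original module): read_dask_csv() first checks
# whether the last CSV line is truncated via _is_faulty() and repairs the file in place with
# _remove_last_line() before handing it to dask. The path below is a hypothetical example.
#
# df = read_dask_csv('/data/flights/1_5_2020_SEGMENT.csv', skiprows=1)
# if df is not None:
#     print(df['FLIGHT_ID'].nunique().compute())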
import datetime
import os
import numpy as np
import pandas as pd
from constants import Columns as col
import fileio
DTYPES = {
'FLIGHT_ID': np.int64,
'SEGMENT_NO': np.int64,
'LATITUDE': np.double,
'LONGITUDE': np.double,
'ALTITUDE': np.double,
'SEGMENT_MONTH': np.int64,
'SEGMENT_DAY': np.int64,
'SEGMENT_YEAR': np.int64,
'SEGMENT_HOUR': np.int64,
'SEGMENT_MIN': np.int64,
'SEGMENT_SEC': np.double,
'EMISSIONS_MODE': np.double,
'TEMPERATURE': np.double,
'PRESSURE': np.double,
'HUMIDITY': np.double,
'SPEED': np.double,
'SEGMENT_TIME': object,
'TRACK_DISTANCE': np.double,
'THRUST': np.double,
'WEIGHT': np.double,
'FUELBURN': np.double,
'CO': np.double,
'HC': np.double,
'NOX': np.double,
'PMNV': np.double,
'PMSO': np.double,
'PMFO': np.double,
'CO2': np.double,
'H2O': np.double,
'SOX': np.double
}
def _processed_file(start_datetime: datetime.datetime, end_datetime: datetime.datetime, h: str, v: str) -> str:
start_date = str(start_datetime.year) + str(start_datetime.month) + str(start_datetime.day)
end_date = str(end_datetime.year) + str(end_datetime.month) + str(end_datetime.day)
start_time = str(start_datetime.hour) + str(start_datetime.minute)
end_time = str(end_datetime.hour) + str(end_datetime.minute)
return '_'.join([h, v, start_date, start_time, end_date, end_time]) + '.csv'
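# Worked example (hypothetical values): the cached file name is assembled from unpadded
# date and time components, so single-digit months, days and minutes are not zero-filled.
#
# _processed_file(datetime.datetime(2020, 1, 5, 13, 30),
#                 datetime.datetime(2020, 1, 5, 14, 0), 'h12', 'v04')
# -> 'h12_v04_202015_1330_202015_140.csv'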
def _raw_file(timestamp):
return '_'.join([str(timestamp.month), str(timestamp.day), str(timestamp.year), 'SEGMENT']) + '.csv'
def _filter_by_time_and_position(day_segment_df: pd.DataFrame, south: float, north: float, east: float, west: float,
startdatetime: datetime, enddatetime: datetime):
lat_min = min(south, north)
lat_max = max(south, north)
lon_min = min(west, east)
lon_max = max(west, east)
filtered = day_segment_df.loc[(day_segment_df['LATITUDE'] >= lat_min)
& (day_segment_df['LATITUDE'] <= lat_max)
& (day_segment_df['LONGITUDE'] >= lon_min)
& (day_segment_df['LONGITUDE'] <= lon_max)
& (day_segment_df['SEGMENT_HOUR'] >= startdatetime.hour)
& (day_segment_df['SEGMENT_HOUR'] <= enddatetime.hour)
& (day_segment_df['SEGMENT_MIN'] >= startdatetime.minute)
& (day_segment_df['SEGMENT_MIN'] <= enddatetime.minute)
]
computed = filtered.compute(scheduler='threads', num_workers=2)
return computed
def filter(granule: dict, params: dict):
file_processed = _processed_file(granule.get(col.start), granule.get(col.end), granule.get(col.h),
granule.get(col.v))
path_processed = fileio.create_path([params.get('flight_proc_dir'), file_processed])
if os.path.exists(path_processed):
processed = fileio.read_csv(path_processed, DTYPES)
else:
file_raw = _raw_file(granule.get(col.start))
path_raw = fileio.create_path([params.get('flight_raw_dir'), file_raw])
raw = fileio.read_dask_csv(path_raw, DTYPES, 1)
if raw is not None:
processed = _filter_by_time_and_position(raw, granule[col.south], granule[col.north],
granule[col.east],
granule[col.west],
granule[col.start], granule[col.end])
else:
processed = pd.DataFrame(columns=list(DTYPES.keys()))
fileio.write_csv(processed, path_processed)
granule[col.flight_count] = len(processed['FLIGHT_ID'].unique().tolist())
del processed
return granule
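# Usage sketch (illustrative; directory values are hypothetical): filter() first looks for a
# cached per-granule CSV in params['flight_proc_dir'] and only falls back to filtering the raw
# daily segment file from params['flight_raw_dir'].
#
# params = {'flight_proc_dir': '/data/flights/processed', 'flight_raw_dir': '/data/flights/raw'}
# granule = filter(granule, params)   # granule: metadata dict from the HDF extraction step
# print(granule[col.flight_count])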
"""
input: file list
output: metadata as df
"""
import datetime
import pytz
from osgeo import gdal
from constants import Columns as col
def extract(filepath: str) -> dict:
hdf_ds = gdal.Open(filepath)
metadata = hdf_ds.GetMetadata_Dict()
datetime_format = "%Y-%m-%dT%H:%M:%S.%fZ"
granule = {col.hdf: filepath,
col.name: filepath.split('/')[-1].replace('.hdf', ''),
col.start: datetime.datetime.strptime(metadata['GRANULEBEGINNINGDATETIME'],
datetime_format).astimezone(pytz.UTC),
col.end: datetime.datetime.strptime(metadata['GRANULEENDINGDATETIME'],
datetime_format).astimezone(pytz.UTC),
col.west: float(metadata['WESTBOUNDINGCOORDINATE']),
col.east: float(metadata['EASTBOUNDINGCOORDINATE']),
col.north: float(metadata['SOUTHBOUNDINGCOORDINATE']),
col.south: float(metadata['NORTHBOUNDINGCOORDINATE']),
col.dim: (int(metadata['DATACOLUMNS']), int(metadata['DATAROWS'])),
col.h: 'h' + metadata['HORIZONTALTILENUMBER'],
col.v: 'v' + metadata['VERTICALTILENUMBER']
}
del hdf_ds, metadata
return granule
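# Usage sketch (illustrative; the HDF path is hypothetical): extract() returns a plain dict
# keyed by constants.Columns holding the granule's time window, bounding box, pixel dimensions
# and tile identifiers (e.g. 'h12' / 'v04') read from the HDF metadata.
#
# granule = extract('/data/granules/some_granule.hdf')
# print(granule[col.start], granule[col.dim], granule[col.h], granule[col.v])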
import numpy as np
from skimage.draw import line
from skimage.feature import canny
from skimage.morphology import label, binary_closing, closing, square
from skimage.transform import probabilistic_hough_line
from constants import Columns as col
import config
CLOSING_SQUARE_SIZE = 3
LINE_LEN = 150
LINE_GAP = 20
MAX_SIZE_PX = config.max_size_px()
CANNY_SIGMA = config.sigma()
FILTER = config.filter_method()
METHOD = config.method()
PROB_THRESH = 50
def _probabilistic_hough(img):
# Line finding using the Probabilistic Hough Transform
image = np.array(img, dtype=bool)
edges = canny(image, sigma=3)
lines = probabilistic_hough_line(edges, threshold=PROB_THRESH, line_length=LINE_LEN,
line_gap=LINE_GAP)
return lines
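# Note (illustrative): probabilistic_hough_line() returns a list of detected segments, each as
# a pair of (x, y) endpoints, so the result has the form [((x0, y0), (x1, y1)), ...].
#
# lines = _probabilistic_hough(granule[col.contrail_mask])
# print(len(lines), 'candidate line segments')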
def _get_intersection_labels_prob(labels, hough_lines):
intersection_labels = []
for hough_line in hough_lines:
# each hough line is a pair of (x, y) endpoints; skimage.draw.line expects (row, col) order
(x0, y0), (x1, y1) = hough_line
line_arr = np.zeros(labels.shape, dtype=int)
rr, cc = line(y0, x0, y1, x1)
line_arr[rr, cc] = 1
# iterate over every labelled region (label 0 is the background and is excluded)
for label_num in range(1, labels.max() + 1):
mask = labels == label_num
masked = line_arr * mask
if masked.any():
intersection_labels.append(label_num)
return intersection_labels
def _filter_labels(img, label_list):
# keep only the pixels whose label occurs in label_list
return np.isin(img, label_list).astype(int)
def postprocess(granule):
img = granule[col.contrail_mask]
lines = _probabilistic_hough(img)
labeled = label(img)
inter_labels_proc = _get_intersection_labels_prob(labeled, lines)
mask = _filter_labels(labeled, inter_labels_proc)
mask = binary_closing(mask)  # filter out small "grains" (speckle noise)
# mask = closing(mask, square(CLOSING_SQUARE_SIZE))
granule[col.contrail_mask_post] = mask
return granule
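# Usage sketch (illustrative): postprocess() reads the binary detection mask from
# col.contrail_mask and stores the line-filtered, morphologically closed mask under
# col.contrail_mask_post.
#
# granule = postprocess(granule)
# mask_post = granule[col.contrail_mask_post]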
import numpy as np
import pandas as pd
from constants import Columns as col
from stages.mapping import map_point_to_coordinates
"""
KEY NUMBERS
"""
def mean_segment_size():
return 0
def number_detected_pixel(granule):
return np.count_nonzero(granule[col.line_arr])
def relative_pixel_density(granule):
return np.count_nonzero(granule[col.line_arr]) / (granule[col.dim][0] * granule[col.dim][1])
def number_detected_lines(granule):
return len(granule[col.lines])
def relation_flight_number_pixel_density(number_flights, pixel_density):
if number_flights != 0:
return pixel_density / number_flights
else:
return 0
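# Worked example (hypothetical numbers): for a 1200 x 1200 granule with 720 detected pixels,
# relative_pixel_density() gives 720 / (1200 * 1200) = 0.0005, and with 10 flights in the
# granule relation_flight_number_pixel_density(10, 0.0005) returns 0.0005 / 10 = 0.00005.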
def line_center(lines, dim, lat, lon):
lines_coordinates = []
if lines:
for line in lines:
line_x = (line[0][0], line[1][0])
line_y = (line[0][1], line[1][1])
# use the midpoint of the segment in pixel coordinates
line_cent = ((max(line_x) + min(line_x)) / 2, (max(line_y) + min(line_y)) / 2)
lines_coordinates.append(map_point_to_coordinates(line_cent, dim, lon, lat))
return lines_coordinates
def map_lines_to_coordinates(lines, dim, lat, lon):
lines_coordinates = []
if lines:
for line in lines:
line_start = map_point_to_coordinates(line[0], dim, lon, lat)
line_end = map_point_to_coordinates(line[1], dim, lon, lat)
lines_coordinates.append((line_start, line_end))
return lines_coordinates
# TODO
def collect(granule):
granule[col.px_count] = number_detected_pixel(granule)
granule[col.line_count] = number_detected_lines(granule)
granule[col.px_dens] = relative_pixel_density(granule)
granule[col.rel_flight_px] = relation_flight_number_pixel_density(granule[col.flight_count],
granule[col.px_dens])
granule[col.line_center] = line_center(granule[col.lines], granule[col.dim],
(granule[col.north], granule[col.south]),
(granule[col.west], granule[col.east]))
granule[col.line_coord] = map_lines_to_coordinates(granule[col.lines], granule[col.dim],
(granule[col.north], granule[col.south]),
(granule[col.west], granule[col.east]))
return granule
import csv
import os
import re
import time
from collections import deque
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from constants import Columns as col
from stages.flight_filtering import DTYPES, _dd_read_csv
"""
CHECK FLIGHT CSV
"""
def _try_opening_csv(filepath_list: list):
faulty_files = []
for filepath in filepath_list:
try:
df = _dd_read_csv(filepath, DTYPES)
df.tail(10)
except Exception:
faulty_files.append(filepath)
continue
return faulty_files
def _check_last_line_in_csv(filepath_list: list):
files_to_repair = []
for filepath in filepath_list:
with open(filepath, 'r') as f:
if not re.match("^[0-9]+$", deque(f, 1)[0].split(',')[0]):
files_to_repair.append(filepath)
return files_to_repair
def _delete_last_line_in_csv(filepath_list: list):
repaired_files = []
for filepath in filepath_list:
with open(filepath, "r+", encoding="utf-8") as file:
# find end of file
file.seek(0, os.SEEK_END)
# skip the last character
pos = file.tell() - 1
# search for the newline character (i.e. the last row)
while pos > 0 and file.read(1) != "\n":
pos -= 1
file.seek(pos, os.SEEK_SET)
# delete the last line
if pos > 0:
file.seek(pos, os.SEEK_SET)
file.truncate()
repaired_files.append(filepath)
# wait until operation is finished
time.sleep(5)
return repaired_files
def check_and_repair_csv(job_df: pd.DataFrame):
files_to_process = job_df[job_df[col.flight_processed].isnull()][col.in_flight_raw].tolist()
faulty_files = _try_opening_csv(files_to_process)
files_to_repair = _check_last_line_in_csv(faulty_files)
repaired_files = _delete_last_line_in_csv(files_to_repair)
return repaired_files
"""
FILE IO
"""
def dicts_to_csv(output_file, dict_lst):
with open(output_file, 'w', newline='') as csv_file:
fieldnames = dict_lst[0].keys()
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for row in dict_lst:
writer.writerow(row)
"""
DEBUGGING
"""
def show_plot(arr: np.ndarray):
plt.imshow(arr)
plt.show()
import logging
import logging.config
logging.basicConfig(level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')
def get_child_logger(name: str):
return logging.getLogger(f'{name}')
......@@ -3,7 +3,7 @@ from analysis import metrics
from common.constants import Column
from detection import hough
from extraction import flights, hdf
from plot import satellite, maps
from plot import plot
from preprocessing import cda
def run(filepath: str, params: dict):
......@@ -38,8 +38,8 @@ def run(filepath: str, params: {}):
"""
PLOTTING
"""
granule = satellite.plot_contrail_on_cleaned(granule, params)
granule = satellite.plot_contrail_mask(granule, params)
granule = plot.plot_contrail_mask_on_cleaned(granule, params)
granule = plot.plot_contrail_mask(granule, params)
end = time.time()
granule[Column.runtime] = round(end - start, 2)
......
# from: https://github.com/tisljaricleo/GPS-visualization-Python/blob/main/gps_class.py
def map_coordinates_to_array(lat_lon, h_w, input_points):
old = (input_points[2], input_points[0])
new = (0, h_w[1])
y = ((lat_lon[0] - old[0]) * (new[1] - new[0]) / (old[1] - old[0])) + new[0]
old = (input_points[1], input_points[3])
new = (0, h_w[0])
x = ((lat_lon[1] - old[0]) * (new[1] - new[0]) / (old[1] - old[0])) + new[0]
# y must be reversed because of the image orientation in matplotlib:
# the image origin (0, 0) is in the upper-left corner, while the coordinate system origin (0, 0) is in the lower-left corner
return int(x), int(y)
# return int(x), int(y)
def scale_pt_to_range(pt, arr_max, bound_tar):
return ((float(pt) / float(arr_max)) * (max(bound_tar) - min(bound_tar))) + min(bound_tar)
def map_point_to_coordinates(pt, dim, x_range, y_range):
return scale_pt_to_range(pt[0], dim[0], x_range), scale_pt_to_range(pt[1], dim[1], y_range)
# h_w[1] - int(y)
lat_min = 39.9999999964079
lat_max = 49.9999999955098
lon_min = -77.7861913287703
lon_max = -52.2054131717788
point = (700, 700)
coordinate = (45, -50.0)
width = 1200.0
height = 1200.0
input_points = (lat_max, lon_min, lat_min, lon_max)
h_w = (height, width)
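# Illustrative note on the example values above: map_coordinates_to_array(coordinate, h_w,
# input_points) scales the (lat, lon) pair linearly into the 1200 x 1200 pixel grid spanned by
# input_points, while map_point_to_coordinates(point, h_w, (lon_min, lon_max), (lat_min, lat_max))
# performs the inverse scaling from pixel (700, 700) back to a (lon, lat) pair.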
"""
NOTES
# use ndi for mapping coordinates
# importing numpy package for
# creating arrays
import numpy as np
# importing scipy
from scipy import ndimage
# creating an array from 0 to 15 values
a = np.arange(16.).reshape((4, 4))
# finding coordinates
ndimage.map_coordinates(a, [[0.3, 1], [0.5, 1]], order=1)
point = (700, 700)
lat_min = 39.9999999964079
lat_max = 49.9999999955098
lon_min = -77.7861913287703
lon_max = -52.2054131717788
height = 1200
width = 1200
"""