Commit 2e80f20b authored by Tilmann Sager

Added flight trail to contrail pixel ratio // minor improvements // wip: run with all configurations
parent 087db7b7
@@ -13,7 +13,7 @@ LINE_GAP = 20
 MAX_SIZE_PX = config.max_size_px()
 CANNY_SIGMA = config.sigma()
 FILTER = config.filter_method()
-METHOD = config.detection_method()
+METHOD = config.method()
 PROB_THRESH = 50
......
@@ -73,7 +73,7 @@ def _delete_last_line_in_csv(filepath_list: []):
 def check_and_repair_csv(job_df: pd.DataFrame):
-    files_to_process = job_df[job_df[col.flight_processed].isnull()][col.flight_raw].tolist()
+    files_to_process = job_df[job_df[col.flight_processed].isnull()][col.in_flight_raw].tolist()
     faulty_files = _try_opening_csv(files_to_process)
     files_to_repair = _check_last_line_in_csv(faulty_files)
     repaired_files = _delete_last_line_in_csv(files_to_repair)
......
 import pandas as pd
 from common.constants import Action, Category, Column, Key
 def _category(row: pd.Series, lower_limit: int, upper_limit: int):
     if row[Column.flight_count] == 0:
         return Category.CONTRAIL_OK
-    if row[Column.flight_pixel_ratio] <= lower_limit:
+    if row[Column.flight_to_contrail_area] <= lower_limit:
         return Category.TOO_FEW_CONTRAILS
-    if row[Column.flight_pixel_ratio] >= upper_limit:
+    if row[Column.flight_to_contrail_area] >= upper_limit:
         return Category.TOO_MANY_CONTRAILS
-    if lower_limit < row[Column.flight_pixel_ratio] < upper_limit:
+    if lower_limit < row[Column.flight_to_contrail_area] < upper_limit:
         return Category.CONTRAIL_OK
......
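For orientation, a hypothetical call of the reworked classifier (not part of the commit, and assuming access to the module's private _category); the row values are invented, while the thresholds 30 and 1000 match what config.ini sets further below:

import pandas as pd
from common.constants import Category, Column

# A granule whose flight-to-contrail area ratio falls between the two
# thresholds stays CONTRAIL_OK; below/above it becomes TOO_FEW/TOO_MANY.
row = pd.Series({Column.flight_count: 3, Column.flight_to_contrail_area: 120.0})
assert _category(row, lower_limit=30, upper_limit=1000) == Category.CONTRAIL_OK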
 import numpy as np
+from shapely.geometry import LineString
-from common.constants import Column
+from common.constants import Column, Key
+from common.fileio import read_csv_pandas
-# def _normalize_column(df: pd.DataFrame, column_name: str):
-#     return (df[column_name] - df[column_name].min()) / (
-#         df[column_name].max() - df[column_name].min())
+def collect_metrics(granule: {}, params: {}):
+    if granule[Column.flight_count] <= 0:
+        return granule
+    granule[Column.contrail_pixel] = granule[Column.mask].sum()
+    granule[Column.contrail_pixel_percent] = granule[Column.contrail_pixel] / (
+        (granule[Column.shape][0] * granule[Column.shape][1]) * 100)
+    granule[Column.flight_pixel] = _flight_pixel(granule[Column.flight_file_processed], params[Key.flight_trail_width])
+    granule[Column.flight_pixel_percent] = granule[Column.flight_pixel] / (
+        (granule[Column.shape][0] * granule[Column.shape][1]) * 100)
+    granule[Column.intersects_count] = sum(granule[Column.intersects])
+    granule[Column.line_types_count] = sum(granule[Column.line_types])
+    granule[Column.flight_to_contrail_count] = _ratio(granule[Column.contrail_pixel], granule[Column.flight_count])
+    granule[Column.flight_to_contrail_area] = _ratio(granule[Column.contrail_pixel], granule[Column.flight_pixel])
-def _flight_pixel_ratio(flight_count: int, pixel_count: int):
-    result = 0
-    if pixel_count != 0 and flight_count != 0:
-        result = pixel_count / flight_count
-    return result
+    return granule
-def _pixel_density(mask: np.array):
-    return mask.sum()
+def _ratio(a: float, b: float):
+    if a <= 0:
+        return b
+    if b <= 0:
+        return a
+    else:
+        return a / b
-def _sum_list(int_lst: [int]):
-    return sum(int_lst)
+def _flight_pixel(csv_path: str, dilation: float):
+    flights = read_csv_pandas(csv_path)
+    flight_area = 0
-def collect_metrics(granule: {}):
-    if granule[Column.flight_count] <= 0:
-        return granule
+    for flight_id in flights['FLIGHT_ID'].unique():
+        flight = flights[flights['FLIGHT_ID'] == flight_id]
+        points = list(zip(flight['LATITUDE'], flight['LONGITUDE']))
-    granule[Column.pixel_count] = _pixel_density(granule[Column.mask])
-    granule[Column.intersects_count] = _sum_list(granule[Column.intersects])
-    granule[Column.line_types_count] = _sum_list(granule[Column.line_types])
-    granule[Column.flight_pixel_ratio] = _flight_pixel_ratio(
-        granule[Column.flight_count], granule[Column.pixel_count])
-    return granule
+        if len(points) > 2:
+            line = LineString(points)
+            dilated = line.buffer(dilation)
+            flight_area += dilated.area
+    return flight_area
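The core of the commit is the new _flight_pixel helper: each flight track becomes a shapely LineString, buffer() dilates it into a corridor of the configured flight_trail_width, and the corridor areas are summed so _ratio can relate contrail pixels to flown area. A self-contained sketch of the same idea, with synthetic waypoints and the width 0.5 taken from one of the mask files below:

from shapely.geometry import LineString

track = [(48.1, 11.5), (48.4, 11.9), (48.8, 12.4)]  # synthetic (lat, lon) points
trail = LineString(track).buffer(0.5)  # dilate the track into a corridor
print(trail.area)  # this is what accumulates into Column.flight_pixel

Note that buffer() works in coordinate units, so with raw latitude/longitude the "area" comes out in squared degrees rather than pixels; for the flight_to_contrail_area ratio only a consistent relative measure is needed.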
@@ -11,15 +11,15 @@ from common.constants import Key
 from common.path import check_path, create_path
-def _today():
+def _today() -> str:
     return date.today().__str__()
-def _now():
+def _now() -> str:
     return datetime.now().strftime("%Y%m%d-%H%M")
-def _input_files(input_dir: str, limit: int):
+def _input_files(input_dir: str, limit: int) -> [str]:
     files = glob(input_dir + '/*.hdf')
     if limit > 0:
@@ -36,29 +36,32 @@ def _num_processes(use_mp: bool) -> int:
     return num_processes
-def init_params():
+def init_params() -> {}:
     cp = ConfigParser()
     cp.read(check_path(['./config.ini'], create=False))
-    use_mp = bool(int(cp.get('general', 'multiprocessing')))
+    mask = str(cp.get('detection', 'mask')).lower()
+    cp.read(check_path(['./mask_' + mask + '.ini'], False))
+    out_root = str(cp.get('path', 'output_dir'))
+    method = str(cp.get('detection', 'method'))
     limit = int(cp.get('general', 'limit'))
-    hdf_dir = check_path([cp.get('path', 'hdf_dir'), cp.get(
-        'product', 'name')], create=False)
-    output_dir = check_path(
-        [cp.get('path', 'output_dir'), _now()], create=True)
+    run_dir = '_'.join([_now(), method, mask, str(limit)])
+    out_dir = check_path([out_root, run_dir], create=True)
-    return {
+    params = {
         # General
-        Key.num_proc: _num_processes(use_mp),
-        Key.input: _input_files(hdf_dir, limit),
+        Key.number_of_processes: _num_processes(bool(int(cp.get('general', 'multiprocessing')))),
+        Key.input_files: _input_files(check_path([cp.get('path', 'hdf_dir'), cp.get(
+            'product', 'name')], create=False), int(cp.get('general', 'limit'))),
         # Path
-        Key.flight_raw: check_path([cp.get('path', 'flight_dir'), 'raw'], create=False),
-        Key.flight_proc: check_path([cp.get('path', 'flight_dir'), 'processed'], create=True),
-        Key.out_dir: output_dir,
-        Key.out_proc: create_path([output_dir, 'processed_files.csv']),
-        Key.out_res: create_path([output_dir, 'results.csv']),
-        Key.out_conf: create_path([output_dir, 'config.json']),
+        Key.in_flight_raw: check_path([cp.get('path', 'flight_dir'), 'raw'], create=False),
+        Key.in_flight_proc: check_path([cp.get('path', 'flight_dir'), 'processed'], create=True),
+        Key.out_dir: out_dir,
+        Key.out_proc: create_path([out_dir, 'processed_files.csv']),
+        Key.out_res: create_path([out_dir, 'results.csv']),
+        Key.out_conf: create_path([out_dir, 'config.json']),
         # Bands
         Key.bands: [int(band.strip()) for band in cp.get('product', 'bands').split(',')],
@@ -68,29 +71,16 @@ def init_params():
         Key.band_suffix: cp.get('product', 'band_suffix'),
         # Preprocessing
-        Key.pp_scale: int(cp.get('preprocessing', 'norm_interval')),
-        Key.pp_kernel: int(cp.get('preprocessing', 'gauss_kernel_size')),
-        Key.pp_k: float(cp.get('preprocessing', 'k')),
-        Key.pp_order: int(cp.get('preprocessing', 'order')),
+        Key.norm_interval: int(cp.get('preprocessing', 'norm_interval')),
+        Key.kernel_size: int(cp.get('preprocessing', 'gauss_kernel_size')),
+        Key.cda_k: float(cp.get('preprocessing', 'k')),
+        Key.cda_order: int(cp.get('preprocessing', 'order')),
         # Detection
-        Key.dt_method: cp.get('detection', 'method'),
-        Key.dt_max_px: int(cp.get('detection', 'max_px_size')),
-        Key.dt_split: int(cp.get('detection', 'split_by')),
-        Key.dt_post: bool(int(cp.get('detection', 'postprocess'))),
-        Key.dt_thresh_bin: int(cp.get('detection', 'binary_threshold')),
-        Key.dt_ratio: int(cp.get('detection', 'ratio_threshold')),
-        # SHT
-        Key.dt_thresh_hough: float(cp.get('standard', 'hough_threshold')),
-        # PPHT
-        Key.dt_linelength: int(cp.get('probabilistic', 'line_length')),
-        Key.dt_linegap: int(cp.get('probabilistic', 'line_gap')),
-        # Key.dt_cannysigma: int(cp.get('probabilistic', 'sigma_canny')),
-        # Key.dt_filter: cp.get('probabilistic', 'filter'),
-        Key.dt_thresh_prob: int(cp.get('probabilistic', 'hough_threshold')),
-        Key.dt_connect: int(cp.get('probabilistic', 'connectivity')),
+        Key.method: cp.get('detection', 'method'),
+        Key.split_by: int(cp.get('detection', 'split_by')),
+        Key.postprocessing: bool(int(cp.get('detection', 'postprocess'))),
+        Key.mask: str(cp.get('detection', 'mask')).lower(),
         # Classification
         Key.cls_thresh_low: int(cp.get('classification', 'low_threshold')),
@@ -99,3 +89,37 @@ def init_params():
         # Plots
         Key.plt_world_map: cp.get('plot', 'world_map')
     }
+    cp.read('./mask_' + mask + '.ini')
+    mask_params = {
+        Key.binary_threshold: int(cp.get('mask', 'binary_threshold')),
+        Key.max_px_size: int(cp.get('mask', 'max_px_size')),
+        Key.sht_threshold: float(cp.get('mask', 'sht_threshold')),
+        Key.ppht_threshold: int(cp.get('mask', 'ppht_threshold')),
+        Key.connectivity: int(cp.get('mask', 'connectivity')),
+        Key.line_length: int(cp.get('mask', 'line_length')),
+        Key.line_gap: int(cp.get('mask', 'line_gap')),
+        Key.ratio_threshold: int(cp.get('mask', 'ratio_threshold')),
+        Key.flight_trail_width: float(cp.get('mask', 'flight_trail_width'))
+    }
+    params.update(mask_params)
+    return params
+def read_mask(mask: str) -> {}:
+    cp = ConfigParser()
+    cp.read('./mask_' + mask + '.ini')
+    mask_params = {
+        Key.binary_threshold: int(cp.get('mask', 'binary_threshold')),
+        Key.max_px_size: int(cp.get('mask', 'max_px_size')),
+        Key.sht_threshold: float(cp.get('mask', 'sht_threshold')),
+        Key.ppht_threshold: int(cp.get('mask', 'ppht_threshold')),
+        Key.connectivity: int(cp.get('mask', 'connectivity')),
+        Key.line_length: int(cp.get('mask', 'line_length')),
+        Key.line_gap: int(cp.get('mask', 'line_gap')),
+        Key.ratio_threshold: int(cp.get('mask', 'ratio_threshold')),
+        Key.flight_trail_width: float(cp.get('mask', 'flight_trail_width'))
+    }
+    return mask_params
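read_mask mirrors the mask block that init_params now appends, so a caller can swap parameter sets without re-reading config.ini. A hypothetical usage (not repository code), assuming the mask files follow the ./mask_<name>.ini naming used above:

from common.config import init_params, read_mask
from common.constants import Key

params = init_params()
params.update(read_mask('a'))  # loads ./mask_a.ini and overrides the mask keys
print(params[Key.binary_threshold])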
@@ -31,8 +31,12 @@ class Column:
     line_types = 'line_type_intersections_per_block'
     # key values
-    pixel_count = 'pixel_density'
-    flight_pixel_ratio = 'flight_pixel_ratio'
+    contrail_pixel = 'contrail_pixel'
+    contrail_pixel_percent = 'contrail_pixel_percent'
+    flight_pixel = 'flight_pixel'
+    flight_pixel_percent = 'flight_pixel_percent'
+    flight_to_contrail_area = 'flight_to_contrail_area'
+    flight_to_contrail_count = 'flight_to_contrail_count'
     intersects_count = 'intersections_per_granule'
     line_types_count = 'line_types_per_granule'
@@ -60,9 +64,13 @@ file_columns = [Column.name,
 result_columns = [Column.name,
                   Column.category,
+                  Column.contrail_pixel,
+                  Column.contrail_pixel_percent,
+                  Column.flight_pixel,
+                  Column.flight_pixel_percent,
+                  Column.flight_to_contrail_area,
                   Column.flight_count,
-                  Column.pixel_count,
-                  Column.flight_pixel_ratio,
+                  Column.flight_to_contrail_count,
                   Column.intersects_count,
                   Column.line_types_count
                   ]
@@ -81,12 +89,12 @@ class Action:
 class Key:
-    num_proc = 'process_number'
+    number_of_processes = 'process_number'
     # Path
-    input = 'input_files'
-    flight_raw = 'flight_raw_dir'
-    flight_proc = 'flight_proc_dir'
+    input_files = 'input_files'
+    in_flight_raw = 'flight_raw_dir'
+    in_flight_proc = 'flight_proc_dir'
     out_dir = 'output_dir'
     out_proc = 'processed_output'
     out_res = 'result_output'
@@ -100,33 +108,33 @@ class Key:
     band_suffix = 'band_suffix'
     # Preprocessing
-    pp_scale = 'pp_norm_interval'
-    pp_kernel = 'pp_kernel_size'
-    pp_k = 'pp_k'
-    pp_order = 'pp_order'
+    norm_interval = 'pp_norm_interval'
+    kernel_size = 'pp_kernel_size'
+    cda_k = 'pp_k'
+    cda_order = 'pp_order'
     # Detection
-    dt_method = 'detection_method'
-    dt_max_px = 'detection_max_px_size'
-    dt_connect = 'connectivity'
-    dt_split = 'split_by'
-    dt_thresh_bin = 'binary_threshold'
-    dt_post = 'postprocessing'
-    dt_ratio = 'ratio_threshold'
+    method = 'detection_method'
+    max_px_size = 'detection_max_px_size'
+    connectivity = 'connectivity'
+    split_by = 'split_by'
+    binary_threshold = 'binary_threshold'
+    postprocessing = 'postprocessing'
+    ratio_threshold = 'ratio_threshold'
+    mask = 'mask'
     # Probabilistic
-    dt_linelength = 'probabilistic_line_length'
-    dt_linegap = 'probabilistic_line_gap'
-    dt_cannysigma = 'filter_canny_sigma'
-    dt_filter = 'edge_detection'
-    dt_thresh_prob = 'probabilistic_threshold'
+    line_length = 'probabilistic_line_length'
+    line_gap = 'probabilistic_line_gap'
+    ppht_threshold = 'probabilistic_threshold'
     # Standard
-    dt_thresh_hough = 'hough_threshold'
+    sht_threshold = 'hough_threshold'
     # Classification
     cls_thresh_low = 'lower_classification_threshold'
     cls_thresh_high = 'upper_classification_threshold'
+    flight_trail_width = 'flight_trail_width'
     # Plots
     plt_world_map = 'path_world_map_shape'
@@ -49,7 +49,7 @@ def _is_faulty(path: []):
     is_faulty = False
     with open(path, 'r') as f:
-        if not re.match("^[0-9]+$", deque(f, 1)[0].split_blocks(',')[0]):
+        if not re.match("^[0-9]+$", deque(f, 1)[0].split(',')[0]):
            is_faulty = True
     return is_faulty
......
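The fixed check relies on collections.deque(f, 1) keeping only the file's final line, which avoids loading the whole CSV into memory; a standalone sketch of that pattern (hypothetical helper, not repository code):

import re
from collections import deque

def last_line_is_intact(path: str) -> bool:
    with open(path, 'r') as f:
        last = deque(f, 1)[0]  # maxlen=1: only the final line survives
    # a healthy row starts with a purely numeric first field
    return re.match("^[0-9]+$", last.split(',')[0]) is not None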
 [general]
 multiprocessing = 1
-limit = 5
+limit = 100
 [path]
 hdf_dir = ../data/granules
@@ -22,25 +22,14 @@ k = 0.1
 order = 0
 [detection]
-method = probabilistic
+method = standard
+mask = A
 split_by = 6
-binary_threshold = 70
-max_px_size = 150
 postprocess = 0
-ratio_threshold = 5
-[standard]
-hough_threshold = 0.5
-[probabilistic]
-connectivity = 2
-line_length = 20
-line_gap = 5
-hough_threshold = 2
 [classification]
-low_threshold = 20
-high_threshold = 300
+low_threshold = 30
+high_threshold = 1000
 [plot]
 world_map = ../data/maps/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp
......
@@ -19,17 +19,17 @@ def standard(img: np.array, params: {}) -> np.array:
     tested_angles = np.linspace(-np.pi / 2, np.pi / 2, 360, endpoint=False)
     h, theta, d = hough_line(img, theta=tested_angles)
     line_peaks = hough_line_peaks(
-        h, theta, d, threshold=params[Key.dt_thresh_hough] * h.max())
+        h, theta, d, threshold=params[Key.sht_threshold] * h.max())
     labeled = label_segments(img)
     intersect_labels = get_line_label_intersections(
         labeled, line_peaks, img.shape)
     line_type_labels = get_line_type_segments(
-        labeled, intersect_labels, params[Key.dt_ratio])
+        labeled, intersect_labels, params[Key.ratio_threshold])
     mask = filter_labels(labeled, line_type_labels)
     mask = close_gaps_binary(mask)
-    mask = filter_small_objects(mask, params[Key.dt_max_px], True)
+    mask = filter_small_objects(mask, params[Key.max_px_size], True)
     return mask, len(intersect_labels), len(line_type_labels)
@@ -41,12 +41,12 @@ PROBABILISTIC HOUGH TRANSFORMATION
 def probabilistic(img, params):
     lines = probabilistic_hough_line(
-        img, threshold=params[Key.dt_thresh_prob], line_length=params[Key.dt_linelength],
-        line_gap=params[Key.dt_linegap])
+        img, threshold=params[Key.ppht_threshold], line_length=params[Key.line_length],
+        line_gap=params[Key.line_gap])
     labeled = label_segments(img)
     intersect_labels = line_segment_intersect_prob(labeled, lines)
     line_type_labels = get_line_type_segments(
-        labeled, intersect_labels, params[Key.dt_ratio])
+        labeled, intersect_labels, params[Key.ratio_threshold])
     mask = filter_labels(labeled, line_type_labels)
     return mask, len(intersect_labels), len(line_type_labels)
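Both detectors wrap scikit-image's Hough transforms; a minimal sketch of the probabilistic call using the renamed keys' values from one of the new mask files (synthetic input, not repository code):

import numpy as np
from skimage.transform import probabilistic_hough_line

img = np.zeros((64, 64), dtype=bool)
img[32, 8:56] = True  # synthetic horizontal line segment
lines = probabilistic_hough_line(img, threshold=2, line_length=20, line_gap=5)
print(lines)  # [((x0, y0), (x1, y1)), ...] endpoints of detected segments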
@@ -60,22 +60,22 @@ def detect_lines_blockwise(granule: {}, params: {}):
     split_by = 0
     # Split blocks
-    if params[Key.dt_split] >= 0:
-        split_by = params[Key.dt_split]
+    if params[Key.split_by] >= 0:
+        split_by = params[Key.split_by]
         block_size = granule.get(Column.shape)[0] // split_by
         blocks = split_blocks(img, split_by, block_size)
     else:
         blocks = [img]
     # Cleaning
-    blocks = [threshold_binary(block, params[Key.dt_thresh_bin])
+    blocks = [threshold_binary(block, params[Key.binary_threshold])
               for block in blocks]
     blocks = [filter_small_objects(
-        block, params[Key.dt_max_px]) for block in blocks]
+        block, params[Key.max_px_size]) for block in blocks]
     blocks = [close_gaps_binary(block) for block in blocks]
     # Detection
-    if params[Key.dt_method] == 'probabilistic':
+    if params[Key.method] == 'probabilistic':
         masks, mask_intersection_count, mask_line_type_count = map(list, zip(*[probabilistic(block, params) for block in
                                                                                blocks]))
     else:
@@ -86,7 +86,7 @@ def detect_lines_blockwise(granule: {}, params: {}):
     mask = combine_blocks(masks, split_by)
     # Postprocess blocks
-    if params[Key.dt_post]:
+    if params[Key.postprocessing]:
         mask, mask_intersection_count, mask_line_type_count = probabilistic(mask, params)
     granule[Column.mask] = mask
......
@@ -79,13 +79,13 @@ def _filter_by_time_and_position(day_segment_df: pd.DataFrame, south: float, nor
 def extract_granule_flights(granule: {}, params: {}):
-    processed_filepath = _processed_filepath(params[Key.flight_proc], granule)
+    processed_filepath = _processed_filepath(params[Key.in_flight_proc], granule)
     if os.path.exists(processed_filepath):
         processed = read_csv_pandas(processed_filepath, DTYPES)
     else:
-        raw_filepath = _raw_filepath(params[Key.flight_raw], granule[Column.start])
+        raw_filepath = _raw_filepath(params[Key.in_flight_raw], granule[Column.start])
         raw = read_csv_dask(raw_filepath, DTYPES, 1)
         if raw is not None:
......
+from datetime import datetime
 from multiprocessing import Pool
 import pipeline
 from analysis.classification import categorise
-from common.config import init_params
+from common.config import init_params, read_mask
 from common.constants import Key
 from common.constants import file_columns, result_columns
 from common.fileio import write_csv_dict, write_json, write_csv_pandas
@@ -11,23 +12,40 @@ from common.fileio import write_csv_dict, write_json, write_csv_pandas
 def main():
     # read params
     params = init_params()
+    mask_params = read_mask(params[Key.mask])
+    params.update(mask_params)
+    #
+    # if params[Key.mask] == 'all':
+    #     mask_params
+    print('%--------------------------------------%')
+    print('Start: ' + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    print('Number of files: ' + str(len(params[Key.input_files])))
+    print('Method: ' + params[Key.method])
+    print('Mask: ' + params[Key.mask].upper())
+    print('Number of processes: ' + str(params[Key.number_of_processes]))
+    print('Output: ' + params[Key.out_dir])
+    print('%--------------------------------------%')
     # create processes
-    pool = Pool(processes=params[Key.num_proc])
-    jobs = [(input_file, params) for input_file in params[Key.input]]
+    pool = Pool(processes=params[Key.number_of_processes])
+    jobs = [(input_file, params) for input_file in params[Key.input_files]]
     # run pipeline
     granules = pool.starmap(pipeline.run, jobs)
-    # write results
+    # write logs
     write_csv_dict(params[Key.out_proc], granules, file_columns)
     write_json(params[Key.out_conf], params)
-    # run analysis
+    # run analysis and write results
     results = categorise(granules, params)
     write_csv_pandas(results, params[Key.out_res], result_columns)
     # plot_flight_pixel_ratio(results)
+    print('%--------------------------------------%')
+    print('End: ' + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    print('%--------------------------------------%')
 if __name__ == '__main__':
......
+[mask]
+binary_threshold = 50
+max_px_size = 120
+sht_threshold = 0.3
+ppht_threshold = 1
+connectivity = 1
+line_length = 10
+line_gap = 3
+ratio_threshold = 3
+flight_trail_width = 0.7
\ No newline at end of file
+[mask]
+binary_threshold = 70
+max_px_size = 120
+sht_threshold = 0.5
+ppht_threshold = 2
+connectivity = 2
+line_length = 20
+line_gap = 5
+ratio_threshold = 5
+flight_trail_width = 0.5
\ No newline at end of file
+[mask]
+binary_threshold = 90
+max_px_size = 200
+sht_threshold = 0.9
+ppht_threshold = 1
+connectivity = 2
+line_length = 30
+line_gap = 10
+ratio_threshold = 7
+flight_trail_width = 0.2
\ No newline at end of file
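The three new [mask] files above, together with the commented-out mask == 'all' branch in main, hint at where the "wip: run with all configurations" work is heading. One hedged sketch of such a loop (the mask names 'a', 'b', 'c' are assumptions, not confirmed by the diff):

from common.config import init_params, read_mask

params = init_params()
for mask_name in ('a', 'b', 'c'):  # assumed names of the three mask files
    run_params = dict(params)
    run_params.update(read_mask(mask_name))  # override per-mask detection keys
    # ...dispatch pipeline.run over the input files with run_params...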
@@ -30,7 +30,7 @@ def run(filepath: str, params: {}):
     """
     ANALYSIS
     """
-    granule = metrics.collect_metrics(granule)
+    granule = metrics.collect_metrics(granule, params)
     """
     PLOTTING
......
@@ -5,7 +5,7 @@ from common.constants import Column
 def plot_flight_pixel_ratio(df: pd.DataFrame):
-    y_values = df[Column.flight_pixel_ratio].to_list()
+    y_values = df[Column.flight_to_contrail_area].to_list()
     x_values = range(0, len(y_values))
     plt.plot(x_values, y_values)
     plt.show()
@@ -52,10 +52,10 @@ def preprocess_image(granule: {}, params: {}) -> {}:
     b12 = get_sds_array(b12_path)
     # normalize arrays between 0 and 100
-    b11_n = _normalize(b11, params[Key.pp_scale])