Commit cf248917 authored by Tilmann Sager's avatar Tilmann Sager
Browse files

Implemented straigh hough line

parent c3e73768
......@@ -6,9 +6,10 @@ bands = 31, 32
calculate_radiance = 0
[data]
hdf = ../contraildetect/data/hdf
flights = ../contraildetect/data/flights
output = ../contraildetect/data/output
hdf = ../data/granules
flights = ../data/flights/raw
output = ../data/output
world_map = ../data/maps/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp
[extraction]
band_keyword = BAND
......
......@@ -150,3 +150,12 @@ def filter_method():
def sigma():
return int(cp.get(detect_key, 'sigma'))
"""
PLOTS
"""
def world_map():
return cp.get(data_key, 'world_map')
......@@ -17,7 +17,7 @@ class Columns:
flight_count_norm = 'flight_count_norm'
bands = 'bands'
cda = 'cda'
cda_t = 'cda_t'
cda_i = 'cda_i'
lines = 'lines'
line_arr = 'line_arr'
line_count = 'line_count'
......@@ -27,6 +27,8 @@ class Columns:
px_count = 'pixel_count'
px_count_norm = 'pixel_count_norm'
rel_flight_px = 'flight_to_pixel_relation'
contrail_mask = 'contrail_mask'
contrail_mask_post = 'contrail_masked_processed'
columns_to_save = [Columns.hdf,
......@@ -43,6 +45,17 @@ columns_to_save = [Columns.hdf,
Columns.north,
Columns.south]
def columns_as_list():
columns = []
for column in list(Columns.__dict__.values()):
if type(column) == str:
columns.append(column)
return columns
DTYPES = {
'FLIGHT_ID': np.int,
'SEGMENT_NO': np.int,
......
import pandas as pd
import dask.dataframe as dd
import re
import os
import numpy as np
from collections import deque
import glob
import time
DTYPES = {
'FLIGHT_ID': int,
'SEGMENT_NO': int,
'LATITUDE': np.double,
'LONGITUDE': np.double,
'ALTITUDE': np.double,
'SEGMENT_MONTH': int,
'SEGMENT_DAY': int,
'SEGMENT_YEAR': int,
'SEGMENT_HOUR': int,
'SEGMENT_MIN': int,
'SEGMENT_SEC': np.double,
'EMISSIONS_MODE': np.double,
'TEMPERATURE': np.double,
'PRESSURE': np.double,
'HUMIDITY': np.double,
'SPEED': np.double,
'SEGMENT_TIME': object,
'TRACK_DISTANCE': np.double,
'THRUST': np.double,
'WEIGHT': np.double,
'FUELBURN': np.double,
'CO': np.double,
'HC': np.double,
'NOX': np.double,
'PMNV': np.double,
'PMSO': np.double,
'PMFO': np.double,
'CO2': np.double,
'H2O': np.double,
'SOX': np.double
}
def _dd_read_csv(filename, data_types):
return dd.read_csv(filename, skiprows=[1], dtype=data_types)
def _dd_write_csv(df, filename):
dd.to_csv(df, filename)
return os.path.exists(filename)
def _read_csv(filename, dtypes):
return pd.read_csv(filename, dtype=dtypes)
def _write_csv(df: pd.DataFrame, filename: str):
df.to_csv(filename, index=False)
return os.path.exists(filename)
def _try_opening_csv(filepath_list: []):
faulty_files = []
for filepath in filepath_list:
try:
df = _dd_read_csv(filepath, DTYPES)
df.tail(10)
except:
faulty_files.append(filepath)
continue
return faulty_files
def _check_last_line_in_csv(filepath_list: []):
files_to_repair = []
for filepath in filepath_list:
with open(filepath, 'r') as f:
if not re.match("^[0-9]+$", deque(f, 1)[0].split(',')[0]):
files_to_repair.append(filepath)
return files_to_repair
def _delete_last_line_in_csv(filepath_list: []):
repaired_files = []
for filepath in filepath_list:
with open(filepath, "r+", encoding="utf-8") as file:
# find end of file
file.seek(0, os.SEEK_END)
# skip the last character
pos = file.tell() - 1
# search for the newline character (i.e. the last row)
while pos > 0 and file.read(1) != "\n":
pos -= 1
file.seek(pos, os.SEEK_SET)
# delete the last line
if pos > 0:
file.seek(pos, os.SEEK_SET)
file.truncate()
repaired_files.append(filepath)
# wait until operation is finished
time.sleep(5)
return repaired_files
def main():
files_to_process = glob.glob('../data/flights/raw/*.csv')
faulty_files = _try_opening_csv(files_to_process)
files_to_repair = _check_last_line_in_csv(faulty_files)
repaired_files = _delete_last_line_in_csv(files_to_repair)
print(repaired_files)
if __name__ == "__main__":
main()
\ No newline at end of file
import os
from glob import glob
import pandas as pd
from tqdm import tqdm
import analysis
import config
import plots
from constants import columns_to_save
from stages import prepare, transformation, detection, flight_filtering, segmentation, stats
from stages import prepare, detection, flight_filtering, stats, preprocessing, postprocessing
from util import check_and_repair_csv
from glob import glob
tqdm.pandas()
......@@ -25,6 +24,12 @@ input_files = glob(config.product_dir() + '/*.hdf')
# TODO: improve logging
# TODO: filter time per day
## URGENT TODO
# split picture into chunks
# streamline model
# filter
print('Creating job')
job_df = prepare.create_job(input_files)
......@@ -33,48 +38,56 @@ job_df = prepare.create_job(input_files)
"""
CHECK FLIGHT CSV
"""
print('Checking flight files')
check_and_repair_csv(job_df)
# print('Checking flight files')
# check_and_repair_csv(job_df)
# TODO: print status
"""
FILTER FLIGHT CSV
"""
print('Filter flights')
job_df = job_df.progress_apply(flight_filtering.filter_flights, axis=1)
# print('Filter flights')
# job_df = job_df.progress_apply(flight_filtering.filter_flights, axis=1)
"""
PREPROCESSING
"""
# TODO: split picture into chunks
# TODO: get geoinformation/projection
"""
TRANSFORMATION
"""
print('Running CDA')
job_df = job_df.progress_apply(transformation.cda_preprocess, axis=1)
print('Running thresholding')
job_df = job_df.progress_apply(transformation.threshold, axis=1)
job_df = job_df.progress_apply(preprocessing.cda_preprocess, axis=1)
job_df = job_df.progress_apply(preprocessing.rescale_intensity, axis=1)
# print('Running thresholding')
# job_df = job_df.progress_apply(transformation.threshold, axis=1)
"""
DETECTION
"""
print('Running detection')
job_df = job_df.progress_apply(detection.probabilistic_hough, axis=1)
# job_df = job_df.progress_apply(detection.probabilistic_hough, axis=1)
job_df = job_df.progress_apply(detection.straight_hough, axis=1)
"""
SEGMENTATION
"""
print('Running segmentation')
job_df = job_df.progress_apply(segmentation.run, axis=1)
print('Running postprocessing')
job_df = job_df.progress_apply(postprocessing.postprocess, axis=1)
"""
ASSEMBLING INFORMATION
"""
print('Assembling information')
job_df = job_df.progress_apply(stats.collect, axis=1)
# print('Assembling information')
# job_df = job_df.progress_apply(stats.collect, axis=1)
"""
ANALYSIS
"""
print('Running analysis')
job_df = analysis.flight_pixel_relation(job_df)
# print('Running analysis')
# job_df = analysis.flight_pixel_relation(job_df)
"""
RESULTS
......@@ -85,13 +98,20 @@ job_df.to_csv(os.path.join(config.output_dir(), 'output.csv'), columns=columns_t
"""
PLOTS
"""
# TODO: one granule per day
print('Creating plots')
all_flights = pd.DataFrame()
for flight_csv in glob(config.processed_flights() + '/*.csv'):
all_flights = all_flights.append(pd.read_csv(flight_csv))
# all_flights = pd.DataFrame()
# for flight_csv in glob(config.processed_flights() + '/*.csv'):
# all_flights = all_flights.append(pd.read_csv(flight_csv))
# plots.boxplot(job_df)
# plots.flights(all_flights)
# plots.granules(job_df)
plots.contrails(job_df)
plots.density_map(job_df, all_flights)
# plots.contrails(job_df)
# plots.density_map(job_df, include=['granules', 'contrails'])
# job_df.apply(plots.plot_coordinates, axis=1)
# job_df.progress_apply(plots.export_shapefile, axis=1)
job_df.progress_apply(plots.plot_contrail_mask, axis=1)
job_df.progress_apply(plots.save_contrail_masks, axis=1)
......@@ -6,7 +6,7 @@ def run(granule):
TRANSFORMATION
"""
granule = transformation.cda_preprocess(granule)
granule = transformation.threshold(granule)
granule = transformation.rescale_intensity(granule)
"""
DETECTION
......
import geopandas as gpd
import geoplot as gplt
import os
# import geopandas as gpd
# import geoplot as gplt
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pyproj import CRS
from shapely.geometry import Point, Polygon, LineString
# import pandas as pd
# import seaborn as sns
# from pyproj import CRS
# from shapely.geometry import Point, Polygon, LineString
import config
from constants import Columns as col
sns.set_theme(style="whitegrid")
world_map = gpd.read_file('../contraildetect/data/maps/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp')
crs = CRS.from_epsg(4326)
default_include = ['flights', 'granules', 'contrails']
alpha = 0.1
def boxplot(df: pd.DataFrame, filename: str = 'flight_counts.png'):
flight_counts = df[df[col.flight_count] > 0][col.flight_count]
plot = sns.boxplot(x=flight_counts)
fig = plot.get_figure()
fig.savefig(filename)
def histogram(img, filename):
plot = sns.histplot(img.ravel(), bins=256)
fig = plot.get_figure()
fig.savefig(filename)
pass
def _flight_gdf(df: pd.DataFrame, visualize_as: str):
geometry = []
if visualize_as == 'pt':
geometry = [Point(xy) for xy in zip(df['LONGITUDE'], df['LATITUDE'])]
if visualize_as == 'line':
geometry = []
return gpd.GeoDataFrame(crs=crs, geometry=geometry)
def flights(flight_df: pd.DataFrame, visualize_as: str = 'pt', filename: str = 'flight_density.png', cmap='Reds'):
fig, ax = plt.subplots(figsize=(16, 10))
world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
gdf = _flight_gdf(flight_df, visualize_as)
gplt.kdeplot(gdf, ax=ax, cmap=cmap, shade=True, legend=False, alpha=0.5, markersize=1)
# gdf.plot(ax=ax, cmap=cmap, legend=False, alpha=0.5, markersize=1)
plt.savefig(filename)
def _contrail_gdf(df: pd.DataFrame, visualize_as: str):
geometry = []
for index, row in df.iterrows():
for line in row[col.line_coord]:
if visualize_as == 'line':
geometry.append(LineString([Point(line[0]), Point(line[1])]))
if visualize_as == 'pt':
geometry.append(Point(line[0]))
geometry.append(Point(line[1]))
return gpd.GeoDataFrame(crs=crs, geometry=geometry)
def contrails(df: pd.DataFrame, visualize_as: str = 'line', filename: str = 'contrails.png', cmap: str = 'Blues'):
fig, ax = plt.subplots(figsize=(16, 10))
world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
gdf = _contrail_gdf(df, visualize_as)
gdf.plot(ax=ax, cmap=cmap, legend=False, alpha=alpha)
plt.savefig(filename)
def _granule_gdf(df: pd.DataFrame, visualize_as: str):
points_lst = []
for index, row in df.iterrows():
lat_min = min(row[col.north], row[col.south])
lat_max = max(row[col.north], row[col.south])
lon_min = min(row[col.west], row[col.east])
lon_max = max(row[col.west], row[col.east])
points_lst.append(
[Point(lon_min, lat_min), Point(lon_min, lat_max), Point(lon_max, lat_max), Point(lon_max, lat_min)])
geometry = [Polygon(points) for points in points_lst]
return gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
def granules(df: pd.DataFrame, visualize_as: str = 'poly', filename: str = 'granules.png', cmap: str = 'Blues'):
fig, ax = plt.subplots(figsize=(16, 10))
world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
gdf = _granule_gdf(df, visualize_as)
gdf.plot(ax=ax, cmap=cmap, legend=False, alpha=alpha)
plt.savefig(filename)
def density_map(df: pd.DataFrame, flight_df: pd.DataFrame,
include: [str] = default_include, title: str = 'density_map'):
fig, ax = plt.subplots(figsize=(16, 10))
world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
if 'granules' in include:
granule_gdf = _granule_gdf(df, 'TODO')
granule_gdf.plot(column=col.rel_flight_px, ax=ax, cmap="Blues", legend=False, alpha=alpha)
if 'flights' in include:
flight_gdf = _flight_gdf(flight_df, 'pt')
gplt.kdeplot(flight_gdf, ax=ax, cmap='Reds', shade=True, legend=False, alpha=0.5, markersize=1)
if 'contrails' in include:
contrail_gdf = _contrail_gdf(df, 'line')
contrail_gdf.plot(ax=ax, cmap='Greens', legend=False, alpha=alpha)
ax.set_title(title)
plt.savefig(title + '.png')
# sns.set_theme(style="whitegrid")
# world_map = gpd.read_file(config.world_map())
# crs = CRS.from_epsg(4326)
# default_include = ['flights', 'granules', 'contrails']
# alpha = 0.1 # transparency
# def boxplot(df: pd.DataFrame, filename: str = 'flight_counts.png'):
# flight_counts = df[df[col.flight_count] > 0][col.flight_count]
# plot = sns.boxplot(x=flight_counts)
# fig = plot.get_figure()
# fig.savefig(filename)
#
#
# def histogram(img, filename):
# plot = sns.histplot(img.ravel(), bins=256)
# fig = plot.get_figure()
# fig.savefig(filename)
# pass
#
#
# def _flight_gdf(df: pd.DataFrame, visualize_as: str):
# geometry = []
#
# if visualize_as == 'pt':
# geometry = [Point(xy) for xy in zip(df['LONGITUDE'], df['LATITUDE'])]
# if visualize_as == 'line':
# geometry = []
#
# return gpd.GeoDataFrame(crs=crs, geometry=geometry)
#
#
# def flights(flight_df: pd.DataFrame, visualize_as: str = 'pt', filename: str = 'flight_density.png', cmap='Reds'):
# fig, ax = plt.subplots(figsize=(16, 10))
# world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
# gdf = _flight_gdf(flight_df, visualize_as)
# gplt.kdeplot(gdf, ax=ax, cmap=cmap, shade=True, legend=False, alpha=0.5, markersize=1)
# # gdf.plot(ax=ax, cmap=cmap, legend=False, alpha=0.5, markersize=1)
# plt.savefig(filename)
#
#
# def _contrail_gdf(df: pd.DataFrame, visualize_as: str):
# geometry = []
# for index, row in df.iterrows():
# for line in row[col.line_coord]:
# if visualize_as == 'line':
# geometry.append(LineString([Point(line[0][0], line[1][1]), Point(line[1][0], line[0][1])]))
# if visualize_as == 'pt':
# geometry.append(Point(line[0][0], line[0][1]))
# geometry.append(Point(line[1][0], line[1][1]))
# return gpd.GeoDataFrame(crs=crs, geometry=geometry)
#
#
# def contrails(df: pd.DataFrame, visualize_as: str = 'line', filename: str = 'contrails.png', cmap: str = 'Blues'):
# fig, ax = plt.subplots(figsize=(16, 10))
# world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
# gdf = _contrail_gdf(df, visualize_as)
# gdf.plot(ax=ax, cmap=cmap, legend=False, alpha=alpha)
# plt.savefig(filename)
#
#
# def _granule_gdf(df: pd.DataFrame, visualize_as: str):
# points_lst = []
#
# for index, row in df.iterrows():
# lat_min = min(row[col.north], row[col.south])
# lat_max = max(row[col.north], row[col.south])
# lon_min = min(row[col.west], row[col.east])
# lon_max = max(row[col.west], row[col.east])
#
# points_lst.append(
# [Point(lon_min, lat_min), Point(lon_min, lat_max), Point(lon_max, lat_max), Point(lon_max, lat_min)])
#
# geometry = [Polygon(points) for points in points_lst]
#
# return gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
#
#
# def granules(df: pd.DataFrame, visualize_as: str = 'poly', filename: str = 'granules.png', cmap: str = 'Blues'):
# fig, ax = plt.subplots(figsize=(16, 10))
# world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
# gdf = _granule_gdf(df, visualize_as)
# gdf.plot(ax=ax, cmap=cmap, legend=False, alpha=alpha)
# plt.savefig(filename)
#
#
# def export_shapefile(row):
# geometry = []
# for line in row[col.line_coord]:
# geometry.append(LineString([Point(line[0]), Point(line[1])]))
# gdf = gpd.GeoDataFrame(crs=crs, geometry=geometry)
# filename = os.path.join(config.output_dir(), row[col.name] + ".shp")
# gdf.to_file(driver='ESRI Shapefile', filename=filename)
# print("Exported shapefile: " + filename)
#
#
# def density_map(df: pd.DataFrame, flight_df: pd.DataFrame = None,
# include: [str] = default_include, title: str = 'density_map'):
# fig, ax = plt.subplots(figsize=(16, 10))
# world_map.to_crs(epsg=4326).plot(ax=ax, color='lightgrey')
#
# if 'granules' in include:
# granule_gdf = _granule_gdf(df, 'TODO')
# granule_gdf.plot(column=col.rel_flight_px, ax=ax, cmap="Blues", legend=False, alpha=alpha)
#
# if 'flights' in include:
# flight_gdf = _flight_gdf(flight_df, 'pt')
# gplt.kdeplot(flight_gdf, ax=ax, cmap='Reds', shade=True, legend=False, alpha=0.5, markersize=1)
#
# if 'contrails' in include:
# contrail_gdf = _contrail_gdf(df, 'line')
# contrail_gdf.plot(ax=ax, cmap='Greens', legend=False, alpha=alpha)
#
# ax.set_title(title)
# plt.savefig(title + '.png')
def plot_contrail_mask(granule):
contrail_cmap = plt.cm.get_cmap("jet").copy()
contrail_cmap.set_under('k', alpha=0)
fig, ax = plt.subplots(figsize=(1, 1))
ax.imshow(granule[col.cda], cmap=plt.cm.get_cmap("gray"))
ax.imshow(granule[col.contrail_mask_post], interpolation='none', cmap=contrail_cmap, clim=[0.5, 1])
ax.set_axis_off()
filepath = os.path.join(config.output_dir(), granule[col.name]) + '.png'
fig.savefig(filepath, bbox_inches='tight', pad_inches=0, dpi=granule[col.dim][0])
plt.clf()
def save_contrail_masks(granule):
fig, ax = plt.subplots(figsize=(1, 1))
ax.imshow(granule[col.contrail_mask_post])
ax.set_axis_off()
filepath = os.path.join(config.output_dir(), granule[col.name]) + '_mask.png'
fig.savefig(filepath, bbox_inches='tight', pad_inches=0, dpi=granule[col.dim][0])
plt.clf()
# def plot_coordinates(row: pd.Series):
# geometry = []
# lon_min = min(row[col.east], row[col.west])
# lon_max = max(row[col.east], row[col.west])
# lat_min = min(row[col.north], row[col.south])
# lat_max = max(row[col.north], row[col.south])
#
# from mpl_toolkits.basemap import Basemap
# import matplotlib.pyplot as plt
#
# m = Basemap(projection='mill',
# llcrnrlat=25,
# llcrnrlon=-130,
# urcrnrlat=50,
# urcrnrlon=-60,
# resolution='l')
#
# m.drawcoastlines()
# m.drawcountries(linewidth=2)
"""
north-south: latitude
west-east: longitude
west-east: longitude -> x
north-south: latitude -> y
"""
import matplotlib.pyplot as plt
import numpy as np
from skimage.draw import line_aa
from skimage.draw import line_aa, line
from skimage.feature import canny
from skimage.filters import scharr
from skimage.morphology import binary_closing, remove_small_objects, binary_dilation
from skimage.transform import probabilistic_hough_line
from skimage.morphology import binary_closing, remove_small_objects, binary_dilation, label
from skimage.transform import probabilistic_hough_line, hough_line, hough_line_peaks
import config
from constants import Columns as col