Commit 9d75de11 authored by Florian Grosse's avatar Florian Grosse
Browse files

Branch Development Tip

In this branch, the 0th level in the KB is a normal VMS, which leads to
problems regarding when and how to insert samples into the 0th level:
either fully dependent models of a new model get timestamps intersected
sideways, or, alternatively, the 0th level is not available for a new
model, which then needs a buffer-VMS somewhere else. The master branch
will get a different style of level 0 now.
parent 621a89cc
......@@ -3,7 +3,7 @@
"""
...usage
#VectorModelSpace, a structure containing a list of vectorial models and their pragmatic properties.
#VectorModelSpace, a structure containing a list of vectorial models and their pragmatic properties. Assumed to remain sorted during the execution of ConML.
"""
struct VMS{D<:Number}
#column-major layout
......@@ -81,19 +81,24 @@ function dataAsMatrix(v::VMS)
# res = reinterpret(SMatrix{N,M,D,L}, v.vals) # this reproducably freezes everything. seems broken for large arrays, try again with a later version of julia
return mapreduce(adjoint, vcat, v.vals)
end
# Timeslice variant: return the values with timestamps in [tmin, tmax] (inclusive)
# as a (#samples, #features) matrix. Assumes v.T is sorted, consistent with
# dataAsMatrixWithTimestamps.
function dataAsMatrix(v::VMS, (tmin, tmax)::Tuple{Int,Int})
    # `findfirst(tmin, v.T)` would throw (an Int is not a predicate); binary search
    # is both correct and O(log n) on the sorted timestamp vector.
    irange = UnitRange(searchsortedfirst(v.T, tmin), searchsortedlast(v.T, tmax))
    # stack the adjoint of each sample vector to get (#samples, #features)
    return mapreduce(adjoint, vcat, v.vals[irange])
end
"""
dataAsMatrixWithTimestamps(VMS, (Tₘᵢₙ, Tₘₐₓ))
dataAsMatrixWithTimestamps(VMS; (Tₘᵢₙ, Tₘₐₓ))
Get a timeslice of values of a VMS as a matrix of appropriate type plus a flat vector of timestamps belonging to those values.
Get a slice of values of a VMS as a matrix of appropriate type plus a flat vector of timestamps belonging to those values.
Timestamps in the VMS are expected to be ordered.
Shape of the resulting matrix will be (#samples, #features) in accordance to the format used by the Scikit API.
Return format is (timestamps, values)
"""
# Whole-VMS variant: all timestamps plus the full value matrix.
function dataAsMatrixWithTimestamps(v::VMS)
    # each sample is stacked as a row, yielding shape (#samples, #features)
    values = mapreduce(adjoint, vcat, v.vals)
    return (v.T, values)
end
function dataAsMatrixWithTimestamps(v::VMS, (tmin::Int, tmax::Int))
irange = UnitRange(searchsortedfirst(v.T,tmin), searchsortedlast(v.T,tmax))
return (v.T[irange], mapreduce(adjoint, vcat, v.vals[irange]))
......@@ -113,12 +118,37 @@ end
timesubset(VMS, (Tₘᵢₙ, Tₘₐₓ))
Return a copy of a VMS containing only those samples in the timerange (Tₘᵢₙ, Tₘₐₓ) (inclusive).
Assumes sorted timestamps.
"""
# Return a copy of `v` restricted to samples with timestamps in [tmin, tmax]
# (inclusive). Assumes sorted timestamps (see docstring above).
function timesubset(v::VMS{D}, (tmin, tmax)::Tuple{Int,Int}) where {D}
    # `findfirst(tmin, v.T)` would throw because an Int is not callable as a
    # predicate; use binary search as the sibling methods do.
    irange = UnitRange(searchsortedfirst(v.T, tmin), searchsortedlast(v.T, tmax))
    return VMS{D}(v.T[irange], v.Σ[irange], v.Z[irange], v.vals[irange], v.fields)
end
"""
ΣZsubset(VMS, Σ, Z)
Return a copy of a VMS containing only those samples with specified Σ and Z.
"""
# Return a copy of `v` containing only the samples whose Σ and Z both match
# the given values.
function ΣZsubset(v::VMS{D}, Σ::String, Z::String) where {D}
    # build one boolean keep-mask in a single pass over both label vectors
    mask = [σ == Σ && z == Z for (σ, z) in zip(v.Σ, v.Z)]
    return VMS{D}(v.T[mask], v.Σ[mask], v.Z[mask], v.vals[mask], v.fields)
end
"""
subset(VMS, (Tₘᵢₙ, Tₘₐₓ), Σ, Z)
Return a copy of a VMS containing only those samples in the timerange (Tₘᵢₙ, Tₘₐₓ) (inclusive) with specified Σ and Z.
Assumes sorted timestamps.
"""
# Return a copy of `v` restricted to the timerange [tmin, tmax] (inclusive)
# AND to samples with the given Σ and Z. Assumes sorted timestamps.
function subset(v::VMS{D}, (tmin, tmax)::Tuple{Int,Int}, Σ::String, Z::String) where {D}
    # binary search replaces the broken `findfirst(tmin, v.T)` (an Int is not a
    # predicate) and matches the approach used by dataAsMatrixWithTimestamps
    irange = UnitRange(searchsortedfirst(v.T, tmin), searchsortedlast(v.T, tmax))
    # within the time window, keep only indices with matching Σ and Z
    mask = [i for i in irange if v.Σ[i] == Σ && v.Z[i] == Z]
    return VMS{D}(v.T[mask], v.Σ[mask], v.Z[mask], v.vals[mask], v.fields)
end
"""
featureNumber(VMS)
......@@ -148,10 +178,13 @@ end
dropfeatures!(VMS, AbstractArray{String})
Delete a list of features from a VMS without copying.
Specified features that don't exist in the supplied VMS are silently ignored.
"""
# Delete the listed features from `v` in place. Features not present in `v`
# are silently ignored.
function dropfeatures!(v::VMS, dropfeats::AbstractArray{String})
    # true for features to KEEP (i.e. not listed in dropfeats)
    mask = .!(in(dropfeats).(v.fields))
    # shrink every sample's value vector to the kept features, in place
    map!(x -> x[mask], v.vals, v.vals)
    # was `x.fields = x.fields[mask]`: `x` is undefined here (typo for `v`),
    # and VMS is an immutable struct, so the fields vector must be shrunk
    # in place rather than rebound
    deleteat!(v.fields, .!mask)
    return v
end
"""
......@@ -163,6 +196,8 @@ Specified features that don't exist in the supplied VMS are silently ignored.
# Keep only the listed features in `v`, modifying it in place. Specified
# features that don't exist in `v` are silently ignored.
function keepfeatures!(v::VMS, keepfeats::AbstractArray{String})
    # true for features that appear in keepfeats
    mask = in(keepfeats).(v.fields)
    # shrink every sample's value vector to the kept features, in place
    map!(x -> x[mask], v.vals, v.vals)
    # was `x.fields = x.fields[mask]`: `x` is undefined here (typo for `v`),
    # and VMS is an immutable struct, so mutate the fields vector in place
    deleteat!(v.fields, .!mask)
    return v
end
# """
......@@ -224,7 +259,7 @@ struct MachineModel
model #Datentyp? Sci-Kit API hier schon referenzieren?
"The level in the KnowledgeBase on which this model resides. (for usability purposes, it's implicitly contained in the ID and Z)"
level::Int
"Source model UIDs in the same KnowledgeBase on the previous level or selected Feature names of underlying vector models (if on level 1)"
"Source model UIDs in the same KnowledgeBase on the previous level or [ Σ, Z, selected Feature names ] of underlying vector models (if on level 1)"
source::Vector{String}
# XXX ggf mehr Felder, zB field-names (wie im VMS) ...
"""
......@@ -248,6 +283,7 @@ struct KnowledgeBase{D<:Number}
level_0::VMS{D}
higherLevels::Vector{Dict{String,MachineModel}}
lastusedUID::Dict{Int,Int}
newmodeltempLB::Ref{VMS}(VMS{Int}())
# TODO add means of saving LBs and models and retrieving datablocks
function KnowledgeBase{D}() where {D<:Number}
new(VMS{D}(), Vector{Dict{String,MachineModel}}(), Dict(1=>0))
......@@ -297,8 +333,21 @@ end
# level <= 1, so the sources are vector models in KB level 0.
# For level-1 models, mm.source is laid out as [Σ, Z, feature names...].
function _getsourcedata(::Val{false}, mm::MachineModel, kb::KnowledgeBase)
    # (removed the stale `return dataAsMatrixWithTimestamps(kb[0], mm.T)` that
    # made the lines below unreachable — a leftover from the previous revision)
    sig = mm.source[1]
    z = mm.source[2]
    feats = mm.source[3:end]
    # restrict level 0 to the model's time range, Σ and Z, then to its features
    return dataAsMatrixWithTimestamps(keepfeatures!(subset(kb[0], mm.T, sig, z), feats))
end
# to inject a temporary (preliminary) learnblock
function _getsourcedata(mm::MachineModel, lb::VMS)
# level must be 1
if mm.level != 1
error("Tried to inject temporary learnblock into a model not on the first level")
sig = mm.source[1]
z = mm.source[2]
feats = mm.source[3:end]
return dataAsMatrixWithTimestamps(keepfeatures!(subset(lb, mm.T, sig, z), feats))
end
# if multiple source pairs of Ts (timestamps) and Ss (source data blocks) are present, recursively intersect them
......
......@@ -14,7 +14,7 @@ end
return ((first(right) >= first(left)-leftextend) && (last(right) <= last(left)+leftextend)) || ((first(left) >= first(right)-rightextend) && (last(left) <= last(right)+rightextend))
end
# don't think we need this, so it is commented out for now
# function purgequeue(queues, level::Int, modelID::String)
# for l in (level, level + 1)
# purgemodels = filter(queues[l]) do oldmodel
......@@ -28,20 +28,20 @@ end
"""
TODO
""" #TODO docstring
function deconstruction(kb::KnowledgeBase, par::ConstructivistMLParameters, construction::Construct, reconstruction::Reconstruct, newModel::MachineModel) #last thing is the output from Reconstruction
function deconstruction(kb::KnowledgeBase, par::ConstructivistMLParameters, construction::Construct, reconstruction::Reconstruct, (newModel::MachineModel, tempLB::VMS)) #last thing is the output from Reconstruction
kb.newmodeltempLB[] = tempLB # save temporary learnblock from initial reconstruction before it gets into KB[0]
queues = Dict{Int, Vector{MachineModel}}()
for i in 1:par.highestLevel
queues[i] = Vector{MachineModel}()
end
push!(queues[1], newModel)
while length(kb.higherLevels) < par.highestLevel
push!(kb.higherLevels, Dict{String,MachineModel}())
end # makes sure that when fetching an empty level in the KB, no error is thrown
for lvl in 1:par.highestLevel
newmodels = queues[lvl]
while length(kb.higherLevels) < par.highestLevel
push!(kb.higherLevels, Dict{String,MachineModel}())
end # makes sure that when fetching an empty level in the KB, no error is thrown
end
while !isempty(newmodels)
model = popfirst!(newmodels)
# get related models and relations
......@@ -76,6 +76,7 @@ function deconstruction(kb::KnowledgeBase, par::ConstructivistMLParameters, cons
end
end
# all related models identified, deconstruct consecutively
global_success::Bool = false
# TΣZ
......@@ -114,28 +115,26 @@ function deconstruction(kb::KnowledgeBase, par::ConstructivistMLParameters, cons
for rel in σz_relatives
success = σz_decon(kb, par, model, rel, lvl, construction, reconstruction, queues)
global_success = global_success | success
if success && par.deconstructionMode === :minimal
if success # model extended, new model enters KB/Deconstruction so this model here shouldn't be saved
σz_success = true
break # this inner loop
end
σz_success = σz_success | success
end
if global_success && par.deconstructionMode === :minimal
if global_success && par.deconstructionMode === :minimal || σz_success
continue # to next deconstruction
end
# TΣ
if !σz_success
for rel in tσ_relatives
success = tσ_decon(kb, par, model, rel, lvl, construction, reconstruction, queues)
global_success = global_success | success
if success && par.deconstructionMode === :minimal
break # this inner loop
end
end
if global_success && par.deconstructionMode === :minimal
continue # to next deconstruction
for rel in tσ_relatives
success = tσ_decon(kb, par, model, rel, lvl, construction, reconstruction, queues)
global_success = global_success | success
if success && par.deconstructionMode === :minimal
break # this inner loop
end
end
if global_success && par.deconstructionMode === :minimal
continue # to next deconstruction
end
# nothing for TZ
for rel in tz_relatives
......@@ -144,6 +143,7 @@ function deconstruction(kb::KnowledgeBase, par::ConstructivistMLParameters, cons
if !global_success
# not a single successful deconstruction, so save model
# TODO what about the learnblock?
kb[lvl][model.ID] = model
end
end
......@@ -158,12 +158,36 @@ end
# full deconstruction
# NOTE(review): work-in-progress stub — both fusion branches contain only
# comments, and the reliability check below uses placeholder arguments.
# Do not call this in production yet.
function full_decon(kb::KnowledgeBase, par::ConstructivistMLParameters, newmodel::MachineModel, oldmodel::MachineModel)
success = false
# try to create feature intersection
featurematches = [feat for feat in newmodel.source if feat in oldmodel.source] # inherits order from newmodel
if length(featurematches) >= 2
# fuse models on feature intersection
# enter reconstruction, save if successfull, else differentiate
else
# check for matching timestamps
# NOTE(review): `newmodT`/`oldmodT` are not defined in this scope — presumably
# they should come from getsourcedata(...) as in σz_decon; confirm before use.
interT = [t for t in newmodT if t in oldmodT]
if length(interT) >= par.LearnBlockMinimum
# enough timestamps, so check reliability
# NOTE should think about the effect of class permutations here
# NOTE(review): placeholder condition — `hcat()'` and `'n'` are dummy arguments,
# and an alpha coefficient is a number, not a Bool; this needs a threshold
# comparison before it can work.
if Krippendorff.krippendorff_alpha(hcat()' , 'n')
# fuse models on matching timestamps
# enter reconstruction, save if successfull, else differentiate
else
# model disposal
end
end
# if not enough timestamps, do nothing (leads to model saving)
end
end
# ΣZ deconstruction
function σz_decon(kb::KnowledgeBase, par::ConstructivistMLParameters, newmodel::MachineModel, oldmodel::currentlevel::Int, currentlevel::Int, construction::Construct, reconstruction::Reconstruct, deconqueues::Dict{Int,Vector{MachineModel}})
# consider putting time-check into main deconstruction method for faster checks
# NOTE consider putting time-check into main deconstruction method for faster checks
success = false
# dependent on settings, get an optimized method for comparing T distance
σz_timecheck = _get_σz_timecheck_method(par.deconstructionMaxDistanceT)
......@@ -172,26 +196,51 @@ function σz_decon(kb::KnowledgeBase, par::ConstructivistMLParameters, newmodel:
# try to create feature intersection
featurematches = [feat for feat in newmodel.source if feat in oldmodel.source] # inherits order from newmodel
if length(featurematches) >= 2
# get sourcedata (input features) of new and old model
(newmodT, newmodS) = getsourcedata(newmodel,kb)
# get sourcedata (input features) of new and old model + predicted targets
# NOTE this version uses predictions not targets
(newmodT, newmodS) = getsourcedata(newmodel, ifelse(currentlevel==1, kb.newmodeltempLB, kb) ) # if on level 1, source data are not yet in KB
newmodpredictions = predict(newmodel, newmodS)
(oldmodT, oldmodS) = getsourcedata(oldmodel,kb)
oldmodpredictions = predict(oldmodel, oldmodS)
# calculate overlapping timestamps
interT = [t for t in newmodT if t in oldmodT]
interS = Array{eltype(oldmodS)}(undef, 0)
resolved = false
nointersection = isempty(interT)
# if intersection not empty
if !nointersection
# logic mask to find values with timestamp intersection
timemask = in(interT)
# delete conflicts from newmodT/S only since features must be equal if overlaping
newmask = (!(timemask)).(newmodT)
newmodT = newmodT[newmask]
newmodS = newmodS[newmask,:]
# extract predictions and SourceBlocks for overlapping timestamps
newintersectionprediction = newmodpredictions[timemask.(newmodT)]
oldintersectionprediction = oldmodpredictions[timemask.(oldmodT)]
newintersectionS = newmodS[timemask.(newmodT)]
oldintersectionS = oldmodS[timemask.(oldmodT)]
# delete overlapping values from new and old model data so they are exclusively in the intersection objects
# make bitmasks to delete overlapping values
newdeletionmask = (!(timemask)).(newmodT)
olddeletionmask = (!(timemask)).(oldmodT)
# NOTE may make these views for less garbage collection
newmodT = newmodT[newdeletionmask]
newmodS = newmodS[newdeletionmask,:]
newmodpredictions = newmodpredictions[newdeletionmask]
oldmodT = oldmodT[olddeletionmask]
oldmodS = oldmodS[olddeletionmask,:]
oldmodpredictions = oldmodpredictions[olddeletionmask]
# check if predictions on overlapping timestamps conflict
interS = oldmodS[timemask.(oldmodT),:]
newmodpredictions = predict(newmodel, interS)
oldmodpredictions = predict(oldmodel, interS)
#TODO check somehow and error if too many conflicts or so. ask Thomas
numberofconflicts, solvedintersectionpredictions, interS = _solvetimeconflicts!(par.deconstructionStrategy, interT, newintersectionprediction, oldintersectionprediction, newintersectionS, oldintersectionS)
# check that number of conflicts doesn't exceed critical value
allowedtimeconflicts = par.MaxTimeConflicts isa Int ? MaxTimeConflicts : MaxTimeConflicts*(sum(length.([newmodT, interT, oldmodT])))
if numberofconflicts > allowedtimeconflicts
#TODO error
end
# check each overlapping timestamp for conflicts
# NOTE should think about the effect of class permutations here
resolved = true
end
# if intersection empty or conflicts successfully resolved
......@@ -277,6 +326,33 @@ _timecheckwithtolerancewrapper(max_dist_t::Float64) = ((lmin,lmax),(rmin,rmax))
return dist/range
end
# Helper for resolving prediction conflicts on overlapping timestamps, depending
# on the deconstruction strategy. Resolved target values are written into
# `newpredictions` (mutated in place for :conservative; entries deleted for
# :opportunistic).
# Returns (numberofconflicts, resolvedpredictions, resolvedS): the visible caller
# in σz_decon destructures three values, so returning only the conflict count
# (as before) would fail there.
function _solvetimeconflicts!(strategy::Symbol, ts::Vector{Int}, newpredictions, oldpredictions, newS, oldS)
    conflicts = 0
    inds = eachindex(ts)
    if strategy === :conservative
        # on conflict, the established (old) model's prediction wins
        for i in inds
            if newpredictions[i] != oldpredictions[i]
                conflicts += 1
                newpredictions[i] = oldpredictions[i]
            end
        end
    elseif strategy === :integrative
        # keep the new model's predictions; only count the conflicts
        for i in inds
            if newpredictions[i] != oldpredictions[i]
                conflicts += 1
            end
        end
    else # strategy === :opportunistic
        # drop every conflicting timestamp entirely
        delinds = [newpredictions[i] != oldpredictions[i] for i in inds]
        conflicts += sum(delinds)
        deleteat!(ts, delinds)
        deleteat!(newpredictions, delinds)
        deleteat!(oldpredictions, delinds)
        # also prune the source block so it stays aligned with the predictions
        # NOTE(review): assumes newS supports deleteat! (i.e. is a Vector) — confirm
        deleteat!(newS, delinds)
    end
    return (conflicts, newpredictions, newS)
end
"""
# ? redundancy checking = wenn modelle überlagern, schauen dass keine timestamps doppelt vorkommen
......
......@@ -62,6 +62,7 @@ end
Apply the feature selection methods stored in FeatureSelector to a data Tuple(leanblock::VMS, model_candidates::Array{<:BaseClassifier})
"""
# Unpack the (learnblock, candidates, names) tuple and forward to the 3-argument method.
function (f::FeatureSelector)(par::ConstructivistMLParameters, tup::Tuple{VMS,AbstractVector{<:BaseEstimator},AbstractVector{String}})
    return f(par, tup...)
end
# Shortcut: propagate `nothing` unchanged (e.g. when an earlier stage produced no result).
function (f::FeatureSelector)(par::ConstructivistMLParameters, tup::Nothing)
    return nothing
end
function (f::FeatureSelector)(par::ConstructivistMLParameters, learnblock::VMS{D},candidates::AbstractVector{<:BaseEstimator},names::AbstractVector{String}) where {D}
maxFeatures = par.maxFeatures # threshold for applying feature selection
......
......@@ -68,7 +68,7 @@ function (r::Reconstruct)(kb::KnowledgeBase, par::ConstructivistMLParameters, le
if sum(mask) < 2 # less than 2 classifiers show sufficient accuracy
if par.verbose println("Insufficient number of classifiers left to continue. Skip candidate.") end
else
# TODO ask Thomas what is supposed to happen if e.g. only 2 of 3 estimators have sufficient accuracy. Should reliability be calculated with only the passing? all? abort?
# if e.g. only 2 of 3 estimators have sufficient accuracy, reliability will be calculated with only the passing estimators
# calculate intersubjectivity metric
# note that predictions is given as an adjoint, because the hcat of predictions ends up in shape (n_units, n_raters) and kripp-α expects the more reasonable (n_raters, n_units) (the adjoint is relatively cheap)
......@@ -109,11 +109,13 @@ function (r::Reconstruct)(kb::KnowledgeBase, par::ConstructivistMLParameters, le
if par.verbose println("Reconstruction successful.\nWinner: Candidate ", winnerConstructionMethod) end
writelog(par.Logger, ReconstructionWinnerLog(), winnerReconstructionMethod, bestAccuracy, bestIntersubjectivityValue) # TODO missing uid
#TODO save learnblock to current knowledge level? and what if reconstruction is called by deconstruction
# TODO iff on Level 1, add Learnblock to KB[0]
# retrain winner on whole learnblock without train/test split
winningClassifierRetrained = fit!(clone(winningClassifier), dataAsMatrix(winnerLB), winnerClasses)
# make proper MachineModel
winnerAsMachineModel = MachineModel(extrema(winnerLB.T), winnerReconstructionMethod, par.learningDomain, KnowledgeLevel, winnerConstructionMethod, nextFreeUIDnumber(kb, KnowledgeLevel), winningClassifierRetrained, winnerLB.fields)
return winnerAsMachineModel
sourcestring = ifelse(KnowledgeLevel==1, [winnerLB.Σ[1], winnerLB.Z[1], winnerLB.fields ...], winnerLB.fields)
winnerAsMachineModel = MachineModel(extrema(winnerLB.T), winnerReconstructionMethod, par.learningDomain, KnowledgeLevel, winnerConstructionMethod, nextFreeUIDnumber(kb, KnowledgeLevel), winningClassifierRetrained, sourcestring)
return (winnerAsMachineModel, winnerLB)
end
end
......@@ -126,7 +128,7 @@ Execute Reconstruction process with the specified configuration for a given (lea
Return the winner as tuple (learnblock, estimators) or nothing if Reconstruction failed.
"""
function (r::Reconstruct)( kb::KnowledgeBase, par::ConstructivistMLParameters, learnblocktuple::Tuple{VMS,AbstractVector{<:BaseEstimator},AbstractVector{String}}; KnowledgeLevel::Int = 1)
    # repeat the learnblock for all candidates and delegate to the vector method
    (lb, candids, connames) = learnblocktuple
    # KnowledgeLevel is a keyword argument in this family of methods, so it must
    # be forwarded as a keyword; the previous call passed it positionally, which
    # would not match the delegate's signature
    # NOTE(review): delegate's full signature is outside this view — confirm it
    # also takes KnowledgeLevel as a keyword
    r(kb, par, ([lb for _ in eachindex(candids)], candids, connames); KnowledgeLevel=KnowledgeLevel)
end
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment