% Probabilistic sampling of boulders on an aggrading fan surface, and
% determination of the frequency distribution of the end of aggradation,
% including the uncertainty on the youngest sampled boulder.

% Figure 1 shows histograms of the results of the artificial data
% procedure.
% Figure 2 shows the results of the artificial data experiment that are
% normalized.
% Figure 3 illustrates how the full probability distribution of the
% abandonment window (single line in upper plot) should be weighted and
% shifted to account for age uncertainty on the youngest boulder.
% Figure 4 plots the total probability distribution of abandonment, shows  
% the probability within defined temporal bounds (results given as a
% fraction of 1), and shows the most probable abandonment age.

% Last modified 24.02.2019 by T. Schildgen

% Start from a clean workspace. 'clearvars' removes variables only; the
% broader 'clear all' also flushes functions from memory, which this script
% does not need.
clearvars;

% *************************************************************************
% USER DEFINED VALUES *****************************************************
% *************************************************************************

iterate = 10000;    % number of artificial sampling campaigns 
                    % (>=10k recommended; 100k produces smoother results)

T = 30300;          % duration of activity (yr)
end_dep = 10000;    % end time of deposition (yrs ago)

n = 4;                          % # of boulders sampled during campaign
min_real_sample_age = 121400;    % youngest sample age from real dataset (yr)
sigma = 5200;                    % 1-sigma uncertainty on youngest age (yr)

% Define the time range for which you would like to determine the
% probability of abandonment
min_age_test = 21200;   % younger limit of time range to evaluate (yr)  
max_age_test = 23000;   % older limit of time range to evaluate (yr)

% NOTE(review): the abandonment ages evaluated further down are built from
% the youngest sample age (+/- 3 sigma) minus tao (0..T). With the values
% above, the test window 21200-23000 yr appears to lie far outside that
% range, so the reported window probability would be 0. Confirm whether
% 'min_real_sample_age' or the test window is the intended value.

% *************************************************************************
% *************************************************************************
% *************************************************************************

% Basic sanity checks on the user-defined values, so that bad input fails
% here with a clear message instead of producing empty plots later on.
assert(isscalar(iterate) && iterate >= 1 && iterate == round(iterate), ...
    'iterate must be a positive integer.');
assert(isscalar(n) && n >= 1 && n == round(n), ...
    'n must be a positive integer.');
assert(isscalar(T) && T > 0, 'T must be a positive duration (yr).');
assert(isscalar(sigma) && sigma > 0, ...
    'sigma must be positive (it also sets the time step incr).');
assert(min_age_test < max_age_test, ...
    'min_age_test must be younger (smaller) than max_age_test.');

incr = sigma/10;    % increment of temporal discretization (yr)

% Monte Carlo sampling: each of the 'iterate' artificial campaigns draws 'n'
% boulder ages uniformly from the depositional interval
% [end_dep, end_dep + T].
%
% All draws are made in a single call instead of growing arrays inside
% nested loops. rand fills the n-by-iterate matrix column by column, so
% column j holds the n ages of campaign j, in the same order as drawing one
% value at a time, campaign by campaign.
ages_by_campaign = end_dep + rand(n,iterate)*T;

% all ages selected from all campaigns (column vector, campaign after campaign)
ages_all = ages_by_campaign(:);

% all mean ages from each sampling campaign (dimension given explicitly so
% that n = 1 still yields one value per campaign)
means_all = mean(ages_by_campaign,1).';

% all minimum ages from each sampling campaign
mins_all = min(ages_by_campaign,[],1).';

% min diff. between ages from 1 campaign and true youngest age (tao values):
% the youngest sampled age minus the true end of deposition
young_age_difs = mins_all - end_dep;
    
% Show results of artificial data (Figure 1).
% The figure number is given explicitly, like for Figures 2-4, so that the
% window is reused on every run instead of opening a new, arbitrarily
% numbered figure each time.

figure(1)

% Panel 1: every age drawn in every campaign
subplot(2,2,1)
histogram(ages_all,50)
xlim([end_dep end_dep+T])
title('All randomly selected ages')
xlabel('Age (yr)')
ylabel('Frequency')

% Panel 2: mean age of each campaign
subplot(2,2,2)
histogram(means_all,50)
xlim([end_dep end_dep+T])
title('Means from each sampling campaign')
xlabel('Mean Age (yr)')
ylabel('Frequency')

% Panel 3: youngest age of each campaign
subplot(2,2,3)
histogram(mins_all,50)
xlim([end_dep end_dep+T])
title('Min. boulder age from each sampling campaign')
xlabel('Min. Boulder Age (yr)')
ylabel('Frequency')

% Panel 4: tao, the youngest age of each campaign minus end of deposition
subplot(2,2,4)
histogram(young_age_difs,50)
title('Min. age diffs (youngest boulder sampled minus true aband. age)')
xlim([0 T])
xlabel('Tao (yr)')
ylabel('Frequency')

% Percentiles 0..100 of tao give its empirical cumulative frequency curve.
% prctile accepts the whole vector of percentiles at once.
P = (0:1:100);
tao_probs = prctile(young_age_difs,P);
tao_probs = tao_probs(:);   % one value per percentile, as a column

figure(2)
subplot(2,2,1)
plot(tao_probs,P)
xlim([0 T])
title('Cum. frequency of tao')
xlabel('Tao (yr)')
ylabel('Cumlative frequency')

% Discretize the results according to a defined time interval, 'incr':
% 'discrete_prob' gives the counts of each histogram bin, 'tao' gives the edges of
% the bins (in time). Bin interval is 'incr'.
% The edges are given explicitly, starting at tao = 0. With only 'BinWidth'
% specified, histcounts picks the first edge from the data, so bin 1 is not
% guaranteed to start at 0; the later shift-and-sum step places bin j by its
% index alone and therefore relies on that.
% floor(T/incr)+1 bins always reach past T, so every tao value is counted.
n_tao_bins = floor(T/incr) + 1;
tao_edges = (0:n_tao_bins)*incr;
[discrete_prob,tao] = histcounts(young_age_difs,tao_edges);

subplot(2,2,2)
% drop the last edge (there is one more edge than bins) and shift the
% remaining left edges to the mid-point of each bin
tao = tao(1:end-1) + 0.5*incr;
% norm_prob is the normalized frequency of tao ('iterate' is total # of values)
norm_prob = discrete_prob/iterate;
scatter(tao, norm_prob)
title('Tao distribution')
xlabel('Tao (yr)')
ylabel('Norm. discrete frequency')

subplot(2,2,3)
% same distribution plotted against -tao
negtao = -tao;
scatter(negtao, norm_prob)
title('Age correction needed on youngest boulder')
xlabel('Negative tao (yr)')
ylabel('Norm. discrete frequency')

subplot(2,2,4)
% Age axis covering the youngest real sample age +/- 3 sigma, stepped by
% 'incr'.
half_width = 3*sigma;
x = (min_real_sample_age-half_width):incr:(min_real_sample_age+half_width);
% Normal probability density of the youngest boulder age (normally
% distributed uncertainty is assumed), evaluated at every position of x.
age_norm = normpdf(x,min_real_sample_age,sigma);
plot(x,age_norm,'o');
title('Age of youngest boulder')
xlabel('Age (yr)')
ylabel('Probability')

% Combine the tao distribution with the age uncertainty of the youngest
% boulder: for every position along 'x', a copy of 'norm_prob' is scaled by
% the normal density at that position and shifted by one column per
% position. The column sums of the resulting matrix give the total
% distribution 'S'.

n_tao = numel(norm_prob);   % number of bins in the tao window
n_age = numel(x);           % number of positions along the sample-age axis

% Number of columns needed to hold the tao window at every shift:
% bins in tao window + positions along the sample-age axis.
xaxis_ext = n_tao + n_age;

% One row per position along 'x'; columns not covered by the shifted copy
% stay zero.
cumulat_norm_probs_wt_shft = zeros(n_age,xaxis_ext);
for row = 1:n_age
    % columns occupied by the copy belonging to this row (shift = row index)
    cols = row:(row+n_tao-1);
    % scale by the normal density at x(row) and drop the copy into place
    cumulat_norm_probs_wt_shft(row,cols) = age_norm(row)*norm_prob;
end

% Sum by column to get total frequency distribution of tao
S = sum(cumulat_norm_probs_wt_shft);

% Axis vectors used for plotting: 'xaxis' counts time in steps of 'incr',
% and the plots show max_age_plot - xaxis, i.e. age decreasing with column.
% NOTE(review): plotted this way, tao bin j appears to sit at j*incr (its
% upper edge) rather than at the bin mid-point stored in 'tao' - confirm
% that this half-bin offset is intended.
max_age_plot = min_real_sample_age + 3*sigma;
xaxis_length = incr*xaxis_ext;
xaxis = incr:incr:xaxis_length;

figure(3)
% Start from an empty figure. 'hold on' below stays set on the axes after
% the script ends, so re-running without clearing would draw the new curves
% on top of the ones from the previous run.
clf

% Upper panel: individual weighted and shifted tao distributions
subplot(2,1,1)
for i = 1:3:length(x) % plot 1 out of every 3, otherwise plot is too dense
    plot(max_age_plot-xaxis,cumulat_norm_probs_wt_shft(i,:))
    hold on
end
title('Individual frequency plots - Probable time of surface abandonment')
xlabel('Age (yr)')
ylabel('Probability')

% Lower panel: their sum, the total distribution
subplot(2,1,2)
plot(max_age_plot-xaxis,S)
title('Probable time of surface abandonment')
xlabel('Age (yr)')
ylabel('Probability')

% Find most probable value (time of maximum probability).
% max returns the index of the first maximum, so 'index' is always a single
% value even if several columns tie for the maximum.
[~,index] = max(S);
most_prob_age = max_age_plot - xaxis(index);

% Calculating area (total probability) under part of the curve
% (limited by 'min_age_test' and 'max_age_test')

area_tot = trapz(xaxis,S)       % Should be close to 1

% Re-express the distribution on an age axis that increases left to right
new_xaxis = max_age_plot-xaxis;
flipS = fliplr(S);
flipx = fliplr(new_xaxis);

figure(4)
% Start from an empty figure: 'hold on' persists on the axes between runs,
% so without clearing, curves and text from a previous run would remain.
clf
plot(flipx,flipS,'-k','LineWidth',2)
hold on

% The distribution is only evaluated between flipx(1) and flipx(end). A test
% window with no overlap with that range has probability 0; say so instead
% of silently indexing with empty values.
window_on_axis = ~(max_age_test < flipx(1) || min_age_test > flipx(end));

if window_on_axis
    % First grid point at or above each window limit. If the older limit is
    % beyond the evaluated range, the window is clamped to the last grid
    % point.
    x_lower = find(flipx >= min_age_test,1,'first');
    x_upper = find(flipx >= max_age_test,1,'first');
    if isempty(x_upper)
        x_upper = numel(flipx);
    end
    S_lower = flipS(x_lower);
    S_upper = flipS(x_upper);

    % Plot results and add numerical information to the plot

    xaxis_test = flipx(x_lower:x_upper);
    S_test = flipS(x_lower:x_upper);
    plot(xaxis_test,S_test,'-r','LineWidth',1)
    area_test = trapz(xaxis_test,S_test)

    % vertical markers at the two window limits, from 0 up to the curve
    plot([min_age_test min_age_test],[0 S_lower],'-r','LineWidth',1)
    plot([max_age_test max_age_test],[0 S_upper],'-r','LineWidth',1)
    legend_entries = {'Total probability distribution','Test window'};
else
    warning('abandonment:testWindowOutsideRange', ...
        ['Test window [%g, %g] yr does not overlap the evaluated age ' ...
         'range [%g, %g] yr; probability within the window is 0.'], ...
        min_age_test,max_age_test,flipx(1),flipx(end));
    area_test = 0
    legend_entries = {'Total probability distribution'};
end

str = {'Probability of abandonment','during test window (red): '};
txt = [str,num2str(area_test)];
y_pos = max(S)/2;   % places text box half way up y-axis
text(flipx(2),y_pos,txt)
str = {'Most probable abandonment','age (yrs ago): '};
txt = [str,num2str(most_prob_age)];
y_pos = max(S)/4;   % places text box one quarter way up y-axis
text(flipx(2),y_pos,txt)
legend(legend_entries{:},'Location','northwest')
title('Probability distribution of surface abandonment')
xlabel('Age (yr)')
ylabel('Probability')