%% MBS Chemistry data calibrations
%% Load data
 % Import the raw CFA data as variable "CFA_runs": a 1 x (number of runs)
 % cell array in which each cell contains an array of type double holding
 % the full raw dataset from one CFA run.
 %
 % load() called with an output argument returns a struct (MAT-file) or a
 % numeric matrix (delimited text file), never a cell array, so the result
 % is normalised here to the cell layout that the rest of the script
 % indexes with CFA_runs{1,run}.

CFA_loaded = load(CFA_runs_filename);
if isstruct(CFA_loaded)
    % MAT-file: prefer a variable called CFA_runs, otherwise take the first
    % variable stored in the file.
    if isfield(CFA_loaded,'CFA_runs')
        CFA_runs = CFA_loaded.CFA_runs;
    else
        CFA_fields = fieldnames(CFA_loaded);
        if isempty(CFA_fields)
            error('MBS:emptyCFAFile','No variables found in the CFA runs file.');
        end
        CFA_runs = CFA_loaded.(CFA_fields{1});
    end
else
    % Text file: load() returned one numeric matrix, i.e. a single run.
    CFA_runs = CFA_loaded;
end
if ~iscell(CFA_runs)
    CFA_runs = {CFA_runs}; % wrap a single run so that {1,run} indexing works
end
CFA_runs = CFA_runs(:).'; % force a 1 x (number of runs) row of cells
clear CFA_loaded CFA_fields

 % Import variable "chem_ref", in which each row contains the relevant
 % calibration information for each analyte for one CFA run. Columns are as
 % follows:
 % 1: run number;
 % 2: delay time for conductivity;
 % 3: Ca2+ delay time;
 % 4: Ca2+ baseline at start of run;
 % 5: Ca2+ baseline at end of run;
 % 6: Ca2+ standard calibration coefficient (alpha);
 % 7: Ca2+ r-squared value for Ca2+ calibration;
 % 8: NH4+ delay time;
 % 9: NH4+ baseline at start of run;
 % 10: NH4+ baseline at end of run;
 % 11: NH4+ standard calibration coefficient (alpha);
 % 12: NH4+ r-squared value for NH4+ calibration;
 % 13: Na+ delay time;
 % 14: Na+ baseline;
 % 15: Na+ standard calibration coefficient 1 (alpha1);
 % 16: Na+ standard calibration coefficient 2 (alpha2);
 % 17: Na+ r-squared value for Na+ calibration;
 % 18: H2O2 delay time;
 % 19: H2O2 baseline;
 % 20: H2O2 standard calibration coefficient (alpha);
 % 21: H2O2 r-squared value for H2O2 calibration;
 %
 % NOTE(review): the calcium and ammonium calibration code later in this
 % script reads columns 4/5/7 (Ca2+ delay/baseline/alpha) and 9/10/11
 % (NH4+ delay/baseline/alpha), which does not agree with the list above.
 % Confirm the real column layout against the chem_ref file.
chem_ref = load(chem_ref_filename);
if isstruct(chem_ref)
    % load() returns a struct for MAT-files; unwrap the numeric table so
    % that chem_ref(run,column) indexing works for both file types.
    if isfield(chem_ref,'chem_ref')
        chem_ref = chem_ref.chem_ref;
    else
        chem_ref_fields = fieldnames(chem_ref);
        if isempty(chem_ref_fields)
            error('MBS:emptyChemRefFile','No variables found in the chem_ref file.');
        end
        chem_ref = chem_ref.(chem_ref_fields{1});
    end
    clear chem_ref_fields
end

%% Create holding cell array for calibrated data for each run
% column 1 = raw epoch time;
% column 2 = raw acquisition time;
% column 3 = abs flowrate;
% column 4 = conductivity;
% column 5 = dust (calibrated);
% column 6 = calcium (calibrated);
% column 7 = ammonium (calibrated);
% column 8 = pH (calibrated);
% column 9 = peroxide (calibrated);
% column 10 = sodium (calibrated);
% column 11 = sample flag (when sample reaches melter);
% column 12 = new bag;
% column 13 = break flag;
% column 14 = run number
%
% Only the time, flag and run-number columns are copied here; columns 3-10
% start as zeros and are filled in by the calibration sections that follow.
num_runs = numel(CFA_runs); % total number of CFA runs (was hard-coded to 1)
calibrated_chem = cell(1,num_runs);

% Raw CFA_runs columns and the calibrated_chem columns they are copied to:
% epoch time, acq time, sample flag, new bag, break flag, run number.
raw_cols = [3 4 24 25 26 28];
out_cols = [1 2 11 12 13 14];

for run = 1:num_runs
    raw_run = CFA_runs{run};
    holder = zeros(size(raw_run,1),14); % preallocate all 14 documented columns
    holder(:,out_cols) = raw_run(:,raw_cols);
    calibrated_chem{1,run} = holder;
end
sampleflag = 24; % CFA_runs column holding the sample flag (copied to column 11 above)

%% Per-run calibration
% For every run this loop reads the raw channels from CFA_runs{1,run},
% smooths them with a 10-point moving window, applies the calibration
% constants from row "run" of chem_ref, shifts each series back by its delay
% and writes the result into the matching column of calibrated_chem{1,run}.
%
% NOTE(review): circshift wraps around, so after shifting by -delay the last
% "delay" samples of every series hold values taken from the start of the
% run. Confirm that downstream code trims or ignores that tail.
%
% NOTE(review): column 8 (pH) of calibrated_chem is never written in this
% loop and therefore keeps its initial fill value.
for run = 1:num_runs

    raw_run = CFA_runs{1,run}; % full raw dataset for this run

%% Conductivity - calibrated_chem column 4
    cond_raw = raw_run(:,13); % Conductivity data for this run

    % smoothing, 10 seconds (10-point moving median)
    cond_smooth = smoothdata(cond_raw,'movmedian',10,'omitnan');

    % shift for delay time
    cond_delay = chem_ref(run,2); % delay time from chem_ref file
    cond = circshift(cond_smooth,-1*(cond_delay));

    % clean at sharp spikes
    cond_diff = diff(cond); % differential of conductivity data

    % Remove (overwrite with NaN) 5 seconds before and 25 seconds after any
    % data point whose differential exceeds the threshold defined here. The
    % window is clamped to the valid index range so that a spike near either
    % end of the run can neither index below 1 nor lengthen the vector.
    cond_contamination_idx = find(cond_diff>0.1); % indices above threshold
    n_cond = length(cond);
    for i = 1:length(cond_contamination_idx)
        first_bad = max(1,cond_contamination_idx(i)-5);
        last_bad = min(n_cond,cond_contamination_idx(i)+25);
        cond(first_bad:last_bad) = NaN;
    end

    % write to appropriate column in calibrated_chem variable
    calibrated_chem{1,run}(:,4) = cond;

%% DUST - calibrated_chem column 5

    % flowrate correction: smooth flowrate to eliminate extraneous spikes
    flowrate = smoothdata(abs(raw_run(:,9)),'movmean',10,'omitnan');

    dust = raw_run(:,15); % Dust data for this run
    % Convert from counts per second to counts per ml using flowrate (in
    % ml/min). A smoothed flowrate of zero gives Inf/NaN here.
    dust_cal = (dust*60)./(flowrate);

    % smoothing
    dust_smooth = smoothdata(dust_cal,'movmedian',10,'omitnan'); % 10 second smoothing

    % shift for delay time
    dust_delay = chem_ref(run,2); % use same delay as conductivity sample delay
    flowrate = circshift(flowrate,-1*dust_delay);
    dust_smooth = circshift(dust_smooth,-1*dust_delay);

    % remove 2 mins (25 samples before to 95 after) at new bags and breaks to
    % remove contaminated signal
    NB = find(calibrated_chem{1,run}(:,12)==1); % ice break at new ice stick
    brk = find(calibrated_chem{1,run}(:,13)==1); % ice breaks measured in lab
    flag_idx = [brk; NB];

    % Clamp each window to the data: an unclamped window errors on a flag in
    % the first 25 samples and silently lengthens the vector on a flag in the
    % last 95 samples, which then breaks the column assignment below.
    n_dust = numel(dust_smooth);
    for i = 1:length(flag_idx)
        first_bad = max(1,flag_idx(i)-25);
        last_bad = min(n_dust,flag_idx(i)+95);
        dust_smooth(first_bad:last_bad) = NaN;
    end

    % write to appropriate columns in calibrated_chem variable
    calibrated_chem{1,run}(:,3) = flowrate;
    calibrated_chem{1,run}(:,5) = dust_smooth;
    % NOTE(review): column 16 is not part of the documented 14-column layout
    % and duplicates column 3; kept so the output shape is unchanged.
    calibrated_chem{1,run}(:,16) = flowrate;

%% Calcium - calibrated_chem column 6 - not included in manuscript dataset
    Ca_raw = raw_run(:,21); % Calcium data for this run

    % smoothing
    Ca_smooth = smoothdata(Ca_raw,'movmedian',10,'omitnan'); % 10 second smoothing to remove bubble spikes

    % calibration
    % NOTE(review): the chem_ref column list documents Ca2+ as 3 = delay,
    % 4/5 = baseline start/end, 6 = alpha, 7 = r-squared, but columns 7, 5
    % and 4 are read here as alpha, baseline and delay. Confirm which is right.
    Ca_alpha = chem_ref(run,7);
    Ca_BL = chem_ref(run,5); % Baseline from chem_ref file

    Ca_calibrated = (Ca_smooth - Ca_BL)./Ca_alpha; % linear calibration

    % shift for delay time
    Ca_delay = chem_ref(run,4)+chem_ref(run,2); % delay time plus delay from melter to conductivity
    Ca = circshift(Ca_calibrated,-1*Ca_delay);

    % write to appropriate column in calibrated_chem variable
    calibrated_chem{1,run}(:,6) = Ca(1:length(calibrated_chem{1,run}(:,1)));

%% Ammonium - calibrated_chem column 7
    NH4_raw = raw_run(:,23);

    % NOTE(review): the chem_ref column list documents NH4+ as 8 = delay,
    % 9/10 = baseline start/end, 11 = alpha, but columns 10 and 9 are read
    % here as baseline and delay. Confirm which is right.
    NH4_BL = chem_ref(run,10); % Baseline from chem_ref file

    % smoothing
    NH4_smooth = smoothdata(NH4_raw,'movmedian',10,'omitnan'); % 10 second smoothing to remove extraneous spikes

    % calibration
    NH4_alpha = chem_ref(run,11);
    NH4_calibrated = (NH4_smooth - NH4_BL)./NH4_alpha; % linear calibration

    % shift for delay time
    NH4_delay = chem_ref(run,9)+chem_ref(run,2); % delay time plus delay from melter to conductivity
    NH4 = circshift(NH4_calibrated,-1*NH4_delay);

    % remove 80 sec (20 samples before to 60 after) at new bags and breaks to
    % remove contaminated signal; windows are clamped to the valid index range
    n_NH4 = numel(NH4);
    for i = 1:length(flag_idx)
        first_bad = max(1,flag_idx(i)-20);
        last_bad = min(n_NH4,flag_idx(i)+60);
        NH4(first_bad:last_bad) = NaN;
    end

    % write to appropriate column in calibrated_chem variable
    calibrated_chem{1,run}(:,7) = NH4;

%% Peroxide - calibrated_chem column 9
    h2o2_raw = raw_run(:,22);

    % chem calibration
    h2o2_baseline = chem_ref(run,19);
    h2o2_alpha = chem_ref(run,20);
    h2o2_cal = (h2o2_raw - h2o2_baseline)./h2o2_alpha; % linear calibration

    % smoothing
    h2o2_smooth = smoothdata(h2o2_cal,'movmedian',10,'omitnan'); % 10 second smoothing to remove extraneous spikes

    % shift for delay time
    h2o2_delay = chem_ref(run,18)+chem_ref(run,2); % delay time plus delay from melter to conductivity
    h2o2 = circshift(h2o2_smooth,-1*h2o2_delay);

    % write to appropriate column in calibrated_chem variable
    calibrated_chem{1,run}(:,9) = h2o2;

%% Sodium - calibrated_chem column 10
    Na_raw = raw_run(:,19);

    % smoothing
    Na_smooth = smoothdata(Na_raw,'movmedian',10,'omitnan'); % 10 second smoothing to remove extraneous spikes

    % chem calibration
    % Na_baseline_start is not defined anywhere in this script. Honour it if
    % the caller has put it in the workspace; otherwise use the Na+ baseline
    % documented as column 14 of chem_ref.
    if exist('Na_baseline_start','var')
        Na_baseline = Na_baseline_start;
    else
        Na_baseline = chem_ref(run,14);
    end
    Na_alpha1 = chem_ref(run,15);
    Na_alpha2 = chem_ref(run,16);
    % nonlinear calibration for sodium method
    % NOTE(review): log10/log of a non-positive argument returns complex
    % values in MATLAB, which would make the whole output matrix complex.
    Na_cal = -log(1+log10(Na_smooth./Na_baseline)/Na_alpha1)/Na_alpha2;

    % shift for delay time
    Na_delay = chem_ref(run,13)+chem_ref(run,2); % delay time plus delay from melter to conductivity
    Na = circshift(Na_cal,-1*Na_delay);

    % write to appropriate column in calibrated_chem variable
    calibrated_chem{1,run}(:,10) = Na;
end