% --------------------------------------------------------------------------
%
% Script name: Iso2k sample code
%
% Purpose of script: Filter records from full Iso2k database
%
% Script author: Nick McKay (translated from Dr Georgina Falster's R code)
%
% Date created: 2020-04-01
%
% Email: nick@nau.edu
%
% Citation: Konecky et al. (2020) The Iso2k Database: A global compilation of paleo-d18O and d2H records to aid understanding of Common Era climate
% Database is available for download from https://doi.org/10.25921/57j8-vs18 or http://lipdverse.org/iso2k/current_version/
%
% Code offered as-is. For questions or feedback on LiPD-utilities code, please contact Nick <nick@nau.edu>, or post an issue on GitHub at https://github.com/nickmckay/lipd-utilities
% --------------------------------------------------------------------------
%
% Notes: Current for database version 1.0.0
%   
%
% --------------------------------------------------------------------------


%load the serialization
% The default path below is the original author's local copy of the file.
% To use your own download, define iso2kFile in the workspace before
% running this script, e.g.
%   iso2kFile = '/path/to/iso2k1_0_0.mat';
if ~exist('iso2kFile','var') || isempty(iso2kFile)
    iso2kFile = '/Users/npm4/GitHub/lipdverse/html/iso2k/1_0_0/iso2k1_0_0.mat';
end
load(iso2kFile)

% everything below indexes into the sTS structure array, so stop here with
% a clear message if it is not in the workspace after loading
if ~exist('sTS','var')
    error('iso2k:missingVariable', ...
        'The variable sTS is not available after loading "%s".', iso2kFile);
end

%define a helper for cell arrays with mixed content:
%keeps only the character entries of X and returns their unique values
uniqueCell = @(X) unique(X(cellfun(@ischar,X)));




%% look at individual records

% paleoData_iso2kUI value of every timeseries, as a column cell array
TSids = {sTS.paleoData_iso2kUI}';

% pull a single record out of the full list by its identifier
matchesRecord = strcmp('MS12CCCH01b',TSids);
findRecordName = find(matchesRecord);
recordTS = sTS(findRecordName);

% site name of every timeseries
siteNames = {sTS.geo_siteName}';

% locate every timeseries whose site name contains 'bahamas'
% (site names are lower-cased first, so the match ignores case)
bahamasHits = strfind(lower(siteNames),'bahamas');
findSite = find(~cellfun(@isempty,bahamasHits));
selectedSiteTS = sTS(findSite);

% list the distinct site names among the Bahamas records
bahamasSiteNames = siteNames(findSite);
sort(unique(bahamasSiteNames))

%% initial filtering of the database, using Level 1 fields


% starting with the entire database, keep only records that
%   - have water isotope proxy data (variable name d18O or d2H),
%   - have units of per mille ('permil'), and
%   - are flagged as the primary timeseries ('TRUE')

variableName = {sTS.paleoData_variableName}';
units = {sTS.paleoData_units}';
primaryTS = {sTS.paleoData_iso2kPrimaryTimeseries}';

% conditions shared by both isotope systems
isPrimaryPermil = strcmp(primaryTS,'TRUE') & strcmp(units,'permil');

% indices into sTS for each isotope system (transposed to row vectors so
% they can be concatenated side by side)
isd18O = find(strcmp(variableName,'d18O') & isPrimaryPermil)';
isd2H = find(strcmp(variableName,'d2H') & isPrimaryPermil)';

% all d18O indices first, followed by all d2H indices
isIso = [isd18O, isd2H];

% subset of the full timeseries: O or H isotope records that are also
% flagged as primary
allIsoTS = sTS(isIso);

% see how many records are in this filtered subset of the database
numel(allIsoTS)


%% additional filtering of the database, using Level 1 or Level 2 fields

% from the filtered isotope records, build two subsets:
%   - records whose rank 1 isotope interpretation is the isotopic
%     composition of precipitation ('P_isotope'), and
%   - records whose inferred material is lake water, lagoon water,
%     groundwater or soil water

interpretation = {allIsoTS.isotopeInterpretation1_variableGroup}';
description = {allIsoTS.paleoData_description}';
inferredMaterial = {allIsoTS.paleoData_inferredMaterial}';

% check names
uniqueCell(description)
uniqueCell(inferredMaterial)

%% create filters
isPrecipIsotope = find(strcmp(interpretation,'P_isotope'));

% a record counts as terrestrial when its inferred material exactly
% matches any entry of this list
terrestrialMaterials = {'lake water','lagoon water','groundwater','soil water'};
terrestrialMask = false(size(inferredMaterial));
for k = 1:numel(terrestrialMaterials)
    terrestrialMask = terrestrialMask | strcmp(inferredMaterial,terrestrialMaterials{k});
end
isTerrestrial = find(terrestrialMask);

isoPIso = allIsoTS(isPrecipIsotope);
isoTerrestrial = allIsoTS(isTerrestrial);

%% view descriptions for the terrestrial records. How many are there of each?
descriptionTerrestrial = {isoTerrestrial.paleoData_description}';

% drop records that have no description before counting
hasDescription = ~cellfun(@isempty,descriptionTerrestrial);
descriptionTerrestrial = descriptionTerrestrial(hasDescription);

% frequency table of the descriptions; the statement is deliberately left
% without a trailing semicolon so that the table is printed
descStats = tabulate(descriptionTerrestrial)


%% additional filtering of the database, using Level 2 fields

% from the filtered isotope records, restrict to records where
% the authors' primary climatic interpretation was based on the amount effect

climInterpretation = {allIsoTS.climateInterpretation1_variable}';

% check names
uniqueCell(climInterpretation)

% blank out entries that are empty or are not text: lower and strfind only
% accept cell arrays of character vectors, so a single non-char cell
% (e.g. a numeric placeholder) would otherwise stop the script with an error
notText = cellfun(@isempty,climInterpretation) | ~cellfun(@ischar,climInterpretation);
climInterpretation(notText) = {''};

% create filter: keep interpretations that contain 'p_' or 'precipitation',
% or that are exactly 'p' (all comparisons ignore case)
% NOTE: 'p_' is matched anywhere in the string, not only at its start, so
% review isoAmountNames below for unintended matches
climLower = lower(climInterpretation);
isAmount = find(~cellfun(@isempty,strfind(climLower,'p_')) | ...
    ~cellfun(@isempty,strfind(climLower,'precipitation')) | ...
    strcmp(climLower,'p'));

isoAmount = allIsoTS(isAmount);
 
% how many records were retained? 
length(isoAmount)

% what are the names of the climatic interpretations that were retained?
isoAmountNames = uniqueCell({isoAmount.climateInterpretation1_variable}');

isoAmountNames % maybe some additional filtering to be done here

