# --------------------------------------------------------------------------
## iso2k_sampleCode.py
# Modified 6/11/2020 
# Written by Sylvia Dee and Nick McKay
# <sylvia.dee@rice.edu> <nick@nau.edu>
# Purpose: Example workflow for filtering and querying data records
# Filter records from full Iso2k database
# (translated from Dr Georgina Falster's R code)
# Code offered as-is. For questions or feedback on LiPD-utilities code, please contact Nick <nick@nau.edu>, or post an issue on github at https://github.com/nickmckay/lipd-utilities
# --------------------------------------------------------------------------
#
# Records in the Iso2k database are provided as published (i.e., not re-calibrated or validated).
# This preserves the large amount of information contained within water isotope proxy measurements 
# that would be lost if condensed to reconstruct discrete
# variables. 

# Rather, we leave it to the database users to filter and assess records as needed.
# For initial querying of the database, in nearly all cases, we recommend first filtering by the following:

# 1. variableName = ‘d18O’ or ‘d2H’ (excludes any non-isotopic data)
# 2. paleoData_units = ‘permil’ (excludes records published as z-scores or anomalies)
# 3. paleoData_iso2kPrimaryTimeseries = ‘TRUE’ (includes only primary time series for each site)

# Additional filtering of records should be performed using Level 1 or Level 2 fields. For example:

# isotopeInterpretation1_variable = ‘P_isotope’ (includes only records where the first-order control of isotopic
# variability is the isotopic composition of precipitation)

# paleoData_description = ‘carbonate’ or ‘terrestrial biomarker’ or ‘tree ring cellulose’
# (to extract terrestrial archives sensitive to P_isotope aside from ice cores), or:

# paleoData_inferredMaterial = ‘groundwater’ or ‘soil water’ or 
# ‘lake water’ (accomplishes similar results to the above)

# Additional filtering of records may be useful with other Level 2 fields, for example:

# climateInterpretation1_variable = contains ‘P’ or ‘Precipitation_amount’ or ‘P_amount’ (to extract only records
# where authors’ primary climatic interpretation was based on the amount effect)

# --------------------------------------------------------------------------
# Citation: Konecky et al. (2020) The Iso2k Database: A global compilation of 
# paleo-d18O and d2H records to aid understanding of Common Era climate
# Database is available for download from https://doi.org/10.25921/57j8-vs18 or http://lipdverse.org/iso2k/current_version/
# 
# --------------------------------------------------------------------------
#
# Notes: Current for database version 1.0.0
#   
# URL: http://lipdverse.org/iso2k/current_version/
# --------------------------------------------------------------------------

# load('/Users/npm4/GitHub/lipdverse/html/iso2k/1_0_0/iso2k1_0_0.mat')

# --------------------------------------------------------------------------

# INITIALIZATION:
# --------------------------------------------------------------------------

from pylab import *
import sys, os, cdtime, cdutil, cdms2,  MV2, time, datetime
import numpy as np
import matplotlib.pyplot as plt
from math import exp
import pylab as Py
import pickle
import gzip

# --------------------------------------------------------------------------
# Change to working directory
# --------------------------------------------------------------------------

# NOTE: a bare `cd <path>` is an IPython shell shortcut and a SyntaxError in
# plain Python; os.chdir is the portable equivalent.
# The path below is machine-specific -- edit it to point at the folder that
# holds iso2k1_0_0.pkl.
try:
    os.chdir('/rdf/sd75/sylvia/Iso2k/')
except OSError as err:
    # Report and keep going, so the script can still be run from a directory
    # that already contains the pickle file.
    print('Could not change working directory:', err)

# Open Pickle File: load the serialization

# iso2k=pickle.load(open('iso2k1_0_0.pkl')) # download here: http://lipdverse.org/iso2k/current_version/

# # OR:

# iso2k = '/Users/npm4/GitHub/lipdverse/html/iso2k/current_version/iso2k1_0_0.pkl'

# ## look at individual records

# print(iso2k.keys()) #prints keys

# --------------------------------------------------------------------------
# 1. LOAD DATA
# --------------------------------------------------------------------------

#D = lipd.readLipd('/Users/npm4/Dropbox/Temp12kSerialization/Temp12k/Temp12k_directory_LiPD_files')
#TS = lipd.extractTs(D)

# NOTE: unpickling can execute arbitrary code. Only load an Iso2k pickle
# obtained from a trusted source (see the download URLs in the header).
# The context manager guarantees the file handle is closed after loading.
with open('iso2k1_0_0.pkl', "rb") as pickles:
    pTS = pickle.load(pickles)

# the 'TS' entry of the serialization holds the individual time series
TS = pTS['TS']

# In [15]: len(TS)
# Out[15]: 1955

# function to extract data from structure 'TS'

def pullTsVariable(ts, var, default='missing'):
    """Return the value stored under ``var`` in one time series record.

    Parameters
    ----------
    ts : container supporting ``in`` and ``[]``
        A single entry of ``TS``.
    var : hashable
        Name of the field to look up (e.g. ``'paleoData_units'``).
    default : object, optional
        Value returned when ``var`` is not present in ``ts``.
        Defaults to the string ``'missing'``.

    Returns
    -------
    object
        ``ts[var]`` if ``var`` is in ``ts``, otherwise ``default``.
    """
    return ts[var] if var in ts else default


# --------------------------------------------------------------------------
# 2. EXTRACT DATA FIELDS, UNITS, VAR NAMES
# --------------------------------------------------------------------------

# For each field below, walk over every time series in TS and collect the
# field's value (pullTsVariable substitutes 'missing' when a record lacks it).

# name of the measured variable for each data series
variableName = np.array(
    [pullTsVariable(record, 'paleoData_variableName') for record in TS]
)

# units in which each data series is reported
units = np.array(
    [pullTsVariable(record, 'paleoData_units') for record in TS]
)

# flag telling whether the series is the primary time series for its site;
# only those records are kept by the filtering further down
primary = np.array(
    [pullTsVariable(record, 'paleoData_iso2kPrimaryTimeseries') for record in TS]
)


# --------------------------------------------------------------------------
# 3. CONDITIONALLY FILTER OUT SPECIFIC TIME SERIES 
# --------------------------------------------------------------------------

# locate the indices of the time series that satisfy each individual criterion:
# variable is d18O, variable is d2H, units are permil, series is primary

isd18O = np.where(variableName == 'd18O')
isd2H = np.where(variableName == 'd2H')
isPermil = np.where(units == 'permil')
isPrimary = np.where(primary == 'TRUE')


# indices of primary records which report d18O (resp. d2H) in units of permil

d18Oin = np.intersect1d(np.intersect1d(isPermil, isd18O), isPrimary)
d2Hin = np.intersect1d(np.intersect1d(isPermil, isd2H), isPrimary)

# exclude all non-isotopic data: keep anything that passed either filter

isIso = np.union1d(d2Hin, d18Oin)

allIsoTS = np.asarray(TS)[isIso].tolist()

# A bare `len(allIsoTS)` only displays a value inside an interactive session;
# print it so the count is also reported when the file is run as a script.
# (The original interactive session recorded 597.)
print(len(allIsoTS))

# --------------------------------------------------------------------------
# 4. EXPLORE ISOTOPE INTERPRETATION AND DATA TYPES
# --------------------------------------------------------------------------

# collect the free-text 'paleoData_description' of every time series
# ('missing' where a record has none)
description = np.array(
    [pullTsVariable(record, 'paleoData_description') for record in TS]
)

# what type of proxy record is this? index the records by description

carbonates = np.where(description == 'carbonate')

cellulose = np.where(description == 'cellulose')

terrestrial = np.where(description == 'terrestrial biomarker')

# similarly, this can be performed for inferred material:
# paleoData_inferredMaterial = ‘groundwater’ or ‘soil water’ or ‘lake water’ (accomplishes similar results to the above)

# --------------------------------------------------------------------------
# 5. PLOT TIME SERIES
# --------------------------------------------------------------------------

# let's choose a cellulose record

# candidate records: primary d18O series in permil that are described as cellulose
example_timeseries = np.intersect1d(d18Oin, cellulose)

# Index of the record to display.
# NOTE(review): 811 is hard-coded and presumably one of the indices in
# example_timeseries for database v1.0.0 -- confirm before changing versions.
# The legend label further down is tied to this particular record.
record_index = 811

# print the record's information (a bare `TS[811]` only displays in a REPL)
print(TS[record_index])

# Read the fields straight from the selected record. Building an array of
# every record's values just to pick one element is wasteful, and raises
# ValueError on ragged input with NumPy >= 1.24.
data = pullTsVariable(TS[record_index], 'paleoData_values')

year = pullTsVariable(TS[record_index], 'year')


# # ======================================================================

plt.plot(year, data, c='k', label=r'LS00WODE', linewidth=1.0)
plt.xlabel(r'Time ( Years A.D. )', fontsize=18)
plt.ylabel(r'Permil', fontsize=18)
plt.legend(fontsize=14)
# pass a real boolean: matplotlib's grid() expects bool/None for visibility
plt.grid(True, color='lightgrey')
plt.tick_params(axis='both', which='major', labelsize=13)
plt.title(r'Individual Iso2k Cellulose Record ', fontsize=20)

plt.show()
# # ======================================================================



